pixeltable 0.4.14__py3-none-any.whl → 0.4.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of pixeltable has been flagged as potentially problematic.

Files changed (64)
  1. pixeltable/__init__.py +6 -1
  2. pixeltable/catalog/catalog.py +107 -45
  3. pixeltable/catalog/column.py +7 -2
  4. pixeltable/catalog/table.py +1 -0
  5. pixeltable/catalog/table_metadata.py +5 -0
  6. pixeltable/catalog/table_version.py +100 -106
  7. pixeltable/catalog/table_version_handle.py +4 -1
  8. pixeltable/catalog/update_status.py +12 -0
  9. pixeltable/config.py +6 -0
  10. pixeltable/dataframe.py +11 -5
  11. pixeltable/env.py +52 -19
  12. pixeltable/exec/__init__.py +2 -0
  13. pixeltable/exec/cell_materialization_node.py +231 -0
  14. pixeltable/exec/cell_reconstruction_node.py +135 -0
  15. pixeltable/exec/exec_node.py +1 -1
  16. pixeltable/exec/expr_eval/evaluators.py +1 -0
  17. pixeltable/exec/expr_eval/expr_eval_node.py +14 -0
  18. pixeltable/exec/expr_eval/globals.py +2 -0
  19. pixeltable/exec/globals.py +32 -0
  20. pixeltable/exec/object_store_save_node.py +1 -4
  21. pixeltable/exec/row_update_node.py +16 -9
  22. pixeltable/exec/sql_node.py +107 -14
  23. pixeltable/exprs/__init__.py +1 -1
  24. pixeltable/exprs/arithmetic_expr.py +10 -11
  25. pixeltable/exprs/column_property_ref.py +10 -10
  26. pixeltable/exprs/column_ref.py +2 -2
  27. pixeltable/exprs/data_row.py +106 -37
  28. pixeltable/exprs/expr.py +9 -0
  29. pixeltable/exprs/expr_set.py +14 -7
  30. pixeltable/exprs/inline_expr.py +2 -19
  31. pixeltable/exprs/json_path.py +45 -12
  32. pixeltable/exprs/row_builder.py +54 -22
  33. pixeltable/functions/__init__.py +1 -0
  34. pixeltable/functions/bedrock.py +7 -0
  35. pixeltable/functions/deepseek.py +11 -4
  36. pixeltable/functions/llama_cpp.py +7 -0
  37. pixeltable/functions/math.py +1 -1
  38. pixeltable/functions/ollama.py +7 -0
  39. pixeltable/functions/openai.py +4 -4
  40. pixeltable/functions/openrouter.py +143 -0
  41. pixeltable/functions/video.py +123 -9
  42. pixeltable/functions/whisperx.py +2 -0
  43. pixeltable/functions/yolox.py +2 -0
  44. pixeltable/globals.py +56 -31
  45. pixeltable/io/__init__.py +1 -0
  46. pixeltable/io/globals.py +16 -15
  47. pixeltable/io/table_data_conduit.py +46 -21
  48. pixeltable/iterators/__init__.py +1 -0
  49. pixeltable/metadata/__init__.py +1 -1
  50. pixeltable/metadata/converters/convert_40.py +73 -0
  51. pixeltable/metadata/notes.py +1 -0
  52. pixeltable/plan.py +175 -46
  53. pixeltable/share/publish.py +0 -1
  54. pixeltable/store.py +2 -2
  55. pixeltable/type_system.py +5 -3
  56. pixeltable/utils/console_output.py +4 -1
  57. pixeltable/utils/exception_handler.py +5 -28
  58. pixeltable/utils/image.py +7 -0
  59. pixeltable/utils/misc.py +5 -0
  60. {pixeltable-0.4.14.dist-info → pixeltable-0.4.16.dist-info}/METADATA +2 -1
  61. {pixeltable-0.4.14.dist-info → pixeltable-0.4.16.dist-info}/RECORD +64 -57
  62. {pixeltable-0.4.14.dist-info → pixeltable-0.4.16.dist-info}/WHEEL +0 -0
  63. {pixeltable-0.4.14.dist-info → pixeltable-0.4.16.dist-info}/entry_points.txt +0 -0
  64. {pixeltable-0.4.14.dist-info → pixeltable-0.4.16.dist-info}/licenses/LICENSE +0 -0
pixeltable/__init__.py CHANGED
@@ -1,3 +1,7 @@
+"""
+Core Pixeltable API for table operations, data processing, and UDF management.
+"""
+
 # ruff: noqa: F401
 
 from .__version__ import __version__, __version_tuple__
@@ -20,7 +24,6 @@ from .globals import (
     array,
     configure_logging,
     create_dir,
-    create_replica,
     create_snapshot,
     create_table,
     create_view,
@@ -34,6 +37,8 @@ from .globals import (
     list_tables,
     ls,
     move,
+    publish,
+    replicate,
     tool,
     tools,
 )
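
Note on the hunk above: `create_replica` is no longer re-exported from the package root; `publish` and `replicate` take its place. A minimal import sketch (the argument lists of the new functions are not part of this diff):

# Sketch only: the 0.4.16 package root re-exports publish and replicate
# instead of create_replica; their signatures are not shown in this diff.
from pixeltable import publish, replicate
# from pixeltable import create_replica  # no longer available at the top level
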
pixeltable/catalog/catalog.py CHANGED
@@ -12,11 +12,13 @@ from uuid import UUID
 
 import psycopg
 import sqlalchemy as sql
+import sqlalchemy.exc as sql_exc
 
 from pixeltable import exceptions as excs
 from pixeltable.env import Env
 from pixeltable.iterators import ComponentIterator
 from pixeltable.metadata import schema
+from pixeltable.utils.exception_handler import run_cleanup
 
 from .column import Column
 from .dir import Dir
@@ -101,7 +103,7 @@ def retry_loop(
                except PendingTableOpsError as e:
                    Env.get().console_logger.debug(f'retry_loop(): finalizing pending ops for {e.tbl_id}')
                    Catalog.get()._finalize_pending_ops(e.tbl_id)
-               except (sql.exc.DBAPIError, sql.exc.OperationalError) as e:
+               except (sql_exc.DBAPIError, sql_exc.OperationalError) as e:
                    # TODO: what other exceptions should we be looking for?
                    if isinstance(e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)):
                        if num_retries < _MAX_RETRIES or _MAX_RETRIES == -1:
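
The hunk above only swaps `sql.exc` for the new `sql_exc` alias; the surrounding retry behavior (retry on serialization failures and lock timeouts, with a bounded number of attempts) is unchanged. A self-contained sketch of that pattern, using illustrative names (`with_retries`, `MAX_RETRIES`, `do_work`) rather than pixeltable internals:

# Illustrative sketch of the retry-on-concurrency-error pattern used by retry_loop();
# with_retries, MAX_RETRIES, and do_work are stand-ins, not pixeltable APIs.
import random
import time
from typing import Callable

import psycopg
import sqlalchemy as sql
import sqlalchemy.exc as sql_exc

MAX_RETRIES = 3


def with_retries(engine: sql.Engine, do_work: Callable[[sql.Connection], None]) -> None:
    num_retries = 0
    while True:
        try:
            with engine.begin() as conn:
                do_work(conn)
            return
        except (sql_exc.DBAPIError, sql_exc.OperationalError) as e:
            # retry only on concurrency-related errors, mirroring the except clause above
            if isinstance(e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)):
                if num_retries < MAX_RETRIES:
                    num_retries += 1
                    time.sleep(random.uniform(0.1, 0.5))
                    continue
            raise
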
@@ -167,6 +169,7 @@ class Catalog:
     _in_write_xact: bool  # True if we're in a write transaction
     _x_locked_tbl_ids: set[UUID]  # non-empty for write transactions
     _modified_tvs: set[TableVersionHandle]  # TableVersion instances modified in the current transaction
+    _undo_actions: list[Callable[[], None]]
     _in_retry_loop: bool
 
     # cached column dependencies
@@ -199,6 +202,7 @@
         self._in_write_xact = False
         self._x_locked_tbl_ids = set()
         self._modified_tvs = set()
+        self._undo_actions = []
         self._in_retry_loop = False
         self._column_dependencies = {}
         self._column_dependents = None
@@ -245,6 +249,11 @@
             for v in tbl_version.mutable_views:
                 assert v.effective_version is None, f'{v.id}:{v.effective_version}'
 
+    def mark_modified_tvs(self, *handle: TableVersionHandle) -> None:
+        """Record that the given TableVersion instances were modified in the current transaction"""
+        assert Env.get().in_xact
+        self._modified_tvs.update(handle)
+
     @contextmanager
     def begin_xact(
         self,
@@ -309,6 +318,7 @@
                self._column_dependents = None
                has_exc = False
 
+               assert not self._undo_actions
                with Env.get().begin_xact(for_write=for_write) as conn:
                    if tbl is not None or tbl_id is not None:
                        try:
@@ -352,7 +362,7 @@
                                # raise to abort the transaction
                                raise
 
-                           except (sql.exc.DBAPIError, sql.exc.OperationalError) as e:
+                           except (sql_exc.DBAPIError, sql_exc.OperationalError) as e:
                                has_exc = True
                                if isinstance(
                                    e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)
@@ -360,10 +370,12 @@
                                    num_retries += 1
                                    _logger.debug(f'Retrying ({num_retries}) after {type(e.orig)}')
                                    time.sleep(random.uniform(0.1, 0.5))
+                                   assert not self._undo_actions  # We should not have any undo actions at this point
                                    continue
                                else:
                                    raise
 
+                   assert not self._undo_actions
                    yield conn
                    return
 
@@ -376,44 +388,19 @@
                # we got this exception after getting the initial table locks and therefore need to abort
                raise
 
-           except (sql.exc.DBAPIError, sql.exc.OperationalError) as e:
+           except (sql_exc.DBAPIError, sql_exc.OperationalError, sql_exc.InternalError) as e:
                has_exc = True
-               # we got some db error during the actual operation (not just while trying to get locks on the metadata
-               # records): we convert these into Errors, if asked to do so, and abort
-               # TODO: what other concurrency-related exceptions should we expect?
-
-               # we always convert UndefinedTable exceptions (they can't be retried)
-               if isinstance(e.orig, psycopg.errors.UndefinedTable):
-                   # the table got dropped in the middle of the table operation
-                   tbl_name = tbl.tbl_name() if tbl is not None else str(tbl_id) if tbl_id is not None else '?'
-                   _logger.debug(f'Exception: undefined table ({tbl_name}): Caught {type(e.orig)}: {e!r}')
-                   assert tbl is not None
-                   raise excs.Error(f'Table was dropped: {tbl_name}') from None
-               elif isinstance(e.orig, psycopg.errors.SerializationFailure) and convert_db_excs:
-                   # we still got a serialization error, despite getting x-locks at the beginning
-                   msg: str
-                   if tbl is not None:
-                       msg = f'{tbl.tbl_name()} ({tbl.tbl_id})'
-                   elif tbl_id is not None:
-                       msg = f'{tbl_id}'
-                   else:
-                       msg = ''
-                   _logger.debug(f'Exception: serialization failure: {msg} ({e})')
-                   raise excs.Error(
-                       'That Pixeltable operation could not be completed because it conflicted with another '
-                       'operation that was run on a different process.\n'
-                       'Please re-run the operation.'
-                   ) from None
-               else:
-                   raise
+               self.convert_sql_exc(e, tbl_id, tbl.tbl_version if tbl is not None else None, convert_db_excs)
+               raise  # re-raise the error if it didn't convert to a pxt.Error
 
-           except:
+           except (Exception, KeyboardInterrupt) as e:
                has_exc = True
+               _logger.debug(f'Caught {e.__class__}')
                raise
 
            finally:
                self._in_write_xact = False
-               self._x_locked_tbl_ids = set()
+               self._x_locked_tbl_ids.clear()
                self._column_dependents = None
 
                # invalidate cached current TableVersion instances
@@ -423,11 +410,73 @@
                    tv.is_validated = False
 
                if has_exc:
-                   # purge all modified TableVersion instances, we can't guarantee they are still consistent with the
+                   # Execute undo actions in reverse order (LIFO)
+                   for hook in reversed(self._undo_actions):
+                       run_cleanup(hook, raise_error=False)
+                   # purge all modified TableVersion instances; we can't guarantee they are still consistent with the
                    # stored metadata
                    for handle in self._modified_tvs:
                        self._clear_tv_cache(handle.id, handle.effective_version)
-                   self._modified_tvs = set()
+                   # Clear potentially corrupted cached metadata
+                   if tbl is not None:
+                       tbl.clear_cached_md()
+
+               self._undo_actions.clear()
+               self._modified_tvs.clear()
+
+    def register_undo_action(self, func: Callable[[], None]) -> Callable[[], None]:
+        """Registers a function to be called if the current transaction fails.
+
+        The function is called only if the current transaction fails due to an exception.
+
+        Rollback functions are called in reverse order of registration (LIFO).
+
+        The function should not raise exceptions; if it does, they are logged and ignored.
+        """
+        assert Env.get().in_xact
+        self._undo_actions.append(func)
+        return func
+
+    def convert_sql_exc(
+        self,
+        e: sql_exc.StatementError,
+        tbl_id: UUID | None = None,
+        tbl: TableVersionHandle | None = None,
+        convert_db_excs: bool = True,
+    ) -> None:
+        # we got some db error during the actual operation (not just while trying to get locks on the metadata
+        # records); we convert these into pxt.Error exceptions if appropriate
+
+        # we always convert UndefinedTable exceptions (they can't be retried)
+        if isinstance(e.orig, psycopg.errors.UndefinedTable) and tbl is not None:
+            # the table got dropped in the middle of the operation
+            tbl_name = tbl.get().name
+            _logger.debug(f'Exception: undefined table ({tbl_name}): Caught {type(e.orig)}: {e!r}')
+            raise excs.Error(f'Table was dropped: {tbl_name}') from None
+        elif (
+            isinstance(
+                e.orig,
+                (
+                    psycopg.errors.SerializationFailure,  # serialization error despite getting x-locks
+                    psycopg.errors.InFailedSqlTransaction,  # can happen after tx fails for another reason
+                    psycopg.errors.DuplicateColumn,  # if a different process added a column concurrently
+                ),
+            )
+            and convert_db_excs
+        ):
+            msg: str
+            if tbl is not None:
+                msg = f'{tbl.get().name} ({tbl.id})'
+            elif tbl_id is not None:
+                msg = f'{tbl_id}'
+            else:
+                msg = ''
+            _logger.debug(f'Exception: {e.orig.__class__}: {msg} ({e})')
+            raise excs.Error(
+                'That Pixeltable operation could not be completed because it conflicted with another '
+                'operation that was run on a different process.\n'
+                'Please re-run the operation.'
+            ) from None
 
     @property
     def in_write_xact(self) -> bool:
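
The new `register_undo_action()` above gives write paths a way to pair non-transactional side effects with compensating cleanup: hooks run only when the transaction fails, in LIFO order, via `run_cleanup(..., raise_error=False)`. A minimal usage sketch; `catalog`, `tmp_path`, and `write_sidecar_file` are illustrative names, not pixeltable APIs:

# Sketch of the register_undo_action() contract: the lambda runs only if the
# enclosing transaction fails, and exceptions inside the hook are logged, not raised.
import os


def write_sidecar_file(catalog, tmp_path: str, data: bytes) -> None:
    with open(tmp_path, 'wb') as f:
        f.write(data)
    # compensate for the filesystem side effect if the catalog transaction aborts
    catalog.register_undo_action(lambda: os.remove(tmp_path))
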
@@ -593,7 +642,7 @@
                 if op.op_sn == op.num_ops - 1:
                     conn.execute(reset_has_pending_stmt)
 
-       except (sql.exc.DBAPIError, sql.exc.OperationalError) as e:
+       except (sql_exc.DBAPIError, sql_exc.OperationalError) as e:
            # TODO: why are we still seeing these here, instead of them getting taken care of by the retry
            # logic of begin_xact()?
            if isinstance(e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)):
@@ -920,11 +969,18 @@
         num_retained_versions: int,
         comment: str,
         media_validation: MediaValidation,
-    ) -> Table:
+    ) -> tuple[Table, bool]:
+        """
+        Creates a new InsertableTable at the given path.
+
+        If `if_exists == IfExistsParam.IGNORE` and a table `t` already exists at the given path, returns `t, False`.
+
+        Otherwise, creates a new table `t` and returns `t, True` (or raises an exception if the operation fails).
+        """
         existing = self._handle_path_collision(path, InsertableTable, False, if_exists)
         if existing is not None:
             assert isinstance(existing, Table)
-            return existing
+            return existing, False
 
         dir = self._get_schema_object(path.parent, expected=Dir, raise_if_not_exists=True)
         assert dir is not None
@@ -940,7 +996,7 @@
             media_validation=media_validation,
         )
         self._tbls[tbl._id, None] = tbl
-        return tbl
+        return tbl, True
 
     def create_view(
         self,
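
`_create_table()` now reports whether it actually created something: callers get `(table, True)` for a fresh table and `(existing, False)` when `if_exists == IfExistsParam.IGNORE` finds a table already at the path. A generic, self-contained sketch of that return convention (the dict-based registry and names below are purely illustrative, not pixeltable code):

# Generic sketch of the "(object, created)" convention adopted by _create_table.
def get_or_create(registry: dict[str, list], name: str) -> tuple[list, bool]:
    if name in registry:
        return registry[name], False  # analogous to IfExistsParam.IGNORE: reuse, created=False
    obj: list = []
    registry[name] = obj
    return obj, True


tables: dict[str, list] = {}
_, created = get_or_create(tables, 'films')        # created == True
_, created_again = get_or_create(tables, 'films')  # created_again == False
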
@@ -1195,8 +1251,14 @@
             TableVersion.create_replica(md)
 
     @retry_loop(for_write=False)
-    def get_table(self, path: Path) -> Table:
-        obj = Catalog.get()._get_schema_object(path, expected=Table, raise_if_not_exists=True)
+    def get_table(self, path: Path, if_not_exists: IfNotExistsParam) -> Table | None:
+        obj = Catalog.get()._get_schema_object(
+            path, expected=Table, raise_if_not_exists=(if_not_exists == IfNotExistsParam.ERROR)
+        )
+        if obj is None:
+            _logger.info(f'Skipped table {path!r} (does not exist).')
+            return None
+
         assert isinstance(obj, Table)
         # We need to clear cached metadata from tbl_version_path, in case the schema has been changed
         # by another process.
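
With the new signature, `Catalog.get_table()` returns `None` instead of raising when the path does not exist and `if_not_exists` is not `IfNotExistsParam.ERROR`. A caller-side sketch; `catalog` and `path` are placeholders, `IfNotExistsParam.IGNORE` is assumed to be the non-error member (only `.ERROR` appears in this hunk), and how the public `pxt.get_table` surfaces this option is not shown in this diff:

# Sketch of the caller-side contract for the new get_table() signature.
tbl = catalog.get_table(path, if_not_exists=IfNotExistsParam.IGNORE)
if tbl is None:
    pass  # the path does not name an existing table; no exception was raised
else:
    pass  # use the Table handle as before
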
@@ -1208,7 +1270,7 @@
         tbl = self._get_schema_object(
             path,
             expected=Table,
-            raise_if_not_exists=if_not_exists == IfNotExistsParam.ERROR and not force,
+            raise_if_not_exists=(if_not_exists == IfNotExistsParam.ERROR and not force),
             lock_parent=True,
             lock_obj=False,
         )
@@ -1293,7 +1355,7 @@
             base_id = tvp.base.tbl_id
             base_tv = self.get_tbl_version(base_id, None, validate_initialized=True)
             base_tv.tbl_md.view_sn += 1
-            self._modified_tvs.add(base_tv.handle)
+            self.mark_modified_tvs(base_tv.handle)
             result = Env.get().conn.execute(
                 sql.update(schema.Table.__table__)
                 .values({schema.Table.md: dataclasses.asdict(base_tv.tbl_md)})
@@ -1305,7 +1367,7 @@
         if not is_pure_snapshot:
             # invalidate the TableVersion instance when we're done so that existing references to it can find out it
             # has been dropped
-            self._modified_tvs.add(tvp.tbl_version)
+            self.mark_modified_tvs(tvp.tbl_version)
         tv = tvp.tbl_version.get() if tvp.tbl_version is not None else None
         if not is_pure_snapshot:
             # drop the store table before deleting the Table record
@@ -1997,7 +2059,7 @@
         self._tbl_versions[tbl_id, effective_version] = tbl_version
         # register this instance as modified, so that it gets purged if the transaction fails, it may not be
         # fully initialized
-        self._modified_tvs.add(tbl_version.handle)
+        self.mark_modified_tvs(tbl_version.handle)
         tbl_version.init()
         return tbl_version
 
pixeltable/catalog/column.py CHANGED
@@ -48,7 +48,7 @@ class Column:
     - if None: the system chooses for you (at present, this is always False, but this may change in the future)
     """
 
-    name: str
+    name: Optional[str]
     id: Optional[int]
     col_type: ts.ColumnType
     stored: bool
@@ -259,7 +259,12 @@
         # default: record errors for computed and media columns
         if self._stores_cellmd is not None:
             return self._stores_cellmd
-        return self.is_stored and (self.is_computed or self.col_type.is_media_type())
+        return self.is_stored and (
+            self.is_computed
+            or self.col_type.is_media_type()
+            or self.col_type.is_json_type()
+            or self.col_type.is_array_type()
+        )
 
     @property
     def qualified_name(self) -> str:
pixeltable/catalog/table.py CHANGED
@@ -117,6 +117,7 @@ class Table(SchemaObject):
                 is_primary_key=col.is_pk,
                 media_validation=col.media_validation.name.lower() if col.media_validation is not None else None,  # type: ignore[typeddict-item]
                 computed_with=col.value_expr.display_str(inline=False) if col.value_expr is not None else None,
+                defined_in=col.tbl.name,
             )
         # Pure snapshots have no indices
         indices = self._tbl_version.get().idxs_by_name.values() if self._tbl_version is not None else {}
pixeltable/catalog/table_metadata.py CHANGED
@@ -19,6 +19,10 @@ class ColumnMetadata(TypedDict):
     """The media validation policy for this column."""
     computed_with: Optional[str]
     """Expression used to compute this column; `None` if this is not a computed column."""
+    defined_in: Optional[str]
+    """Name of the table where this column was originally defined.
+
+    If the current table is a view, then `defined_in` may differ from the current table name."""
 
 
 class EmbeddingIndexParams(TypedDict):
@@ -38,6 +42,7 @@ class IndexMetadata(TypedDict):
     index_type: Literal['embedding']
     """The type of index (currently only `'embedding'` is supported, but others will be added in the future)."""
     parameters: EmbeddingIndexParams
+    """Parameters specific to the index type."""
 
 
 class TableMetadata(TypedDict):
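
The new `defined_in` field makes it possible to tell, for a view, which columns come from the view itself and which are inherited from a base table. A sketch of reading it; the `get_metadata()` accessor and the `'columns'`/`'name'` key layout are assumptions (only the ColumnMetadata fields themselves appear in this diff):

# Sketch only: inspecting defined_in via table metadata; accessor layout assumed.
md = view.get_metadata()
for col_name, col_md in md['columns'].items():
    if col_md['defined_in'] != md['name']:
        print(f'{col_name} is inherited from {col_md["defined_in"]}')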