pixeltable 0.4.15__py3-none-any.whl → 0.4.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable has been flagged as potentially problematic; consult the registry's advisory page for details.

Files changed (57):
  1. pixeltable/__init__.py +4 -0
  2. pixeltable/catalog/catalog.py +105 -51
  3. pixeltable/catalog/column.py +7 -2
  4. pixeltable/catalog/table.py +1 -0
  5. pixeltable/catalog/table_metadata.py +4 -0
  6. pixeltable/catalog/table_version.py +99 -78
  7. pixeltable/catalog/table_version_handle.py +4 -1
  8. pixeltable/config.py +6 -0
  9. pixeltable/dataframe.py +10 -5
  10. pixeltable/env.py +48 -19
  11. pixeltable/exec/__init__.py +2 -0
  12. pixeltable/exec/cell_materialization_node.py +231 -0
  13. pixeltable/exec/cell_reconstruction_node.py +135 -0
  14. pixeltable/exec/exec_node.py +1 -1
  15. pixeltable/exec/expr_eval/evaluators.py +1 -0
  16. pixeltable/exec/expr_eval/expr_eval_node.py +3 -0
  17. pixeltable/exec/expr_eval/globals.py +2 -0
  18. pixeltable/exec/globals.py +32 -0
  19. pixeltable/exec/object_store_save_node.py +1 -4
  20. pixeltable/exec/row_update_node.py +16 -9
  21. pixeltable/exec/sql_node.py +107 -14
  22. pixeltable/exprs/__init__.py +1 -1
  23. pixeltable/exprs/arithmetic_expr.py +10 -11
  24. pixeltable/exprs/column_property_ref.py +10 -10
  25. pixeltable/exprs/column_ref.py +2 -2
  26. pixeltable/exprs/data_row.py +106 -37
  27. pixeltable/exprs/expr.py +9 -0
  28. pixeltable/exprs/expr_set.py +14 -7
  29. pixeltable/exprs/inline_expr.py +2 -19
  30. pixeltable/exprs/json_path.py +45 -12
  31. pixeltable/exprs/row_builder.py +54 -22
  32. pixeltable/functions/__init__.py +1 -0
  33. pixeltable/functions/bedrock.py +7 -0
  34. pixeltable/functions/deepseek.py +11 -4
  35. pixeltable/functions/llama_cpp.py +7 -0
  36. pixeltable/functions/math.py +1 -1
  37. pixeltable/functions/ollama.py +7 -0
  38. pixeltable/functions/openai.py +4 -4
  39. pixeltable/functions/openrouter.py +143 -0
  40. pixeltable/globals.py +10 -4
  41. pixeltable/io/globals.py +16 -15
  42. pixeltable/io/table_data_conduit.py +46 -21
  43. pixeltable/metadata/__init__.py +1 -1
  44. pixeltable/metadata/converters/convert_40.py +73 -0
  45. pixeltable/metadata/notes.py +1 -0
  46. pixeltable/plan.py +175 -46
  47. pixeltable/store.py +1 -1
  48. pixeltable/type_system.py +5 -3
  49. pixeltable/utils/console_output.py +4 -1
  50. pixeltable/utils/exception_handler.py +5 -28
  51. pixeltable/utils/image.py +7 -0
  52. pixeltable/utils/misc.py +5 -0
  53. {pixeltable-0.4.15.dist-info → pixeltable-0.4.16.dist-info}/METADATA +2 -1
  54. {pixeltable-0.4.15.dist-info → pixeltable-0.4.16.dist-info}/RECORD +57 -50
  55. {pixeltable-0.4.15.dist-info → pixeltable-0.4.16.dist-info}/WHEEL +0 -0
  56. {pixeltable-0.4.15.dist-info → pixeltable-0.4.16.dist-info}/entry_points.txt +0 -0
  57. {pixeltable-0.4.15.dist-info → pixeltable-0.4.16.dist-info}/licenses/LICENSE +0 -0
pixeltable/__init__.py CHANGED
@@ -1,3 +1,7 @@
1
+ """
2
+ Core Pixeltable API for table operations, data processing, and UDF management.
3
+ """
4
+
1
5
  # ruff: noqa: F401
2
6
 
3
7
  from .__version__ import __version__, __version_tuple__
@@ -12,11 +12,13 @@ from uuid import UUID
12
12
 
13
13
  import psycopg
14
14
  import sqlalchemy as sql
15
+ import sqlalchemy.exc as sql_exc
15
16
 
16
17
  from pixeltable import exceptions as excs
17
18
  from pixeltable.env import Env
18
19
  from pixeltable.iterators import ComponentIterator
19
20
  from pixeltable.metadata import schema
21
+ from pixeltable.utils.exception_handler import run_cleanup
20
22
 
21
23
  from .column import Column
22
24
  from .dir import Dir
@@ -101,7 +103,7 @@ def retry_loop(
101
103
  except PendingTableOpsError as e:
102
104
  Env.get().console_logger.debug(f'retry_loop(): finalizing pending ops for {e.tbl_id}')
103
105
  Catalog.get()._finalize_pending_ops(e.tbl_id)
104
- except (sql.exc.DBAPIError, sql.exc.OperationalError) as e:
106
+ except (sql_exc.DBAPIError, sql_exc.OperationalError) as e:
105
107
  # TODO: what other exceptions should we be looking for?
106
108
  if isinstance(e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)):
107
109
  if num_retries < _MAX_RETRIES or _MAX_RETRIES == -1:
@@ -167,6 +169,7 @@ class Catalog:
167
169
  _in_write_xact: bool # True if we're in a write transaction
168
170
  _x_locked_tbl_ids: set[UUID] # non-empty for write transactions
169
171
  _modified_tvs: set[TableVersionHandle] # TableVersion instances modified in the current transaction
172
+ _undo_actions: list[Callable[[], None]]
170
173
  _in_retry_loop: bool
171
174
 
172
175
  # cached column dependencies
@@ -199,6 +202,7 @@ class Catalog:
199
202
  self._in_write_xact = False
200
203
  self._x_locked_tbl_ids = set()
201
204
  self._modified_tvs = set()
205
+ self._undo_actions = []
202
206
  self._in_retry_loop = False
203
207
  self._column_dependencies = {}
204
208
  self._column_dependents = None
@@ -245,6 +249,11 @@ class Catalog:
245
249
  for v in tbl_version.mutable_views:
246
250
  assert v.effective_version is None, f'{v.id}:{v.effective_version}'
247
251
 
252
+ def mark_modified_tvs(self, *handle: TableVersionHandle) -> None:
253
+ """Record that the given TableVersion instances were modified in the current transaction"""
254
+ assert Env.get().in_xact
255
+ self._modified_tvs.update(handle)
256
+
248
257
  @contextmanager
249
258
  def begin_xact(
250
259
  self,
@@ -309,6 +318,7 @@ class Catalog:
309
318
  self._column_dependents = None
310
319
  has_exc = False
311
320
 
321
+ assert not self._undo_actions
312
322
  with Env.get().begin_xact(for_write=for_write) as conn:
313
323
  if tbl is not None or tbl_id is not None:
314
324
  try:
@@ -352,7 +362,7 @@ class Catalog:
352
362
  # raise to abort the transaction
353
363
  raise
354
364
 
355
- except (sql.exc.DBAPIError, sql.exc.OperationalError) as e:
365
+ except (sql_exc.DBAPIError, sql_exc.OperationalError) as e:
356
366
  has_exc = True
357
367
  if isinstance(
358
368
  e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)
@@ -360,10 +370,12 @@ class Catalog:
360
370
  num_retries += 1
361
371
  _logger.debug(f'Retrying ({num_retries}) after {type(e.orig)}')
362
372
  time.sleep(random.uniform(0.1, 0.5))
373
+ assert not self._undo_actions # We should not have any undo actions at this point
363
374
  continue
364
375
  else:
365
376
  raise
366
377
 
378
+ assert not self._undo_actions
367
379
  yield conn
368
380
  return
369
381
 
@@ -376,49 +388,19 @@ class Catalog:
376
388
  # we got this exception after getting the initial table locks and therefore need to abort
377
389
  raise
378
390
 
379
- except (sql.exc.DBAPIError, sql.exc.OperationalError) as e:
380
- has_exc = True
381
- # we got some db error during the actual operation (not just while trying to get locks on the metadata
382
- # records): we convert these into Errors, if asked to do so, and abort
383
- # TODO: what other concurrency-related exceptions should we expect?
384
-
385
- # we always convert UndefinedTable exceptions (they can't be retried)
386
- if isinstance(e.orig, psycopg.errors.UndefinedTable):
387
- # the table got dropped in the middle of the table operation
388
- tbl_name = tbl.tbl_name() if tbl is not None else str(tbl_id) if tbl_id is not None else '?'
389
- _logger.debug(f'Exception: undefined table ({tbl_name}): Caught {type(e.orig)}: {e!r}')
390
- assert tbl is not None
391
- raise excs.Error(f'Table was dropped: {tbl_name}') from None
392
- elif isinstance(e.orig, psycopg.errors.SerializationFailure) and convert_db_excs:
393
- # we still got a serialization error, despite getting x-locks at the beginning
394
- msg: str
395
- if tbl is not None:
396
- msg = f'{tbl.tbl_name()} ({tbl.tbl_id})'
397
- elif tbl_id is not None:
398
- msg = f'{tbl_id}'
399
- else:
400
- msg = ''
401
- _logger.debug(f'Exception: serialization failure: {msg} ({e})')
402
- raise excs.Error(
403
- 'That Pixeltable operation could not be completed because it conflicted with another '
404
- 'operation that was run on a different process.\n'
405
- 'Please re-run the operation.'
406
- ) from None
407
- else:
408
- raise
409
-
410
- except KeyboardInterrupt:
391
+ except (sql_exc.DBAPIError, sql_exc.OperationalError, sql_exc.InternalError) as e:
411
392
  has_exc = True
412
- _logger.debug('Caught KeyboardInterrupt')
413
- raise
393
+ self.convert_sql_exc(e, tbl_id, tbl.tbl_version if tbl is not None else None, convert_db_excs)
394
+ raise # re-raise the error if it didn't convert to a pxt.Error
414
395
 
415
- except:
396
+ except (Exception, KeyboardInterrupt) as e:
416
397
  has_exc = True
398
+ _logger.debug(f'Caught {e.__class__}')
417
399
  raise
418
400
 
419
401
  finally:
420
402
  self._in_write_xact = False
421
- self._x_locked_tbl_ids = set()
403
+ self._x_locked_tbl_ids.clear()
422
404
  self._column_dependents = None
423
405
 
424
406
  # invalidate cached current TableVersion instances
@@ -428,14 +410,73 @@ class Catalog:
428
410
  tv.is_validated = False
429
411
 
430
412
  if has_exc:
431
- # purge all modified TableVersion instances, we can't guarantee they are still consistent with the
413
+ # Execute undo actions in reverse order (LIFO)
414
+ for hook in reversed(self._undo_actions):
415
+ run_cleanup(hook, raise_error=False)
416
+ # purge all modified TableVersion instances; we can't guarantee they are still consistent with the
432
417
  # stored metadata
433
418
  for handle in self._modified_tvs:
434
419
  self._clear_tv_cache(handle.id, handle.effective_version)
435
- # Clear potentially corrupted cached metadata after error
420
+ # Clear potentially corrupted cached metadata
436
421
  if tbl is not None:
437
422
  tbl.clear_cached_md()
438
- self._modified_tvs = set()
423
+
424
+ self._undo_actions.clear()
425
+ self._modified_tvs.clear()
426
+
427
+ def register_undo_action(self, func: Callable[[], None]) -> Callable[[], None]:
428
+ """Registers a function to be called if the current transaction fails.
429
+
430
+ The function is called only if the current transaction fails due to an exception.
431
+
432
+ Rollback functions are called in reverse order of registration (LIFO).
433
+
434
+ The function should not raise exceptions; if it does, they are logged and ignored.
435
+ """
436
+ assert Env.get().in_xact
437
+ self._undo_actions.append(func)
438
+ return func
439
+
440
+ def convert_sql_exc(
441
+ self,
442
+ e: sql_exc.StatementError,
443
+ tbl_id: UUID | None = None,
444
+ tbl: TableVersionHandle | None = None,
445
+ convert_db_excs: bool = True,
446
+ ) -> None:
447
+ # we got some db error during the actual operation (not just while trying to get locks on the metadata
448
+ # records); we convert these into pxt.Error exceptions if appropriate
449
+
450
+ # we always convert UndefinedTable exceptions (they can't be retried)
451
+ if isinstance(e.orig, psycopg.errors.UndefinedTable) and tbl is not None:
452
+ # the table got dropped in the middle of the operation
453
+ tbl_name = tbl.get().name
454
+ _logger.debug(f'Exception: undefined table ({tbl_name}): Caught {type(e.orig)}: {e!r}')
455
+ raise excs.Error(f'Table was dropped: {tbl_name}') from None
456
+ elif (
457
+ isinstance(
458
+ e.orig,
459
+ (
460
+ psycopg.errors.SerializationFailure, # serialization error despite getting x-locks
461
+ psycopg.errors.InFailedSqlTransaction, # can happen after tx fails for another reason
462
+ psycopg.errors.DuplicateColumn, # if a different process added a column concurrently
463
+ ),
464
+ )
465
+ and convert_db_excs
466
+ ):
467
+ msg: str
468
+ if tbl is not None:
469
+ msg = f'{tbl.get().name} ({tbl.id})'
470
+ elif tbl_id is not None:
471
+ msg = f'{tbl_id}'
472
+ else:
473
+ msg = ''
474
+ _logger.debug(f'Exception: {e.orig.__class__}: {msg} ({e})')
475
+ raise excs.Error(
476
+ 'That Pixeltable operation could not be completed because it conflicted with another '
477
+ 'operation that was run on a different process.\n'
478
+ 'Please re-run the operation.'
479
+ ) from None
439
480
 
440
481
  @property
441
482
  def in_write_xact(self) -> bool:
@@ -601,7 +642,7 @@ class Catalog:
601
642
  if op.op_sn == op.num_ops - 1:
602
643
  conn.execute(reset_has_pending_stmt)
603
644
 
604
- except (sql.exc.DBAPIError, sql.exc.OperationalError) as e:
645
+ except (sql_exc.DBAPIError, sql_exc.OperationalError) as e:
605
646
  # TODO: why are we still seeing these here, instead of them getting taken care of by the retry
606
647
  # logic of begin_xact()?
607
648
  if isinstance(e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)):
@@ -928,11 +969,18 @@ class Catalog:
928
969
  num_retained_versions: int,
929
970
  comment: str,
930
971
  media_validation: MediaValidation,
931
- ) -> Table:
972
+ ) -> tuple[Table, bool]:
973
+ """
974
+ Creates a new InsertableTable at the given path.
975
+
976
+ If `if_exists == IfExistsParam.IGNORE` and a table `t` already exists at the given path, returns `t, False`.
977
+
978
+ Otherwise, creates a new table `t` and returns `t, True` (or raises an exception if the operation fails).
979
+ """
932
980
  existing = self._handle_path_collision(path, InsertableTable, False, if_exists)
933
981
  if existing is not None:
934
982
  assert isinstance(existing, Table)
935
- return existing
983
+ return existing, False
936
984
 
937
985
  dir = self._get_schema_object(path.parent, expected=Dir, raise_if_not_exists=True)
938
986
  assert dir is not None
@@ -948,7 +996,7 @@ class Catalog:
948
996
  media_validation=media_validation,
949
997
  )
950
998
  self._tbls[tbl._id, None] = tbl
951
- return tbl
999
+ return tbl, True
952
1000
 
953
1001
  def create_view(
954
1002
  self,
@@ -1203,8 +1251,14 @@ class Catalog:
1203
1251
  TableVersion.create_replica(md)
1204
1252
 
1205
1253
  @retry_loop(for_write=False)
1206
- def get_table(self, path: Path) -> Table:
1207
- obj = Catalog.get()._get_schema_object(path, expected=Table, raise_if_not_exists=True)
1254
+ def get_table(self, path: Path, if_not_exists: IfNotExistsParam) -> Table | None:
1255
+ obj = Catalog.get()._get_schema_object(
1256
+ path, expected=Table, raise_if_not_exists=(if_not_exists == IfNotExistsParam.ERROR)
1257
+ )
1258
+ if obj is None:
1259
+ _logger.info(f'Skipped table {path!r} (does not exist).')
1260
+ return None
1261
+
1208
1262
  assert isinstance(obj, Table)
1209
1263
  # We need to clear cached metadata from tbl_version_path, in case the schema has been changed
1210
1264
  # by another process.
@@ -1216,7 +1270,7 @@ class Catalog:
1216
1270
  tbl = self._get_schema_object(
1217
1271
  path,
1218
1272
  expected=Table,
1219
- raise_if_not_exists=if_not_exists == IfNotExistsParam.ERROR and not force,
1273
+ raise_if_not_exists=(if_not_exists == IfNotExistsParam.ERROR and not force),
1220
1274
  lock_parent=True,
1221
1275
  lock_obj=False,
1222
1276
  )
@@ -1301,7 +1355,7 @@ class Catalog:
1301
1355
  base_id = tvp.base.tbl_id
1302
1356
  base_tv = self.get_tbl_version(base_id, None, validate_initialized=True)
1303
1357
  base_tv.tbl_md.view_sn += 1
1304
- self._modified_tvs.add(base_tv.handle)
1358
+ self.mark_modified_tvs(base_tv.handle)
1305
1359
  result = Env.get().conn.execute(
1306
1360
  sql.update(schema.Table.__table__)
1307
1361
  .values({schema.Table.md: dataclasses.asdict(base_tv.tbl_md)})
@@ -1313,7 +1367,7 @@ class Catalog:
1313
1367
  if not is_pure_snapshot:
1314
1368
  # invalidate the TableVersion instance when we're done so that existing references to it can find out it
1315
1369
  # has been dropped
1316
- self._modified_tvs.add(tvp.tbl_version)
1370
+ self.mark_modified_tvs(tvp.tbl_version)
1317
1371
  tv = tvp.tbl_version.get() if tvp.tbl_version is not None else None
1318
1372
  if not is_pure_snapshot:
1319
1373
  # drop the store table before deleting the Table record
@@ -2005,7 +2059,7 @@ class Catalog:
2005
2059
  self._tbl_versions[tbl_id, effective_version] = tbl_version
2006
2060
  # register this instance as modified, so that it gets purged if the transaction fails, it may not be
2007
2061
  # fully initialized
2008
- self._modified_tvs.add(tbl_version.handle)
2062
+ self.mark_modified_tvs(tbl_version.handle)
2009
2063
  tbl_version.init()
2010
2064
  return tbl_version
2011
2065
 
@@ -48,7 +48,7 @@ class Column:
48
48
  - if None: the system chooses for you (at present, this is always False, but this may change in the future)
49
49
  """
50
50
 
51
- name: str
51
+ name: Optional[str]
52
52
  id: Optional[int]
53
53
  col_type: ts.ColumnType
54
54
  stored: bool
@@ -259,7 +259,12 @@ class Column:
259
259
  # default: record errors for computed and media columns
260
260
  if self._stores_cellmd is not None:
261
261
  return self._stores_cellmd
262
- return self.is_stored and (self.is_computed or self.col_type.is_media_type())
262
+ return self.is_stored and (
263
+ self.is_computed
264
+ or self.col_type.is_media_type()
265
+ or self.col_type.is_json_type()
266
+ or self.col_type.is_array_type()
267
+ )
263
268
 
264
269
  @property
265
270
  def qualified_name(self) -> str:
@@ -117,6 +117,7 @@ class Table(SchemaObject):
117
117
  is_primary_key=col.is_pk,
118
118
  media_validation=col.media_validation.name.lower() if col.media_validation is not None else None, # type: ignore[typeddict-item]
119
119
  computed_with=col.value_expr.display_str(inline=False) if col.value_expr is not None else None,
120
+ defined_in=col.tbl.name,
120
121
  )
121
122
  # Pure snapshots have no indices
122
123
  indices = self._tbl_version.get().idxs_by_name.values() if self._tbl_version is not None else {}
@@ -19,6 +19,10 @@ class ColumnMetadata(TypedDict):
19
19
  """The media validation policy for this column."""
20
20
  computed_with: Optional[str]
21
21
  """Expression used to compute this column; `None` if this is not a computed column."""
22
+ defined_in: Optional[str]
23
+ """Name of the table where this column was originally defined.
24
+
25
+ If the current table is a view, then `defined_in` may differ from the current table name."""
22
26
 
23
27
 
24
28
  class EmbeddingIndexParams(TypedDict):