pixeltable 0.4.15__py3-none-any.whl → 0.4.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (68) hide show
  1. pixeltable/__init__.py +4 -0
  2. pixeltable/catalog/catalog.py +125 -63
  3. pixeltable/catalog/column.py +7 -2
  4. pixeltable/catalog/table.py +1 -0
  5. pixeltable/catalog/table_metadata.py +4 -0
  6. pixeltable/catalog/table_version.py +174 -117
  7. pixeltable/catalog/table_version_handle.py +4 -1
  8. pixeltable/catalog/table_version_path.py +0 -11
  9. pixeltable/catalog/view.py +6 -0
  10. pixeltable/config.py +7 -0
  11. pixeltable/dataframe.py +10 -5
  12. pixeltable/env.py +56 -19
  13. pixeltable/exec/__init__.py +2 -0
  14. pixeltable/exec/cell_materialization_node.py +231 -0
  15. pixeltable/exec/cell_reconstruction_node.py +135 -0
  16. pixeltable/exec/exec_node.py +1 -1
  17. pixeltable/exec/expr_eval/evaluators.py +1 -0
  18. pixeltable/exec/expr_eval/expr_eval_node.py +3 -0
  19. pixeltable/exec/expr_eval/globals.py +2 -0
  20. pixeltable/exec/globals.py +32 -0
  21. pixeltable/exec/object_store_save_node.py +1 -4
  22. pixeltable/exec/row_update_node.py +16 -9
  23. pixeltable/exec/sql_node.py +107 -14
  24. pixeltable/exprs/__init__.py +1 -1
  25. pixeltable/exprs/arithmetic_expr.py +23 -18
  26. pixeltable/exprs/column_property_ref.py +10 -10
  27. pixeltable/exprs/column_ref.py +2 -2
  28. pixeltable/exprs/data_row.py +106 -37
  29. pixeltable/exprs/expr.py +9 -0
  30. pixeltable/exprs/expr_set.py +14 -7
  31. pixeltable/exprs/inline_expr.py +2 -19
  32. pixeltable/exprs/json_path.py +45 -12
  33. pixeltable/exprs/row_builder.py +54 -22
  34. pixeltable/functions/__init__.py +1 -0
  35. pixeltable/functions/bedrock.py +7 -0
  36. pixeltable/functions/deepseek.py +11 -4
  37. pixeltable/functions/llama_cpp.py +7 -0
  38. pixeltable/functions/math.py +1 -1
  39. pixeltable/functions/ollama.py +7 -0
  40. pixeltable/functions/openai.py +4 -4
  41. pixeltable/functions/openrouter.py +143 -0
  42. pixeltable/functions/video.py +110 -28
  43. pixeltable/globals.py +10 -4
  44. pixeltable/io/globals.py +18 -17
  45. pixeltable/io/parquet.py +1 -1
  46. pixeltable/io/table_data_conduit.py +47 -22
  47. pixeltable/iterators/document.py +61 -23
  48. pixeltable/iterators/video.py +126 -53
  49. pixeltable/metadata/__init__.py +1 -1
  50. pixeltable/metadata/converters/convert_40.py +73 -0
  51. pixeltable/metadata/notes.py +1 -0
  52. pixeltable/plan.py +175 -46
  53. pixeltable/share/packager.py +155 -26
  54. pixeltable/store.py +2 -3
  55. pixeltable/type_system.py +5 -3
  56. pixeltable/utils/arrow.py +6 -6
  57. pixeltable/utils/av.py +65 -0
  58. pixeltable/utils/console_output.py +4 -1
  59. pixeltable/utils/exception_handler.py +5 -28
  60. pixeltable/utils/image.py +7 -0
  61. pixeltable/utils/misc.py +5 -0
  62. pixeltable/utils/object_stores.py +16 -1
  63. pixeltable/utils/s3_store.py +44 -11
  64. {pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/METADATA +29 -28
  65. {pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/RECORD +68 -61
  66. {pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/WHEEL +0 -0
  67. {pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/entry_points.txt +0 -0
  68. {pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/licenses/LICENSE +0 -0
pixeltable/__init__.py CHANGED
@@ -1,3 +1,7 @@
1
+ """
2
+ Core Pixeltable API for table operations, data processing, and UDF management.
3
+ """
4
+
1
5
  # ruff: noqa: F401
2
6
 
3
7
  from .__version__ import __version__, __version_tuple__
@@ -12,11 +12,13 @@ from uuid import UUID
12
12
 
13
13
  import psycopg
14
14
  import sqlalchemy as sql
15
+ import sqlalchemy.exc as sql_exc
15
16
 
16
17
  from pixeltable import exceptions as excs
17
18
  from pixeltable.env import Env
18
19
  from pixeltable.iterators import ComponentIterator
19
20
  from pixeltable.metadata import schema
21
+ from pixeltable.utils.exception_handler import run_cleanup
20
22
 
21
23
  from .column import Column
22
24
  from .dir import Dir
@@ -101,7 +103,7 @@ def retry_loop(
101
103
  except PendingTableOpsError as e:
102
104
  Env.get().console_logger.debug(f'retry_loop(): finalizing pending ops for {e.tbl_id}')
103
105
  Catalog.get()._finalize_pending_ops(e.tbl_id)
104
- except (sql.exc.DBAPIError, sql.exc.OperationalError) as e:
106
+ except (sql_exc.DBAPIError, sql_exc.OperationalError) as e:
105
107
  # TODO: what other exceptions should we be looking for?
106
108
  if isinstance(e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)):
107
109
  if num_retries < _MAX_RETRIES or _MAX_RETRIES == -1:
@@ -167,6 +169,7 @@ class Catalog:
167
169
  _in_write_xact: bool # True if we're in a write transaction
168
170
  _x_locked_tbl_ids: set[UUID] # non-empty for write transactions
169
171
  _modified_tvs: set[TableVersionHandle] # TableVersion instances modified in the current transaction
172
+ _undo_actions: list[Callable[[], None]]
170
173
  _in_retry_loop: bool
171
174
 
172
175
  # cached column dependencies
@@ -199,6 +202,7 @@ class Catalog:
199
202
  self._in_write_xact = False
200
203
  self._x_locked_tbl_ids = set()
201
204
  self._modified_tvs = set()
205
+ self._undo_actions = []
202
206
  self._in_retry_loop = False
203
207
  self._column_dependencies = {}
204
208
  self._column_dependents = None
@@ -245,6 +249,11 @@ class Catalog:
245
249
  for v in tbl_version.mutable_views:
246
250
  assert v.effective_version is None, f'{v.id}:{v.effective_version}'
247
251
 
252
+ def mark_modified_tvs(self, *handle: TableVersionHandle) -> None:
253
+ """Record that the given TableVersion instances were modified in the current transaction"""
254
+ assert Env.get().in_xact
255
+ self._modified_tvs.update(handle)
256
+
248
257
  @contextmanager
249
258
  def begin_xact(
250
259
  self,
@@ -309,6 +318,7 @@ class Catalog:
309
318
  self._column_dependents = None
310
319
  has_exc = False
311
320
 
321
+ assert not self._undo_actions
312
322
  with Env.get().begin_xact(for_write=for_write) as conn:
313
323
  if tbl is not None or tbl_id is not None:
314
324
  try:
@@ -352,7 +362,7 @@ class Catalog:
352
362
  # raise to abort the transaction
353
363
  raise
354
364
 
355
- except (sql.exc.DBAPIError, sql.exc.OperationalError) as e:
365
+ except (sql_exc.DBAPIError, sql_exc.OperationalError) as e:
356
366
  has_exc = True
357
367
  if isinstance(
358
368
  e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)
@@ -360,10 +370,12 @@ class Catalog:
360
370
  num_retries += 1
361
371
  _logger.debug(f'Retrying ({num_retries}) after {type(e.orig)}')
362
372
  time.sleep(random.uniform(0.1, 0.5))
373
+ assert not self._undo_actions # We should not have any undo actions at this point
363
374
  continue
364
375
  else:
365
376
  raise
366
377
 
378
+ assert not self._undo_actions
367
379
  yield conn
368
380
  return
369
381
 
@@ -376,49 +388,19 @@ class Catalog:
376
388
  # we got this exception after getting the initial table locks and therefore need to abort
377
389
  raise
378
390
 
379
- except (sql.exc.DBAPIError, sql.exc.OperationalError) as e:
380
- has_exc = True
381
- # we got some db error during the actual operation (not just while trying to get locks on the metadata
382
- # records): we convert these into Errors, if asked to do so, and abort
383
- # TODO: what other concurrency-related exceptions should we expect?
384
-
385
- # we always convert UndefinedTable exceptions (they can't be retried)
386
- if isinstance(e.orig, psycopg.errors.UndefinedTable):
387
- # the table got dropped in the middle of the table operation
388
- tbl_name = tbl.tbl_name() if tbl is not None else str(tbl_id) if tbl_id is not None else '?'
389
- _logger.debug(f'Exception: undefined table ({tbl_name}): Caught {type(e.orig)}: {e!r}')
390
- assert tbl is not None
391
- raise excs.Error(f'Table was dropped: {tbl_name}') from None
392
- elif isinstance(e.orig, psycopg.errors.SerializationFailure) and convert_db_excs:
393
- # we still got a serialization error, despite getting x-locks at the beginning
394
- msg: str
395
- if tbl is not None:
396
- msg = f'{tbl.tbl_name()} ({tbl.tbl_id})'
397
- elif tbl_id is not None:
398
- msg = f'{tbl_id}'
399
- else:
400
- msg = ''
401
- _logger.debug(f'Exception: serialization failure: {msg} ({e})')
402
- raise excs.Error(
403
- 'That Pixeltable operation could not be completed because it conflicted with another '
404
- 'operation that was run on a different process.\n'
405
- 'Please re-run the operation.'
406
- ) from None
407
- else:
408
- raise
409
-
410
- except KeyboardInterrupt:
391
+ except (sql_exc.DBAPIError, sql_exc.OperationalError, sql_exc.InternalError) as e:
411
392
  has_exc = True
412
- _logger.debug('Caught KeyboardInterrupt')
413
- raise
393
+ self.convert_sql_exc(e, tbl_id, tbl.tbl_version if tbl is not None else None, convert_db_excs)
394
+ raise # re-raise the error if it didn't convert to a pxt.Error
414
395
 
415
- except:
396
+ except (Exception, KeyboardInterrupt) as e:
416
397
  has_exc = True
398
+ _logger.debug(f'Caught {e.__class__}')
417
399
  raise
418
400
 
419
401
  finally:
420
402
  self._in_write_xact = False
421
- self._x_locked_tbl_ids = set()
403
+ self._x_locked_tbl_ids.clear()
422
404
  self._column_dependents = None
423
405
 
424
406
  # invalidate cached current TableVersion instances
@@ -428,14 +410,75 @@ class Catalog:
428
410
  tv.is_validated = False
429
411
 
430
412
  if has_exc:
431
- # purge all modified TableVersion instances, we can't guarantee they are still consistent with the
413
+ # Execute undo actions in reverse order (LIFO)
414
+ for hook in reversed(self._undo_actions):
415
+ run_cleanup(hook, raise_error=False)
416
+ # purge all modified TableVersion instances; we can't guarantee they are still consistent with the
432
417
  # stored metadata
433
418
  for handle in self._modified_tvs:
434
419
  self._clear_tv_cache(handle.id, handle.effective_version)
435
- # Clear potentially corrupted cached metadata after error
420
+ # Clear potentially corrupted cached metadata
436
421
  if tbl is not None:
437
422
  tbl.clear_cached_md()
438
- self._modified_tvs = set()
423
+
424
+ self._undo_actions.clear()
425
+ self._modified_tvs.clear()
426
+
427
+ def register_undo_action(self, func: Callable[[], None]) -> Callable[[], None]:
428
+ """Registers a function to be called if the current transaction fails.
429
+
430
+ The function is called only if the current transaction fails due to an exception.
431
+
432
+ Rollback functions are called in reverse order of registration (LIFO).
433
+
434
+ The function should not raise exceptions; if it does, they are logged and ignored.
435
+ """
436
+ assert Env.get().in_xact
437
+ self._undo_actions.append(func)
438
+ return func
439
+
440
+ def convert_sql_exc(
441
+ self,
442
+ e: sql_exc.StatementError,
443
+ tbl_id: UUID | None = None,
444
+ tbl: TableVersionHandle | None = None,
445
+ convert_db_excs: bool = True,
446
+ ) -> None:
447
+ # we got some db error during the actual operation (not just while trying to get locks on the metadata
448
+ # records); we convert these into pxt.Error exceptions if appropriate
449
+
450
+ # we always convert UndefinedTable exceptions (they can't be retried)
451
+ if isinstance(e.orig, psycopg.errors.UndefinedTable) and tbl is not None:
452
+ # the table got dropped in the middle of the operation
453
+ tbl_name = tbl.get().name
454
+ _logger.debug(f'Exception: undefined table ({tbl_name}): Caught {type(e.orig)}: {e!r}')
455
+ raise excs.Error(f'Table was dropped: {tbl_name}') from None
456
+ elif (
457
+ isinstance(
458
+ e.orig,
459
+ (
460
+ psycopg.errors.SerializationFailure, # serialization error despite getting x-locks
461
+ psycopg.errors.InFailedSqlTransaction, # can happen after tx fails for another reason
462
+ psycopg.errors.DuplicateColumn, # if a different process added a column concurrently
463
+ ),
464
+ )
465
+ and convert_db_excs
466
+ ):
467
+ msg: str
468
+ if tbl is not None:
469
+ msg = f'{tbl.get().name} ({tbl.id})'
470
+ elif tbl_id is not None:
471
+ msg = f'{tbl_id}'
472
+ else:
473
+ msg = ''
474
+ _logger.debug(f'Exception: {e.orig.__class__}: {msg} ({e})')
475
+ # Suppress the underlying SQL exception unless DEBUG is enabled
476
+ raise_from = e if _logger.isEnabledFor(logging.DEBUG) else None
477
+ raise excs.Error(
478
+ 'That Pixeltable operation could not be completed because it conflicted with another '
479
+ 'operation that was run on a different process.\n'
480
+ 'Please re-run the operation.'
481
+ ) from raise_from
439
482
 
440
483
  @property
441
484
  def in_write_xact(self) -> bool:
@@ -601,7 +644,7 @@ class Catalog:
601
644
  if op.op_sn == op.num_ops - 1:
602
645
  conn.execute(reset_has_pending_stmt)
603
646
 
604
- except (sql.exc.DBAPIError, sql.exc.OperationalError) as e:
647
+ except (sql_exc.DBAPIError, sql_exc.OperationalError) as e:
605
648
  # TODO: why are we still seeing these here, instead of them getting taken care of by the retry
606
649
  # logic of begin_xact()?
607
650
  if isinstance(e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)):
@@ -928,11 +971,18 @@ class Catalog:
928
971
  num_retained_versions: int,
929
972
  comment: str,
930
973
  media_validation: MediaValidation,
931
- ) -> Table:
974
+ ) -> tuple[Table, bool]:
975
+ """
976
+ Creates a new InsertableTable at the given path.
977
+
978
+ If `if_exists == IfExistsParam.IGNORE` and a table `t` already exists at the given path, returns `t, False`.
979
+
980
+ Otherwise, creates a new table `t` and returns `t, True` (or raises an exception if the operation fails).
981
+ """
932
982
  existing = self._handle_path_collision(path, InsertableTable, False, if_exists)
933
983
  if existing is not None:
934
984
  assert isinstance(existing, Table)
935
- return existing
985
+ return existing, False
936
986
 
937
987
  dir = self._get_schema_object(path.parent, expected=Dir, raise_if_not_exists=True)
938
988
  assert dir is not None
@@ -948,7 +998,7 @@ class Catalog:
948
998
  media_validation=media_validation,
949
999
  )
950
1000
  self._tbls[tbl._id, None] = tbl
951
- return tbl
1001
+ return tbl, True
952
1002
 
953
1003
  def create_view(
954
1004
  self,
@@ -1203,8 +1253,14 @@ class Catalog:
1203
1253
  TableVersion.create_replica(md)
1204
1254
 
1205
1255
  @retry_loop(for_write=False)
1206
- def get_table(self, path: Path) -> Table:
1207
- obj = Catalog.get()._get_schema_object(path, expected=Table, raise_if_not_exists=True)
1256
+ def get_table(self, path: Path, if_not_exists: IfNotExistsParam) -> Table | None:
1257
+ obj = Catalog.get()._get_schema_object(
1258
+ path, expected=Table, raise_if_not_exists=(if_not_exists == IfNotExistsParam.ERROR)
1259
+ )
1260
+ if obj is None:
1261
+ _logger.info(f'Skipped table {path!r} (does not exist).')
1262
+ return None
1263
+
1208
1264
  assert isinstance(obj, Table)
1209
1265
  # We need to clear cached metadata from tbl_version_path, in case the schema has been changed
1210
1266
  # by another process.
@@ -1216,7 +1272,7 @@ class Catalog:
1216
1272
  tbl = self._get_schema_object(
1217
1273
  path,
1218
1274
  expected=Table,
1219
- raise_if_not_exists=if_not_exists == IfNotExistsParam.ERROR and not force,
1275
+ raise_if_not_exists=(if_not_exists == IfNotExistsParam.ERROR and not force),
1220
1276
  lock_parent=True,
1221
1277
  lock_obj=False,
1222
1278
  )
@@ -1301,7 +1357,7 @@ class Catalog:
1301
1357
  base_id = tvp.base.tbl_id
1302
1358
  base_tv = self.get_tbl_version(base_id, None, validate_initialized=True)
1303
1359
  base_tv.tbl_md.view_sn += 1
1304
- self._modified_tvs.add(base_tv.handle)
1360
+ self.mark_modified_tvs(base_tv.handle)
1305
1361
  result = Env.get().conn.execute(
1306
1362
  sql.update(schema.Table.__table__)
1307
1363
  .values({schema.Table.md: dataclasses.asdict(base_tv.tbl_md)})
@@ -1313,7 +1369,7 @@ class Catalog:
1313
1369
  if not is_pure_snapshot:
1314
1370
  # invalidate the TableVersion instance when we're done so that existing references to it can find out it
1315
1371
  # has been dropped
1316
- self._modified_tvs.add(tvp.tbl_version)
1372
+ self.mark_modified_tvs(tvp.tbl_version)
1317
1373
  tv = tvp.tbl_version.get() if tvp.tbl_version is not None else None
1318
1374
  if not is_pure_snapshot:
1319
1375
  # drop the store table before deleting the Table record
@@ -1682,6 +1738,9 @@ class Catalog:
1682
1738
 
1683
1739
  @retry_loop(for_write=False)
1684
1740
  def collect_tbl_history(self, tbl_id: UUID, n: Optional[int]) -> list[schema.FullTableMd]:
1741
+ return self._collect_tbl_history(tbl_id, n)
1742
+
1743
+ def _collect_tbl_history(self, tbl_id: UUID, n: Optional[int]) -> list[schema.FullTableMd]:
1685
1744
  """
1686
1745
  Returns the history of up to n versions of the table with the given UUID.
1687
1746
 
@@ -1694,14 +1753,15 @@ class Catalog:
1694
1753
  Each row contains a TableVersion and a TableSchemaVersion object.
1695
1754
  """
1696
1755
  q = (
1697
- sql.select(schema.TableVersion, schema.TableSchemaVersion)
1698
- .select_from(schema.TableVersion)
1699
- .join(
1700
- schema.TableSchemaVersion,
1701
- schema.TableVersion.md['schema_version'].cast(sql.Integer) == schema.TableSchemaVersion.schema_version,
1702
- )
1756
+ sql.select(schema.Table, schema.TableVersion, schema.TableSchemaVersion)
1757
+ .where(schema.Table.id == tbl_id)
1758
+ .join(schema.TableVersion)
1703
1759
  .where(schema.TableVersion.tbl_id == tbl_id)
1760
+ .join(schema.TableSchemaVersion)
1704
1761
  .where(schema.TableSchemaVersion.tbl_id == tbl_id)
1762
+ .where(
1763
+ schema.TableVersion.md['schema_version'].cast(sql.Integer) == schema.TableSchemaVersion.schema_version
1764
+ )
1705
1765
  .order_by(schema.TableVersion.version.desc())
1706
1766
  )
1707
1767
  if n is not None:
@@ -1709,7 +1769,7 @@ class Catalog:
1709
1769
  src_rows = Env.get().session.execute(q).fetchall()
1710
1770
  return [
1711
1771
  schema.FullTableMd(
1712
- None,
1772
+ schema.md_from_dict(schema.TableMd, row.Table.md),
1713
1773
  schema.md_from_dict(schema.TableVersionMd, row.TableVersion.md),
1714
1774
  schema.md_from_dict(schema.TableSchemaVersionMd, row.TableSchemaVersion.md),
1715
1775
  )
@@ -1904,11 +1964,13 @@ class Catalog:
1904
1964
 
1905
1965
  # If `tbl` is a named pure snapshot, we're not quite done, since the snapshot metadata won't appear in the
1906
1966
  # TableVersionPath. We need to prepend it separately.
1907
- if isinstance(tbl, View) and tbl._snapshot_only:
1967
+ if isinstance(tbl, View) and tbl._is_named_pure_snapshot():
1908
1968
  snapshot_md = self.load_tbl_md(tbl._id, 0)
1909
1969
  md = [snapshot_md, *md]
1910
1970
 
1911
- for ancestor_md in md[1:]:
1971
+ for ancestor_md in md:
1972
+ # Set the `is_replica` flag on every ancestor's TableMd.
1973
+ ancestor_md.tbl_md.is_replica = True
1912
1974
  # For replica metadata, we guarantee that the current_version and current_schema_version of TableMd
1913
1975
  # match the corresponding values in TableVersionMd and TableSchemaVersionMd. This is to ensure that,
1914
1976
  # when the metadata is later stored in the catalog of a different Pixeltable instance, the values of
@@ -1916,6 +1978,8 @@ class Catalog:
1916
1978
  # destination catalog.
1917
1979
  ancestor_md.tbl_md.current_version = ancestor_md.version_md.version
1918
1980
  ancestor_md.tbl_md.current_schema_version = ancestor_md.schema_version_md.schema_version
1981
+
1982
+ for ancestor_md in md[1:]:
1919
1983
  # Also, the table version of every proper ancestor is ephemeral; it does not represent a queryable
1920
1984
  # table version (the data might be incomplete, since we have only retrieved one of its views, not
1921
1985
  # the table itself).
@@ -1968,9 +2032,7 @@ class Catalog:
1968
2032
  tbl_version: TableVersion
1969
2033
  if view_md is None:
1970
2034
  # this is a base table
1971
- tbl_version = TableVersion(
1972
- tbl_id, tbl_md, version_md, effective_version, schema_version_md, mutable_views=mutable_views
1973
- )
2035
+ tbl_version = TableVersion(tbl_id, tbl_md, version_md, effective_version, schema_version_md, mutable_views)
1974
2036
  else:
1975
2037
  assert len(view_md.base_versions) > 0 # a view needs to have a base
1976
2038
  # TODO: add TableVersionMd.is_pure_snapshot() and use that
@@ -2005,7 +2067,7 @@ class Catalog:
2005
2067
  self._tbl_versions[tbl_id, effective_version] = tbl_version
2006
2068
  # register this instance as modified, so that it gets purged if the transaction fails, it may not be
2007
2069
  # fully initialized
2008
- self._modified_tvs.add(tbl_version.handle)
2070
+ self.mark_modified_tvs(tbl_version.handle)
2009
2071
  tbl_version.init()
2010
2072
  return tbl_version
2011
2073
 
@@ -48,7 +48,7 @@ class Column:
48
48
  - if None: the system chooses for you (at present, this is always False, but this may change in the future)
49
49
  """
50
50
 
51
- name: str
51
+ name: Optional[str]
52
52
  id: Optional[int]
53
53
  col_type: ts.ColumnType
54
54
  stored: bool
@@ -259,7 +259,12 @@ class Column:
259
259
  # default: record errors for computed and media columns
260
260
  if self._stores_cellmd is not None:
261
261
  return self._stores_cellmd
262
- return self.is_stored and (self.is_computed or self.col_type.is_media_type())
262
+ return self.is_stored and (
263
+ self.is_computed
264
+ or self.col_type.is_media_type()
265
+ or self.col_type.is_json_type()
266
+ or self.col_type.is_array_type()
267
+ )
263
268
 
264
269
  @property
265
270
  def qualified_name(self) -> str:
@@ -117,6 +117,7 @@ class Table(SchemaObject):
117
117
  is_primary_key=col.is_pk,
118
118
  media_validation=col.media_validation.name.lower() if col.media_validation is not None else None, # type: ignore[typeddict-item]
119
119
  computed_with=col.value_expr.display_str(inline=False) if col.value_expr is not None else None,
120
+ defined_in=col.tbl.name,
120
121
  )
121
122
  # Pure snapshots have no indices
122
123
  indices = self._tbl_version.get().idxs_by_name.values() if self._tbl_version is not None else {}
@@ -19,6 +19,10 @@ class ColumnMetadata(TypedDict):
19
19
  """The media validation policy for this column."""
20
20
  computed_with: Optional[str]
21
21
  """Expression used to compute this column; `None` if this is not a computed column."""
22
+ defined_in: Optional[str]
23
+ """Name of the table where this column was originally defined.
24
+
25
+ If the current table is a view, then `defined_in` may differ from the current table name."""
22
26
 
23
27
 
24
28
  class EmbeddingIndexParams(TypedDict):