pixeltable 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (51)
  1. pixeltable/__init__.py +1 -0
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +3 -10
  4. pixeltable/catalog/catalog.py +139 -59
  5. pixeltable/catalog/column.py +32 -23
  6. pixeltable/catalog/globals.py +2 -45
  7. pixeltable/catalog/insertable_table.py +5 -2
  8. pixeltable/catalog/path.py +6 -0
  9. pixeltable/catalog/table.py +173 -23
  10. pixeltable/catalog/table_version.py +156 -92
  11. pixeltable/catalog/table_version_handle.py +26 -1
  12. pixeltable/catalog/update_status.py +179 -0
  13. pixeltable/catalog/view.py +12 -3
  14. pixeltable/config.py +76 -12
  15. pixeltable/dataframe.py +1 -1
  16. pixeltable/env.py +29 -0
  17. pixeltable/exec/exec_node.py +7 -24
  18. pixeltable/exec/expr_eval/schedulers.py +134 -7
  19. pixeltable/exprs/column_property_ref.py +23 -20
  20. pixeltable/exprs/column_ref.py +24 -18
  21. pixeltable/exprs/data_row.py +9 -0
  22. pixeltable/exprs/function_call.py +2 -2
  23. pixeltable/exprs/row_builder.py +46 -14
  24. pixeltable/exprs/rowid_ref.py +0 -4
  25. pixeltable/func/function.py +3 -3
  26. pixeltable/functions/audio.py +36 -9
  27. pixeltable/functions/video.py +57 -10
  28. pixeltable/globals.py +61 -1
  29. pixeltable/io/__init__.py +1 -1
  30. pixeltable/io/external_store.py +39 -64
  31. pixeltable/io/globals.py +4 -4
  32. pixeltable/io/hf_datasets.py +10 -2
  33. pixeltable/io/label_studio.py +52 -48
  34. pixeltable/metadata/__init__.py +1 -1
  35. pixeltable/metadata/converters/convert_38.py +39 -0
  36. pixeltable/metadata/converters/convert_39.py +125 -0
  37. pixeltable/metadata/converters/util.py +3 -0
  38. pixeltable/metadata/notes.py +2 -0
  39. pixeltable/metadata/schema.py +14 -2
  40. pixeltable/metadata/utils.py +78 -0
  41. pixeltable/plan.py +26 -18
  42. pixeltable/share/packager.py +20 -38
  43. pixeltable/store.py +121 -142
  44. pixeltable/type_system.py +2 -2
  45. pixeltable/utils/coroutine.py +6 -23
  46. pixeltable/utils/media_store.py +39 -0
  47. {pixeltable-0.4.1.dist-info → pixeltable-0.4.3.dist-info}/METADATA +1 -1
  48. {pixeltable-0.4.1.dist-info → pixeltable-0.4.3.dist-info}/RECORD +51 -47
  49. {pixeltable-0.4.1.dist-info → pixeltable-0.4.3.dist-info}/LICENSE +0 -0
  50. {pixeltable-0.4.1.dist-info → pixeltable-0.4.3.dist-info}/WHEEL +0 -0
  51. {pixeltable-0.4.1.dist-info → pixeltable-0.4.3.dist-info}/entry_points.txt +0 -0
pixeltable/__init__.py CHANGED
@@ -20,6 +20,7 @@ from .globals import (
20
20
  list_dirs,
21
21
  list_functions,
22
22
  list_tables,
23
+ ls,
23
24
  move,
24
25
  tool,
25
26
  tools,
pixeltable/__version__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  # These version placeholders will be replaced during build.
2
- __version__ = '0.4.1'
3
- __version_tuple__ = (0, 4, 1)
2
+ __version__ = '0.4.3'
3
+ __version_tuple__ = (0, 4, 3)
pixeltable/catalog/__init__.py CHANGED
@@ -3,21 +3,14 @@
3
3
  from .catalog import Catalog
4
4
  from .column import Column
5
5
  from .dir import Dir
6
- from .globals import (
7
- IfExistsParam,
8
- IfNotExistsParam,
9
- MediaValidation,
10
- QColumnId,
11
- UpdateStatus,
12
- is_valid_identifier,
13
- is_valid_path,
14
- )
6
+ from .globals import IfExistsParam, IfNotExistsParam, MediaValidation, QColumnId, is_valid_identifier, is_valid_path
15
7
  from .insertable_table import InsertableTable
16
8
  from .named_function import NamedFunction
17
9
  from .path import Path
18
10
  from .schema_object import SchemaObject
19
11
  from .table import Table
20
12
  from .table_version import TableVersion
21
- from .table_version_handle import TableVersionHandle
13
+ from .table_version_handle import ColumnHandle, TableVersionHandle
22
14
  from .table_version_path import TableVersionPath
15
+ from .update_status import RowCountStats, UpdateStatus
23
16
  from .view import View
pixeltable/catalog/catalog.py CHANGED
@@ -308,7 +308,11 @@ class Catalog:
308
308
  # we still got a serialization error, despite getting x-locks at the beginning
309
309
  msg = f'{tbl.tbl_name()} ({tbl.tbl_id})' if tbl is not None else ''
310
310
  _logger.debug(f'Exception: serialization failure: {msg} ({e})')
311
- raise excs.Error('Serialization failure. Please re-run the operation.') from None
311
+ raise excs.Error(
312
+ 'That Pixeltable operation could not be completed because it conflicted with another '
313
+ 'operation that was run on a different process.\n'
314
+ 'Please re-run the operation.'
315
+ ) from None
312
316
  else:
313
317
  raise
314
318
 
@@ -762,56 +766,47 @@ class Catalog:
762
766
  self._tbls[view._id] = view
763
767
  return view
764
768
 
765
- @_retry_loop(for_write=True)
766
- def create_replica(
767
- self, path: Path, md: list[schema.FullTableMd], if_exists: IfExistsParam = IfExistsParam.ERROR
768
- ) -> None:
769
+ def create_replica(self, path: Path, md: list[schema.FullTableMd]) -> None:
769
770
  """
770
771
  Creates table, table_version, and table_schema_version records for a replica with the given metadata.
771
772
  The metadata should be presented in standard "ancestor order", with the table being replicated at
772
773
  list position 0 and the (root) base table at list position -1.
773
-
774
- TODO: create_replica() also needs to create the store tables and populate them in order to make
775
- replica creation atomic.
776
774
  """
775
+ assert Env.get().in_xact
776
+
777
777
  tbl_id = UUID(md[0].tbl_md.tbl_id)
778
778
 
779
- # First handle path collisions (if_exists='ignore' or 'replace' or etc).
780
- existing = self._handle_path_collision(path, View, False, if_exists)
781
- if existing is not None:
782
- if existing._id != tbl_id:
783
- raise excs.Error(
784
- f"An attempt was made to create a replica table at {path!r} with if_exists='ignore', "
785
- 'but a different table already exists at that location.'
786
- )
787
- assert isinstance(existing, View)
788
- return
779
+ existing = self._handle_path_collision(path, Table, False, if_exists=IfExistsParam.IGNORE) # type: ignore[type-abstract]
780
+ if existing is not None and existing._id != tbl_id:
781
+ raise excs.Error(
782
+ f'An attempt was made to create a replica table at {path!r}, '
783
+ 'but a different table already exists at that location.'
784
+ )
789
785
 
790
786
  # Ensure that the system directory exists.
791
787
  self._create_dir(Path('_system', allow_system_paths=True), if_exists=IfExistsParam.IGNORE, parents=False)
792
788
 
793
- # Now check to see if this table already exists in the catalog.
789
+ # Now check to see if this table UUID already exists in the catalog.
794
790
  existing = Catalog.get().get_table_by_id(tbl_id)
795
791
  if existing is not None:
796
792
  existing_path = Path(existing._path(), allow_system_paths=True)
797
- # It does exist. If it's a non-system table, that's an error: it's already been replicated.
798
- if not existing_path.is_system_path:
799
- raise excs.Error(
800
- f'That table has already been replicated as {existing._path()!r}. \n'
801
- f'Drop the existing replica if you wish to re-create it.'
802
- )
803
- # If it's a system table, then this means it was created at some point as the ancestor of some other
804
- # table (a snapshot-over-snapshot scenario). In that case, we simply move it to the new (named) location.
805
- self._move(existing_path, path)
806
-
807
- # Now store the metadata for this replica. In the case where the table already exists (and was just moved
808
- # into a named location), this will be a no-op, but it still serves to validate that the newly received
809
- # metadata is identical to what's in the catalog.
810
- self.__store_replica_md(path, md[0])
793
+ if existing_path != path:
794
+ # It does exist, under a different path from the specified one.
795
+ if not existing_path.is_system_path:
796
+ raise excs.Error(
797
+ f'That table has already been replicated as {existing_path!r}.\n'
798
+ f'Drop the existing replica if you wish to re-create it.'
799
+ )
800
+ # If it's a system table, then this means it was created at some point as the ancestor of some other
801
+ # table (a snapshot-over-snapshot scenario). In that case, we simply move it to the new (named)
802
+ # location.
803
+ self._move(existing_path, path)
811
804
 
812
- # Now store the metadata for all of this table's proper ancestors. If one or more proper ancestors
805
+ # Now store the metadata for this replica's proper ancestors. If one or more proper ancestors
813
806
  # do not yet exist in the store, they will be created as anonymous system tables.
814
- for ancestor_md in md[1:]:
807
+ # We instantiate the ancestors starting with the base table and ending with the immediate parent of the
808
+ # table being replicated.
809
+ for ancestor_md in md[:0:-1]:
815
810
  ancestor_id = UUID(ancestor_md.tbl_md.tbl_id)
816
811
  replica = Catalog.get().get_table_by_id(ancestor_id)
817
812
  replica_path: Path
@@ -824,12 +819,22 @@ class Catalog:
824
819
  # that was directly replicated by the user at some point). In either case, use the existing path.
825
820
  replica_path = Path(replica._path(), allow_system_paths=True)
826
821
 
827
- # Store the metadata; it could be a new version (in which case a new record will be created) or a
828
- # known version (in which case the newly received metadata will be validated as identical).
822
+ # Store the metadata; it could be a new version (in which case a new record will be created), or a known
823
+ # version (in which case the newly received metadata will be validated as identical).
824
+ # If it's a new version, this will result in a new TableVersion record being created.
829
825
  self.__store_replica_md(replica_path, ancestor_md)
830
826
 
831
- # don't create TableVersion instances at this point, they would be superseded by calls to TV.create_replica()
832
- # in TableRestorer.restore()
827
+ # Now we must clear cached metadata for the ancestor table, to force the next table operation to pick up
828
+ # the new TableVersion instance. This is necessary because computed columns of descendant tables might
829
+ # reference columns of the ancestor table that only exist in the new version.
830
+ replica = Catalog.get().get_table_by_id(ancestor_id)
831
+ assert replica is not None # If it didn't exist before, it must have been created by now.
832
+ replica._tbl_version_path.clear_cached_md()
833
+
834
+ # Finally, store the metadata for the table being replicated; as before, it could be a new version or a known
835
+ # version. If it's a new version, then a TableVersion record will be created, unless the table being replicated
836
+ # is a pure snapshot.
837
+ self.__store_replica_md(path, md[0])
833
838
 
834
839
  def __store_replica_md(self, path: Path, md: schema.FullTableMd) -> None:
835
840
  _logger.info(f'Creating replica table at {path!r} with ID: {md.tbl_md.tbl_id}')
@@ -911,12 +916,19 @@ class Catalog:
911
916
  'This is likely due to data corruption in the replicated table.'
912
917
  )
913
918
 
914
- self.store_tbl_md(UUID(tbl_id), new_tbl_md, new_version_md, new_schema_version_md)
919
+ self.store_tbl_md(UUID(tbl_id), None, new_tbl_md, new_version_md, new_schema_version_md)
920
+
921
+ if new_version_md is not None and not md.is_pure_snapshot:
922
+ # It's a new version of a table that has a physical store, so we need to create a TableVersion instance.
923
+ TableVersion.create_replica(md)
915
924
 
916
925
  @_retry_loop(for_write=False)
917
926
  def get_table(self, path: Path) -> Table:
918
927
  obj = Catalog.get()._get_schema_object(path, expected=Table, raise_if_not_exists=True)
919
928
  assert isinstance(obj, Table)
929
+ # We need to clear cached metadata from tbl_version_path, in case the schema has been changed
930
+ # by another process.
931
+ obj._tbl_version_path.clear_cached_md()
920
932
  return obj
921
933
 
922
934
  @_retry_loop(for_write=True)
@@ -1228,6 +1240,43 @@ class Catalog:
1228
1240
  self._tbls[tbl_id] = view
1229
1241
  return view
1230
1242
 
1243
+ @_retry_loop(for_write=False)
1244
+ def collect_tbl_history(self, tbl_id: UUID, n: Optional[int]) -> list[schema.FullTableMd]:
1245
+ """
1246
+ Returns the history of up to n versions of the table with the given UUID.
1247
+
1248
+ Args:
1249
+ tbl_id: the UUID of the table to collect history for.
1250
+ n: Optional limit on the maximum number of versions returned.
1251
+
1252
+ Returns:
1253
+ A sequence of rows, ordered by version number
1254
+ Each row contains a TableVersion and a TableSchemaVersion object.
1255
+ """
1256
+ q = (
1257
+ sql.select(schema.TableVersion, schema.TableSchemaVersion)
1258
+ .select_from(schema.TableVersion)
1259
+ .join(
1260
+ schema.TableSchemaVersion,
1261
+ sql.cast(schema.TableVersion.md['schema_version'], sql.Integer)
1262
+ == schema.TableSchemaVersion.schema_version,
1263
+ )
1264
+ .where(schema.TableVersion.tbl_id == tbl_id)
1265
+ .where(schema.TableSchemaVersion.tbl_id == tbl_id)
1266
+ .order_by(schema.TableVersion.version.desc())
1267
+ )
1268
+ if n is not None:
1269
+ q = q.limit(n)
1270
+ src_rows = Env.get().session.execute(q).fetchall()
1271
+ return [
1272
+ schema.FullTableMd(
1273
+ None,
1274
+ schema.md_from_dict(schema.TableVersionMd, row.TableVersion.md),
1275
+ schema.md_from_dict(schema.TableSchemaVersionMd, row.TableSchemaVersion.md),
1276
+ )
1277
+ for row in src_rows
1278
+ ]
1279
+
1231
1280
  def load_tbl_md(self, tbl_id: UUID, effective_version: Optional[int]) -> schema.FullTableMd:
1232
1281
  """
1233
1282
  Loads metadata from the store for a given table UUID and version.
@@ -1297,19 +1346,27 @@ class Catalog:
1297
1346
  def store_tbl_md(
1298
1347
  self,
1299
1348
  tbl_id: UUID,
1349
+ dir_id: Optional[UUID],
1300
1350
  tbl_md: Optional[schema.TableMd],
1301
1351
  version_md: Optional[schema.TableVersionMd],
1302
1352
  schema_version_md: Optional[schema.TableSchemaVersionMd],
1303
1353
  ) -> None:
1304
1354
  """
1305
- Stores metadata to the DB. If specified, `tbl_md` will be updated in place (only one such record can exist
1306
- per UUID); `version_md` and `schema_version_md` will be inserted as new records.
1355
+ Stores metadata to the DB.
1356
+
1357
+ Args:
1358
+ tbl_id: UUID of the table to store metadata for.
1359
+ dir_id: If specified, the tbl_md will be added to the given directory; if None, the table must already exist
1360
+ tbl_md: If specified, `tbl_md` will be inserted, or updated (only one such record can exist per UUID)
1361
+ version_md: inserted as a new record if present
1362
+ schema_version_md: will be inserted as a new record if present
1307
1363
 
1308
1364
  If inserting `version_md` or `schema_version_md` would be a primary key violation, an exception will be raised.
1309
1365
  """
1310
- conn = Env.get().conn
1311
1366
  assert self._in_write_xact
1367
+ session = Env.get().session
1312
1368
 
1369
+ # Construct and insert or update table record if requested.
1313
1370
  if tbl_md is not None:
1314
1371
  assert tbl_md.tbl_id == str(tbl_id)
1315
1372
  if version_md is not None:
@@ -1317,32 +1374,55 @@ class Catalog:
1317
1374
  assert tbl_md.current_schema_version == version_md.schema_version
1318
1375
  if schema_version_md is not None:
1319
1376
  assert tbl_md.current_schema_version == schema_version_md.schema_version
1320
- result = conn.execute(
1321
- sql.update(schema.Table.__table__)
1322
- .values({schema.Table.md: dataclasses.asdict(tbl_md)})
1323
- .where(schema.Table.id == tbl_id)
1324
- )
1325
- assert result.rowcount == 1, result.rowcount
1377
+ if dir_id is not None:
1378
+ # We are inserting a record while creating a new table.
1379
+ tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(tbl_md))
1380
+ session.add(tbl_record)
1381
+ else:
1382
+ # Update the existing table record.
1383
+ result = session.execute(
1384
+ sql.update(schema.Table.__table__)
1385
+ .values({schema.Table.md: dataclasses.asdict(tbl_md)})
1386
+ .where(schema.Table.id == tbl_id)
1387
+ )
1388
+ assert result.rowcount == 1, result.rowcount
1326
1389
 
1390
+ # Construct and insert new table version record if requested.
1327
1391
  if version_md is not None:
1328
1392
  assert version_md.tbl_id == str(tbl_id)
1329
1393
  if schema_version_md is not None:
1330
1394
  assert version_md.schema_version == schema_version_md.schema_version
1331
- conn.execute(
1332
- sql.insert(schema.TableVersion.__table__).values(
1333
- tbl_id=tbl_id, version=version_md.version, md=dataclasses.asdict(version_md)
1334
- )
1395
+ tbl_version_record = schema.TableVersion(
1396
+ tbl_id=tbl_id, version=version_md.version, md=dataclasses.asdict(version_md)
1335
1397
  )
1398
+ session.add(tbl_version_record)
1336
1399
 
1400
+ # Construct and insert a new schema version record if requested.
1337
1401
  if schema_version_md is not None:
1338
1402
  assert schema_version_md.tbl_id == str(tbl_id)
1339
- conn.execute(
1340
- sql.insert(schema.TableSchemaVersion.__table__).values(
1341
- tbl_id=tbl_id,
1342
- schema_version=schema_version_md.schema_version,
1343
- md=dataclasses.asdict(schema_version_md),
1344
- )
1403
+ schema_version_record = schema.TableSchemaVersion(
1404
+ tbl_id=tbl_id, schema_version=schema_version_md.schema_version, md=dataclasses.asdict(schema_version_md)
1345
1405
  )
1406
+ session.add(schema_version_record)
1407
+ session.flush() # Inform SQLAlchemy that we want to write these changes to the DB.
1408
+
1409
+ def update_tbl_version_md(self, version_md: Optional[schema.TableVersionMd]) -> None:
1410
+ """
1411
+ Update the TableVersion.md field in the DB. Typically used to update the cascade row count status.
1412
+
1413
+ Args:
1414
+ version_md: TableVersionMd
1415
+ """
1416
+ assert self._in_write_xact
1417
+ session = Env.get().session
1418
+
1419
+ session.execute(
1420
+ sql.update(schema.TableVersion.__table__)
1421
+ .values({schema.TableVersion.md: dataclasses.asdict(version_md)})
1422
+ .where(schema.TableVersion.tbl_id == version_md.tbl_id, schema.TableVersion.version == version_md.version)
1423
+ )
1424
+
1425
+ session.flush() # Inform SQLAlchemy that we want to write these changes to the DB.
1346
1426
 
1347
1427
  def delete_tbl_md(self, tbl_id: UUID) -> None:
1348
1428
  """
pixeltable/catalog/column.py CHANGED
@@ -15,6 +15,7 @@ from .globals import MediaValidation, is_valid_identifier
15
15
 
16
16
  if TYPE_CHECKING:
17
17
  from .table_version import TableVersion
18
+ from .table_version_handle import ColumnHandle
18
19
  from .table_version_path import TableVersionPath
19
20
 
20
21
  _logger = logging.getLogger('pixeltable')
@@ -35,11 +36,10 @@ class Column:
35
36
  _media_validation: Optional[MediaValidation] # if not set, TableVersion.media_validation applies
36
37
  schema_version_add: Optional[int]
37
38
  schema_version_drop: Optional[int]
38
- _records_errors: Optional[bool]
39
+ _stores_cellmd: Optional[bool]
39
40
  sa_col: Optional[sql.schema.Column]
40
41
  sa_col_type: Optional[sql.sqltypes.TypeEngine]
41
- sa_errormsg_col: Optional[sql.schema.Column]
42
- sa_errortype_col: Optional[sql.schema.Column]
42
+ sa_cellmd_col: Optional[sql.schema.Column] # JSON metadata for the cell, e.g. errortype, errormsg for media columns
43
43
  _value_expr: Optional[exprs.Expr]
44
44
  value_expr_dict: Optional[dict[str, Any]]
45
45
  # we store a TableVersion here, not a TableVersionHandle, because this column is owned by that TableVersion instance
@@ -58,7 +58,7 @@ class Column:
58
58
  schema_version_add: Optional[int] = None,
59
59
  schema_version_drop: Optional[int] = None,
60
60
  sa_col_type: Optional[sql.sqltypes.TypeEngine] = None,
61
- records_errors: Optional[bool] = None,
61
+ stores_cellmd: Optional[bool] = None,
62
62
  value_expr_dict: Optional[dict[str, Any]] = None,
63
63
  tbl: Optional[TableVersion] = None,
64
64
  ):
@@ -117,15 +117,14 @@ class Column:
117
117
  self.schema_version_add = schema_version_add
118
118
  self.schema_version_drop = schema_version_drop
119
119
 
120
- self._records_errors = records_errors
120
+ self._stores_cellmd = stores_cellmd
121
121
 
122
122
  # column in the stored table for the values of this Column
123
123
  self.sa_col = None
124
124
  self.sa_col_type = sa_col_type
125
125
 
126
126
  # computed cols also have storage columns for the exception string and type
127
- self.sa_errormsg_col = None
128
- self.sa_errortype_col = None
127
+ self.sa_cellmd_col = None
129
128
 
130
129
  def init_value_expr(self) -> None:
131
130
  from pixeltable import exprs
@@ -148,6 +147,15 @@ class Column:
148
147
  )
149
148
  warnings.warn(message, category=excs.PixeltableWarning, stacklevel=2)
150
149
 
150
+ @property
151
+ def handle(self) -> 'ColumnHandle':
152
+ """Returns a ColumnHandle for this Column."""
153
+ from .table_version_handle import ColumnHandle
154
+
155
+ assert self.tbl is not None
156
+ assert self.id is not None
157
+ return ColumnHandle(self.tbl.handle, self.id)
158
+
151
159
  @property
152
160
  def value_expr(self) -> Optional[exprs.Expr]:
153
161
  assert self.value_expr_dict is None or self._value_expr is not None
@@ -193,11 +201,11 @@ class Column:
193
201
  return self.stored
194
202
 
195
203
  @property
196
- def records_errors(self) -> bool:
204
+ def stores_cellmd(self) -> bool:
197
205
  """True if this column also stores error information."""
198
206
  # default: record errors for computed and media columns
199
- if self._records_errors is not None:
200
- return self._records_errors
207
+ if self._stores_cellmd is not None:
208
+ return self._stores_cellmd
201
209
  return self.is_stored and (self.is_computed or self.col_type.is_media_type())
202
210
 
203
211
  @property
@@ -233,28 +241,29 @@ class Column:
233
241
  """
234
242
  assert self.is_stored
235
243
  # all storage columns are nullable (we deal with null errors in Pixeltable directly)
236
- self.sa_col = sql.Column(
237
- self.store_name(),
238
- self.col_type.to_sa_type() if self.sa_col_type is None else self.sa_col_type,
239
- nullable=True,
240
- )
241
- if self.is_computed or self.col_type.is_media_type():
242
- self.sa_errormsg_col = sql.Column(self.errormsg_store_name(), ts.StringType().to_sa_type(), nullable=True)
243
- self.sa_errortype_col = sql.Column(self.errortype_store_name(), ts.StringType().to_sa_type(), nullable=True)
244
+ self.sa_col = sql.Column(self.store_name(), self.get_sa_col_type(), nullable=True)
245
+ if self.stores_cellmd:
246
+ # JSON metadata for the cell, e.g. errortype, errormsg for media columns
247
+ self.sa_cellmd_col = sql.Column(self.cellmd_store_name(), self.sa_cellmd_type(), nullable=True)
244
248
 
245
249
  def get_sa_col_type(self) -> sql.sqltypes.TypeEngine:
246
250
  return self.col_type.to_sa_type() if self.sa_col_type is None else self.sa_col_type
247
251
 
252
+ @classmethod
253
+ def cellmd_type(cls) -> ts.ColumnType:
254
+ return ts.JsonType(nullable=True)
255
+
256
+ @classmethod
257
+ def sa_cellmd_type(cls) -> sql.sqltypes.TypeEngine:
258
+ return cls.cellmd_type().to_sa_type()
259
+
248
260
  def store_name(self) -> str:
249
261
  assert self.id is not None
250
262
  assert self.is_stored
251
263
  return f'col_{self.id}'
252
264
 
253
- def errormsg_store_name(self) -> str:
254
- return f'{self.store_name()}_errormsg'
255
-
256
- def errortype_store_name(self) -> str:
257
- return f'{self.store_name()}_errortype'
265
+ def cellmd_store_name(self) -> str:
266
+ return f'{self.store_name()}_cellmd'
258
267
 
259
268
  def __str__(self) -> str:
260
269
  return f'{self.name}: {self.col_type}'
pixeltable/catalog/globals.py CHANGED
@@ -1,14 +1,12 @@
1
1
  from __future__ import annotations
2
2
 
3
- import dataclasses
4
3
  import enum
5
4
  import itertools
6
5
  import logging
6
+ from dataclasses import dataclass
7
7
  from typing import Optional
8
8
  from uuid import UUID
9
9
 
10
- from typing_extensions import Self
11
-
12
10
  import pixeltable.exceptions as excs
13
11
 
14
12
  _logger = logging.getLogger('pixeltable')
@@ -22,54 +20,13 @@ _ROWID_COLUMN_NAME = '_rowid'
22
20
  _PREDEF_SYMBOLS: Optional[set[str]] = None
23
21
 
24
22
 
25
- @dataclasses.dataclass(frozen=True)
23
+ @dataclass(frozen=True)
26
24
  class QColumnId:
27
25
  """Qualified column id"""
28
26
 
29
27
  tbl_id: UUID
30
28
  col_id: int
31
29
 
32
- # def __hash__(self) -> int:
33
- # return hash((self.tbl_id, self.col_id))
34
-
35
-
36
- @dataclasses.dataclass
37
- class UpdateStatus:
38
- """
39
- Information about updates that resulted from a table operation.
40
- """
41
-
42
- num_rows: int = 0
43
- # TODO: disambiguate what this means: # of slots computed or # of columns computed?
44
- num_computed_values: int = 0
45
- num_excs: int = 0
46
- updated_cols: list[str] = dataclasses.field(default_factory=list)
47
- cols_with_excs: list[str] = dataclasses.field(default_factory=list)
48
-
49
- def __iadd__(self, other: 'UpdateStatus') -> Self:
50
- self.num_rows += other.num_rows
51
- self.num_computed_values += other.num_computed_values
52
- self.num_excs += other.num_excs
53
- self.updated_cols = list(dict.fromkeys(self.updated_cols + other.updated_cols))
54
- self.cols_with_excs = list(dict.fromkeys(self.cols_with_excs + other.cols_with_excs))
55
- return self
56
-
57
- @property
58
- def insert_msg(self) -> str:
59
- """Return a message describing the results of an insert operation."""
60
- if self.num_excs == 0:
61
- cols_with_excs_str = ''
62
- else:
63
- cols_with_excs_str = (
64
- f' across {len(self.cols_with_excs)} column{"" if len(self.cols_with_excs) == 1 else "s"}'
65
- )
66
- cols_with_excs_str += f' ({", ".join(self.cols_with_excs)})'
67
- msg = (
68
- f'Inserted {self.num_rows} row{"" if self.num_rows == 1 else "s"} '
69
- f'with {self.num_excs} error{"" if self.num_excs == 1 else "s"}{cols_with_excs_str}.'
70
- )
71
- return msg
72
-
73
30
 
74
31
  class MediaValidation(enum.Enum):
75
32
  ON_READ = 0
pixeltable/catalog/insertable_table.py CHANGED
@@ -10,11 +10,12 @@ from pixeltable import exceptions as excs, type_system as ts
10
10
  from pixeltable.env import Env
11
11
  from pixeltable.utils.filecache import FileCache
12
12
 
13
- from .globals import MediaValidation, UpdateStatus
13
+ from .globals import MediaValidation
14
14
  from .table import Table
15
15
  from .table_version import TableVersion
16
16
  from .table_version_handle import TableVersionHandle
17
17
  from .table_version_path import TableVersionPath
18
+ from .update_status import UpdateStatus
18
19
 
19
20
  if TYPE_CHECKING:
20
21
  from pixeltable import exprs
@@ -106,6 +107,7 @@ class InsertableTable(Table):
106
107
 
107
108
  def _get_metadata(self) -> dict[str, Any]:
108
109
  md = super()._get_metadata()
110
+ md['base'] = None
109
111
  md['is_view'] = False
110
112
  md['is_snapshot'] = False
111
113
  return md
@@ -171,13 +173,14 @@ class InsertableTable(Table):
171
173
  from pixeltable.catalog import Catalog
172
174
  from pixeltable.io.table_data_conduit import DFTableDataConduit
173
175
 
174
- status = pxt.UpdateStatus()
175
176
  with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
176
177
  if isinstance(data_source, DFTableDataConduit):
178
+ status = pxt.UpdateStatus()
177
179
  status += self._tbl_version.get().insert(
178
180
  rows=None, df=data_source.pxt_df, print_stats=print_stats, fail_on_exception=fail_on_exception
179
181
  )
180
182
  else:
183
+ status = pxt.UpdateStatus()
181
184
  for row_batch in data_source.valid_row_batch():
182
185
  status += self._tbl_version.get().insert(
183
186
  rows=row_batch, df=None, print_stats=print_stats, fail_on_exception=fail_on_exception
pixeltable/catalog/path.py CHANGED
@@ -77,5 +77,11 @@ class Path:
77
77
  def __str__(self) -> str:
78
78
  return '.'.join(self.components)
79
79
 
80
+ def __eq__(self, other: object) -> bool:
81
+ return isinstance(other, Path) and str(self) == str(other)
82
+
83
+ def __hash__(self) -> int:
84
+ return hash(str(self))
85
+
80
86
  def __lt__(self, other: Path) -> bool:
81
87
  return str(self) < str(other)