pixeltable 0.3.15__py3-none-any.whl → 0.4.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (48)
  1. pixeltable/__version__.py +2 -2
  2. pixeltable/catalog/catalog.py +292 -105
  3. pixeltable/catalog/column.py +10 -8
  4. pixeltable/catalog/dir.py +1 -2
  5. pixeltable/catalog/insertable_table.py +25 -20
  6. pixeltable/catalog/schema_object.py +3 -6
  7. pixeltable/catalog/table.py +245 -189
  8. pixeltable/catalog/table_version.py +317 -201
  9. pixeltable/catalog/table_version_handle.py +15 -2
  10. pixeltable/catalog/table_version_path.py +60 -14
  11. pixeltable/catalog/view.py +14 -5
  12. pixeltable/dataframe.py +11 -9
  13. pixeltable/env.py +2 -4
  14. pixeltable/exec/in_memory_data_node.py +1 -1
  15. pixeltable/exec/sql_node.py +20 -11
  16. pixeltable/exprs/column_property_ref.py +15 -6
  17. pixeltable/exprs/column_ref.py +32 -11
  18. pixeltable/exprs/comparison.py +1 -1
  19. pixeltable/exprs/row_builder.py +4 -6
  20. pixeltable/exprs/rowid_ref.py +8 -0
  21. pixeltable/exprs/similarity_expr.py +1 -0
  22. pixeltable/func/query_template_function.py +1 -1
  23. pixeltable/functions/string.py +212 -58
  24. pixeltable/globals.py +7 -4
  25. pixeltable/index/base.py +5 -0
  26. pixeltable/index/btree.py +5 -0
  27. pixeltable/index/embedding_index.py +5 -0
  28. pixeltable/io/external_store.py +8 -29
  29. pixeltable/io/label_studio.py +1 -1
  30. pixeltable/io/parquet.py +2 -2
  31. pixeltable/io/table_data_conduit.py +0 -31
  32. pixeltable/metadata/__init__.py +1 -1
  33. pixeltable/metadata/converters/convert_13.py +2 -2
  34. pixeltable/metadata/converters/convert_30.py +6 -11
  35. pixeltable/metadata/converters/convert_35.py +9 -0
  36. pixeltable/metadata/converters/util.py +3 -9
  37. pixeltable/metadata/notes.py +1 -0
  38. pixeltable/metadata/schema.py +5 -1
  39. pixeltable/plan.py +4 -4
  40. pixeltable/share/packager.py +24 -9
  41. pixeltable/share/publish.py +2 -2
  42. pixeltable/store.py +19 -13
  43. pixeltable/utils/dbms.py +1 -1
  44. {pixeltable-0.3.15.dist-info → pixeltable-0.4.0rc1.dist-info}/METADATA +1 -1
  45. {pixeltable-0.3.15.dist-info → pixeltable-0.4.0rc1.dist-info}/RECORD +48 -47
  46. {pixeltable-0.3.15.dist-info → pixeltable-0.4.0rc1.dist-info}/LICENSE +0 -0
  47. {pixeltable-0.3.15.dist-info → pixeltable-0.4.0rc1.dist-info}/WHEEL +0 -0
  48. {pixeltable-0.3.15.dist-info → pixeltable-0.4.0rc1.dist-info}/entry_points.txt +0 -0
pixeltable/io/table_data_conduit.py CHANGED
@@ -23,7 +23,6 @@ from .utils import normalize_schema_names
 
 _logger = logging.getLogger('pixeltable')
 
-# ---------------------------------------------------------------------------------------------------------
 
 if TYPE_CHECKING:
     import datasets  # type: ignore[import-untyped]
@@ -46,9 +45,6 @@ class TableDataConduitFormat(str, enum.Enum):
         return False
 
 
-# ---------------------------------------------------------------------------------------------------------
-
-
 @dataclass
 class TableDataConduit:
     source: TableDataSource
@@ -129,9 +125,6 @@ class TableDataConduit:
            raise excs.Error(f'Missing required column(s) ({", ".join(missing_cols)})')
 
 
-# ---------------------------------------------------------------------------------------------------------
-
-
 class DFTableDataConduit(TableDataConduit):
     pxt_df: pxt.DataFrame = None
 
@@ -155,9 +148,6 @@ class DFTableDataConduit(TableDataConduit):
         self.check_source_columns_are_insertable(self.pxt_df.schema.keys())
 
 
-# ---------------------------------------------------------------------------------------------------------
-
-
 class RowDataTableDataConduit(TableDataConduit):
     raw_rows: Optional[RowData] = None
     disable_mapping: bool = True
@@ -235,9 +225,6 @@ class RowDataTableDataConduit(TableDataConduit):
         yield self.valid_rows
 
 
-# ---------------------------------------------------------------------------------------------------------
-
-
 class PandasTableDataConduit(TableDataConduit):
     pd_df: pd.DataFrame = None
     batch_count: int = 0
@@ -293,9 +280,6 @@ class PandasTableDataConduit(TableDataConduit):
         yield self.valid_rows
 
 
-# ---------------------------------------------------------------------------------------------------------
-
-
 class CSVTableDataConduit(TableDataConduit):
     @classmethod
     def from_tds(cls, tds: TableDataConduit) -> 'PandasTableDataConduit':
@@ -307,9 +291,6 @@ class CSVTableDataConduit(TableDataConduit):
         return PandasTableDataConduit.from_tds(t)
 
 
-# ---------------------------------------------------------------------------------------------------------
-
-
 class ExcelTableDataConduit(TableDataConduit):
     @classmethod
     def from_tds(cls, tds: TableDataConduit) -> 'PandasTableDataConduit':
@@ -321,9 +302,6 @@ class ExcelTableDataConduit(TableDataConduit):
         return PandasTableDataConduit.from_tds(t)
 
 
-# ---------------------------------------------------------------------------------------------------------
-
-
 class JsonTableDataConduit(TableDataConduit):
     @classmethod
     def from_tds(cls, tds: TableDataConduit) -> RowDataTableDataConduit:
@@ -346,9 +324,6 @@ class JsonTableDataConduit(TableDataConduit):
         return t2
 
 
-# ---------------------------------------------------------------------------------------------------------
-
-
 class HFTableDataConduit(TableDataConduit):
     hf_ds: Optional[Union[datasets.Dataset, datasets.DatasetDict]] = None
     column_name_for_split: Optional[str] = None
@@ -478,9 +453,6 @@ class HFTableDataConduit(TableDataConduit):
            yield batch
 
 
-# ---------------------------------------------------------------------------------------------------------
-
-
 class ParquetTableDataConduit(TableDataConduit):
     pq_ds: Optional[ParquetDataset] = None
 
@@ -542,9 +514,6 @@ class ParquetTableDataConduit(TableDataConduit):
            raise e
 
 
-# ---------------------------------------------------------------------------------------------------------
-
-
 class UnkTableDataConduit(TableDataConduit):
     """Source type is not known at the time of creation"""
 
pixeltable/metadata/__init__.py CHANGED
@@ -16,7 +16,7 @@ _console_logger = ConsoleLogger(logging.getLogger('pixeltable'))
 
 
 # current version of the metadata; this is incremented whenever the metadata schema changes
-VERSION = 35
+VERSION = 36
 
 
 def create_system_info(engine: sql.engine.Engine) -> None:
pixeltable/metadata/converters/convert_13.py CHANGED
@@ -12,9 +12,9 @@ _logger = logging.getLogger('pixeltable')
 @register_converter(version=13)
 def _(engine: sql.engine.Engine) -> None:
     with engine.begin() as conn:
-        for row in conn.execute(sql.select(Table)):
+        for row in conn.execute(sql.select(Table.id, Table.md)):
             id = row[0]
-            md = row[2]
+            md = row[1]
             updated_md = __update_md(md)
             if updated_md != md:
                 _logger.info(f'Updating schema for table: {id}')
pixeltable/metadata/converters/convert_30.py CHANGED
@@ -1,33 +1,28 @@
 import copy
+from uuid import UUID
 
 import sqlalchemy as sql
 
 from pixeltable.metadata import register_converter
 from pixeltable.metadata.converters.util import (
-    convert_table_record,
+    convert_table_md,
     convert_table_schema_version_record,
     convert_table_version_record,
 )
-from pixeltable.metadata.schema import Table, TableSchemaVersion, TableVersion
+from pixeltable.metadata.schema import TableSchemaVersion, TableVersion
 
 
 @register_converter(version=30)
 def _(engine: sql.engine.Engine) -> None:
-    convert_table_record(engine, table_record_updater=__update_table_record)
+    convert_table_md(engine, table_md_updater=__update_table_md)
     convert_table_version_record(engine, table_version_record_updater=__update_table_version_record)
     convert_table_schema_version_record(
         engine, table_schema_version_record_updater=__update_table_schema_version_record
     )
 
 
-def __update_table_record(record: Table) -> None:
-    """
-    Update TableMd with table_id
-    """
-    assert isinstance(record.md, dict)
-    md = copy.copy(record.md)
-    md['tbl_id'] = str(record.id)
-    record.md = md
+def __update_table_md(md: dict, tbl_id: UUID) -> None:
+    md['tbl_id'] = str(tbl_id)
 
 
 def __update_table_version_record(record: TableVersion) -> None:
pixeltable/metadata/converters/convert_35.py ADDED
@@ -0,0 +1,9 @@
+import sqlalchemy as sql
+
+from pixeltable.metadata import register_converter
+
+
+@register_converter(version=35)
+def _(engine: sql.engine.Engine) -> None:
+    with engine.begin() as conn:
+        conn.execute(sql.text('ALTER TABLE tables ADD COLUMN lock_dummy int8'))
pixeltable/metadata/converters/util.py CHANGED
@@ -33,9 +33,10 @@ def convert_table_md(
     the original entry will be replaced, and the traversal will continue with `v'`.
     """
     with engine.begin() as conn:
-        for row in conn.execute(sql.select(Table)):
+        # avoid a SELECT * here, which breaks when we add new columns to Table
+        for row in conn.execute(sql.select(Table.id, Table.md)):
             tbl_id = row[0]
-            table_md = row[2]
+            table_md = row[1]
             assert isinstance(table_md, dict)
             updated_table_md = copy.deepcopy(table_md)
             if table_md_updater is not None:
@@ -145,13 +146,6 @@ def __update_schema_column(table_schema_version_md: dict, schema_column_updater:
             schema_column_updater(schema_col)
 
 
-def convert_table_record(engine: sql.engine.Engine, table_record_updater: Optional[Callable[[Table], None]]) -> None:
-    with sql.orm.Session(engine, future=True) as session:
-        for record in session.query(Table).all():
-            table_record_updater(record)
-        session.commit()
-
-
 def convert_table_version_record(
     engine: sql.engine.Engine, table_version_record_updater: Optional[Callable[[TableVersion], None]]
 ) -> None:
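
Note on the new comment in convert_table_md() above: with sql.select(Table) the projection is every mapped column in declaration order, so appending a column such as lock_dummy shifts the position of md and silently breaks row[2]. Selecting the needed columns explicitly pins both the projection and the indices. A minimal sketch of that access pattern, using a hypothetical helper (not part of the diff):

import sqlalchemy as sql
from pixeltable.metadata.schema import Table

def iter_table_md(engine: sql.engine.Engine):
    # hypothetical helper: yields (tbl_id, md) pairs and is unaffected by new
    # columns being appended to the Table mapping, unlike positional access into SELECT *
    with engine.begin() as conn:
        for tbl_id, md in conn.execute(sql.select(Table.id, Table.md)):
            yield tbl_id, md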
pixeltable/metadata/notes.py CHANGED
@@ -2,6 +2,7 @@
 # rather than as a comment, so that the existence of a description can be enforced by
 # the unit tests when new versions are added.
 VERSION_NOTES = {
+    36: 'Added Table.lock_dummy',
     35: 'Track reference_tbl in ColumnRef',
     34: 'Set default value for is_pk field in column metadata to False',
     33: 'Add is_replica field to table metadata',
pixeltable/metadata/schema.py CHANGED
@@ -84,7 +84,8 @@ class Dir(Base):
     )
     parent_id: orm.Mapped[uuid.UUID] = orm.mapped_column(UUID(as_uuid=True), ForeignKey('dirs.id'), nullable=True)
     md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False)  # DirMd
-    # This field is updated to synchronize database operations across multiple sessions
+
+    # used to force acquisition of an X-lock via an Update stmt
     lock_dummy: orm.Mapped[int] = orm.mapped_column(BigInteger, nullable=True)
 
 
@@ -200,6 +201,9 @@ class Table(Base):
     dir_id: orm.Mapped[uuid.UUID] = orm.mapped_column(UUID(as_uuid=True), ForeignKey('dirs.id'), nullable=False)
     md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False)  # TableMd
 
+    # used to force acquisition of an X-lock via an Update stmt
+    lock_dummy: orm.Mapped[int] = orm.mapped_column(BigInteger, nullable=True)
+
 
 @dataclasses.dataclass
 class TableVersionMd:
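
The lock_dummy column added to Table mirrors the one already on Dir: updating it is a cheap way to take a row-level exclusive (X) lock on a catalog record for the remainder of a transaction without changing any real data. A minimal sketch of the idea; the helper below is illustrative and not taken from the diff:

import uuid
import sqlalchemy as sql
from pixeltable.metadata.schema import Table

def lock_table_record(conn: sql.engine.Connection, tbl_id: uuid.UUID) -> None:
    # hypothetical helper: the UPDATE makes Postgres take a row-level X-lock on the
    # 'tables' row for tbl_id; the lock is held until the transaction commits or rolls back
    conn.execute(sql.update(Table).where(Table.id == tbl_id).values(lock_dummy=1))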
pixeltable/plan.py CHANGED
@@ -289,7 +289,7 @@ class Planner:
 
         # create InMemoryDataNode for 'rows'
         plan: exec.ExecNode = exec.InMemoryDataNode(
-            TableVersionHandle(tbl.id, tbl.effective_version), rows, row_builder, tbl.next_rowid
+            TableVersionHandle(tbl.id, tbl.effective_version), rows, row_builder, tbl.next_row_id
         )
 
         media_input_col_info = [
@@ -385,7 +385,7 @@ class Planner:
 
         cls.__check_valid_columns(tbl.tbl_version.get(), recomputed_cols, 'updated in')
 
-        recomputed_base_cols = {col for col in recomputed_cols if col.tbl == tbl.tbl_version}
+        recomputed_base_cols = {col for col in recomputed_cols if col.tbl.id == tbl.tbl_version.id}
         copied_cols = [
             col
             for col in target.cols_by_id.values()
@@ -409,7 +409,7 @@ class Planner:
         for i, col in enumerate(all_base_cols):
             plan.row_builder.add_table_column(col, select_list[i].slot_idx)
         recomputed_user_cols = [c for c in recomputed_cols if c.name is not None]
-        return plan, [f'{c.tbl.get().name}.{c.name}' for c in updated_cols + recomputed_user_cols], recomputed_user_cols
+        return plan, [f'{c.tbl.name}.{c.name}' for c in updated_cols + recomputed_user_cols], recomputed_user_cols
 
     @classmethod
     def __check_valid_columns(
@@ -465,7 +465,7 @@ class Planner:
         recomputed_cols.update(idx_val_cols)
         # we only need to recompute stored columns (unstored ones are substituted away)
         recomputed_cols = {c for c in recomputed_cols if c.is_stored}
-        recomputed_base_cols = {col for col in recomputed_cols if col.tbl == target}
+        recomputed_base_cols = {col for col in recomputed_cols if col.tbl.id == target.id}
         copied_cols = [
             col
             for col in target.cols_by_id.values()
pixeltable/share/packager.py CHANGED
@@ -7,6 +7,7 @@ import urllib.request
 import uuid
 from pathlib import Path
 from typing import Any, Iterator, Optional
+from uuid import UUID
 
 import more_itertools
 import pyarrow as pa
@@ -51,7 +52,7 @@ class TablePackager:
         self.media_files = {}
 
         # Load metadata
-        with Env.get().begin_xact():
+        with catalog.Catalog.get().begin_xact(for_write=False):
             tbl_md = catalog.Catalog.get().load_replica_md(table)
             self.md = {
                 'pxt_version': pxt.__version__,
@@ -66,15 +67,15 @@ class TablePackager:
         Export the table to a tarball containing Parquet tables and media files.
         """
         assert not self.tmp_dir.exists()  # Packaging can only be done once per TablePackager instance
-        _logger.info(f"Packaging table '{self.table._path}' and its ancestors in: {self.tmp_dir}")
+        _logger.info(f"Packaging table '{self.table._path()}' and its ancestors in: {self.tmp_dir}")
         self.tmp_dir.mkdir()
         with open(self.tmp_dir / 'metadata.json', 'w', encoding='utf8') as fp:
             json.dump(self.md, fp)
         self.tables_dir = self.tmp_dir / 'tables'
         self.tables_dir.mkdir()
-        with Env.get().begin_xact():
+        with catalog.Catalog.get().begin_xact(for_write=False):
             for tv in self.table._tbl_version_path.get_tbl_versions():
-                _logger.info(f"Exporting table '{tv.get().name}:{tv.get().version}'.")
+                _logger.info(f"Exporting table '{tv.get().versioned_name}'.")
                 self.__export_table(tv.get())
             _logger.info('Building archive.')
             bundle_path = self.__build_tarball()
@@ -253,13 +254,26 @@ class TableRestorer:
         tbl_md = [schema.FullTableMd.from_dict(t) for t in self.md['md']['tables']]
 
         # Create the replica table
-        # TODO: This needs to be made concurrency-safe.
-        replica_tbl = catalog.Catalog.get().create_replica(catalog.Path(self.tbl_path), tbl_md)
-        assert replica_tbl._tbl_version.get().is_snapshot
+        # The logic here needs to be completely restructured in order to make it concurrency-safe.
+        # - Catalog.create_replica() needs to write the metadata and also create the physical store tables
+        #   and populate them, otherwise concurrent readers will see an inconsistent state (table metadata w/o
+        #   an actual table)
+        # - this could be done one replica at a time (instead of the entire hierarchy)
+        cat = catalog.Catalog.get()
+        cat.create_replica(catalog.Path(self.tbl_path), tbl_md)
+        # don't call get_table() until after the calls to create_replica() and __import_table() below;
+        # the TV instances created by get_table() would be replaced by create_replica(), which creates duplicate
+        # TV instances for the same replica version, which then leads to failures when constructing queries
 
         # Now we need to instantiate and load data for replica_tbl and its ancestors, except that we skip
         # replica_tbl itself if it's a pure snapshot.
-        if replica_tbl._id != replica_tbl._tbl_version.id:
+        target_md = tbl_md[0]
+        is_pure_snapshot = (
+            target_md.tbl_md.view_md is not None
+            and target_md.tbl_md.view_md.predicate is None
+            and len(target_md.schema_version_md.columns) == 0
+        )
+        if is_pure_snapshot:
             ancestor_md = tbl_md[1:]  # Pure snapshot; skip replica_tbl
         else:
             ancestor_md = tbl_md  # Not a pure snapshot; include replica_tbl
@@ -273,7 +287,8 @@ class TableRestorer:
             _logger.info(f'Importing table {tv.name!r}.')
             self.__import_table(self.tmp_dir, tv, md)
 
-        return replica_tbl
+        with cat.begin_xact(for_write=False):
+            return cat.get_table_by_id(UUID(tbl_md[0].tbl_md.tbl_id))
 
     def __import_table(self, bundle_path: Path, tv: catalog.TableVersion, tbl_md: schema.FullTableMd) -> None:
         """
pixeltable/share/publish.py CHANGED
@@ -35,7 +35,7 @@ def push_replica(dest_tbl_uri: str, src_tbl: pxt.Table) -> str:
     upload_id = response_json['upload_id']
     destination_uri = response_json['destination_uri']
 
-    Env.get().console_logger.info(f"Creating a snapshot of '{src_tbl._path}' at: {dest_tbl_uri}")
+    Env.get().console_logger.info(f"Creating a snapshot of '{src_tbl._path()}' at: {dest_tbl_uri}")
 
     bundle = packager.package()
 
@@ -117,7 +117,7 @@ def pull_replica(dest_path: str, src_tbl_uri: str) -> pxt.Table:
 
     restorer = TableRestorer(dest_path, response_json)
     tbl = restorer.restore(bundle_path)
-    Env.get().console_logger.info(f'Created local replica {tbl._path!r} from URI: {src_tbl_uri}')
+    Env.get().console_logger.info(f'Created local replica {tbl._path()!r} from URI: {src_tbl_uri}')
     return tbl
 
 
pixeltable/store.py CHANGED
@@ -52,7 +52,8 @@ class StoreBase:
         # We need to declare a `base` variable here, even though it's only defined for instances of `StoreView`,
         # since it's referenced by various methods of `StoreBase`
         self.base = tbl_version.base.get().store_tbl if tbl_version.base is not None else None
-        self.create_sa_tbl()
+        # we're passing in tbl_version to avoid a circular call to TableVersionHandle.get()
+        self.create_sa_tbl(tbl_version)
 
     def system_columns(self) -> list[sql.Column]:
         return [*self._pk_cols, self.v_max_col]
@@ -77,11 +78,13 @@ class StoreBase:
         self._pk_cols = [*rowid_cols, self.v_min_col]
         return [*rowid_cols, self.v_min_col, self.v_max_col]
 
-    def create_sa_tbl(self) -> None:
+    def create_sa_tbl(self, tbl_version: Optional[catalog.TableVersion] = None) -> None:
         """Create self.sa_tbl from self.tbl_version."""
+        if tbl_version is None:
+            tbl_version = self.tbl_version.get()
         system_cols = self._create_system_columns()
         all_cols = system_cols.copy()
-        for col in [c for c in self.tbl_version.get().cols if c.is_stored]:
+        for col in [c for c in tbl_version.cols if c.is_stored]:
             # re-create sql.Column for each column, regardless of whether it already has sa_col set: it was bound
             # to the last sql.Table version we created and cannot be reused
             col.create_sa_cols()
@@ -99,16 +102,17 @@ class StoreBase:
         # - base x view joins can be executed as merge joins
         # - speeds up ORDER BY rowid DESC
         # - allows filtering for a particular table version in index scan
-        idx_name = f'sys_cols_idx_{self.tbl_version.id.hex}'
+        idx_name = f'sys_cols_idx_{tbl_version.id.hex}'
         idxs.append(sql.Index(idx_name, *system_cols))
 
         # v_min/v_max indices: speeds up base table scans needed to propagate a base table insert or delete
-        idx_name = f'vmin_idx_{self.tbl_version.id.hex}'
+        idx_name = f'vmin_idx_{tbl_version.id.hex}'
         idxs.append(sql.Index(idx_name, self.v_min_col, postgresql_using=Env.get().dbms.version_index_type))
-        idx_name = f'vmax_idx_{self.tbl_version.id.hex}'
+        idx_name = f'vmax_idx_{tbl_version.id.hex}'
         idxs.append(sql.Index(idx_name, self.v_max_col, postgresql_using=Env.get().dbms.version_index_type))
 
         self.sa_tbl = sql.Table(self._storage_name(), self.sa_md, *all_cols, *idxs)
+        # _logger.debug(f'created sa tbl for {tbl_version.id!s} (sa_tbl={id(self.sa_tbl):x}, tv={id(tbl_version):x})')
 
     @abc.abstractmethod
     def _rowid_join_predicate(self) -> sql.ColumnElement[bool]:
@@ -285,7 +289,7 @@ class StoreBase:
         else:
             if col.col_type.is_image_type() and result_row.file_urls[value_expr_slot_idx] is None:
                 # we have yet to store this image
-                filepath = str(MediaStore.prepare_media_path(col.tbl.id, col.id, col.tbl.get().version))
+                filepath = str(MediaStore.prepare_media_path(col.tbl.id, col.id, col.tbl.version))
                 result_row.flush_img(value_expr_slot_idx, filepath)
             val = result_row.get_stored_val(value_expr_slot_idx, col.sa_col.type)
             if col.col_type.is_media_type():
@@ -415,9 +419,7 @@ class StoreBase:
             number of deleted rows
         """
         where_clause = sql.true() if where_clause is None else where_clause
-        where_clause = sql.and_(
-            self.v_min_col < current_version, self.v_max_col == schema.Table.MAX_VERSION, where_clause
-        )
+        version_clause = sql.and_(self.v_min_col < current_version, self.v_max_col == schema.Table.MAX_VERSION)
         rowid_join_clause = self._rowid_join_predicate()
         base_versions_clause = (
             sql.true() if len(base_versions) == 0 else self.base._versions_clause(base_versions, match_on_vmin)
@@ -428,10 +430,12 @@ class StoreBase:
             set_clause[index_info.undo_col.sa_col] = index_info.val_col.sa_col
             # set value column to NULL
             set_clause[index_info.val_col.sa_col] = None
+
         stmt = (
             sql.update(self.sa_tbl)
             .values(set_clause)
             .where(where_clause)
+            .where(version_clause)
             .where(rowid_join_clause)
             .where(base_versions_clause)
         )
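
In SQLAlchemy, successive .where() calls on an UPDATE are combined with AND, so splitting the version predicate out of where_clause (as in the hunk above) produces the same filter as the old sql.and_(...) construction while keeping the caller-supplied predicate separate. A standalone sketch with a toy table, not pixeltable code:

import sqlalchemy as sql

metadata = sql.MetaData()
t = sql.Table('t', metadata, sql.Column('v_min', sql.BigInteger), sql.Column('v_max', sql.BigInteger))

# the two .where() calls are ANDed together in the generated SQL
stmt = (
    sql.update(t)
    .values(v_max=0)
    .where(t.c.v_min < 10)
    .where(t.c.v_max == 9223372036854775807)
)
print(stmt)  # renders roughly: UPDATE t SET v_max=... WHERE t.v_min < ... AND t.v_max = ...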
@@ -528,10 +532,12 @@ class StoreComponentView(StoreView):
             self.rowid_cols.append(self.pos_col)
         return self.rowid_cols
 
-    def create_sa_tbl(self) -> None:
-        super().create_sa_tbl()
+    def create_sa_tbl(self, tbl_version: Optional[catalog.TableVersion] = None) -> None:
+        if tbl_version is None:
+            tbl_version = self.tbl_version.get()
+        super().create_sa_tbl(tbl_version)
         # we need to fix up the 'pos' column in TableVersion
-        self.tbl_version.get().cols_by_name['pos'].sa_col = self.pos_col
+        tbl_version.cols_by_name['pos'].sa_col = self.pos_col
 
     def _rowid_join_predicate(self) -> sql.ColumnElement[bool]:
         return sql.and_(
pixeltable/utils/dbms.py CHANGED
@@ -35,7 +35,7 @@ class PostgresqlDbms(Dbms):
     """
 
     def __init__(self, db_url: URL):
-        super().__init__('postgresql', 'REPEATABLE READ', 'brin', db_url)
+        super().__init__('postgresql', 'SERIALIZABLE', 'brin', db_url)
 
     def drop_db_stmt(self, database: str) -> str:
         return f'DROP DATABASE {database}'
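
Note on the isolation-level change above: under SERIALIZABLE, Postgres may abort a conflicting transaction with SQLSTATE 40001 (serialization_failure), which callers are expected to handle by retrying the whole transaction. A rough sketch of that pattern, not pixeltable code (the URL and helper name are placeholders):

import sqlalchemy as sql
from sqlalchemy.exc import DBAPIError

# placeholder URL; pixeltable configures its own engine elsewhere
engine = sql.create_engine('postgresql+psycopg://localhost/pixeltable', isolation_level='SERIALIZABLE')

def run_serializable(fn, max_attempts: int = 3):
    """Run fn(conn) in a SERIALIZABLE transaction, retrying on serialization failures."""
    for attempt in range(1, max_attempts + 1):
        try:
            with engine.begin() as conn:
                return fn(conn)
        except DBAPIError as e:
            # psycopg2 exposes pgcode, psycopg 3 exposes sqlstate; 40001 = serialization_failure
            code = getattr(e.orig, 'pgcode', None) or getattr(e.orig, 'sqlstate', None)
            if code != '40001' or attempt == max_attempts:
                raise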
{pixeltable-0.3.15.dist-info → pixeltable-0.4.0rc1.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: pixeltable
-Version: 0.3.15
+Version: 0.4.0rc1
 Summary: AI Data Infrastructure: Declarative, Multimodal, and Incremental
 License: Apache-2.0
 Keywords: data-science,machine-learning,database,ai,computer-vision,chatbot,ml,artificial-intelligence,feature-engineering,multimodal,mlops,feature-store,vector-database,llm,genai