pixeltable 0.3.15__py3-none-any.whl → 0.4.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/catalog.py +292 -105
- pixeltable/catalog/column.py +10 -8
- pixeltable/catalog/dir.py +1 -2
- pixeltable/catalog/insertable_table.py +25 -20
- pixeltable/catalog/schema_object.py +3 -6
- pixeltable/catalog/table.py +245 -189
- pixeltable/catalog/table_version.py +317 -201
- pixeltable/catalog/table_version_handle.py +15 -2
- pixeltable/catalog/table_version_path.py +60 -14
- pixeltable/catalog/view.py +14 -5
- pixeltable/dataframe.py +11 -9
- pixeltable/env.py +2 -4
- pixeltable/exec/in_memory_data_node.py +1 -1
- pixeltable/exec/sql_node.py +20 -11
- pixeltable/exprs/column_property_ref.py +15 -6
- pixeltable/exprs/column_ref.py +32 -11
- pixeltable/exprs/comparison.py +1 -1
- pixeltable/exprs/row_builder.py +4 -6
- pixeltable/exprs/rowid_ref.py +8 -0
- pixeltable/exprs/similarity_expr.py +1 -0
- pixeltable/func/query_template_function.py +1 -1
- pixeltable/functions/string.py +212 -58
- pixeltable/globals.py +7 -4
- pixeltable/index/base.py +5 -0
- pixeltable/index/btree.py +5 -0
- pixeltable/index/embedding_index.py +5 -0
- pixeltable/io/external_store.py +8 -29
- pixeltable/io/label_studio.py +1 -1
- pixeltable/io/parquet.py +2 -2
- pixeltable/io/table_data_conduit.py +0 -31
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_30.py +6 -11
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/util.py +3 -9
- pixeltable/metadata/notes.py +1 -0
- pixeltable/metadata/schema.py +5 -1
- pixeltable/plan.py +4 -4
- pixeltable/share/packager.py +24 -9
- pixeltable/share/publish.py +2 -2
- pixeltable/store.py +19 -13
- pixeltable/utils/dbms.py +1 -1
- {pixeltable-0.3.15.dist-info → pixeltable-0.4.0rc1.dist-info}/METADATA +1 -1
- {pixeltable-0.3.15.dist-info → pixeltable-0.4.0rc1.dist-info}/RECORD +48 -47
- {pixeltable-0.3.15.dist-info → pixeltable-0.4.0rc1.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.15.dist-info → pixeltable-0.4.0rc1.dist-info}/WHEEL +0 -0
- {pixeltable-0.3.15.dist-info → pixeltable-0.4.0rc1.dist-info}/entry_points.txt +0 -0
pixeltable/io/table_data_conduit.py CHANGED

@@ -23,7 +23,6 @@ from .utils import normalize_schema_names
 
 _logger = logging.getLogger('pixeltable')
 
-# ---------------------------------------------------------------------------------------------------------
 
 if TYPE_CHECKING:
     import datasets  # type: ignore[import-untyped]

@@ -46,9 +45,6 @@ class TableDataConduitFormat(str, enum.Enum):
         return False
 
 
-# ---------------------------------------------------------------------------------------------------------
-
-
 @dataclass
 class TableDataConduit:
     source: TableDataSource

@@ -129,9 +125,6 @@ class TableDataConduit:
         raise excs.Error(f'Missing required column(s) ({", ".join(missing_cols)})')
 
 
-# ---------------------------------------------------------------------------------------------------------
-
-
 class DFTableDataConduit(TableDataConduit):
     pxt_df: pxt.DataFrame = None
 

@@ -155,9 +148,6 @@ class DFTableDataConduit(TableDataConduit):
         self.check_source_columns_are_insertable(self.pxt_df.schema.keys())
 
 
-# ---------------------------------------------------------------------------------------------------------
-
-
 class RowDataTableDataConduit(TableDataConduit):
     raw_rows: Optional[RowData] = None
     disable_mapping: bool = True

@@ -235,9 +225,6 @@ class RowDataTableDataConduit(TableDataConduit):
         yield self.valid_rows
 
 
-# ---------------------------------------------------------------------------------------------------------
-
-
 class PandasTableDataConduit(TableDataConduit):
     pd_df: pd.DataFrame = None
     batch_count: int = 0

@@ -293,9 +280,6 @@ class PandasTableDataConduit(TableDataConduit):
         yield self.valid_rows
 
 
-# ---------------------------------------------------------------------------------------------------------
-
-
 class CSVTableDataConduit(TableDataConduit):
     @classmethod
     def from_tds(cls, tds: TableDataConduit) -> 'PandasTableDataConduit':

@@ -307,9 +291,6 @@ class CSVTableDataConduit(TableDataConduit):
         return PandasTableDataConduit.from_tds(t)
 
 
-# ---------------------------------------------------------------------------------------------------------
-
-
 class ExcelTableDataConduit(TableDataConduit):
     @classmethod
     def from_tds(cls, tds: TableDataConduit) -> 'PandasTableDataConduit':

@@ -321,9 +302,6 @@ class ExcelTableDataConduit(TableDataConduit):
         return PandasTableDataConduit.from_tds(t)
 
 
-# ---------------------------------------------------------------------------------------------------------
-
-
 class JsonTableDataConduit(TableDataConduit):
     @classmethod
     def from_tds(cls, tds: TableDataConduit) -> RowDataTableDataConduit:

@@ -346,9 +324,6 @@ class JsonTableDataConduit(TableDataConduit):
         return t2
 
 
-# ---------------------------------------------------------------------------------------------------------
-
-
 class HFTableDataConduit(TableDataConduit):
     hf_ds: Optional[Union[datasets.Dataset, datasets.DatasetDict]] = None
     column_name_for_split: Optional[str] = None

@@ -478,9 +453,6 @@ class HFTableDataConduit(TableDataConduit):
             yield batch
 
 
-# ---------------------------------------------------------------------------------------------------------
-
-
 class ParquetTableDataConduit(TableDataConduit):
     pq_ds: Optional[ParquetDataset] = None
 

@@ -542,9 +514,6 @@ class ParquetTableDataConduit(TableDataConduit):
             raise e
 
 
-# ---------------------------------------------------------------------------------------------------------
-
-
 class UnkTableDataConduit(TableDataConduit):
    """Source type is not known at the time of creation"""
 
pixeltable/metadata/__init__.py CHANGED

@@ -16,7 +16,7 @@ _console_logger = ConsoleLogger(logging.getLogger('pixeltable'))
 
 
 # current version of the metadata; this is incremented whenever the metadata schema changes
-VERSION = 35
+VERSION = 36
 
 
 def create_system_info(engine: sql.engine.Engine) -> None:
pixeltable/metadata/converters/convert_13.py CHANGED

@@ -12,9 +12,9 @@ _logger = logging.getLogger('pixeltable')
 @register_converter(version=13)
 def _(engine: sql.engine.Engine) -> None:
     with engine.begin() as conn:
-        for row in conn.execute(sql.select(Table)):
+        for row in conn.execute(sql.select(Table.id, Table.md)):
             id = row[0]
-            md = row[2]
+            md = row[1]
             updated_md = __update_md(md)
             if updated_md != md:
                 _logger.info(f'Updating schema for table: {id}')
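The switch from selecting the full Table entity to selecting named columns is what keeps positional indexing stable. A minimal sketch of the failure mode this avoids, using an illustrative SQLAlchemy table rather than pixeltable's actual schema:

import sqlalchemy as sql

metadata = sql.MetaData()
tables = sql.Table(
    'tables', metadata,
    sql.Column('id', sql.Integer, primary_key=True),
    sql.Column('dir_id', sql.Integer),
    sql.Column('md', sql.JSON),
)

def read_md(engine: sql.engine.Engine) -> None:
    with engine.begin() as conn:
        # selecting named columns pins the row layout: row[0] is always id,
        # row[1] is always md, even after the table grows new columns
        for row in conn.execute(sql.select(tables.c.id, tables.c.md)):
            tbl_id, md = row[0], row[1]

With sql.select(tables), the positional layout tracks the table's current column list, so an index like row[2] silently changes meaning once a column such as lock_dummy is appended; the util.py hunk further down adds exactly this warning as a comment.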
pixeltable/metadata/converters/convert_30.py CHANGED

@@ -1,33 +1,28 @@
 import copy
+from uuid import UUID
 
 import sqlalchemy as sql
 
 from pixeltable.metadata import register_converter
 from pixeltable.metadata.converters.util import (
-    convert_table_record,
+    convert_table_md,
     convert_table_schema_version_record,
     convert_table_version_record,
 )
-from pixeltable.metadata.schema import Table, TableSchemaVersion, TableVersion
+from pixeltable.metadata.schema import TableSchemaVersion, TableVersion
 
 
 @register_converter(version=30)
 def _(engine: sql.engine.Engine) -> None:
-    convert_table_record(engine, table_record_updater=__update_table_record)
+    convert_table_md(engine, table_md_updater=__update_table_md)
     convert_table_version_record(engine, table_version_record_updater=__update_table_version_record)
     convert_table_schema_version_record(
         engine, table_schema_version_record_updater=__update_table_schema_version_record
     )
 
 
-def __update_table_record(record: Table) -> None:
-    """
-    Update TableMd with table_id
-    """
-    assert isinstance(record.md, dict)
-    md = copy.copy(record.md)
-    md['tbl_id'] = str(record.id)
-    record.md = md
+def __update_table_md(md: dict, tbl_id: UUID) -> None:
+    md['tbl_id'] = str(tbl_id)
 
 
 def __update_table_version_record(record: TableVersion) -> None:
pixeltable/metadata/converters/convert_35.py ADDED

@@ -0,0 +1,9 @@
+import sqlalchemy as sql
+
+from pixeltable.metadata import register_converter
+
+
+@register_converter(version=35)
+def _(engine: sql.engine.Engine) -> None:
+    with engine.begin() as conn:
+        conn.execute(sql.text('ALTER TABLE tables ADD COLUMN lock_dummy int8'))
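convert_35.py follows the one-module-per-version converter pattern: a converter registered under version N upgrades on-disk metadata from N to N+1. A hypothetical sketch of how such a version-keyed registry can be driven (pixeltable's actual upgrade loop may differ):

from typing import Callable

import sqlalchemy as sql

CONVERTERS: dict[int, Callable[[sql.engine.Engine], None]] = {}

def register_converter(version: int) -> Callable:
    # a converter registered under `version` upgrades metadata from
    # `version` to `version + 1`
    def decorator(fn: Callable[[sql.engine.Engine], None]) -> Callable:
        CONVERTERS[version] = fn
        return fn
    return decorator

def upgrade(engine: sql.engine.Engine, db_version: int, target_version: int) -> None:
    # apply converters one version at a time until the DB is current
    for v in range(db_version, target_version):
        CONVERTERS[v](engine)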
pixeltable/metadata/converters/util.py CHANGED

@@ -33,9 +33,10 @@ def convert_table_md(
     the original entry will be replaced, and the traversal will continue with `v'`.
     """
     with engine.begin() as conn:
-        for row in conn.execute(sql.select(Table)):
+        # avoid a SELECT * here, which breaks when we add new columns to Table
+        for row in conn.execute(sql.select(Table.id, Table.md)):
             tbl_id = row[0]
-            table_md = row[2]
+            table_md = row[1]
             assert isinstance(table_md, dict)
             updated_table_md = copy.deepcopy(table_md)
             if table_md_updater is not None:

@@ -145,13 +146,6 @@ def __update_schema_column(table_schema_version_md: dict, schema_column_updater:
         schema_column_updater(schema_col)
 
 
-def convert_table_record(engine: sql.engine.Engine, table_record_updater: Optional[Callable[[Table], None]]) -> None:
-    with sql.orm.Session(engine, future=True) as session:
-        for record in session.query(Table).all():
-            table_record_updater(record)
-        session.commit()
-
-
 def convert_table_version_record(
     engine: sql.engine.Engine, table_version_record_updater: Optional[Callable[[TableVersion], None]]
 ) -> None:
pixeltable/metadata/notes.py CHANGED

@@ -2,6 +2,7 @@
 # rather than as a comment, so that the existence of a description can be enforced by
 # the unit tests when new versions are added.
 VERSION_NOTES = {
+    36: 'Added Table.lock_dummy',
     35: 'Track reference_tbl in ColumnRef',
     34: 'Set default value for is_pk field in column metadata to False',
     33: 'Add is_replica field to table metadata',
pixeltable/metadata/schema.py CHANGED

@@ -84,7 +84,8 @@ class Dir(Base):
     )
     parent_id: orm.Mapped[uuid.UUID] = orm.mapped_column(UUID(as_uuid=True), ForeignKey('dirs.id'), nullable=True)
     md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False)  # DirMd
-
+
+    # used to force acquisition of an X-lock via an Update stmt
     lock_dummy: orm.Mapped[int] = orm.mapped_column(BigInteger, nullable=True)
 
 

@@ -200,6 +201,9 @@ class Table(Base):
     dir_id: orm.Mapped[uuid.UUID] = orm.mapped_column(UUID(as_uuid=True), ForeignKey('dirs.id'), nullable=False)
     md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False)  # TableMd
 
+    # used to force acquisition of an X-lock via an Update stmt
+    lock_dummy: orm.Mapped[int] = orm.mapped_column(BigInteger, nullable=True)
+
 
 @dataclasses.dataclass
 class TableVersionMd:
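The new lock_dummy columns on Dir and Table carry no data; per the inline comment, they exist only so that an UPDATE statement can force a row-level exclusive lock on a catalog row. A sketch of that idiom (the statement is illustrative; pixeltable's catalog code may phrase it differently):

import sqlalchemy as sql

def lock_table_row(conn: sql.engine.Connection, tbl_id: str) -> None:
    # in Postgres, an UPDATE takes a row-level X-lock that is held until the
    # transaction ends; writing lock_dummy changes nothing observable but
    # serializes concurrent writers on the same catalog entry
    conn.execute(sql.text('UPDATE tables SET lock_dummy = 1 WHERE id = :id'), {'id': tbl_id})

This is also why convert_35.py above adds the column to existing databases and why notes.py records version 36 as 'Added Table.lock_dummy'.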
pixeltable/plan.py CHANGED

@@ -289,7 +289,7 @@ class Planner:
 
         # create InMemoryDataNode for 'rows'
         plan: exec.ExecNode = exec.InMemoryDataNode(
-            TableVersionHandle(tbl.id, tbl.effective_version), rows, row_builder, tbl.
+            TableVersionHandle(tbl.id, tbl.effective_version), rows, row_builder, tbl.next_row_id
         )
 
         media_input_col_info = [

@@ -385,7 +385,7 @@ class Planner:
 
         cls.__check_valid_columns(tbl.tbl_version.get(), recomputed_cols, 'updated in')
 
-        recomputed_base_cols = {col for col in recomputed_cols if col.tbl == tbl.tbl_version}
+        recomputed_base_cols = {col for col in recomputed_cols if col.tbl.id == tbl.tbl_version.id}
         copied_cols = [
             col
             for col in target.cols_by_id.values()

@@ -409,7 +409,7 @@ class Planner:
         for i, col in enumerate(all_base_cols):
             plan.row_builder.add_table_column(col, select_list[i].slot_idx)
         recomputed_user_cols = [c for c in recomputed_cols if c.name is not None]
-        return plan, [f'{c.tbl.
+        return plan, [f'{c.tbl.name}.{c.name}' for c in updated_cols + recomputed_user_cols], recomputed_user_cols
 
     @classmethod
     def __check_valid_columns(

@@ -465,7 +465,7 @@ class Planner:
         recomputed_cols.update(idx_val_cols)
         # we only need to recompute stored columns (unstored ones are substituted away)
         recomputed_cols = {c for c in recomputed_cols if c.is_stored}
-        recomputed_base_cols = {col for col in recomputed_cols if col.tbl == target}
+        recomputed_base_cols = {col for col in recomputed_cols if col.tbl.id == target.id}
         copied_cols = [
             col
             for col in target.cols_by_id.values()
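Two of these hunks replace handle equality (col.tbl == target) with id equality (col.tbl.id == target.id). A simplified illustration of why that distinction matters once more than one handle object can refer to the same table (stand-in class, not pixeltable's real TableVersionHandle):

import uuid

class Handle:
    def __init__(self, tbl_id: uuid.UUID) -> None:
        self.id = tbl_id

tbl_id = uuid.uuid4()
h1, h2 = Handle(tbl_id), Handle(tbl_id)

assert h1 != h2        # default equality is object identity: two instances, not equal
assert h1.id == h2.id  # comparing ids still recognizes the same table

The packager.py hunks below note that create_replica() can produce duplicate TV instances for the same replica version, which is exactly the situation where identity-based comparisons go wrong.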
pixeltable/share/packager.py CHANGED

@@ -7,6 +7,7 @@ import urllib.request
 import uuid
 from pathlib import Path
 from typing import Any, Iterator, Optional
+from uuid import UUID
 
 import more_itertools
 import pyarrow as pa

@@ -51,7 +52,7 @@ class TablePackager:
         self.media_files = {}
 
         # Load metadata
-        with
+        with catalog.Catalog.get().begin_xact(for_write=False):
             tbl_md = catalog.Catalog.get().load_replica_md(table)
         self.md = {
             'pxt_version': pxt.__version__,

@@ -66,15 +67,15 @@ class TablePackager:
         Export the table to a tarball containing Parquet tables and media files.
         """
         assert not self.tmp_dir.exists()  # Packaging can only be done once per TablePackager instance
-        _logger.info(f"Packaging table '{self.table._path}' and its ancestors in: {self.tmp_dir}")
+        _logger.info(f"Packaging table '{self.table._path()}' and its ancestors in: {self.tmp_dir}")
         self.tmp_dir.mkdir()
         with open(self.tmp_dir / 'metadata.json', 'w', encoding='utf8') as fp:
             json.dump(self.md, fp)
         self.tables_dir = self.tmp_dir / 'tables'
         self.tables_dir.mkdir()
-        with
+        with catalog.Catalog.get().begin_xact(for_write=False):
             for tv in self.table._tbl_version_path.get_tbl_versions():
-                _logger.info(f"Exporting table '{tv.get().
+                _logger.info(f"Exporting table '{tv.get().versioned_name}'.")
                 self.__export_table(tv.get())
         _logger.info('Building archive.')
         bundle_path = self.__build_tarball()

@@ -253,13 +254,26 @@ class TableRestorer:
         tbl_md = [schema.FullTableMd.from_dict(t) for t in self.md['md']['tables']]
 
         # Create the replica table
-        #
-
-
+        # The logic here needs to be completely restructured in order to make it concurrency-safe.
+        # - Catalog.create_replica() needs to write the metadata and also create the physical store tables
+        #   and populate them, otherwise concurrent readers will see an inconsistent state (table metadata w/o
+        #   an actual table)
+        # - this could be done one replica at a time (instead of the entire hierarchy)
+        cat = catalog.Catalog.get()
+        cat.create_replica(catalog.Path(self.tbl_path), tbl_md)
+        # don't call get_table() until after the calls to create_replica() and __import_table() below;
+        # the TV instances created by get_table() would be replaced by create_replica(), which creates duplicate
+        # TV instances for the same replica version, which then leads to failures when constructing queries
 
         # Now we need to instantiate and load data for replica_tbl and its ancestors, except that we skip
         # replica_tbl itself if it's a pure snapshot.
-
+        target_md = tbl_md[0]
+        is_pure_snapshot = (
+            target_md.tbl_md.view_md is not None
+            and target_md.tbl_md.view_md.predicate is None
+            and len(target_md.schema_version_md.columns) == 0
+        )
+        if is_pure_snapshot:
             ancestor_md = tbl_md[1:]  # Pure snapshot; skip replica_tbl
         else:
             ancestor_md = tbl_md  # Not a pure snapshot; include replica_tbl

@@ -273,7 +287,8 @@ class TableRestorer:
             _logger.info(f'Importing table {tv.name!r}.')
             self.__import_table(self.tmp_dir, tv, md)
 
-
+        with cat.begin_xact(for_write=False):
+            return cat.get_table_by_id(UUID(tbl_md[0].tbl_md.tbl_id))
 
     def __import_table(self, bundle_path: Path, tv: catalog.TableVersion, tbl_md: schema.FullTableMd) -> None:
         """
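The restorer now computes the pure-snapshot test explicitly. Restated on its own (field names as in the hunk; md stands for one of the FullTableMd entries):

def is_pure_snapshot(md) -> bool:
    # a pure snapshot is a view with no filter predicate and no columns of its
    # own; it stores no rows itself, so only its ancestors need data imported
    return (
        md.tbl_md.view_md is not None
        and md.tbl_md.view_md.predicate is None
        and len(md.schema_version_md.columns) == 0
    )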
pixeltable/share/publish.py CHANGED

@@ -35,7 +35,7 @@ def push_replica(dest_tbl_uri: str, src_tbl: pxt.Table) -> str:
     upload_id = response_json['upload_id']
     destination_uri = response_json['destination_uri']
 
-    Env.get().console_logger.info(f"Creating a snapshot of '{src_tbl._path}' at: {dest_tbl_uri}")
+    Env.get().console_logger.info(f"Creating a snapshot of '{src_tbl._path()}' at: {dest_tbl_uri}")
 
     bundle = packager.package()
 

@@ -117,7 +117,7 @@ def pull_replica(dest_path: str, src_tbl_uri: str) -> pxt.Table:
 
     restorer = TableRestorer(dest_path, response_json)
     tbl = restorer.restore(bundle_path)
-    Env.get().console_logger.info(f'Created local replica {tbl._path!r} from URI: {src_tbl_uri}')
+    Env.get().console_logger.info(f'Created local replica {tbl._path()!r} from URI: {src_tbl_uri}')
     return tbl
 
 
pixeltable/store.py CHANGED

@@ -52,7 +52,8 @@ class StoreBase:
         # We need to declare a `base` variable here, even though it's only defined for instances of `StoreView`,
         # since it's referenced by various methods of `StoreBase`
         self.base = tbl_version.base.get().store_tbl if tbl_version.base is not None else None
-        self.create_sa_tbl()
+        # we're passing in tbl_version to avoid a circular call to TableVersionHandle.get()
+        self.create_sa_tbl(tbl_version)
 
     def system_columns(self) -> list[sql.Column]:
         return [*self._pk_cols, self.v_max_col]

@@ -77,11 +78,13 @@ class StoreBase:
         self._pk_cols = [*rowid_cols, self.v_min_col]
         return [*rowid_cols, self.v_min_col, self.v_max_col]
 
-    def create_sa_tbl(self) -> None:
+    def create_sa_tbl(self, tbl_version: Optional[catalog.TableVersion] = None) -> None:
         """Create self.sa_tbl from self.tbl_version."""
+        if tbl_version is None:
+            tbl_version = self.tbl_version.get()
         system_cols = self._create_system_columns()
         all_cols = system_cols.copy()
-        for col in [c for c in
+        for col in [c for c in tbl_version.cols if c.is_stored]:
             # re-create sql.Column for each column, regardless of whether it already has sa_col set: it was bound
             # to the last sql.Table version we created and cannot be reused
             col.create_sa_cols()

@@ -99,16 +102,17 @@ class StoreBase:
         # - base x view joins can be executed as merge joins
         # - speeds up ORDER BY rowid DESC
         # - allows filtering for a particular table version in index scan
-        idx_name = f'sys_cols_idx_{
+        idx_name = f'sys_cols_idx_{tbl_version.id.hex}'
         idxs.append(sql.Index(idx_name, *system_cols))
 
         # v_min/v_max indices: speeds up base table scans needed to propagate a base table insert or delete
-        idx_name = f'vmin_idx_{
+        idx_name = f'vmin_idx_{tbl_version.id.hex}'
         idxs.append(sql.Index(idx_name, self.v_min_col, postgresql_using=Env.get().dbms.version_index_type))
-        idx_name = f'vmax_idx_{
+        idx_name = f'vmax_idx_{tbl_version.id.hex}'
         idxs.append(sql.Index(idx_name, self.v_max_col, postgresql_using=Env.get().dbms.version_index_type))
 
         self.sa_tbl = sql.Table(self._storage_name(), self.sa_md, *all_cols, *idxs)
+        # _logger.debug(f'created sa tbl for {tbl_version.id!s} (sa_tbl={id(self.sa_tbl):x}, tv={id(tbl_version):x})')
 
     @abc.abstractmethod
     def _rowid_join_predicate(self) -> sql.ColumnElement[bool]:

@@ -285,7 +289,7 @@ class StoreBase:
         else:
             if col.col_type.is_image_type() and result_row.file_urls[value_expr_slot_idx] is None:
                 # we have yet to store this image
-                filepath = str(MediaStore.prepare_media_path(col.tbl.id, col.id, col.tbl.
+                filepath = str(MediaStore.prepare_media_path(col.tbl.id, col.id, col.tbl.version))
                 result_row.flush_img(value_expr_slot_idx, filepath)
         val = result_row.get_stored_val(value_expr_slot_idx, col.sa_col.type)
         if col.col_type.is_media_type():

@@ -415,9 +419,7 @@ class StoreBase:
             number of deleted rows
         """
         where_clause = sql.true() if where_clause is None else where_clause
-        where_clause = sql.and_(
-            self.v_min_col < current_version, self.v_max_col == schema.Table.MAX_VERSION, where_clause
-        )
+        version_clause = sql.and_(self.v_min_col < current_version, self.v_max_col == schema.Table.MAX_VERSION)
         rowid_join_clause = self._rowid_join_predicate()
         base_versions_clause = (
             sql.true() if len(base_versions) == 0 else self.base._versions_clause(base_versions, match_on_vmin)

@@ -428,10 +430,12 @@ class StoreBase:
             set_clause[index_info.undo_col.sa_col] = index_info.val_col.sa_col
             # set value column to NULL
             set_clause[index_info.val_col.sa_col] = None
+
         stmt = (
             sql.update(self.sa_tbl)
             .values(set_clause)
             .where(where_clause)
+            .where(version_clause)
             .where(rowid_join_clause)
             .where(base_versions_clause)
         )

@@ -528,10 +532,12 @@ class StoreComponentView(StoreView):
             self.rowid_cols.append(self.pos_col)
         return self.rowid_cols
 
-    def create_sa_tbl(self) -> None:
-        super().create_sa_tbl()
+    def create_sa_tbl(self, tbl_version: Optional[catalog.TableVersion] = None) -> None:
+        if tbl_version is None:
+            tbl_version = self.tbl_version.get()
+        super().create_sa_tbl(tbl_version)
         # we need to fix up the 'pos' column in TableVersion
-
+        tbl_version.cols_by_name['pos'].sa_col = self.pos_col
 
     def _rowid_join_predicate(self) -> sql.ColumnElement[bool]:
         return sql.and_(
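The delete path now keeps the caller's where_clause separate from the version predicate and ANDs them via separate .where() calls. The version predicate encodes the store's row versioning: a row is live from v_min until v_max, with v_max held at a sentinel until the row is deleted. A sketch of that predicate under those assumptions (the columns are illustrative; MAX_VERSION's value is assumed, pixeltable defines it in schema.Table):

import sqlalchemy as sql

MAX_VERSION = 2**63 - 1  # assumed sentinel for "still live"; see schema.Table.MAX_VERSION

def live_row_predicate(v_min: sql.Column, v_max: sql.Column, current_version: int) -> sql.ColumnElement[bool]:
    # visible to a delete at current_version: created in an earlier version
    # (v_min < current) and not yet deleted (v_max still at the sentinel)
    return sql.and_(v_min < current_version, v_max == MAX_VERSION)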
pixeltable/utils/dbms.py CHANGED

@@ -35,7 +35,7 @@ class PostgresqlDbms(Dbms):
     """
 
     def __init__(self, db_url: URL):
-        super().__init__('postgresql', '
+        super().__init__('postgresql', 'SERIALIZABLE', 'brin', db_url)
 
     def drop_db_stmt(self, database: str) -> str:
         return f'DROP DATABASE {database}'
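The PostgresqlDbms constructor now passes 'SERIALIZABLE' as the isolation level and 'brin' as the version-index type (BRIN indexes suit mostly-monotonic columns like v_min/v_max). Applying that isolation level through SQLAlchemy typically looks like the following; the URL is a placeholder and this is not pixeltable's exact wiring:

import sqlalchemy as sql

engine = sql.create_engine(
    'postgresql+psycopg://user:pass@localhost/pixeltable',  # placeholder URL
    isolation_level='SERIALIZABLE',
)

# under SERIALIZABLE, Postgres may abort a transaction with a serialization
# failure (SQLSTATE 40001); callers are expected to catch it and retry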
{pixeltable-0.3.15.dist-info → pixeltable-0.4.0rc1.dist-info}/METADATA CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: pixeltable
-Version: 0.3.15
+Version: 0.4.0rc1
 Summary: AI Data Infrastructure: Declarative, Multimodal, and Incremental
 License: Apache-2.0
 Keywords: data-science,machine-learning,database,ai,computer-vision,chatbot,ml,artificial-intelligence,feature-engineering,multimodal,mlops,feature-store,vector-database,llm,genai