pixeltable 0.2.7__py3-none-any.whl → 0.2.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +15 -33
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/catalog.py +1 -1
- pixeltable/catalog/column.py +28 -16
- pixeltable/catalog/dir.py +2 -2
- pixeltable/catalog/insertable_table.py +5 -55
- pixeltable/catalog/named_function.py +2 -2
- pixeltable/catalog/schema_object.py +2 -7
- pixeltable/catalog/table.py +298 -204
- pixeltable/catalog/table_version.py +104 -139
- pixeltable/catalog/table_version_path.py +22 -4
- pixeltable/catalog/view.py +20 -10
- pixeltable/dataframe.py +128 -25
- pixeltable/env.py +21 -14
- pixeltable/exec/exec_context.py +5 -0
- pixeltable/exec/exec_node.py +1 -0
- pixeltable/exec/in_memory_data_node.py +29 -24
- pixeltable/exec/sql_scan_node.py +1 -1
- pixeltable/exprs/column_ref.py +13 -8
- pixeltable/exprs/data_row.py +4 -0
- pixeltable/exprs/expr.py +16 -1
- pixeltable/exprs/function_call.py +4 -4
- pixeltable/exprs/row_builder.py +29 -20
- pixeltable/exprs/similarity_expr.py +4 -3
- pixeltable/ext/functions/yolox.py +2 -1
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/aggregate_function.py +14 -12
- pixeltable/func/callable_function.py +8 -6
- pixeltable/func/expr_template_function.py +13 -19
- pixeltable/func/function.py +3 -6
- pixeltable/func/query_template_function.py +84 -0
- pixeltable/func/signature.py +68 -23
- pixeltable/func/udf.py +13 -10
- pixeltable/functions/__init__.py +6 -91
- pixeltable/functions/eval.py +26 -14
- pixeltable/functions/fireworks.py +25 -23
- pixeltable/functions/globals.py +62 -0
- pixeltable/functions/huggingface.py +20 -16
- pixeltable/functions/image.py +170 -1
- pixeltable/functions/openai.py +95 -128
- pixeltable/functions/string.py +10 -2
- pixeltable/functions/together.py +95 -84
- pixeltable/functions/util.py +16 -0
- pixeltable/functions/video.py +94 -16
- pixeltable/functions/whisper.py +78 -0
- pixeltable/globals.py +1 -1
- pixeltable/io/__init__.py +10 -0
- pixeltable/io/external_store.py +370 -0
- pixeltable/io/globals.py +50 -22
- pixeltable/{datatransfer → io}/label_studio.py +279 -166
- pixeltable/io/parquet.py +1 -1
- pixeltable/iterators/__init__.py +9 -0
- pixeltable/iterators/string.py +40 -0
- pixeltable/metadata/__init__.py +6 -8
- pixeltable/metadata/converters/convert_10.py +2 -4
- pixeltable/metadata/converters/convert_12.py +7 -2
- pixeltable/metadata/converters/convert_13.py +6 -8
- pixeltable/metadata/converters/convert_14.py +2 -4
- pixeltable/metadata/converters/convert_15.py +40 -25
- pixeltable/metadata/converters/convert_16.py +18 -0
- pixeltable/metadata/converters/util.py +11 -8
- pixeltable/metadata/schema.py +3 -6
- pixeltable/plan.py +8 -7
- pixeltable/store.py +1 -1
- pixeltable/tool/create_test_db_dump.py +145 -54
- pixeltable/tool/embed_udf.py +9 -0
- pixeltable/type_system.py +1 -2
- pixeltable/utils/code.py +34 -0
- {pixeltable-0.2.7.dist-info → pixeltable-0.2.9.dist-info}/METADATA +2 -2
- pixeltable-0.2.9.dist-info/RECORD +131 -0
- pixeltable/datatransfer/__init__.py +0 -1
- pixeltable/datatransfer/remote.py +0 -113
- pixeltable/functions/pil/image.py +0 -147
- pixeltable-0.2.7.dist-info/RECORD +0 -126
- {pixeltable-0.2.7.dist-info → pixeltable-0.2.9.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.7.dist-info → pixeltable-0.2.9.dist-info}/WHEEL +0 -0
pixeltable/catalog/table_version.py
CHANGED

@@ -89,8 +89,6 @@ class TableVersion:
         self.next_idx_id = tbl_md.next_idx_id
         self.next_rowid = tbl_md.next_row_id

-        self.remotes = dict(TableVersion._init_remote(remote_md) for remote_md in tbl_md.remotes)
-
         # view-specific initialization
         from pixeltable import exprs
         predicate_dict = None if not is_view or tbl_md.view_md.predicate is None else tbl_md.view_md.predicate
@@ -124,8 +122,13 @@ class TableVersion:
         self.cols_by_id: dict[int, Column] = {}  # contains only columns visible in this version, both system and user
         self.idx_md = tbl_md.index_md  # needed for _create_tbl_md()
         self.idxs_by_name: dict[str, TableVersion.IndexInfo] = {}  # contains only actively maintained indices
+        self.external_stores: dict[str, pixeltable.io.ExternalStore] = {}
+
         self._init_schema(tbl_md, schema_version_md)

+        # Init external stores (this needs to happen after the schema is created)
+        self._init_external_stores(tbl_md)
+
     def __hash__(self) -> int:
         return hash(self.id)

@@ -160,7 +163,7 @@ class TableVersion:
         column_md = cls._create_column_md(cols)
         table_md = schema.TableMd(
             name=name, current_version=0, current_schema_version=0, next_col_id=len(cols),
-            next_idx_id=0, next_row_id=0, column_md=column_md, index_md={},
+            next_idx_id=0, next_row_id=0, column_md=column_md, index_md={}, external_stores=[], view_md=view_md)
         # create a schema.Table here, we need it to call our c'tor;
         # don't add it to the session yet, we might add index metadata
         tbl_id = uuid.uuid4()
@@ -240,6 +243,8 @@ class TableVersion:
     def _init_cols(self, tbl_md: schema.TableMd, schema_version_md: schema.TableSchemaVersionMd) -> None:
         """Initialize self.cols with the columns visible in our effective version"""
         import pixeltable.exprs as exprs
+        from pixeltable.catalog import Catalog
+
         self.cols = []
         self.cols_by_name = {}
         self.cols_by_id = {}
@@ -248,7 +253,8 @@ class TableVersion:
             col = Column(
                 col_id=col_md.id, name=col_name, col_type=ts.ColumnType.from_dict(col_md.col_type),
                 is_pk=col_md.is_pk, stored=col_md.stored,
-                schema_version_add=col_md.schema_version_add, schema_version_drop=col_md.schema_version_drop
+                schema_version_add=col_md.schema_version_add, schema_version_drop=col_md.schema_version_drop,
+                value_expr_dict=col_md.value_expr)
             col.tbl = self
             self.cols.append(col)

@@ -266,18 +272,8 @@ class TableVersion:
             # make sure to traverse columns ordered by position = order in which cols were created;
             # this guarantees that references always point backwards
             if col_md.value_expr is not None:
-
-                self.
-
-            # if this is a stored proxy column, resolve the relationships with its proxy base.
-            if col_md.proxy_base is not None:
-                # proxy_base must have a strictly smaller id, so we must already have encountered it
-                # in traversal order; and if the proxy column is active at this version, then the
-                # proxy base must necessarily be active as well. This motivates the following assertion.
-                assert col_md.proxy_base in self.cols_by_id
-                base_col = self.cols_by_id[col_md.proxy_base]
-                base_col.stored_proxy = col
-                col.proxy_base = base_col
+                refd_cols = exprs.Expr.get_refd_columns(col_md.value_expr)
+                self._record_refd_columns(col)

     def _init_idxs(self, tbl_md: schema.TableMd) -> None:
         self.idx_md = tbl_md.index_md
@@ -317,22 +313,30 @@ class TableVersion:
         self.store_tbl: StoreBase = StoreTable(self)

     def _update_md(
-            self, timestamp: float, preceding_schema_version: Optional[int]
+            self, timestamp: float, conn: sql.engine.Connection, update_tbl_version: bool = True, preceding_schema_version: Optional[int] = None
     ) -> None:
-        """
+        """Writes table metadata to the database.
+
         Args:
             timestamp: timestamp of the change
-
+            conn: database connection to use
+            update_tbl_version: if `True`, will also write `TableVersion` metadata
+            preceding_schema_version: if specified, will also write `TableSchemaVersion` metadata, recording the
+                specified preceding schema version
         """
+        assert update_tbl_version or preceding_schema_version is None
+
         conn.execute(
             sql.update(schema.Table.__table__)
                 .values({schema.Table.md: dataclasses.asdict(self._create_tbl_md())})
                 .where(schema.Table.id == self.id))

-
-
-
-            .
+        if update_tbl_version:
+            version_md = self._create_version_md(timestamp)
+            conn.execute(
+                sql.insert(schema.TableVersion.__table__)
+                    .values(tbl_id=self.id, version=self.version, md=dataclasses.asdict(version_md)))
+
         if preceding_schema_version is not None:
             schema_version_md = self._create_schema_version_md(preceding_schema_version)
             conn.execute(
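
Note: the reworked _update_md() now takes the connection explicitly plus two optional flags. The three call patterns that appear in the hunks below can be summarized as follows (a sketch, not part of the diff; `tv` stands for a TableVersion instance and `conn` for an open SQLAlchemy connection):

    import time
    import sqlalchemy as sql

    def record_schema_change(tv, conn: sql.engine.Connection, preceding_schema_version: int) -> None:
        # schema changes (add/drop/rename column, add/drop index): Table, TableVersion and
        # TableSchemaVersion metadata are all written
        tv._update_md(time.time(), conn, preceding_schema_version=preceding_schema_version)

    def record_data_change(tv, timestamp: float, conn: sql.engine.Connection) -> None:
        # data changes (insert/update/delete): Table and TableVersion metadata only
        tv._update_md(timestamp, conn)

    def record_store_change(tv, conn: sql.engine.Connection) -> None:
        # linking/unlinking an external store: Table metadata only, no new TableVersion row
        tv._update_md(time.time(), conn, update_tbl_version=False)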
@@ -352,7 +356,7 @@ class TableVersion:
         self.schema_version = self.version
         with Env.get().engine.begin() as conn:
             status = self._add_index(col, idx_name, idx, conn)
-            self._update_md(time.time(),
+            self._update_md(time.time(), conn, preceding_schema_version=preceding_schema_version)
         _logger.info(f'Added index {idx_name} on column {col.name} to table {self.name}')
         return status

@@ -435,7 +439,7 @@ class TableVersion:

         with Env.get().engine.begin() as conn:
             self._drop_columns([idx_info.val_col, idx_info.undo_col])
-            self._update_md(time.time(),
+            self._update_md(time.time(), conn, preceding_schema_version=preceding_schema_version)
         _logger.info(f'Dropped index {idx_md.name} on table {self.name}')

     def add_column(self, col: Column, print_stats: bool = False) -> UpdateStatus:
@@ -461,7 +465,7 @@ class TableVersion:
             status = self._add_columns([col], conn, print_stats=print_stats)
             _ = self._add_default_index(col, conn)
             # TODO: what to do about errors?
-            self._update_md(time.time(),
+            self._update_md(time.time(), conn, preceding_schema_version=preceding_schema_version)
         _logger.info(f'Added column {col.name} to table {self.name}, new version: {self.version}')

         msg = (
@@ -472,8 +476,9 @@ class TableVersion:
         _logger.info(f'Column {col.name}: {msg}')
         return status

-    def _add_columns(self, cols:
+    def _add_columns(self, cols: Iterable[Column], conn: sql.engine.Connection, print_stats: bool = False) -> UpdateStatus:
         """Add and populate columns within the current transaction"""
+        cols = list(cols)
         row_count = self.store_tbl.count(conn=conn)
         for col in cols:
             if not col.col_type.nullable and not col.is_computed:
@@ -493,7 +498,7 @@ class TableVersion:
             self.cols_by_id[col.id] = col
             if col.value_expr is not None:
                 col.check_value_expr()
-                self.
+                self._record_refd_columns(col)

             if col.is_stored:
                 self.store_tbl.add_column(col, conn)
@@ -507,7 +512,7 @@ class TableVersion:
         plan.ctx.num_rows = row_count

         try:
-            plan.ctx.conn
+            plan.ctx.set_conn(conn)
             plan.open()
             num_excs = self.store_tbl.load_column(col, plan, value_expr_slot_idx, conn)
             if num_excs > 0:
@@ -537,6 +542,8 @@ class TableVersion:
     def drop_column(self, name: str) -> None:
         """Drop a column from the table.
         """
+        from pixeltable.catalog import Catalog
+
         assert not self.is_snapshot
         if name not in self.cols_by_name:
             raise excs.Error(f'Unknown column: {name}')
@@ -547,13 +554,24 @@ class TableVersion:
                 f'Cannot drop column `{name}` because the following columns depend on it:\n'
                 f'{", ".join(c.name for c in dependent_user_cols)}'
             )
-
-
+        # See if this column has a dependent store. We need to look through all stores in all
+        # (transitive) views of this table.
+        transitive_views = Catalog.get().tbls[self.id].get_views(recursive=True)
+        dependent_stores = [
+            (view, store)
+            for view in transitive_views
+            for store in view._tbl_version.external_stores.values()
+            if col in store.get_local_columns()
+        ]
+        if len(dependent_stores) > 0:
+            dependent_store_names = [
+                store.name if view._get_id() == self.id else f'{store.name} (in view `{view.get_name()}`)'
+                for view, store in dependent_stores
+            ]
             raise excs.Error(
-                f'Cannot drop column `{name}` because the following
-                f'{", ".join(
+                f'Cannot drop column `{name}` because the following external stores depend on it:\n'
+                f'{", ".join(dependent_store_names)}'
             )
-        assert col.stored_proxy is None  # since there are no dependent remotes

         # we're creating a new schema version
         self.version += 1
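
Note: at the user API level this check surfaces as an error when dropping a column that a linked external store still maps. A rough sketch of the expected behavior (hypothetical table and column names; assumes the module-level get_table() API and a previously linked store):

    import pixeltable as pxt
    import pixeltable.exceptions as excs

    tbl = pxt.get_table('demo_tbl')   # hypothetical table with column 'annotations' mapped into a linked store
    try:
        tbl.drop_column('annotations')
    except excs.Error as e:
        # expected: "Cannot drop column `annotations` because the following external stores depend on it: ..."
        print(e)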
@@ -576,10 +594,10 @@ class TableVersion:
             for idx_name in dropped_idx_names:
                 del self.idxs_by_name[idx_name]
             self._drop_columns(dropped_cols)
-            self._update_md(time.time(),
+            self._update_md(time.time(), conn, preceding_schema_version=preceding_schema_version)
         _logger.info(f'Dropped column {name} from table {self.name}, new version: {self.version}')

-    def _drop_columns(self, cols:
+    def _drop_columns(self, cols: Iterable[Column]) -> None:
         """Mark columns as dropped"""
         assert not self.is_snapshot

@@ -621,7 +639,7 @@ class TableVersion:
         self.schema_version = self.version

         with Env.get().engine.begin() as conn:
-            self._update_md(time.time(),
+            self._update_md(time.time(), conn, preceding_schema_version=preceding_schema_version)
         _logger.info(f'Renamed column {old_name} to {new_name} in table {self.name}, new version: {self.version}')

     def set_comment(self, new_comment: Optional[str]):
@@ -640,7 +658,7 @@ class TableVersion:
         preceding_schema_version = self.schema_version
         self.schema_version = self.version
         with Env.get().engine.begin() as conn:
-            self._update_md(time.time(),
+            self._update_md(time.time(), conn, preceding_schema_version=preceding_schema_version)
         _logger.info(f'[{self.name}] Updating table schema to version: {self.version}')

     def insert(
@@ -667,7 +685,7 @@ class TableVersion:
             result.num_excs = num_excs
             result.num_computed_values += exec_plan.ctx.num_computed_exprs * num_rows
             result.cols_with_excs = [f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs]
-            self._update_md(timestamp,
+            self._update_md(timestamp, conn)

             # update views
             for view in self.mutable_views:
@@ -781,7 +799,7 @@ class TableVersion:
             result.cols_with_excs = [f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs]
             self.store_tbl.delete_rows(
                 self.version, base_versions=base_versions, match_on_vmin=True, where_clause=where_clause, conn=conn)
-            self._update_md(timestamp,
+            self._update_md(timestamp, conn)

             if cascade:
                 base_versions = [None if plan is None else self.version] + base_versions  # don't update in place
@@ -831,7 +849,7 @@ class TableVersion:
             if num_rows > 0:
                 # we're creating a new version
                 self.version += 1
-                self._update_md(timestamp,
+                self._update_md(timestamp, conn)
             else:
                 pass
             for view in self.mutable_views:
@@ -962,93 +980,29 @@ class TableVersion:
             view._revert(session)
         _logger.info(f'TableVersion {self.name}: reverted to version {self.version}')

-
-
-
-
-
-
-        return remote, col_mapping
-
-    def link(self, remote: pixeltable.datatransfer.Remote, col_mapping: dict[str, str]) -> None:
-        # All of the media columns being linked need to either be stored, computed columns or have stored proxies.
-        # This ensures that the media in those columns resides in the media cache, where it can be served.
-        # First determine which columns (if any) need stored proxies, but don't have one yet.
-        cols_by_name = self.path.cols_by_name()  # Includes base columns
-        stored_proxies_needed = []
-        for col_name in col_mapping.keys():
-            col = cols_by_name[col_name]
-            if col.col_type.is_media_type() and not (col.is_stored and col.compute_func) and not col.stored_proxy:
-                stored_proxies_needed.append(col)
-        with Env.get().engine.begin() as conn:
-            self.version += 1
-            self.remotes[remote] = col_mapping
-            preceding_schema_version = None
-            if len(stored_proxies_needed) > 0:
-                _logger.info(f'Creating stored proxies for columns: {[col.name for col in stored_proxies_needed]}')
-                # Create stored proxies for columns that need one. Increment the schema version
-                # accordingly.
-                preceding_schema_version = self.schema_version
-                self.schema_version = self.version
-                proxy_cols = [self.create_stored_proxy(col) for col in stored_proxies_needed]
-                # Add the columns; this will also update table metadata.
-                # TODO Add to base tables
-                self._add_columns(proxy_cols, conn)
-                # We don't need to retain `UpdateStatus` since the stored proxies are intended to be
-                # invisible to the user.
-            self._update_md(time.time(), preceding_schema_version, conn)
-
-    def create_stored_proxy(self, col: Column) -> Column:
-        from pixeltable import exprs
+    def _init_external_stores(self, tbl_md: schema.TableMd) -> None:
+        for store_md in tbl_md.external_stores:
+            store_cls = resolve_symbol(store_md['class'])
+            assert isinstance(store_cls, type) and issubclass(store_cls, pixeltable.io.ExternalStore)
+            store = store_cls.from_dict(store_md['md'])
+            self.external_stores[store.name] = store

-
-
-
-
-
-            col_id=self.next_col_id,
-            sa_col_type=col.col_type.to_sa_type(),
-            schema_version_add=self.schema_version
-        )
-        proxy_col.tbl = self
-        self.next_col_id += 1
-        col.stored_proxy = proxy_col
-        proxy_col.proxy_base = col
-        return proxy_col
+    def link_external_store(self, store: pixeltable.io.ExternalStore) -> None:
+        with Env.get().engine.begin() as conn:
+            store.link(self, conn)  # May result in additional metadata changes
+            self.external_stores[store.name] = store
+            self._update_md(time.time(), conn, update_tbl_version=False)

-    def
-    assert
-
-        this_remote_col_names = list(self.remotes[remote].keys())
-        other_remote_col_names = {
-            col_name
-            for other_remote, col_mapping in self.remotes.items() if other_remote != remote
-            for col_name in col_mapping.keys()
-        }
-        cols_by_name = self.path.cols_by_name()  # Includes base columns
-        stored_proxy_deletions_needed = [
-            cols_by_name[col_name]
-            for col_name in this_remote_col_names
-            if col_name not in other_remote_col_names and cols_by_name[col_name].stored_proxy
-        ]
+    def unlink_external_store(self, store_name: str, delete_external_data: bool) -> None:
+        assert store_name in self.external_stores
+        store = self.external_stores[store_name]
         with Env.get().engine.begin() as conn:
-            self
-            del self.
-
-
-
-
-            proxy_cols = [col.stored_proxy for col in stored_proxy_deletions_needed]
-            for col in stored_proxy_deletions_needed:
-                assert col.stored_proxy is not None and col.stored_proxy.proxy_base == col
-                col.stored_proxy.proxy_base = None
-                col.stored_proxy = None
-            # TODO Drop from base tables
-            self._drop_columns(proxy_cols)
-            self._update_md(timestamp, preceding_schema_version, conn)
-
-    def get_remotes(self) -> dict[pixeltable.datatransfer.Remote, dict[str, str]]:
-        return self.remotes
+            store.unlink(self, conn)  # May result in additional metadata changes
+            del self.external_stores[store_name]
+            self._update_md(time.time(), conn, update_tbl_version=False)
+
+        if delete_external_data and isinstance(store, pixeltable.io.external_store.Project):
+            store.delete()

     def is_view(self) -> bool:
         return self.base is not None
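
Note: linking and unlinking now run the store-side work and the metadata update in the same transaction. A minimal usage sketch (assumes `store` is an instance of a concrete pixeltable.io.ExternalStore subclass, e.g. the Label Studio store added in this release, and `tv` is the table's TableVersion):

    import pixeltable.io

    def relink(tv, store: pixeltable.io.ExternalStore) -> None:
        # link: store.link() runs inside the transaction and the store is persisted in
        # TableMd.external_stores without bumping the table version
        tv.link_external_store(store)
        assert store.name in tv.external_stores
        # unlink: external data is only deleted for Project stores, and only on request
        tv.unlink_external_store(store.name, delete_external_data=False)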
@@ -1110,14 +1064,17 @@ class TableVersion:
             args.append(exprs.ColumnRef(param))
         fn = func.make_function(
             col.compute_func, return_type=col.col_type, param_types=[arg.col_type for arg in args])
-        col.
+        col.set_value_expr(fn(*args))

-    def
+    def _record_refd_columns(self, col: Column) -> None:
         """Update Column.dependent_cols for all cols referenced in col.value_expr.
         """
-
-
-
+        import pixeltable.exprs as exprs
+        if col.value_expr_dict is not None:
+            # if we have a value_expr_dict, use that instead of instantiating the value_expr
+            refd_cols = exprs.Expr.get_refd_columns(col.value_expr_dict)
+        else:
+            refd_cols = [e.col for e in col.value_expr.subexprs(expr_class=exprs.ColumnRef)]
         for refd_col in refd_cols:
             refd_col.dependent_cols.add(col)

@@ -1145,25 +1102,23 @@ class TableVersion:

     @classmethod
     def _create_column_md(cls, cols: List[Column]) -> dict[int, schema.ColumnMd]:
-        column_md:
+        column_md: dict[int, schema.ColumnMd] = {}
         for col in cols:
             value_expr_dict = col.value_expr.as_dict() if col.value_expr is not None else None
             column_md[col.id] = schema.ColumnMd(
                 id=col.id, col_type=col.col_type.as_dict(), is_pk=col.is_pk,
                 schema_version_add=col.schema_version_add, schema_version_drop=col.schema_version_drop,
-                value_expr=value_expr_dict, stored=col.stored
-                proxy_base=col.proxy_base.id if col.proxy_base else None)
+                value_expr=value_expr_dict, stored=col.stored)
         return column_md

     @classmethod
-    def
+    def _create_stores_md(cls, stores: Iterable['pixeltable.io.ExternalStore']) -> list[dict[str, Any]]:
         return [
             {
-                'class': f'{type(
-                '
-                'col_mapping': col_mapping
+                'class': f'{type(store).__module__}.{type(store).__qualname__}',
+                'md': store.as_dict()
             }
-            for
+            for store in stores
         ]

     def _create_tbl_md(self) -> schema.TableMd:
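
Note: _create_stores_md() serializes each linked store as a fully qualified class name plus the store's own as_dict() payload, which _init_external_stores() later resolves via resolve_symbol(). The resulting entries in TableMd.external_stores look roughly like this (illustrative class name, empty payload):

    external_stores_md = [
        {
            'class': 'pixeltable.io.label_studio.LabelStudioProject',  # type(store).__module__ + '.' + __qualname__
            'md': {},                                                  # store.as_dict()
        },
    ]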
@@ -1171,7 +1126,7 @@ class TableVersion:
             name=self.name, current_version=self.version, current_schema_version=self.schema_version,
             next_col_id=self.next_col_id, next_idx_id=self.next_idx_id, next_row_id=self.next_rowid,
             column_md=self._create_column_md(self.cols), index_md=self.idx_md,
-
+            external_stores=self._create_stores_md(self.external_stores.values()), view_md=self.view_md)

     def _create_version_md(self, timestamp: float) -> schema.TableVersionMd:
         return schema.TableVersionMd(created_at=timestamp, version=self.version, schema_version=self.schema_version)
@@ -1184,3 +1139,13 @@ class TableVersion:
         return schema.TableSchemaVersionMd(
             schema_version=self.schema_version, preceding_schema_version=preceding_schema_version,
             columns=column_md, num_retained_versions=self.num_retained_versions, comment=self.comment)
+
+    def as_dict(self) -> dict:
+        return {'id': str(self.id), 'effective_version': self.effective_version}
+
+    @classmethod
+    def from_dict(cls, d: dict) -> 'TableVersion':
+        import pixeltable.catalog as catalog
+        id = UUID(d['id'])
+        effective_version = d['effective_version']
+        return catalog.Catalog.get().tbl_versions[(id, effective_version)]
pixeltable/catalog/table_version_path.py
CHANGED

@@ -1,10 +1,11 @@
 from __future__ import annotations

 import logging
-from typing import Optional,
+from typing import Optional, Union
 from uuid import UUID

 import pixeltable
+import pixeltable.catalog as catalog
 from .column import Column
 from .globals import POS_COLUMN_NAME
 from .table_version import TableVersion
@@ -59,13 +60,13 @@ class TableVersionPath:
     def is_insertable(self) -> bool:
         return self.tbl_version.is_insertable()

-    def get_tbl_versions(self) ->
+    def get_tbl_versions(self) -> list[TableVersion]:
         """Return all tbl versions"""
         if self.base is None:
             return [self.tbl_version]
         return [self.tbl_version] + self.base.get_tbl_versions()

-    def get_bases(self) ->
+    def get_bases(self) -> list[TableVersion]:
         """Return all tbl versions"""
         if self.base is None:
             return []
@@ -100,7 +101,7 @@ class TableVersionPath:
         from pixeltable.dataframe import DataFrame
         return DataFrame(self).__getitem__(index)

-    def columns(self) ->
+    def columns(self) -> list[Column]:
         """Return all user columns visible in this tbl version path, including columns from bases"""
         result = list(self.tbl_version.cols_by_name.values())
         if self.base is not None:
@@ -114,6 +115,11 @@ class TableVersionPath:
         cols = self.columns()
         return {col.name: col for col in cols}

+    def cols_by_id(self) -> dict[int, Column]:
+        """Return a dict of all user columns visible in this tbl version path, including columns from bases"""
+        cols = self.columns()
+        return {col.id: col for col in cols}
+
     def get_column(self, name: str, include_bases: bool = True) -> Optional[Column]:
         """Return the column with the given name, or None if not found"""
         col = self.tbl_version.cols_by_name.get(name)
@@ -136,3 +142,15 @@ class TableVersionPath:
             return self.base.has_column(col)
         else:
             return False
+
+    def as_dict(self) -> dict:
+        return {
+            'tbl_version': self.tbl_version.as_dict(),
+            'base': self.base.as_dict() if self.base is not None else None
+        }
+
+    @classmethod
+    def from_dict(cls, d: dict) -> TableVersionPath:
+        tbl_version = TableVersion.from_dict(d['tbl_version'])
+        base = TableVersionPath.from_dict(d['base']) if d['base'] is not None else None
+        return cls(tbl_version, base)
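
Note: as_dict()/from_dict() give TableVersionPath a lightweight serialized form: only (table id, effective version) pairs are stored, and from_dict() resolves them against Catalog.get().tbl_versions, so the referenced TableVersion objects must already be loaded in this process. A round-trip sketch:

    from pixeltable.catalog.table_version_path import TableVersionPath

    def round_trip(path: TableVersionPath) -> TableVersionPath:
        # the serialized form carries no column or schema data, just catalog keys
        return TableVersionPath.from_dict(path.as_dict())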
pixeltable/catalog/view.py
CHANGED

@@ -1,6 +1,6 @@
 from __future__ import annotations
 import logging
-from typing import List, Optional, Type, Dict, Set, Any
+from typing import List, Optional, Type, Dict, Set, Any, Iterable
 from uuid import UUID
 import inspect

@@ -11,7 +11,7 @@ from .table_version import TableVersion
 from .table_version_path import TableVersionPath
 from .column import Column
 from .catalog import Catalog
-from .globals import POS_COLUMN_NAME
+from .globals import POS_COLUMN_NAME, UpdateStatus
 from pixeltable.env import Env
 from pixeltable.iterators import ComponentIterator
 from pixeltable.exceptions import Error
@@ -55,7 +55,7 @@ class View(Table):

         # verify that filter can be evaluated in the context of the base
         if predicate is not None:
-            if not predicate.is_bound_by(base.
+            if not predicate.is_bound_by(base._tbl_version_path):
                 raise excs.Error(f'Filter cannot be computed in the context of the base {base._name}')
             # create a copy that we can modify and store
             predicate = predicate.copy()
@@ -65,7 +65,7 @@ class View(Table):
             if not col.is_computed:
                 continue
             # make sure that the value can be computed in the context of the base
-            if col.value_expr is not None and not col.value_expr.is_bound_by(base.
+            if col.value_expr is not None and not col.value_expr.is_bound_by(base._tbl_version_path):
                 raise excs.Error(
                     f'Column {col.name}: value expression cannot be computed in the context of the base {base._name}')

@@ -83,7 +83,7 @@ class View(Table):

         # construct Signature and type-check bound_args
         params = [
-            func.Parameter(param_name, param_type, inspect.Parameter.POSITIONAL_OR_KEYWORD)
+            func.Parameter(param_name, param_type, kind=inspect.Parameter.POSITIONAL_OR_KEYWORD)
             for param_name, param_type in iterator_cls.input_schema().items()
         ]
         sig = func.Signature(InvalidType(), params)
@@ -114,7 +114,7 @@ class View(Table):
         iterator_args_expr = InlineDict(iterator_args) if iterator_args is not None else None
         iterator_class_fqn = f'{iterator_cls.__module__}.{iterator_cls.__name__}' if iterator_cls is not None \
             else None
-        base_version_path = cls._get_snapshot_path(base.
+        base_version_path = cls._get_snapshot_path(base._tbl_version_path) if is_snapshot else base._tbl_version_path
         base_versions = [
             (tbl_version.id.hex, tbl_version.version if is_snapshot or tbl_version.is_snapshot else None)
             for tbl_version in base_version_path.get_tbl_versions()
@@ -127,7 +127,7 @@ class View(Table):
             if iterator_args_expr is not None else None
         for col in columns:
             if col.value_expr is not None:
-                col.
+                col.set_value_expr(col.value_expr.retarget(base_version_path))

         view_md = md_schema.ViewMd(
             is_snapshot=is_snapshot, predicate=predicate.as_dict() if predicate is not None else None,
@@ -148,7 +148,7 @@ class View(Table):
         _logger.info(f'Created view `{name}`, id={tbl_version.id}')

         from pixeltable.plan import Planner
-        plan, num_values_per_row = Planner.create_view_load_plan(view.
+        plan, num_values_per_row = Planner.create_view_load_plan(view._tbl_version_path)
         num_rows, num_excs, cols_with_excs = tbl_version.store_tbl.insert_rows(
             plan, session.connection(), v_min=tbl_version.version)
         print(f'Created view `{name}` with {num_rows} rows, {num_excs} exceptions.')
@@ -161,11 +161,13 @@ class View(Table):
         return view

     @classmethod
-    def _verify_column(
+    def _verify_column(
+        cls, col: Column, existing_column_names: Set[str], existing_query_names: Optional[Set[str]] = None
+    ) -> None:
         # make sure that columns are nullable or have a default
         if not col.col_type.nullable and not col.is_computed:
             raise Error(f'Column {col.name}: non-computed columns in views must be nullable')
-        super()._verify_column(col, existing_column_names)
+        super()._verify_column(col, existing_column_names, existing_query_names)

     @classmethod
     def _get_snapshot_path(cls, tbl_version_path: TableVersionPath) -> TableVersionPath:
@@ -201,3 +203,11 @@ class View(Table):
         cat.tbl_dependents[self._base._id].remove(self)
         del cat.tbl_dependents[self._id]

+    def insert(
+        self, rows: Optional[Iterable[dict[str, Any]]] = None, /, *, print_stats: bool = False,
+        fail_on_exception: bool = True, **kwargs: Any
+    ) -> UpdateStatus:
+        raise excs.Error(f'{self.display_name()} {self._name!r}: cannot insert into view')
+
+    def delete(self, where: Optional['pixeltable.exprs.Predicate'] = None) -> UpdateStatus:
+        raise excs.Error(f'{self.display_name()} {self._name!r}: cannot delete from view')
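
Note: View now overrides insert() and delete() to fail fast instead of inheriting the mutating behavior from Table. A brief sketch of the resulting behavior (assumes `view` is any View handle):

    import pixeltable.exceptions as excs

    def try_mutations(view) -> None:
        try:
            view.insert([{'col': 1}])   # hypothetical row; never reaches the store
        except excs.Error as e:
            print(e)                    # e.g. "view 'my_view': cannot insert into view"
        try:
            view.delete()
        except excs.Error as e:
            print(e)                    # e.g. "view 'my_view': cannot delete from view"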