pixeltable 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +1 -0
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +3 -10
- pixeltable/catalog/catalog.py +139 -59
- pixeltable/catalog/column.py +32 -23
- pixeltable/catalog/globals.py +2 -45
- pixeltable/catalog/insertable_table.py +5 -2
- pixeltable/catalog/path.py +6 -0
- pixeltable/catalog/table.py +173 -23
- pixeltable/catalog/table_version.py +156 -92
- pixeltable/catalog/table_version_handle.py +26 -1
- pixeltable/catalog/update_status.py +179 -0
- pixeltable/catalog/view.py +12 -3
- pixeltable/config.py +76 -12
- pixeltable/dataframe.py +1 -1
- pixeltable/env.py +29 -0
- pixeltable/exec/exec_node.py +7 -24
- pixeltable/exec/expr_eval/schedulers.py +134 -7
- pixeltable/exprs/column_property_ref.py +23 -20
- pixeltable/exprs/column_ref.py +24 -18
- pixeltable/exprs/data_row.py +9 -0
- pixeltable/exprs/function_call.py +2 -2
- pixeltable/exprs/row_builder.py +46 -14
- pixeltable/exprs/rowid_ref.py +0 -4
- pixeltable/func/function.py +3 -3
- pixeltable/functions/audio.py +36 -9
- pixeltable/functions/video.py +57 -10
- pixeltable/globals.py +61 -1
- pixeltable/io/__init__.py +1 -1
- pixeltable/io/external_store.py +39 -64
- pixeltable/io/globals.py +4 -4
- pixeltable/io/hf_datasets.py +10 -2
- pixeltable/io/label_studio.py +52 -48
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +125 -0
- pixeltable/metadata/converters/util.py +3 -0
- pixeltable/metadata/notes.py +2 -0
- pixeltable/metadata/schema.py +14 -2
- pixeltable/metadata/utils.py +78 -0
- pixeltable/plan.py +26 -18
- pixeltable/share/packager.py +20 -38
- pixeltable/store.py +121 -142
- pixeltable/type_system.py +2 -2
- pixeltable/utils/coroutine.py +6 -23
- pixeltable/utils/media_store.py +39 -0
- {pixeltable-0.4.1.dist-info → pixeltable-0.4.3.dist-info}/METADATA +1 -1
- {pixeltable-0.4.1.dist-info → pixeltable-0.4.3.dist-info}/RECORD +51 -47
- {pixeltable-0.4.1.dist-info → pixeltable-0.4.3.dist-info}/LICENSE +0 -0
- {pixeltable-0.4.1.dist-info → pixeltable-0.4.3.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.1.dist-info → pixeltable-0.4.3.dist-info}/entry_points.txt +0 -0
pixeltable/metadata/converters/convert_39.py
ADDED

@@ -0,0 +1,125 @@
+import logging
+from typing import Optional
+from uuid import UUID
+
+import sqlalchemy as sql
+
+from pixeltable.metadata import register_converter
+from pixeltable.metadata.converters.util import convert_table_md
+
+_logger = logging.getLogger('pixeltable')
+
+
+@register_converter(version=39)
+def _(engine: sql.engine.Engine) -> None:
+    convert_table_md(engine, table_modifier=__table_modifier)
+
+
+def __table_modifier(conn: sql.Connection, tbl_id: UUID, orig_table_md: dict, updated_table_md: dict) -> None:
+    store_prefix = 'view' if orig_table_md['view_md'] is not None else 'tbl'
+    store_name = f'{store_prefix}_{tbl_id.hex}'
+
+    # Get the list of column names that need to be migrated
+    col_names = find_error_columns(conn=conn, store_name=store_name)
+    if len(col_names) == 0:
+        _logger.info(f'No error columns found in table {store_name}. Skipping migration.')
+        return
+
+    # Check if the table exists, outside of the metadata we were given
+    # There seem to be cases where the metadata is present in the catalog,
+    # but the table itself is not in the database.
+    check_table_sql = sql.text(f"""
+        SELECT EXISTS (
+            SELECT 1
+            FROM information_schema.tables
+            WHERE table_name = '{store_name}'
+        )
+    """)
+    table_exists = conn.execute(check_table_sql).scalar()
+    if not table_exists:
+        _logger.warning(f'Table {store_name} does not exist. Skipping migration.')
+        return
+
+    return migrate_error_to_cellmd_columns(conn, store_name, col_names)
+
+
+def find_error_columns(conn: sql.Connection, store_name: str) -> list[str]:
+    """
+    Return any errormsg or errortype columns in the given table
+
+    Args:
+        conn: SQLAlchemy connection
+        store_name: Name of the table to check
+
+    Returns:
+        List of column name roots (root_errormsg, root_errortype)
+    """
+    check_columns_sql = sql.text(f"""
+        SELECT column_name
+        FROM information_schema.columns
+        WHERE table_name = '{store_name}'
+    """)
+    found_columns = [
+        row[0]
+        for row in conn.execute(check_columns_sql)
+        if row[0].endswith('_errormsg') or row[0].endswith('_errortype')
+    ]
+    column_roots = {s.removesuffix('_errormsg').removesuffix('_errortype') for s in found_columns}
+    return [*column_roots]
+
+
+def migrate_error_to_cellmd_columns(
+    conn: sql.Connection, store_name: str, col_names: list[str], backup_table: Optional[str] = None
+) -> None:
+    """
+    Safe version with error handling and optional backup.
+
+    Args:
+        engine: SQLAlchemy engine
+        store_name: Name of the table to modify
+        col_names: List of column name prefixes
+        backup_table: Optional name for backup table
+
+    Usage:
+        migrate_error_to_cellmd_columns(engine, 'my_table', ['columnname'], 'my_table_backup')
+    """
+
+    try:
+        # Optional: Create backup
+        if backup_table:
+            backup_sql = sql.text(f"""
+                CREATE TABLE {backup_table} AS SELECT * FROM {store_name}
+            """)
+            conn.execute(backup_sql)
+            _logger.info(f'Backup created: {backup_table}')
+
+        # Step 1: Add new columns
+        add_column_str = ', '.join(f'ADD COLUMN {col}_cellmd JSONB DEFAULT NULL' for col in col_names)
+        add_column_sql = sql.text(f'ALTER TABLE {store_name} {add_column_str}')
+        conn.execute(add_column_sql)
+        _logger.info(f'Added columns: {", ".join(f"{col}_cellmd" for col in col_names)}')
+
+        # Step 2: Populate new columns
+        set_column_str = ', '.join(
+            [
+                f'{col}_cellmd = CASE WHEN {col}_errormsg IS NULL OR {col}_errortype IS NULL '
+                f"THEN NULL ELSE jsonb_build_object('errormsg', {col}_errormsg, 'errortype', {col}_errortype) END"
+                for col in col_names
+            ]
+        )
+        populate_sql = sql.text(f'UPDATE {store_name} SET {set_column_str}')
+        result = conn.execute(populate_sql)
+        _logger.info(f'Updated {result.rowcount} rows')
+
+        # Step 3: Drop old columns
+        drop_columns_str = ', '.join(
+            [f'DROP COLUMN IF EXISTS {col}_errormsg, DROP COLUMN IF EXISTS {col}_errortype' for col in col_names]
+        )
+        drop_columns_sql = sql.text(f'ALTER TABLE {store_name} {drop_columns_str}')
+        conn.execute(drop_columns_sql)
+        _logger.info(f'Dropped columns: {", ".join(f"{col}_errormsg, {col}_errortype" for col in col_names)}')
+        _logger.info(f'Migration completed successfully for table: {store_name}')

+    except sql.exc.SQLAlchemyError as e:
+        _logger.error(f'Migration for table {store_name} failed: {e}')
+        raise
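Note: the net effect of the version-39 converter above is to collapse each pair of <col>_errormsg / <col>_errortype store columns into a single <col>_cellmd JSONB column. A minimal, self-contained Python sketch of the per-row rewrite (the 'img' column name and the sample values are hypothetical, not taken from the diff):

def to_cellmd(errormsg, errortype):
    # Mirrors the SQL CASE expression: NULL unless both error fields are present.
    if errormsg is None or errortype is None:
        return None
    return {'errormsg': errormsg, 'errortype': errortype}

rows = [
    {'img_errormsg': None, 'img_errortype': None},
    {'img_errormsg': 'download failed', 'img_errortype': 'RuntimeError'},
]
for row in rows:
    row['img_cellmd'] = to_cellmd(row.pop('img_errormsg'), row.pop('img_errortype'))
print(rows)
# [{'img_cellmd': None}, {'img_cellmd': {'errormsg': 'download failed', 'errortype': 'RuntimeError'}}]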
pixeltable/metadata/converters/util.py
CHANGED

@@ -16,6 +16,7 @@ def convert_table_md(
     column_md_updater: Optional[Callable[[dict], None]] = None,
     external_store_md_updater: Optional[Callable[[dict], None]] = None,
     substitution_fn: Optional[Callable[[Optional[str], Any], Optional[tuple[Optional[str], Any]]]] = None,
+    table_modifier: Optional[Callable[[sql.Connection, UUID, dict, dict], None]] = None,
 ) -> None:
     """
     Converts schema.TableMd dicts based on the specified conversion functions.
@@ -50,6 +51,8 @@ def convert_table_md(
             if updated_table_md != table_md:
                 __logger.info(f'Updating schema for table: {tbl_id}')
                 conn.execute(sql.update(Table).where(Table.id == tbl_id).values(md=updated_table_md))
+            if table_modifier is not None:
+                table_modifier(conn, tbl_id, table_md, updated_table_md)

         for row in conn.execute(sql.select(Function)):
             fn_id = row[0]
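For reference, the new table_modifier hook added to convert_table_md above is handed the same connection that is used to rewrite the table metadata, plus the table id and both the original and the updated metadata dicts. A hedged sketch of a converter that uses it (the version number and the modifier body are hypothetical):

import sqlalchemy as sql
from uuid import UUID

from pixeltable.metadata import register_converter
from pixeltable.metadata.converters.util import convert_table_md


@register_converter(version=99)  # hypothetical version number
def _(engine: sql.engine.Engine) -> None:
    convert_table_md(engine, table_modifier=_modify_store_table)


def _modify_store_table(conn: sql.Connection, tbl_id: UUID, orig_md: dict, updated_md: dict) -> None:
    # Store-table DDL/DML issued here runs on the same connection as the metadata rewrite.
    ...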
pixeltable/metadata/notes.py
CHANGED

@@ -2,6 +2,8 @@
 # rather than as a comment, so that the existence of a description can be enforced by
 # the unit tests when new versions are added.
 VERSION_NOTES = {
+    40: 'Convert error property columns to cellmd columns',
+    39: 'ColumnHandles in external stores',
     38: 'Added TableMd.view_sn',
     37: 'Add support for the sample() method on DataFrames',
     36: 'Added Table.lock_dummy',
pixeltable/metadata/schema.py
CHANGED

@@ -8,6 +8,8 @@ from sqlalchemy import BigInteger, ForeignKey, Integer, LargeBinary, orm
 from sqlalchemy.dialects.postgresql import JSONB, UUID
 from sqlalchemy.orm.decl_api import DeclarativeMeta

+from ..catalog.update_status import UpdateStatus
+
 # Base has to be marked explicitly as a type, in order to be used elsewhere as a type hint. But in addition to being
 # a type, it's also a `DeclarativeMeta`. The following pattern enables us to expose both `Base` and `Base.metadata`
 # outside of the module in a typesafe way.
@@ -213,13 +215,15 @@ class Table(Base):
     lock_dummy: orm.Mapped[int] = orm.mapped_column(BigInteger, nullable=True)


-@dataclasses.dataclass
+@dataclasses.dataclass(frozen=True)
 class TableVersionMd:
     tbl_id: str  # uuid.UUID
     created_at: float  # time.time()
     version: int
     schema_version: int
-
+    user: Optional[str] = None  # User that created this version
+    update_status: Optional[UpdateStatus] = None  # UpdateStatus of the change that created this version
+    additional_md: dict[str, Any] = dataclasses.field(default_factory=dict)


 class TableVersion(Base):
@@ -308,6 +312,14 @@ class FullTableMd(NamedTuple):
     version_md: TableVersionMd
     schema_version_md: TableSchemaVersionMd

+    @property
+    def is_pure_snapshot(self) -> bool:
+        return (
+            self.tbl_md.view_md is not None
+            and self.tbl_md.view_md.predicate is None
+            and len(self.schema_version_md.columns) == 0
+        )
+
     def as_dict(self) -> dict[str, Any]:
         return {
             'table_id': self.tbl_md.tbl_id,
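Two changes above are worth calling out: TableVersionMd is now a frozen dataclass with optional user, update_status, and additional_md fields, and FullTableMd gains an is_pure_snapshot property. A self-contained sketch of what frozen=True buys (a stand-in class whose field names mirror the diff; it is not the pixeltable type itself):

import dataclasses
from typing import Any, Optional


@dataclasses.dataclass(frozen=True)
class TableVersionMdSketch:
    tbl_id: str
    created_at: float
    version: int
    schema_version: int
    user: Optional[str] = None
    additional_md: dict[str, Any] = dataclasses.field(default_factory=dict)


md = TableVersionMdSketch(tbl_id='abc', created_at=0.0, version=3, schema_version=2)
try:
    md.version = 4  # frozen=True turns in-place mutation into an error
except dataclasses.FrozenInstanceError as exc:
    print(type(exc).__name__)  # FrozenInstanceError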
pixeltable/metadata/utils.py
ADDED

@@ -0,0 +1,78 @@
+from __future__ import annotations
+
+from typing import Optional
+
+from pixeltable.metadata import schema
+
+
+class MetadataUtils:
+    @classmethod
+    def _diff_md(
+        cls, old_md: Optional[dict[int, schema.SchemaColumn]], new_md: Optional[dict[int, schema.SchemaColumn]]
+    ) -> str:
+        """Return a string reporting the differences in a specific entry in two dictionaries
+
+        Results are formatted as follows:
+        - If `old_md` is `None`, returns 'Initial Version'.
+        - If `old_md` and `new_md` are the same, returns an empty string.
+        - If there are additions, changes, or deletions, returns a string summarizing the changes.
+        """
+        assert new_md is not None
+        if old_md is None:
+            return 'Initial Version'
+        if old_md == new_md:
+            return ''
+        added = {k: v.name for k, v in new_md.items() if k not in old_md}
+        changed = {
+            k: f'{old_md[k].name!r} to {v.name!r}'
+            for k, v in new_md.items()
+            if k in old_md and old_md[k].name != v.name
+        }
+        deleted = {k: v.name for k, v in old_md.items() if k not in new_md}
+        if len(added) == 0 and len(changed) == 0 and len(deleted) == 0:
+            return ''
+        # Format the result
+        t = []
+        if len(added) > 0:
+            t.append('Added: ' + ', '.join(added.values()))
+        if len(changed) > 0:
+            t.append('Renamed: ' + ', '.join(changed.values()))
+        if len(deleted) > 0:
+            t.append('Deleted: ' + ', '.join(deleted.values()))
+        r = ', '.join(t)
+        return r
+
+    @classmethod
+    def _create_md_change_dict(
+        cls, md_list: Optional[list[tuple[int, dict[int, schema.SchemaColumn]]]]
+    ) -> dict[int, str]:
+        """Return a dictionary of schema changes by version
+        Args:
+            md_list: a list of tuples, each containing a version number and a metadata dictionary.
+        """
+        r: dict[int, str] = {}
+        if md_list is None or len(md_list) == 0:
+            return r
+
+        # Sort the list in place by version number
+        md_list.sort()
+
+        first_retrieved_version = md_list[0][0]
+        if first_retrieved_version == 0:
+            prev_md = None
+            prev_ver = -1
+            start = 0
+        else:
+            prev_md = md_list[0][1]
+            prev_ver = first_retrieved_version
+            start = 1
+
+        for ver, curr_md in md_list[start:]:
+            if ver == prev_ver:
+                continue
+            assert ver > prev_ver
+            tf = cls._diff_md(prev_md, curr_md)
+            if tf != '':
+                r[ver] = tf
+            prev_md = curr_md
+        return r
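The new MetadataUtils helpers turn per-version column metadata into human-readable schema-change summaries. A toy walk-through of the _diff_md set logic using stand-in column objects (schema.SchemaColumn is only assumed here to expose a .name attribute):

from types import SimpleNamespace as Col

v0 = {0: Col(name='a'), 1: Col(name='b')}
v1 = {0: Col(name='a'), 1: Col(name='b_renamed'), 2: Col(name='c')}

# Same key/name comparisons as _diff_md(v0, v1): keys are column ids, values carry names.
added = [v.name for k, v in v1.items() if k not in v0]
changed = [f'{v0[k].name!r} to {v.name!r}' for k, v in v1.items() if k in v0 and v0[k].name != v.name]
deleted = [v.name for k, v in v0.items() if k not in v1]
print('Added: ' + ', '.join(added), '| Renamed: ' + ', '.join(changed), '| Deleted: ' + ', '.join(deleted))
# Added: c | Renamed: 'b' to 'b_renamed' | Deleted: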
pixeltable/plan.py
CHANGED

@@ -378,7 +378,7 @@ class Planner:

         cls.__check_valid_columns(tbl, stored_cols, 'inserted into')

-        row_builder = exprs.RowBuilder([], stored_cols, [])
+        row_builder = exprs.RowBuilder([], stored_cols, [], tbl)

         # create InMemoryDataNode for 'rows'
         plan: exec.ExecNode = exec.InMemoryDataNode(
@@ -473,15 +473,19 @@
         assert isinstance(tbl, catalog.TableVersionPath)
         target = tbl.tbl_version.get()  # the one we need to update
         updated_cols = list(update_targets.keys())
+        recomputed_cols: set[Column]
         if len(recompute_targets) > 0:
-
+            assert len(update_targets) == 0
+            recomputed_cols = {*recompute_targets}
+            if cascade:
+                recomputed_cols |= target.get_dependent_columns(recomputed_cols)
         else:
             recomputed_cols = target.get_dependent_columns(updated_cols) if cascade else set()
-
-
-
-
-
+        # regardless of cascade, we need to update all indices on any updated/recomputed column
+        idx_val_cols = target.get_idx_val_columns(set(updated_cols) | recomputed_cols)
+        recomputed_cols.update(idx_val_cols)
+        # we only need to recompute stored columns (unstored ones are substituted away)
+        recomputed_cols = {c for c in recomputed_cols if c.is_stored}

         cls.__check_valid_columns(tbl.tbl_version.get(), recomputed_cols, 'updated in')

@@ -508,6 +512,7 @@ class Planner:
         # update row builder with column information
         for i, col in enumerate(all_base_cols):
             plan.row_builder.add_table_column(col, select_list[i].slot_idx)
+        plan.ctx.num_computed_exprs = len(recomputed_exprs)
         recomputed_user_cols = [c for c in recomputed_cols if c.name is not None]
         return plan, [f'{c.tbl.name}.{c.name}' for c in updated_cols + recomputed_user_cols], recomputed_user_cols

@@ -588,7 +593,7 @@ class Planner:
         sql_exprs = list(
             exprs.Expr.list_subexprs(analyzer.all_exprs, filter=analyzer.sql_elements.contains, traverse_matches=False)
         )
-        row_builder = exprs.RowBuilder(analyzer.all_exprs, [], sql_exprs)
+        row_builder = exprs.RowBuilder(analyzer.all_exprs, [], sql_exprs, target)
         analyzer.finalize(row_builder)
         sql_lookup_node = exec.SqlLookupNode(tbl, row_builder, sql_exprs, sa_key_cols, key_vals)
         col_vals = [{col: row[col].val for col in updated_cols} for row in batch]
@@ -602,8 +607,7 @@ class Planner:
         row_builder.set_slot_idxs(select_list, remove_duplicates=False)
         for i, col in enumerate(all_base_cols):
             plan.row_builder.add_table_column(col, select_list[i].slot_idx)
-
-        ctx = exec.ExecContext(row_builder)
+        ctx = exec.ExecContext(row_builder, num_computed_exprs=len(recomputed_exprs))
         # we're returning everything to the user, so we might as well do it in a single batch
         ctx.batch_size = 0
         plan.set_ctx(ctx)
@@ -656,6 +660,7 @@ class Planner:
             ignore_errors=True,
             exact_version_only=view.get_bases(),
         )
+        plan.ctx.num_computed_exprs = len(recomputed_exprs)
         for i, col in enumerate(copied_cols + list(recomputed_cols)):  # same order as select_list
             plan.row_builder.add_table_column(col, select_list[i].slot_idx)
         # TODO: avoid duplication with view_load_plan() logic (where does this belong?)
@@ -695,7 +700,7 @@ class Planner:
         base_analyzer = Analyzer(
             from_clause, iterator_args, where_clause=target.predicate, sample_clause=target.sample_clause
         )
-        row_builder = exprs.RowBuilder(base_analyzer.all_exprs, stored_cols, [])
+        row_builder = exprs.RowBuilder(base_analyzer.all_exprs, stored_cols, [], target)

         # if we're propagating an insert, we only want to see those base rows that were created for the current version
         # execution plan:
@@ -832,7 +837,11 @@ class Planner:
             order_by_clause=order_by_clause,
             sample_clause=sample_clause,
         )
-
+        # If the from_clause has a single table, we can use it as the context table for the RowBuilder.
+        # Otherwise there is no context table, but that's ok, because the context table is only needed for
+        # table mutations, which can't happen during a join.
+        context_tbl = from_clause.tbls[0].tbl_version.get() if len(from_clause.tbls) == 1 else None
+        row_builder = exprs.RowBuilder(analyzer.all_exprs, [], [], context_tbl)

         analyzer.finalize(row_builder)
         # select_list: we need to materialize everything that's been collected
@@ -1035,16 +1044,14 @@ class Planner:
         return Analyzer(FromClause(tbls=[tbl]), [], where_clause=where_clause)

     @classmethod
-    def create_add_column_plan(
-        cls, tbl: catalog.TableVersionPath, col: catalog.Column
-    ) -> tuple[exec.ExecNode, Optional[int]]:
+    def create_add_column_plan(cls, tbl: catalog.TableVersionPath, col: catalog.Column) -> exec.ExecNode:
         """Creates a plan for InsertableTable.add_column()
         Returns:
             plan: the plan to execute
             value_expr slot idx for the plan output (for computed cols)
         """
         assert isinstance(tbl, catalog.TableVersionPath)
-        row_builder = exprs.RowBuilder(output_exprs=[], columns=[col], input_exprs=[])
+        row_builder = exprs.RowBuilder(output_exprs=[], columns=[col], input_exprs=[], tbl=tbl.tbl_version.get())
         analyzer = Analyzer(FromClause(tbls=[tbl]), row_builder.default_eval_ctx.target_exprs)
         plan = cls._create_query_plan(
             row_builder=row_builder, analyzer=analyzer, eval_ctx=row_builder.default_eval_ctx, with_pk=True
@@ -1052,9 +1059,10 @@ class Planner:
         plan.ctx.batch_size = 16
         plan.ctx.show_pbar = True
         plan.ctx.ignore_errors = True
+        computed_exprs = row_builder.output_exprs - row_builder.input_exprs
+        plan.ctx.num_computed_exprs = len(computed_exprs)  # we are adding a computed column, so we need to evaluate it

         # we want to flush images
         if col.is_computed and col.is_stored and col.col_type.is_image_type():
             plan.set_stored_img_cols(row_builder.output_slot_idxs())
-
-        return plan, value_expr_slot_idx
+        return plan
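The create_update_plan changes above make explicit recompute targets behave symmetrically with cascading updates: the targets are expanded to their dependents only when cascade is requested, and index value columns are always folded in afterwards. A toy model of the dependency expansion using plain sets and dicts (not pixeltable classes; the column names and edges are made up):

dependents = {'a': {'b'}, 'b': {'c'}}  # hypothetical column dependency edges

def dependent_columns(cols: set[str]) -> set[str]:
    # Transitive closure over the dependency edges.
    out: set[str] = set()
    frontier = set(cols)
    while frontier:
        nxt = set().union(*(dependents.get(c, set()) for c in frontier)) - out
        out |= nxt
        frontier = nxt
    return out

def recompute_set(recompute_targets: set[str], cascade: bool) -> set[str]:
    recomputed = set(recompute_targets)
    if cascade:
        recomputed |= dependent_columns(recomputed)
    return recomputed

print(recompute_set({'a'}, cascade=False))          # {'a'}
print(sorted(recompute_set({'a'}, cascade=True)))   # ['a', 'b', 'c']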
pixeltable/share/packager.py
CHANGED

@@ -361,49 +361,32 @@ class TableRestorer:
         )

         tbl_md = [schema.FullTableMd.from_dict(t) for t in self.md['md']['tables']]
+        for md in tbl_md:
+            md.tbl_md.is_replica = True

-        # Create the replica table
-        # The logic here needs to be completely restructured in order to make it concurrency-safe.
-        # - Catalog.create_replica() needs to write the metadata and also create the physical store tables
-        # and populate them, otherwise concurrent readers will see an inconsistent state (table metadata w/o
-        # an actual table)
-        # - this could be done one replica at a time (instead of the entire hierarchy)
         cat = catalog.Catalog.get()
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        else:
-            ancestor_md = tbl_md  # Not a pure snapshot; include replica_tbl
-
-        # Instantiate data from the Parquet tables.
-        with Env.get().begin_xact():
-            for md in ancestor_md[::-1]:  # Base table first
-                # Create a TableVersion instance (and a store table) for this ancestor.
-                tv = catalog.TableVersion.create_replica(md)
-                # Now import data from Parquet.
-                _logger.info(f'Importing table {tv.name!r}.')
-                self.__import_table(self.tmp_dir, tv, md)
-
-        with cat.begin_xact(for_write=False):
+
+        with cat.begin_xact(for_write=True):
+            # Create (or update) the replica table and its ancestors, along with TableVersion instances for any
+            # versions that have not been seen before.
+            cat.create_replica(catalog.Path(self.tbl_path), tbl_md)
+
+            # Now we need to load data for replica_tbl and its ancestors, except that we skip
+            # replica_tbl itself if it's a pure snapshot.
+            for md in tbl_md[::-1]:  # Base table first
+                if not md.is_pure_snapshot:
+                    tv = cat.get_tbl_version(UUID(md.tbl_md.tbl_id), md.version_md.version)
+                    # Import data from Parquet.
+                    _logger.info(f'Importing table {tv.name!r}.')
+                    self.__import_table(self.tmp_dir, tv, md)
+
         return cat.get_table_by_id(UUID(tbl_md[0].tbl_md.tbl_id))

     def __import_table(self, bundle_path: Path, tv: catalog.TableVersion, tbl_md: schema.FullTableMd) -> None:
         """
         Import the Parquet table into the Pixeltable catalog.
         """
-        tbl_id =
+        tbl_id = UUID(tbl_md.tbl_md.tbl_id)
         parquet_dir = bundle_path / 'tables' / f'tbl_{tbl_id.hex}'
         parquet_table = pq.read_table(str(parquet_dir))
         replica_version = tv.version
@@ -626,9 +609,8 @@ class TableRestorer:
                 # First time seeing this pxtmedia:// URL. Relocate the file to the media store and record the mapping
                 # in self.media_files.
                 src_path = self.tmp_dir / 'media' / parsed_url.netloc
-
-                src_path.
-                self.media_files[url] = urllib.parse.urljoin('file:', urllib.request.pathname2url(str(dest_path)))
+                # Move the file to the media store and update the URL.
+                self.media_files[url] = MediaStore.relocate_local_media_file(src_path, tv.id, media_col_id, tv.version)
             return self.media_files[url]
         # For any type of URL other than a local file, just return the URL as-is.
         return url