pixeltable 0.3.15__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +1 -1
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +9 -1
- pixeltable/catalog/catalog.py +559 -134
- pixeltable/catalog/column.py +36 -32
- pixeltable/catalog/dir.py +1 -2
- pixeltable/catalog/globals.py +12 -0
- pixeltable/catalog/insertable_table.py +30 -25
- pixeltable/catalog/schema_object.py +9 -6
- pixeltable/catalog/table.py +334 -267
- pixeltable/catalog/table_version.py +358 -241
- pixeltable/catalog/table_version_handle.py +18 -2
- pixeltable/catalog/table_version_path.py +86 -16
- pixeltable/catalog/view.py +47 -23
- pixeltable/dataframe.py +198 -19
- pixeltable/env.py +6 -4
- pixeltable/exceptions.py +6 -0
- pixeltable/exec/__init__.py +1 -1
- pixeltable/exec/exec_node.py +2 -0
- pixeltable/exec/expr_eval/evaluators.py +4 -1
- pixeltable/exec/expr_eval/expr_eval_node.py +4 -4
- pixeltable/exec/in_memory_data_node.py +1 -1
- pixeltable/exec/sql_node.py +188 -22
- pixeltable/exprs/column_property_ref.py +16 -6
- pixeltable/exprs/column_ref.py +33 -11
- pixeltable/exprs/comparison.py +1 -1
- pixeltable/exprs/data_row.py +5 -3
- pixeltable/exprs/expr.py +11 -4
- pixeltable/exprs/literal.py +2 -0
- pixeltable/exprs/row_builder.py +4 -6
- pixeltable/exprs/rowid_ref.py +8 -0
- pixeltable/exprs/similarity_expr.py +1 -0
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +5 -3
- pixeltable/func/tools.py +12 -2
- pixeltable/func/udf.py +2 -2
- pixeltable/functions/__init__.py +1 -0
- pixeltable/functions/anthropic.py +19 -45
- pixeltable/functions/deepseek.py +19 -38
- pixeltable/functions/fireworks.py +9 -18
- pixeltable/functions/gemini.py +2 -3
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/llama_cpp.py +6 -6
- pixeltable/functions/mistralai.py +16 -53
- pixeltable/functions/ollama.py +1 -1
- pixeltable/functions/openai.py +82 -165
- pixeltable/functions/string.py +212 -58
- pixeltable/functions/together.py +22 -80
- pixeltable/globals.py +10 -4
- pixeltable/index/base.py +5 -0
- pixeltable/index/btree.py +5 -0
- pixeltable/index/embedding_index.py +5 -0
- pixeltable/io/external_store.py +10 -31
- pixeltable/io/label_studio.py +5 -5
- pixeltable/io/parquet.py +2 -2
- pixeltable/io/table_data_conduit.py +1 -32
- pixeltable/metadata/__init__.py +11 -2
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_30.py +6 -11
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/util.py +3 -9
- pixeltable/metadata/notes.py +3 -0
- pixeltable/metadata/schema.py +13 -1
- pixeltable/plan.py +135 -12
- pixeltable/share/packager.py +138 -14
- pixeltable/share/publish.py +2 -2
- pixeltable/store.py +19 -13
- pixeltable/type_system.py +30 -0
- pixeltable/utils/dbms.py +1 -1
- pixeltable/utils/formatter.py +64 -42
- {pixeltable-0.3.15.dist-info → pixeltable-0.4.0.dist-info}/METADATA +2 -1
- {pixeltable-0.3.15.dist-info → pixeltable-0.4.0.dist-info}/RECORD +78 -73
- {pixeltable-0.3.15.dist-info → pixeltable-0.4.0.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.15.dist-info → pixeltable-0.4.0.dist-info}/WHEEL +0 -0
- {pixeltable-0.3.15.dist-info → pixeltable-0.4.0.dist-info}/entry_points.txt +0 -0
pixeltable/index/btree.py
CHANGED
|
@@ -59,6 +59,11 @@ class BtreeIndex(IndexBase):
|
|
|
59
59
|
conn = Env.get().conn
|
|
60
60
|
idx.create(bind=conn)
|
|
61
61
|
|
|
62
|
+
def drop_index(self, index_name: str, index_value_col: 'catalog.Column') -> None:
|
|
63
|
+
"""Drop the index on the index value column"""
|
|
64
|
+
# TODO: implement
|
|
65
|
+
raise NotImplementedError()
|
|
66
|
+
|
|
62
67
|
@classmethod
|
|
63
68
|
def display_name(cls) -> str:
|
|
64
69
|
return 'btree'
|
|
pixeltable/index/embedding_index.py
CHANGED
|
@@ -148,6 +148,11 @@ class EmbeddingIndex(IndexBase):
|
|
|
148
148
|
conn = Env.get().conn
|
|
149
149
|
idx.create(bind=conn)
|
|
150
150
|
|
|
151
|
+
def drop_index(self, index_name: str, index_value_col: catalog.Column) -> None:
|
|
152
|
+
"""Drop the index on the index value column"""
|
|
153
|
+
# TODO: implement
|
|
154
|
+
raise NotImplementedError()
|
|
155
|
+
|
|
151
156
|
def similarity_clause(self, val_column: catalog.Column, item: Any) -> sql.ColumnElement:
|
|
152
157
|
"""Create a ColumnElement that represents '<val_column> <op> <item>'"""
|
|
153
158
|
assert isinstance(item, (str, PIL.Image.Image))
|
pixeltable/io/external_store.py
CHANGED
|
@@ -3,7 +3,6 @@ from __future__ import annotations
|
|
|
3
3
|
import abc
|
|
4
4
|
import itertools
|
|
5
5
|
import logging
|
|
6
|
-
import time
|
|
7
6
|
from dataclasses import dataclass
|
|
8
7
|
from typing import Any, Optional
|
|
9
8
|
from uuid import UUID
|
|
@@ -11,7 +10,7 @@ from uuid import UUID
|
|
|
11
10
|
import pixeltable.exceptions as excs
|
|
12
11
|
import pixeltable.type_system as ts
|
|
13
12
|
from pixeltable import Column, Table
|
|
14
|
-
from pixeltable.catalog import TableVersion
|
|
13
|
+
from pixeltable.catalog import TableVersion
|
|
15
14
|
|
|
16
15
|
_logger = logging.getLogger('pixeltable')
|
|
17
16
|
|
|
@@ -32,15 +31,11 @@ class ExternalStore(abc.ABC):
|
|
|
32
31
|
|
|
33
32
|
@abc.abstractmethod
|
|
34
33
|
def link(self, tbl_version: TableVersion) -> None:
|
|
35
|
-
"""
|
|
36
|
-
Called by `TableVersion.link()` to implement store-specific logic.
|
|
37
|
-
"""
|
|
34
|
+
"""Creates store-specific metadata needed to implement sync()."""
|
|
38
35
|
|
|
39
36
|
@abc.abstractmethod
|
|
40
37
|
def unlink(self, tbl_version: TableVersion) -> None:
|
|
41
|
-
"""
|
|
42
|
-
Called by `TableVersion.unlink()` to implement store-specific logic.
|
|
43
|
-
"""
|
|
38
|
+
"""Removes store-specific metadata created in link()."""
|
|
44
39
|
|
|
45
40
|
@abc.abstractmethod
|
|
46
41
|
def get_local_columns(self) -> list[Column]:
|
|
@@ -111,17 +106,10 @@ class Project(ExternalStore, abc.ABC):
|
|
|
111
106
|
|
|
112
107
|
if len(stored_proxies_needed) > 0:
|
|
113
108
|
_logger.info(f'Creating stored proxies for columns: {[col.name for col in stored_proxies_needed]}')
|
|
114
|
-
# Create stored proxies for columns that need one
|
|
115
|
-
|
|
116
|
-
tbl_version.version += 1
|
|
117
|
-
preceding_schema_version = tbl_version.schema_version
|
|
118
|
-
tbl_version.schema_version = tbl_version.version
|
|
119
|
-
proxy_cols = [self.create_stored_proxy(tbl_version, col) for col in stored_proxies_needed]
|
|
109
|
+
# Create stored proxies for columns that need one
|
|
110
|
+
proxy_cols = [self.create_stored_proxy(col) for col in stored_proxies_needed]
|
|
120
111
|
# Add the columns; this will also update table metadata.
|
|
121
|
-
tbl_version.
|
|
122
|
-
# We don't need to retain `UpdateStatus` since the stored proxies are intended to be
|
|
123
|
-
# invisible to the user.
|
|
124
|
-
tbl_version._update_md(time.time(), preceding_schema_version=preceding_schema_version)
|
|
112
|
+
tbl_version.add_columns(proxy_cols, print_stats=False, on_error='ignore')
|
|
125
113
|
|
|
126
114
|
def unlink(self, tbl_version: TableVersion) -> None:
|
|
127
115
|
# Determine which stored proxies can be deleted. (A stored proxy can be deleted if it is not referenced by
|
|
@@ -132,15 +120,10 @@ class Project(ExternalStore, abc.ABC):
|
|
|
132
120
|
deletions_needed = deletions_needed.difference(set(store.stored_proxies.values()))
|
|
133
121
|
if len(deletions_needed) > 0:
|
|
134
122
|
_logger.info(f'Removing stored proxies for columns: {[col.name for col in deletions_needed]}')
|
|
135
|
-
# Delete stored proxies that are no longer needed.
|
|
136
|
-
tbl_version.version += 1
|
|
137
|
-
preceding_schema_version = tbl_version.schema_version
|
|
138
|
-
tbl_version.schema_version = tbl_version.version
|
|
139
123
|
tbl_version._drop_columns(deletions_needed)
|
|
140
124
|
self.stored_proxies.clear()
|
|
141
|
-
tbl_version._update_md(time.time(), preceding_schema_version=preceding_schema_version)
|
|
142
125
|
|
|
143
|
-
def create_stored_proxy(self,
|
|
126
|
+
def create_stored_proxy(self, col: Column) -> Column:
|
|
144
127
|
"""
|
|
145
128
|
Creates a proxy column for the specified column. The proxy column will be created in the specified
|
|
146
129
|
`TableVersion`.
|
|
@@ -158,12 +141,7 @@ class Project(ExternalStore, abc.ABC):
|
|
|
158
141
|
# Once `destination` is implemented, it can be replaced with a simple `ColumnRef`.
|
|
159
142
|
computed_with=exprs.ColumnRef(col).apply(lambda x: x, col_type=col.col_type),
|
|
160
143
|
stored=True,
|
|
161
|
-
col_id=tbl_version.next_col_id,
|
|
162
|
-
sa_col_type=col.col_type.to_sa_type(),
|
|
163
|
-
schema_version_add=tbl_version.schema_version,
|
|
164
144
|
)
|
|
165
|
-
proxy_col.tbl = TableVersionHandle(tbl_version.id, tbl_version.effective_version, tbl_version=tbl_version)
|
|
166
|
-
tbl_version.next_col_id += 1
|
|
167
145
|
self.stored_proxies[col] = proxy_col
|
|
168
146
|
return proxy_col
|
|
169
147
|
|
|
@@ -213,6 +191,7 @@ class Project(ExternalStore, abc.ABC):
|
|
|
213
191
|
external (import or export) columns.
|
|
214
192
|
If validation fails, an exception will be raised. If validation succeeds, a new mapping will be returned
|
|
215
193
|
in which the Pixeltable column names are resolved to the corresponding `Column` objects.
|
|
194
|
+
TODO: return columns as names or qualified ids
|
|
216
195
|
"""
|
|
217
196
|
from pixeltable import exprs
|
|
218
197
|
|
|
@@ -223,7 +202,7 @@ class Project(ExternalStore, abc.ABC):
|
|
|
223
202
|
resolved_col_mapping: dict[Column, str] = {}
|
|
224
203
|
|
|
225
204
|
# Validate names
|
|
226
|
-
t_cols = set(table.
|
|
205
|
+
t_cols = set(table._get_schema().keys())
|
|
227
206
|
for t_col, ext_col in col_mapping.items():
|
|
228
207
|
if t_col not in t_cols:
|
|
229
208
|
if is_user_specified_col_mapping:
|
|
@@ -246,7 +225,7 @@ class Project(ExternalStore, abc.ABC):
|
|
|
246
225
|
assert isinstance(col_ref, exprs.ColumnRef)
|
|
247
226
|
resolved_col_mapping[col_ref.col] = ext_col
|
|
248
227
|
# Validate column specs
|
|
249
|
-
t_col_types = table.
|
|
228
|
+
t_col_types = table._get_schema()
|
|
250
229
|
for t_col, ext_col in col_mapping.items():
|
|
251
230
|
t_col_type = t_col_types[t_col]
|
|
252
231
|
if ext_col in export_cols:
|
pixeltable/io/label_studio.py
CHANGED
|
@@ -412,8 +412,8 @@ class LabelStudioProject(Project):
|
|
|
412
412
|
# TODO(aaron-siegel): Simplify this once propagation is properly implemented in batch_update
|
|
413
413
|
ancestor = t
|
|
414
414
|
while local_annotations_col not in ancestor._tbl_version.get().cols:
|
|
415
|
-
assert ancestor.
|
|
416
|
-
ancestor = ancestor.
|
|
415
|
+
assert ancestor._get_base_table is not None
|
|
416
|
+
ancestor = ancestor._get_base_table()
|
|
417
417
|
update_status = ancestor.batch_update(updates)
|
|
418
418
|
env.Env.get().console_logger.info(f'Updated annotation(s) from {len(updates)} task(s) in {self}.')
|
|
419
419
|
return SyncStatus(pxt_rows_updated=update_status.num_rows, num_excs=update_status.num_excs)
|
|
@@ -560,7 +560,7 @@ class LabelStudioProject(Project):
|
|
|
560
560
|
|
|
561
561
|
if name is None:
|
|
562
562
|
# Create a default name that's unique to the table
|
|
563
|
-
all_stores = t.external_stores
|
|
563
|
+
all_stores = t.external_stores()
|
|
564
564
|
n = 0
|
|
565
565
|
while f'ls_project_{n}' in all_stores:
|
|
566
566
|
n += 1
|
|
@@ -576,8 +576,8 @@ class LabelStudioProject(Project):
|
|
|
576
576
|
local_annotations_column = ANNOTATIONS_COLUMN
|
|
577
577
|
else:
|
|
578
578
|
local_annotations_column = next(k for k, v in col_mapping.items() if v == ANNOTATIONS_COLUMN)
|
|
579
|
-
if local_annotations_column not in t.
|
|
580
|
-
t.add_columns({local_annotations_column: ts.
|
|
579
|
+
if local_annotations_column not in t._get_schema():
|
|
580
|
+
t.add_columns({local_annotations_column: ts.Json})
|
|
581
581
|
|
|
582
582
|
resolved_col_mapping = cls.validate_columns(
|
|
583
583
|
t, config.export_columns, {ANNOTATIONS_COLUMN: ts.JsonType(nullable=True)}, col_mapping
|
pixeltable/io/parquet.py
CHANGED
|
@@ -14,7 +14,7 @@ import PIL.Image
|
|
|
14
14
|
|
|
15
15
|
import pixeltable as pxt
|
|
16
16
|
import pixeltable.exceptions as excs
|
|
17
|
-
from pixeltable.
|
|
17
|
+
from pixeltable.catalog import Catalog
|
|
18
18
|
from pixeltable.utils.transactional_directory import transactional_directory
|
|
19
19
|
|
|
20
20
|
if typing.TYPE_CHECKING:
|
|
@@ -87,7 +87,7 @@ def export_parquet(
|
|
|
87
87
|
current_value_batch: dict[str, deque] = {k: deque() for k in df.schema}
|
|
88
88
|
current_byte_estimate = 0
|
|
89
89
|
|
|
90
|
-
with
|
|
90
|
+
with Catalog.get().begin_xact(for_write=False):
|
|
91
91
|
for data_row in df._exec():
|
|
92
92
|
for (col_name, col_type), e in zip(df.schema.items(), df._select_list_exprs):
|
|
93
93
|
val = data_row[e.slot_idx]
|
|
pixeltable/io/table_data_conduit.py
CHANGED
|
@@ -23,7 +23,6 @@ from .utils import normalize_schema_names
|
|
|
23
23
|
|
|
24
24
|
_logger = logging.getLogger('pixeltable')
|
|
25
25
|
|
|
26
|
-
# ---------------------------------------------------------------------------------------------------------
|
|
27
26
|
|
|
28
27
|
if TYPE_CHECKING:
|
|
29
28
|
import datasets # type: ignore[import-untyped]
|
|
@@ -46,9 +45,6 @@ class TableDataConduitFormat(str, enum.Enum):
|
|
|
46
45
|
return False
|
|
47
46
|
|
|
48
47
|
|
|
49
|
-
# ---------------------------------------------------------------------------------------------------------
|
|
50
|
-
|
|
51
|
-
|
|
52
48
|
@dataclass
|
|
53
49
|
class TableDataConduit:
|
|
54
50
|
source: TableDataSource
|
|
@@ -105,7 +101,7 @@ class TableDataConduit:
|
|
|
105
101
|
def add_table_info(self, table: pxt.Table) -> None:
|
|
106
102
|
"""Add information about the table into which we are inserting data"""
|
|
107
103
|
assert isinstance(table, pxt.Table)
|
|
108
|
-
self.pxt_schema = table.
|
|
104
|
+
self.pxt_schema = table._get_schema()
|
|
109
105
|
self.pxt_pk = table._tbl_version.get().primary_key
|
|
110
106
|
for col in table._tbl_version_path.columns():
|
|
111
107
|
if col.is_required_for_insert:
|
|
@@ -129,9 +125,6 @@ class TableDataConduit:
|
|
|
129
125
|
raise excs.Error(f'Missing required column(s) ({", ".join(missing_cols)})')
|
|
130
126
|
|
|
131
127
|
|
|
132
|
-
# ---------------------------------------------------------------------------------------------------------
|
|
133
|
-
|
|
134
|
-
|
|
135
128
|
class DFTableDataConduit(TableDataConduit):
|
|
136
129
|
pxt_df: pxt.DataFrame = None
|
|
137
130
|
|
|
@@ -155,9 +148,6 @@ class DFTableDataConduit(TableDataConduit):
|
|
|
155
148
|
self.check_source_columns_are_insertable(self.pxt_df.schema.keys())
|
|
156
149
|
|
|
157
150
|
|
|
158
|
-
# ---------------------------------------------------------------------------------------------------------
|
|
159
|
-
|
|
160
|
-
|
|
161
151
|
class RowDataTableDataConduit(TableDataConduit):
|
|
162
152
|
raw_rows: Optional[RowData] = None
|
|
163
153
|
disable_mapping: bool = True
|
|
@@ -235,9 +225,6 @@ class RowDataTableDataConduit(TableDataConduit):
|
|
|
235
225
|
yield self.valid_rows
|
|
236
226
|
|
|
237
227
|
|
|
238
|
-
# ---------------------------------------------------------------------------------------------------------
|
|
239
|
-
|
|
240
|
-
|
|
241
228
|
class PandasTableDataConduit(TableDataConduit):
|
|
242
229
|
pd_df: pd.DataFrame = None
|
|
243
230
|
batch_count: int = 0
|
|
@@ -293,9 +280,6 @@ class PandasTableDataConduit(TableDataConduit):
|
|
|
293
280
|
yield self.valid_rows
|
|
294
281
|
|
|
295
282
|
|
|
296
|
-
# ---------------------------------------------------------------------------------------------------------
|
|
297
|
-
|
|
298
|
-
|
|
299
283
|
class CSVTableDataConduit(TableDataConduit):
|
|
300
284
|
@classmethod
|
|
301
285
|
def from_tds(cls, tds: TableDataConduit) -> 'PandasTableDataConduit':
|
|
@@ -307,9 +291,6 @@ class CSVTableDataConduit(TableDataConduit):
|
|
|
307
291
|
return PandasTableDataConduit.from_tds(t)
|
|
308
292
|
|
|
309
293
|
|
|
310
|
-
# ---------------------------------------------------------------------------------------------------------
|
|
311
|
-
|
|
312
|
-
|
|
313
294
|
class ExcelTableDataConduit(TableDataConduit):
|
|
314
295
|
@classmethod
|
|
315
296
|
def from_tds(cls, tds: TableDataConduit) -> 'PandasTableDataConduit':
|
|
@@ -321,9 +302,6 @@ class ExcelTableDataConduit(TableDataConduit):
|
|
|
321
302
|
return PandasTableDataConduit.from_tds(t)
|
|
322
303
|
|
|
323
304
|
|
|
324
|
-
# ---------------------------------------------------------------------------------------------------------
|
|
325
|
-
|
|
326
|
-
|
|
327
305
|
class JsonTableDataConduit(TableDataConduit):
|
|
328
306
|
@classmethod
|
|
329
307
|
def from_tds(cls, tds: TableDataConduit) -> RowDataTableDataConduit:
|
|
@@ -346,9 +324,6 @@ class JsonTableDataConduit(TableDataConduit):
|
|
|
346
324
|
return t2
|
|
347
325
|
|
|
348
326
|
|
|
349
|
-
# ---------------------------------------------------------------------------------------------------------
|
|
350
|
-
|
|
351
|
-
|
|
352
327
|
class HFTableDataConduit(TableDataConduit):
|
|
353
328
|
hf_ds: Optional[Union[datasets.Dataset, datasets.DatasetDict]] = None
|
|
354
329
|
column_name_for_split: Optional[str] = None
|
|
@@ -478,9 +453,6 @@ class HFTableDataConduit(TableDataConduit):
|
|
|
478
453
|
yield batch
|
|
479
454
|
|
|
480
455
|
|
|
481
|
-
# ---------------------------------------------------------------------------------------------------------
|
|
482
|
-
|
|
483
|
-
|
|
484
456
|
class ParquetTableDataConduit(TableDataConduit):
|
|
485
457
|
pq_ds: Optional[ParquetDataset] = None
|
|
486
458
|
|
|
@@ -542,9 +514,6 @@ class ParquetTableDataConduit(TableDataConduit):
|
|
|
542
514
|
raise e
|
|
543
515
|
|
|
544
516
|
|
|
545
|
-
# ---------------------------------------------------------------------------------------------------------
|
|
546
|
-
|
|
547
|
-
|
|
548
517
|
class UnkTableDataConduit(TableDataConduit):
|
|
549
518
|
"""Source type is not known at the time of creation"""
|
|
550
519
|
|
pixeltable/metadata/__init__.py
CHANGED
|
@@ -8,15 +8,17 @@ from typing import Callable
|
|
|
8
8
|
import sqlalchemy as sql
|
|
9
9
|
from sqlalchemy import orm
|
|
10
10
|
|
|
11
|
+
import pixeltable as pxt
|
|
12
|
+
import pixeltable.exceptions as excs
|
|
11
13
|
from pixeltable.utils.console_output import ConsoleLogger
|
|
12
14
|
|
|
13
15
|
from .schema import SystemInfo, SystemInfoMd
|
|
14
16
|
|
|
15
17
|
_console_logger = ConsoleLogger(logging.getLogger('pixeltable'))
|
|
16
|
-
|
|
18
|
+
_logger = logging.getLogger('pixeltable')
|
|
17
19
|
|
|
18
20
|
# current version of the metadata; this is incremented whenever the metadata schema changes
|
|
19
|
-
VERSION = 35
|
|
21
|
+
VERSION = 38
|
|
20
22
|
|
|
21
23
|
|
|
22
24
|
def create_system_info(engine: sql.engine.Engine) -> None:
|
|
@@ -55,6 +57,13 @@ def upgrade_md(engine: sql.engine.Engine) -> None:
|
|
|
55
57
|
system_info = session.query(SystemInfo).one().md
|
|
56
58
|
md_version = system_info['schema_version']
|
|
57
59
|
assert isinstance(md_version, int)
|
|
60
|
+
_logger.info(f'Current database version: {md_version}, installed version: {VERSION}')
|
|
61
|
+
if md_version > VERSION:
|
|
62
|
+
raise excs.Error(
|
|
63
|
+
'This Pixeltable database was created with a newer Pixeltable version '
|
|
64
|
+
f'than the one currently installed ({pxt.__version__}).\n'
|
|
65
|
+
'Please update to the latest Pixeltable version by running: pip install --upgrade pixeltable'
|
|
66
|
+
)
|
|
58
67
|
if md_version == VERSION:
|
|
59
68
|
return
|
|
60
69
|
while md_version < VERSION:
|
|
pixeltable/metadata/converters/convert_13.py
CHANGED
|
@@ -12,9 +12,9 @@ _logger = logging.getLogger('pixeltable')
|
|
|
12
12
|
@register_converter(version=13)
|
|
13
13
|
def _(engine: sql.engine.Engine) -> None:
|
|
14
14
|
with engine.begin() as conn:
|
|
15
|
-
for row in conn.execute(sql.select(Table)):
|
|
15
|
+
for row in conn.execute(sql.select(Table.id, Table.md)):
|
|
16
16
|
id = row[0]
|
|
17
|
-
md = row[
|
|
17
|
+
md = row[1]
|
|
18
18
|
updated_md = __update_md(md)
|
|
19
19
|
if updated_md != md:
|
|
20
20
|
_logger.info(f'Updating schema for table: {id}')
|
|
pixeltable/metadata/converters/convert_30.py
CHANGED
|
@@ -1,33 +1,28 @@
|
|
|
1
1
|
import copy
|
|
2
|
+
from uuid import UUID
|
|
2
3
|
|
|
3
4
|
import sqlalchemy as sql
|
|
4
5
|
|
|
5
6
|
from pixeltable.metadata import register_converter
|
|
6
7
|
from pixeltable.metadata.converters.util import (
|
|
7
|
-
|
|
8
|
+
convert_table_md,
|
|
8
9
|
convert_table_schema_version_record,
|
|
9
10
|
convert_table_version_record,
|
|
10
11
|
)
|
|
11
|
-
from pixeltable.metadata.schema import
|
|
12
|
+
from pixeltable.metadata.schema import TableSchemaVersion, TableVersion
|
|
12
13
|
|
|
13
14
|
|
|
14
15
|
@register_converter(version=30)
|
|
15
16
|
def _(engine: sql.engine.Engine) -> None:
|
|
16
|
-
|
|
17
|
+
convert_table_md(engine, table_md_updater=__update_table_md)
|
|
17
18
|
convert_table_version_record(engine, table_version_record_updater=__update_table_version_record)
|
|
18
19
|
convert_table_schema_version_record(
|
|
19
20
|
engine, table_schema_version_record_updater=__update_table_schema_version_record
|
|
20
21
|
)
|
|
21
22
|
|
|
22
23
|
|
|
23
|
-
def
|
|
24
|
-
|
|
25
|
-
Update TableMd with table_id
|
|
26
|
-
"""
|
|
27
|
-
assert isinstance(record.md, dict)
|
|
28
|
-
md = copy.copy(record.md)
|
|
29
|
-
md['tbl_id'] = str(record.id)
|
|
30
|
-
record.md = md
|
|
24
|
+
def __update_table_md(md: dict, tbl_id: UUID) -> None:
|
|
25
|
+
md['tbl_id'] = str(tbl_id)
|
|
31
26
|
|
|
32
27
|
|
|
33
28
|
def __update_table_version_record(record: TableVersion) -> None:
|
|
pixeltable/metadata/converters/convert_35.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import sqlalchemy as sql
|
|
2
|
+
|
|
3
|
+
from pixeltable.metadata import register_converter
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@register_converter(version=35)
|
|
7
|
+
def _(engine: sql.engine.Engine) -> None:
|
|
8
|
+
with engine.begin() as conn:
|
|
9
|
+
conn.execute(sql.text('ALTER TABLE tables ADD COLUMN lock_dummy int8'))
|
|
pixeltable/metadata/converters/convert_36.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Any, Optional
|
|
3
|
+
from uuid import UUID
|
|
4
|
+
|
|
5
|
+
import sqlalchemy as sql
|
|
6
|
+
|
|
7
|
+
from pixeltable.metadata import register_converter
|
|
8
|
+
from pixeltable.metadata.converters.util import convert_table_md
|
|
9
|
+
|
|
10
|
+
_logger = logging.getLogger('pixeltable')
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@register_converter(version=36)
|
|
14
|
+
def _(engine: sql.engine.Engine) -> None:
|
|
15
|
+
convert_table_md(engine, table_md_updater=__update_table_md, substitution_fn=__substitute_md)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def __update_table_md(table_md: dict, table_id: UUID) -> None:
|
|
19
|
+
"""Update the view metadata to add the sample_clause field if it is missing
|
|
20
|
+
|
|
21
|
+
Args:
|
|
22
|
+
table_md (dict): copy of the original table metadata. this gets updated in place.
|
|
23
|
+
table_id (UUID): the table id
|
|
24
|
+
|
|
25
|
+
"""
|
|
26
|
+
if table_md['view_md'] is None:
|
|
27
|
+
return
|
|
28
|
+
if 'sample_clause' not in table_md['view_md']:
|
|
29
|
+
table_md['view_md']['sample_clause'] = None
|
|
30
|
+
_logger.info(f'Updating view metadata for table: {table_id}')
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
|
|
34
|
+
if isinstance(v, dict) and (v.get('_classname') == 'DataFrame'):
|
|
35
|
+
if 'sample_clause' not in v:
|
|
36
|
+
v['sample_clause'] = None
|
|
37
|
+
return k, v
|
|
38
|
+
return None
|
|
pixeltable/metadata/converters/convert_37.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from uuid import UUID
|
|
2
|
+
|
|
3
|
+
import sqlalchemy as sql
|
|
4
|
+
|
|
5
|
+
from pixeltable.metadata import register_converter
|
|
6
|
+
from pixeltable.metadata.converters.util import convert_table_md
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@register_converter(version=37)
|
|
10
|
+
def _(engine: sql.engine.Engine) -> None:
|
|
11
|
+
convert_table_md(engine, table_md_updater=__update_table_md)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def __update_table_md(table_md: dict, _: UUID) -> None:
|
|
15
|
+
table_md['view_sn'] = 0
|
|
pixeltable/metadata/converters/util.py
CHANGED
|
@@ -33,9 +33,10 @@ def convert_table_md(
|
|
|
33
33
|
the original entry will be replaced, and the traversal will continue with `v'`.
|
|
34
34
|
"""
|
|
35
35
|
with engine.begin() as conn:
|
|
36
|
-
|
|
36
|
+
# avoid a SELECT * here, which breaks when we add new columns to Table
|
|
37
|
+
for row in conn.execute(sql.select(Table.id, Table.md)):
|
|
37
38
|
tbl_id = row[0]
|
|
38
|
-
table_md = row[
|
|
39
|
+
table_md = row[1]
|
|
39
40
|
assert isinstance(table_md, dict)
|
|
40
41
|
updated_table_md = copy.deepcopy(table_md)
|
|
41
42
|
if table_md_updater is not None:
|
|
@@ -145,13 +146,6 @@ def __update_schema_column(table_schema_version_md: dict, schema_column_updater:
|
|
|
145
146
|
schema_column_updater(schema_col)
|
|
146
147
|
|
|
147
148
|
|
|
148
|
-
def convert_table_record(engine: sql.engine.Engine, table_record_updater: Optional[Callable[[Table], None]]) -> None:
|
|
149
|
-
with sql.orm.Session(engine, future=True) as session:
|
|
150
|
-
for record in session.query(Table).all():
|
|
151
|
-
table_record_updater(record)
|
|
152
|
-
session.commit()
|
|
153
|
-
|
|
154
|
-
|
|
155
149
|
def convert_table_version_record(
|
|
156
150
|
engine: sql.engine.Engine, table_version_record_updater: Optional[Callable[[TableVersion], None]]
|
|
157
151
|
) -> None:
|
pixeltable/metadata/notes.py
CHANGED
|
@@ -2,6 +2,9 @@
|
|
|
2
2
|
# rather than as a comment, so that the existence of a description can be enforced by
|
|
3
3
|
# the unit tests when new versions are added.
|
|
4
4
|
VERSION_NOTES = {
|
|
5
|
+
38: 'Added TableMd.view_sn',
|
|
6
|
+
37: 'Add support for the sample() method on DataFrames',
|
|
7
|
+
36: 'Added Table.lock_dummy',
|
|
5
8
|
35: 'Track reference_tbl in ColumnRef',
|
|
6
9
|
34: 'Set default value for is_pk field in column metadata to False',
|
|
7
10
|
33: 'Add is_replica field to table metadata',
|
pixeltable/metadata/schema.py
CHANGED
|
@@ -84,7 +84,8 @@ class Dir(Base):
|
|
|
84
84
|
)
|
|
85
85
|
parent_id: orm.Mapped[uuid.UUID] = orm.mapped_column(UUID(as_uuid=True), ForeignKey('dirs.id'), nullable=True)
|
|
86
86
|
md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False) # DirMd
|
|
87
|
-
|
|
87
|
+
|
|
88
|
+
# used to force acquisition of an X-lock via an Update stmt
|
|
88
89
|
lock_dummy: orm.Mapped[int] = orm.mapped_column(BigInteger, nullable=True)
|
|
89
90
|
|
|
90
91
|
|
|
@@ -146,6 +147,9 @@ class ViewMd:
|
|
|
146
147
|
# filter predicate applied to the base table; view-only
|
|
147
148
|
predicate: Optional[dict[str, Any]]
|
|
148
149
|
|
|
150
|
+
# sampling predicate applied to the base table; view-only
|
|
151
|
+
sample_clause: Optional[dict[str, Any]]
|
|
152
|
+
|
|
149
153
|
# ComponentIterator subclass; only for component views
|
|
150
154
|
iterator_class_fqn: Optional[str]
|
|
151
155
|
|
|
@@ -173,6 +177,11 @@ class TableMd:
|
|
|
173
177
|
# - every row is assigned a unique and immutable rowid on insertion
|
|
174
178
|
next_row_id: int
|
|
175
179
|
|
|
180
|
+
# sequence number to track changes in the set of mutable views of this table (ie, this table = the view base)
|
|
181
|
+
# - incremented for each add/drop of a mutable view
|
|
182
|
+
# - only maintained for mutable tables
|
|
183
|
+
view_sn: int
|
|
184
|
+
|
|
176
185
|
# Metadata format for external stores:
|
|
177
186
|
# {'class': 'pixeltable.io.label_studio.LabelStudioProject', 'md': {'project_id': 3}}
|
|
178
187
|
external_stores: list[dict[str, Any]]
|
|
@@ -200,6 +209,9 @@ class Table(Base):
|
|
|
200
209
|
dir_id: orm.Mapped[uuid.UUID] = orm.mapped_column(UUID(as_uuid=True), ForeignKey('dirs.id'), nullable=False)
|
|
201
210
|
md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False) # TableMd
|
|
202
211
|
|
|
212
|
+
# used to force acquisition of an X-lock via an Update stmt
|
|
213
|
+
lock_dummy: orm.Mapped[int] = orm.mapped_column(BigInteger, nullable=True)
|
|
214
|
+
|
|
203
215
|
|
|
204
216
|
@dataclasses.dataclass
|
|
205
217
|
class TableVersionMd:
|