pixeltable 0.2.14__py3-none-any.whl → 0.2.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/column.py +6 -3
- pixeltable/catalog/dir.py +1 -1
- pixeltable/catalog/globals.py +15 -6
- pixeltable/catalog/insertable_table.py +23 -8
- pixeltable/catalog/named_function.py +1 -1
- pixeltable/catalog/path_dict.py +4 -4
- pixeltable/catalog/schema_object.py +30 -18
- pixeltable/catalog/table.py +87 -104
- pixeltable/catalog/table_version.py +35 -24
- pixeltable/catalog/table_version_path.py +2 -2
- pixeltable/catalog/view.py +15 -8
- pixeltable/dataframe.py +56 -56
- pixeltable/env.py +10 -9
- pixeltable/exec/__init__.py +3 -3
- pixeltable/exec/aggregation_node.py +3 -3
- pixeltable/exec/expr_eval_node.py +3 -3
- pixeltable/exec/in_memory_data_node.py +4 -4
- pixeltable/exec/sql_node.py +4 -1
- pixeltable/exprs/arithmetic_expr.py +41 -16
- pixeltable/exprs/array_slice.py +3 -4
- pixeltable/exprs/column_ref.py +20 -4
- pixeltable/exprs/comparison.py +11 -6
- pixeltable/exprs/data_row.py +3 -0
- pixeltable/exprs/expr.py +88 -23
- pixeltable/exprs/function_call.py +12 -1
- pixeltable/exprs/globals.py +3 -1
- pixeltable/exprs/inline_array.py +4 -4
- pixeltable/exprs/json_path.py +36 -20
- pixeltable/exprs/row_builder.py +4 -4
- pixeltable/exprs/rowid_ref.py +1 -1
- pixeltable/functions/__init__.py +1 -2
- pixeltable/functions/audio.py +32 -0
- pixeltable/functions/huggingface.py +4 -4
- pixeltable/functions/image.py +1 -1
- pixeltable/functions/json.py +46 -0
- pixeltable/functions/video.py +5 -1
- pixeltable/functions/{eval.py → vision.py} +166 -27
- pixeltable/globals.py +57 -28
- pixeltable/io/external_store.py +6 -6
- pixeltable/io/globals.py +13 -14
- pixeltable/io/label_studio.py +6 -6
- pixeltable/io/pandas.py +60 -19
- pixeltable/io/parquet.py +14 -14
- pixeltable/iterators/document.py +7 -7
- pixeltable/iterators/video.py +55 -23
- pixeltable/plan.py +58 -29
- pixeltable/store.py +97 -59
- pixeltable/tool/create_test_db_dump.py +17 -11
- pixeltable/type_system.py +155 -143
- pixeltable/utils/pytorch.py +12 -10
- {pixeltable-0.2.14.dist-info → pixeltable-0.2.16.dist-info}/METADATA +10 -10
- {pixeltable-0.2.14.dist-info → pixeltable-0.2.16.dist-info}/RECORD +56 -54
- {pixeltable-0.2.14.dist-info → pixeltable-0.2.16.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.14.dist-info → pixeltable-0.2.16.dist-info}/WHEEL +0 -0
- {pixeltable-0.2.14.dist-info → pixeltable-0.2.16.dist-info}/entry_points.txt +0 -0
pixeltable/catalog/table.py
CHANGED
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import abc
|
|
4
|
+
import itertools
|
|
3
5
|
import json
|
|
4
6
|
import logging
|
|
5
7
|
from pathlib import Path
|
|
6
|
-
from typing import
|
|
8
|
+
from typing import Any, Callable, Iterable, Literal, Optional, Set, Tuple, Type, Union, overload
|
|
7
9
|
from uuid import UUID
|
|
8
|
-
import abc
|
|
9
10
|
|
|
10
11
|
import pandas as pd
|
|
11
12
|
import sqlalchemy as sql
|
|
@@ -18,8 +19,9 @@ import pixeltable.exprs as exprs
|
|
|
18
19
|
import pixeltable.index as index
|
|
19
20
|
import pixeltable.metadata.schema as schema
|
|
20
21
|
import pixeltable.type_system as ts
|
|
22
|
+
|
|
21
23
|
from .column import Column
|
|
22
|
-
from .globals import _ROWID_COLUMN_NAME,
|
|
24
|
+
from .globals import _ROWID_COLUMN_NAME, UpdateStatus, is_system_column_name, is_valid_identifier
|
|
23
25
|
from .schema_object import SchemaObject
|
|
24
26
|
from .table_version import TableVersion
|
|
25
27
|
from .table_version_path import TableVersionPath
|
|
@@ -27,7 +29,7 @@ from .table_version_path import TableVersionPath
|
|
|
27
29
|
_logger = logging.getLogger('pixeltable')
|
|
28
30
|
|
|
29
31
|
class Table(SchemaObject):
|
|
30
|
-
"""Base class for
|
|
32
|
+
"""Base class for table objects (base tables, views, snapshots)."""
|
|
31
33
|
|
|
32
34
|
def __init__(self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath):
|
|
33
35
|
super().__init__(id, name, dir_id)
|
|
@@ -46,7 +48,18 @@ class Table(SchemaObject):
|
|
|
46
48
|
f"WHERE {schema.Table.id.name} = :id"))
|
|
47
49
|
conn.execute(stmt, {'new_dir_id': new_dir_id, 'new_name': json.dumps(new_name), 'id': self._id})
|
|
48
50
|
|
|
49
|
-
def
|
|
51
|
+
def get_metadata(self) -> dict[str, Any]:
|
|
52
|
+
md = super().get_metadata()
|
|
53
|
+
md['base'] = self._base._path if self._base is not None else None
|
|
54
|
+
md['schema'] = self._schema
|
|
55
|
+
md['version'] = self._version
|
|
56
|
+
md['schema_version'] = self._tbl_version.schema_version
|
|
57
|
+
md['comment'] = self._comment
|
|
58
|
+
md['num_retained_versions'] = self._num_retained_versions
|
|
59
|
+
return md
|
|
60
|
+
|
|
61
|
+
@property
|
|
62
|
+
def _version(self) -> int:
|
|
50
63
|
"""Return the version of this table. Used by tests to ascertain version changes."""
|
|
51
64
|
return self._tbl_version.version
|
|
52
65
|
|
|
@@ -60,7 +73,7 @@ class Table(SchemaObject):
|
|
|
60
73
|
|
|
61
74
|
def _check_is_dropped(self) -> None:
|
|
62
75
|
if self._is_dropped:
|
|
63
|
-
raise excs.Error(f'{self.
|
|
76
|
+
raise excs.Error(f'{self._display_name()} {self._name} has been dropped')
|
|
64
77
|
|
|
65
78
|
def __getattr__(
|
|
66
79
|
self, name: str
|
|
@@ -74,7 +87,7 @@ class Table(SchemaObject):
|
|
|
74
87
|
def __getitem__(
|
|
75
88
|
self, index: object
|
|
76
89
|
) -> Union[
|
|
77
|
-
'pixeltable.func.QueryTemplateFunction', 'pixeltable.exprs.ColumnRef', 'pixeltable.
|
|
90
|
+
'pixeltable.func.QueryTemplateFunction', 'pixeltable.exprs.ColumnRef', 'pixeltable.DataFrame'
|
|
78
91
|
]:
|
|
79
92
|
"""Return a ColumnRef or QueryTemplateFunction for the given name, or a DataFrame for the given slice.
|
|
80
93
|
"""
|
|
@@ -90,10 +103,10 @@ class Table(SchemaObject):
|
|
|
90
103
|
recursive: If `False`, returns only the immediate successor views of this `Table`. If `True`, returns
|
|
91
104
|
all sub-views (including views of views, etc.)
|
|
92
105
|
"""
|
|
93
|
-
return [t.
|
|
106
|
+
return [t._path for t in self._get_views(recursive=recursive)]
|
|
94
107
|
|
|
95
108
|
def _get_views(self, *, recursive: bool = True) -> list['Table']:
|
|
96
|
-
dependents = catalog.Catalog.get().tbl_dependents[self.
|
|
109
|
+
dependents = catalog.Catalog.get().tbl_dependents[self._id]
|
|
97
110
|
if recursive:
|
|
98
111
|
return dependents + [t for view in dependents for t in view._get_views(recursive=True)]
|
|
99
112
|
else:
|
|
@@ -106,33 +119,30 @@ class Table(SchemaObject):
|
|
|
106
119
|
from pixeltable.dataframe import DataFrame
|
|
107
120
|
return DataFrame(self._tbl_version_path)
|
|
108
121
|
|
|
109
|
-
def select(self, *items: Any, **named_items: Any) -> 'pixeltable.
|
|
110
|
-
"""Return a DataFrame for this table.
|
|
111
|
-
"""
|
|
122
|
+
def select(self, *items: Any, **named_items: Any) -> 'pixeltable.DataFrame':
|
|
123
|
+
"""Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
|
|
112
124
|
# local import: avoid circular imports
|
|
113
125
|
from pixeltable.dataframe import DataFrame
|
|
114
126
|
return DataFrame(self._tbl_version_path).select(*items, **named_items)
|
|
115
127
|
|
|
116
|
-
def where(self, pred: 'exprs.Expr') -> 'pixeltable.
|
|
117
|
-
"""Return a DataFrame for this table.
|
|
118
|
-
"""
|
|
128
|
+
def where(self, pred: 'exprs.Expr') -> 'pixeltable.DataFrame':
|
|
129
|
+
"""Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
|
|
119
130
|
# local import: avoid circular imports
|
|
120
131
|
from pixeltable.dataframe import DataFrame
|
|
121
132
|
return DataFrame(self._tbl_version_path).where(pred)
|
|
122
133
|
|
|
123
|
-
def order_by(self, *items: 'exprs.Expr', asc: bool = True) -> 'pixeltable.
|
|
124
|
-
"""Return a DataFrame for this table.
|
|
125
|
-
"""
|
|
134
|
+
def order_by(self, *items: 'exprs.Expr', asc: bool = True) -> 'pixeltable.DataFrame':
|
|
135
|
+
"""Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
|
|
126
136
|
# local import: avoid circular imports
|
|
127
137
|
from pixeltable.dataframe import DataFrame
|
|
128
138
|
return DataFrame(self._tbl_version_path).order_by(*items, asc=asc)
|
|
129
139
|
|
|
130
|
-
def group_by(self, *items: 'exprs.Expr') -> 'pixeltable.
|
|
131
|
-
"""Return a DataFrame for this table."""
|
|
140
|
+
def group_by(self, *items: 'exprs.Expr') -> 'pixeltable.DataFrame':
|
|
141
|
+
"""Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
|
|
132
142
|
from pixeltable.dataframe import DataFrame
|
|
133
143
|
return DataFrame(self._tbl_version_path).group_by(*items)
|
|
134
144
|
|
|
135
|
-
def limit(self, n: int) -> 'pixeltable.
|
|
145
|
+
def limit(self, n: int) -> 'pixeltable.DataFrame':
|
|
136
146
|
from pixeltable.dataframe import DataFrame
|
|
137
147
|
return DataFrame(self._tbl_version_path).limit(n)
|
|
138
148
|
|
|
@@ -163,20 +173,18 @@ class Table(SchemaObject):
|
|
|
163
173
|
"""Return the number of rows in this table."""
|
|
164
174
|
return self._df().count()
|
|
165
175
|
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
def column_types(self) -> dict[str, ts.ColumnType]:
|
|
171
|
-
"""Return the names of the columns in this table."""
|
|
176
|
+
@property
|
|
177
|
+
def _schema(self) -> dict[str, ts.ColumnType]:
|
|
178
|
+
"""Return the schema (column names and column types) of this table."""
|
|
172
179
|
return {c.name: c.col_type for c in self._tbl_version_path.columns()}
|
|
173
180
|
|
|
174
|
-
|
|
181
|
+
@property
|
|
182
|
+
def _query_names(self) -> list[str]:
|
|
175
183
|
"""Return the names of the registered queries for this table."""
|
|
176
184
|
return list(self._queries.keys())
|
|
177
185
|
|
|
178
186
|
@property
|
|
179
|
-
def
|
|
187
|
+
def _base(self) -> Optional['Table']:
|
|
180
188
|
"""
|
|
181
189
|
The base table of this `Table`. If this table is a view, returns the `Table`
|
|
182
190
|
from which it was derived. Otherwise, returns `None`.
|
|
@@ -187,21 +195,13 @@ class Table(SchemaObject):
|
|
|
187
195
|
return catalog.Catalog.get().tbls[base_id]
|
|
188
196
|
|
|
189
197
|
@property
|
|
190
|
-
def
|
|
198
|
+
def _comment(self) -> str:
|
|
191
199
|
return self._tbl_version.comment
|
|
192
200
|
|
|
193
|
-
@comment.setter
|
|
194
|
-
def comment(self, new_comment: Optional[str]):
|
|
195
|
-
self._tbl_version.set_comment(new_comment)
|
|
196
|
-
|
|
197
201
|
@property
|
|
198
|
-
def
|
|
202
|
+
def _num_retained_versions(self):
|
|
199
203
|
return self._tbl_version.num_retained_versions
|
|
200
204
|
|
|
201
|
-
@num_retained_versions.setter
|
|
202
|
-
def num_retained_versions(self, new_num_retained_versions: int):
|
|
203
|
-
self._tbl_version.set_num_retained_versions(new_num_retained_versions)
|
|
204
|
-
|
|
205
205
|
def _description(self) -> pd.DataFrame:
|
|
206
206
|
cols = self._tbl_version_path.columns()
|
|
207
207
|
df = pd.DataFrame({
|
|
@@ -233,11 +233,11 @@ class Table(SchemaObject):
|
|
|
233
233
|
# TODO: Display comments in _repr_html()
|
|
234
234
|
def __repr__(self) -> str:
|
|
235
235
|
description_str = self._description().to_string(index=False)
|
|
236
|
-
if self.
|
|
236
|
+
if self._comment is None:
|
|
237
237
|
comment = ''
|
|
238
238
|
else:
|
|
239
|
-
comment = f'{self.
|
|
240
|
-
return f'{self.
|
|
239
|
+
comment = f'{self._comment}\n'
|
|
240
|
+
return f'{self._display_name()} \'{self._name}\'\n{comment}{description_str}'
|
|
241
241
|
|
|
242
242
|
def _repr_html_(self) -> str:
|
|
243
243
|
return self._description_html()._repr_html_()
|
|
@@ -266,51 +266,34 @@ class Table(SchemaObject):
|
|
|
266
266
|
from pixeltable.dataframe import DataFrame
|
|
267
267
|
return DataFrame(self._tbl_version_path).to_coco_dataset()
|
|
268
268
|
|
|
269
|
-
def __setitem__(self,
|
|
270
|
-
"""
|
|
271
|
-
|
|
272
|
-
column_name: the name of the new column
|
|
273
|
-
value: column type or value expression or column specification dictionary:
|
|
274
|
-
column type: a Pixeltable column type (if the table already contains rows, it must be nullable)
|
|
275
|
-
value expression: a Pixeltable expression that computes the column values
|
|
276
|
-
column specification: a dictionary with possible keys 'type', 'value', 'stored'
|
|
277
|
-
Examples:
|
|
278
|
-
Add an int column with ``None`` values:
|
|
279
|
-
|
|
280
|
-
>>> tbl['new_col'] = IntType(nullable=True)
|
|
281
|
-
|
|
282
|
-
For a table with int column ``int_col``, add a column that is the factorial of ``int_col``. The names of
|
|
283
|
-
the parameters of the Callable must correspond to existing column names (the column values are then passed
|
|
284
|
-
as arguments to the Callable). In this case, the return type cannot be inferred and needs to be specified
|
|
285
|
-
explicitly:
|
|
286
|
-
|
|
287
|
-
>>> tbl['factorial'] = {'value': lambda int_col: math.factorial(int_col), 'type': IntType()}
|
|
269
|
+
def __setitem__(self, col_name: str, spec: Union[ts.ColumnType, exprs.Expr]) -> None:
|
|
270
|
+
"""
|
|
271
|
+
Adds a column to the table. This is an alternate syntax for `add_column()`; the meaning of
|
|
288
272
|
|
|
289
|
-
|
|
290
|
-
90 degrees. In this case, the column type is inferred from the expression. Also, the column is not stored
|
|
291
|
-
(by default, computed image columns are not stored but recomputed on demand):
|
|
273
|
+
>>> tbl['new_col'] = IntType()
|
|
292
274
|
|
|
293
|
-
|
|
275
|
+
is exactly equivalent to
|
|
294
276
|
|
|
295
|
-
|
|
277
|
+
>>> tbl.add_column(new_col=IntType())
|
|
296
278
|
|
|
297
|
-
|
|
279
|
+
For details, see the documentation for [`add_column()`][pixeltable.catalog.Table.add_column].
|
|
298
280
|
"""
|
|
299
|
-
if not isinstance(
|
|
300
|
-
raise excs.Error(f'Column name must be a string, got {type(
|
|
301
|
-
if not
|
|
302
|
-
raise excs.Error(f'
|
|
303
|
-
|
|
304
|
-
new_col = self._create_columns({column_name: value})[0]
|
|
305
|
-
self._verify_column(new_col, self.column_names(), self.query_names())
|
|
306
|
-
return self._tbl_version.add_column(new_col)
|
|
281
|
+
if not isinstance(col_name, str):
|
|
282
|
+
raise excs.Error(f'Column name must be a string, got {type(col_name)}')
|
|
283
|
+
if not isinstance(spec, (ts.ColumnType, exprs.Expr)):
|
|
284
|
+
raise excs.Error(f'Column spec must be a ColumnType or an Expr, got {type(spec)}')
|
|
285
|
+
self.add_column(**{col_name: spec})
|
|
307
286
|
|
|
308
287
|
def add_column(
|
|
309
|
-
self,
|
|
310
|
-
|
|
311
|
-
|
|
288
|
+
self,
|
|
289
|
+
*,
|
|
290
|
+
type: Optional[ts.ColumnType] = None,
|
|
291
|
+
stored: Optional[bool] = None,
|
|
292
|
+
print_stats: bool = False,
|
|
293
|
+
**kwargs: Union[ts.ColumnType, exprs.Expr, Callable]
|
|
312
294
|
) -> UpdateStatus:
|
|
313
|
-
"""
|
|
295
|
+
"""
|
|
296
|
+
Adds a column to the table.
|
|
314
297
|
|
|
315
298
|
Args:
|
|
316
299
|
kwargs: Exactly one keyword argument of the form ``column-name=type|value-expression``.
|
|
@@ -364,19 +347,20 @@ class Table(SchemaObject):
|
|
|
364
347
|
self._check_is_dropped()
|
|
365
348
|
# verify kwargs and construct column schema dict
|
|
366
349
|
if len(kwargs) != 1:
|
|
367
|
-
raise excs.Error(
|
|
368
|
-
f'add_column() requires exactly one keyword argument of the form "column-name=type|value-expression"
|
|
350
|
+
raise excs.Error(
|
|
351
|
+
f'add_column() requires exactly one keyword argument of the form "column-name=type|value-expression"; '
|
|
369
352
|
f'got {len(kwargs)} instead ({", ".join(list(kwargs.keys()))})'
|
|
370
|
-
)
|
|
353
|
+
)
|
|
371
354
|
col_name, spec = next(iter(kwargs.items()))
|
|
355
|
+
if not is_valid_identifier(col_name):
|
|
356
|
+
raise excs.Error(f'Invalid column name: {col_name!r}')
|
|
357
|
+
if isinstance(spec, (ts.ColumnType, exprs.Expr)) and type is not None:
|
|
358
|
+
raise excs.Error(f'add_column(): keyword argument "type" is redundant')
|
|
359
|
+
|
|
372
360
|
col_schema: dict[str, Any] = {}
|
|
373
361
|
if isinstance(spec, ts.ColumnType):
|
|
374
|
-
if type is not None:
|
|
375
|
-
raise excs.Error(f'add_column(): keyword argument "type" is redundant')
|
|
376
362
|
col_schema['type'] = spec
|
|
377
363
|
else:
|
|
378
|
-
if isinstance(spec, exprs.Expr) and type is not None:
|
|
379
|
-
raise excs.Error(f'add_column(): keyword argument "type" is redundant')
|
|
380
364
|
col_schema['value'] = spec
|
|
381
365
|
if type is not None:
|
|
382
366
|
col_schema['type'] = type
|
|
@@ -384,7 +368,7 @@ class Table(SchemaObject):
|
|
|
384
368
|
col_schema['stored'] = stored
|
|
385
369
|
|
|
386
370
|
new_col = self._create_columns({col_name: col_schema})[0]
|
|
387
|
-
self._verify_column(new_col, self.
|
|
371
|
+
self._verify_column(new_col, set(self._schema.keys()), self._query_names)
|
|
388
372
|
return self._tbl_version.add_column(new_col, print_stats=print_stats)
|
|
389
373
|
|
|
390
374
|
@classmethod
|
|
@@ -434,8 +418,8 @@ class Table(SchemaObject):
|
|
|
434
418
|
for name, spec in schema.items():
|
|
435
419
|
col_type: Optional[ts.ColumnType] = None
|
|
436
420
|
value_expr: Optional[exprs.Expr] = None
|
|
437
|
-
stored: Optional[bool] = None
|
|
438
421
|
primary_key: Optional[bool] = None
|
|
422
|
+
stored = True
|
|
439
423
|
|
|
440
424
|
if isinstance(spec, ts.ColumnType):
|
|
441
425
|
# TODO: create copy
|
|
@@ -455,7 +439,7 @@ class Table(SchemaObject):
|
|
|
455
439
|
if value_expr is not None and isinstance(value_expr, exprs.Expr):
|
|
456
440
|
# create copy so we can modify it
|
|
457
441
|
value_expr = value_expr.copy()
|
|
458
|
-
stored = spec.get('stored')
|
|
442
|
+
stored = spec.get('stored', True)
|
|
459
443
|
primary_key = spec.get('primary_key')
|
|
460
444
|
|
|
461
445
|
column = Column(
|
|
@@ -469,7 +453,7 @@ class Table(SchemaObject):
|
|
|
469
453
|
) -> None:
|
|
470
454
|
"""Check integrity of user-supplied Column and supply defaults"""
|
|
471
455
|
if is_system_column_name(col.name):
|
|
472
|
-
raise excs.Error(f'
|
|
456
|
+
raise excs.Error(f'{col.name!r} is a reserved name in Pixeltable; please choose a different column name.')
|
|
473
457
|
if not is_valid_identifier(col.name):
|
|
474
458
|
raise excs.Error(f"Invalid column name: {col.name!r}")
|
|
475
459
|
if col.name in existing_column_names:
|
|
@@ -478,12 +462,10 @@ class Table(SchemaObject):
|
|
|
478
462
|
raise excs.Error(f'Column name conflicts with a registered query: {col.name!r}')
|
|
479
463
|
if col.stored is False and not (col.is_computed and col.col_type.is_image_type()):
|
|
480
464
|
raise excs.Error(f'Column {col.name!r}: stored={col.stored} only applies to computed image columns')
|
|
481
|
-
if col.stored is False and
|
|
465
|
+
if col.stored is False and col.has_window_fn_call():
|
|
482
466
|
raise excs.Error((
|
|
483
467
|
f'Column {col.name!r}: stored={col.stored} is not valid for image columns computed with a streaming '
|
|
484
468
|
f'function'))
|
|
485
|
-
if col.stored is None:
|
|
486
|
-
col.stored = not (col.is_computed and col.col_type.is_image_type() and not col.has_window_fn_call())
|
|
487
469
|
|
|
488
470
|
@classmethod
|
|
489
471
|
def _verify_schema(cls, schema: list[Column]) -> None:
|
|
@@ -530,7 +512,7 @@ class Table(SchemaObject):
|
|
|
530
512
|
]
|
|
531
513
|
if len(dependent_stores) > 0:
|
|
532
514
|
dependent_store_names = [
|
|
533
|
-
store.name if view.
|
|
515
|
+
store.name if view._id == self._id else f'{store.name} (in view `{view._name}`)'
|
|
534
516
|
for view, store in dependent_stores
|
|
535
517
|
]
|
|
536
518
|
raise excs.Error(
|
|
@@ -594,6 +576,7 @@ class Table(SchemaObject):
|
|
|
594
576
|
if idx_name is not None and idx_name in self._tbl_version.idxs_by_name:
|
|
595
577
|
raise excs.Error(f'Duplicate index name: {idx_name}')
|
|
596
578
|
from pixeltable.index import EmbeddingIndex
|
|
579
|
+
|
|
597
580
|
# create the EmbeddingIndex instance to verify args
|
|
598
581
|
idx = EmbeddingIndex(col, metric=metric, string_embed=string_embed, image_embed=image_embed)
|
|
599
582
|
status = self._tbl_version.add_index(col, idx_name=idx_name, idx=idx)
|
|
@@ -850,7 +833,7 @@ class Table(SchemaObject):
|
|
|
850
833
|
else:
|
|
851
834
|
function_path = None
|
|
852
835
|
query_name = py_fn.__name__
|
|
853
|
-
if query_name in self.
|
|
836
|
+
if query_name in self._schema.keys():
|
|
854
837
|
raise excs.Error(f'Query name {query_name!r} conflicts with existing column')
|
|
855
838
|
if query_name in self._queries:
|
|
856
839
|
raise excs.Error(f'Duplicate query name: {query_name!r}')
|
|
@@ -879,13 +862,13 @@ class Table(SchemaObject):
|
|
|
879
862
|
Links the specified `ExternalStore` to this table.
|
|
880
863
|
"""
|
|
881
864
|
if self._tbl_version.is_snapshot:
|
|
882
|
-
raise excs.Error(f'Table `{self.
|
|
865
|
+
raise excs.Error(f'Table `{self._name}` is a snapshot, so it cannot be linked to an external store.')
|
|
883
866
|
self._check_is_dropped()
|
|
884
867
|
if store.name in self.external_stores:
|
|
885
|
-
raise excs.Error(f'Table `{self.
|
|
886
|
-
_logger.info(f'Linking external store `{store.name}` to table `{self.
|
|
868
|
+
raise excs.Error(f'Table `{self._name}` already has an external store with that name: {store.name}')
|
|
869
|
+
_logger.info(f'Linking external store `{store.name}` to table `{self._name}`')
|
|
887
870
|
self._tbl_version.link_external_store(store)
|
|
888
|
-
print(f'Linked external store `{store.name}` to table `{self.
|
|
871
|
+
print(f'Linked external store `{store.name}` to table `{self._name}`.')
|
|
889
872
|
|
|
890
873
|
def unlink_external_stores(
|
|
891
874
|
self,
|
|
@@ -917,11 +900,11 @@ class Table(SchemaObject):
|
|
|
917
900
|
if not ignore_errors:
|
|
918
901
|
for store in stores:
|
|
919
902
|
if store not in all_stores:
|
|
920
|
-
raise excs.Error(f'Table `{self.
|
|
903
|
+
raise excs.Error(f'Table `{self._name}` has no external store with that name: {store}')
|
|
921
904
|
|
|
922
905
|
for store in stores:
|
|
923
906
|
self._tbl_version.unlink_external_store(store, delete_external_data=delete_external_data)
|
|
924
|
-
print(f'Unlinked external store from table `{self.
|
|
907
|
+
print(f'Unlinked external store from table `{self._name}`: {store}')
|
|
925
908
|
|
|
926
909
|
def sync(
|
|
927
910
|
self,
|
|
@@ -949,7 +932,7 @@ class Table(SchemaObject):
|
|
|
949
932
|
|
|
950
933
|
for store in stores:
|
|
951
934
|
if store not in all_stores:
|
|
952
|
-
raise excs.Error(f'Table `{self.
|
|
935
|
+
raise excs.Error(f'Table `{self._name}` has no external store with that name: {store}')
|
|
953
936
|
|
|
954
937
|
from pixeltable.io import SyncStatus
|
|
955
938
|
|
|
@@ -962,7 +945,7 @@ class Table(SchemaObject):
|
|
|
962
945
|
return sync_status
|
|
963
946
|
|
|
964
947
|
def __dir__(self) -> list[str]:
|
|
965
|
-
return list(super().__dir__()) + self.
|
|
948
|
+
return list(super().__dir__()) + list(self._schema.keys()) + self._query_names
|
|
966
949
|
|
|
967
950
|
def _ipython_key_completions_(self) -> list[str]:
|
|
968
|
-
return self.
|
|
951
|
+
return list(self._schema.keys()) + self._query_names
|
|
@@ -12,7 +12,7 @@ from uuid import UUID
|
|
|
12
12
|
import sqlalchemy as sql
|
|
13
13
|
import sqlalchemy.orm as orm
|
|
14
14
|
|
|
15
|
-
import pixeltable
|
|
15
|
+
import pixeltable as pxt
|
|
16
16
|
import pixeltable.exceptions as excs
|
|
17
17
|
import pixeltable.exprs as exprs
|
|
18
18
|
import pixeltable.func as func
|
|
@@ -24,7 +24,7 @@ from pixeltable.metadata import schema
|
|
|
24
24
|
from pixeltable.utils.filecache import FileCache
|
|
25
25
|
from pixeltable.utils.media_store import MediaStore
|
|
26
26
|
from .column import Column
|
|
27
|
-
from .globals import UpdateStatus,
|
|
27
|
+
from .globals import UpdateStatus, _POS_COLUMN_NAME, is_valid_identifier, _ROWID_COLUMN_NAME
|
|
28
28
|
from ..func.globals import resolve_symbol
|
|
29
29
|
|
|
30
30
|
_logger = logging.getLogger('pixeltable')
|
|
@@ -56,7 +56,7 @@ class TableVersion:
|
|
|
56
56
|
|
|
57
57
|
def __init__(
|
|
58
58
|
self, id: UUID, tbl_md: schema.TableMd, version: int, schema_version_md: schema.TableSchemaVersionMd,
|
|
59
|
-
base: Optional[TableVersion] = None, base_path: Optional['
|
|
59
|
+
base: Optional[TableVersion] = None, base_path: Optional['pxt.catalog.TableVersionPath'] = None,
|
|
60
60
|
is_snapshot: Optional[bool] = None
|
|
61
61
|
):
|
|
62
62
|
# only one of base and base_path can be non-None
|
|
@@ -124,7 +124,7 @@ class TableVersion:
|
|
|
124
124
|
self.cols_by_id: dict[int, Column] = {} # contains only columns visible in this version, both system and user
|
|
125
125
|
self.idx_md = tbl_md.index_md # needed for _create_tbl_md()
|
|
126
126
|
self.idxs_by_name: dict[str, TableVersion.IndexInfo] = {} # contains only actively maintained indices
|
|
127
|
-
self.external_stores: dict[str,
|
|
127
|
+
self.external_stores: dict[str, pxt.io.ExternalStore] = {}
|
|
128
128
|
|
|
129
129
|
self._init_schema(tbl_md, schema_version_md)
|
|
130
130
|
|
|
@@ -145,7 +145,7 @@ class TableVersion:
|
|
|
145
145
|
@classmethod
|
|
146
146
|
def create(
|
|
147
147
|
cls, session: orm.Session, dir_id: UUID, name: str, cols: List[Column], num_retained_versions: int,
|
|
148
|
-
comment: str, base_path: Optional['
|
|
148
|
+
comment: str, base_path: Optional['pxt.catalog.TableVersionPath'] = None,
|
|
149
149
|
view_md: Optional[schema.ViewMd] = None
|
|
150
150
|
) -> Tuple[UUID, Optional[TableVersion]]:
|
|
151
151
|
# assign ids
|
|
@@ -636,14 +636,28 @@ class TableVersion:
|
|
|
636
636
|
_logger.info(f'[{self.name}] Updating table schema to version: {self.version}')
|
|
637
637
|
|
|
638
638
|
def insert(
|
|
639
|
-
self,
|
|
639
|
+
self,
|
|
640
|
+
rows: Optional[list[dict[str, Any]]],
|
|
641
|
+
df: Optional[pxt.DataFrame],
|
|
642
|
+
conn: Optional[sql.engine.Connection] = None,
|
|
643
|
+
print_stats: bool = False,
|
|
644
|
+
fail_on_exception: bool = True
|
|
640
645
|
) -> UpdateStatus:
|
|
641
|
-
"""Insert rows into this table.
|
|
642
646
|
"""
|
|
643
|
-
|
|
647
|
+
Insert rows into this table, either from an explicit list of dicts or from a `DataFrame`.
|
|
648
|
+
"""
|
|
644
649
|
from pixeltable.plan import Planner
|
|
645
|
-
|
|
646
|
-
|
|
650
|
+
|
|
651
|
+
assert self.is_insertable()
|
|
652
|
+
assert (rows is None) != (df is None) # Exactly one must be specified
|
|
653
|
+
if rows is not None:
|
|
654
|
+
plan = Planner.create_insert_plan(self, rows, ignore_errors=not fail_on_exception)
|
|
655
|
+
else:
|
|
656
|
+
plan = Planner.create_df_insert_plan(self, df, ignore_errors=not fail_on_exception)
|
|
657
|
+
if conn is None:
|
|
658
|
+
with Env.get().engine.begin() as conn:
|
|
659
|
+
return self._insert(plan, conn, time.time(), print_stats)
|
|
660
|
+
else:
|
|
647
661
|
return self._insert(plan, conn, time.time(), print_stats)
|
|
648
662
|
|
|
649
663
|
def _insert(
|
|
@@ -739,7 +753,7 @@ class TableVersion:
|
|
|
739
753
|
if error_if_not_exists:
|
|
740
754
|
raise excs.Error(f'batch_update(): {len(unmatched_rows)} row(s) not found')
|
|
741
755
|
if insert_if_not_exists:
|
|
742
|
-
insert_status = self.insert(unmatched_rows, print_stats=False, fail_on_exception=False)
|
|
756
|
+
insert_status = self.insert(unmatched_rows, None, print_stats=False, fail_on_exception=False)
|
|
743
757
|
result += insert_status
|
|
744
758
|
return result
|
|
745
759
|
|
|
@@ -994,11 +1008,11 @@ class TableVersion:
|
|
|
994
1008
|
def _init_external_stores(self, tbl_md: schema.TableMd) -> None:
|
|
995
1009
|
for store_md in tbl_md.external_stores:
|
|
996
1010
|
store_cls = resolve_symbol(store_md['class'])
|
|
997
|
-
assert isinstance(store_cls, type) and issubclass(store_cls,
|
|
1011
|
+
assert isinstance(store_cls, type) and issubclass(store_cls, pxt.io.ExternalStore)
|
|
998
1012
|
store = store_cls.from_dict(store_md['md'])
|
|
999
1013
|
self.external_stores[store.name] = store
|
|
1000
1014
|
|
|
1001
|
-
def link_external_store(self, store:
|
|
1015
|
+
def link_external_store(self, store: pxt.io.ExternalStore) -> None:
|
|
1002
1016
|
with Env.get().engine.begin() as conn:
|
|
1003
1017
|
store.link(self, conn) # May result in additional metadata changes
|
|
1004
1018
|
self.external_stores[store.name] = store
|
|
@@ -1012,7 +1026,7 @@ class TableVersion:
|
|
|
1012
1026
|
del self.external_stores[store_name]
|
|
1013
1027
|
self._update_md(time.time(), conn, update_tbl_version=False)
|
|
1014
1028
|
|
|
1015
|
-
if delete_external_data and isinstance(store,
|
|
1029
|
+
if delete_external_data and isinstance(store, pxt.io.external_store.Project):
|
|
1016
1030
|
store.delete()
|
|
1017
1031
|
|
|
1018
1032
|
def is_view(self) -> bool:
|
|
@@ -1032,7 +1046,7 @@ class TableVersion:
|
|
|
1032
1046
|
|
|
1033
1047
|
def is_system_column(self, col: Column) -> bool:
|
|
1034
1048
|
"""Return True if column was created by Pixeltable"""
|
|
1035
|
-
if col.name ==
|
|
1049
|
+
if col.name == _POS_COLUMN_NAME and self.is_component_view():
|
|
1036
1050
|
return True
|
|
1037
1051
|
return False
|
|
1038
1052
|
|
|
@@ -1056,7 +1070,7 @@ class TableVersion:
|
|
|
1056
1070
|
return names
|
|
1057
1071
|
|
|
1058
1072
|
@classmethod
|
|
1059
|
-
def _create_value_expr(cls, col: Column, path: '
|
|
1073
|
+
def _create_value_expr(cls, col: Column, path: 'pxt.catalog.TableVersionPath') -> None:
|
|
1060
1074
|
"""
|
|
1061
1075
|
Create col.value_expr, given col.compute_func.
|
|
1062
1076
|
Interprets compute_func's parameters to be references to columns and construct ColumnRefs as args.
|
|
@@ -1093,16 +1107,13 @@ class TableVersion:
|
|
|
1093
1107
|
result = {info.val_col for col in cols for info in col.get_idx_info().values()}
|
|
1094
1108
|
return result
|
|
1095
1109
|
|
|
1096
|
-
def get_dependent_columns(self, cols:
|
|
1110
|
+
def get_dependent_columns(self, cols: Iterable[Column]) -> set[Column]:
|
|
1097
1111
|
"""
|
|
1098
1112
|
Return the set of columns that transitively depend on any of the given ones.
|
|
1099
1113
|
"""
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
for col in cols:
|
|
1104
|
-
result.update(col.dependent_cols)
|
|
1105
|
-
result.update(self.get_dependent_columns(result))
|
|
1114
|
+
result = {dependent_col for col in cols for dependent_col in col.dependent_cols}
|
|
1115
|
+
if len(result) > 0:
|
|
1116
|
+
result.update(self.get_dependent_columns(result))
|
|
1106
1117
|
return result
|
|
1107
1118
|
|
|
1108
1119
|
def num_rowid_columns(self) -> int:
|
|
@@ -1123,7 +1134,7 @@ class TableVersion:
|
|
|
1123
1134
|
return column_md
|
|
1124
1135
|
|
|
1125
1136
|
@classmethod
|
|
1126
|
-
def _create_stores_md(cls, stores: Iterable['
|
|
1137
|
+
def _create_stores_md(cls, stores: Iterable['pxt.io.ExternalStore']) -> list[dict[str, Any]]:
|
|
1127
1138
|
return [
|
|
1128
1139
|
{
|
|
1129
1140
|
'class': f'{type(store).__module__}.{type(store).__qualname__}',
|
|
@@ -6,7 +6,7 @@ from uuid import UUID
|
|
|
6
6
|
|
|
7
7
|
import pixeltable
|
|
8
8
|
from .column import Column
|
|
9
|
-
from .globals import
|
|
9
|
+
from .globals import _POS_COLUMN_NAME
|
|
10
10
|
from .table_version import TableVersion
|
|
11
11
|
|
|
12
12
|
_logger = logging.getLogger('pixeltable')
|
|
@@ -83,7 +83,7 @@ class TableVersionPath:
|
|
|
83
83
|
def __getattr__(self, col_name: str) -> 'pixeltable.exprs.ColumnRef':
|
|
84
84
|
"""Return a ColumnRef for the given column name."""
|
|
85
85
|
from pixeltable.exprs import ColumnRef, RowidRef
|
|
86
|
-
if col_name ==
|
|
86
|
+
if col_name == _POS_COLUMN_NAME and self.is_component_view():
|
|
87
87
|
return RowidRef(self.tbl_version, self.tbl_version.store_tbl.pos_col_idx)
|
|
88
88
|
if col_name not in self.tbl_version.cols_by_name:
|
|
89
89
|
if self.base is None:
|
pixeltable/catalog/view.py
CHANGED
|
@@ -2,7 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import inspect
|
|
4
4
|
import logging
|
|
5
|
-
from typing import
|
|
5
|
+
from typing import TYPE_CHECKING, Any, Dict, Iterable, Optional, Set, Type
|
|
6
6
|
from uuid import UUID
|
|
7
7
|
|
|
8
8
|
import sqlalchemy.orm as orm
|
|
@@ -14,10 +14,11 @@ import pixeltable.metadata.schema as md_schema
|
|
|
14
14
|
from pixeltable.env import Env
|
|
15
15
|
from pixeltable.exceptions import Error
|
|
16
16
|
from pixeltable.iterators import ComponentIterator
|
|
17
|
-
from pixeltable.type_system import
|
|
17
|
+
from pixeltable.type_system import IntType, InvalidType
|
|
18
|
+
|
|
18
19
|
from .catalog import Catalog
|
|
19
20
|
from .column import Column
|
|
20
|
-
from .globals import
|
|
21
|
+
from .globals import _POS_COLUMN_NAME, UpdateStatus
|
|
21
22
|
from .table import Table
|
|
22
23
|
from .table_version import TableVersion
|
|
23
24
|
from .table_version_path import TableVersionPath
|
|
@@ -45,11 +46,11 @@ class View(Table):
|
|
|
45
46
|
self._snapshot_only = snapshot_only
|
|
46
47
|
|
|
47
48
|
@classmethod
|
|
48
|
-
def
|
|
49
|
+
def _display_name(cls) -> str:
|
|
49
50
|
return 'view'
|
|
50
51
|
|
|
51
52
|
@classmethod
|
|
52
|
-
def
|
|
53
|
+
def _create(
|
|
53
54
|
cls, dir_id: UUID, name: str, base: TableVersionPath, schema: Dict[str, Any],
|
|
54
55
|
predicate: 'pxt.exprs.Expr', is_snapshot: bool, num_retained_versions: int, comment: str,
|
|
55
56
|
iterator_cls: Optional[Type[ComponentIterator]], iterator_args: Optional[Dict]
|
|
@@ -100,7 +101,7 @@ class View(Table):
|
|
|
100
101
|
# a component view exposes the pos column of its rowid;
|
|
101
102
|
# we create that column here, so it gets assigned a column id;
|
|
102
103
|
# stored=False: it is not stored separately (it's already stored as part of the rowid)
|
|
103
|
-
iterator_cols = [Column(
|
|
104
|
+
iterator_cols = [Column(_POS_COLUMN_NAME, IntType(), stored=False)]
|
|
104
105
|
output_dict, unstored_cols = iterator_cls.output_schema(**bound_args)
|
|
105
106
|
iterator_cols.extend([
|
|
106
107
|
Column(col_name, col_type, stored=col_name not in unstored_cols)
|
|
@@ -207,11 +208,17 @@ class View(Table):
|
|
|
207
208
|
cat.tbl_dependents[self._base_id].remove(self)
|
|
208
209
|
del cat.tbl_dependents[self._id]
|
|
209
210
|
|
|
211
|
+
def get_metadata(self) -> dict[str, Any]:
|
|
212
|
+
md = super().get_metadata()
|
|
213
|
+
md['is_view'] = True
|
|
214
|
+
md['is_snapshot'] = self._tbl_version_path.is_snapshot()
|
|
215
|
+
return md
|
|
216
|
+
|
|
210
217
|
def insert(
|
|
211
218
|
self, rows: Optional[Iterable[dict[str, Any]]] = None, /, *, print_stats: bool = False,
|
|
212
219
|
fail_on_exception: bool = True, **kwargs: Any
|
|
213
220
|
) -> UpdateStatus:
|
|
214
|
-
raise excs.Error(f'{self.
|
|
221
|
+
raise excs.Error(f'{self._display_name()} {self._name!r}: cannot insert into view')
|
|
215
222
|
|
|
216
223
|
def delete(self, where: Optional['pixeltable.exprs.Expr'] = None) -> UpdateStatus:
|
|
217
|
-
raise excs.Error(f'{self.
|
|
224
|
+
raise excs.Error(f'{self._display_name()} {self._name!r}: cannot delete from view')
|