pixeltable 0.2.20__py3-none-any.whl → 0.2.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +7 -19
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +7 -7
- pixeltable/catalog/globals.py +3 -0
- pixeltable/catalog/table.py +208 -145
- pixeltable/catalog/table_version.py +36 -18
- pixeltable/catalog/table_version_path.py +0 -8
- pixeltable/catalog/view.py +3 -3
- pixeltable/dataframe.py +9 -24
- pixeltable/env.py +1 -1
- pixeltable/exec/__init__.py +1 -1
- pixeltable/exec/aggregation_node.py +22 -15
- pixeltable/exec/data_row_batch.py +7 -7
- pixeltable/exec/exec_node.py +35 -7
- pixeltable/exec/expr_eval_node.py +2 -1
- pixeltable/exec/in_memory_data_node.py +9 -9
- pixeltable/exec/sql_node.py +265 -136
- pixeltable/exprs/__init__.py +1 -0
- pixeltable/exprs/data_row.py +30 -19
- pixeltable/exprs/expr.py +15 -14
- pixeltable/exprs/expr_dict.py +55 -0
- pixeltable/exprs/expr_set.py +21 -15
- pixeltable/exprs/function_call.py +21 -8
- pixeltable/exprs/rowid_ref.py +2 -2
- pixeltable/exprs/sql_element_cache.py +5 -1
- pixeltable/ext/functions/whisperx.py +7 -2
- pixeltable/func/callable_function.py +2 -2
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/query_template_function.py +11 -12
- pixeltable/func/signature.py +17 -15
- pixeltable/func/udf.py +0 -4
- pixeltable/functions/__init__.py +1 -1
- pixeltable/functions/audio.py +4 -6
- pixeltable/functions/globals.py +86 -42
- pixeltable/functions/huggingface.py +12 -14
- pixeltable/functions/image.py +59 -45
- pixeltable/functions/json.py +0 -1
- pixeltable/functions/mistralai.py +2 -2
- pixeltable/functions/openai.py +22 -25
- pixeltable/functions/string.py +50 -50
- pixeltable/functions/timestamp.py +20 -20
- pixeltable/functions/together.py +2 -2
- pixeltable/functions/video.py +11 -20
- pixeltable/functions/whisper.py +2 -20
- pixeltable/globals.py +55 -56
- pixeltable/index/base.py +2 -2
- pixeltable/index/btree.py +7 -7
- pixeltable/index/embedding_index.py +8 -10
- pixeltable/io/external_store.py +11 -5
- pixeltable/io/globals.py +2 -0
- pixeltable/io/hf_datasets.py +1 -1
- pixeltable/io/label_studio.py +6 -6
- pixeltable/io/parquet.py +14 -13
- pixeltable/iterators/document.py +9 -7
- pixeltable/iterators/video.py +10 -1
- pixeltable/metadata/__init__.py +3 -2
- pixeltable/metadata/converters/convert_14.py +4 -2
- pixeltable/metadata/converters/convert_15.py +1 -1
- pixeltable/metadata/converters/convert_19.py +1 -0
- pixeltable/metadata/converters/convert_20.py +1 -1
- pixeltable/metadata/converters/util.py +9 -8
- pixeltable/metadata/schema.py +32 -21
- pixeltable/plan.py +136 -154
- pixeltable/store.py +51 -36
- pixeltable/tool/create_test_db_dump.py +6 -6
- pixeltable/tool/doc_plugins/griffe.py +3 -34
- pixeltable/tool/mypy_plugin.py +32 -0
- pixeltable/type_system.py +243 -60
- pixeltable/utils/arrow.py +10 -9
- pixeltable/utils/coco.py +4 -4
- pixeltable/utils/documents.py +1 -1
- pixeltable/utils/filecache.py +9 -9
- pixeltable/utils/formatter.py +1 -1
- pixeltable/utils/http_server.py +2 -5
- pixeltable/utils/media_store.py +6 -6
- pixeltable/utils/pytorch.py +10 -11
- pixeltable/utils/sql.py +2 -1
- {pixeltable-0.2.20.dist-info → pixeltable-0.2.21.dist-info}/METADATA +6 -5
- pixeltable-0.2.21.dist-info/RECORD +148 -0
- pixeltable/utils/help.py +0 -11
- pixeltable-0.2.20.dist-info/RECORD +0 -147
- {pixeltable-0.2.20.dist-info → pixeltable-0.2.21.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.20.dist-info → pixeltable-0.2.21.dist-info}/WHEEL +0 -0
- {pixeltable-0.2.20.dist-info → pixeltable-0.2.21.dist-info}/entry_points.txt +0 -0
pixeltable/catalog/table.py
CHANGED
|
@@ -5,14 +5,15 @@ import builtins
|
|
|
5
5
|
import json
|
|
6
6
|
import logging
|
|
7
7
|
from pathlib import Path
|
|
8
|
-
from typing import
|
|
8
|
+
from typing import _GenericAlias # type: ignore[attr-defined]
|
|
9
|
+
from typing import TYPE_CHECKING, Any, Callable, Iterable, Literal, Optional, Set, Sequence, Tuple, Type, Union, overload
|
|
9
10
|
from uuid import UUID
|
|
10
11
|
|
|
11
12
|
import pandas as pd
|
|
12
13
|
import pandas.io.formats.style
|
|
13
14
|
import sqlalchemy as sql
|
|
14
15
|
|
|
15
|
-
import pixeltable
|
|
16
|
+
import pixeltable as pxt
|
|
16
17
|
import pixeltable.catalog as catalog
|
|
17
18
|
import pixeltable.env as env
|
|
18
19
|
import pixeltable.exceptions as excs
|
|
@@ -35,18 +36,30 @@ _logger = logging.getLogger('pixeltable')
|
|
|
35
36
|
|
|
36
37
|
class Table(SchemaObject):
|
|
37
38
|
"""
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
Every user-invoked operation that runs an ExecNode tree (directly or indirectly) needs to call
|
|
41
|
-
FileCache.emit_eviction_warnings() at the end of the operation.
|
|
39
|
+
A handle to a table, view, or snapshot. This class is the primary interface through which table operations
|
|
40
|
+
(queries, insertions, updates, etc.) are performed in Pixeltable.
|
|
42
41
|
"""
|
|
42
|
+
# Every user-invoked operation that runs an ExecNode tree (directly or indirectly) needs to call
|
|
43
|
+
# FileCache.emit_eviction_warnings() at the end of the operation.
|
|
43
44
|
|
|
44
45
|
def __init__(self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath):
|
|
45
46
|
super().__init__(id, name, dir_id)
|
|
46
47
|
self._is_dropped = False
|
|
47
48
|
self._tbl_version_path = tbl_version_path
|
|
48
|
-
|
|
49
|
-
|
|
49
|
+
self.__query_scope = self.QueryScope(self)
|
|
50
|
+
|
|
51
|
+
class QueryScope:
|
|
52
|
+
__table: 'Table'
|
|
53
|
+
_queries: dict[str, pxt.func.QueryTemplateFunction]
|
|
54
|
+
|
|
55
|
+
def __init__(self, table: 'Table') -> None:
|
|
56
|
+
self.__table = table
|
|
57
|
+
self._queries = {}
|
|
58
|
+
|
|
59
|
+
def __getattr__(self, name: str) -> pxt.func.QueryTemplateFunction:
|
|
60
|
+
if name in self._queries:
|
|
61
|
+
return self._queries[name]
|
|
62
|
+
raise AttributeError(f'Table {self.__table._name!r} has no query with that name: {name!r}')
|
|
50
63
|
|
|
51
64
|
def _move(self, new_name: str, new_dir_id: UUID) -> None:
|
|
52
65
|
super()._move(new_name, new_dir_id)
|
|
@@ -59,6 +72,28 @@ class Table(SchemaObject):
|
|
|
59
72
|
conn.execute(stmt, {'new_dir_id': new_dir_id, 'new_name': json.dumps(new_name), 'id': self._id})
|
|
60
73
|
|
|
61
74
|
def get_metadata(self) -> dict[str, Any]:
|
|
75
|
+
"""
|
|
76
|
+
Retrieves metadata associated with this table.
|
|
77
|
+
|
|
78
|
+
Returns:
|
|
79
|
+
A dictionary containing the metadata, in the following format:
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
{
|
|
83
|
+
'base': None, # If this is a view or snapshot, will contain the name of its base table
|
|
84
|
+
'schema': {
|
|
85
|
+
'col1': StringType(),
|
|
86
|
+
'col2': IntType(),
|
|
87
|
+
},
|
|
88
|
+
'version': 22,
|
|
89
|
+
'schema_version': 1,
|
|
90
|
+
'comment': '',
|
|
91
|
+
'num_retained_versions': 10,
|
|
92
|
+
'is_view': False,
|
|
93
|
+
'is_snapshot': False,
|
|
94
|
+
}
|
|
95
|
+
```
|
|
96
|
+
"""
|
|
62
97
|
md = super().get_metadata()
|
|
63
98
|
md['base'] = self._base._path if self._base is not None else None
|
|
64
99
|
md['schema'] = self._schema
|
|
@@ -85,25 +120,24 @@ class Table(SchemaObject):
|
|
|
85
120
|
if self._is_dropped:
|
|
86
121
|
raise excs.Error(f'{self._display_name()} {self._name} has been dropped')
|
|
87
122
|
|
|
88
|
-
def __getattr__(
|
|
89
|
-
|
|
90
|
-
) -> Union['pixeltable.exprs.ColumnRef', 'pixeltable.func.QueryTemplateFunction']:
|
|
91
|
-
"""Return a ColumnRef or QueryTemplateFunction for the given name.
|
|
123
|
+
def __getattr__(self, name: str) -> 'pxt.exprs.ColumnRef':
|
|
124
|
+
"""Return a ColumnRef for the given name.
|
|
92
125
|
"""
|
|
93
|
-
if name in self._queries:
|
|
94
|
-
return self._queries[name]
|
|
95
126
|
return getattr(self._tbl_version_path, name)
|
|
96
127
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
]:
|
|
128
|
+
@overload
|
|
129
|
+
def __getitem__(self, name: str) -> 'pxt.exprs.ColumnRef': ...
|
|
130
|
+
|
|
131
|
+
@overload
|
|
132
|
+
def __getitem__(self, index: Union[exprs.Expr, Sequence[exprs.Expr]]) -> 'pxt.DataFrame': ...
|
|
133
|
+
|
|
134
|
+
def __getitem__(self, index):
|
|
102
135
|
"""Return a ColumnRef for the given name, or a DataFrame for the given slice.
|
|
103
136
|
"""
|
|
104
|
-
if isinstance(index, str)
|
|
105
|
-
return self
|
|
106
|
-
|
|
137
|
+
if isinstance(index, str):
|
|
138
|
+
return getattr(self, index)
|
|
139
|
+
else:
|
|
140
|
+
return self._df()[index]
|
|
107
141
|
|
|
108
142
|
def list_views(self, *, recursive: bool = True) -> list[str]:
|
|
109
143
|
"""
|
|
@@ -112,6 +146,9 @@ class Table(SchemaObject):
|
|
|
112
146
|
Args:
|
|
113
147
|
recursive: If `False`, returns only the immediate successor views of this `Table`. If `True`, returns
|
|
114
148
|
all sub-views (including views of views, etc.)
|
|
149
|
+
|
|
150
|
+
Returns:
|
|
151
|
+
A list of view paths.
|
|
115
152
|
"""
|
|
116
153
|
return [t._path for t in self._get_views(recursive=recursive)]
|
|
117
154
|
|
|
@@ -122,60 +159,55 @@ class Table(SchemaObject):
|
|
|
122
159
|
else:
|
|
123
160
|
return dependents
|
|
124
161
|
|
|
125
|
-
def _df(self) -> '
|
|
162
|
+
def _df(self) -> 'pxt.dataframe.DataFrame':
|
|
126
163
|
"""Return a DataFrame for this table.
|
|
127
164
|
"""
|
|
128
165
|
# local import: avoid circular imports
|
|
129
|
-
|
|
130
|
-
|
|
166
|
+
return pxt.DataFrame(self._tbl_version_path)
|
|
167
|
+
|
|
168
|
+
@property
|
|
169
|
+
def queries(self) -> 'Table.QueryScope':
|
|
170
|
+
return self.__query_scope
|
|
131
171
|
|
|
132
|
-
def select(self, *items: Any, **named_items: Any) -> '
|
|
172
|
+
def select(self, *items: Any, **named_items: Any) -> 'pxt.DataFrame':
|
|
133
173
|
"""Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
|
|
134
|
-
|
|
135
|
-
from pixeltable.dataframe import DataFrame
|
|
136
|
-
return DataFrame(self._tbl_version_path).select(*items, **named_items)
|
|
174
|
+
return self._df().select(*items, **named_items)
|
|
137
175
|
|
|
138
|
-
def where(self, pred: 'exprs.Expr') -> '
|
|
176
|
+
def where(self, pred: 'exprs.Expr') -> 'pxt.DataFrame':
|
|
139
177
|
"""Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
|
|
140
|
-
|
|
141
|
-
from pixeltable.dataframe import DataFrame
|
|
142
|
-
return DataFrame(self._tbl_version_path).where(pred)
|
|
178
|
+
return self._df().where(pred)
|
|
143
179
|
|
|
144
|
-
def order_by(self, *items: 'exprs.Expr', asc: bool = True) -> '
|
|
180
|
+
def order_by(self, *items: 'exprs.Expr', asc: bool = True) -> 'pxt.DataFrame':
|
|
145
181
|
"""Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
|
|
146
|
-
|
|
147
|
-
from pixeltable.dataframe import DataFrame
|
|
148
|
-
return DataFrame(self._tbl_version_path).order_by(*items, asc=asc)
|
|
182
|
+
return self._df().order_by(*items, asc=asc)
|
|
149
183
|
|
|
150
|
-
def group_by(self, *items: 'exprs.Expr') -> '
|
|
184
|
+
def group_by(self, *items: 'exprs.Expr') -> 'pxt.DataFrame':
|
|
151
185
|
"""Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
|
|
152
|
-
|
|
153
|
-
return DataFrame(self._tbl_version_path).group_by(*items)
|
|
186
|
+
return self._df().group_by(*items)
|
|
154
187
|
|
|
155
|
-
def limit(self, n: int) -> '
|
|
156
|
-
|
|
157
|
-
return DataFrame(self._tbl_version_path).limit(n)
|
|
188
|
+
def limit(self, n: int) -> 'pxt.DataFrame':
|
|
189
|
+
return self._df().limit(n)
|
|
158
190
|
|
|
159
|
-
def collect(self) -> '
|
|
191
|
+
def collect(self) -> 'pxt.dataframe.DataFrameResultSet':
|
|
160
192
|
"""Return rows from this table."""
|
|
161
193
|
return self._df().collect()
|
|
162
194
|
|
|
163
195
|
def show(
|
|
164
196
|
self, *args, **kwargs
|
|
165
|
-
) -> '
|
|
197
|
+
) -> 'pxt.dataframe.DataFrameResultSet':
|
|
166
198
|
"""Return rows from this table.
|
|
167
199
|
"""
|
|
168
200
|
return self._df().show(*args, **kwargs)
|
|
169
201
|
|
|
170
202
|
def head(
|
|
171
203
|
self, *args, **kwargs
|
|
172
|
-
) -> '
|
|
204
|
+
) -> 'pxt.dataframe.DataFrameResultSet':
|
|
173
205
|
"""Return the first n rows inserted into this table."""
|
|
174
206
|
return self._df().head(*args, **kwargs)
|
|
175
207
|
|
|
176
208
|
def tail(
|
|
177
209
|
self, *args, **kwargs
|
|
178
|
-
) -> '
|
|
210
|
+
) -> 'pxt.dataframe.DataFrameResultSet':
|
|
179
211
|
"""Return the last n rows inserted into this table."""
|
|
180
212
|
return self._df().tail(*args, **kwargs)
|
|
181
213
|
|
|
@@ -191,7 +223,7 @@ class Table(SchemaObject):
|
|
|
191
223
|
@property
|
|
192
224
|
def _query_names(self) -> list[str]:
|
|
193
225
|
"""Return the names of the registered queries for this table."""
|
|
194
|
-
return list(self._queries.keys())
|
|
226
|
+
return list(self.__query_scope._queries.keys())
|
|
195
227
|
|
|
196
228
|
@property
|
|
197
229
|
def _base(self) -> Optional['Table']:
|
|
@@ -216,7 +248,7 @@ class Table(SchemaObject):
|
|
|
216
248
|
cols = self._tbl_version_path.columns()
|
|
217
249
|
df = pd.DataFrame({
|
|
218
250
|
'Column Name': [c.name for c in cols],
|
|
219
|
-
'Type': [
|
|
251
|
+
'Type': [c.col_type._to_str(as_schema=True) for c in cols],
|
|
220
252
|
'Computed With': [c.value_expr.display_str(inline=False) if c.value_expr is not None else '' for c in cols],
|
|
221
253
|
})
|
|
222
254
|
return df
|
|
@@ -239,7 +271,7 @@ class Table(SchemaObject):
|
|
|
239
271
|
from IPython.display import display
|
|
240
272
|
display(self._description_html())
|
|
241
273
|
else:
|
|
242
|
-
print(self
|
|
274
|
+
print(repr(self))
|
|
243
275
|
|
|
244
276
|
# TODO: Display comments in _repr_html()
|
|
245
277
|
def __repr__(self) -> str:
|
|
@@ -267,75 +299,77 @@ class Table(SchemaObject):
|
|
|
267
299
|
"""Return a PyTorch Dataset for this table.
|
|
268
300
|
See DataFrame.to_pytorch_dataset()
|
|
269
301
|
"""
|
|
270
|
-
|
|
271
|
-
return DataFrame(self._tbl_version_path).to_pytorch_dataset(image_format=image_format)
|
|
302
|
+
return self._df().to_pytorch_dataset(image_format=image_format)
|
|
272
303
|
|
|
273
304
|
def to_coco_dataset(self) -> Path:
|
|
274
305
|
"""Return the path to a COCO json file for this table.
|
|
275
306
|
See DataFrame.to_coco_dataset()
|
|
276
307
|
"""
|
|
277
|
-
|
|
278
|
-
return DataFrame(self._tbl_version_path).to_coco_dataset()
|
|
308
|
+
return self._df().to_coco_dataset()
|
|
279
309
|
|
|
280
310
|
def __setitem__(self, col_name: str, spec: Union[ts.ColumnType, exprs.Expr]) -> None:
|
|
281
311
|
"""
|
|
282
312
|
Adds a column to the table. This is an alternate syntax for `add_column()`; the meaning of
|
|
283
313
|
|
|
284
|
-
>>> tbl['new_col'] =
|
|
314
|
+
>>> tbl['new_col'] = pxt.Int
|
|
285
315
|
|
|
286
316
|
is exactly equivalent to
|
|
287
317
|
|
|
288
|
-
>>> tbl.add_column(new_col=
|
|
318
|
+
>>> tbl.add_column(new_col=pxt.Int)
|
|
289
319
|
|
|
290
320
|
For details, see the documentation for [`add_column()`][pixeltable.catalog.Table.add_column].
|
|
291
321
|
"""
|
|
292
322
|
if not isinstance(col_name, str):
|
|
293
323
|
raise excs.Error(f'Column name must be a string, got {type(col_name)}')
|
|
294
|
-
if not isinstance(spec, (ts.ColumnType, exprs.Expr)):
|
|
295
|
-
raise excs.Error(f'Column spec must be a ColumnType or
|
|
296
|
-
self.add_column(type=None, stored=None, print_stats=False, **{col_name: spec})
|
|
324
|
+
if not isinstance(spec, (ts.ColumnType, exprs.Expr, type, _GenericAlias)):
|
|
325
|
+
raise excs.Error(f'Column spec must be a ColumnType, Expr, or type, got {type(spec)}')
|
|
326
|
+
self.add_column(type=None, stored=None, print_stats=False, on_error='abort', **{col_name: spec})
|
|
297
327
|
|
|
298
328
|
def add_column(
|
|
299
329
|
self,
|
|
300
330
|
*,
|
|
301
|
-
type:
|
|
331
|
+
type: Union[ts.ColumnType, builtins.type, _GenericAlias, None] = None,
|
|
302
332
|
stored: Optional[bool] = None,
|
|
303
333
|
print_stats: bool = False,
|
|
304
|
-
|
|
334
|
+
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
335
|
+
**kwargs: Union[ts.ColumnType, builtins.type, _GenericAlias, exprs.Expr, Callable]
|
|
305
336
|
) -> UpdateStatus:
|
|
306
337
|
"""
|
|
307
338
|
Adds a column to the table.
|
|
308
339
|
|
|
309
340
|
Args:
|
|
310
|
-
kwargs: Exactly one keyword argument of the form
|
|
311
|
-
type: The type of the column. Only valid and required if
|
|
341
|
+
kwargs: Exactly one keyword argument of the form `column_name=type` or `column_name=expression`.
|
|
342
|
+
type: The type of the column. Only valid and required if `value-expression` is a Callable.
|
|
312
343
|
stored: Whether the column is materialized and stored or computed on demand. Only valid for image columns.
|
|
313
|
-
print_stats: If
|
|
344
|
+
print_stats: If `True`, print execution metrics during evaluation.
|
|
345
|
+
on_error: Determines the behavior if an error occurs while evaluating the column expression for at least one
|
|
346
|
+
row.
|
|
347
|
+
|
|
348
|
+
- If `on_error='abort'`, then an exception will be raised and the column will not be added.
|
|
349
|
+
- If `on_error='ignore'`, then execution will continue and the column will be added. Any rows
|
|
350
|
+
with errors will have a `None` value for the column, with information about the error stored in the
|
|
351
|
+
corresponding `tbl.col_name.errortype` and `tbl.col_name.errormsg` fields.
|
|
314
352
|
|
|
315
353
|
Returns:
|
|
316
|
-
execution status
|
|
354
|
+
Information about the execution status of the operation.
|
|
317
355
|
|
|
318
356
|
Raises:
|
|
319
357
|
Error: If the column name is invalid or already exists.
|
|
320
358
|
|
|
321
359
|
Examples:
|
|
322
|
-
Add an int column
|
|
360
|
+
Add an int column:
|
|
323
361
|
|
|
324
|
-
>>> tbl.add_column(new_col=
|
|
362
|
+
>>> tbl.add_column(new_col=pxt.Int)
|
|
325
363
|
|
|
326
364
|
Alternatively, this can also be expressed as:
|
|
327
365
|
|
|
328
|
-
>>> tbl['new_col'] =
|
|
366
|
+
>>> tbl['new_col'] = pxt.Int
|
|
329
367
|
|
|
330
|
-
For a table with int column
|
|
368
|
+
For a table with int column `int_col`, add a column that is the factorial of ``int_col``. The names of
|
|
331
369
|
the parameters of the Callable must correspond to existing column names (the column values are then passed
|
|
332
370
|
as arguments to the Callable). In this case, the column type needs to be specified explicitly:
|
|
333
371
|
|
|
334
|
-
>>> tbl.add_column(factorial=lambda int_col: math.factorial(int_col), type=
|
|
335
|
-
|
|
336
|
-
Alternatively, this can also be expressed as:
|
|
337
|
-
|
|
338
|
-
>>> tbl['factorial'] = {'value': lambda int_col: math.factorial(int_col), 'type': IntType()}
|
|
372
|
+
>>> tbl.add_column(factorial=lambda int_col: math.factorial(int_col), type=pxt.Int)
|
|
339
373
|
|
|
340
374
|
For a table with an image column ``frame``, add an image column ``rotated`` that rotates the image by
|
|
341
375
|
90 degrees. In this case, the column type is inferred from the expression. Also, the column is not stored
|
|
@@ -347,13 +381,9 @@ class Table(SchemaObject):
|
|
|
347
381
|
|
|
348
382
|
>>> tbl['rotated'] = tbl.frame.rotate(90)
|
|
349
383
|
|
|
350
|
-
Do the same, but now the column is
|
|
351
|
-
|
|
352
|
-
>>> tbl.add_column(rotated=tbl.frame.rotate(90), stored=True)
|
|
353
|
-
|
|
354
|
-
Alternatively, this can also be expressed as:
|
|
384
|
+
Do the same, but now the column is unstored:
|
|
355
385
|
|
|
356
|
-
>>> tbl
|
|
386
|
+
>>> tbl.add_column(rotated=tbl.frame.rotate(90), stored=False)
|
|
357
387
|
"""
|
|
358
388
|
self._check_is_dropped()
|
|
359
389
|
# verify kwargs and construct column schema dict
|
|
@@ -365,26 +395,25 @@ class Table(SchemaObject):
|
|
|
365
395
|
col_name, spec = next(iter(kwargs.items()))
|
|
366
396
|
if not is_valid_identifier(col_name):
|
|
367
397
|
raise excs.Error(f'Invalid column name: {col_name!r}')
|
|
368
|
-
if isinstance(spec, (ts.ColumnType, exprs.Expr)) and type is not None:
|
|
398
|
+
if isinstance(spec, (ts.ColumnType, builtins.type, _GenericAlias, exprs.Expr)) and type is not None:
|
|
369
399
|
raise excs.Error(f'add_column(): keyword argument "type" is redundant')
|
|
370
400
|
|
|
371
401
|
col_schema: dict[str, Any] = {}
|
|
372
|
-
if isinstance(spec, ts.ColumnType):
|
|
373
|
-
col_schema['type'] = spec
|
|
402
|
+
if isinstance(spec, (ts.ColumnType, builtins.type, _GenericAlias)):
|
|
403
|
+
col_schema['type'] = ts.ColumnType.normalize_type(spec, nullable_default=True)
|
|
374
404
|
else:
|
|
375
405
|
col_schema['value'] = spec
|
|
376
406
|
if type is not None:
|
|
377
|
-
col_schema['type'] = type
|
|
407
|
+
col_schema['type'] = ts.ColumnType.normalize_type(type, nullable_default=True)
|
|
378
408
|
if stored is not None:
|
|
379
409
|
col_schema['stored'] = stored
|
|
380
410
|
|
|
381
411
|
new_col = self._create_columns({col_name: col_schema})[0]
|
|
382
412
|
self._verify_column(new_col, set(self._schema.keys()), set(self._query_names))
|
|
383
|
-
status = self._tbl_version.add_column(new_col, print_stats=print_stats)
|
|
413
|
+
status = self._tbl_version.add_column(new_col, print_stats=print_stats, on_error=on_error)
|
|
384
414
|
FileCache.get().emit_eviction_warnings()
|
|
385
415
|
return status
|
|
386
416
|
|
|
387
|
-
|
|
388
417
|
@classmethod
|
|
389
418
|
def _validate_column_spec(cls, name: str, spec: dict[str, Any]) -> None:
|
|
390
419
|
"""Check integrity of user-supplied Column spec
|
|
@@ -401,8 +430,8 @@ class Table(SchemaObject):
|
|
|
401
430
|
|
|
402
431
|
if 'type' in spec:
|
|
403
432
|
has_type = True
|
|
404
|
-
if not isinstance(spec['type'], ts.ColumnType):
|
|
405
|
-
raise excs.Error(f'Column {name}: "type" must be a ColumnType, got {spec["type"]}')
|
|
433
|
+
if not isinstance(spec['type'], (ts.ColumnType, type, _GenericAlias)):
|
|
434
|
+
raise excs.Error(f'Column {name}: "type" must be a type or ColumnType, got {spec["type"]}')
|
|
406
435
|
|
|
407
436
|
if 'value' in spec:
|
|
408
437
|
value_spec = spec['value']
|
|
@@ -435,20 +464,20 @@ class Table(SchemaObject):
|
|
|
435
464
|
primary_key: Optional[bool] = None
|
|
436
465
|
stored = True
|
|
437
466
|
|
|
438
|
-
if isinstance(spec, ts.ColumnType):
|
|
439
|
-
|
|
440
|
-
col_type = spec
|
|
467
|
+
if isinstance(spec, (ts.ColumnType, type, _GenericAlias)):
|
|
468
|
+
col_type = ts.ColumnType.normalize_type(spec, nullable_default=True)
|
|
441
469
|
elif isinstance(spec, exprs.Expr):
|
|
442
470
|
# create copy so we can modify it
|
|
443
471
|
value_expr = spec.copy()
|
|
444
472
|
elif callable(spec):
|
|
445
|
-
raise excs.Error(
|
|
473
|
+
raise excs.Error(
|
|
446
474
|
f'Column {name} computed with a Callable: specify using a dictionary with '
|
|
447
|
-
f'the "value" and "type" keys (e.g., "{name}": {{"value": <Callable>, "type":
|
|
448
|
-
)
|
|
475
|
+
f'the "value" and "type" keys (e.g., "{name}": {{"value": <Callable>, "type": pxt.Int}})'
|
|
476
|
+
)
|
|
449
477
|
elif isinstance(spec, dict):
|
|
450
478
|
cls._validate_column_spec(name, spec)
|
|
451
|
-
|
|
479
|
+
if 'type' in spec:
|
|
480
|
+
col_type = ts.ColumnType.normalize_type(spec['type'], nullable_default=True)
|
|
452
481
|
value_expr = spec.get('value')
|
|
453
482
|
if value_expr is not None and isinstance(value_expr, exprs.Expr):
|
|
454
483
|
# create copy so we can modify it
|
|
@@ -496,12 +525,13 @@ class Table(SchemaObject):
|
|
|
496
525
|
name: The name of the column to drop.
|
|
497
526
|
|
|
498
527
|
Raises:
|
|
499
|
-
Error: If the column does not exist or if it is referenced by a computed column.
|
|
528
|
+
Error: If the column does not exist or if it is referenced by a dependent computed column.
|
|
500
529
|
|
|
501
530
|
Examples:
|
|
502
|
-
Drop column
|
|
531
|
+
Drop the column `col` from the table `my_table`:
|
|
503
532
|
|
|
504
|
-
>>> tbl.
|
|
533
|
+
>>> tbl = pxt.get_table('my_table')
|
|
534
|
+
... tbl.drop_column('col')
|
|
505
535
|
"""
|
|
506
536
|
self._check_is_dropped()
|
|
507
537
|
|
|
@@ -544,43 +574,59 @@ class Table(SchemaObject):
|
|
|
544
574
|
new_name: The new name of the column.
|
|
545
575
|
|
|
546
576
|
Raises:
|
|
547
|
-
Error: If the column does not exist or if the new name is invalid or already exists.
|
|
577
|
+
Error: If the column does not exist, or if the new name is invalid or already exists.
|
|
548
578
|
|
|
549
579
|
Examples:
|
|
550
|
-
Rename column
|
|
580
|
+
Rename the column `col1` to `col2` of the table `my_table`:
|
|
551
581
|
|
|
552
|
-
>>> tbl.
|
|
582
|
+
>>> tbl = pxt.get_table('my_table')
|
|
583
|
+
... tbl.rename_column('col1', 'col2')
|
|
553
584
|
"""
|
|
554
585
|
self._check_is_dropped()
|
|
555
586
|
self._tbl_version.rename_column(old_name, new_name)
|
|
556
587
|
|
|
557
588
|
def add_embedding_index(
|
|
558
589
|
self, col_name: str, *, idx_name: Optional[str] = None,
|
|
559
|
-
string_embed: Optional[
|
|
590
|
+
string_embed: Optional[pxt.Function] = None, image_embed: Optional[pxt.Function] = None,
|
|
560
591
|
metric: str = 'cosine'
|
|
561
592
|
) -> None:
|
|
562
|
-
"""
|
|
593
|
+
"""
|
|
594
|
+
Add an embedding index to the table. Once the index is added, it will be automatically kept up to date as new
|
|
595
|
+
rows are inserted into the table.
|
|
596
|
+
|
|
597
|
+
Indices are currently supported only for `String` and `Image` columns. The index must specify, at
|
|
598
|
+
minimum, an embedding of the appropriate type (string or image). It may optionally specify _both_ a string
|
|
599
|
+
and image embedding (into the same vector space); in particular, this can be used to provide similarity search
|
|
600
|
+
of text over an image column.
|
|
563
601
|
|
|
564
602
|
Args:
|
|
565
|
-
col_name: name of column to index
|
|
566
|
-
idx_name: name of index
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
603
|
+
col_name: The name of the column to index; must be a `String` or `Image` column.
|
|
604
|
+
idx_name: The name of the index. If not specified, a name such as `'idx0'` will be generated automatically.
|
|
605
|
+
If specified, the name must be unique for this table.
|
|
606
|
+
string_embed: A function to embed text; required if the column is a `String` column.
|
|
607
|
+
image_embed: A function to embed images; required if the column is an `Image` column.
|
|
608
|
+
metric: Distance metric to use for the index; one of `'cosine'`, `'ip'`, or `'l2'`;
|
|
609
|
+
the default is `'cosine'`.
|
|
570
610
|
|
|
571
611
|
Raises:
|
|
572
|
-
Error: If an index with that name already exists for the table or if the column does not exist.
|
|
612
|
+
Error: If an index with that name already exists for the table, or if the specified column does not exist.
|
|
573
613
|
|
|
574
614
|
Examples:
|
|
575
|
-
Add an index to the
|
|
615
|
+
Add an index to the `img` column of the table `my_table`:
|
|
576
616
|
|
|
577
|
-
>>> tbl.
|
|
617
|
+
>>> tbl = pxt.get_table('my_table')
|
|
618
|
+
... tbl.add_embedding_index('img', image_embed=my_image_func)
|
|
578
619
|
|
|
579
|
-
Add another index to the
|
|
580
|
-
and with a specific name;
|
|
620
|
+
Add another index to the `img` column, using the inner product as the distance metric,
|
|
621
|
+
and with a specific name; `string_embed` is also specified in order to search with text:
|
|
581
622
|
|
|
582
623
|
>>> tbl.add_embedding_index(
|
|
583
|
-
|
|
624
|
+
... 'img',
|
|
625
|
+
... idx_name='clip_idx',
|
|
626
|
+
... image_embed=my_image_func,
|
|
627
|
+
... string_embed=my_string_func,
|
|
628
|
+
... metric='ip'
|
|
629
|
+
... )
|
|
584
630
|
"""
|
|
585
631
|
if self._tbl_version_path.is_snapshot():
|
|
586
632
|
raise excs.Error('Cannot add an index to a snapshot')
|
|
@@ -599,37 +645,50 @@ class Table(SchemaObject):
|
|
|
599
645
|
FileCache.get().emit_eviction_warnings()
|
|
600
646
|
|
|
601
647
|
def drop_embedding_index(self, *, column_name: Optional[str] = None, idx_name: Optional[str] = None) -> None:
|
|
602
|
-
"""
|
|
648
|
+
"""
|
|
649
|
+
Drop an embedding index from the table. Either a column name or an index name (but not both) must be
|
|
650
|
+
specified. If a column name is specified, it must be a column containing exactly one embedding index;
|
|
651
|
+
otherwise the specific index name must be provided instead.
|
|
603
652
|
|
|
604
653
|
Args:
|
|
605
|
-
column_name: The name of the column
|
|
654
|
+
column_name: The name of the column from which to drop the index. Invalid if the column has multiple
|
|
606
655
|
embedding indices.
|
|
607
656
|
idx_name: The name of the index to drop.
|
|
608
657
|
|
|
609
658
|
Raises:
|
|
610
|
-
Error: If the
|
|
659
|
+
Error: If `column_name` is specified, but the column does not exist, or it contains no embedding
|
|
660
|
+
indices or multiple embedding indices.
|
|
661
|
+
Error: If `idx_name` is specified, but the index does not exist or is not an embedding index.
|
|
611
662
|
|
|
612
663
|
Examples:
|
|
613
|
-
Drop embedding index on the
|
|
664
|
+
Drop the embedding index on the `img` column of the table `my_table`:
|
|
614
665
|
|
|
615
|
-
>>> tbl.
|
|
666
|
+
>>> tbl = pxt.get_table('my_table')
|
|
667
|
+
... tbl.drop_embedding_index(column_name='img')
|
|
616
668
|
"""
|
|
617
669
|
self._drop_index(column_name=column_name, idx_name=idx_name, _idx_class=index.EmbeddingIndex)
|
|
618
670
|
|
|
619
671
|
def drop_index(self, *, column_name: Optional[str] = None, idx_name: Optional[str] = None) -> None:
|
|
620
|
-
"""
|
|
672
|
+
"""
|
|
673
|
+
Drop an index from the table. Either a column name or an index name (but not both) must be
|
|
674
|
+
specified. If a column name is specified, it must be a column containing exactly one index;
|
|
675
|
+
otherwise the specific index name must be provided instead.
|
|
621
676
|
|
|
622
677
|
Args:
|
|
623
|
-
column_name: The name of the column
|
|
678
|
+
column_name: The name of the column from which to drop the index. Invalid if the column has multiple
|
|
679
|
+
indices.
|
|
624
680
|
idx_name: The name of the index to drop.
|
|
625
681
|
|
|
626
682
|
Raises:
|
|
627
|
-
Error: If the
|
|
683
|
+
Error: If `column_name` is specified, but the column does not exist, or it contains no
|
|
684
|
+
indices or multiple indices.
|
|
685
|
+
Error: If `idx_name` is specified, but the index does not exist.
|
|
628
686
|
|
|
629
687
|
Examples:
|
|
630
|
-
Drop index on the
|
|
688
|
+
Drop the index on the `img` column of the table `my_table`:
|
|
631
689
|
|
|
632
|
-
>>> tbl.
|
|
690
|
+
>>> tbl = pxt.get_table('my_table')
|
|
691
|
+
... tbl.drop_index(column_name='img')
|
|
633
692
|
"""
|
|
634
693
|
self._drop_index(column_name=column_name, idx_name=idx_name)
|
|
635
694
|
|
|
@@ -682,7 +741,7 @@ class Table(SchemaObject):
|
|
|
682
741
|
To insert multiple rows at a time:
|
|
683
742
|
``insert(rows: Iterable[dict[str, Any]], /, *, print_stats: bool = False, fail_on_exception: bool = True)``
|
|
684
743
|
|
|
685
|
-
To insert just a single row, you can use the more
|
|
744
|
+
To insert just a single row, you can use the more concise syntax:
|
|
686
745
|
``insert(*, print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any)``
|
|
687
746
|
|
|
688
747
|
Args:
|
|
@@ -698,24 +757,31 @@ class Table(SchemaObject):
|
|
|
698
757
|
If ``True``, raise an exception that aborts the insert.
|
|
699
758
|
|
|
700
759
|
Returns:
|
|
701
|
-
|
|
760
|
+
An [`UpdateStatus`][pixeltable.UpdateStatus] object containing information about the update.
|
|
702
761
|
|
|
703
762
|
Raises:
|
|
704
|
-
Error:
|
|
763
|
+
Error: If one of the following conditions occurs:
|
|
764
|
+
|
|
765
|
+
- The table is a view or snapshot.
|
|
766
|
+
- The table has been dropped.
|
|
767
|
+
- One of the rows being inserted does not conform to the table schema.
|
|
768
|
+
- An error occurs during processing of computed columns, and `fail_on_exception=True`.
|
|
705
769
|
|
|
706
770
|
Examples:
|
|
707
|
-
Insert two rows into
|
|
771
|
+
Insert two rows into the table `my_table` with three int columns ``a``, ``b``, and ``c``.
|
|
772
|
+
Column ``c`` is nullable:
|
|
708
773
|
|
|
709
|
-
>>> tbl.
|
|
774
|
+
>>> tbl = pxt.get_table('my_table')
|
|
775
|
+
... tbl.insert([{'a': 1, 'b': 1, 'c': 1}, {'a': 2, 'b': 2}])
|
|
710
776
|
|
|
711
|
-
Insert a single row
|
|
777
|
+
Insert a single row using the alternative syntax:
|
|
712
778
|
|
|
713
|
-
>>> tbl.insert(a=
|
|
779
|
+
>>> tbl.insert(a=3, b=3, c=3)
|
|
714
780
|
"""
|
|
715
781
|
raise NotImplementedError
|
|
716
782
|
|
|
717
783
|
def update(
|
|
718
|
-
self, value_spec: dict[str, Any], where: Optional['
|
|
784
|
+
self, value_spec: dict[str, Any], where: Optional['pxt.exprs.Expr'] = None, cascade: bool = True
|
|
719
785
|
) -> UpdateStatus:
|
|
720
786
|
"""Update rows in this table.
|
|
721
787
|
|
|
@@ -807,7 +873,7 @@ class Table(SchemaObject):
|
|
|
807
873
|
FileCache.get().emit_eviction_warnings()
|
|
808
874
|
return status
|
|
809
875
|
|
|
810
|
-
def delete(self, where: Optional['
|
|
876
|
+
def delete(self, where: Optional['pxt.exprs.Expr'] = None) -> UpdateStatus:
|
|
811
877
|
"""Delete rows in this table.
|
|
812
878
|
|
|
813
879
|
Args:
|
|
@@ -836,17 +902,17 @@ class Table(SchemaObject):
|
|
|
836
902
|
self._tbl_version.revert()
|
|
837
903
|
|
|
838
904
|
@overload
|
|
839
|
-
def query(self, py_fn: Callable) -> '
|
|
905
|
+
def query(self, py_fn: Callable) -> 'pxt.func.QueryTemplateFunction': ...
|
|
840
906
|
|
|
841
907
|
@overload
|
|
842
908
|
def query(
|
|
843
909
|
self, *, param_types: Optional[list[ts.ColumnType]] = None
|
|
844
|
-
) -> Callable[[Callable], '
|
|
910
|
+
) -> Callable[[Callable], 'pxt.func.QueryTemplateFunction']: ...
|
|
845
911
|
|
|
846
912
|
def query(self, *args: Any, **kwargs: Any) -> Any:
|
|
847
913
|
def make_query_template(
|
|
848
914
|
py_fn: Callable, param_types: Optional[list[ts.ColumnType]]
|
|
849
|
-
) -> '
|
|
915
|
+
) -> 'pxt.func.QueryTemplateFunction':
|
|
850
916
|
if py_fn.__module__ != '__main__' and py_fn.__name__.isidentifier():
|
|
851
917
|
# this is a named function in a module
|
|
852
918
|
function_path = f'{py_fn.__module__}.{py_fn.__qualname__}'
|
|
@@ -855,12 +921,11 @@ class Table(SchemaObject):
|
|
|
855
921
|
query_name = py_fn.__name__
|
|
856
922
|
if query_name in self._schema.keys():
|
|
857
923
|
raise excs.Error(f'Query name {query_name!r} conflicts with existing column')
|
|
858
|
-
if query_name in self._queries:
|
|
924
|
+
if query_name in self.__query_scope._queries and function_path is not None:
|
|
859
925
|
raise excs.Error(f'Duplicate query name: {query_name!r}')
|
|
860
|
-
|
|
861
|
-
query_fn = func.QueryTemplateFunction.create(
|
|
926
|
+
query_fn = pxt.func.QueryTemplateFunction.create(
|
|
862
927
|
py_fn, param_types=param_types, path=function_path, name=query_name)
|
|
863
|
-
self._queries[query_name] = query_fn
|
|
928
|
+
self.__query_scope._queries[query_name] = query_fn
|
|
864
929
|
return query_fn
|
|
865
930
|
|
|
866
931
|
# TODO: verify that the inferred return type matches that of the template
|
|
@@ -877,7 +942,7 @@ class Table(SchemaObject):
|
|
|
877
942
|
def external_stores(self) -> list[str]:
|
|
878
943
|
return list(self._tbl_version.external_stores.keys())
|
|
879
944
|
|
|
880
|
-
def _link_external_store(self, store: '
|
|
945
|
+
def _link_external_store(self, store: 'pxt.io.ExternalStore') -> None:
|
|
881
946
|
"""
|
|
882
947
|
Links the specified `ExternalStore` to this table.
|
|
883
948
|
"""
|
|
@@ -932,7 +997,7 @@ class Table(SchemaObject):
|
|
|
932
997
|
*,
|
|
933
998
|
export_data: bool = True,
|
|
934
999
|
import_data: bool = True
|
|
935
|
-
) -> '
|
|
1000
|
+
) -> 'pxt.io.SyncStatus':
|
|
936
1001
|
"""
|
|
937
1002
|
Synchronizes this table with its linked external stores.
|
|
938
1003
|
|
|
@@ -954,9 +1019,7 @@ class Table(SchemaObject):
|
|
|
954
1019
|
if store not in all_stores:
|
|
955
1020
|
raise excs.Error(f'Table `{self._name}` has no external store with that name: {store}')
|
|
956
1021
|
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
sync_status = SyncStatus.empty()
|
|
1022
|
+
sync_status = pxt.io.SyncStatus.empty()
|
|
960
1023
|
for store in stores:
|
|
961
1024
|
store_obj = self._tbl_version.external_stores[store]
|
|
962
1025
|
store_sync_status = store_obj.sync(self, export_data=export_data, import_data=import_data)
|