pixeltable 0.2.19__py3-none-any.whl → 0.2.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +7 -19
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +7 -7
- pixeltable/catalog/globals.py +3 -0
- pixeltable/catalog/insertable_table.py +9 -7
- pixeltable/catalog/table.py +220 -143
- pixeltable/catalog/table_version.py +36 -18
- pixeltable/catalog/table_version_path.py +0 -8
- pixeltable/catalog/view.py +3 -3
- pixeltable/dataframe.py +9 -24
- pixeltable/env.py +107 -36
- pixeltable/exceptions.py +7 -4
- pixeltable/exec/__init__.py +1 -1
- pixeltable/exec/aggregation_node.py +22 -15
- pixeltable/exec/component_iteration_node.py +62 -41
- pixeltable/exec/data_row_batch.py +7 -7
- pixeltable/exec/exec_node.py +35 -7
- pixeltable/exec/expr_eval_node.py +2 -1
- pixeltable/exec/in_memory_data_node.py +9 -9
- pixeltable/exec/sql_node.py +265 -136
- pixeltable/exprs/__init__.py +1 -0
- pixeltable/exprs/data_row.py +30 -19
- pixeltable/exprs/expr.py +15 -14
- pixeltable/exprs/expr_dict.py +55 -0
- pixeltable/exprs/expr_set.py +21 -15
- pixeltable/exprs/function_call.py +21 -8
- pixeltable/exprs/json_path.py +3 -6
- pixeltable/exprs/rowid_ref.py +2 -2
- pixeltable/exprs/sql_element_cache.py +5 -1
- pixeltable/ext/functions/whisperx.py +7 -2
- pixeltable/func/callable_function.py +2 -2
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/query_template_function.py +11 -12
- pixeltable/func/signature.py +17 -15
- pixeltable/func/udf.py +0 -4
- pixeltable/functions/__init__.py +1 -1
- pixeltable/functions/audio.py +4 -6
- pixeltable/functions/globals.py +86 -42
- pixeltable/functions/huggingface.py +12 -14
- pixeltable/functions/image.py +59 -45
- pixeltable/functions/json.py +0 -1
- pixeltable/functions/mistralai.py +2 -2
- pixeltable/functions/openai.py +22 -25
- pixeltable/functions/string.py +50 -50
- pixeltable/functions/timestamp.py +20 -20
- pixeltable/functions/together.py +26 -12
- pixeltable/functions/video.py +11 -20
- pixeltable/functions/whisper.py +2 -20
- pixeltable/globals.py +57 -56
- pixeltable/index/base.py +2 -2
- pixeltable/index/btree.py +7 -7
- pixeltable/index/embedding_index.py +8 -10
- pixeltable/io/external_store.py +11 -5
- pixeltable/io/globals.py +3 -1
- pixeltable/io/hf_datasets.py +4 -4
- pixeltable/io/label_studio.py +6 -6
- pixeltable/io/parquet.py +14 -13
- pixeltable/iterators/document.py +10 -8
- pixeltable/iterators/video.py +10 -1
- pixeltable/metadata/__init__.py +3 -2
- pixeltable/metadata/converters/convert_14.py +4 -2
- pixeltable/metadata/converters/convert_15.py +1 -1
- pixeltable/metadata/converters/convert_19.py +1 -0
- pixeltable/metadata/converters/convert_20.py +1 -1
- pixeltable/metadata/converters/util.py +9 -8
- pixeltable/metadata/schema.py +32 -21
- pixeltable/plan.py +136 -154
- pixeltable/store.py +51 -36
- pixeltable/tool/create_test_db_dump.py +7 -7
- pixeltable/tool/doc_plugins/griffe.py +3 -34
- pixeltable/tool/mypy_plugin.py +32 -0
- pixeltable/type_system.py +243 -60
- pixeltable/utils/arrow.py +10 -9
- pixeltable/utils/coco.py +4 -4
- pixeltable/utils/documents.py +1 -1
- pixeltable/utils/filecache.py +131 -84
- pixeltable/utils/formatter.py +1 -1
- pixeltable/utils/http_server.py +2 -5
- pixeltable/utils/media_store.py +6 -6
- pixeltable/utils/pytorch.py +10 -11
- pixeltable/utils/sql.py +2 -1
- {pixeltable-0.2.19.dist-info → pixeltable-0.2.21.dist-info}/METADATA +16 -7
- pixeltable-0.2.21.dist-info/RECORD +148 -0
- pixeltable/utils/help.py +0 -11
- pixeltable-0.2.19.dist-info/RECORD +0 -147
- {pixeltable-0.2.19.dist-info → pixeltable-0.2.21.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.19.dist-info → pixeltable-0.2.21.dist-info}/WHEEL +0 -0
- {pixeltable-0.2.19.dist-info → pixeltable-0.2.21.dist-info}/entry_points.txt +0 -0
pixeltable/catalog/table.py
CHANGED
|
@@ -5,14 +5,15 @@ import builtins
|
|
|
5
5
|
import json
|
|
6
6
|
import logging
|
|
7
7
|
from pathlib import Path
|
|
8
|
-
from typing import
|
|
8
|
+
from typing import _GenericAlias # type: ignore[attr-defined]
|
|
9
|
+
from typing import TYPE_CHECKING, Any, Callable, Iterable, Literal, Optional, Set, Sequence, Tuple, Type, Union, overload
|
|
9
10
|
from uuid import UUID
|
|
10
11
|
|
|
11
12
|
import pandas as pd
|
|
12
13
|
import pandas.io.formats.style
|
|
13
14
|
import sqlalchemy as sql
|
|
14
15
|
|
|
15
|
-
import pixeltable
|
|
16
|
+
import pixeltable as pxt
|
|
16
17
|
import pixeltable.catalog as catalog
|
|
17
18
|
import pixeltable.env as env
|
|
18
19
|
import pixeltable.exceptions as excs
|
|
@@ -20,6 +21,7 @@ import pixeltable.exprs as exprs
|
|
|
20
21
|
import pixeltable.index as index
|
|
21
22
|
import pixeltable.metadata.schema as schema
|
|
22
23
|
import pixeltable.type_system as ts
|
|
24
|
+
from pixeltable.utils.filecache import FileCache
|
|
23
25
|
|
|
24
26
|
from .column import Column
|
|
25
27
|
from .globals import _ROWID_COLUMN_NAME, UpdateStatus, is_system_column_name, is_valid_identifier
|
|
@@ -33,14 +35,31 @@ if TYPE_CHECKING:
|
|
|
33
35
|
_logger = logging.getLogger('pixeltable')
|
|
34
36
|
|
|
35
37
|
class Table(SchemaObject):
|
|
36
|
-
"""
|
|
38
|
+
"""
|
|
39
|
+
A handle to a table, view, or snapshot. This class is the primary interface through which table operations
|
|
40
|
+
(queries, insertions, updates, etc.) are performed in Pixeltable.
|
|
41
|
+
"""
|
|
42
|
+
# Every user-invoked operation that runs an ExecNode tree (directly or indirectly) needs to call
|
|
43
|
+
# FileCache.emit_eviction_warnings() at the end of the operation.
|
|
37
44
|
|
|
38
45
|
def __init__(self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath):
|
|
39
46
|
super().__init__(id, name, dir_id)
|
|
40
47
|
self._is_dropped = False
|
|
41
48
|
self._tbl_version_path = tbl_version_path
|
|
42
|
-
|
|
43
|
-
|
|
49
|
+
self.__query_scope = self.QueryScope(self)
|
|
50
|
+
|
|
51
|
+
class QueryScope:
|
|
52
|
+
__table: 'Table'
|
|
53
|
+
_queries: dict[str, pxt.func.QueryTemplateFunction]
|
|
54
|
+
|
|
55
|
+
def __init__(self, table: 'Table') -> None:
|
|
56
|
+
self.__table = table
|
|
57
|
+
self._queries = {}
|
|
58
|
+
|
|
59
|
+
def __getattr__(self, name: str) -> pxt.func.QueryTemplateFunction:
|
|
60
|
+
if name in self._queries:
|
|
61
|
+
return self._queries[name]
|
|
62
|
+
raise AttributeError(f'Table {self.__table._name!r} has no query with that name: {name!r}')
|
|
44
63
|
|
|
45
64
|
def _move(self, new_name: str, new_dir_id: UUID) -> None:
|
|
46
65
|
super()._move(new_name, new_dir_id)
|
|
@@ -53,6 +72,28 @@ class Table(SchemaObject):
|
|
|
53
72
|
conn.execute(stmt, {'new_dir_id': new_dir_id, 'new_name': json.dumps(new_name), 'id': self._id})
|
|
54
73
|
|
|
55
74
|
def get_metadata(self) -> dict[str, Any]:
|
|
75
|
+
"""
|
|
76
|
+
Retrieves metadata associated with this table.
|
|
77
|
+
|
|
78
|
+
Returns:
|
|
79
|
+
A dictionary containing the metadata, in the following format:
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
{
|
|
83
|
+
'base': None, # If this is a view or snapshot, will contain the name of its base table
|
|
84
|
+
'schema': {
|
|
85
|
+
'col1': StringType(),
|
|
86
|
+
'col2': IntType(),
|
|
87
|
+
},
|
|
88
|
+
'version': 22,
|
|
89
|
+
'schema_version': 1,
|
|
90
|
+
'comment': '',
|
|
91
|
+
'num_retained_versions': 10,
|
|
92
|
+
'is_view': False,
|
|
93
|
+
'is_snapshot': False,
|
|
94
|
+
}
|
|
95
|
+
```
|
|
96
|
+
"""
|
|
56
97
|
md = super().get_metadata()
|
|
57
98
|
md['base'] = self._base._path if self._base is not None else None
|
|
58
99
|
md['schema'] = self._schema
|
|
@@ -79,25 +120,24 @@ class Table(SchemaObject):
|
|
|
79
120
|
if self._is_dropped:
|
|
80
121
|
raise excs.Error(f'{self._display_name()} {self._name} has been dropped')
|
|
81
122
|
|
|
82
|
-
def __getattr__(
|
|
83
|
-
|
|
84
|
-
) -> Union['pixeltable.exprs.ColumnRef', 'pixeltable.func.QueryTemplateFunction']:
|
|
85
|
-
"""Return a ColumnRef or QueryTemplateFunction for the given name.
|
|
123
|
+
def __getattr__(self, name: str) -> 'pxt.exprs.ColumnRef':
|
|
124
|
+
"""Return a ColumnRef for the given name.
|
|
86
125
|
"""
|
|
87
|
-
if name in self._queries:
|
|
88
|
-
return self._queries[name]
|
|
89
126
|
return getattr(self._tbl_version_path, name)
|
|
90
127
|
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
]:
|
|
128
|
+
@overload
|
|
129
|
+
def __getitem__(self, name: str) -> 'pxt.exprs.ColumnRef': ...
|
|
130
|
+
|
|
131
|
+
@overload
|
|
132
|
+
def __getitem__(self, index: Union[exprs.Expr, Sequence[exprs.Expr]]) -> 'pxt.DataFrame': ...
|
|
133
|
+
|
|
134
|
+
def __getitem__(self, index):
|
|
96
135
|
"""Return a ColumnRef or QueryTemplateFunction for the given name, or a DataFrame for the given slice.
|
|
97
136
|
"""
|
|
98
|
-
if isinstance(index, str)
|
|
99
|
-
return self
|
|
100
|
-
|
|
137
|
+
if isinstance(index, str):
|
|
138
|
+
return getattr(self, index)
|
|
139
|
+
else:
|
|
140
|
+
return self._df()[index]
|
|
101
141
|
|
|
102
142
|
def list_views(self, *, recursive: bool = True) -> list[str]:
|
|
103
143
|
"""
|
|
@@ -106,6 +146,9 @@ class Table(SchemaObject):
|
|
|
106
146
|
Args:
|
|
107
147
|
recursive: If `False`, returns only the immediate successor views of this `Table`. If `True`, returns
|
|
108
148
|
all sub-views (including views of views, etc.)
|
|
149
|
+
|
|
150
|
+
Returns:
|
|
151
|
+
A list of view paths.
|
|
109
152
|
"""
|
|
110
153
|
return [t._path for t in self._get_views(recursive=recursive)]
|
|
111
154
|
|
|
@@ -116,60 +159,55 @@ class Table(SchemaObject):
|
|
|
116
159
|
else:
|
|
117
160
|
return dependents
|
|
118
161
|
|
|
119
|
-
def _df(self) -> '
|
|
162
|
+
def _df(self) -> 'pxt.dataframe.DataFrame':
|
|
120
163
|
"""Return a DataFrame for this table.
|
|
121
164
|
"""
|
|
122
165
|
# local import: avoid circular imports
|
|
123
|
-
|
|
124
|
-
|
|
166
|
+
return pxt.DataFrame(self._tbl_version_path)
|
|
167
|
+
|
|
168
|
+
@property
|
|
169
|
+
def queries(self) -> 'Table.QueryScope':
|
|
170
|
+
return self.__query_scope
|
|
125
171
|
|
|
126
|
-
def select(self, *items: Any, **named_items: Any) -> '
|
|
172
|
+
def select(self, *items: Any, **named_items: Any) -> 'pxt.DataFrame':
|
|
127
173
|
"""Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
|
|
128
|
-
|
|
129
|
-
from pixeltable.dataframe import DataFrame
|
|
130
|
-
return DataFrame(self._tbl_version_path).select(*items, **named_items)
|
|
174
|
+
return self._df().select(*items, **named_items)
|
|
131
175
|
|
|
132
|
-
def where(self, pred: 'exprs.Expr') -> '
|
|
176
|
+
def where(self, pred: 'exprs.Expr') -> 'pxt.DataFrame':
|
|
133
177
|
"""Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
|
|
134
|
-
|
|
135
|
-
from pixeltable.dataframe import DataFrame
|
|
136
|
-
return DataFrame(self._tbl_version_path).where(pred)
|
|
178
|
+
return self._df().where(pred)
|
|
137
179
|
|
|
138
|
-
def order_by(self, *items: 'exprs.Expr', asc: bool = True) -> '
|
|
180
|
+
def order_by(self, *items: 'exprs.Expr', asc: bool = True) -> 'pxt.DataFrame':
|
|
139
181
|
"""Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
|
|
140
|
-
|
|
141
|
-
from pixeltable.dataframe import DataFrame
|
|
142
|
-
return DataFrame(self._tbl_version_path).order_by(*items, asc=asc)
|
|
182
|
+
return self._df().order_by(*items, asc=asc)
|
|
143
183
|
|
|
144
|
-
def group_by(self, *items: 'exprs.Expr') -> '
|
|
184
|
+
def group_by(self, *items: 'exprs.Expr') -> 'pxt.DataFrame':
|
|
145
185
|
"""Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
|
|
146
|
-
|
|
147
|
-
return DataFrame(self._tbl_version_path).group_by(*items)
|
|
186
|
+
return self._df().group_by(*items)
|
|
148
187
|
|
|
149
|
-
def limit(self, n: int) -> '
|
|
150
|
-
|
|
151
|
-
return DataFrame(self._tbl_version_path).limit(n)
|
|
188
|
+
def limit(self, n: int) -> 'pxt.DataFrame':
|
|
189
|
+
return self._df().limit(n)
|
|
152
190
|
|
|
153
|
-
def collect(self) -> '
|
|
191
|
+
def collect(self) -> 'pxt.dataframe.DataFrameResultSet':
|
|
154
192
|
"""Return rows from this table."""
|
|
155
193
|
return self._df().collect()
|
|
156
194
|
|
|
157
195
|
def show(
|
|
158
196
|
self, *args, **kwargs
|
|
159
|
-
) -> '
|
|
197
|
+
) -> 'pxt.dataframe.DataFrameResultSet':
|
|
160
198
|
"""Return rows from this table.
|
|
161
199
|
"""
|
|
162
200
|
return self._df().show(*args, **kwargs)
|
|
163
201
|
|
|
164
202
|
def head(
|
|
165
203
|
self, *args, **kwargs
|
|
166
|
-
) -> '
|
|
204
|
+
) -> 'pxt.dataframe.DataFrameResultSet':
|
|
167
205
|
"""Return the first n rows inserted into this table."""
|
|
168
206
|
return self._df().head(*args, **kwargs)
|
|
169
207
|
|
|
170
208
|
def tail(
|
|
171
209
|
self, *args, **kwargs
|
|
172
|
-
) -> '
|
|
210
|
+
) -> 'pxt.dataframe.DataFrameResultSet':
|
|
173
211
|
"""Return the last n rows inserted into this table."""
|
|
174
212
|
return self._df().tail(*args, **kwargs)
|
|
175
213
|
|
|
@@ -185,7 +223,7 @@ class Table(SchemaObject):
|
|
|
185
223
|
@property
|
|
186
224
|
def _query_names(self) -> list[str]:
|
|
187
225
|
"""Return the names of the registered queries for this table."""
|
|
188
|
-
return list(self._queries.keys())
|
|
226
|
+
return list(self.__query_scope._queries.keys())
|
|
189
227
|
|
|
190
228
|
@property
|
|
191
229
|
def _base(self) -> Optional['Table']:
|
|
@@ -210,7 +248,7 @@ class Table(SchemaObject):
|
|
|
210
248
|
cols = self._tbl_version_path.columns()
|
|
211
249
|
df = pd.DataFrame({
|
|
212
250
|
'Column Name': [c.name for c in cols],
|
|
213
|
-
'Type': [
|
|
251
|
+
'Type': [c.col_type._to_str(as_schema=True) for c in cols],
|
|
214
252
|
'Computed With': [c.value_expr.display_str(inline=False) if c.value_expr is not None else '' for c in cols],
|
|
215
253
|
})
|
|
216
254
|
return df
|
|
@@ -233,7 +271,7 @@ class Table(SchemaObject):
|
|
|
233
271
|
from IPython.display import display
|
|
234
272
|
display(self._description_html())
|
|
235
273
|
else:
|
|
236
|
-
print(self
|
|
274
|
+
print(repr(self))
|
|
237
275
|
|
|
238
276
|
# TODO: Display comments in _repr_html()
|
|
239
277
|
def __repr__(self) -> str:
|
|
@@ -261,75 +299,77 @@ class Table(SchemaObject):
|
|
|
261
299
|
"""Return a PyTorch Dataset for this table.
|
|
262
300
|
See DataFrame.to_pytorch_dataset()
|
|
263
301
|
"""
|
|
264
|
-
|
|
265
|
-
return DataFrame(self._tbl_version_path).to_pytorch_dataset(image_format=image_format)
|
|
302
|
+
return self._df().to_pytorch_dataset(image_format=image_format)
|
|
266
303
|
|
|
267
304
|
def to_coco_dataset(self) -> Path:
|
|
268
305
|
"""Return the path to a COCO json file for this table.
|
|
269
306
|
See DataFrame.to_coco_dataset()
|
|
270
307
|
"""
|
|
271
|
-
|
|
272
|
-
return DataFrame(self._tbl_version_path).to_coco_dataset()
|
|
308
|
+
return self._df().to_coco_dataset()
|
|
273
309
|
|
|
274
310
|
def __setitem__(self, col_name: str, spec: Union[ts.ColumnType, exprs.Expr]) -> None:
|
|
275
311
|
"""
|
|
276
312
|
Adds a column to the table. This is an alternate syntax for `add_column()`; the meaning of
|
|
277
313
|
|
|
278
|
-
>>> tbl['new_col'] =
|
|
314
|
+
>>> tbl['new_col'] = pxt.Int
|
|
279
315
|
|
|
280
316
|
is exactly equivalent to
|
|
281
317
|
|
|
282
|
-
>>> tbl.add_column(new_col=
|
|
318
|
+
>>> tbl.add_column(new_col=pxt.Int)
|
|
283
319
|
|
|
284
320
|
For details, see the documentation for [`add_column()`][pixeltable.catalog.Table.add_column].
|
|
285
321
|
"""
|
|
286
322
|
if not isinstance(col_name, str):
|
|
287
323
|
raise excs.Error(f'Column name must be a string, got {type(col_name)}')
|
|
288
|
-
if not isinstance(spec, (ts.ColumnType, exprs.Expr)):
|
|
289
|
-
raise excs.Error(f'Column spec must be a ColumnType or
|
|
290
|
-
self.add_column(type=None, stored=None, print_stats=False, **{col_name: spec})
|
|
324
|
+
if not isinstance(spec, (ts.ColumnType, exprs.Expr, type, _GenericAlias)):
|
|
325
|
+
raise excs.Error(f'Column spec must be a ColumnType, Expr, or type, got {type(spec)}')
|
|
326
|
+
self.add_column(type=None, stored=None, print_stats=False, on_error='abort', **{col_name: spec})
|
|
291
327
|
|
|
292
328
|
def add_column(
|
|
293
329
|
self,
|
|
294
330
|
*,
|
|
295
|
-
type:
|
|
331
|
+
type: Union[ts.ColumnType, builtins.type, _GenericAlias, None] = None,
|
|
296
332
|
stored: Optional[bool] = None,
|
|
297
333
|
print_stats: bool = False,
|
|
298
|
-
|
|
334
|
+
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
335
|
+
**kwargs: Union[ts.ColumnType, builtins.type, _GenericAlias, exprs.Expr, Callable]
|
|
299
336
|
) -> UpdateStatus:
|
|
300
337
|
"""
|
|
301
338
|
Adds a column to the table.
|
|
302
339
|
|
|
303
340
|
Args:
|
|
304
|
-
kwargs: Exactly one keyword argument of the form
|
|
305
|
-
type: The type of the column. Only valid and required if
|
|
341
|
+
kwargs: Exactly one keyword argument of the form `column_name=type` or `column_name=expression`.
|
|
342
|
+
type: The type of the column. Only valid and required if `value-expression` is a Callable.
|
|
306
343
|
stored: Whether the column is materialized and stored or computed on demand. Only valid for image columns.
|
|
307
|
-
print_stats: If
|
|
344
|
+
print_stats: If `True`, print execution metrics during evaluation.
|
|
345
|
+
on_error: Determines the behavior if an error occurs while evaluating the column expression for at least one
|
|
346
|
+
row.
|
|
347
|
+
|
|
348
|
+
- If `on_error='abort'`, then an exception will be raised and the column will not be added.
|
|
349
|
+
- If `on_error='ignore'`, then execution will continue and the column will be added. Any rows
|
|
350
|
+
with errors will have a `None` value for the column, with information about the error stored in the
|
|
351
|
+
corresponding `tbl.col_name.errortype` and `tbl.col_name.errormsg` fields.
|
|
308
352
|
|
|
309
353
|
Returns:
|
|
310
|
-
execution status
|
|
354
|
+
Information about the execution status of the operation.
|
|
311
355
|
|
|
312
356
|
Raises:
|
|
313
357
|
Error: If the column name is invalid or already exists.
|
|
314
358
|
|
|
315
359
|
Examples:
|
|
316
|
-
Add an int column
|
|
360
|
+
Add an int column:
|
|
317
361
|
|
|
318
|
-
>>> tbl.add_column(new_col=
|
|
362
|
+
>>> tbl.add_column(new_col=pxt.Int)
|
|
319
363
|
|
|
320
364
|
Alternatively, this can also be expressed as:
|
|
321
365
|
|
|
322
|
-
>>> tbl['new_col'] =
|
|
366
|
+
>>> tbl['new_col'] = pxt.Int
|
|
323
367
|
|
|
324
|
-
For a table with int column
|
|
368
|
+
For a table with int column `int_col`, add a column that is the factorial of ``int_col``. The names of
|
|
325
369
|
the parameters of the Callable must correspond to existing column names (the column values are then passed
|
|
326
370
|
as arguments to the Callable). In this case, the column type needs to be specified explicitly:
|
|
327
371
|
|
|
328
|
-
>>> tbl.add_column(factorial=lambda int_col: math.factorial(int_col), type=
|
|
329
|
-
|
|
330
|
-
Alternatively, this can also be expressed as:
|
|
331
|
-
|
|
332
|
-
>>> tbl['factorial'] = {'value': lambda int_col: math.factorial(int_col), 'type': IntType()}
|
|
372
|
+
>>> tbl.add_column(factorial=lambda int_col: math.factorial(int_col), type=pxt.Int)
|
|
333
373
|
|
|
334
374
|
For a table with an image column ``frame``, add an image column ``rotated`` that rotates the image by
|
|
335
375
|
90 degrees. In this case, the column type is inferred from the expression. Also, the column is not stored
|
|
@@ -341,13 +381,9 @@ class Table(SchemaObject):
|
|
|
341
381
|
|
|
342
382
|
>>> tbl['rotated'] = tbl.frame.rotate(90)
|
|
343
383
|
|
|
344
|
-
Do the same, but now the column is
|
|
345
|
-
|
|
346
|
-
>>> tbl.add_column(rotated=tbl.frame.rotate(90), stored=True)
|
|
347
|
-
|
|
348
|
-
Alternatively, this can also be expressed as:
|
|
384
|
+
Do the same, but now the column is unstored:
|
|
349
385
|
|
|
350
|
-
>>> tbl
|
|
386
|
+
>>> tbl.add_column(rotated=tbl.frame.rotate(90), stored=False)
|
|
351
387
|
"""
|
|
352
388
|
self._check_is_dropped()
|
|
353
389
|
# verify kwargs and construct column schema dict
|
|
@@ -359,22 +395,24 @@ class Table(SchemaObject):
|
|
|
359
395
|
col_name, spec = next(iter(kwargs.items()))
|
|
360
396
|
if not is_valid_identifier(col_name):
|
|
361
397
|
raise excs.Error(f'Invalid column name: {col_name!r}')
|
|
362
|
-
if isinstance(spec, (ts.ColumnType, exprs.Expr)) and type is not None:
|
|
398
|
+
if isinstance(spec, (ts.ColumnType, builtins.type, _GenericAlias, exprs.Expr)) and type is not None:
|
|
363
399
|
raise excs.Error(f'add_column(): keyword argument "type" is redundant')
|
|
364
400
|
|
|
365
401
|
col_schema: dict[str, Any] = {}
|
|
366
|
-
if isinstance(spec, ts.ColumnType):
|
|
367
|
-
col_schema['type'] = spec
|
|
402
|
+
if isinstance(spec, (ts.ColumnType, builtins.type, _GenericAlias)):
|
|
403
|
+
col_schema['type'] = ts.ColumnType.normalize_type(spec, nullable_default=True)
|
|
368
404
|
else:
|
|
369
405
|
col_schema['value'] = spec
|
|
370
406
|
if type is not None:
|
|
371
|
-
col_schema['type'] = type
|
|
407
|
+
col_schema['type'] = ts.ColumnType.normalize_type(type, nullable_default=True)
|
|
372
408
|
if stored is not None:
|
|
373
409
|
col_schema['stored'] = stored
|
|
374
410
|
|
|
375
411
|
new_col = self._create_columns({col_name: col_schema})[0]
|
|
376
412
|
self._verify_column(new_col, set(self._schema.keys()), set(self._query_names))
|
|
377
|
-
|
|
413
|
+
status = self._tbl_version.add_column(new_col, print_stats=print_stats, on_error=on_error)
|
|
414
|
+
FileCache.get().emit_eviction_warnings()
|
|
415
|
+
return status
|
|
378
416
|
|
|
379
417
|
@classmethod
|
|
380
418
|
def _validate_column_spec(cls, name: str, spec: dict[str, Any]) -> None:
|
|
@@ -392,8 +430,8 @@ class Table(SchemaObject):
|
|
|
392
430
|
|
|
393
431
|
if 'type' in spec:
|
|
394
432
|
has_type = True
|
|
395
|
-
if not isinstance(spec['type'], ts.ColumnType):
|
|
396
|
-
raise excs.Error(f'Column {name}: "type" must be a ColumnType, got {spec["type"]}')
|
|
433
|
+
if not isinstance(spec['type'], (ts.ColumnType, type, _GenericAlias)):
|
|
434
|
+
raise excs.Error(f'Column {name}: "type" must be a type or ColumnType, got {spec["type"]}')
|
|
397
435
|
|
|
398
436
|
if 'value' in spec:
|
|
399
437
|
value_spec = spec['value']
|
|
@@ -426,20 +464,20 @@ class Table(SchemaObject):
|
|
|
426
464
|
primary_key: Optional[bool] = None
|
|
427
465
|
stored = True
|
|
428
466
|
|
|
429
|
-
if isinstance(spec, ts.ColumnType):
|
|
430
|
-
|
|
431
|
-
col_type = spec
|
|
467
|
+
if isinstance(spec, (ts.ColumnType, type, _GenericAlias)):
|
|
468
|
+
col_type = ts.ColumnType.normalize_type(spec, nullable_default=True)
|
|
432
469
|
elif isinstance(spec, exprs.Expr):
|
|
433
470
|
# create copy so we can modify it
|
|
434
471
|
value_expr = spec.copy()
|
|
435
472
|
elif callable(spec):
|
|
436
|
-
raise excs.Error(
|
|
473
|
+
raise excs.Error(
|
|
437
474
|
f'Column {name} computed with a Callable: specify using a dictionary with '
|
|
438
|
-
f'the "value" and "type" keys (e.g., "{name}": {{"value": <Callable>, "type":
|
|
439
|
-
)
|
|
475
|
+
f'the "value" and "type" keys (e.g., "{name}": {{"value": <Callable>, "type": pxt.Int}})'
|
|
476
|
+
)
|
|
440
477
|
elif isinstance(spec, dict):
|
|
441
478
|
cls._validate_column_spec(name, spec)
|
|
442
|
-
|
|
479
|
+
if 'type' in spec:
|
|
480
|
+
col_type = ts.ColumnType.normalize_type(spec['type'], nullable_default=True)
|
|
443
481
|
value_expr = spec.get('value')
|
|
444
482
|
if value_expr is not None and isinstance(value_expr, exprs.Expr):
|
|
445
483
|
# create copy so we can modify it
|
|
@@ -487,12 +525,13 @@ class Table(SchemaObject):
|
|
|
487
525
|
name: The name of the column to drop.
|
|
488
526
|
|
|
489
527
|
Raises:
|
|
490
|
-
Error: If the column does not exist or if it is referenced by a computed column.
|
|
528
|
+
Error: If the column does not exist or if it is referenced by a dependent computed column.
|
|
491
529
|
|
|
492
530
|
Examples:
|
|
493
|
-
Drop column
|
|
531
|
+
Drop the column `col` from the table `my_table`:
|
|
494
532
|
|
|
495
|
-
>>> tbl.
|
|
533
|
+
>>> tbl = pxt.get_table('my_table')
|
|
534
|
+
... tbl.drop_column('col')
|
|
496
535
|
"""
|
|
497
536
|
self._check_is_dropped()
|
|
498
537
|
|
|
@@ -535,43 +574,59 @@ class Table(SchemaObject):
|
|
|
535
574
|
new_name: The new name of the column.
|
|
536
575
|
|
|
537
576
|
Raises:
|
|
538
|
-
Error: If the column does not exist or if the new name is invalid or already exists.
|
|
577
|
+
Error: If the column does not exist, or if the new name is invalid or already exists.
|
|
539
578
|
|
|
540
579
|
Examples:
|
|
541
|
-
Rename column
|
|
580
|
+
Rename the column `col1` to `col2` of the table `my_table`:
|
|
542
581
|
|
|
543
|
-
>>> tbl.
|
|
582
|
+
>>> tbl = pxt.get_table('my_table')
|
|
583
|
+
... tbl.rename_column('col1', 'col2')
|
|
544
584
|
"""
|
|
545
585
|
self._check_is_dropped()
|
|
546
586
|
self._tbl_version.rename_column(old_name, new_name)
|
|
547
587
|
|
|
548
588
|
def add_embedding_index(
|
|
549
589
|
self, col_name: str, *, idx_name: Optional[str] = None,
|
|
550
|
-
string_embed: Optional[
|
|
590
|
+
string_embed: Optional[pxt.Function] = None, image_embed: Optional[pxt.Function] = None,
|
|
551
591
|
metric: str = 'cosine'
|
|
552
592
|
) -> None:
|
|
553
|
-
"""
|
|
593
|
+
"""
|
|
594
|
+
Add an embedding index to the table. Once the index is added, it will be automatically kept up to date as new
|
|
595
|
+
rows are inserted into the table.
|
|
596
|
+
|
|
597
|
+
Indices are currently supported only for `String` and `Image` columns. The index must specify, at
|
|
598
|
+
minimum, an embedding of the appropriate type (string or image). It may optionally specify _both_ a string
|
|
599
|
+
and image embedding (into the same vector space); in particular, this can be used to provide similarity search
|
|
600
|
+
of text over an image column.
|
|
554
601
|
|
|
555
602
|
Args:
|
|
556
|
-
col_name: name of column to index
|
|
557
|
-
idx_name: name of index
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
603
|
+
col_name: The name of the column to index; must be a `String` or `Image` column.
|
|
604
|
+
idx_name: The name of the index. If not specified, a name such as `'idx0'` will be generated automatically.
|
|
605
|
+
If specified, the name must be unique for this table.
|
|
606
|
+
string_embed: A function to embed text; required if the column is a `String` column.
|
|
607
|
+
image_embed: A function to embed images; required if the column is an `Image` column.
|
|
608
|
+
metric: Distance metric to use for the index; one of `'cosine'`, `'ip'`, or `'l2'`;
|
|
609
|
+
the default is `'cosine'`.
|
|
561
610
|
|
|
562
611
|
Raises:
|
|
563
|
-
Error: If an index with that name already exists for the table or if the column does not exist.
|
|
612
|
+
Error: If an index with that name already exists for the table, or if the specified column does not exist.
|
|
564
613
|
|
|
565
614
|
Examples:
|
|
566
|
-
Add an index to the
|
|
615
|
+
Add an index to the `img` column of the table `my_table`:
|
|
567
616
|
|
|
568
|
-
>>> tbl.
|
|
617
|
+
>>> tbl = pxt.get_table('my_table')
|
|
618
|
+
... tbl.add_embedding_index('img', image_embed=my_image_func)
|
|
569
619
|
|
|
570
|
-
Add another index to the
|
|
571
|
-
and with a specific name;
|
|
620
|
+
Add another index to the `img` column, using the inner product as the distance metric,
|
|
621
|
+
and with a specific name; `string_embed` is also specified in order to search with text:
|
|
572
622
|
|
|
573
623
|
>>> tbl.add_embedding_index(
|
|
574
|
-
|
|
624
|
+
... 'img',
|
|
625
|
+
... idx_name='clip_idx',
|
|
626
|
+
... image_embed=my_image_func,
|
|
627
|
+
... string_embed=my_string_func,
|
|
628
|
+
... metric='ip'
|
|
629
|
+
... )
|
|
575
630
|
"""
|
|
576
631
|
if self._tbl_version_path.is_snapshot():
|
|
577
632
|
raise excs.Error('Cannot add an index to a snapshot')
|
|
@@ -587,39 +642,53 @@ class Table(SchemaObject):
|
|
|
587
642
|
idx = EmbeddingIndex(col, metric=metric, string_embed=string_embed, image_embed=image_embed)
|
|
588
643
|
status = self._tbl_version.add_index(col, idx_name=idx_name, idx=idx)
|
|
589
644
|
# TODO: how to deal with exceptions here? drop the index and raise?
|
|
645
|
+
FileCache.get().emit_eviction_warnings()
|
|
590
646
|
|
|
591
647
|
def drop_embedding_index(self, *, column_name: Optional[str] = None, idx_name: Optional[str] = None) -> None:
|
|
592
|
-
"""
|
|
648
|
+
"""
|
|
649
|
+
Drop an embedding index from the table. Either a column name or an index name (but not both) must be
|
|
650
|
+
specified. If a column name is specified, it must be a column containing exactly one embedding index;
|
|
651
|
+
otherwise the specific index name must be provided instead.
|
|
593
652
|
|
|
594
653
|
Args:
|
|
595
|
-
column_name: The name of the column
|
|
654
|
+
column_name: The name of the column from which to drop the index. Invalid if the column has multiple
|
|
596
655
|
embedding indices.
|
|
597
656
|
idx_name: The name of the index to drop.
|
|
598
657
|
|
|
599
658
|
Raises:
|
|
600
|
-
Error: If the
|
|
659
|
+
Error: If `column_name` is specified, but the column does not exist, or it contains no embedding
|
|
660
|
+
indices or multiple embedding indices.
|
|
661
|
+
Error: If `idx_name` is specified, but the index does not exist or is not an embedding index.
|
|
601
662
|
|
|
602
663
|
Examples:
|
|
603
|
-
Drop embedding index on the
|
|
664
|
+
Drop the embedding index on the `img` column of the table `my_table`:
|
|
604
665
|
|
|
605
|
-
>>> tbl.
|
|
666
|
+
>>> tbl = pxt.get_table('my_table')
|
|
667
|
+
... tbl.drop_embedding_index(column_name='img')
|
|
606
668
|
"""
|
|
607
669
|
self._drop_index(column_name=column_name, idx_name=idx_name, _idx_class=index.EmbeddingIndex)
|
|
608
670
|
|
|
609
671
|
def drop_index(self, *, column_name: Optional[str] = None, idx_name: Optional[str] = None) -> None:
|
|
610
|
-
"""
|
|
672
|
+
"""
|
|
673
|
+
Drop an index from the table. Either a column name or an index name (but not both) must be
|
|
674
|
+
specified. If a column name is specified, it must be a column containing exactly one index;
|
|
675
|
+
otherwise the specific index name must be provided instead.
|
|
611
676
|
|
|
612
677
|
Args:
|
|
613
|
-
column_name: The name of the column
|
|
678
|
+
column_name: The name of the column from which to drop the index. Invalid if the column has multiple
|
|
679
|
+
indices.
|
|
614
680
|
idx_name: The name of the index to drop.
|
|
615
681
|
|
|
616
682
|
Raises:
|
|
617
|
-
Error: If the
|
|
683
|
+
Error: If `column_name` is specified, but the column does not exist, or it contains no
|
|
684
|
+
indices or multiple indices.
|
|
685
|
+
Error: If `idx_name` is specified, but the index does not exist.
|
|
618
686
|
|
|
619
687
|
Examples:
|
|
620
|
-
Drop index on the
|
|
688
|
+
Drop the index on the `img` column of the table `my_table`:
|
|
621
689
|
|
|
622
|
-
>>> tbl.
|
|
690
|
+
>>> tbl = pxt.get_table('my_table')
|
|
691
|
+
... tbl.drop_index(column_name='img')
|
|
623
692
|
"""
|
|
624
693
|
self._drop_index(column_name=column_name, idx_name=idx_name)
|
|
625
694
|
|
|
@@ -672,7 +741,7 @@ class Table(SchemaObject):
|
|
|
672
741
|
To insert multiple rows at a time:
|
|
673
742
|
``insert(rows: Iterable[dict[str, Any]], /, *, print_stats: bool = False, fail_on_exception: bool = True)``
|
|
674
743
|
|
|
675
|
-
To insert just a single row, you can use the more
|
|
744
|
+
To insert just a single row, you can use the more concise syntax:
|
|
676
745
|
``insert(*, print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any)``
|
|
677
746
|
|
|
678
747
|
Args:
|
|
@@ -688,24 +757,31 @@ class Table(SchemaObject):
|
|
|
688
757
|
If ``True``, raise an exception that aborts the insert.
|
|
689
758
|
|
|
690
759
|
Returns:
|
|
691
|
-
|
|
760
|
+
An [`UpdateStatus`][pixeltable.UpdateStatus] object containing information about the update.
|
|
692
761
|
|
|
693
762
|
Raises:
|
|
694
|
-
Error:
|
|
763
|
+
Error: If one of the following conditions occurs:
|
|
764
|
+
|
|
765
|
+
- The table is a view or snapshot.
|
|
766
|
+
- The table has been dropped.
|
|
767
|
+
- One of the rows being inserted does not conform to the table schema.
|
|
768
|
+
- An error occurs during processing of computed columns, and `fail_on_exception=True`.
|
|
695
769
|
|
|
696
770
|
Examples:
|
|
697
|
-
Insert two rows into
|
|
771
|
+
Insert two rows into the table `my_table` with three int columns ``a``, ``b``, and ``c``.
|
|
772
|
+
Column ``c`` is nullable:
|
|
698
773
|
|
|
699
|
-
>>> tbl.
|
|
774
|
+
>>> tbl = pxt.get_table('my_table')
|
|
775
|
+
... tbl.insert([{'a': 1, 'b': 1, 'c': 1}, {'a': 2, 'b': 2}])
|
|
700
776
|
|
|
701
|
-
Insert a single row
|
|
777
|
+
Insert a single row using the alternative syntax:
|
|
702
778
|
|
|
703
|
-
>>> tbl.insert(a=
|
|
779
|
+
>>> tbl.insert(a=3, b=3, c=3)
|
|
704
780
|
"""
|
|
705
781
|
raise NotImplementedError
|
|
706
782
|
|
|
707
783
|
def update(
|
|
708
|
-
self, value_spec: dict[str, Any], where: Optional['
|
|
784
|
+
self, value_spec: dict[str, Any], where: Optional['pxt.exprs.Expr'] = None, cascade: bool = True
|
|
709
785
|
) -> UpdateStatus:
|
|
710
786
|
"""Update rows in this table.
|
|
711
787
|
|
|
@@ -732,7 +808,9 @@ class Table(SchemaObject):
|
|
|
732
808
|
>>> tbl.update({'int_col': tbl.int_col + 1}, where=tbl.int_col == 0)
|
|
733
809
|
"""
|
|
734
810
|
self._check_is_dropped()
|
|
735
|
-
|
|
811
|
+
status = self._tbl_version.update(value_spec, where, cascade)
|
|
812
|
+
FileCache.get().emit_eviction_warnings()
|
|
813
|
+
return status
|
|
736
814
|
|
|
737
815
|
def batch_update(
|
|
738
816
|
self, rows: Iterable[dict[str, Any]], cascade: bool = True,
|
|
@@ -789,11 +867,13 @@ class Table(SchemaObject):
|
|
|
789
867
|
missing_cols = pk_col_names - set(col.name for col in col_vals.keys())
|
|
790
868
|
raise excs.Error(f'Primary key columns ({", ".join(missing_cols)}) missing in {row_spec}')
|
|
791
869
|
row_updates.append(col_vals)
|
|
792
|
-
|
|
870
|
+
status = self._tbl_version.batch_update(
|
|
793
871
|
row_updates, rowids, error_if_not_exists=if_not_exists == 'error',
|
|
794
872
|
insert_if_not_exists=if_not_exists == 'insert', cascade=cascade)
|
|
873
|
+
FileCache.get().emit_eviction_warnings()
|
|
874
|
+
return status
|
|
795
875
|
|
|
796
|
-
def delete(self, where: Optional['
|
|
876
|
+
def delete(self, where: Optional['pxt.exprs.Expr'] = None) -> UpdateStatus:
|
|
797
877
|
"""Delete rows in this table.
|
|
798
878
|
|
|
799
879
|
Args:
|
|
@@ -822,17 +902,17 @@ class Table(SchemaObject):
|
|
|
822
902
|
self._tbl_version.revert()
|
|
823
903
|
|
|
824
904
|
@overload
|
|
825
|
-
def query(self, py_fn: Callable) -> '
|
|
905
|
+
def query(self, py_fn: Callable) -> 'pxt.func.QueryTemplateFunction': ...
|
|
826
906
|
|
|
827
907
|
@overload
|
|
828
908
|
def query(
|
|
829
909
|
self, *, param_types: Optional[list[ts.ColumnType]] = None
|
|
830
|
-
) -> Callable[[Callable], '
|
|
910
|
+
) -> Callable[[Callable], 'pxt.func.QueryTemplateFunction']: ...
|
|
831
911
|
|
|
832
912
|
def query(self, *args: Any, **kwargs: Any) -> Any:
|
|
833
913
|
def make_query_template(
|
|
834
914
|
py_fn: Callable, param_types: Optional[list[ts.ColumnType]]
|
|
835
|
-
) -> '
|
|
915
|
+
) -> 'pxt.func.QueryTemplateFunction':
|
|
836
916
|
if py_fn.__module__ != '__main__' and py_fn.__name__.isidentifier():
|
|
837
917
|
# this is a named function in a module
|
|
838
918
|
function_path = f'{py_fn.__module__}.{py_fn.__qualname__}'
|
|
@@ -841,12 +921,11 @@ class Table(SchemaObject):
|
|
|
841
921
|
query_name = py_fn.__name__
|
|
842
922
|
if query_name in self._schema.keys():
|
|
843
923
|
raise excs.Error(f'Query name {query_name!r} conflicts with existing column')
|
|
844
|
-
if query_name in self._queries:
|
|
924
|
+
if query_name in self.__query_scope._queries and function_path is not None:
|
|
845
925
|
raise excs.Error(f'Duplicate query name: {query_name!r}')
|
|
846
|
-
|
|
847
|
-
query_fn = func.QueryTemplateFunction.create(
|
|
926
|
+
query_fn = pxt.func.QueryTemplateFunction.create(
|
|
848
927
|
py_fn, param_types=param_types, path=function_path, name=query_name)
|
|
849
|
-
self._queries[query_name] = query_fn
|
|
928
|
+
self.__query_scope._queries[query_name] = query_fn
|
|
850
929
|
return query_fn
|
|
851
930
|
|
|
852
931
|
# TODO: verify that the inferred return type matches that of the template
|
|
@@ -863,7 +942,7 @@ class Table(SchemaObject):
|
|
|
863
942
|
def external_stores(self) -> list[str]:
|
|
864
943
|
return list(self._tbl_version.external_stores.keys())
|
|
865
944
|
|
|
866
|
-
def _link_external_store(self, store: '
|
|
945
|
+
def _link_external_store(self, store: 'pxt.io.ExternalStore') -> None:
|
|
867
946
|
"""
|
|
868
947
|
Links the specified `ExternalStore` to this table.
|
|
869
948
|
"""
|
|
@@ -918,7 +997,7 @@ class Table(SchemaObject):
|
|
|
918
997
|
*,
|
|
919
998
|
export_data: bool = True,
|
|
920
999
|
import_data: bool = True
|
|
921
|
-
) -> '
|
|
1000
|
+
) -> 'pxt.io.SyncStatus':
|
|
922
1001
|
"""
|
|
923
1002
|
Synchronizes this table with its linked external stores.
|
|
924
1003
|
|
|
@@ -940,9 +1019,7 @@ class Table(SchemaObject):
|
|
|
940
1019
|
if store not in all_stores:
|
|
941
1020
|
raise excs.Error(f'Table `{self._name}` has no external store with that name: {store}')
|
|
942
1021
|
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
sync_status = SyncStatus.empty()
|
|
1022
|
+
sync_status = pxt.io.SyncStatus.empty()
|
|
946
1023
|
for store in stores:
|
|
947
1024
|
store_obj = self._tbl_version.external_stores[store]
|
|
948
1025
|
store_sync_status = store_obj.sync(self, export_data=export_data, import_data=import_data)
|