pixeltable 0.1.0__py3-none-any.whl → 0.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +34 -6
- pixeltable/catalog/__init__.py +13 -0
- pixeltable/catalog/catalog.py +159 -0
- pixeltable/catalog/column.py +200 -0
- pixeltable/catalog/dir.py +32 -0
- pixeltable/catalog/globals.py +33 -0
- pixeltable/catalog/insertable_table.py +191 -0
- pixeltable/catalog/named_function.py +36 -0
- pixeltable/catalog/path.py +58 -0
- pixeltable/catalog/path_dict.py +139 -0
- pixeltable/catalog/schema_object.py +39 -0
- pixeltable/catalog/table.py +581 -0
- pixeltable/catalog/table_version.py +749 -0
- pixeltable/catalog/table_version_path.py +133 -0
- pixeltable/catalog/view.py +203 -0
- pixeltable/client.py +590 -30
- pixeltable/dataframe.py +540 -349
- pixeltable/env.py +359 -45
- pixeltable/exceptions.py +12 -21
- pixeltable/exec/__init__.py +9 -0
- pixeltable/exec/aggregation_node.py +78 -0
- pixeltable/exec/cache_prefetch_node.py +116 -0
- pixeltable/exec/component_iteration_node.py +79 -0
- pixeltable/exec/data_row_batch.py +95 -0
- pixeltable/exec/exec_context.py +22 -0
- pixeltable/exec/exec_node.py +61 -0
- pixeltable/exec/expr_eval_node.py +217 -0
- pixeltable/exec/in_memory_data_node.py +69 -0
- pixeltable/exec/media_validation_node.py +43 -0
- pixeltable/exec/sql_scan_node.py +225 -0
- pixeltable/exprs/__init__.py +24 -0
- pixeltable/exprs/arithmetic_expr.py +102 -0
- pixeltable/exprs/array_slice.py +71 -0
- pixeltable/exprs/column_property_ref.py +77 -0
- pixeltable/exprs/column_ref.py +105 -0
- pixeltable/exprs/comparison.py +77 -0
- pixeltable/exprs/compound_predicate.py +98 -0
- pixeltable/exprs/data_row.py +195 -0
- pixeltable/exprs/expr.py +586 -0
- pixeltable/exprs/expr_set.py +39 -0
- pixeltable/exprs/function_call.py +380 -0
- pixeltable/exprs/globals.py +69 -0
- pixeltable/exprs/image_member_access.py +115 -0
- pixeltable/exprs/image_similarity_predicate.py +58 -0
- pixeltable/exprs/inline_array.py +107 -0
- pixeltable/exprs/inline_dict.py +101 -0
- pixeltable/exprs/is_null.py +38 -0
- pixeltable/exprs/json_mapper.py +121 -0
- pixeltable/exprs/json_path.py +159 -0
- pixeltable/exprs/literal.py +54 -0
- pixeltable/exprs/object_ref.py +41 -0
- pixeltable/exprs/predicate.py +44 -0
- pixeltable/exprs/row_builder.py +355 -0
- pixeltable/exprs/rowid_ref.py +94 -0
- pixeltable/exprs/type_cast.py +53 -0
- pixeltable/exprs/variable.py +45 -0
- pixeltable/func/__init__.py +9 -0
- pixeltable/func/aggregate_function.py +194 -0
- pixeltable/func/batched_function.py +53 -0
- pixeltable/func/callable_function.py +69 -0
- pixeltable/func/expr_template_function.py +82 -0
- pixeltable/func/function.py +110 -0
- pixeltable/func/function_registry.py +227 -0
- pixeltable/func/globals.py +36 -0
- pixeltable/func/nos_function.py +202 -0
- pixeltable/func/signature.py +166 -0
- pixeltable/func/udf.py +163 -0
- pixeltable/functions/__init__.py +52 -103
- pixeltable/functions/eval.py +216 -0
- pixeltable/functions/fireworks.py +34 -0
- pixeltable/functions/huggingface.py +120 -0
- pixeltable/functions/image.py +16 -0
- pixeltable/functions/openai.py +256 -0
- pixeltable/functions/pil/image.py +148 -7
- pixeltable/functions/string.py +13 -0
- pixeltable/functions/together.py +122 -0
- pixeltable/functions/util.py +41 -0
- pixeltable/functions/video.py +62 -0
- pixeltable/iterators/__init__.py +3 -0
- pixeltable/iterators/base.py +48 -0
- pixeltable/iterators/document.py +311 -0
- pixeltable/iterators/video.py +89 -0
- pixeltable/metadata/__init__.py +54 -0
- pixeltable/metadata/converters/convert_10.py +18 -0
- pixeltable/metadata/schema.py +211 -0
- pixeltable/plan.py +656 -0
- pixeltable/store.py +418 -182
- pixeltable/tests/conftest.py +146 -88
- pixeltable/tests/functions/test_fireworks.py +42 -0
- pixeltable/tests/functions/test_functions.py +60 -0
- pixeltable/tests/functions/test_huggingface.py +158 -0
- pixeltable/tests/functions/test_openai.py +152 -0
- pixeltable/tests/functions/test_together.py +111 -0
- pixeltable/tests/test_audio.py +65 -0
- pixeltable/tests/test_catalog.py +27 -0
- pixeltable/tests/test_client.py +14 -14
- pixeltable/tests/test_component_view.py +370 -0
- pixeltable/tests/test_dataframe.py +439 -0
- pixeltable/tests/test_dirs.py +78 -62
- pixeltable/tests/test_document.py +120 -0
- pixeltable/tests/test_exprs.py +592 -135
- pixeltable/tests/test_function.py +297 -67
- pixeltable/tests/test_migration.py +43 -0
- pixeltable/tests/test_nos.py +54 -0
- pixeltable/tests/test_snapshot.py +208 -0
- pixeltable/tests/test_table.py +1195 -263
- pixeltable/tests/test_transactional_directory.py +42 -0
- pixeltable/tests/test_types.py +5 -11
- pixeltable/tests/test_video.py +151 -34
- pixeltable/tests/test_view.py +530 -0
- pixeltable/tests/utils.py +320 -45
- pixeltable/tool/create_test_db_dump.py +149 -0
- pixeltable/tool/create_test_video.py +81 -0
- pixeltable/type_system.py +445 -124
- pixeltable/utils/__init__.py +17 -46
- pixeltable/utils/arrow.py +98 -0
- pixeltable/utils/clip.py +12 -15
- pixeltable/utils/coco.py +136 -0
- pixeltable/utils/documents.py +39 -0
- pixeltable/utils/filecache.py +195 -0
- pixeltable/utils/help.py +11 -0
- pixeltable/utils/hf_datasets.py +157 -0
- pixeltable/utils/media_store.py +76 -0
- pixeltable/utils/parquet.py +167 -0
- pixeltable/utils/pytorch.py +91 -0
- pixeltable/utils/s3.py +13 -0
- pixeltable/utils/sql.py +17 -0
- pixeltable/utils/transactional_directory.py +35 -0
- pixeltable-0.2.4.dist-info/LICENSE +18 -0
- pixeltable-0.2.4.dist-info/METADATA +127 -0
- pixeltable-0.2.4.dist-info/RECORD +132 -0
- {pixeltable-0.1.0.dist-info → pixeltable-0.2.4.dist-info}/WHEEL +1 -1
- pixeltable/catalog.py +0 -1421
- pixeltable/exprs.py +0 -1745
- pixeltable/function.py +0 -269
- pixeltable/functions/clip.py +0 -10
- pixeltable/functions/pil/__init__.py +0 -23
- pixeltable/functions/tf.py +0 -21
- pixeltable/index.py +0 -57
- pixeltable/tests/test_dict.py +0 -24
- pixeltable/tests/test_functions.py +0 -11
- pixeltable/tests/test_tf.py +0 -69
- pixeltable/tf.py +0 -33
- pixeltable/utils/tf.py +0 -33
- pixeltable/utils/video.py +0 -32
- pixeltable-0.1.0.dist-info/METADATA +0 -34
- pixeltable-0.1.0.dist-info/RECORD +0 -36
|
@@ -0,0 +1,581 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import dataclasses
|
|
4
|
+
import json
|
|
5
|
+
import logging
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Union, Any, List, Dict, Optional, Callable, Set, Tuple
|
|
8
|
+
from uuid import UUID
|
|
9
|
+
|
|
10
|
+
import pandas as pd
|
|
11
|
+
import sqlalchemy as sql
|
|
12
|
+
|
|
13
|
+
import pixeltable
|
|
14
|
+
import pixeltable.catalog as catalog
|
|
15
|
+
import pixeltable.env as env
|
|
16
|
+
import pixeltable.exceptions as excs
|
|
17
|
+
import pixeltable.exprs as exprs
|
|
18
|
+
import pixeltable.metadata.schema as schema
|
|
19
|
+
import pixeltable.type_system as ts
|
|
20
|
+
from .column import Column
|
|
21
|
+
from .globals import is_valid_identifier, is_system_column_name
|
|
22
|
+
from .schema_object import SchemaObject
|
|
23
|
+
from .table_version import TableVersion
|
|
24
|
+
from .table_version_path import TableVersionPath
|
|
25
|
+
|
|
26
|
+
_logger = logging.getLogger('pixeltable')
|
|
27
|
+
|
|
28
|
+
class Table(SchemaObject):
|
|
29
|
+
"""Base class for all tabular SchemaObjects."""
|
|
30
|
+
|
|
31
|
+
@dataclasses.dataclass
class UpdateStatus:
    """Counters and column lists describing the outcome of a data-modifying operation.

    All counters default to 0 and both lists default to a fresh empty list per instance.
    """
    # NOTE(review): presumably the number of rows touched by the operation -- confirm against TableVersion
    num_rows: int = 0
    # TODO: change to num_computed_columns (the number of computed slots isn't really meaningful to the user)
    num_computed_values: int = 0
    # NOTE(review): presumably the number of exceptions raised during computation -- confirm
    num_excs: int = 0
    # names of updated columns
    updated_cols: List[str] = dataclasses.field(default_factory=list)
    # names of columns with exceptions
    cols_with_excs: List[str] = dataclasses.field(default_factory=list)
|
|
39
|
+
|
|
40
|
+
def __init__(self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath):
    """Initialize the table.

    Args:
        id: id of this table; forwarded to the SchemaObject constructor
        dir_id: id of the containing directory; forwarded to the SchemaObject constructor
        name: name of this table; forwarded to the SchemaObject constructor
        tbl_version_path: the TableVersionPath that all data/schema operations are delegated to
    """
    super().__init__(id, name, dir_id)
    self.tbl_version_path = tbl_version_path
    # set to True by _drop(); checked by _check_is_dropped()
    self.is_dropped = False
|
|
44
|
+
|
|
45
|
+
def move(self, new_name: str, new_dir_id: UUID) -> None:
    """Move/rename this table and persist the change in the metadata store.

    First delegates to the superclass, then issues a single UPDATE on the stored table record that
    sets its directory id and the 'name' entry of its metadata column.

    Args:
        new_name: the new name of the table
        new_dir_id: id of the directory the table is moved to
    """
    super().move(new_name, new_dir_id)
    tbl_record = schema.Table
    update_stmt = sql.text((
        f"UPDATE {tbl_record.__table__} "
        f"SET {tbl_record.dir_id.name} = :new_dir_id, "
        f" {tbl_record.md.name}['name'] = :new_name "
        f"WHERE {tbl_record.id.name} = :id"))
    # the name is bound as a JSON-encoded string (presumably because the metadata column holds JSON)
    bind_params = {'new_dir_id': new_dir_id, 'new_name': json.dumps(new_name), 'id': self._id}
    with env.Env.get().engine.begin() as conn:
        conn.execute(update_stmt, bind_params)
|
|
54
|
+
|
|
55
|
+
def version(self) -> int:
    """Return the version number of this table's TableVersion.

    Used by tests to ascertain version changes.
    """
    tbl_version = self.tbl_version_path.tbl_version
    return tbl_version.version
|
|
58
|
+
|
|
59
|
+
def _tbl_version(self) -> TableVersion:
    """Return the TableVersion of this table only (the ``tbl_version`` of its TableVersionPath)."""
    path = self.tbl_version_path
    return path.tbl_version
|
|
62
|
+
|
|
63
|
+
def __hash__(self) -> int:
    """Hash this table by the id of its TableVersion."""
    tbl_version_id = self._tbl_version().id
    return hash(tbl_version_id)
|
|
65
|
+
|
|
66
|
+
def _check_is_dropped(self) -> None:
    """Raise if this table has already been dropped.

    Raises:
        Error: if ``is_dropped`` is set
    """
    if self.is_dropped:
        # use the private _name attribute (as __repr__ does): a plain 'self.name' is not a regular attribute
        # and would be routed through __getattr__, i.e., treated as a column lookup
        raise excs.Error(f'{self.display_name()} {self._name} has been dropped')
|
|
69
|
+
|
|
70
|
+
def __getattr__(self, col_name: str) -> 'pixeltable.exprs.ColumnRef':
    """Return a ColumnRef for the given column name.

    Python calls this only when regular attribute lookup fails; the lookup is then forwarded to
    ``tbl_version_path``.

    Raises:
        AttributeError: if ``tbl_version_path`` itself has not been set on this instance
    """
    if col_name == 'tbl_version_path':
        # without this guard, evaluating self.tbl_version_path below would re-enter __getattr__ and
        # recurse until RecursionError (e.g., on an instance that was created without running __init__)
        raise AttributeError(col_name)
    return getattr(self.tbl_version_path, col_name)
|
|
74
|
+
|
|
75
|
+
def __getitem__(self, index: object) -> Union['pixeltable.exprs.ColumnRef', 'pixeltable.dataframe.DataFrame']:
    """Return a ColumnRef for the given column name, or a DataFrame for the given slice.

    The index is passed unchanged to ``tbl_version_path.__getitem__()``.
    """
    path = self.tbl_version_path
    return path.__getitem__(index)
|
|
79
|
+
|
|
80
|
+
def df(self) -> 'pixeltable.dataframe.DataFrame':
    """Return a new DataFrame over this table's TableVersionPath."""
    # local import: avoid circular imports
    from pixeltable.dataframe import DataFrame
    path = self.tbl_version_path
    return DataFrame(path)
|
|
86
|
+
|
|
87
|
+
def select(self, *items: Any, **named_items: Any) -> 'pixeltable.dataframe.DataFrame':
    """Return a DataFrame for this table with ``select(*items, **named_items)`` applied."""
    # local import: avoid circular imports
    from pixeltable.dataframe import DataFrame
    base_df = DataFrame(self.tbl_version_path)
    return base_df.select(*items, **named_items)
|
|
93
|
+
|
|
94
|
+
def where(self, pred: 'exprs.Predicate') -> 'pixeltable.dataframe.DataFrame':
    """Return a DataFrame for this table with ``where(pred)`` applied."""
    # local import: avoid circular imports
    from pixeltable.dataframe import DataFrame
    base_df = DataFrame(self.tbl_version_path)
    return base_df.where(pred)
|
|
100
|
+
|
|
101
|
+
def order_by(self, *items: 'exprs.Expr', asc: bool = True) -> 'pixeltable.dataframe.DataFrame':
    """Return a DataFrame for this table with ``order_by(*items, asc=asc)`` applied."""
    # local import: avoid circular imports
    from pixeltable.dataframe import DataFrame
    base_df = DataFrame(self.tbl_version_path)
    return base_df.order_by(*items, asc=asc)
|
|
107
|
+
|
|
108
|
+
def collect(self) -> 'pixeltable.dataframe.DataFrameResultSet':  # type: ignore[name-defined, no-untyped-def]
    """Return rows from this table (the result of ``collect()`` on the table's DataFrame)."""
    table_df = self.df()
    return table_df.collect()
|
|
112
|
+
|
|
113
|
+
def show(
        self, *args, **kwargs
) -> 'pixeltable.dataframe.DataFrameResultSet':  # type: ignore[name-defined, no-untyped-def]
    """Return rows from this table; all arguments are forwarded to ``show()`` of the table's DataFrame."""
    table_df = self.df()
    return table_df.show(*args, **kwargs)
|
|
119
|
+
|
|
120
|
+
def head(
        self, *args, **kwargs
) -> 'pixeltable.dataframe.DataFrameResultSet':  # type: ignore[name-defined, no-untyped-def]
    """Return the first n rows inserted into this table.

    All arguments are forwarded to ``head()`` of the table's DataFrame.
    """
    table_df = self.df()
    return table_df.head(*args, **kwargs)
|
|
125
|
+
|
|
126
|
+
def tail(
        self, *args, **kwargs
) -> 'pixeltable.dataframe.DataFrameResultSet':  # type: ignore[name-defined, no-untyped-def]
    """Return the last n rows inserted into this table.

    All arguments are forwarded to ``tail()`` of the table's DataFrame.
    """
    table_df = self.df()
    return table_df.tail(*args, **kwargs)
|
|
131
|
+
|
|
132
|
+
def count(self) -> int:
    """Return the number of rows in this table (``count()`` of the table's DataFrame)."""
    table_df = self.df()
    return table_df.count()
|
|
135
|
+
|
|
136
|
+
def column_names(self) -> List[str]:
    """Return the names of the columns in this table, in the order reported by the TableVersionPath."""
    all_cols = self.tbl_version_path.columns()
    return [col.name for col in all_cols]
|
|
139
|
+
|
|
140
|
+
def column_types(self) -> Dict[str, ts.ColumnType]:
    """Return a dictionary mapping the name of each column in this table to its column type."""
    all_cols = self.tbl_version_path.columns()
    return {col.name: col.col_type for col in all_cols}
|
|
143
|
+
|
|
144
|
+
@property
def comment(self) -> Optional[str]:
    """The comment of this table's TableVersion (may be None, see __repr__)."""
    # go through tbl_version_path explicitly: 'self.tbl_version' is not a regular attribute of this class and
    # would only resolve via the __getattr__ fall-through that is meant for column names
    return self.tbl_version_path.tbl_version.comment

@comment.setter
def comment(self, new_comment: Optional[str]):
    """Set the comment via ``set_comment()`` of this table's TableVersion."""
    self.tbl_version_path.tbl_version.set_comment(new_comment)
|
|
151
|
+
|
|
152
|
+
@property
def num_retained_versions(self):
    """The ``num_retained_versions`` setting of this table's TableVersion."""
    # go through tbl_version_path explicitly: 'self.tbl_version' is not a regular attribute of this class and
    # would only resolve via the __getattr__ fall-through that is meant for column names
    return self.tbl_version_path.tbl_version.num_retained_versions

@num_retained_versions.setter
def num_retained_versions(self, new_num_retained_versions: int):
    """Update the setting via ``set_num_retained_versions()`` of this table's TableVersion."""
    self.tbl_version_path.tbl_version.set_num_retained_versions(new_num_retained_versions)
|
|
159
|
+
|
|
160
|
+
def _description(self) -> pd.DataFrame:
    """Return a pandas DataFrame describing the schema, one row per column.

    Columns of the result: 'Column Name', 'Type' (the column type as a string) and 'Computed With'
    (the display string of the column's value expression, or '' if it has none).
    """
    names = []
    type_strs = []
    computed_with = []
    for col in self.tbl_version_path.columns():
        names.append(col.name)
        type_strs.append(str(col.col_type))
        computed_with.append(col.value_expr.display_str(inline=False) if col.value_expr is not None else '')
    return pd.DataFrame({
        'Column Name': names,
        'Type': type_strs,
        'Computed With': computed_with,
    })
|
|
168
|
+
|
|
169
|
+
def _description_html(self) -> 'pd.io.formats.style.Styler':
    """Return a pandas Styler that renders the schema description (see _description()) as HTML.

    The result is a Styler, not a DataFrame: it is what ``DataFrame.style`` and the chained Styler calls
    return, and callers use its ``_repr_html_()``.
    """
    # white-space: pre-wrap: print \n as newline
    cell_props = {'white-space': 'pre-wrap', 'text-align': 'left'}
    # th: center-align headings
    header_styles = [dict(selector='th', props=[('text-align', 'center')])]
    styler = self._description().style
    styler = styler.set_properties(**cell_props)
    styler = styler.set_table_styles(header_styles)
    return styler.hide(axis='index')
|
|
176
|
+
|
|
177
|
+
def describe(self) -> None:
    """Display the table description.

    When running under IPython (the name ``__IPYTHON__`` is defined), the HTML description is shown via
    ``IPython.display.display``; otherwise the plain-text repr is printed.
    """
    try:
        # evaluating this name raises NameError outside of IPython
        __IPYTHON__
        from IPython.display import display
        html_description = self._description_html()
        display(html_description)
    except NameError:
        print(repr(self))
|
|
184
|
+
|
|
185
|
+
# TODO: Display comments in _repr_html()
|
|
186
|
+
def __repr__(self) -> str:
    """Return the display name and quoted table name, then the comment (if any), then the column listing."""
    columns_str = self._description().to_string(index=False)
    comment_text = self.comment
    # the comment line is omitted entirely when there is no comment
    comment_line = '' if comment_text is None else f'{comment_text}\n'
    return f"{self.display_name()} '{self._name}'\n{comment_line}{columns_str}"
|
|
193
|
+
|
|
194
|
+
def _repr_html_(self) -> str:
    """Return the HTML rendering of the styled schema description."""
    styled = self._description_html()
    return styled._repr_html_()
|
|
196
|
+
|
|
197
|
+
def _drop(self) -> None:
    """Drop this table: drop its TableVersion, mark the object as dropped and remove it from the catalog.

    Raises:
        Error: if the table has already been dropped
    """
    self._check_is_dropped()
    tbl_version = self.tbl_version_path.tbl_version
    tbl_version.drop()
    self.is_dropped = True
    # update catalog
    del catalog.Catalog.get().tbls[self._id]
|
|
204
|
+
|
|
205
|
+
# TODO Factor this out into a separate module.
|
|
206
|
+
# The return type is unresolvable, but torch can't be imported since it's an optional dependency.
|
|
207
|
+
def to_pytorch_dataset(self, image_format: str = 'pt') -> 'torch.utils.data.IterableDataset':
    """Return a PyTorch Dataset for this table.

    Delegates to DataFrame.to_pytorch_dataset(), forwarding ``image_format``.
    """
    from pixeltable.dataframe import DataFrame
    table_df = DataFrame(self.tbl_version_path)
    return table_df.to_pytorch_dataset(image_format=image_format)
|
|
213
|
+
|
|
214
|
+
def to_coco_dataset(self) -> Path:
    """Return the path to a COCO json file for this table.

    Delegates to DataFrame.to_coco_dataset().
    """
    from pixeltable.dataframe import DataFrame
    table_df = DataFrame(self.tbl_version_path)
    return table_df.to_coco_dataset()
|
|
220
|
+
|
|
221
|
+
def __setitem__(self, column_name: str, value: Union[ts.ColumnType, exprs.Expr, Callable, dict]) -> None:
    """Adds a column to the table

    Args:
        column_name: the name of the new column
        value: column type or value expression or column specification dictionary:
            column type: a Pixeltable column type (if the table already contains rows, it must be nullable)
            value expression: a Pixeltable expression that computes the column values
            column specification: a dictionary with possible keys 'type', 'value', 'stored', 'indexed'

    Raises:
        Error: if the table has been dropped, or if the column name is not a string or not a valid identifier
            (further checks are performed by _create_columns() and _verify_column())

    Examples:
        Add an int column with ``None`` values:

        >>> tbl['new_col'] = IntType(nullable=True)

        For a table with int column ``int_col``, add a column that is the factorial of ``int_col``. The names of
        the parameters of the Callable must correspond to existing column names (the column values are then passed
        as arguments to the Callable). In this case, the return type cannot be inferred and needs to be specified
        explicitly:

        >>> tbl['factorial'] = {'value': lambda int_col: math.factorial(int_col), 'type': IntType()}

        For a table with an image column ``frame``, add an image column ``rotated`` that rotates the image by
        90 degrees. In this case, the column type is inferred from the expression. Also, the column is not stored
        (by default, computed image columns are not stored but recomputed on demand):

        >>> tbl['rotated'] = tbl.frame.rotate(90)

        Do the same, but now the column is stored:

        >>> tbl['rotated'] = {'value': tbl.frame.rotate(90), 'stored': True}

        Add a resized version of the ``frame`` column and index it. The column does not need to be stored in order
        to be indexed:

        >>> tbl['small_frame'] = {'value': tbl.frame.resize([224, 224]), 'indexed': True}
    """
    # same guard as add_column(): a dropped table must not be modified
    self._check_is_dropped()
    if not isinstance(column_name, str):
        raise excs.Error(f'Column name must be a string, got {type(column_name)}')
    if not is_valid_identifier(column_name):
        raise excs.Error(f'Invalid column name: {column_name!r}')

    new_col = self._create_columns({column_name: value})[0]
    self._verify_column(new_col, self.column_names())
    # item assignment has no result, so the status returned by add_column() is intentionally not returned
    self.tbl_version_path.tbl_version.add_column(new_col)
|
|
264
|
+
|
|
265
|
+
def add_column(
        self, *,
        type: Optional[ts.ColumnType] = None, stored: Optional[bool] = None, indexed: Optional[bool] = None,
        print_stats: bool = False, **kwargs: Any
) -> UpdateStatus:
    """Adds a column to the table.

    Args:
        kwargs: Exactly one keyword argument of the form ``column-name=type|value-expression``.
        type: The type of the column. Only valid and required if ``value-expression`` is a Callable.
        stored: Whether the column is materialized and stored or computed on demand. Only valid for image columns.
        indexed: Whether the column is indexed.
        print_stats: If ``True``, print execution metrics.

    Returns:
        execution status

    Raises:
        Error: If the column name is invalid or already exists.

    Examples:
        Add an int column with ``None`` values:

        >>> tbl.add_column(new_col=IntType())

        Alternatively, this can also be expressed as:

        >>> tbl['new_col'] = IntType()

        For a table with int column ``int_col``, add a column that is the factorial of ``int_col``. The names of
        the parameters of the Callable must correspond to existing column names (the column values are then passed
        as arguments to the Callable). In this case, the column type needs to be specified explicitly:

        >>> tbl.add_column(factorial=lambda int_col: math.factorial(int_col), type=IntType())

        Alternatively, this can also be expressed as:

        >>> tbl['factorial'] = {'value': lambda int_col: math.factorial(int_col), 'type': IntType()}

        For a table with an image column ``frame``, add an image column ``rotated`` that rotates the image by
        90 degrees. In this case, the column type is inferred from the expression. Also, the column is not stored
        (by default, computed image columns are not stored but recomputed on demand):

        >>> tbl.add_column(rotated=tbl.frame.rotate(90))

        Alternatively, this can also be expressed as:

        >>> tbl['rotated'] = tbl.frame.rotate(90)

        Do the same, but now the column is stored:

        >>> tbl.add_column(rotated=tbl.frame.rotate(90), stored=True)

        Alternatively, this can also be expressed as:

        >>> tbl['rotated'] = {'value': tbl.frame.rotate(90), 'stored': True}

        Add a resized version of the ``frame`` column and index it. The column does not need to be stored in order
        to be indexed:

        >>> tbl.add_column(small_frame=tbl.frame.resize([224, 224]), indexed=True)

        Alternatively, this can also be expressed as:

        >>> tbl['small_frame'] = {'value': tbl.frame.resize([224, 224]), 'indexed': True}
    """
    self._check_is_dropped()
    # verify kwargs and construct column schema dict
    if len(kwargs) != 1:
        raise excs.Error((
            f'add_column() requires exactly one keyword argument of the form "column-name=type|value-expression", '
            f'got {len(kwargs)} instead ({", ".join(kwargs)})'
        ))
    ((col_name, spec),) = kwargs.items()

    spec_is_type = isinstance(spec, ts.ColumnType)
    # an explicit type is only meaningful for a Callable; a column type or an expression already determines it
    if type is not None and (spec_is_type or isinstance(spec, exprs.Expr)):
        raise excs.Error('add_column(): keyword argument "type" is redundant')

    col_schema: Dict[str, Any] = {'type': spec} if spec_is_type else {'value': spec}
    # only options that were explicitly supplied become part of the column specification
    for key, option in (('type', type), ('stored', stored), ('indexed', indexed)):
        if option is not None:
            col_schema[key] = option

    new_col = self._create_columns({col_name: col_schema})[0]
    self._verify_column(new_col, self.column_names())
    tbl_version = self.tbl_version_path.tbl_version
    return tbl_version.add_column(new_col, print_stats=print_stats)
|
|
358
|
+
|
|
359
|
+
@classmethod
def _validate_column_spec(cls, name: str, spec: Dict[str, Any]) -> None:
    """Check integrity of user-supplied Column spec

    We unfortunately can't use something like jsonschema for validation, because this isn't strictly a JSON schema
    (on account of containing Python Callables or Exprs).

    Args:
        name: the column name (only used in error messages)
        spec: the specification dictionary; allowed keys are 'type', 'value', 'stored' and 'indexed'

    Raises:
        Error: on an unknown key, a value of the wrong kind for a key, a 'type' that is redundant (value is an
            expression) or missing (value is a Callable, or neither 'type' nor an expression is given)
    """
    assert isinstance(spec, dict)
    allowed_keys = {'type', 'value', 'stored', 'indexed'}
    for key in spec:
        if key not in allowed_keys:
            raise excs.Error(f'Column {name}: invalid key {key!r}')

    type_given = 'type' in spec
    if type_given and not isinstance(spec['type'], ts.ColumnType):
        raise excs.Error(f'Column {name}: "type" must be a ColumnType, got {spec["type"]}')
    # the column type is known if it is given explicitly or if the value is an expression
    type_known = type_given

    if 'value' in spec:
        value_spec = spec['value']
        if exprs.Expr.from_object(value_spec) is not None:
            if type_given:
                raise excs.Error(f'Column {name}: "type" is redundant if value is a Pixeltable expression')
            type_known = True
        elif not isinstance(value_spec, Callable):
            # not convertible to an expression: needs to be a Callable
            raise excs.Error(
                f'Column {name}: value needs to be either a Pixeltable expression or a Callable, '
                f'but it is a {type(value_spec)}')
        elif not type_given:
            raise excs.Error(f'Column {name}: "type" is required if value is a Callable')

    for flag in ('stored', 'indexed'):
        if flag in spec and not isinstance(spec[flag], bool):
            raise excs.Error(f'Column {name}: "{flag}" must be a bool, got {spec[flag]}')
    if not type_known:
        raise excs.Error(f'Column {name}: "type" is required')
|
|
400
|
+
|
|
401
|
+
@classmethod
def _create_columns(cls, schema: Dict[str, Any]) -> List[Column]:
    """Construct list of Columns, given schema

    Args:
        schema: maps column names to a column type, a Pixeltable expression or a specification dictionary
            (validated with _validate_column_spec())

    Returns:
        one Column per schema entry, in schema order

    Raises:
        Error: if a column is specified directly as a Callable (a dictionary with 'value' and 'type' is required)
    """
    columns: List[Column] = []
    for name, spec in schema.items():
        # everything the spec does not provide is passed to Column as None
        col_args: Dict[str, Any] = dict(
            col_type=None, computed_with=None, stored=None, indexed=None, primary_key=None)

        if isinstance(spec, ts.ColumnType):
            # TODO: create copy
            col_args['col_type'] = spec
        elif isinstance(spec, exprs.Expr):
            # create copy so we can modify it
            col_args['computed_with'] = spec.copy()
        elif isinstance(spec, Callable):
            raise excs.Error((
                f'Column {name} computed with a Callable: specify using a dictionary with '
                f'the "value" and "type" keys (e.g., "{name}": {{"value": <Callable>, "type": IntType()}})'
            ))
        elif isinstance(spec, dict):
            cls._validate_column_spec(name, spec)
            value = spec.get('value')
            if isinstance(value, exprs.Expr):
                # create copy so we can modify it
                value = value.copy()
            # NOTE(review): 'primary_key' is read here although _validate_column_spec() does not list it as a
            # valid key -- confirm which of the two is intended
            col_args.update(
                col_type=spec.get('type'), computed_with=value, stored=spec.get('stored'),
                indexed=spec.get('indexed'), primary_key=spec.get('primary_key'))

        columns.append(Column(name, **col_args))
    return columns
|
|
439
|
+
|
|
440
|
+
@classmethod
def _verify_column(cls, col: Column, existing_column_names: Set[str]) -> None:
    """Check integrity of user-supplied Column and supply defaults

    Args:
        col: the column to check; its ``stored`` attribute is set here if it is None
        existing_column_names: names that are already taken (only used for membership tests)

    Raises:
        Error: if the name is reserved, invalid or a duplicate, or if ``stored=False`` is not applicable
    """
    col_name = col.name
    if is_system_column_name(col_name):
        raise excs.Error(f'Column name {col.name} is reserved')
    if not is_valid_identifier(col_name):
        raise excs.Error(f"Invalid column name: '{col.name}'")
    if col_name in existing_column_names:
        raise excs.Error(f'Duplicate column name: {col.name}')

    if col.stored is False:
        # stored=False is only accepted for computed image columns that don't involve a window function
        if not (col.is_computed and col.col_type.is_image_type()):
            raise excs.Error(f'Column {col.name}: stored={col.stored} only applies to computed image columns')
        if not (col.col_type.is_image_type() and not col.has_window_fn_call()):
            raise excs.Error((
                f'Column {col.name}: stored={col.stored} is not valid for image columns computed with a streaming '
                f'function'))
    elif col.stored is None:
        # default: store everything except computed image columns without a window function call
        col.stored = not (col.is_computed and col.col_type.is_image_type() and not col.has_window_fn_call())
|
|
457
|
+
|
|
458
|
+
@classmethod
def _verify_schema(cls, schema: List[Column]) -> None:
    """Check integrity of user-supplied schema and set defaults

    Each column is verified with _verify_column() against the names of the columns that precede it, so
    duplicate names within the schema are rejected.
    """
    seen_names: Set[str] = set()
    for column in schema:
        cls._verify_column(column, seen_names)
        seen_names.add(column.name)
|
|
465
|
+
|
|
466
|
+
def drop_column(self, name: str) -> None:
    """Drop a column from the table.

    Args:
        name: The name of the column to drop.

    Raises:
        Error: If the column does not exist or if it is referenced by a computed column.

    Examples:
        Drop column ``factorial``:

        >>> tbl.drop_column('factorial')
    """
    self._check_is_dropped()
    tbl_version = self.tbl_version_path.tbl_version
    tbl_version.drop_column(name)
|
|
482
|
+
|
|
483
|
+
def rename_column(self, old_name: str, new_name: str) -> None:
    """Rename a column.

    Args:
        old_name: The current name of the column.
        new_name: The new name of the column.

    Raises:
        Error: If the column does not exist or if the new name is invalid or already exists.

    Examples:
        Rename column ``factorial`` to ``fac``:

        >>> tbl.rename_column('factorial', 'fac')
    """
    self._check_is_dropped()
    tbl_version = self.tbl_version_path.tbl_version
    tbl_version.rename_column(old_name, new_name)
|
|
500
|
+
|
|
501
|
+
def update(
        self, value_spec: Dict[str, Union['pixeltable.exprs.Expr', Any]],
        where: Optional['pixeltable.exprs.Predicate'] = None, cascade: bool = True
) -> UpdateStatus:
    """Update rows in this table.

    Args:
        value_spec: a dictionary mapping column names to literal values or Pixeltable expressions.
        where: a Predicate to filter rows to update.
        cascade: if True, also update all computed columns that transitively depend on the updated columns.

    Returns:
        the status returned by the update of the underlying TableVersion

    Raises:
        Error: if the table has been dropped; if a key of ``value_spec`` is not a string or does not name a
            column of this table; if the column is computed, a primary key column or a media column; if the
            value is neither a literal nor an expression compatible with the column type; if ``where`` is not
            a Predicate, contains a similarity clause, or cannot be expressed in SQL.

    Examples:
        Set newly-added column `int_col` to 1 for all rows:

        >>> tbl.update({'int_col': 1})

        Set newly-added column `int_col` to 1 for all rows where `int_col` is 0:

        >>> tbl.update({'int_col': 1}, where=tbl.int_col == 0)

        Set `int_col` to the value of `other_int_col` + 1:

        >>> tbl.update({'int_col': tbl.other_int_col + 1})

        Increment `int_col` by 1 for all rows where `int_col` is 0:

        >>> tbl.update({'int_col': tbl.int_col + 1}, where=tbl.int_col == 0)
    """
    # same guard as the other mutating operations (add_column(), drop_column(), rename_column(), revert())
    self._check_is_dropped()
    from pixeltable import exprs
    update_targets: List[Tuple[Column, exprs.Expr]] = []
    for col_name, val in value_spec.items():
        if not isinstance(col_name, str):
            raise excs.Error(f'Update specification: dict key must be column name, got {col_name!r}')
        # only columns of this table itself are eligible (include_bases=False)
        col = self.tbl_version_path.get_column(col_name, include_bases=False)
        if col is None:
            # TODO: return more informative error if this is trying to update a base column
            raise excs.Error(f'Column {col_name} unknown')
        if col.is_computed:
            raise excs.Error(f'Column {col_name} is computed and cannot be updated')
        if col.primary_key:
            raise excs.Error(f'Column {col_name} is a primary key column and cannot be updated')
        if col.col_type.is_media_type():
            raise excs.Error(f'Column {col_name} has type image/video/audio/document and cannot be updated')

        # make sure that the value is compatible with the column type
        # check if this is a literal
        try:
            value_expr = exprs.Literal(val, col_type=col.col_type)
        except TypeError:
            # it's not a literal, let's try to create an expr from it
            value_expr = exprs.Expr.from_object(val)
            if value_expr is None:
                raise excs.Error(f'Column {col_name}: value {val!r} is not a recognized literal or expression')
            if not col.col_type.matches(value_expr.col_type):
                raise excs.Error((
                    f'Type of value {val!r} ({value_expr.col_type}) is not compatible with the type of column '
                    f'{col_name} ({col.col_type})'
                ))
        update_targets.append((col, value_expr))

    from pixeltable.plan import Planner
    if where is not None:
        if not isinstance(where, exprs.Predicate):
            raise excs.Error(f"'where' argument must be a Predicate, got {type(where)}")
        analysis_info = Planner.analyze(self.tbl_version_path, where)
        if analysis_info.similarity_clause is not None:
            raise excs.Error('nearest() cannot be used with update()')
        # for now we require that the updated rows can be identified via SQL, rather than via a Python filter
        if analysis_info.filter is not None:
            raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')

    return self.tbl_version_path.tbl_version.update(update_targets, where, cascade)
|
|
573
|
+
|
|
574
|
+
def revert(self) -> None:
    """Reverts the table to the previous version.

    .. warning::
        This operation is irreversible.

    Raises:
        Error: if the table has been dropped
    """
    self._check_is_dropped()
    tbl_version = self.tbl_version_path.tbl_version
    tbl_version.revert()
|