pixeltable 0.4.17__py3-none-any.whl → 0.4.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +1 -1
- pixeltable/_version.py +1 -0
- pixeltable/catalog/catalog.py +144 -118
- pixeltable/catalog/column.py +104 -115
- pixeltable/catalog/globals.py +1 -2
- pixeltable/catalog/insertable_table.py +44 -49
- pixeltable/catalog/path.py +3 -4
- pixeltable/catalog/schema_object.py +4 -4
- pixeltable/catalog/table.py +139 -124
- pixeltable/catalog/table_metadata.py +6 -6
- pixeltable/catalog/table_version.py +315 -246
- pixeltable/catalog/table_version_handle.py +4 -4
- pixeltable/catalog/table_version_path.py +9 -10
- pixeltable/catalog/tbl_ops.py +9 -3
- pixeltable/catalog/view.py +34 -28
- pixeltable/config.py +14 -10
- pixeltable/dataframe.py +69 -78
- pixeltable/env.py +78 -64
- pixeltable/exec/aggregation_node.py +6 -6
- pixeltable/exec/cache_prefetch_node.py +10 -10
- pixeltable/exec/data_row_batch.py +3 -3
- pixeltable/exec/exec_context.py +16 -4
- pixeltable/exec/exec_node.py +5 -5
- pixeltable/exec/expr_eval/evaluators.py +6 -6
- pixeltable/exec/expr_eval/expr_eval_node.py +8 -7
- pixeltable/exec/expr_eval/globals.py +6 -6
- pixeltable/exec/expr_eval/row_buffer.py +1 -2
- pixeltable/exec/expr_eval/schedulers.py +11 -11
- pixeltable/exec/in_memory_data_node.py +2 -2
- pixeltable/exec/object_store_save_node.py +14 -17
- pixeltable/exec/sql_node.py +28 -27
- pixeltable/exprs/arithmetic_expr.py +4 -4
- pixeltable/exprs/array_slice.py +2 -2
- pixeltable/exprs/column_property_ref.py +3 -3
- pixeltable/exprs/column_ref.py +61 -74
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +3 -3
- pixeltable/exprs/data_row.py +12 -12
- pixeltable/exprs/expr.py +41 -31
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +3 -3
- pixeltable/exprs/function_call.py +14 -14
- pixeltable/exprs/in_predicate.py +4 -4
- pixeltable/exprs/inline_expr.py +8 -8
- pixeltable/exprs/is_null.py +1 -3
- pixeltable/exprs/json_mapper.py +8 -8
- pixeltable/exprs/json_path.py +6 -6
- pixeltable/exprs/literal.py +5 -5
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +14 -14
- pixeltable/exprs/rowid_ref.py +8 -8
- pixeltable/exprs/similarity_expr.py +50 -25
- pixeltable/exprs/sql_element_cache.py +4 -4
- pixeltable/exprs/string_op.py +2 -2
- pixeltable/exprs/type_cast.py +3 -5
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/callable_function.py +9 -9
- pixeltable/func/expr_template_function.py +3 -3
- pixeltable/func/function.py +15 -17
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/globals.py +2 -3
- pixeltable/func/mcp.py +2 -2
- pixeltable/func/query_template_function.py +16 -16
- pixeltable/func/signature.py +14 -14
- pixeltable/func/tools.py +11 -11
- pixeltable/func/udf.py +16 -18
- pixeltable/functions/__init__.py +1 -0
- pixeltable/functions/anthropic.py +7 -7
- pixeltable/functions/audio.py +76 -0
- pixeltable/functions/bedrock.py +6 -6
- pixeltable/functions/deepseek.py +4 -4
- pixeltable/functions/fireworks.py +2 -2
- pixeltable/functions/gemini.py +6 -6
- pixeltable/functions/globals.py +12 -12
- pixeltable/functions/groq.py +4 -4
- pixeltable/functions/huggingface.py +1033 -6
- pixeltable/functions/image.py +7 -10
- pixeltable/functions/llama_cpp.py +7 -7
- pixeltable/functions/math.py +2 -3
- pixeltable/functions/mistralai.py +3 -3
- pixeltable/functions/ollama.py +9 -9
- pixeltable/functions/openai.py +21 -21
- pixeltable/functions/openrouter.py +7 -7
- pixeltable/functions/string.py +21 -28
- pixeltable/functions/timestamp.py +7 -8
- pixeltable/functions/together.py +4 -6
- pixeltable/functions/twelvelabs.py +92 -0
- pixeltable/functions/video.py +36 -31
- pixeltable/functions/vision.py +6 -6
- pixeltable/functions/whisper.py +7 -7
- pixeltable/functions/whisperx.py +16 -16
- pixeltable/globals.py +75 -40
- pixeltable/index/base.py +12 -8
- pixeltable/index/btree.py +19 -22
- pixeltable/index/embedding_index.py +30 -39
- pixeltable/io/datarows.py +3 -3
- pixeltable/io/external_store.py +13 -16
- pixeltable/io/fiftyone.py +5 -5
- pixeltable/io/globals.py +5 -5
- pixeltable/io/hf_datasets.py +4 -4
- pixeltable/io/label_studio.py +12 -12
- pixeltable/io/pandas.py +6 -6
- pixeltable/io/parquet.py +2 -2
- pixeltable/io/table_data_conduit.py +12 -12
- pixeltable/io/utils.py +2 -2
- pixeltable/iterators/audio.py +2 -2
- pixeltable/iterators/document.py +88 -57
- pixeltable/iterators/video.py +66 -37
- pixeltable/metadata/converters/convert_18.py +2 -2
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_20.py +2 -2
- pixeltable/metadata/converters/convert_21.py +2 -2
- pixeltable/metadata/converters/convert_22.py +2 -2
- pixeltable/metadata/converters/convert_24.py +2 -2
- pixeltable/metadata/converters/convert_25.py +2 -2
- pixeltable/metadata/converters/convert_26.py +2 -2
- pixeltable/metadata/converters/convert_29.py +4 -4
- pixeltable/metadata/converters/convert_34.py +2 -2
- pixeltable/metadata/converters/convert_36.py +2 -2
- pixeltable/metadata/converters/convert_38.py +2 -2
- pixeltable/metadata/converters/convert_39.py +1 -2
- pixeltable/metadata/converters/util.py +11 -13
- pixeltable/metadata/schema.py +22 -21
- pixeltable/metadata/utils.py +2 -6
- pixeltable/mypy/mypy_plugin.py +5 -5
- pixeltable/plan.py +32 -34
- pixeltable/share/packager.py +7 -7
- pixeltable/share/publish.py +3 -3
- pixeltable/store.py +126 -41
- pixeltable/type_system.py +43 -46
- pixeltable/utils/__init__.py +1 -2
- pixeltable/utils/arrow.py +4 -4
- pixeltable/utils/av.py +74 -38
- pixeltable/utils/azure_store.py +305 -0
- pixeltable/utils/code.py +1 -2
- pixeltable/utils/dbms.py +15 -19
- pixeltable/utils/description_helper.py +2 -3
- pixeltable/utils/documents.py +5 -6
- pixeltable/utils/exception_handler.py +2 -2
- pixeltable/utils/filecache.py +5 -5
- pixeltable/utils/formatter.py +4 -6
- pixeltable/utils/gcs_store.py +9 -9
- pixeltable/utils/local_store.py +17 -17
- pixeltable/utils/object_stores.py +59 -43
- pixeltable/utils/s3_store.py +35 -30
- {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/METADATA +4 -4
- pixeltable-0.4.19.dist-info/RECORD +213 -0
- pixeltable/__version__.py +0 -3
- pixeltable-0.4.17.dist-info/RECORD +0 -211
- {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/entry_points.txt +0 -0
- {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/licenses/LICENSE +0 -0
pixeltable/catalog/column.py
CHANGED
|
@@ -3,21 +3,21 @@ from __future__ import annotations
|
|
|
3
3
|
import logging
|
|
4
4
|
import warnings
|
|
5
5
|
from textwrap import dedent
|
|
6
|
-
from typing import TYPE_CHECKING, Any
|
|
6
|
+
from typing import TYPE_CHECKING, Any
|
|
7
7
|
|
|
8
8
|
import sqlalchemy as sql
|
|
9
9
|
|
|
10
10
|
import pixeltable.exceptions as excs
|
|
11
|
+
import pixeltable.exprs as exprs
|
|
11
12
|
import pixeltable.type_system as ts
|
|
12
|
-
from pixeltable import
|
|
13
|
+
from pixeltable.env import Env
|
|
13
14
|
from pixeltable.metadata import schema
|
|
14
15
|
|
|
15
|
-
from .globals import MediaValidation, is_valid_identifier
|
|
16
|
+
from .globals import MediaValidation, QColumnId, is_valid_identifier
|
|
16
17
|
|
|
17
18
|
if TYPE_CHECKING:
|
|
18
19
|
from .table_version import TableVersion
|
|
19
|
-
from .table_version_handle import ColumnHandle
|
|
20
|
-
from .table_version_path import TableVersionPath
|
|
20
|
+
from .table_version_handle import ColumnHandle, TableVersionHandle
|
|
21
21
|
|
|
22
22
|
_logger = logging.getLogger('pixeltable')
|
|
23
23
|
|
|
@@ -48,57 +48,58 @@ class Column:
|
|
|
48
48
|
- if None: the system chooses for you (at present, this is always False, but this may change in the future)
|
|
49
49
|
"""
|
|
50
50
|
|
|
51
|
-
name:
|
|
52
|
-
id:
|
|
51
|
+
name: str | None
|
|
52
|
+
id: int | None
|
|
53
53
|
col_type: ts.ColumnType
|
|
54
54
|
stored: bool
|
|
55
55
|
is_pk: bool
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
#
|
|
68
|
-
|
|
56
|
+
is_iterator_col: bool
|
|
57
|
+
_explicit_destination: str | None # An object store reference for computed files
|
|
58
|
+
_media_validation: MediaValidation | None # if not set, TableVersion.media_validation applies
|
|
59
|
+
schema_version_add: int | None
|
|
60
|
+
schema_version_drop: int | None
|
|
61
|
+
stores_cellmd: bool
|
|
62
|
+
sa_col: sql.schema.Column | None
|
|
63
|
+
sa_col_type: sql.types.TypeEngine
|
|
64
|
+
sa_cellmd_col: sql.schema.Column | None # JSON metadata for the cell, e.g. errortype, errormsg for media columns
|
|
65
|
+
_value_expr: exprs.Expr | None
|
|
66
|
+
value_expr_dict: dict[str, Any] | None
|
|
67
|
+
# we store a handle here in order to allow Column construction before there is a corresponding TableVersion
|
|
68
|
+
tbl_handle: 'TableVersionHandle' | None
|
|
69
69
|
|
|
70
70
|
def __init__(
|
|
71
71
|
self,
|
|
72
|
-
name:
|
|
73
|
-
col_type:
|
|
74
|
-
computed_with:
|
|
72
|
+
name: str | None,
|
|
73
|
+
col_type: ts.ColumnType | None = None,
|
|
74
|
+
computed_with: exprs.Expr | None = None,
|
|
75
75
|
is_pk: bool = False,
|
|
76
|
+
is_iterator_col: bool = False,
|
|
76
77
|
stored: bool = True,
|
|
77
|
-
media_validation:
|
|
78
|
-
col_id:
|
|
79
|
-
schema_version_add:
|
|
80
|
-
schema_version_drop:
|
|
81
|
-
sa_col_type:
|
|
82
|
-
stores_cellmd:
|
|
83
|
-
value_expr_dict:
|
|
84
|
-
|
|
85
|
-
destination:
|
|
78
|
+
media_validation: MediaValidation | None = None,
|
|
79
|
+
col_id: int | None = None,
|
|
80
|
+
schema_version_add: int | None = None,
|
|
81
|
+
schema_version_drop: int | None = None,
|
|
82
|
+
sa_col_type: sql.types.TypeEngine | None = None,
|
|
83
|
+
stores_cellmd: bool | None = None,
|
|
84
|
+
value_expr_dict: dict[str, Any] | None = None,
|
|
85
|
+
tbl_handle: 'TableVersionHandle' | None = None,
|
|
86
|
+
destination: str | None = None,
|
|
86
87
|
):
|
|
87
88
|
if name is not None and not is_valid_identifier(name):
|
|
88
|
-
raise excs.Error(f
|
|
89
|
+
raise excs.Error(f'Invalid column name: {name}')
|
|
89
90
|
self.name = name
|
|
90
|
-
self.
|
|
91
|
+
self.tbl_handle = tbl_handle
|
|
91
92
|
if col_type is None and computed_with is None:
|
|
92
|
-
raise excs.Error(f'Column
|
|
93
|
+
raise excs.Error(f'Column {name!r}: `col_type` is required if `computed_with` is not specified')
|
|
93
94
|
|
|
94
|
-
self._value_expr
|
|
95
|
+
self._value_expr = None
|
|
95
96
|
self.value_expr_dict = value_expr_dict
|
|
96
97
|
if computed_with is not None:
|
|
97
98
|
value_expr = exprs.Expr.from_object(computed_with)
|
|
98
99
|
if value_expr is None:
|
|
99
100
|
# TODO: this shouldn't be a user-facing error
|
|
100
101
|
raise excs.Error(
|
|
101
|
-
f'Column {name}: computed_with needs to be a valid Pixeltable expression, '
|
|
102
|
+
f'Column {name!r}: `computed_with` needs to be a valid Pixeltable expression, '
|
|
102
103
|
f'but it is a {type(computed_with)}'
|
|
103
104
|
)
|
|
104
105
|
else:
|
|
@@ -115,21 +116,30 @@ class Column:
|
|
|
115
116
|
# self.dependent_cols = set() # cols with value_exprs that reference us; set by TableVersion
|
|
116
117
|
self.id = col_id
|
|
117
118
|
self.is_pk = is_pk
|
|
119
|
+
self.is_iterator_col = is_iterator_col
|
|
118
120
|
self._media_validation = media_validation
|
|
119
121
|
self.schema_version_add = schema_version_add
|
|
120
122
|
self.schema_version_drop = schema_version_drop
|
|
121
123
|
|
|
122
|
-
|
|
124
|
+
if stores_cellmd is not None:
|
|
125
|
+
self.stores_cellmd = stores_cellmd
|
|
126
|
+
else:
|
|
127
|
+
self.stores_cellmd = stored and (
|
|
128
|
+
self.is_computed
|
|
129
|
+
or self.col_type.is_media_type()
|
|
130
|
+
or self.col_type.is_json_type()
|
|
131
|
+
or self.col_type.is_array_type()
|
|
132
|
+
)
|
|
123
133
|
|
|
124
134
|
# column in the stored table for the values of this Column
|
|
125
135
|
self.sa_col = None
|
|
126
|
-
self.sa_col_type = sa_col_type
|
|
136
|
+
self.sa_col_type = self.col_type.to_sa_type() if sa_col_type is None else sa_col_type
|
|
127
137
|
|
|
128
138
|
# computed cols also have storage columns for the exception string and type
|
|
129
139
|
self.sa_cellmd_col = None
|
|
130
|
-
self.
|
|
140
|
+
self._explicit_destination = destination
|
|
131
141
|
|
|
132
|
-
def to_md(self, pos:
|
|
142
|
+
def to_md(self, pos: int | None = None) -> tuple[schema.ColumnMd, schema.SchemaColumn | None]:
|
|
133
143
|
"""Returns the Column and optional SchemaColumn metadata for this Column."""
|
|
134
144
|
assert self.is_pk is not None
|
|
135
145
|
col_md = schema.ColumnMd(
|
|
@@ -140,7 +150,7 @@ class Column:
|
|
|
140
150
|
schema_version_drop=self.schema_version_drop,
|
|
141
151
|
value_expr=self.value_expr.as_dict() if self.value_expr is not None else None,
|
|
142
152
|
stored=self.stored,
|
|
143
|
-
destination=self.
|
|
153
|
+
destination=self._explicit_destination,
|
|
144
154
|
)
|
|
145
155
|
if pos is None:
|
|
146
156
|
return col_md, None
|
|
@@ -152,33 +162,6 @@ class Column:
|
|
|
152
162
|
)
|
|
153
163
|
return col_md, sch_md
|
|
154
164
|
|
|
155
|
-
@classmethod
|
|
156
|
-
def from_md(
|
|
157
|
-
cls, col_md: schema.ColumnMd, tbl: TableVersion, schema_col_md: Optional[schema.SchemaColumn]
|
|
158
|
-
) -> Column:
|
|
159
|
-
"""Create a Column from a ColumnMd."""
|
|
160
|
-
assert col_md.id is not None
|
|
161
|
-
col_name = schema_col_md.name if schema_col_md is not None else None
|
|
162
|
-
media_val = (
|
|
163
|
-
MediaValidation[schema_col_md.media_validation.upper()]
|
|
164
|
-
if schema_col_md is not None and schema_col_md.media_validation is not None
|
|
165
|
-
else None
|
|
166
|
-
)
|
|
167
|
-
col = cls(
|
|
168
|
-
col_id=col_md.id,
|
|
169
|
-
name=col_name,
|
|
170
|
-
col_type=ts.ColumnType.from_dict(col_md.col_type),
|
|
171
|
-
is_pk=col_md.is_pk,
|
|
172
|
-
stored=col_md.stored,
|
|
173
|
-
media_validation=media_val,
|
|
174
|
-
schema_version_add=col_md.schema_version_add,
|
|
175
|
-
schema_version_drop=col_md.schema_version_drop,
|
|
176
|
-
value_expr_dict=col_md.value_expr,
|
|
177
|
-
tbl=tbl,
|
|
178
|
-
destination=col_md.destination,
|
|
179
|
-
)
|
|
180
|
-
return col
|
|
181
|
-
|
|
182
165
|
def init_value_expr(self) -> None:
|
|
183
166
|
from pixeltable import exprs
|
|
184
167
|
|
|
@@ -190,7 +173,7 @@ class Column:
|
|
|
190
173
|
message = (
|
|
191
174
|
dedent(
|
|
192
175
|
f"""
|
|
193
|
-
The computed column {self.name!r} in table {self.
|
|
176
|
+
The computed column {self.name!r} in table {self.get_tbl().name!r} is no longer valid.
|
|
194
177
|
{{validation_error}}
|
|
195
178
|
You can continue to query existing data from this column, but evaluating it on new data will raise an error.
|
|
196
179
|
""" # noqa: E501
|
|
@@ -200,17 +183,46 @@ class Column:
|
|
|
200
183
|
)
|
|
201
184
|
warnings.warn(message, category=excs.PixeltableWarning, stacklevel=2)
|
|
202
185
|
|
|
186
|
+
def get_tbl(self) -> TableVersion:
|
|
187
|
+
tv = self.tbl_handle.get()
|
|
188
|
+
return tv
|
|
189
|
+
|
|
190
|
+
@property
|
|
191
|
+
def destination(self) -> str | None:
|
|
192
|
+
if self._explicit_destination is not None:
|
|
193
|
+
# An explicit destination was set as part of the column definition
|
|
194
|
+
return self._explicit_destination
|
|
195
|
+
|
|
196
|
+
# Otherwise, if this is a stored media column, use the default destination if one is configured (input
|
|
197
|
+
# destination or output destination, depending on whether this is a computed column)
|
|
198
|
+
# TODO: The `self.name is not None` clause is necessary because index columns currently follow the type of
|
|
199
|
+
# the underlying media column. We should move to using pxt.String as the col_type of index columns; this
|
|
200
|
+
# would be a more robust solution, and then `self.name is not None` could be removed.
|
|
201
|
+
if self.is_stored and self.col_type.is_media_type() and self.name is not None:
|
|
202
|
+
if self.is_computed:
|
|
203
|
+
return Env.get().default_output_media_dest
|
|
204
|
+
else:
|
|
205
|
+
return Env.get().default_input_media_dest
|
|
206
|
+
|
|
207
|
+
return None
|
|
208
|
+
|
|
203
209
|
@property
|
|
204
210
|
def handle(self) -> 'ColumnHandle':
|
|
205
211
|
"""Returns a ColumnHandle for this Column."""
|
|
206
212
|
from .table_version_handle import ColumnHandle
|
|
207
213
|
|
|
208
|
-
assert self.
|
|
214
|
+
assert self.tbl_handle is not None
|
|
215
|
+
assert self.id is not None
|
|
216
|
+
return ColumnHandle(self.tbl_handle, self.id)
|
|
217
|
+
|
|
218
|
+
@property
|
|
219
|
+
def qid(self) -> QColumnId:
|
|
220
|
+
assert self.tbl_handle is not None
|
|
209
221
|
assert self.id is not None
|
|
210
|
-
return
|
|
222
|
+
return QColumnId(self.tbl_handle.id, self.id)
|
|
211
223
|
|
|
212
224
|
@property
|
|
213
|
-
def value_expr(self) ->
|
|
225
|
+
def value_expr(self) -> exprs.Expr | None:
|
|
214
226
|
assert self.value_expr_dict is None or self._value_expr is not None
|
|
215
227
|
return self._value_expr
|
|
216
228
|
|
|
@@ -220,29 +232,22 @@ class Column:
|
|
|
220
232
|
|
|
221
233
|
def check_value_expr(self) -> None:
|
|
222
234
|
assert self._value_expr is not None
|
|
223
|
-
if self.stored
|
|
235
|
+
if not self.stored and self.is_computed and self.has_window_fn_call():
|
|
224
236
|
raise excs.Error(
|
|
225
|
-
f'Column {self.name}: stored={self.stored} not supported for columns
|
|
226
|
-
f'
|
|
237
|
+
f'Column {self.name!r}: `stored={self.stored}` not supported for columns '
|
|
238
|
+
f'computed with window functions:\n{self.value_expr}'
|
|
227
239
|
)
|
|
228
240
|
|
|
229
241
|
def has_window_fn_call(self) -> bool:
|
|
230
|
-
if self.value_expr is None:
|
|
231
|
-
return False
|
|
232
242
|
from pixeltable import exprs
|
|
233
243
|
|
|
244
|
+
if self.value_expr is None:
|
|
245
|
+
return False
|
|
234
246
|
window_fn_calls = list(
|
|
235
247
|
self.value_expr.subexprs(filter=lambda e: isinstance(e, exprs.FunctionCall) and e.is_window_fn_call)
|
|
236
248
|
)
|
|
237
249
|
return len(window_fn_calls) > 0
|
|
238
250
|
|
|
239
|
-
# TODO: This should be moved out of `Column` (its presence in `Column` doesn't anticipate indices being defined on
|
|
240
|
-
# multiple dependents)
|
|
241
|
-
def get_idx_info(self, reference_tbl: Optional['TableVersionPath'] = None) -> dict[str, 'TableVersion.IndexInfo']:
|
|
242
|
-
assert self.tbl is not None
|
|
243
|
-
tbl = reference_tbl.tbl_version.get() if reference_tbl is not None else self.tbl
|
|
244
|
-
return {name: info for name, info in tbl.idxs_by_name.items() if info.col == self}
|
|
245
|
-
|
|
246
251
|
@property
|
|
247
252
|
def is_computed(self) -> bool:
|
|
248
253
|
return self._value_expr is not None or self.value_expr_dict is not None
|
|
@@ -253,30 +258,17 @@ class Column:
|
|
|
253
258
|
assert self.stored is not None
|
|
254
259
|
return self.stored
|
|
255
260
|
|
|
256
|
-
@property
|
|
257
|
-
def stores_cellmd(self) -> bool:
|
|
258
|
-
"""True if this column also stores error information."""
|
|
259
|
-
# default: record errors for computed and media columns
|
|
260
|
-
if self._stores_cellmd is not None:
|
|
261
|
-
return self._stores_cellmd
|
|
262
|
-
return self.is_stored and (
|
|
263
|
-
self.is_computed
|
|
264
|
-
or self.col_type.is_media_type()
|
|
265
|
-
or self.col_type.is_json_type()
|
|
266
|
-
or self.col_type.is_array_type()
|
|
267
|
-
)
|
|
268
|
-
|
|
269
261
|
@property
|
|
270
262
|
def qualified_name(self) -> str:
|
|
271
|
-
assert self.
|
|
272
|
-
return f'{self.
|
|
263
|
+
assert self.get_tbl() is not None
|
|
264
|
+
return f'{self.get_tbl().name}.{self.name}'
|
|
273
265
|
|
|
274
266
|
@property
|
|
275
267
|
def media_validation(self) -> MediaValidation:
|
|
276
268
|
if self._media_validation is not None:
|
|
277
269
|
return self._media_validation
|
|
278
|
-
assert self.
|
|
279
|
-
return self.
|
|
270
|
+
assert self.get_tbl() is not None
|
|
271
|
+
return self.get_tbl().media_validation
|
|
280
272
|
|
|
281
273
|
@property
|
|
282
274
|
def is_required_for_insert(self) -> bool:
|
|
@@ -295,24 +287,21 @@ class Column:
|
|
|
295
287
|
|
|
296
288
|
def create_sa_cols(self) -> None:
|
|
297
289
|
"""
|
|
298
|
-
These need to be recreated for every
|
|
290
|
+
These need to be recreated for every sql.Table instance
|
|
299
291
|
"""
|
|
300
292
|
assert self.is_stored
|
|
293
|
+
assert self.stores_cellmd is not None
|
|
301
294
|
# all storage columns are nullable (we deal with null errors in Pixeltable directly)
|
|
302
|
-
self.sa_col = sql.Column(self.store_name(), self.
|
|
295
|
+
self.sa_col = sql.Column(self.store_name(), self.sa_col_type, nullable=True)
|
|
303
296
|
if self.stores_cellmd:
|
|
304
|
-
# JSON metadata for the cell, e.g. errortype, errormsg for media columns
|
|
305
297
|
self.sa_cellmd_col = sql.Column(self.cellmd_store_name(), self.sa_cellmd_type(), nullable=True)
|
|
306
298
|
|
|
307
|
-
def get_sa_col_type(self) -> sql.sqltypes.TypeEngine:
|
|
308
|
-
return self.col_type.to_sa_type() if self.sa_col_type is None else self.sa_col_type
|
|
309
|
-
|
|
310
299
|
@classmethod
|
|
311
300
|
def cellmd_type(cls) -> ts.ColumnType:
|
|
312
301
|
return ts.JsonType(nullable=True)
|
|
313
302
|
|
|
314
303
|
@classmethod
|
|
315
|
-
def sa_cellmd_type(cls) -> sql.
|
|
304
|
+
def sa_cellmd_type(cls) -> sql.types.TypeEngine:
|
|
316
305
|
return cls.cellmd_type().to_sa_type()
|
|
317
306
|
|
|
318
307
|
def store_name(self) -> str:
|
|
@@ -327,17 +316,17 @@ class Column:
|
|
|
327
316
|
return f'{self.name}: {self.col_type}'
|
|
328
317
|
|
|
329
318
|
def __repr__(self) -> str:
|
|
330
|
-
return f'Column({self.id!r}, {self.name!r}, tbl={self.
|
|
319
|
+
return f'Column({self.id!r}, {self.name!r}, tbl={self.get_tbl().name!r})'
|
|
331
320
|
|
|
332
321
|
def __hash__(self) -> int:
|
|
333
322
|
# TODO(aaron-siegel): This and __eq__ do not capture the table version. We need to rethink the Column
|
|
334
323
|
# abstraction (perhaps separating out the version-dependent properties into a different abstraction).
|
|
335
|
-
assert self.
|
|
336
|
-
return hash((self.
|
|
324
|
+
assert self.tbl_handle is not None
|
|
325
|
+
return hash((self.tbl_handle.id, self.id))
|
|
337
326
|
|
|
338
327
|
def __eq__(self, other: object) -> bool:
|
|
339
328
|
if not isinstance(other, Column):
|
|
340
329
|
return False
|
|
341
|
-
assert self.
|
|
342
|
-
assert other.
|
|
343
|
-
return self.
|
|
330
|
+
assert self.tbl_handle is not None
|
|
331
|
+
assert other.tbl_handle is not None
|
|
332
|
+
return self.tbl_handle.id == other.tbl_handle.id and self.id == other.id
|
pixeltable/catalog/globals.py
CHANGED
|
@@ -4,7 +4,6 @@ import enum
|
|
|
4
4
|
import itertools
|
|
5
5
|
import logging
|
|
6
6
|
from dataclasses import dataclass
|
|
7
|
-
from typing import Optional
|
|
8
7
|
from uuid import UUID
|
|
9
8
|
|
|
10
9
|
import pixeltable.exceptions as excs
|
|
@@ -17,7 +16,7 @@ _ROWID_COLUMN_NAME = '_rowid'
|
|
|
17
16
|
|
|
18
17
|
# Set of symbols that are predefined in the `InsertableTable` class (and are therefore not allowed as column names).
|
|
19
18
|
# This will be populated lazily to avoid circular imports.
|
|
20
|
-
_PREDEF_SYMBOLS:
|
|
19
|
+
_PREDEF_SYMBOLS: set[str] | None = None
|
|
21
20
|
|
|
22
21
|
|
|
23
22
|
@dataclass(frozen=True)
|
|
@@ -2,7 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import enum
|
|
4
4
|
import logging
|
|
5
|
-
from typing import TYPE_CHECKING, Any, Literal,
|
|
5
|
+
from typing import TYPE_CHECKING, Any, Literal, Sequence, cast, overload
|
|
6
6
|
from uuid import UUID
|
|
7
7
|
|
|
8
8
|
import pydantic
|
|
@@ -16,9 +16,10 @@ from pixeltable.utils.pydantic import is_json_convertible
|
|
|
16
16
|
|
|
17
17
|
from .globals import MediaValidation
|
|
18
18
|
from .table import Table
|
|
19
|
-
from .table_version import TableVersion
|
|
19
|
+
from .table_version import TableVersion, TableVersionMd
|
|
20
20
|
from .table_version_handle import TableVersionHandle
|
|
21
21
|
from .table_version_path import TableVersionPath
|
|
22
|
+
from .tbl_ops import CreateStoreTableOp, TableOp
|
|
22
23
|
from .update_status import UpdateStatus
|
|
23
24
|
|
|
24
25
|
if TYPE_CHECKING:
|
|
@@ -65,15 +66,14 @@ class InsertableTable(Table):
|
|
|
65
66
|
@classmethod
|
|
66
67
|
def _create(
|
|
67
68
|
cls,
|
|
68
|
-
dir_id: UUID,
|
|
69
69
|
name: str,
|
|
70
70
|
schema: dict[str, ts.ColumnType],
|
|
71
|
-
df: Optional[pxt.DataFrame],
|
|
72
71
|
primary_key: list[str],
|
|
73
72
|
num_retained_versions: int,
|
|
74
73
|
comment: str,
|
|
75
74
|
media_validation: MediaValidation,
|
|
76
|
-
|
|
75
|
+
create_default_idxs: bool,
|
|
76
|
+
) -> tuple[TableVersionMd, list[TableOp]]:
|
|
77
77
|
columns = cls._create_columns(schema)
|
|
78
78
|
cls._verify_schema(columns)
|
|
79
79
|
column_names = [col.name for col in columns]
|
|
@@ -85,38 +85,35 @@ class InsertableTable(Table):
|
|
|
85
85
|
raise excs.Error(f'Primary key column {pk_col!r} cannot be nullable.')
|
|
86
86
|
col.is_pk = True
|
|
87
87
|
|
|
88
|
-
|
|
89
|
-
dir_id,
|
|
88
|
+
md = TableVersion.create_initial_md(
|
|
90
89
|
name,
|
|
91
90
|
columns,
|
|
92
|
-
num_retained_versions
|
|
93
|
-
comment
|
|
94
|
-
media_validation
|
|
91
|
+
num_retained_versions,
|
|
92
|
+
comment,
|
|
93
|
+
media_validation,
|
|
94
|
+
create_default_idxs=create_default_idxs,
|
|
95
|
+
view_md=None,
|
|
95
96
|
)
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
_logger.info(f'Created table {name!r}, id={tbl_version.id}')
|
|
109
|
-
Env.get().console_logger.info(f'Created table {name!r}.')
|
|
110
|
-
return tbl
|
|
97
|
+
|
|
98
|
+
ops = [
|
|
99
|
+
TableOp(
|
|
100
|
+
tbl_id=md.tbl_md.tbl_id,
|
|
101
|
+
op_sn=0,
|
|
102
|
+
num_ops=1,
|
|
103
|
+
needs_xact=False,
|
|
104
|
+
create_store_table_op=CreateStoreTableOp(),
|
|
105
|
+
)
|
|
106
|
+
]
|
|
107
|
+
return md, ops
|
|
111
108
|
|
|
112
109
|
@overload
|
|
113
110
|
def insert(
|
|
114
111
|
self,
|
|
115
|
-
source:
|
|
112
|
+
source: TableDataSource | None = None,
|
|
116
113
|
/,
|
|
117
114
|
*,
|
|
118
|
-
source_format:
|
|
119
|
-
schema_overrides:
|
|
115
|
+
source_format: Literal['csv', 'excel', 'parquet', 'json'] | None = None,
|
|
116
|
+
schema_overrides: dict[str, ts.ColumnType] | None = None,
|
|
120
117
|
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
121
118
|
print_stats: bool = False,
|
|
122
119
|
**kwargs: Any,
|
|
@@ -129,11 +126,11 @@ class InsertableTable(Table):
|
|
|
129
126
|
|
|
130
127
|
def insert(
|
|
131
128
|
self,
|
|
132
|
-
source:
|
|
129
|
+
source: TableDataSource | None = None,
|
|
133
130
|
/,
|
|
134
131
|
*,
|
|
135
|
-
source_format:
|
|
136
|
-
schema_overrides:
|
|
132
|
+
source_format: Literal['csv', 'excel', 'parquet', 'json'] | None = None,
|
|
133
|
+
schema_overrides: dict[str, ts.ColumnType] | None = None,
|
|
137
134
|
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
138
135
|
print_stats: bool = False,
|
|
139
136
|
**kwargs: Any,
|
|
@@ -142,7 +139,7 @@ class InsertableTable(Table):
|
|
|
142
139
|
from pixeltable.io.table_data_conduit import UnkTableDataConduit
|
|
143
140
|
|
|
144
141
|
if source is not None and isinstance(source, Sequence) and len(source) == 0:
|
|
145
|
-
raise excs.Error('Cannot insert an empty sequence')
|
|
142
|
+
raise excs.Error('Cannot insert an empty sequence.')
|
|
146
143
|
fail_on_exception = OnErrorParameter.fail_on_exception(on_error)
|
|
147
144
|
|
|
148
145
|
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
@@ -214,7 +211,7 @@ class InsertableTable(Table):
|
|
|
214
211
|
try:
|
|
215
212
|
pxt_rows.append(row.model_dump(mode='json'))
|
|
216
213
|
except pydantic_core.PydanticSerializationError as e:
|
|
217
|
-
raise excs.Error(f'Row {i}: error serializing pydantic model to JSON:\n{e
|
|
214
|
+
raise excs.Error(f'Row {i}: error serializing pydantic model to JSON:\n{e}') from e
|
|
218
215
|
|
|
219
216
|
# explicitly check that all required columns are present and non-None in the rows,
|
|
220
217
|
# because we ignore nullability when validating the pydantic model
|
|
@@ -222,7 +219,7 @@ class InsertableTable(Table):
|
|
|
222
219
|
for i, pxt_row in enumerate(pxt_rows):
|
|
223
220
|
if type(rows[i]) is not model_class:
|
|
224
221
|
raise excs.Error(
|
|
225
|
-
f'Expected {model_class.__name__
|
|
222
|
+
f'Expected an instance of `{model_class.__name__}`; got `{type(rows[i]).__name__}` (in row {i})'
|
|
226
223
|
)
|
|
227
224
|
for col_name in reqd_col_names:
|
|
228
225
|
if pxt_row.get(col_name) is None:
|
|
@@ -253,22 +250,20 @@ class InsertableTable(Table):
|
|
|
253
250
|
missing_required = required_cols - model_field_names
|
|
254
251
|
if missing_required:
|
|
255
252
|
raise excs.Error(
|
|
256
|
-
f'Pydantic model {model.__name__
|
|
257
|
-
f'{", ".join(f"{col_name!r}" for col_name in missing_required)}'
|
|
253
|
+
f'Pydantic model `{model.__name__}` is missing required columns: ' + ', '.join(missing_required)
|
|
258
254
|
)
|
|
259
255
|
|
|
260
256
|
computed_in_model = computed_cols & model_field_names
|
|
261
257
|
if computed_in_model:
|
|
262
258
|
raise excs.Error(
|
|
263
|
-
f'Pydantic model {model.__name__
|
|
264
|
-
f'{", ".join(f"{col_name!r}" for col_name in computed_in_model)}'
|
|
259
|
+
f'Pydantic model `{model.__name__}` has fields for computed columns: ' + ', '.join(computed_in_model)
|
|
265
260
|
)
|
|
266
261
|
|
|
267
262
|
# validate type compatibility
|
|
268
263
|
common_fields = model_field_names & set(schema.keys())
|
|
269
264
|
if len(common_fields) == 0:
|
|
270
265
|
raise excs.Error(
|
|
271
|
-
f'Pydantic model {model.__name__
|
|
266
|
+
f'Pydantic model `{model.__name__}` has no fields that map to columns in table {self._name!r}'
|
|
272
267
|
)
|
|
273
268
|
for field_name in common_fields:
|
|
274
269
|
pxt_col_type = schema[field_name]
|
|
@@ -281,21 +276,21 @@ class InsertableTable(Table):
|
|
|
281
276
|
inferred_pxt_type = ts.ColumnType.from_python_type(model_type, infer_pydantic_json=True)
|
|
282
277
|
if inferred_pxt_type is None:
|
|
283
278
|
raise excs.Error(
|
|
284
|
-
f'Pydantic model {model.__name__
|
|
279
|
+
f'Pydantic model `{model.__name__}`: cannot infer Pixeltable type for column {field_name!r}'
|
|
285
280
|
)
|
|
286
281
|
|
|
287
282
|
if pxt_col_type.is_media_type():
|
|
288
283
|
# media types require file paths, either as str or Path
|
|
289
284
|
if not inferred_pxt_type.is_string_type():
|
|
290
285
|
raise excs.Error(
|
|
291
|
-
f
|
|
292
|
-
f'{model_type.__name__
|
|
286
|
+
f'Column {field_name!r} requires a `str` or `Path` field in `{model.__name__}`, but it is '
|
|
287
|
+
f'`{model_type.__name__}`'
|
|
293
288
|
)
|
|
294
289
|
else:
|
|
295
290
|
if not pxt_col_type.is_supertype_of(inferred_pxt_type, ignore_nullable=True):
|
|
296
291
|
raise excs.Error(
|
|
297
|
-
f'Pydantic model {model.__name__
|
|
298
|
-
f'for column {field_name!r} ({pxt_col_type})'
|
|
292
|
+
f'Pydantic model `{model.__name__}` has incompatible type `{model_type.__name__}` '
|
|
293
|
+
f'for column {field_name!r} (of Pixeltable type `{pxt_col_type}`)'
|
|
299
294
|
)
|
|
300
295
|
|
|
301
296
|
if (
|
|
@@ -304,11 +299,11 @@ class InsertableTable(Table):
|
|
|
304
299
|
and not is_json_convertible(model_type)
|
|
305
300
|
):
|
|
306
301
|
raise excs.Error(
|
|
307
|
-
f'Pydantic model {model.__name__
|
|
308
|
-
f'{model_type.__name__
|
|
302
|
+
f'Pydantic model `{model.__name__}` has field {field_name!r} with nested model '
|
|
303
|
+
f'`{model_type.__name__}`, which is not JSON-convertible'
|
|
309
304
|
)
|
|
310
305
|
|
|
311
|
-
def delete(self, where:
|
|
306
|
+
def delete(self, where: 'exprs.Expr' | None = None) -> UpdateStatus:
|
|
312
307
|
"""Delete rows in this table.
|
|
313
308
|
|
|
314
309
|
Args:
|
|
@@ -328,11 +323,11 @@ class InsertableTable(Table):
|
|
|
328
323
|
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
329
324
|
return self._tbl_version.get().delete(where=where)
|
|
330
325
|
|
|
331
|
-
def _get_base_table(self) ->
|
|
326
|
+
def _get_base_table(self) -> 'Table' | None:
|
|
332
327
|
return None
|
|
333
328
|
|
|
334
329
|
@property
|
|
335
|
-
def _effective_base_versions(self) -> list[
|
|
330
|
+
def _effective_base_versions(self) -> list[int | None]:
|
|
336
331
|
return []
|
|
337
332
|
|
|
338
333
|
def _table_descriptor(self) -> str:
|
pixeltable/catalog/path.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
|
-
from typing import Optional
|
|
5
4
|
|
|
6
5
|
from pixeltable import exceptions as excs
|
|
7
6
|
|
|
@@ -12,9 +11,9 @@ _logger = logging.getLogger('pixeltable')
|
|
|
12
11
|
|
|
13
12
|
class Path:
|
|
14
13
|
components: list[str]
|
|
15
|
-
version:
|
|
14
|
+
version: int | None
|
|
16
15
|
|
|
17
|
-
def __init__(self, components: list[str], version:
|
|
16
|
+
def __init__(self, components: list[str], version: int | None = None) -> None:
|
|
18
17
|
assert len(components) > 0
|
|
19
18
|
self.components = components
|
|
20
19
|
self.version = version
|
|
@@ -28,7 +27,7 @@ class Path:
|
|
|
28
27
|
allow_versioned_path: bool = False,
|
|
29
28
|
) -> Path:
|
|
30
29
|
components: list[str]
|
|
31
|
-
version:
|
|
30
|
+
version: int | None
|
|
32
31
|
if ':' in path:
|
|
33
32
|
parts = path.split(':')
|
|
34
33
|
if len(parts) != 2:
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from abc import abstractmethod
|
|
2
|
-
from typing import TYPE_CHECKING
|
|
2
|
+
from typing import TYPE_CHECKING
|
|
3
3
|
from uuid import UUID
|
|
4
4
|
|
|
5
5
|
if TYPE_CHECKING:
|
|
@@ -14,16 +14,16 @@ class SchemaObject:
|
|
|
14
14
|
|
|
15
15
|
_id: UUID
|
|
16
16
|
_name: str
|
|
17
|
-
_dir_id:
|
|
17
|
+
_dir_id: UUID | None
|
|
18
18
|
|
|
19
|
-
def __init__(self, obj_id: UUID, name: str, dir_id:
|
|
19
|
+
def __init__(self, obj_id: UUID, name: str, dir_id: UUID | None):
|
|
20
20
|
# make these private so they don't collide with column names (id and name are fairly common)
|
|
21
21
|
assert dir_id is None or isinstance(dir_id, UUID), type(dir_id)
|
|
22
22
|
self._id = obj_id
|
|
23
23
|
self._name = name
|
|
24
24
|
self._dir_id = dir_id
|
|
25
25
|
|
|
26
|
-
def _parent(self) ->
|
|
26
|
+
def _parent(self) -> 'catalog.Dir | None':
|
|
27
27
|
"""Returns the parent directory of this schema object."""
|
|
28
28
|
from .catalog import Catalog
|
|
29
29
|
|