pixeltable 0.4.0rc3__py3-none-any.whl → 0.4.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +23 -5
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +5 -3
- pixeltable/catalog/catalog.py +1318 -404
- pixeltable/catalog/column.py +186 -115
- pixeltable/catalog/dir.py +1 -2
- pixeltable/catalog/globals.py +11 -43
- pixeltable/catalog/insertable_table.py +167 -79
- pixeltable/catalog/path.py +61 -23
- pixeltable/catalog/schema_object.py +9 -10
- pixeltable/catalog/table.py +626 -308
- pixeltable/catalog/table_metadata.py +101 -0
- pixeltable/catalog/table_version.py +713 -569
- pixeltable/catalog/table_version_handle.py +37 -6
- pixeltable/catalog/table_version_path.py +42 -29
- pixeltable/catalog/tbl_ops.py +50 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +108 -94
- pixeltable/config.py +128 -22
- pixeltable/dataframe.py +188 -100
- pixeltable/env.py +407 -136
- pixeltable/exceptions.py +6 -0
- pixeltable/exec/__init__.py +3 -0
- pixeltable/exec/aggregation_node.py +7 -8
- pixeltable/exec/cache_prefetch_node.py +83 -110
- pixeltable/exec/cell_materialization_node.py +231 -0
- pixeltable/exec/cell_reconstruction_node.py +135 -0
- pixeltable/exec/component_iteration_node.py +4 -3
- pixeltable/exec/data_row_batch.py +8 -65
- pixeltable/exec/exec_context.py +16 -4
- pixeltable/exec/exec_node.py +13 -36
- pixeltable/exec/expr_eval/evaluators.py +7 -6
- pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
- pixeltable/exec/expr_eval/globals.py +8 -5
- pixeltable/exec/expr_eval/row_buffer.py +1 -2
- pixeltable/exec/expr_eval/schedulers.py +190 -30
- pixeltable/exec/globals.py +32 -0
- pixeltable/exec/in_memory_data_node.py +18 -18
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +16 -9
- pixeltable/exec/sql_node.py +206 -101
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/arithmetic_expr.py +27 -22
- pixeltable/exprs/array_slice.py +3 -3
- pixeltable/exprs/column_property_ref.py +34 -30
- pixeltable/exprs/column_ref.py +92 -96
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +5 -4
- pixeltable/exprs/data_row.py +152 -55
- pixeltable/exprs/expr.py +62 -43
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +17 -10
- pixeltable/exprs/function_call.py +75 -37
- pixeltable/exprs/globals.py +1 -2
- pixeltable/exprs/in_predicate.py +4 -4
- pixeltable/exprs/inline_expr.py +10 -27
- pixeltable/exprs/is_null.py +1 -3
- pixeltable/exprs/json_mapper.py +8 -8
- pixeltable/exprs/json_path.py +56 -22
- pixeltable/exprs/literal.py +5 -5
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +127 -53
- pixeltable/exprs/rowid_ref.py +8 -12
- pixeltable/exprs/similarity_expr.py +50 -25
- pixeltable/exprs/sql_element_cache.py +4 -4
- pixeltable/exprs/string_op.py +5 -5
- pixeltable/exprs/type_cast.py +3 -5
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/callable_function.py +9 -9
- pixeltable/func/expr_template_function.py +10 -10
- pixeltable/func/function.py +18 -20
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/globals.py +2 -3
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +20 -18
- pixeltable/func/signature.py +43 -16
- pixeltable/func/tools.py +23 -13
- pixeltable/func/udf.py +18 -20
- pixeltable/functions/__init__.py +6 -0
- pixeltable/functions/anthropic.py +93 -33
- pixeltable/functions/audio.py +114 -10
- pixeltable/functions/bedrock.py +13 -6
- pixeltable/functions/date.py +1 -1
- pixeltable/functions/deepseek.py +20 -9
- pixeltable/functions/fireworks.py +2 -2
- pixeltable/functions/gemini.py +28 -11
- pixeltable/functions/globals.py +13 -13
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1046 -23
- pixeltable/functions/image.py +9 -18
- pixeltable/functions/llama_cpp.py +23 -8
- pixeltable/functions/math.py +3 -4
- pixeltable/functions/mistralai.py +4 -15
- pixeltable/functions/ollama.py +16 -9
- pixeltable/functions/openai.py +104 -82
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +2 -2
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +21 -28
- pixeltable/functions/timestamp.py +13 -14
- pixeltable/functions/together.py +4 -6
- pixeltable/functions/twelvelabs.py +92 -0
- pixeltable/functions/util.py +6 -1
- pixeltable/functions/video.py +1388 -106
- pixeltable/functions/vision.py +7 -7
- pixeltable/functions/whisper.py +15 -7
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/{ext/functions → functions}/yolox.py +2 -4
- pixeltable/globals.py +332 -105
- pixeltable/index/base.py +13 -22
- pixeltable/index/btree.py +23 -22
- pixeltable/index/embedding_index.py +32 -44
- pixeltable/io/__init__.py +4 -2
- pixeltable/io/datarows.py +7 -6
- pixeltable/io/external_store.py +49 -77
- pixeltable/io/fiftyone.py +11 -11
- pixeltable/io/globals.py +29 -28
- pixeltable/io/hf_datasets.py +17 -9
- pixeltable/io/label_studio.py +70 -66
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +12 -11
- pixeltable/io/parquet.py +13 -93
- pixeltable/io/table_data_conduit.py +71 -47
- pixeltable/io/utils.py +3 -3
- pixeltable/iterators/__init__.py +2 -1
- pixeltable/iterators/audio.py +21 -11
- pixeltable/iterators/document.py +116 -55
- pixeltable/iterators/image.py +5 -2
- pixeltable/iterators/video.py +293 -13
- pixeltable/metadata/__init__.py +4 -2
- pixeltable/metadata/converters/convert_18.py +2 -2
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_20.py +2 -2
- pixeltable/metadata/converters/convert_21.py +2 -2
- pixeltable/metadata/converters/convert_22.py +2 -2
- pixeltable/metadata/converters/convert_24.py +2 -2
- pixeltable/metadata/converters/convert_25.py +2 -2
- pixeltable/metadata/converters/convert_26.py +2 -2
- pixeltable/metadata/converters/convert_29.py +4 -4
- pixeltable/metadata/converters/convert_34.py +2 -2
- pixeltable/metadata/converters/convert_36.py +2 -2
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/util.py +13 -12
- pixeltable/metadata/notes.py +4 -0
- pixeltable/metadata/schema.py +79 -42
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +274 -223
- pixeltable/share/__init__.py +1 -1
- pixeltable/share/packager.py +259 -129
- pixeltable/share/protocol/__init__.py +34 -0
- pixeltable/share/protocol/common.py +170 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +109 -0
- pixeltable/share/publish.py +213 -57
- pixeltable/store.py +238 -175
- pixeltable/type_system.py +104 -63
- pixeltable/utils/__init__.py +2 -3
- pixeltable/utils/arrow.py +108 -13
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +305 -0
- pixeltable/utils/code.py +3 -3
- pixeltable/utils/console_output.py +4 -1
- pixeltable/utils/coroutine.py +6 -23
- pixeltable/utils/dbms.py +31 -5
- pixeltable/utils/description_helper.py +4 -5
- pixeltable/utils/documents.py +5 -6
- pixeltable/utils/exception_handler.py +7 -30
- pixeltable/utils/filecache.py +6 -6
- pixeltable/utils/formatter.py +4 -6
- pixeltable/utils/gcs_store.py +283 -0
- pixeltable/utils/http_server.py +2 -3
- pixeltable/utils/iceberg.py +1 -2
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +88 -0
- pixeltable/utils/local_store.py +316 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +528 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +5 -6
- pixeltable/utils/s3_store.py +392 -0
- pixeltable-0.4.20.dist-info/METADATA +587 -0
- pixeltable-0.4.20.dist-info/RECORD +218 -0
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info}/WHEEL +1 -1
- pixeltable-0.4.20.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/ext/__init__.py +0 -17
- pixeltable/ext/functions/__init__.py +0 -11
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/utils/media_store.py +0 -77
- pixeltable/utils/s3.py +0 -17
- pixeltable/utils/sample.py +0 -25
- pixeltable-0.4.0rc3.dist-info/METADATA +0 -435
- pixeltable-0.4.0rc3.dist-info/RECORD +0 -189
- pixeltable-0.4.0rc3.dist-info/entry_points.txt +0 -3
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info/licenses}/LICENSE +0 -0
|
@@ -2,19 +2,25 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import enum
|
|
4
4
|
import logging
|
|
5
|
-
from typing import TYPE_CHECKING, Any, Literal,
|
|
5
|
+
from typing import TYPE_CHECKING, Any, Literal, Sequence, cast, overload
|
|
6
6
|
from uuid import UUID
|
|
7
7
|
|
|
8
|
+
import pydantic
|
|
9
|
+
import pydantic_core
|
|
10
|
+
|
|
8
11
|
import pixeltable as pxt
|
|
9
12
|
from pixeltable import exceptions as excs, type_system as ts
|
|
10
13
|
from pixeltable.env import Env
|
|
11
14
|
from pixeltable.utils.filecache import FileCache
|
|
15
|
+
from pixeltable.utils.pydantic import is_json_convertible
|
|
12
16
|
|
|
13
|
-
from .globals import MediaValidation
|
|
17
|
+
from .globals import MediaValidation
|
|
14
18
|
from .table import Table
|
|
15
|
-
from .table_version import TableVersion
|
|
19
|
+
from .table_version import TableVersion, TableVersionCompleteMd
|
|
16
20
|
from .table_version_handle import TableVersionHandle
|
|
17
21
|
from .table_version_path import TableVersionPath
|
|
22
|
+
from .tbl_ops import CreateStoreTableOp, TableOp
|
|
23
|
+
from .update_status import UpdateStatus
|
|
18
24
|
|
|
19
25
|
if TYPE_CHECKING:
|
|
20
26
|
from pixeltable import exprs
|
|
@@ -51,72 +57,63 @@ class InsertableTable(Table):
|
|
|
51
57
|
def __init__(self, dir_id: UUID, tbl_version: TableVersionHandle):
|
|
52
58
|
tbl_version_path = TableVersionPath(tbl_version)
|
|
53
59
|
super().__init__(tbl_version.id, dir_id, tbl_version.get().name, tbl_version_path)
|
|
60
|
+
self._tbl_version = tbl_version
|
|
54
61
|
|
|
55
|
-
|
|
56
|
-
|
|
62
|
+
def _display_name(self) -> str:
|
|
63
|
+
assert not self._tbl_version_path.is_replica()
|
|
57
64
|
return 'table'
|
|
58
65
|
|
|
59
66
|
@classmethod
|
|
60
67
|
def _create(
|
|
61
68
|
cls,
|
|
62
|
-
dir_id: UUID,
|
|
63
69
|
name: str,
|
|
64
70
|
schema: dict[str, ts.ColumnType],
|
|
65
|
-
df: Optional[pxt.DataFrame],
|
|
66
71
|
primary_key: list[str],
|
|
67
72
|
num_retained_versions: int,
|
|
68
73
|
comment: str,
|
|
69
74
|
media_validation: MediaValidation,
|
|
70
|
-
|
|
75
|
+
create_default_idxs: bool,
|
|
76
|
+
) -> tuple[TableVersionCompleteMd, list[TableOp]]:
|
|
71
77
|
columns = cls._create_columns(schema)
|
|
72
78
|
cls._verify_schema(columns)
|
|
73
79
|
column_names = [col.name for col in columns]
|
|
74
80
|
for pk_col in primary_key:
|
|
75
81
|
if pk_col not in column_names:
|
|
76
|
-
raise excs.Error(f'Primary key column {pk_col} not found in table schema')
|
|
82
|
+
raise excs.Error(f'Primary key column {pk_col!r} not found in table schema.')
|
|
77
83
|
col = columns[column_names.index(pk_col)]
|
|
78
84
|
if col.col_type.nullable:
|
|
79
|
-
raise excs.Error(f'Primary key column {pk_col} cannot be nullable')
|
|
85
|
+
raise excs.Error(f'Primary key column {pk_col!r} cannot be nullable.')
|
|
80
86
|
col.is_pk = True
|
|
81
87
|
|
|
82
|
-
|
|
83
|
-
dir_id,
|
|
88
|
+
md = TableVersion.create_initial_md(
|
|
84
89
|
name,
|
|
85
90
|
columns,
|
|
86
|
-
num_retained_versions
|
|
87
|
-
comment
|
|
88
|
-
media_validation
|
|
91
|
+
num_retained_versions,
|
|
92
|
+
comment,
|
|
93
|
+
media_validation,
|
|
94
|
+
create_default_idxs=create_default_idxs,
|
|
95
|
+
view_md=None,
|
|
89
96
|
)
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
_logger.info(f'Created table `{name}`, id={tbl_version.id}')
|
|
103
|
-
Env.get().console_logger.info(f'Created table `{name}`.')
|
|
104
|
-
return tbl
|
|
105
|
-
|
|
106
|
-
def get_metadata(self) -> dict[str, Any]:
|
|
107
|
-
md = super().get_metadata()
|
|
108
|
-
md['is_view'] = False
|
|
109
|
-
md['is_snapshot'] = False
|
|
110
|
-
return md
|
|
97
|
+
|
|
98
|
+
ops = [
|
|
99
|
+
TableOp(
|
|
100
|
+
tbl_id=md.tbl_md.tbl_id,
|
|
101
|
+
op_sn=0,
|
|
102
|
+
num_ops=1,
|
|
103
|
+
needs_xact=False,
|
|
104
|
+
create_store_table_op=CreateStoreTableOp(),
|
|
105
|
+
)
|
|
106
|
+
]
|
|
107
|
+
return md, ops
|
|
111
108
|
|
|
112
109
|
@overload
|
|
113
110
|
def insert(
|
|
114
111
|
self,
|
|
115
|
-
source:
|
|
112
|
+
source: TableDataSource | None = None,
|
|
116
113
|
/,
|
|
117
114
|
*,
|
|
118
|
-
source_format:
|
|
119
|
-
schema_overrides:
|
|
115
|
+
source_format: Literal['csv', 'excel', 'parquet', 'json'] | None = None,
|
|
116
|
+
schema_overrides: dict[str, ts.ColumnType] | None = None,
|
|
120
117
|
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
121
118
|
print_stats: bool = False,
|
|
122
119
|
**kwargs: Any,
|
|
@@ -129,11 +126,11 @@ class InsertableTable(Table):
|
|
|
129
126
|
|
|
130
127
|
def insert(
|
|
131
128
|
self,
|
|
132
|
-
source:
|
|
129
|
+
source: TableDataSource | None = None,
|
|
133
130
|
/,
|
|
134
131
|
*,
|
|
135
|
-
source_format:
|
|
136
|
-
schema_overrides:
|
|
132
|
+
source_format: Literal['csv', 'excel', 'parquet', 'json'] | None = None,
|
|
133
|
+
schema_overrides: dict[str, ts.ColumnType] | None = None,
|
|
137
134
|
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
138
135
|
print_stats: bool = False,
|
|
139
136
|
**kwargs: Any,
|
|
@@ -141,8 +138,24 @@ class InsertableTable(Table):
|
|
|
141
138
|
from pixeltable.catalog import Catalog
|
|
142
139
|
from pixeltable.io.table_data_conduit import UnkTableDataConduit
|
|
143
140
|
|
|
144
|
-
|
|
141
|
+
if source is not None and isinstance(source, Sequence) and len(source) == 0:
|
|
142
|
+
raise excs.Error('Cannot insert an empty sequence.')
|
|
143
|
+
fail_on_exception = OnErrorParameter.fail_on_exception(on_error)
|
|
144
|
+
|
|
145
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
145
146
|
table = self
|
|
147
|
+
|
|
148
|
+
# TODO: unify with TableDataConduit
|
|
149
|
+
if source is not None and isinstance(source, Sequence) and isinstance(source[0], pydantic.BaseModel):
|
|
150
|
+
status = self._insert_pydantic(
|
|
151
|
+
cast(Sequence[pydantic.BaseModel], source), # needed for mypy
|
|
152
|
+
print_stats=print_stats,
|
|
153
|
+
fail_on_exception=fail_on_exception,
|
|
154
|
+
)
|
|
155
|
+
Env.get().console_logger.info(status.insert_msg)
|
|
156
|
+
FileCache.get().emit_eviction_warnings()
|
|
157
|
+
return status
|
|
158
|
+
|
|
146
159
|
if source is None:
|
|
147
160
|
source = [kwargs]
|
|
148
161
|
kwargs = None
|
|
@@ -158,7 +171,6 @@ class InsertableTable(Table):
|
|
|
158
171
|
data_source.add_table_info(table)
|
|
159
172
|
data_source.prepare_for_insert_into_table()
|
|
160
173
|
|
|
161
|
-
fail_on_exception = OnErrorParameter.fail_on_exception(on_error)
|
|
162
174
|
return table.insert_table_data_source(
|
|
163
175
|
data_source=data_source, fail_on_exception=fail_on_exception, print_stats=print_stats
|
|
164
176
|
)
|
|
@@ -170,13 +182,14 @@ class InsertableTable(Table):
|
|
|
170
182
|
from pixeltable.catalog import Catalog
|
|
171
183
|
from pixeltable.io.table_data_conduit import DFTableDataConduit
|
|
172
184
|
|
|
173
|
-
|
|
174
|
-
with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
|
|
185
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
175
186
|
if isinstance(data_source, DFTableDataConduit):
|
|
187
|
+
status = pxt.UpdateStatus()
|
|
176
188
|
status += self._tbl_version.get().insert(
|
|
177
189
|
rows=None, df=data_source.pxt_df, print_stats=print_stats, fail_on_exception=fail_on_exception
|
|
178
190
|
)
|
|
179
191
|
else:
|
|
192
|
+
status = pxt.UpdateStatus()
|
|
180
193
|
for row_batch in data_source.valid_row_batch():
|
|
181
194
|
status += self._tbl_version.get().insert(
|
|
182
195
|
rows=row_batch, df=None, print_stats=print_stats, fail_on_exception=fail_on_exception
|
|
@@ -187,34 +200,110 @@ class InsertableTable(Table):
|
|
|
187
200
|
FileCache.get().emit_eviction_warnings()
|
|
188
201
|
return status
|
|
189
202
|
|
|
190
|
-
def
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
203
|
+
def _insert_pydantic(
|
|
204
|
+
self, rows: Sequence[pydantic.BaseModel], print_stats: bool = False, fail_on_exception: bool = True
|
|
205
|
+
) -> UpdateStatus:
|
|
206
|
+
model_class = type(rows[0])
|
|
207
|
+
self._validate_pydantic_model(model_class)
|
|
208
|
+
# convert rows one-by-one in order to be able to print meaningful error messages
|
|
209
|
+
pxt_rows: list[dict[str, Any]] = []
|
|
210
|
+
for i, row in enumerate(rows):
|
|
211
|
+
try:
|
|
212
|
+
pxt_rows.append(row.model_dump(mode='json'))
|
|
213
|
+
except pydantic_core.PydanticSerializationError as e:
|
|
214
|
+
raise excs.Error(f'Row {i}: error serializing pydantic model to JSON:\n{e}') from e
|
|
215
|
+
|
|
216
|
+
# explicitly check that all required columns are present and non-None in the rows,
|
|
217
|
+
# because we ignore nullability when validating the pydantic model
|
|
218
|
+
reqd_col_names = [col.name for col in self._tbl_version_path.columns() if col.is_required_for_insert]
|
|
219
|
+
for i, pxt_row in enumerate(pxt_rows):
|
|
220
|
+
if type(rows[i]) is not model_class:
|
|
221
|
+
raise excs.Error(
|
|
222
|
+
f'Expected an instance of `{model_class.__name__}`; got `{type(rows[i]).__name__}` (in row {i})'
|
|
223
|
+
)
|
|
224
|
+
for col_name in reqd_col_names:
|
|
225
|
+
if pxt_row.get(col_name) is None:
|
|
226
|
+
raise excs.Error(f'Missing required column {col_name!r} in row {i}')
|
|
227
|
+
|
|
228
|
+
status = self._tbl_version.get().insert(
|
|
229
|
+
rows=pxt_rows, df=None, print_stats=print_stats, fail_on_exception=fail_on_exception
|
|
230
|
+
)
|
|
231
|
+
return status
|
|
232
|
+
|
|
233
|
+
def _validate_pydantic_model(self, model: type[pydantic.BaseModel]) -> None:
|
|
234
|
+
"""
|
|
235
|
+
Check if a Pydantic model is compatible with this table for insert operations.
|
|
236
|
+
|
|
237
|
+
A model is compatible if:
|
|
238
|
+
- All required table columns have corresponding model fields with compatible types
|
|
239
|
+
- Model does not define fields for computed columns
|
|
240
|
+
- Model field types are compatible with table column types
|
|
241
|
+
"""
|
|
242
|
+
assert isinstance(model, type) and issubclass(model, pydantic.BaseModel)
|
|
243
|
+
|
|
244
|
+
schema = self._get_schema()
|
|
245
|
+
required_cols = set(self._tbl_version.get().get_required_col_names())
|
|
246
|
+
computed_cols = set(self._tbl_version.get().get_computed_col_names())
|
|
247
|
+
model_fields = model.model_fields
|
|
248
|
+
model_field_names = set(model_fields.keys())
|
|
249
|
+
|
|
250
|
+
missing_required = required_cols - model_field_names
|
|
251
|
+
if missing_required:
|
|
252
|
+
raise excs.Error(
|
|
253
|
+
f'Pydantic model `{model.__name__}` is missing required columns: ' + ', '.join(missing_required)
|
|
254
|
+
)
|
|
255
|
+
|
|
256
|
+
computed_in_model = computed_cols & model_field_names
|
|
257
|
+
if computed_in_model:
|
|
258
|
+
raise excs.Error(
|
|
259
|
+
f'Pydantic model `{model.__name__}` has fields for computed columns: ' + ', '.join(computed_in_model)
|
|
260
|
+
)
|
|
261
|
+
|
|
262
|
+
# validate type compatibility
|
|
263
|
+
common_fields = model_field_names & set(schema.keys())
|
|
264
|
+
if len(common_fields) == 0:
|
|
265
|
+
raise excs.Error(
|
|
266
|
+
f'Pydantic model `{model.__name__}` has no fields that map to columns in table {self._name!r}'
|
|
267
|
+
)
|
|
268
|
+
for field_name in common_fields:
|
|
269
|
+
pxt_col_type = schema[field_name]
|
|
270
|
+
model_field = model_fields[field_name]
|
|
271
|
+
model_type = model_field.annotation
|
|
272
|
+
|
|
273
|
+
# we ignore nullability: we want to accept optional model fields for required table columns, as long as
|
|
274
|
+
# the model instances provide a non-null value
|
|
275
|
+
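# infer_pydantic_json=True (NOTE(review): this comment previously named `allow_enum=True`, which is not passed below — confirm): model_dump(mode='json') converts enums to their values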
|
|
276
|
+
inferred_pxt_type = ts.ColumnType.from_python_type(model_type, infer_pydantic_json=True)
|
|
277
|
+
if inferred_pxt_type is None:
|
|
278
|
+
raise excs.Error(
|
|
279
|
+
f'Pydantic model `{model.__name__}`: cannot infer Pixeltable type for column {field_name!r}'
|
|
280
|
+
)
|
|
281
|
+
|
|
282
|
+
if pxt_col_type.is_media_type():
|
|
283
|
+
# media types require file paths, either as str or Path
|
|
284
|
+
if not inferred_pxt_type.is_string_type():
|
|
285
|
+
raise excs.Error(
|
|
286
|
+
f'Column {field_name!r} requires a `str` or `Path` field in `{model.__name__}`, but it is '
|
|
287
|
+
f'`{model_type.__name__}`'
|
|
288
|
+
)
|
|
289
|
+
else:
|
|
290
|
+
if not pxt_col_type.is_supertype_of(inferred_pxt_type, ignore_nullable=True):
|
|
291
|
+
raise excs.Error(
|
|
292
|
+
f'Pydantic model `{model.__name__}` has incompatible type `{model_type.__name__}` '
|
|
293
|
+
f'for column {field_name!r} (of Pixeltable type `{pxt_col_type}`)'
|
|
294
|
+
)
|
|
295
|
+
|
|
296
|
+
if (
|
|
297
|
+
isinstance(model_type, type)
|
|
298
|
+
and issubclass(model_type, pydantic.BaseModel)
|
|
299
|
+
and not is_json_convertible(model_type)
|
|
300
|
+
):
|
|
301
|
+
raise excs.Error(
|
|
302
|
+
f'Pydantic model `{model.__name__}` has field {field_name!r} with nested model '
|
|
303
|
+
f'`{model_type.__name__}`, which is not JSON-convertible'
|
|
304
|
+
)
|
|
305
|
+
|
|
306
|
+
def delete(self, where: 'exprs.Expr' | None = None) -> UpdateStatus:
|
|
218
307
|
"""Delete rows in this table.
|
|
219
308
|
|
|
220
309
|
Args:
|
|
@@ -231,16 +320,15 @@ class InsertableTable(Table):
|
|
|
231
320
|
"""
|
|
232
321
|
from pixeltable.catalog import Catalog
|
|
233
322
|
|
|
234
|
-
with Catalog.get().begin_xact(
|
|
323
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
235
324
|
return self._tbl_version.get().delete(where=where)
|
|
236
325
|
|
|
237
|
-
|
|
238
|
-
def _base_table(self) -> Optional['Table']:
|
|
326
|
+
def _get_base_table(self) -> 'Table' | None:
|
|
239
327
|
return None
|
|
240
328
|
|
|
241
329
|
@property
|
|
242
|
-
def _effective_base_versions(self) -> list[
|
|
330
|
+
def _effective_base_versions(self) -> list[int | None]:
|
|
243
331
|
return []
|
|
244
332
|
|
|
245
333
|
def _table_descriptor(self) -> str:
|
|
246
|
-
return
|
|
334
|
+
return self._display_str()
|
pixeltable/catalog/path.py
CHANGED
|
@@ -1,20 +1,53 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
|
-
from typing import
|
|
4
|
+
from typing import NamedTuple
|
|
5
5
|
|
|
6
6
|
from pixeltable import exceptions as excs
|
|
7
7
|
|
|
8
|
-
from .globals import
|
|
8
|
+
from .globals import is_valid_identifier
|
|
9
9
|
|
|
10
10
|
_logger = logging.getLogger('pixeltable')
|
|
11
11
|
|
|
12
12
|
|
|
13
|
-
class Path:
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
13
|
+
class Path(NamedTuple):
|
|
14
|
+
components: list[str]
|
|
15
|
+
version: int | None = None
|
|
16
|
+
|
|
17
|
+
@classmethod
|
|
18
|
+
def parse(
|
|
19
|
+
cls,
|
|
20
|
+
path: str,
|
|
21
|
+
allow_empty_path: bool = False,
|
|
22
|
+
allow_system_path: bool = False,
|
|
23
|
+
allow_versioned_path: bool = False,
|
|
24
|
+
) -> Path:
|
|
25
|
+
components: list[str]
|
|
26
|
+
version: int | None
|
|
27
|
+
if ':' in path:
|
|
28
|
+
parts = path.split(':')
|
|
29
|
+
if len(parts) != 2:
|
|
30
|
+
raise excs.Error(f'Invalid path: {path}')
|
|
31
|
+
try:
|
|
32
|
+
components = parts[0].split('.')
|
|
33
|
+
version = int(parts[1])
|
|
34
|
+
except ValueError:
|
|
35
|
+
raise excs.Error(f'Invalid path: {path}') from None
|
|
36
|
+
else:
|
|
37
|
+
components = path.split('.')
|
|
38
|
+
version = None
|
|
39
|
+
|
|
40
|
+
if components == [''] and not allow_empty_path:
|
|
41
|
+
raise excs.Error(f'Invalid path: {path}')
|
|
42
|
+
|
|
43
|
+
if components != [''] and not all(is_valid_identifier(c, allow_system_path) for c in components):
|
|
44
|
+
raise excs.Error(f'Invalid path: {path}')
|
|
45
|
+
|
|
46
|
+
if version is not None and not allow_versioned_path:
|
|
47
|
+
raise excs.Error(f'Versioned path not allowed here: {path}')
|
|
48
|
+
|
|
49
|
+
assert len(components) > 0
|
|
50
|
+
return Path(components, version)
|
|
18
51
|
|
|
19
52
|
@property
|
|
20
53
|
def len(self) -> int:
|
|
@@ -22,7 +55,6 @@ class Path:
|
|
|
22
55
|
|
|
23
56
|
@property
|
|
24
57
|
def name(self) -> str:
|
|
25
|
-
assert len(self.components) > 0
|
|
26
58
|
return self.components[-1]
|
|
27
59
|
|
|
28
60
|
@property
|
|
@@ -36,18 +68,15 @@ class Path:
|
|
|
36
68
|
@property
|
|
37
69
|
def parent(self) -> Path:
|
|
38
70
|
if len(self.components) == 1:
|
|
39
|
-
|
|
40
|
-
return self
|
|
41
|
-
else:
|
|
42
|
-
return Path('', empty_is_valid=True, allow_system_paths=True)
|
|
71
|
+
return ROOT_PATH # Includes the case of the root path, which is its own parent.
|
|
43
72
|
else:
|
|
44
|
-
return Path(
|
|
73
|
+
return Path(self.components[:-1])
|
|
45
74
|
|
|
46
75
|
def append(self, name: str) -> Path:
|
|
47
76
|
if self.is_root:
|
|
48
|
-
return Path(name
|
|
77
|
+
return Path([name])
|
|
49
78
|
else:
|
|
50
|
-
return Path(
|
|
79
|
+
return Path([*self.components, name])
|
|
51
80
|
|
|
52
81
|
def is_ancestor(self, other: Path, is_parent: bool = False) -> bool:
|
|
53
82
|
"""
|
|
@@ -60,22 +89,31 @@ class Path:
|
|
|
60
89
|
is_prefix = self.components == other.components[: self.len]
|
|
61
90
|
return is_prefix and (self.len == (other.len - 1) or not is_parent)
|
|
62
91
|
|
|
63
|
-
def ancestors(self) ->
|
|
92
|
+
def ancestors(self) -> list[Path]:
|
|
64
93
|
"""
|
|
65
|
-
Return all ancestors of this path in top-down order including root.
|
|
94
|
+
Return all proper ancestors of this path in top-down order including root.
|
|
66
95
|
If this path is for the root directory, which has no parent, then an empty list is returned.
|
|
67
96
|
"""
|
|
68
97
|
if self.is_root:
|
|
69
|
-
return
|
|
98
|
+
return []
|
|
70
99
|
else:
|
|
71
|
-
for i in range(
|
|
72
|
-
yield Path('.'.join(self.components[0:i]), empty_is_valid=True)
|
|
100
|
+
return [Path(self.components[:i]) if i > 0 else ROOT_PATH for i in range(len(self.components))]
|
|
73
101
|
|
|
74
102
|
def __repr__(self) -> str:
|
|
75
103
|
return repr(str(self))
|
|
76
104
|
|
|
77
105
|
def __str__(self) -> str:
|
|
78
|
-
|
|
106
|
+
base = '.'.join(self.components)
|
|
107
|
+
if self.version is not None:
|
|
108
|
+
return f'{base}:{self.version}'
|
|
109
|
+
else:
|
|
110
|
+
return base
|
|
111
|
+
|
|
112
|
+
def __eq__(self, other: object) -> bool:
|
|
113
|
+
return isinstance(other, Path) and str(self) == str(other)
|
|
114
|
+
|
|
115
|
+
def __hash__(self) -> int:
|
|
116
|
+
return hash(str(self))
|
|
117
|
+
|
|
79
118
|
|
|
80
|
-
|
|
81
|
-
return str(self) < str(other)
|
|
119
|
+
ROOT_PATH = Path([''])
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from abc import abstractmethod
|
|
2
|
-
from typing import TYPE_CHECKING
|
|
2
|
+
from typing import TYPE_CHECKING
|
|
3
3
|
from uuid import UUID
|
|
4
4
|
|
|
5
5
|
if TYPE_CHECKING:
|
|
@@ -14,15 +14,16 @@ class SchemaObject:
|
|
|
14
14
|
|
|
15
15
|
_id: UUID
|
|
16
16
|
_name: str
|
|
17
|
-
_dir_id:
|
|
17
|
+
_dir_id: UUID | None
|
|
18
18
|
|
|
19
|
-
def __init__(self, obj_id: UUID, name: str, dir_id:
|
|
19
|
+
def __init__(self, obj_id: UUID, name: str, dir_id: UUID | None):
|
|
20
20
|
# make these private so they don't collide with column names (id and name are fairly common)
|
|
21
|
+
assert dir_id is None or isinstance(dir_id, UUID), type(dir_id)
|
|
21
22
|
self._id = obj_id
|
|
22
23
|
self._name = name
|
|
23
24
|
self._dir_id = dir_id
|
|
24
25
|
|
|
25
|
-
def _parent(self) ->
|
|
26
|
+
def _parent(self) -> 'catalog.Dir | None':
|
|
26
27
|
"""Returns the parent directory of this schema object."""
|
|
27
28
|
from .catalog import Catalog
|
|
28
29
|
|
|
@@ -40,18 +41,16 @@ class SchemaObject:
|
|
|
40
41
|
path = Catalog.get().get_dir_path(self._dir_id)
|
|
41
42
|
return str(path.append(self._name))
|
|
42
43
|
|
|
43
|
-
def get_metadata(self) -> dict[str, Any]:
|
|
44
|
-
"""Returns metadata associated with this schema object."""
|
|
45
|
-
return {'name': self._name, 'path': self._path()}
|
|
46
|
-
|
|
47
|
-
@classmethod
|
|
48
44
|
@abstractmethod
|
|
49
|
-
def _display_name(
|
|
45
|
+
def _display_name(self) -> str:
|
|
50
46
|
"""
|
|
51
47
|
Return name displayed in error messages.
|
|
52
48
|
"""
|
|
53
49
|
pass
|
|
54
50
|
|
|
51
|
+
def _display_str(self) -> str:
|
|
52
|
+
return f'{self._display_name()} {self._path()!r}'
|
|
53
|
+
|
|
55
54
|
def _move(self, new_name: str, new_dir_id: UUID) -> None:
|
|
56
55
|
"""Subclasses need to override this to make the change persistent"""
|
|
57
56
|
self._name = new_name
|