pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +83 -19
- pixeltable/_query.py +1444 -0
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +7 -4
- pixeltable/catalog/catalog.py +2394 -119
- pixeltable/catalog/column.py +225 -104
- pixeltable/catalog/dir.py +38 -9
- pixeltable/catalog/globals.py +53 -34
- pixeltable/catalog/insertable_table.py +265 -115
- pixeltable/catalog/path.py +80 -17
- pixeltable/catalog/schema_object.py +28 -43
- pixeltable/catalog/table.py +1270 -677
- pixeltable/catalog/table_metadata.py +103 -0
- pixeltable/catalog/table_version.py +1270 -751
- pixeltable/catalog/table_version_handle.py +109 -0
- pixeltable/catalog/table_version_path.py +137 -42
- pixeltable/catalog/tbl_ops.py +53 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +251 -134
- pixeltable/config.py +215 -0
- pixeltable/env.py +736 -285
- pixeltable/exceptions.py +26 -2
- pixeltable/exec/__init__.py +7 -2
- pixeltable/exec/aggregation_node.py +39 -21
- pixeltable/exec/cache_prefetch_node.py +87 -109
- pixeltable/exec/cell_materialization_node.py +268 -0
- pixeltable/exec/cell_reconstruction_node.py +168 -0
- pixeltable/exec/component_iteration_node.py +25 -28
- pixeltable/exec/data_row_batch.py +11 -46
- pixeltable/exec/exec_context.py +26 -11
- pixeltable/exec/exec_node.py +35 -27
- pixeltable/exec/expr_eval/__init__.py +3 -0
- pixeltable/exec/expr_eval/evaluators.py +365 -0
- pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
- pixeltable/exec/expr_eval/globals.py +200 -0
- pixeltable/exec/expr_eval/row_buffer.py +74 -0
- pixeltable/exec/expr_eval/schedulers.py +413 -0
- pixeltable/exec/globals.py +35 -0
- pixeltable/exec/in_memory_data_node.py +35 -27
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +44 -29
- pixeltable/exec/sql_node.py +414 -115
- pixeltable/exprs/__init__.py +8 -5
- pixeltable/exprs/arithmetic_expr.py +79 -45
- pixeltable/exprs/array_slice.py +5 -5
- pixeltable/exprs/column_property_ref.py +40 -26
- pixeltable/exprs/column_ref.py +254 -61
- pixeltable/exprs/comparison.py +14 -9
- pixeltable/exprs/compound_predicate.py +9 -10
- pixeltable/exprs/data_row.py +213 -72
- pixeltable/exprs/expr.py +270 -104
- pixeltable/exprs/expr_dict.py +6 -5
- pixeltable/exprs/expr_set.py +20 -11
- pixeltable/exprs/function_call.py +383 -284
- pixeltable/exprs/globals.py +18 -5
- pixeltable/exprs/in_predicate.py +7 -7
- pixeltable/exprs/inline_expr.py +37 -37
- pixeltable/exprs/is_null.py +8 -4
- pixeltable/exprs/json_mapper.py +120 -54
- pixeltable/exprs/json_path.py +90 -60
- pixeltable/exprs/literal.py +61 -16
- pixeltable/exprs/method_ref.py +7 -6
- pixeltable/exprs/object_ref.py +19 -8
- pixeltable/exprs/row_builder.py +238 -75
- pixeltable/exprs/rowid_ref.py +53 -15
- pixeltable/exprs/similarity_expr.py +65 -50
- pixeltable/exprs/sql_element_cache.py +5 -5
- pixeltable/exprs/string_op.py +107 -0
- pixeltable/exprs/type_cast.py +25 -13
- pixeltable/exprs/variable.py +2 -2
- pixeltable/func/__init__.py +9 -5
- pixeltable/func/aggregate_function.py +197 -92
- pixeltable/func/callable_function.py +119 -35
- pixeltable/func/expr_template_function.py +101 -48
- pixeltable/func/function.py +375 -62
- pixeltable/func/function_registry.py +20 -19
- pixeltable/func/globals.py +6 -5
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +151 -35
- pixeltable/func/signature.py +178 -49
- pixeltable/func/tools.py +164 -0
- pixeltable/func/udf.py +176 -53
- pixeltable/functions/__init__.py +44 -4
- pixeltable/functions/anthropic.py +226 -47
- pixeltable/functions/audio.py +148 -11
- pixeltable/functions/bedrock.py +137 -0
- pixeltable/functions/date.py +188 -0
- pixeltable/functions/deepseek.py +113 -0
- pixeltable/functions/document.py +81 -0
- pixeltable/functions/fal.py +76 -0
- pixeltable/functions/fireworks.py +72 -20
- pixeltable/functions/gemini.py +249 -0
- pixeltable/functions/globals.py +208 -53
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1088 -95
- pixeltable/functions/image.py +155 -84
- pixeltable/functions/json.py +8 -11
- pixeltable/functions/llama_cpp.py +31 -19
- pixeltable/functions/math.py +169 -0
- pixeltable/functions/mistralai.py +50 -75
- pixeltable/functions/net.py +70 -0
- pixeltable/functions/ollama.py +29 -36
- pixeltable/functions/openai.py +548 -160
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +15 -14
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +310 -85
- pixeltable/functions/timestamp.py +37 -19
- pixeltable/functions/together.py +77 -120
- pixeltable/functions/twelvelabs.py +188 -0
- pixeltable/functions/util.py +7 -2
- pixeltable/functions/uuid.py +30 -0
- pixeltable/functions/video.py +1528 -117
- pixeltable/functions/vision.py +26 -26
- pixeltable/functions/voyageai.py +289 -0
- pixeltable/functions/whisper.py +19 -10
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/functions/yolox.py +112 -0
- pixeltable/globals.py +716 -236
- pixeltable/index/__init__.py +3 -1
- pixeltable/index/base.py +17 -21
- pixeltable/index/btree.py +32 -22
- pixeltable/index/embedding_index.py +155 -92
- pixeltable/io/__init__.py +12 -7
- pixeltable/io/datarows.py +140 -0
- pixeltable/io/external_store.py +83 -125
- pixeltable/io/fiftyone.py +24 -33
- pixeltable/io/globals.py +47 -182
- pixeltable/io/hf_datasets.py +96 -127
- pixeltable/io/label_studio.py +171 -156
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +136 -115
- pixeltable/io/parquet.py +40 -153
- pixeltable/io/table_data_conduit.py +702 -0
- pixeltable/io/utils.py +100 -0
- pixeltable/iterators/__init__.py +8 -4
- pixeltable/iterators/audio.py +207 -0
- pixeltable/iterators/base.py +9 -3
- pixeltable/iterators/document.py +144 -87
- pixeltable/iterators/image.py +17 -38
- pixeltable/iterators/string.py +15 -12
- pixeltable/iterators/video.py +523 -127
- pixeltable/metadata/__init__.py +33 -8
- pixeltable/metadata/converters/convert_10.py +2 -3
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_15.py +15 -11
- pixeltable/metadata/converters/convert_16.py +4 -5
- pixeltable/metadata/converters/convert_17.py +4 -5
- pixeltable/metadata/converters/convert_18.py +4 -6
- pixeltable/metadata/converters/convert_19.py +6 -9
- pixeltable/metadata/converters/convert_20.py +3 -6
- pixeltable/metadata/converters/convert_21.py +6 -8
- pixeltable/metadata/converters/convert_22.py +3 -2
- pixeltable/metadata/converters/convert_23.py +33 -0
- pixeltable/metadata/converters/convert_24.py +55 -0
- pixeltable/metadata/converters/convert_25.py +19 -0
- pixeltable/metadata/converters/convert_26.py +23 -0
- pixeltable/metadata/converters/convert_27.py +29 -0
- pixeltable/metadata/converters/convert_28.py +13 -0
- pixeltable/metadata/converters/convert_29.py +110 -0
- pixeltable/metadata/converters/convert_30.py +63 -0
- pixeltable/metadata/converters/convert_31.py +11 -0
- pixeltable/metadata/converters/convert_32.py +15 -0
- pixeltable/metadata/converters/convert_33.py +17 -0
- pixeltable/metadata/converters/convert_34.py +21 -0
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/convert_41.py +12 -0
- pixeltable/metadata/converters/convert_42.py +9 -0
- pixeltable/metadata/converters/convert_43.py +44 -0
- pixeltable/metadata/converters/util.py +44 -18
- pixeltable/metadata/notes.py +21 -0
- pixeltable/metadata/schema.py +185 -42
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +616 -225
- pixeltable/share/__init__.py +3 -0
- pixeltable/share/packager.py +797 -0
- pixeltable/share/protocol/__init__.py +33 -0
- pixeltable/share/protocol/common.py +165 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +119 -0
- pixeltable/share/publish.py +349 -0
- pixeltable/store.py +398 -232
- pixeltable/type_system.py +730 -267
- pixeltable/utils/__init__.py +40 -0
- pixeltable/utils/arrow.py +201 -29
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +346 -0
- pixeltable/utils/coco.py +26 -27
- pixeltable/utils/code.py +4 -4
- pixeltable/utils/console_output.py +46 -0
- pixeltable/utils/coroutine.py +24 -0
- pixeltable/utils/dbms.py +92 -0
- pixeltable/utils/description_helper.py +11 -12
- pixeltable/utils/documents.py +60 -61
- pixeltable/utils/exception_handler.py +36 -0
- pixeltable/utils/filecache.py +38 -22
- pixeltable/utils/formatter.py +88 -51
- pixeltable/utils/gcs_store.py +295 -0
- pixeltable/utils/http.py +133 -0
- pixeltable/utils/http_server.py +14 -13
- pixeltable/utils/iceberg.py +13 -0
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +90 -0
- pixeltable/utils/local_store.py +322 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +573 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +20 -20
- pixeltable/utils/s3_store.py +527 -0
- pixeltable/utils/sql.py +32 -5
- pixeltable/utils/system.py +30 -0
- pixeltable/utils/transactional_directory.py +4 -3
- pixeltable-0.5.7.dist-info/METADATA +579 -0
- pixeltable-0.5.7.dist-info/RECORD +227 -0
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
- pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/catalog/named_function.py +0 -36
- pixeltable/catalog/path_dict.py +0 -141
- pixeltable/dataframe.py +0 -894
- pixeltable/exec/expr_eval_node.py +0 -232
- pixeltable/ext/__init__.py +0 -14
- pixeltable/ext/functions/__init__.py +0 -8
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/ext/functions/yolox.py +0 -157
- pixeltable/tool/create_test_db_dump.py +0 -311
- pixeltable/tool/create_test_video.py +0 -81
- pixeltable/tool/doc_plugins/griffe.py +0 -50
- pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
- pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
- pixeltable/tool/embed_udf.py +0 -9
- pixeltable/tool/mypy_plugin.py +0 -55
- pixeltable/utils/media_store.py +0 -76
- pixeltable/utils/s3.py +0 -16
- pixeltable-0.2.26.dist-info/METADATA +0 -400
- pixeltable-0.2.26.dist-info/RECORD +0 -156
- pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
|
@@ -1,172 +1,309 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import enum
|
|
3
4
|
import logging
|
|
4
|
-
from typing import
|
|
5
|
+
from typing import TYPE_CHECKING, Any, Literal, Sequence, cast, overload
|
|
5
6
|
from uuid import UUID
|
|
6
7
|
|
|
7
|
-
import
|
|
8
|
+
import pydantic
|
|
9
|
+
import pydantic_core
|
|
8
10
|
|
|
9
11
|
import pixeltable as pxt
|
|
10
|
-
import
|
|
11
|
-
from pixeltable import exceptions as excs
|
|
12
|
+
from pixeltable import exceptions as excs, type_system as ts
|
|
12
13
|
from pixeltable.env import Env
|
|
13
14
|
from pixeltable.utils.filecache import FileCache
|
|
15
|
+
from pixeltable.utils.pydantic import is_json_convertible
|
|
14
16
|
|
|
15
|
-
from .
|
|
16
|
-
from .globals import MediaValidation, UpdateStatus
|
|
17
|
+
from .globals import MediaValidation
|
|
17
18
|
from .table import Table
|
|
18
|
-
from .table_version import TableVersion
|
|
19
|
+
from .table_version import TableVersion, TableVersionMd
|
|
20
|
+
from .table_version_handle import TableVersionHandle
|
|
19
21
|
from .table_version_path import TableVersionPath
|
|
22
|
+
from .tbl_ops import CreateStoreTableOp, TableOp
|
|
23
|
+
from .update_status import UpdateStatus
|
|
24
|
+
|
|
25
|
+
if TYPE_CHECKING:
|
|
26
|
+
from pixeltable import exprs
|
|
27
|
+
from pixeltable.globals import TableDataSource
|
|
28
|
+
from pixeltable.io.table_data_conduit import TableDataConduit
|
|
20
29
|
|
|
21
30
|
_logger = logging.getLogger('pixeltable')
|
|
22
31
|
|
|
23
32
|
|
|
33
|
+
class OnErrorParameter(enum.Enum):
|
|
34
|
+
"""Supported values for the on_error parameter"""
|
|
35
|
+
|
|
36
|
+
ABORT = 'abort'
|
|
37
|
+
IGNORE = 'ignore'
|
|
38
|
+
|
|
39
|
+
@classmethod
|
|
40
|
+
def is_valid(cls, v: Any) -> bool:
|
|
41
|
+
if isinstance(v, str):
|
|
42
|
+
return v.lower() in [c.value for c in cls]
|
|
43
|
+
return False
|
|
44
|
+
|
|
45
|
+
@classmethod
|
|
46
|
+
def fail_on_exception(cls, v: Any) -> bool:
|
|
47
|
+
if not cls.is_valid(v):
|
|
48
|
+
raise ValueError(f'Invalid value for on_error: {v}')
|
|
49
|
+
if isinstance(v, str):
|
|
50
|
+
return v.lower() != cls.IGNORE.value
|
|
51
|
+
return True
|
|
52
|
+
|
|
53
|
+
|
|
24
54
|
class InsertableTable(Table):
|
|
25
55
|
"""A `Table` that allows inserting and deleting rows."""
|
|
26
56
|
|
|
27
|
-
def __init__(self, dir_id: UUID, tbl_version:
|
|
57
|
+
def __init__(self, dir_id: UUID, tbl_version: TableVersionHandle):
|
|
28
58
|
tbl_version_path = TableVersionPath(tbl_version)
|
|
29
|
-
super().__init__(tbl_version.id, dir_id, tbl_version.name, tbl_version_path)
|
|
59
|
+
super().__init__(tbl_version.id, dir_id, tbl_version.get().name, tbl_version_path)
|
|
60
|
+
self._tbl_version = tbl_version
|
|
30
61
|
|
|
31
|
-
|
|
32
|
-
|
|
62
|
+
def _display_name(self) -> str:
|
|
63
|
+
assert not self._tbl_version_path.is_replica()
|
|
33
64
|
return 'table'
|
|
34
65
|
|
|
35
|
-
# MODULE-LOCAL, NOT PUBLIC
|
|
36
66
|
@classmethod
|
|
37
67
|
def _create(
|
|
38
|
-
cls,
|
|
39
|
-
|
|
40
|
-
|
|
68
|
+
cls,
|
|
69
|
+
name: str,
|
|
70
|
+
schema: dict[str, ts.ColumnType],
|
|
71
|
+
primary_key: list[str],
|
|
72
|
+
num_retained_versions: int,
|
|
73
|
+
comment: str,
|
|
74
|
+
media_validation: MediaValidation,
|
|
75
|
+
create_default_idxs: bool,
|
|
76
|
+
) -> tuple[TableVersionMd, list[TableOp]]:
|
|
41
77
|
columns = cls._create_columns(schema)
|
|
42
78
|
cls._verify_schema(columns)
|
|
43
79
|
column_names = [col.name for col in columns]
|
|
44
80
|
for pk_col in primary_key:
|
|
45
81
|
if pk_col not in column_names:
|
|
46
|
-
raise excs.Error(f'Primary key column {pk_col} not found in table schema')
|
|
82
|
+
raise excs.Error(f'Primary key column {pk_col!r} not found in table schema.')
|
|
47
83
|
col = columns[column_names.index(pk_col)]
|
|
48
84
|
if col.col_type.nullable:
|
|
49
|
-
raise excs.Error(f'Primary key column {pk_col} cannot be nullable')
|
|
85
|
+
raise excs.Error(f'Primary key column {pk_col!r} cannot be nullable.')
|
|
50
86
|
col.is_pk = True
|
|
51
87
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
return tbl
|
|
73
|
-
|
|
74
|
-
def get_metadata(self) -> dict[str, Any]:
|
|
75
|
-
md = super().get_metadata()
|
|
76
|
-
md['is_view'] = False
|
|
77
|
-
md['is_snapshot'] = False
|
|
78
|
-
return md
|
|
88
|
+
md = TableVersion.create_initial_md(
|
|
89
|
+
name,
|
|
90
|
+
columns,
|
|
91
|
+
num_retained_versions,
|
|
92
|
+
comment,
|
|
93
|
+
media_validation,
|
|
94
|
+
create_default_idxs=create_default_idxs,
|
|
95
|
+
view_md=None,
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
ops = [
|
|
99
|
+
TableOp(
|
|
100
|
+
tbl_id=md.tbl_md.tbl_id,
|
|
101
|
+
op_sn=0,
|
|
102
|
+
num_ops=1,
|
|
103
|
+
needs_xact=False,
|
|
104
|
+
create_store_table_op=CreateStoreTableOp(),
|
|
105
|
+
)
|
|
106
|
+
]
|
|
107
|
+
return md, ops
|
|
79
108
|
|
|
80
109
|
@overload
|
|
81
110
|
def insert(
|
|
82
111
|
self,
|
|
83
|
-
|
|
112
|
+
source: TableDataSource | None = None,
|
|
84
113
|
/,
|
|
85
114
|
*,
|
|
115
|
+
source_format: Literal['csv', 'excel', 'parquet', 'json'] | None = None,
|
|
116
|
+
schema_overrides: dict[str, ts.ColumnType] | None = None,
|
|
117
|
+
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
86
118
|
print_stats: bool = False,
|
|
87
|
-
|
|
119
|
+
**kwargs: Any,
|
|
88
120
|
) -> UpdateStatus: ...
|
|
89
121
|
|
|
90
122
|
@overload
|
|
91
123
|
def insert(
|
|
92
|
-
self,
|
|
93
|
-
*,
|
|
94
|
-
print_stats: bool = False,
|
|
95
|
-
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
96
|
-
**kwargs: Any
|
|
124
|
+
self, /, *, on_error: Literal['abort', 'ignore'] = 'abort', print_stats: bool = False, **kwargs: Any
|
|
97
125
|
) -> UpdateStatus: ...
|
|
98
126
|
|
|
99
|
-
def insert(
|
|
127
|
+
def insert(
|
|
100
128
|
self,
|
|
101
|
-
|
|
129
|
+
source: TableDataSource | None = None,
|
|
102
130
|
/,
|
|
103
131
|
*,
|
|
104
|
-
|
|
132
|
+
source_format: Literal['csv', 'excel', 'parquet', 'json'] | None = None,
|
|
133
|
+
schema_overrides: dict[str, ts.ColumnType] | None = None,
|
|
105
134
|
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
106
|
-
|
|
135
|
+
print_stats: bool = False,
|
|
136
|
+
**kwargs: Any,
|
|
107
137
|
) -> UpdateStatus:
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
138
|
+
from pixeltable.catalog import Catalog
|
|
139
|
+
from pixeltable.io.table_data_conduit import UnkTableDataConduit
|
|
140
|
+
|
|
141
|
+
if source is not None and isinstance(source, Sequence) and len(source) == 0:
|
|
142
|
+
raise excs.Error('Cannot insert an empty sequence.')
|
|
143
|
+
fail_on_exception = OnErrorParameter.fail_on_exception(on_error)
|
|
144
|
+
|
|
145
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
146
|
+
table = self
|
|
147
|
+
|
|
148
|
+
# TODO: unify with TableDataConduit
|
|
149
|
+
if source is not None and isinstance(source, Sequence) and isinstance(source[0], pydantic.BaseModel):
|
|
150
|
+
status = self._insert_pydantic(
|
|
151
|
+
cast(Sequence[pydantic.BaseModel], source), # needed for mypy
|
|
152
|
+
print_stats=print_stats,
|
|
153
|
+
fail_on_exception=fail_on_exception,
|
|
154
|
+
)
|
|
155
|
+
Env.get().console_logger.info(status.insert_msg)
|
|
156
|
+
FileCache.get().emit_eviction_warnings()
|
|
157
|
+
return status
|
|
158
|
+
|
|
159
|
+
if source is None:
|
|
160
|
+
source = [kwargs]
|
|
161
|
+
kwargs = None
|
|
162
|
+
|
|
163
|
+
tds = UnkTableDataConduit(
|
|
164
|
+
source, source_format=source_format, src_schema_overrides=schema_overrides, extra_fields=kwargs
|
|
165
|
+
)
|
|
166
|
+
data_source = tds.specialize()
|
|
167
|
+
if data_source.source_column_map is None:
|
|
168
|
+
data_source.src_pk = []
|
|
169
|
+
|
|
170
|
+
assert isinstance(table, Table)
|
|
171
|
+
data_source.add_table_info(table)
|
|
172
|
+
data_source.prepare_for_insert_into_table()
|
|
173
|
+
|
|
174
|
+
return table.insert_table_data_source(
|
|
175
|
+
data_source=data_source, fail_on_exception=fail_on_exception, print_stats=print_stats
|
|
176
|
+
)
|
|
177
|
+
|
|
178
|
+
def insert_table_data_source(
|
|
179
|
+
self, data_source: TableDataConduit, fail_on_exception: bool, print_stats: bool = False
|
|
180
|
+
) -> pxt.UpdateStatus:
|
|
181
|
+
"""Insert row batches into this table from a `TableDataConduit`."""
|
|
182
|
+
from pixeltable.catalog import Catalog
|
|
183
|
+
from pixeltable.io.table_data_conduit import QueryTableDataConduit
|
|
184
|
+
|
|
185
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
186
|
+
if isinstance(data_source, QueryTableDataConduit):
|
|
187
|
+
status = pxt.UpdateStatus()
|
|
188
|
+
status += self._tbl_version.get().insert(
|
|
189
|
+
rows=None, query=data_source.pxt_query, print_stats=print_stats, fail_on_exception=fail_on_exception
|
|
190
|
+
)
|
|
191
|
+
else:
|
|
192
|
+
status = pxt.UpdateStatus()
|
|
193
|
+
for row_batch in data_source.valid_row_batch():
|
|
194
|
+
status += self._tbl_version.get().insert(
|
|
195
|
+
rows=row_batch, query=None, print_stats=print_stats, fail_on_exception=fail_on_exception
|
|
196
|
+
)
|
|
197
|
+
|
|
198
|
+
Env.get().console_logger.info(status.insert_msg)
|
|
199
|
+
|
|
139
200
|
FileCache.get().emit_eviction_warnings()
|
|
140
201
|
return status
|
|
141
202
|
|
|
142
|
-
def
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
203
|
+
def _insert_pydantic(
|
|
204
|
+
self, rows: Sequence[pydantic.BaseModel], print_stats: bool = False, fail_on_exception: bool = True
|
|
205
|
+
) -> UpdateStatus:
|
|
206
|
+
model_class = type(rows[0])
|
|
207
|
+
self._validate_pydantic_model(model_class)
|
|
208
|
+
# convert rows one-by-one in order to be able to print meaningful error messages
|
|
209
|
+
pxt_rows: list[dict[str, Any]] = []
|
|
210
|
+
for i, row in enumerate(rows):
|
|
211
|
+
try:
|
|
212
|
+
pxt_rows.append(row.model_dump(mode='json'))
|
|
213
|
+
except pydantic_core.PydanticSerializationError as e:
|
|
214
|
+
raise excs.Error(f'Row {i}: error serializing pydantic model to JSON:\n{e}') from e
|
|
215
|
+
|
|
216
|
+
# explicitly check that all required columns are present and non-None in the rows,
|
|
217
|
+
# because we ignore nullability when validating the pydantic model
|
|
218
|
+
reqd_col_names = [col.name for col in self._tbl_version_path.columns() if col.is_required_for_insert]
|
|
219
|
+
for i, pxt_row in enumerate(pxt_rows):
|
|
220
|
+
if type(rows[i]) is not model_class:
|
|
221
|
+
raise excs.Error(
|
|
222
|
+
f'Expected an instance of `{model_class.__name__}`; got `{type(rows[i]).__name__}` (in row {i})'
|
|
223
|
+
)
|
|
224
|
+
for col_name in reqd_col_names:
|
|
225
|
+
if pxt_row.get(col_name) is None:
|
|
226
|
+
raise excs.Error(f'Missing required column {col_name!r} in row {i}')
|
|
227
|
+
|
|
228
|
+
status = self._tbl_version.get().insert(
|
|
229
|
+
rows=pxt_rows, query=None, print_stats=print_stats, fail_on_exception=fail_on_exception
|
|
230
|
+
)
|
|
231
|
+
return status
|
|
232
|
+
|
|
233
|
+
def _validate_pydantic_model(self, model: type[pydantic.BaseModel]) -> None:
|
|
234
|
+
"""
|
|
235
|
+
Check if a Pydantic model is compatible with this table for insert operations.
|
|
236
|
+
|
|
237
|
+
A model is compatible if:
|
|
238
|
+
- All required table columns have corresponding model fields with compatible types
|
|
239
|
+
- Model does not define fields for computed columns
|
|
240
|
+
- Model field types are compatible with table column types
|
|
241
|
+
"""
|
|
242
|
+
assert isinstance(model, type) and issubclass(model, pydantic.BaseModel)
|
|
243
|
+
|
|
244
|
+
schema = self._get_schema()
|
|
245
|
+
required_cols = set(self._tbl_version.get().get_required_col_names())
|
|
246
|
+
computed_cols = set(self._tbl_version.get().get_computed_col_names())
|
|
247
|
+
model_fields = model.model_fields
|
|
248
|
+
model_field_names = set(model_fields.keys())
|
|
249
|
+
|
|
250
|
+
missing_required = required_cols - model_field_names
|
|
251
|
+
if missing_required:
|
|
252
|
+
raise excs.Error(
|
|
253
|
+
f'Pydantic model `{model.__name__}` is missing required columns: ' + ', '.join(missing_required)
|
|
254
|
+
)
|
|
255
|
+
|
|
256
|
+
computed_in_model = computed_cols & model_field_names
|
|
257
|
+
if computed_in_model:
|
|
258
|
+
raise excs.Error(
|
|
259
|
+
f'Pydantic model `{model.__name__}` has fields for computed columns: ' + ', '.join(computed_in_model)
|
|
260
|
+
)
|
|
261
|
+
|
|
262
|
+
# validate type compatibility
|
|
263
|
+
common_fields = model_field_names & set(schema.keys())
|
|
264
|
+
if len(common_fields) == 0:
|
|
265
|
+
raise excs.Error(
|
|
266
|
+
f'Pydantic model `{model.__name__}` has no fields that map to columns in table {self._name!r}'
|
|
267
|
+
)
|
|
268
|
+
for field_name in common_fields:
|
|
269
|
+
pxt_col_type = schema[field_name]
|
|
270
|
+
model_field = model_fields[field_name]
|
|
271
|
+
model_type = model_field.annotation
|
|
272
|
+
|
|
273
|
+
# we ignore nullability: we want to accept optional model fields for required table columns, as long as
|
|
274
|
+
# the model instances provide a non-null value
|
|
275
|
+
# allow_enum=True: model_dump(mode='json') converts enums to their values
|
|
276
|
+
inferred_pxt_type = ts.ColumnType.from_python_type(model_type, infer_pydantic_json=True)
|
|
277
|
+
if inferred_pxt_type is None:
|
|
278
|
+
raise excs.Error(
|
|
279
|
+
f'Pydantic model `{model.__name__}`: cannot infer Pixeltable type for column {field_name!r}'
|
|
280
|
+
)
|
|
281
|
+
|
|
282
|
+
if pxt_col_type.is_media_type():
|
|
283
|
+
# media types require file paths, either as str or Path
|
|
284
|
+
if not inferred_pxt_type.is_string_type():
|
|
285
|
+
raise excs.Error(
|
|
286
|
+
f'Column {field_name!r} requires a `str` or `Path` field in `{model.__name__}`, but it is '
|
|
287
|
+
f'`{model_type.__name__}`'
|
|
288
|
+
)
|
|
289
|
+
else:
|
|
290
|
+
if not pxt_col_type.is_supertype_of(inferred_pxt_type, ignore_nullable=True):
|
|
291
|
+
raise excs.Error(
|
|
292
|
+
f'Pydantic model `{model.__name__}` has incompatible type `{model_type.__name__}` '
|
|
293
|
+
f'for column {field_name!r} (of Pixeltable type `{pxt_col_type}`)'
|
|
294
|
+
)
|
|
295
|
+
|
|
296
|
+
if (
|
|
297
|
+
isinstance(model_type, type)
|
|
298
|
+
and issubclass(model_type, pydantic.BaseModel)
|
|
299
|
+
and not is_json_convertible(model_type)
|
|
300
|
+
):
|
|
301
|
+
raise excs.Error(
|
|
302
|
+
f'Pydantic model `{model.__name__}` has field {field_name!r} with nested model '
|
|
303
|
+
f'`{model_type.__name__}`, which is not JSON-convertible'
|
|
304
|
+
)
|
|
305
|
+
|
|
306
|
+
def delete(self, where: 'exprs.Expr' | None = None) -> UpdateStatus:
|
|
170
307
|
"""Delete rows in this table.
|
|
171
308
|
|
|
172
309
|
Args:
|
|
@@ -181,4 +318,17 @@ class InsertableTable(Table):
|
|
|
181
318
|
|
|
182
319
|
>>> tbl.delete(tbl.a > 5)
|
|
183
320
|
"""
|
|
184
|
-
|
|
321
|
+
from pixeltable.catalog import Catalog
|
|
322
|
+
|
|
323
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
324
|
+
return self._tbl_version.get().delete(where=where)
|
|
325
|
+
|
|
326
|
+
def _get_base_table(self) -> 'Table' | None:
|
|
327
|
+
return None
|
|
328
|
+
|
|
329
|
+
@property
|
|
330
|
+
def _effective_base_versions(self) -> list[int | None]:
|
|
331
|
+
return []
|
|
332
|
+
|
|
333
|
+
def _table_descriptor(self) -> str:
|
|
334
|
+
return self._display_str()
|
pixeltable/catalog/path.py
CHANGED
|
@@ -1,17 +1,55 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
|
+
from typing import NamedTuple
|
|
4
5
|
|
|
5
6
|
from pixeltable import exceptions as excs
|
|
6
|
-
|
|
7
|
+
|
|
8
|
+
from .globals import is_valid_identifier
|
|
7
9
|
|
|
8
10
|
_logger = logging.getLogger('pixeltable')
|
|
9
11
|
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
12
|
+
|
|
13
|
+
class Path(NamedTuple):
|
|
14
|
+
components: list[str]
|
|
15
|
+
version: int | None = None
|
|
16
|
+
|
|
17
|
+
@classmethod
|
|
18
|
+
def parse(
|
|
19
|
+
cls,
|
|
20
|
+
path: str,
|
|
21
|
+
allow_empty_path: bool = False,
|
|
22
|
+
allow_system_path: bool = False,
|
|
23
|
+
allow_versioned_path: bool = False,
|
|
24
|
+
) -> Path:
|
|
25
|
+
components: list[str]
|
|
26
|
+
version: int | None
|
|
27
|
+
if ':' in path:
|
|
28
|
+
parts = path.split(':')
|
|
29
|
+
if len(parts) != 2:
|
|
30
|
+
raise excs.Error(f'Invalid path: {path}')
|
|
31
|
+
try:
|
|
32
|
+
components = parts[0].split('.')
|
|
33
|
+
version = int(parts[1])
|
|
34
|
+
except ValueError:
|
|
35
|
+
raise excs.Error(f'Invalid path: {path}') from None
|
|
36
|
+
else:
|
|
37
|
+
components = path.split('.')
|
|
38
|
+
version = None
|
|
39
|
+
|
|
40
|
+
if components == [''] and not allow_empty_path:
|
|
41
|
+
raise excs.Error(f'Invalid path: {path}')
|
|
42
|
+
|
|
43
|
+
if components != [''] and not all(
|
|
44
|
+
is_valid_identifier(c, allow_system_identifiers=allow_system_path, allow_hyphens=True) for c in components
|
|
45
|
+
):
|
|
46
|
+
raise excs.Error(f'Invalid path: {path}')
|
|
47
|
+
|
|
48
|
+
if version is not None and not allow_versioned_path:
|
|
49
|
+
raise excs.Error(f'Versioned path not allowed here: {path}')
|
|
50
|
+
|
|
51
|
+
assert len(components) > 0
|
|
52
|
+
return Path(components, version)
|
|
15
53
|
|
|
16
54
|
@property
|
|
17
55
|
def len(self) -> int:
|
|
@@ -19,28 +57,28 @@ class Path:
|
|
|
19
57
|
|
|
20
58
|
@property
|
|
21
59
|
def name(self) -> str:
|
|
22
|
-
assert len(self.components) > 0
|
|
23
60
|
return self.components[-1]
|
|
24
61
|
|
|
25
62
|
@property
|
|
26
63
|
def is_root(self) -> bool:
|
|
27
|
-
return self.components[0]
|
|
64
|
+
return not self.components[0]
|
|
65
|
+
|
|
66
|
+
@property
|
|
67
|
+
def is_system_path(self) -> bool:
|
|
68
|
+
return self.components[0].startswith('_')
|
|
28
69
|
|
|
29
70
|
@property
|
|
30
71
|
def parent(self) -> Path:
|
|
31
72
|
if len(self.components) == 1:
|
|
32
|
-
|
|
33
|
-
return self
|
|
34
|
-
else:
|
|
35
|
-
return Path('', empty_is_valid=True)
|
|
73
|
+
return ROOT_PATH # Includes the case of the root path, which is its own parent.
|
|
36
74
|
else:
|
|
37
|
-
return Path(
|
|
75
|
+
return Path(self.components[:-1])
|
|
38
76
|
|
|
39
77
|
def append(self, name: str) -> Path:
|
|
40
78
|
if self.is_root:
|
|
41
|
-
return Path(name)
|
|
79
|
+
return Path([name])
|
|
42
80
|
else:
|
|
43
|
-
return Path(
|
|
81
|
+
return Path([*self.components, name])
|
|
44
82
|
|
|
45
83
|
def is_ancestor(self, other: Path, is_parent: bool = False) -> bool:
|
|
46
84
|
"""
|
|
@@ -50,9 +88,34 @@ class Path:
|
|
|
50
88
|
return False
|
|
51
89
|
if self.is_root and (other.len == 1 or not is_parent):
|
|
52
90
|
return True
|
|
53
|
-
is_prefix = self.components == other.components[:self.len]
|
|
91
|
+
is_prefix = self.components == other.components[: self.len]
|
|
54
92
|
return is_prefix and (self.len == (other.len - 1) or not is_parent)
|
|
55
93
|
|
|
94
|
+
def ancestors(self) -> list[Path]:
|
|
95
|
+
"""
|
|
96
|
+
Return all proper ancestors of this path in top-down order including root.
|
|
97
|
+
If this path is for the root directory, which has no parent, then None is returned.
|
|
98
|
+
"""
|
|
99
|
+
if self.is_root:
|
|
100
|
+
return []
|
|
101
|
+
else:
|
|
102
|
+
return [Path(self.components[:i]) if i > 0 else ROOT_PATH for i in range(len(self.components))]
|
|
103
|
+
|
|
104
|
+
def __repr__(self) -> str:
|
|
105
|
+
return repr(str(self))
|
|
106
|
+
|
|
56
107
|
def __str__(self) -> str:
|
|
57
|
-
|
|
108
|
+
base = '.'.join(self.components)
|
|
109
|
+
if self.version is not None:
|
|
110
|
+
return f'{base}:{self.version}'
|
|
111
|
+
else:
|
|
112
|
+
return base
|
|
113
|
+
|
|
114
|
+
def __eq__(self, other: object) -> bool:
|
|
115
|
+
return isinstance(other, Path) and str(self) == str(other)
|
|
116
|
+
|
|
117
|
+
def __hash__(self) -> int:
|
|
118
|
+
return hash(str(self))
|
|
119
|
+
|
|
58
120
|
|
|
121
|
+
ROOT_PATH = Path([''])
|