pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +83 -19
- pixeltable/_query.py +1444 -0
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +7 -4
- pixeltable/catalog/catalog.py +2394 -119
- pixeltable/catalog/column.py +225 -104
- pixeltable/catalog/dir.py +38 -9
- pixeltable/catalog/globals.py +53 -34
- pixeltable/catalog/insertable_table.py +265 -115
- pixeltable/catalog/path.py +80 -17
- pixeltable/catalog/schema_object.py +28 -43
- pixeltable/catalog/table.py +1270 -677
- pixeltable/catalog/table_metadata.py +103 -0
- pixeltable/catalog/table_version.py +1270 -751
- pixeltable/catalog/table_version_handle.py +109 -0
- pixeltable/catalog/table_version_path.py +137 -42
- pixeltable/catalog/tbl_ops.py +53 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +251 -134
- pixeltable/config.py +215 -0
- pixeltable/env.py +736 -285
- pixeltable/exceptions.py +26 -2
- pixeltable/exec/__init__.py +7 -2
- pixeltable/exec/aggregation_node.py +39 -21
- pixeltable/exec/cache_prefetch_node.py +87 -109
- pixeltable/exec/cell_materialization_node.py +268 -0
- pixeltable/exec/cell_reconstruction_node.py +168 -0
- pixeltable/exec/component_iteration_node.py +25 -28
- pixeltable/exec/data_row_batch.py +11 -46
- pixeltable/exec/exec_context.py +26 -11
- pixeltable/exec/exec_node.py +35 -27
- pixeltable/exec/expr_eval/__init__.py +3 -0
- pixeltable/exec/expr_eval/evaluators.py +365 -0
- pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
- pixeltable/exec/expr_eval/globals.py +200 -0
- pixeltable/exec/expr_eval/row_buffer.py +74 -0
- pixeltable/exec/expr_eval/schedulers.py +413 -0
- pixeltable/exec/globals.py +35 -0
- pixeltable/exec/in_memory_data_node.py +35 -27
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +44 -29
- pixeltable/exec/sql_node.py +414 -115
- pixeltable/exprs/__init__.py +8 -5
- pixeltable/exprs/arithmetic_expr.py +79 -45
- pixeltable/exprs/array_slice.py +5 -5
- pixeltable/exprs/column_property_ref.py +40 -26
- pixeltable/exprs/column_ref.py +254 -61
- pixeltable/exprs/comparison.py +14 -9
- pixeltable/exprs/compound_predicate.py +9 -10
- pixeltable/exprs/data_row.py +213 -72
- pixeltable/exprs/expr.py +270 -104
- pixeltable/exprs/expr_dict.py +6 -5
- pixeltable/exprs/expr_set.py +20 -11
- pixeltable/exprs/function_call.py +383 -284
- pixeltable/exprs/globals.py +18 -5
- pixeltable/exprs/in_predicate.py +7 -7
- pixeltable/exprs/inline_expr.py +37 -37
- pixeltable/exprs/is_null.py +8 -4
- pixeltable/exprs/json_mapper.py +120 -54
- pixeltable/exprs/json_path.py +90 -60
- pixeltable/exprs/literal.py +61 -16
- pixeltable/exprs/method_ref.py +7 -6
- pixeltable/exprs/object_ref.py +19 -8
- pixeltable/exprs/row_builder.py +238 -75
- pixeltable/exprs/rowid_ref.py +53 -15
- pixeltable/exprs/similarity_expr.py +65 -50
- pixeltable/exprs/sql_element_cache.py +5 -5
- pixeltable/exprs/string_op.py +107 -0
- pixeltable/exprs/type_cast.py +25 -13
- pixeltable/exprs/variable.py +2 -2
- pixeltable/func/__init__.py +9 -5
- pixeltable/func/aggregate_function.py +197 -92
- pixeltable/func/callable_function.py +119 -35
- pixeltable/func/expr_template_function.py +101 -48
- pixeltable/func/function.py +375 -62
- pixeltable/func/function_registry.py +20 -19
- pixeltable/func/globals.py +6 -5
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +151 -35
- pixeltable/func/signature.py +178 -49
- pixeltable/func/tools.py +164 -0
- pixeltable/func/udf.py +176 -53
- pixeltable/functions/__init__.py +44 -4
- pixeltable/functions/anthropic.py +226 -47
- pixeltable/functions/audio.py +148 -11
- pixeltable/functions/bedrock.py +137 -0
- pixeltable/functions/date.py +188 -0
- pixeltable/functions/deepseek.py +113 -0
- pixeltable/functions/document.py +81 -0
- pixeltable/functions/fal.py +76 -0
- pixeltable/functions/fireworks.py +72 -20
- pixeltable/functions/gemini.py +249 -0
- pixeltable/functions/globals.py +208 -53
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1088 -95
- pixeltable/functions/image.py +155 -84
- pixeltable/functions/json.py +8 -11
- pixeltable/functions/llama_cpp.py +31 -19
- pixeltable/functions/math.py +169 -0
- pixeltable/functions/mistralai.py +50 -75
- pixeltable/functions/net.py +70 -0
- pixeltable/functions/ollama.py +29 -36
- pixeltable/functions/openai.py +548 -160
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +15 -14
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +310 -85
- pixeltable/functions/timestamp.py +37 -19
- pixeltable/functions/together.py +77 -120
- pixeltable/functions/twelvelabs.py +188 -0
- pixeltable/functions/util.py +7 -2
- pixeltable/functions/uuid.py +30 -0
- pixeltable/functions/video.py +1528 -117
- pixeltable/functions/vision.py +26 -26
- pixeltable/functions/voyageai.py +289 -0
- pixeltable/functions/whisper.py +19 -10
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/functions/yolox.py +112 -0
- pixeltable/globals.py +716 -236
- pixeltable/index/__init__.py +3 -1
- pixeltable/index/base.py +17 -21
- pixeltable/index/btree.py +32 -22
- pixeltable/index/embedding_index.py +155 -92
- pixeltable/io/__init__.py +12 -7
- pixeltable/io/datarows.py +140 -0
- pixeltable/io/external_store.py +83 -125
- pixeltable/io/fiftyone.py +24 -33
- pixeltable/io/globals.py +47 -182
- pixeltable/io/hf_datasets.py +96 -127
- pixeltable/io/label_studio.py +171 -156
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +136 -115
- pixeltable/io/parquet.py +40 -153
- pixeltable/io/table_data_conduit.py +702 -0
- pixeltable/io/utils.py +100 -0
- pixeltable/iterators/__init__.py +8 -4
- pixeltable/iterators/audio.py +207 -0
- pixeltable/iterators/base.py +9 -3
- pixeltable/iterators/document.py +144 -87
- pixeltable/iterators/image.py +17 -38
- pixeltable/iterators/string.py +15 -12
- pixeltable/iterators/video.py +523 -127
- pixeltable/metadata/__init__.py +33 -8
- pixeltable/metadata/converters/convert_10.py +2 -3
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_15.py +15 -11
- pixeltable/metadata/converters/convert_16.py +4 -5
- pixeltable/metadata/converters/convert_17.py +4 -5
- pixeltable/metadata/converters/convert_18.py +4 -6
- pixeltable/metadata/converters/convert_19.py +6 -9
- pixeltable/metadata/converters/convert_20.py +3 -6
- pixeltable/metadata/converters/convert_21.py +6 -8
- pixeltable/metadata/converters/convert_22.py +3 -2
- pixeltable/metadata/converters/convert_23.py +33 -0
- pixeltable/metadata/converters/convert_24.py +55 -0
- pixeltable/metadata/converters/convert_25.py +19 -0
- pixeltable/metadata/converters/convert_26.py +23 -0
- pixeltable/metadata/converters/convert_27.py +29 -0
- pixeltable/metadata/converters/convert_28.py +13 -0
- pixeltable/metadata/converters/convert_29.py +110 -0
- pixeltable/metadata/converters/convert_30.py +63 -0
- pixeltable/metadata/converters/convert_31.py +11 -0
- pixeltable/metadata/converters/convert_32.py +15 -0
- pixeltable/metadata/converters/convert_33.py +17 -0
- pixeltable/metadata/converters/convert_34.py +21 -0
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/convert_41.py +12 -0
- pixeltable/metadata/converters/convert_42.py +9 -0
- pixeltable/metadata/converters/convert_43.py +44 -0
- pixeltable/metadata/converters/util.py +44 -18
- pixeltable/metadata/notes.py +21 -0
- pixeltable/metadata/schema.py +185 -42
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +616 -225
- pixeltable/share/__init__.py +3 -0
- pixeltable/share/packager.py +797 -0
- pixeltable/share/protocol/__init__.py +33 -0
- pixeltable/share/protocol/common.py +165 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +119 -0
- pixeltable/share/publish.py +349 -0
- pixeltable/store.py +398 -232
- pixeltable/type_system.py +730 -267
- pixeltable/utils/__init__.py +40 -0
- pixeltable/utils/arrow.py +201 -29
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +346 -0
- pixeltable/utils/coco.py +26 -27
- pixeltable/utils/code.py +4 -4
- pixeltable/utils/console_output.py +46 -0
- pixeltable/utils/coroutine.py +24 -0
- pixeltable/utils/dbms.py +92 -0
- pixeltable/utils/description_helper.py +11 -12
- pixeltable/utils/documents.py +60 -61
- pixeltable/utils/exception_handler.py +36 -0
- pixeltable/utils/filecache.py +38 -22
- pixeltable/utils/formatter.py +88 -51
- pixeltable/utils/gcs_store.py +295 -0
- pixeltable/utils/http.py +133 -0
- pixeltable/utils/http_server.py +14 -13
- pixeltable/utils/iceberg.py +13 -0
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +90 -0
- pixeltable/utils/local_store.py +322 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +573 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +20 -20
- pixeltable/utils/s3_store.py +527 -0
- pixeltable/utils/sql.py +32 -5
- pixeltable/utils/system.py +30 -0
- pixeltable/utils/transactional_directory.py +4 -3
- pixeltable-0.5.7.dist-info/METADATA +579 -0
- pixeltable-0.5.7.dist-info/RECORD +227 -0
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
- pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/catalog/named_function.py +0 -36
- pixeltable/catalog/path_dict.py +0 -141
- pixeltable/dataframe.py +0 -894
- pixeltable/exec/expr_eval_node.py +0 -232
- pixeltable/ext/__init__.py +0 -14
- pixeltable/ext/functions/__init__.py +0 -8
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/ext/functions/yolox.py +0 -157
- pixeltable/tool/create_test_db_dump.py +0 -311
- pixeltable/tool/create_test_video.py +0 -81
- pixeltable/tool/doc_plugins/griffe.py +0 -50
- pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
- pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
- pixeltable/tool/embed_udf.py +0 -9
- pixeltable/tool/mypy_plugin.py +0 -55
- pixeltable/utils/media_store.py +0 -76
- pixeltable/utils/s3.py +0 -16
- pixeltable-0.2.26.dist-info/METADATA +0 -400
- pixeltable-0.2.26.dist-info/RECORD +0 -156
- pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
pixeltable/catalog/view.py
CHANGED
|
@@ -2,27 +2,28 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import inspect
|
|
4
4
|
import logging
|
|
5
|
-
from typing import TYPE_CHECKING, Any,
|
|
5
|
+
from typing import TYPE_CHECKING, Any, List, Literal
|
|
6
6
|
from uuid import UUID
|
|
7
7
|
|
|
8
|
-
import sqlalchemy.orm as orm
|
|
9
|
-
|
|
10
8
|
import pixeltable.exceptions as excs
|
|
11
9
|
import pixeltable.metadata.schema as md_schema
|
|
12
10
|
import pixeltable.type_system as ts
|
|
13
11
|
from pixeltable import catalog, exprs, func
|
|
14
|
-
from pixeltable.env import Env
|
|
15
12
|
from pixeltable.iterators import ComponentIterator
|
|
16
13
|
|
|
17
|
-
from .catalog import Catalog
|
|
18
14
|
from .column import Column
|
|
19
|
-
from .globals import _POS_COLUMN_NAME,
|
|
15
|
+
from .globals import _POS_COLUMN_NAME, MediaValidation
|
|
20
16
|
from .table import Table
|
|
21
|
-
from .table_version import TableVersion
|
|
17
|
+
from .table_version import TableVersion, TableVersionKey, TableVersionMd
|
|
18
|
+
from .table_version_handle import TableVersionHandle
|
|
22
19
|
from .table_version_path import TableVersionPath
|
|
20
|
+
from .tbl_ops import CreateStoreTableOp, LoadViewOp, TableOp
|
|
21
|
+
from .update_status import UpdateStatus
|
|
23
22
|
|
|
24
23
|
if TYPE_CHECKING:
|
|
25
|
-
|
|
24
|
+
from pixeltable.catalog.table import TableMetadata
|
|
25
|
+
from pixeltable.globals import TableDataSource
|
|
26
|
+
from pixeltable.plan import SampleClause
|
|
26
27
|
|
|
27
28
|
_logger = logging.getLogger('pixeltable')
|
|
28
29
|
|
|
@@ -35,34 +36,87 @@ class View(Table):
|
|
|
35
36
|
The exception is a snapshot view without a predicate and without additional columns: in that case, the view
|
|
36
37
|
is simply a reference to a specific set of base versions.
|
|
37
38
|
"""
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
snapshot_only: bool):
|
|
39
|
+
|
|
40
|
+
def __init__(self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath, snapshot_only: bool):
|
|
41
41
|
super().__init__(id, dir_id, name, tbl_version_path)
|
|
42
|
-
assert base_id in catalog.Catalog.get().tbl_dependents
|
|
43
|
-
self._base_id = base_id # keep a reference to the base Table ID, so that we can keep track of its dependents
|
|
44
42
|
self._snapshot_only = snapshot_only
|
|
43
|
+
if not snapshot_only:
|
|
44
|
+
self._tbl_version = tbl_version_path.tbl_version
|
|
45
|
+
|
|
46
|
+
def _display_name(self) -> str:
|
|
47
|
+
if self._tbl_version_path.is_replica():
|
|
48
|
+
return 'replica'
|
|
49
|
+
if self._tbl_version_path.is_snapshot():
|
|
50
|
+
return 'snapshot'
|
|
51
|
+
if self._tbl_version_path.is_view():
|
|
52
|
+
return 'view'
|
|
53
|
+
return 'table'
|
|
45
54
|
|
|
46
55
|
@classmethod
|
|
47
|
-
def
|
|
48
|
-
|
|
56
|
+
def select_list_to_additional_columns(cls, select_list: list[tuple[exprs.Expr, str | None]]) -> dict[str, dict]:
|
|
57
|
+
"""Returns a dict of column specs in the same format as the additional_columns parameter of View.create.
|
|
58
|
+
The source is the list of expressions from a select() statement on a Query.
|
|
59
|
+
If the expression is a ColumnRef to a base table column, it is marked as not stored.
|
|
60
|
+
"""
|
|
61
|
+
from pixeltable._query import Query
|
|
62
|
+
|
|
63
|
+
r: dict[str, dict] = {}
|
|
64
|
+
exps, names = Query._normalize_select_list([], select_list)
|
|
65
|
+
for expr, name in zip(exps, names):
|
|
66
|
+
stored = not isinstance(expr, exprs.ColumnRef)
|
|
67
|
+
r[name] = {'value': expr, 'stored': stored}
|
|
68
|
+
return r
|
|
49
69
|
|
|
50
70
|
@classmethod
|
|
51
71
|
def _create(
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
72
|
+
cls,
|
|
73
|
+
dir_id: UUID,
|
|
74
|
+
name: str,
|
|
75
|
+
base: TableVersionPath,
|
|
76
|
+
select_list: list[tuple[exprs.Expr, str | None]] | None,
|
|
77
|
+
additional_columns: dict[str, Any],
|
|
78
|
+
predicate: 'exprs.Expr' | None,
|
|
79
|
+
sample_clause: 'SampleClause' | None,
|
|
80
|
+
is_snapshot: bool,
|
|
81
|
+
create_default_idxs: bool,
|
|
82
|
+
num_retained_versions: int,
|
|
83
|
+
comment: str,
|
|
84
|
+
media_validation: MediaValidation,
|
|
85
|
+
iterator_cls: type[ComponentIterator] | None,
|
|
86
|
+
iterator_args: dict | None,
|
|
87
|
+
) -> tuple[TableVersionMd, list[TableOp] | None]:
|
|
88
|
+
from pixeltable.plan import SampleClause
|
|
89
|
+
|
|
90
|
+
# Convert select_list to more additional_columns if present
|
|
91
|
+
include_base_columns: bool = select_list is None
|
|
92
|
+
select_list_columns: List[Column] = []
|
|
93
|
+
if not include_base_columns:
|
|
94
|
+
r = cls.select_list_to_additional_columns(select_list)
|
|
95
|
+
select_list_columns = cls._create_columns(r)
|
|
96
|
+
|
|
97
|
+
columns_from_additional_columns = cls._create_columns(additional_columns)
|
|
98
|
+
columns = select_list_columns + columns_from_additional_columns
|
|
58
99
|
cls._verify_schema(columns)
|
|
59
100
|
|
|
60
|
-
# verify that
|
|
101
|
+
# verify that filters can be evaluated in the context of the base
|
|
61
102
|
if predicate is not None:
|
|
62
103
|
if not predicate.is_bound_by([base]):
|
|
63
|
-
raise excs.Error(f'
|
|
104
|
+
raise excs.Error(f'View filter cannot be computed in the context of the base table {base.tbl_name()!r}')
|
|
64
105
|
# create a copy that we can modify and store
|
|
65
106
|
predicate = predicate.copy()
|
|
107
|
+
if sample_clause is not None:
|
|
108
|
+
# make sure that the sample clause can be computed in the context of the base
|
|
109
|
+
if sample_clause.stratify_exprs is not None and not all(
|
|
110
|
+
stratify_expr.is_bound_by([base]) for stratify_expr in sample_clause.stratify_exprs
|
|
111
|
+
):
|
|
112
|
+
raise excs.Error(
|
|
113
|
+
f'View sample clause cannot be computed in the context of the base table {base.tbl_name()!r}'
|
|
114
|
+
)
|
|
115
|
+
# create a copy that we can modify and store
|
|
116
|
+
sc = sample_clause
|
|
117
|
+
sample_clause = SampleClause(
|
|
118
|
+
sc.version, sc.n, sc.n_per_stratum, sc.fraction, sc.seed, sc.stratify_exprs.copy()
|
|
119
|
+
)
|
|
66
120
|
|
|
67
121
|
# same for value exprs
|
|
68
122
|
for col in columns:
|
|
@@ -71,108 +125,118 @@ class View(Table):
|
|
|
71
125
|
# make sure that the value can be computed in the context of the base
|
|
72
126
|
if col.value_expr is not None and not col.value_expr.is_bound_by([base]):
|
|
73
127
|
raise excs.Error(
|
|
74
|
-
f'Column {col.name}:
|
|
128
|
+
f'Column {col.name!r}: Value expression cannot be computed in the context of the '
|
|
129
|
+
f'base table {base.tbl_name()!r}'
|
|
130
|
+
)
|
|
75
131
|
|
|
76
132
|
if iterator_cls is not None:
|
|
77
133
|
assert iterator_args is not None
|
|
78
134
|
|
|
79
135
|
# validate iterator_args
|
|
80
136
|
py_signature = inspect.signature(iterator_cls.__init__)
|
|
137
|
+
|
|
138
|
+
# make sure iterator_args can be used to instantiate iterator_cls
|
|
139
|
+
bound_args: dict[str, Any]
|
|
81
140
|
try:
|
|
82
|
-
# make sure iterator_args can be used to instantiate iterator_cls
|
|
83
141
|
bound_args = py_signature.bind(None, **iterator_args).arguments # None: arg for self
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
142
|
+
except TypeError as exc:
|
|
143
|
+
raise excs.Error(f'Invalid iterator arguments: {exc}') from exc
|
|
144
|
+
# we ignore 'self'
|
|
145
|
+
first_param_name = next(iter(py_signature.parameters)) # can't guarantee it's actually 'self'
|
|
146
|
+
del bound_args[first_param_name]
|
|
147
|
+
|
|
148
|
+
# construct Signature and type-check bound_args
|
|
149
|
+
params = [
|
|
150
|
+
func.Parameter(param_name, param_type, kind=inspect.Parameter.POSITIONAL_OR_KEYWORD)
|
|
151
|
+
for param_name, param_type in iterator_cls.input_schema().items()
|
|
152
|
+
]
|
|
153
|
+
sig = func.Signature(ts.InvalidType(), params)
|
|
154
|
+
|
|
155
|
+
expr_args = {k: exprs.Expr.from_object(v) for k, v in bound_args.items()}
|
|
156
|
+
sig.validate_args(expr_args, context=f'in iterator of type `{iterator_cls.__name__}`')
|
|
157
|
+
literal_args = {k: v.val if isinstance(v, exprs.Literal) else v for k, v in expr_args.items()}
|
|
98
158
|
|
|
99
159
|
# prepend pos and output_schema columns to cols:
|
|
100
160
|
# a component view exposes the pos column of its rowid;
|
|
101
161
|
# we create that column here, so it gets assigned a column id;
|
|
102
162
|
# stored=False: it is not stored separately (it's already stored as part of the rowid)
|
|
103
|
-
iterator_cols = [Column(_POS_COLUMN_NAME, ts.IntType(), stored=False)]
|
|
104
|
-
output_dict, unstored_cols = iterator_cls.output_schema(**
|
|
105
|
-
iterator_cols.extend(
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
163
|
+
iterator_cols = [Column(_POS_COLUMN_NAME, ts.IntType(), is_iterator_col=True, stored=False)]
|
|
164
|
+
output_dict, unstored_cols = iterator_cls.output_schema(**literal_args)
|
|
165
|
+
iterator_cols.extend(
|
|
166
|
+
[
|
|
167
|
+
Column(col_name, col_type, is_iterator_col=True, stored=col_name not in unstored_cols)
|
|
168
|
+
for col_name, col_type in output_dict.items()
|
|
169
|
+
]
|
|
170
|
+
)
|
|
109
171
|
|
|
110
172
|
iterator_col_names = {col.name for col in iterator_cols}
|
|
111
173
|
for col in columns:
|
|
112
174
|
if col.name in iterator_col_names:
|
|
113
|
-
raise excs.Error(
|
|
175
|
+
raise excs.Error(
|
|
176
|
+
f'Duplicate name: column {col.name!r} is already present in the iterator output schema'
|
|
177
|
+
)
|
|
114
178
|
columns = iterator_cols + columns
|
|
115
179
|
|
|
116
|
-
|
|
117
|
-
from pixeltable.exprs import InlineDict
|
|
118
|
-
iterator_args_expr: exprs.Expr = InlineDict(iterator_args) if iterator_args is not None else None
|
|
119
|
-
iterator_class_fqn = f'{iterator_cls.__module__}.{iterator_cls.__name__}' if iterator_cls is not None \
|
|
120
|
-
else None
|
|
121
|
-
base_version_path = cls._get_snapshot_path(base) if is_snapshot else base
|
|
122
|
-
base_versions = [
|
|
123
|
-
(tbl_version.id.hex, tbl_version.version if is_snapshot or tbl_version.is_snapshot else None)
|
|
124
|
-
for tbl_version in base_version_path.get_tbl_versions()
|
|
125
|
-
]
|
|
180
|
+
from pixeltable.exprs import InlineDict
|
|
126
181
|
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
182
|
+
iterator_args_expr: exprs.Expr = InlineDict(iterator_args) if iterator_args is not None else None
|
|
183
|
+
iterator_class_fqn = f'{iterator_cls.__module__}.{iterator_cls.__name__}' if iterator_cls is not None else None
|
|
184
|
+
base_version_path = cls._get_snapshot_path(base) if is_snapshot else base
|
|
185
|
+
|
|
186
|
+
# if this is a snapshot, we need to retarget all exprs to the snapshot tbl versions
|
|
187
|
+
if is_snapshot:
|
|
188
|
+
predicate = predicate.retarget(base_version_path) if predicate is not None else None
|
|
189
|
+
if sample_clause is not None:
|
|
190
|
+
exprs.Expr.retarget_list(sample_clause.stratify_exprs, base_version_path)
|
|
191
|
+
iterator_args_expr = (
|
|
192
|
+
iterator_args_expr.retarget(base_version_path) if iterator_args_expr is not None else None
|
|
193
|
+
)
|
|
194
|
+
for col in columns:
|
|
195
|
+
if col.value_expr is not None:
|
|
196
|
+
col.set_value_expr(col.value_expr.retarget(base_version_path))
|
|
197
|
+
|
|
198
|
+
view_md = md_schema.ViewMd(
|
|
199
|
+
is_snapshot=is_snapshot,
|
|
200
|
+
include_base_columns=include_base_columns,
|
|
201
|
+
predicate=predicate.as_dict() if predicate is not None else None,
|
|
202
|
+
sample_clause=sample_clause.as_dict() if sample_clause is not None else None,
|
|
203
|
+
base_versions=base_version_path.as_md(),
|
|
204
|
+
iterator_class_fqn=iterator_class_fqn,
|
|
205
|
+
iterator_args=iterator_args_expr.as_dict() if iterator_args_expr is not None else None,
|
|
206
|
+
)
|
|
207
|
+
|
|
208
|
+
md = TableVersion.create_initial_md(
|
|
209
|
+
name,
|
|
210
|
+
columns,
|
|
211
|
+
num_retained_versions,
|
|
212
|
+
comment,
|
|
213
|
+
media_validation=media_validation,
|
|
214
|
+
view_md=view_md,
|
|
215
|
+
create_default_idxs=create_default_idxs,
|
|
216
|
+
)
|
|
217
|
+
if md.tbl_md.is_pure_snapshot:
|
|
218
|
+
# this is purely a snapshot: no store table to create or load
|
|
219
|
+
return md, None
|
|
220
|
+
else:
|
|
221
|
+
tbl_id = md.tbl_md.tbl_id
|
|
222
|
+
key = TableVersionKey(UUID(tbl_id), 0 if is_snapshot else None, None)
|
|
223
|
+
view_path = TableVersionPath(TableVersionHandle(key), base=base_version_path)
|
|
224
|
+
ops = [
|
|
225
|
+
TableOp(
|
|
226
|
+
tbl_id=tbl_id, op_sn=0, num_ops=2, needs_xact=False, create_store_table_op=CreateStoreTableOp()
|
|
227
|
+
),
|
|
228
|
+
TableOp(
|
|
229
|
+
tbl_id=tbl_id, op_sn=1, num_ops=2, needs_xact=True, load_view_op=LoadViewOp(view_path.as_dict())
|
|
230
|
+
),
|
|
231
|
+
]
|
|
232
|
+
return md, ops
|
|
167
233
|
|
|
168
234
|
@classmethod
|
|
169
|
-
def _verify_column(
|
|
170
|
-
cls, col: Column, existing_column_names: set[str], existing_query_names: Optional[set[str]] = None
|
|
171
|
-
) -> None:
|
|
235
|
+
def _verify_column(cls, col: Column) -> None:
|
|
172
236
|
# make sure that columns are nullable or have a default
|
|
173
237
|
if not col.col_type.nullable and not col.is_computed:
|
|
174
|
-
raise excs.Error(f'Column {col.name}:
|
|
175
|
-
super()._verify_column(col
|
|
238
|
+
raise excs.Error(f'Column {col.name!r}: Non-computed columns in views must be nullable')
|
|
239
|
+
super()._verify_column(col)
|
|
176
240
|
|
|
177
241
|
@classmethod
|
|
178
242
|
def _get_snapshot_path(cls, tbl_version_path: TableVersionPath) -> TableVersionPath:
|
|
@@ -182,46 +246,99 @@ class View(Table):
|
|
|
182
246
|
"""
|
|
183
247
|
if tbl_version_path.is_snapshot():
|
|
184
248
|
return tbl_version_path
|
|
185
|
-
tbl_version = tbl_version_path.tbl_version
|
|
186
|
-
|
|
187
|
-
# create and register snapshot version
|
|
188
|
-
tbl_version = tbl_version.create_snapshot_copy()
|
|
189
|
-
assert tbl_version.is_snapshot
|
|
249
|
+
tbl_version = tbl_version_path.tbl_version.get()
|
|
250
|
+
assert not tbl_version.is_snapshot
|
|
190
251
|
|
|
191
252
|
return TableVersionPath(
|
|
192
|
-
tbl_version,
|
|
193
|
-
base=cls._get_snapshot_path(tbl_version_path.base) if tbl_version_path.base is not None else None
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
cat = catalog.Catalog.get()
|
|
208
|
-
del cat.tbls[self._id]
|
|
209
|
-
else:
|
|
210
|
-
super()._drop()
|
|
211
|
-
cat.tbl_dependents[self._base_id].remove(self)
|
|
212
|
-
del cat.tbl_dependents[self._id]
|
|
253
|
+
TableVersionHandle(TableVersionKey(tbl_version.id, tbl_version.version, None)),
|
|
254
|
+
base=cls._get_snapshot_path(tbl_version_path.base) if tbl_version_path.base is not None else None,
|
|
255
|
+
)
|
|
256
|
+
|
|
257
|
+
def _is_named_pure_snapshot(self) -> bool:
|
|
258
|
+
"""
|
|
259
|
+
Returns True if this is a named pure snapshot (i.e., a pure snapshot that is a separate schema object).
|
|
260
|
+
"""
|
|
261
|
+
return self._id != self._tbl_version_path.tbl_id
|
|
262
|
+
|
|
263
|
+
def _is_anonymous_snapshot(self) -> bool:
|
|
264
|
+
"""
|
|
265
|
+
Returns True if this is an unnamed snapshot (i.e., a snapshot that is not a separate schema object).
|
|
266
|
+
"""
|
|
267
|
+
return self._snapshot_only and self._id == self._tbl_version_path.tbl_id
|
|
213
268
|
|
|
214
|
-
def
|
|
215
|
-
md = super().
|
|
269
|
+
def _get_metadata(self) -> 'TableMetadata':
|
|
270
|
+
md = super()._get_metadata()
|
|
216
271
|
md['is_view'] = True
|
|
217
272
|
md['is_snapshot'] = self._tbl_version_path.is_snapshot()
|
|
273
|
+
if self._is_anonymous_snapshot():
|
|
274
|
+
# Update name and path with version qualifiers.
|
|
275
|
+
md['name'] = f'{self._name}:{self._tbl_version_path.version()}'
|
|
276
|
+
md['path'] = f'{self._path()}:{self._tbl_version_path.version()}'
|
|
277
|
+
base_tbl_id = self._base_tbl_id
|
|
278
|
+
if base_tbl_id is not None:
|
|
279
|
+
base_tbl = self._get_base_table()
|
|
280
|
+
base_path = '<anonymous base table>' if base_tbl is None else base_tbl._path()
|
|
281
|
+
base_version = self._effective_base_versions[0]
|
|
282
|
+
md['base'] = base_path if base_version is None else f'{base_path}:{base_version}'
|
|
218
283
|
return md
|
|
219
284
|
|
|
220
285
|
def insert(
|
|
221
|
-
|
|
222
|
-
|
|
286
|
+
self,
|
|
287
|
+
source: TableDataSource | None = None,
|
|
288
|
+
/,
|
|
289
|
+
*,
|
|
290
|
+
source_format: Literal['csv', 'excel', 'parquet', 'json'] | None = None,
|
|
291
|
+
schema_overrides: dict[str, ts.ColumnType] | None = None,
|
|
292
|
+
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
293
|
+
print_stats: bool = False,
|
|
294
|
+
**kwargs: Any,
|
|
223
295
|
) -> UpdateStatus:
|
|
224
|
-
raise excs.Error(f'{self.
|
|
296
|
+
raise excs.Error(f'{self._display_str()}: Cannot insert into a {self._display_name()}.')
|
|
297
|
+
|
|
298
|
+
def delete(self, where: exprs.Expr | None = None) -> UpdateStatus:
|
|
299
|
+
raise excs.Error(f'{self._display_str()}: Cannot delete from a {self._display_name()}.')
|
|
300
|
+
|
|
301
|
+
@property
|
|
302
|
+
def _base_tbl_id(self) -> UUID | None:
|
|
303
|
+
if self._tbl_version_path.tbl_id != self._id:
|
|
304
|
+
# _tbl_version_path represents a different schema object from this one. This can only happen if this is a
|
|
305
|
+
# named pure snapshot.
|
|
306
|
+
return self._tbl_version_path.tbl_id
|
|
307
|
+
if self._tbl_version_path.base is None:
|
|
308
|
+
return None
|
|
309
|
+
return self._tbl_version_path.base.tbl_id
|
|
310
|
+
|
|
311
|
+
def _get_base_table(self) -> 'Table' | None:
|
|
312
|
+
"""Returns None if there is no base table, or if the base table is hidden."""
|
|
313
|
+
base_tbl_id = self._base_tbl_id
|
|
314
|
+
if base_tbl_id is None:
|
|
315
|
+
return None
|
|
316
|
+
with catalog.Catalog.get().begin_xact(tbl_id=base_tbl_id, for_write=False):
|
|
317
|
+
return catalog.Catalog.get().get_table_by_id(base_tbl_id)
|
|
318
|
+
|
|
319
|
+
@property
|
|
320
|
+
def _effective_base_versions(self) -> list[int | None]:
|
|
321
|
+
effective_versions = [tv.effective_version for tv in self._tbl_version_path.get_tbl_versions()]
|
|
322
|
+
if self._snapshot_only and not self._is_anonymous_snapshot():
|
|
323
|
+
return effective_versions # Named pure snapshot
|
|
324
|
+
else:
|
|
325
|
+
return effective_versions[1:]
|
|
326
|
+
|
|
327
|
+
def _table_descriptor(self) -> str:
|
|
328
|
+
result = [self._display_str()]
|
|
329
|
+
bases_descrs: list[str] = []
|
|
330
|
+
for base, effective_version in zip(self._get_base_tables(), self._effective_base_versions):
|
|
331
|
+
if effective_version is None:
|
|
332
|
+
bases_descrs.append(f'{base._path()!r}')
|
|
333
|
+
else:
|
|
334
|
+
base_descr = f'{base._path()}:{effective_version}'
|
|
335
|
+
bases_descrs.append(f'{base_descr!r}')
|
|
336
|
+
if len(bases_descrs) > 0:
|
|
337
|
+
# bases_descrs can be empty in the case of a table-replica
|
|
338
|
+
result.append(f' (of {", ".join(bases_descrs)})')
|
|
225
339
|
|
|
226
|
-
|
|
227
|
-
|
|
340
|
+
if self._tbl_version_path.tbl_version.get().predicate is not None:
|
|
341
|
+
result.append(f'\nWhere: {self._tbl_version_path.tbl_version.get().predicate!s}')
|
|
342
|
+
if self._tbl_version_path.tbl_version.get().sample_clause is not None:
|
|
343
|
+
result.append(f'\nSample: {self._tbl_version.get().sample_clause!s}')
|
|
344
|
+
return ''.join(result)
|