pixeltable 0.3.14__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +42 -8
- pixeltable/{dataframe.py → _query.py} +470 -206
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +5 -4
- pixeltable/catalog/catalog.py +1785 -432
- pixeltable/catalog/column.py +190 -113
- pixeltable/catalog/dir.py +2 -4
- pixeltable/catalog/globals.py +19 -46
- pixeltable/catalog/insertable_table.py +191 -98
- pixeltable/catalog/path.py +63 -23
- pixeltable/catalog/schema_object.py +11 -15
- pixeltable/catalog/table.py +843 -436
- pixeltable/catalog/table_metadata.py +103 -0
- pixeltable/catalog/table_version.py +978 -657
- pixeltable/catalog/table_version_handle.py +72 -16
- pixeltable/catalog/table_version_path.py +112 -43
- pixeltable/catalog/tbl_ops.py +53 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +134 -90
- pixeltable/config.py +134 -22
- pixeltable/env.py +471 -157
- pixeltable/exceptions.py +6 -0
- pixeltable/exec/__init__.py +4 -1
- pixeltable/exec/aggregation_node.py +7 -8
- pixeltable/exec/cache_prefetch_node.py +83 -110
- pixeltable/exec/cell_materialization_node.py +268 -0
- pixeltable/exec/cell_reconstruction_node.py +168 -0
- pixeltable/exec/component_iteration_node.py +4 -3
- pixeltable/exec/data_row_batch.py +8 -65
- pixeltable/exec/exec_context.py +16 -4
- pixeltable/exec/exec_node.py +13 -36
- pixeltable/exec/expr_eval/evaluators.py +11 -7
- pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
- pixeltable/exec/expr_eval/globals.py +8 -5
- pixeltable/exec/expr_eval/row_buffer.py +1 -2
- pixeltable/exec/expr_eval/schedulers.py +106 -56
- pixeltable/exec/globals.py +35 -0
- pixeltable/exec/in_memory_data_node.py +19 -19
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +16 -9
- pixeltable/exec/sql_node.py +351 -84
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/arithmetic_expr.py +27 -22
- pixeltable/exprs/array_slice.py +3 -3
- pixeltable/exprs/column_property_ref.py +36 -23
- pixeltable/exprs/column_ref.py +213 -89
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +5 -4
- pixeltable/exprs/data_row.py +164 -54
- pixeltable/exprs/expr.py +70 -44
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +17 -10
- pixeltable/exprs/function_call.py +100 -40
- pixeltable/exprs/globals.py +2 -2
- pixeltable/exprs/in_predicate.py +4 -4
- pixeltable/exprs/inline_expr.py +18 -32
- pixeltable/exprs/is_null.py +7 -3
- pixeltable/exprs/json_mapper.py +8 -8
- pixeltable/exprs/json_path.py +56 -22
- pixeltable/exprs/literal.py +27 -5
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +167 -67
- pixeltable/exprs/rowid_ref.py +25 -10
- pixeltable/exprs/similarity_expr.py +58 -40
- pixeltable/exprs/sql_element_cache.py +4 -4
- pixeltable/exprs/string_op.py +5 -5
- pixeltable/exprs/type_cast.py +3 -5
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/callable_function.py +9 -9
- pixeltable/func/expr_template_function.py +17 -11
- pixeltable/func/function.py +18 -20
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/globals.py +2 -3
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +29 -27
- pixeltable/func/signature.py +46 -19
- pixeltable/func/tools.py +31 -13
- pixeltable/func/udf.py +18 -20
- pixeltable/functions/__init__.py +16 -0
- pixeltable/functions/anthropic.py +123 -77
- pixeltable/functions/audio.py +147 -10
- pixeltable/functions/bedrock.py +13 -6
- pixeltable/functions/date.py +7 -4
- pixeltable/functions/deepseek.py +35 -43
- pixeltable/functions/document.py +81 -0
- pixeltable/functions/fal.py +76 -0
- pixeltable/functions/fireworks.py +11 -20
- pixeltable/functions/gemini.py +195 -39
- pixeltable/functions/globals.py +142 -14
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1056 -24
- pixeltable/functions/image.py +115 -57
- pixeltable/functions/json.py +1 -1
- pixeltable/functions/llama_cpp.py +28 -13
- pixeltable/functions/math.py +67 -5
- pixeltable/functions/mistralai.py +18 -55
- pixeltable/functions/net.py +70 -0
- pixeltable/functions/ollama.py +20 -13
- pixeltable/functions/openai.py +240 -226
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +4 -4
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +239 -69
- pixeltable/functions/timestamp.py +16 -16
- pixeltable/functions/together.py +24 -84
- pixeltable/functions/twelvelabs.py +188 -0
- pixeltable/functions/util.py +6 -1
- pixeltable/functions/uuid.py +30 -0
- pixeltable/functions/video.py +1515 -107
- pixeltable/functions/vision.py +8 -8
- pixeltable/functions/voyageai.py +289 -0
- pixeltable/functions/whisper.py +16 -8
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/{ext/functions → functions}/yolox.py +2 -4
- pixeltable/globals.py +362 -115
- pixeltable/index/base.py +17 -21
- pixeltable/index/btree.py +28 -22
- pixeltable/index/embedding_index.py +100 -118
- pixeltable/io/__init__.py +4 -2
- pixeltable/io/datarows.py +8 -7
- pixeltable/io/external_store.py +56 -105
- pixeltable/io/fiftyone.py +13 -13
- pixeltable/io/globals.py +31 -30
- pixeltable/io/hf_datasets.py +61 -16
- pixeltable/io/label_studio.py +74 -70
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +21 -12
- pixeltable/io/parquet.py +25 -105
- pixeltable/io/table_data_conduit.py +250 -123
- pixeltable/io/utils.py +4 -4
- pixeltable/iterators/__init__.py +2 -1
- pixeltable/iterators/audio.py +26 -25
- pixeltable/iterators/base.py +9 -3
- pixeltable/iterators/document.py +112 -78
- pixeltable/iterators/image.py +12 -15
- pixeltable/iterators/string.py +11 -4
- pixeltable/iterators/video.py +523 -120
- pixeltable/metadata/__init__.py +14 -3
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_18.py +2 -2
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_20.py +2 -2
- pixeltable/metadata/converters/convert_21.py +2 -2
- pixeltable/metadata/converters/convert_22.py +2 -2
- pixeltable/metadata/converters/convert_24.py +2 -2
- pixeltable/metadata/converters/convert_25.py +2 -2
- pixeltable/metadata/converters/convert_26.py +2 -2
- pixeltable/metadata/converters/convert_29.py +4 -4
- pixeltable/metadata/converters/convert_30.py +34 -21
- pixeltable/metadata/converters/convert_34.py +2 -2
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/convert_41.py +12 -0
- pixeltable/metadata/converters/convert_42.py +9 -0
- pixeltable/metadata/converters/convert_43.py +44 -0
- pixeltable/metadata/converters/util.py +20 -31
- pixeltable/metadata/notes.py +9 -0
- pixeltable/metadata/schema.py +140 -53
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +382 -115
- pixeltable/share/__init__.py +1 -1
- pixeltable/share/packager.py +547 -83
- pixeltable/share/protocol/__init__.py +33 -0
- pixeltable/share/protocol/common.py +165 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +119 -0
- pixeltable/share/publish.py +257 -59
- pixeltable/store.py +311 -194
- pixeltable/type_system.py +373 -211
- pixeltable/utils/__init__.py +2 -3
- pixeltable/utils/arrow.py +131 -17
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +346 -0
- pixeltable/utils/coco.py +6 -6
- pixeltable/utils/code.py +3 -3
- pixeltable/utils/console_output.py +4 -1
- pixeltable/utils/coroutine.py +6 -23
- pixeltable/utils/dbms.py +32 -6
- pixeltable/utils/description_helper.py +4 -5
- pixeltable/utils/documents.py +7 -18
- pixeltable/utils/exception_handler.py +7 -30
- pixeltable/utils/filecache.py +6 -6
- pixeltable/utils/formatter.py +86 -48
- pixeltable/utils/gcs_store.py +295 -0
- pixeltable/utils/http.py +133 -0
- pixeltable/utils/http_server.py +2 -3
- pixeltable/utils/iceberg.py +1 -2
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +90 -0
- pixeltable/utils/local_store.py +322 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +573 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +5 -6
- pixeltable/utils/s3_store.py +527 -0
- pixeltable/utils/sql.py +26 -0
- pixeltable/utils/system.py +30 -0
- pixeltable-0.5.7.dist-info/METADATA +579 -0
- pixeltable-0.5.7.dist-info/RECORD +227 -0
- {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
- pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/catalog/named_function.py +0 -40
- pixeltable/ext/__init__.py +0 -17
- pixeltable/ext/functions/__init__.py +0 -11
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/utils/media_store.py +0 -77
- pixeltable/utils/s3.py +0 -17
- pixeltable-0.3.14.dist-info/METADATA +0 -434
- pixeltable-0.3.14.dist-info/RECORD +0 -186
- pixeltable-0.3.14.dist-info/entry_points.txt +0 -3
- {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
pixeltable/catalog/column.py
CHANGED
|
@@ -3,142 +3,184 @@ from __future__ import annotations
|
|
|
3
3
|
import logging
|
|
4
4
|
import warnings
|
|
5
5
|
from textwrap import dedent
|
|
6
|
-
from typing import TYPE_CHECKING, Any
|
|
6
|
+
from typing import TYPE_CHECKING, Any
|
|
7
7
|
|
|
8
|
+
import pgvector.sqlalchemy # type: ignore[import-untyped]
|
|
8
9
|
import sqlalchemy as sql
|
|
9
10
|
|
|
10
11
|
import pixeltable.exceptions as excs
|
|
12
|
+
import pixeltable.exprs as exprs
|
|
11
13
|
import pixeltable.type_system as ts
|
|
12
|
-
from pixeltable import
|
|
14
|
+
from pixeltable.env import Env
|
|
15
|
+
from pixeltable.metadata import schema
|
|
13
16
|
|
|
14
|
-
from .globals import MediaValidation, is_valid_identifier
|
|
17
|
+
from .globals import MediaValidation, QColumnId, is_valid_identifier
|
|
15
18
|
|
|
16
19
|
if TYPE_CHECKING:
|
|
17
20
|
from .table_version import TableVersion
|
|
18
|
-
from .table_version_handle import TableVersionHandle
|
|
21
|
+
from .table_version_handle import ColumnHandle, TableVersionHandle
|
|
19
22
|
from .table_version_path import TableVersionPath
|
|
20
23
|
|
|
21
24
|
_logger = logging.getLogger('pixeltable')
|
|
22
25
|
|
|
23
26
|
|
|
24
27
|
class Column:
|
|
25
|
-
"""Representation of a column in the schema of a Table/
|
|
28
|
+
"""Representation of a column in the schema of a Table/Query.
|
|
26
29
|
|
|
27
30
|
A Column contains all the metadata necessary for executing queries and updates against a particular version of a
|
|
28
31
|
table/view.
|
|
32
|
+
|
|
33
|
+
Args:
|
|
34
|
+
name: column name; None for system columns (eg, index columns)
|
|
35
|
+
col_type: column type; can be None if the type can be derived from ``computed_with``
|
|
36
|
+
computed_with: an Expr that computes the column value
|
|
37
|
+
is_pk: if True, this column is part of the primary key
|
|
38
|
+
stored: determines whether a computed column is present in the stored table or recomputed on demand
|
|
39
|
+
destination: An object store reference for persisting computed files
|
|
40
|
+
col_id: column ID (only used internally)
|
|
41
|
+
|
|
42
|
+
Computed columns: those have a non-None ``computed_with`` argument
|
|
43
|
+
- when constructed by the user: ``computed_with`` was constructed explicitly and is passed in;
|
|
44
|
+
col_type is None
|
|
45
|
+
- when loaded from md store: ``computed_with`` is set and col_type is set
|
|
46
|
+
|
|
47
|
+
``stored`` (only valid for computed columns):
|
|
48
|
+
- if True: the column is present in the stored table
|
|
49
|
+
- if False: the column is not present in the stored table and recomputed during a query
|
|
50
|
+
- if None: the system chooses for you (at present, this is always False, but this may change in the future)
|
|
29
51
|
"""
|
|
30
52
|
|
|
31
|
-
name: str
|
|
32
|
-
id:
|
|
53
|
+
name: str | None
|
|
54
|
+
id: int | None
|
|
33
55
|
col_type: ts.ColumnType
|
|
34
56
|
stored: bool
|
|
35
57
|
is_pk: bool
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
58
|
+
is_iterator_col: bool
|
|
59
|
+
_explicit_destination: str | None # An object store reference for computed files
|
|
60
|
+
_media_validation: MediaValidation | None # if not set, TableVersion.media_validation applies
|
|
61
|
+
schema_version_add: int | None
|
|
62
|
+
schema_version_drop: int | None
|
|
63
|
+
stores_cellmd: bool
|
|
64
|
+
sa_col: sql.schema.Column | None
|
|
65
|
+
sa_col_type: sql.types.TypeEngine
|
|
66
|
+
sa_cellmd_col: sql.schema.Column | None # JSON metadata for the cell, e.g. errortype, errormsg for media columns
|
|
67
|
+
_value_expr: exprs.Expr | None
|
|
68
|
+
value_expr_dict: dict[str, Any] | None
|
|
69
|
+
# we store a handle here in order to allow Column construction before there is a corresponding TableVersion
|
|
70
|
+
tbl_handle: 'TableVersionHandle' | None
|
|
48
71
|
|
|
49
72
|
def __init__(
|
|
50
73
|
self,
|
|
51
|
-
name:
|
|
52
|
-
col_type:
|
|
53
|
-
computed_with:
|
|
74
|
+
name: str | None,
|
|
75
|
+
col_type: ts.ColumnType | None = None,
|
|
76
|
+
computed_with: exprs.Expr | None = None,
|
|
54
77
|
is_pk: bool = False,
|
|
78
|
+
is_iterator_col: bool = False,
|
|
55
79
|
stored: bool = True,
|
|
56
|
-
media_validation:
|
|
57
|
-
col_id:
|
|
58
|
-
schema_version_add:
|
|
59
|
-
schema_version_drop:
|
|
60
|
-
sa_col_type:
|
|
61
|
-
|
|
62
|
-
value_expr_dict:
|
|
80
|
+
media_validation: MediaValidation | None = None,
|
|
81
|
+
col_id: int | None = None,
|
|
82
|
+
schema_version_add: int | None = None,
|
|
83
|
+
schema_version_drop: int | None = None,
|
|
84
|
+
sa_col_type: sql.types.TypeEngine | None = None,
|
|
85
|
+
stores_cellmd: bool | None = None,
|
|
86
|
+
value_expr_dict: dict[str, Any] | None = None,
|
|
87
|
+
tbl_handle: 'TableVersionHandle' | None = None,
|
|
88
|
+
destination: str | None = None,
|
|
63
89
|
):
|
|
64
|
-
"""Column constructor.
|
|
65
|
-
|
|
66
|
-
Args:
|
|
67
|
-
name: column name; None for system columns (eg, index columns)
|
|
68
|
-
col_type: column type; can be None if the type can be derived from ``computed_with``
|
|
69
|
-
computed_with: an Expr that computes the column value
|
|
70
|
-
is_pk: if True, this column is part of the primary key
|
|
71
|
-
stored: determines whether a computed column is present in the stored table or recomputed on demand
|
|
72
|
-
col_id: column ID (only used internally)
|
|
73
|
-
|
|
74
|
-
Computed columns: those have a non-None ``computed_with`` argument
|
|
75
|
-
- when constructed by the user: ``computed_with`` was constructed explicitly and is passed in;
|
|
76
|
-
col_type is None
|
|
77
|
-
- when loaded from md store: ``computed_with`` is set and col_type is set
|
|
78
|
-
|
|
79
|
-
``stored`` (only valid for computed columns):
|
|
80
|
-
- if True: the column is present in the stored table
|
|
81
|
-
- if False: the column is not present in the stored table and recomputed during a query
|
|
82
|
-
- if None: the system chooses for you (at present, this is always False, but this may change in the future)
|
|
83
|
-
"""
|
|
84
90
|
if name is not None and not is_valid_identifier(name):
|
|
85
|
-
raise excs.Error(f
|
|
91
|
+
raise excs.Error(f'Invalid column name: {name}')
|
|
86
92
|
self.name = name
|
|
93
|
+
self.tbl_handle = tbl_handle
|
|
87
94
|
if col_type is None and computed_with is None:
|
|
88
|
-
raise excs.Error(f'Column
|
|
95
|
+
raise excs.Error(f'Column {name!r}: `col_type` is required if `computed_with` is not specified')
|
|
89
96
|
|
|
90
|
-
self._value_expr
|
|
97
|
+
self._value_expr = None
|
|
91
98
|
self.value_expr_dict = value_expr_dict
|
|
92
99
|
if computed_with is not None:
|
|
93
100
|
value_expr = exprs.Expr.from_object(computed_with)
|
|
94
101
|
if value_expr is None:
|
|
102
|
+
# TODO: this shouldn't be a user-facing error
|
|
95
103
|
raise excs.Error(
|
|
96
|
-
f'Column {name}: computed_with needs to be a valid Pixeltable expression, '
|
|
104
|
+
f'Column {name!r}: `computed_with` needs to be a valid Pixeltable expression, '
|
|
97
105
|
f'but it is a {type(computed_with)}'
|
|
98
106
|
)
|
|
99
107
|
else:
|
|
100
108
|
self._value_expr = value_expr.copy()
|
|
101
109
|
self.col_type = self._value_expr.col_type
|
|
110
|
+
if self._value_expr is not None and self.value_expr_dict is None:
|
|
111
|
+
self.value_expr_dict = self._value_expr.as_dict()
|
|
102
112
|
|
|
103
113
|
if col_type is not None:
|
|
104
114
|
self.col_type = col_type
|
|
105
115
|
assert self.col_type is not None
|
|
106
116
|
|
|
107
117
|
self.stored = stored
|
|
108
|
-
self.dependent_cols = set() # cols with value_exprs that reference us; set by TableVersion
|
|
118
|
+
# self.dependent_cols = set() # cols with value_exprs that reference us; set by TableVersion
|
|
109
119
|
self.id = col_id
|
|
110
120
|
self.is_pk = is_pk
|
|
121
|
+
self.is_iterator_col = is_iterator_col
|
|
111
122
|
self._media_validation = media_validation
|
|
112
123
|
self.schema_version_add = schema_version_add
|
|
113
124
|
self.schema_version_drop = schema_version_drop
|
|
114
125
|
|
|
115
|
-
|
|
126
|
+
if stores_cellmd is not None:
|
|
127
|
+
self.stores_cellmd = stores_cellmd
|
|
128
|
+
else:
|
|
129
|
+
self.stores_cellmd = stored and (
|
|
130
|
+
self.is_computed or self.col_type.is_media_type() or self.col_type.supports_file_offloading()
|
|
131
|
+
)
|
|
116
132
|
|
|
117
133
|
# column in the stored table for the values of this Column
|
|
118
134
|
self.sa_col = None
|
|
119
|
-
self.sa_col_type = sa_col_type
|
|
135
|
+
self.sa_col_type = self.col_type.to_sa_type() if sa_col_type is None else sa_col_type
|
|
120
136
|
|
|
121
137
|
# computed cols also have storage columns for the exception string and type
|
|
122
|
-
self.
|
|
123
|
-
self.
|
|
138
|
+
self.sa_cellmd_col = None
|
|
139
|
+
self._explicit_destination = destination
|
|
140
|
+
|
|
141
|
+
def to_md(self, pos: int | None = None) -> tuple[schema.ColumnMd, schema.SchemaColumn | None]:
|
|
142
|
+
"""Returns the Column and optional SchemaColumn metadata for this Column."""
|
|
143
|
+
assert self.is_pk is not None
|
|
144
|
+
col_md = schema.ColumnMd(
|
|
145
|
+
id=self.id,
|
|
146
|
+
col_type=self.col_type.as_dict(),
|
|
147
|
+
is_pk=self.is_pk,
|
|
148
|
+
schema_version_add=self.schema_version_add,
|
|
149
|
+
schema_version_drop=self.schema_version_drop,
|
|
150
|
+
value_expr=self.value_expr.as_dict() if self.value_expr is not None else None,
|
|
151
|
+
stored=self.stored,
|
|
152
|
+
destination=self._explicit_destination,
|
|
153
|
+
)
|
|
154
|
+
if pos is None:
|
|
155
|
+
return col_md, None
|
|
156
|
+
assert self.name is not None, 'Column name must be set for user-facing columns'
|
|
157
|
+
sch_md = schema.SchemaColumn(
|
|
158
|
+
name=self.name,
|
|
159
|
+
pos=pos,
|
|
160
|
+
media_validation=self._media_validation.name.lower() if self._media_validation is not None else None,
|
|
161
|
+
)
|
|
162
|
+
return col_md, sch_md
|
|
124
163
|
|
|
125
|
-
|
|
164
|
+
def init_value_expr(self, tvp: 'TableVersionPath' | None) -> None:
|
|
165
|
+
"""
|
|
166
|
+
Initialize the value_expr from its dict representation, if necessary.
|
|
126
167
|
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
# TODO: instantiate expr in the c'tor and add an Expr.prepare() that can create additional state after the
|
|
131
|
-
# catalog has been fully loaded; that way, we encounter bugs in the serialization/deserialization logic earlier
|
|
132
|
-
if self.value_expr_dict is not None and self._value_expr is None:
|
|
133
|
-
from pixeltable import exprs
|
|
168
|
+
If `tvp` is not None, retarget the value_expr to the given TableVersionPath.
|
|
169
|
+
"""
|
|
170
|
+
from pixeltable import exprs
|
|
134
171
|
|
|
172
|
+
if self._value_expr is None and self.value_expr_dict is None:
|
|
173
|
+
return
|
|
174
|
+
|
|
175
|
+
if self._value_expr is None:
|
|
176
|
+
# Instantiate the Expr from its dict
|
|
135
177
|
self._value_expr = exprs.Expr.from_dict(self.value_expr_dict)
|
|
136
178
|
self._value_expr.bind_rel_paths()
|
|
137
179
|
if not self._value_expr.is_valid:
|
|
138
180
|
message = (
|
|
139
181
|
dedent(
|
|
140
182
|
f"""
|
|
141
|
-
The computed column {self.name!r} in table {self.
|
|
183
|
+
The computed column {self.name!r} in table {self.get_tbl().name!r} is no longer valid.
|
|
142
184
|
{{validation_error}}
|
|
143
185
|
You can continue to query existing data from this column, but evaluating it on new data will raise an error.
|
|
144
186
|
""" # noqa: E501
|
|
@@ -147,36 +189,81 @@ class Column:
|
|
|
147
189
|
.format(validation_error=self._value_expr.validation_error)
|
|
148
190
|
)
|
|
149
191
|
warnings.warn(message, category=excs.PixeltableWarning, stacklevel=2)
|
|
192
|
+
|
|
193
|
+
if tvp is not None:
|
|
194
|
+
# Retarget the Expr
|
|
195
|
+
self._value_expr = self._value_expr.retarget(tvp)
|
|
196
|
+
|
|
197
|
+
def get_tbl(self) -> TableVersion:
|
|
198
|
+
tv = self.tbl_handle.get()
|
|
199
|
+
return tv
|
|
200
|
+
|
|
201
|
+
@property
|
|
202
|
+
def destination(self) -> str | None:
|
|
203
|
+
if self._explicit_destination is not None:
|
|
204
|
+
# An explicit destination was set as part of the column definition
|
|
205
|
+
return self._explicit_destination
|
|
206
|
+
|
|
207
|
+
# Otherwise, if this is a stored media column, use the default destination if one is configured (input
|
|
208
|
+
# destination or output destination, depending on whether this is a computed column)
|
|
209
|
+
# TODO: The `self.name is not None` clause is necessary because index columns currently follow the type of
|
|
210
|
+
# the underlying media column. We should move to using pxt.String as the col_type of index columns; this
|
|
211
|
+
# would be a more robust solution, and then `self.name is not None` could be removed.
|
|
212
|
+
if self.is_stored and self.col_type.is_media_type() and self.name is not None:
|
|
213
|
+
if self.is_computed:
|
|
214
|
+
return Env.get().default_output_media_dest
|
|
215
|
+
else:
|
|
216
|
+
return Env.get().default_input_media_dest
|
|
217
|
+
|
|
218
|
+
return None
|
|
219
|
+
|
|
220
|
+
@property
|
|
221
|
+
def handle(self) -> 'ColumnHandle':
|
|
222
|
+
"""Returns a ColumnHandle for this Column."""
|
|
223
|
+
from .table_version_handle import ColumnHandle
|
|
224
|
+
|
|
225
|
+
assert self.tbl_handle is not None
|
|
226
|
+
assert self.id is not None
|
|
227
|
+
return ColumnHandle(self.tbl_handle, self.id)
|
|
228
|
+
|
|
229
|
+
@property
|
|
230
|
+
def qid(self) -> QColumnId:
|
|
231
|
+
assert self.tbl_handle is not None
|
|
232
|
+
assert self.id is not None
|
|
233
|
+
return QColumnId(self.tbl_handle.id, self.id)
|
|
234
|
+
|
|
235
|
+
@property
|
|
236
|
+
def value_expr(self) -> exprs.Expr | None:
|
|
237
|
+
assert self.value_expr_dict is None or self._value_expr is not None
|
|
150
238
|
return self._value_expr
|
|
151
239
|
|
|
152
240
|
def set_value_expr(self, value_expr: exprs.Expr) -> None:
|
|
153
241
|
self._value_expr = value_expr
|
|
154
|
-
self.value_expr_dict =
|
|
242
|
+
self.value_expr_dict = self._value_expr.as_dict()
|
|
155
243
|
|
|
156
244
|
def check_value_expr(self) -> None:
|
|
157
245
|
assert self._value_expr is not None
|
|
158
|
-
if self.stored
|
|
246
|
+
if not self.stored and self.is_computed and self.has_window_fn_call():
|
|
159
247
|
raise excs.Error(
|
|
160
|
-
f'Column {self.name}: stored={self.stored} not supported for columns
|
|
161
|
-
f'
|
|
248
|
+
f'Column {self.name!r}: `stored={self.stored}` not supported for columns '
|
|
249
|
+
f'computed with window functions:\n{self.value_expr}'
|
|
162
250
|
)
|
|
163
251
|
|
|
164
252
|
def has_window_fn_call(self) -> bool:
|
|
165
|
-
if self.value_expr is None:
|
|
166
|
-
return False
|
|
167
253
|
from pixeltable import exprs
|
|
168
254
|
|
|
255
|
+
if self.value_expr is None:
|
|
256
|
+
return False
|
|
169
257
|
window_fn_calls = list(
|
|
170
258
|
self.value_expr.subexprs(filter=lambda e: isinstance(e, exprs.FunctionCall) and e.is_window_fn_call)
|
|
171
259
|
)
|
|
172
260
|
return len(window_fn_calls) > 0
|
|
173
261
|
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
return {name: info for name, info in tbl.get().idxs_by_name.items() if info.col == self}
|
|
262
|
+
def stores_external_array(self) -> bool:
|
|
263
|
+
"""Returns True if this is an Array column that might store its values externally."""
|
|
264
|
+
assert self.sa_col_type is not None
|
|
265
|
+
# Vector: if this is a vector column (ie, used for a vector index), it stores the array itself
|
|
266
|
+
return self.col_type.is_array_type() and not isinstance(self.sa_col_type, pgvector.sqlalchemy.Vector)
|
|
180
267
|
|
|
181
268
|
@property
|
|
182
269
|
def is_computed(self) -> bool:
|
|
@@ -188,25 +275,17 @@ class Column:
|
|
|
188
275
|
assert self.stored is not None
|
|
189
276
|
return self.stored
|
|
190
277
|
|
|
191
|
-
@property
|
|
192
|
-
def records_errors(self) -> bool:
|
|
193
|
-
"""True if this column also stores error information."""
|
|
194
|
-
# default: record errors for computed and media columns
|
|
195
|
-
if self._records_errors is not None:
|
|
196
|
-
return self._records_errors
|
|
197
|
-
return self.is_stored and (self.is_computed or self.col_type.is_media_type())
|
|
198
|
-
|
|
199
278
|
@property
|
|
200
279
|
def qualified_name(self) -> str:
|
|
201
|
-
assert self.
|
|
202
|
-
return f'{self.
|
|
280
|
+
assert self.get_tbl() is not None
|
|
281
|
+
return f'{self.get_tbl().name}.{self.name}'
|
|
203
282
|
|
|
204
283
|
@property
|
|
205
284
|
def media_validation(self) -> MediaValidation:
|
|
206
285
|
if self._media_validation is not None:
|
|
207
286
|
return self._media_validation
|
|
208
|
-
assert self.
|
|
209
|
-
return self.
|
|
287
|
+
assert self.get_tbl() is not None
|
|
288
|
+
return self.get_tbl().media_validation
|
|
210
289
|
|
|
211
290
|
@property
|
|
212
291
|
def is_required_for_insert(self) -> bool:
|
|
@@ -225,48 +304,46 @@ class Column:
|
|
|
225
304
|
|
|
226
305
|
def create_sa_cols(self) -> None:
|
|
227
306
|
"""
|
|
228
|
-
These need to be recreated for every
|
|
307
|
+
These need to be recreated for every sql.Table instance
|
|
229
308
|
"""
|
|
230
309
|
assert self.is_stored
|
|
310
|
+
assert self.stores_cellmd is not None
|
|
231
311
|
# all storage columns are nullable (we deal with null errors in Pixeltable directly)
|
|
232
|
-
self.sa_col = sql.Column(
|
|
233
|
-
|
|
234
|
-
self.
|
|
235
|
-
nullable=True,
|
|
236
|
-
)
|
|
237
|
-
if self.is_computed or self.col_type.is_media_type():
|
|
238
|
-
self.sa_errormsg_col = sql.Column(self.errormsg_store_name(), ts.StringType().to_sa_type(), nullable=True)
|
|
239
|
-
self.sa_errortype_col = sql.Column(self.errortype_store_name(), ts.StringType().to_sa_type(), nullable=True)
|
|
312
|
+
self.sa_col = sql.Column(self.store_name(), self.sa_col_type, nullable=True)
|
|
313
|
+
if self.stores_cellmd:
|
|
314
|
+
self.sa_cellmd_col = sql.Column(self.cellmd_store_name(), self.sa_cellmd_type(), nullable=True)
|
|
240
315
|
|
|
241
|
-
|
|
242
|
-
|
|
316
|
+
@classmethod
|
|
317
|
+
def cellmd_type(cls) -> ts.ColumnType:
|
|
318
|
+
return ts.JsonType(nullable=True)
|
|
319
|
+
|
|
320
|
+
@classmethod
|
|
321
|
+
def sa_cellmd_type(cls) -> sql.types.TypeEngine:
|
|
322
|
+
return cls.cellmd_type().to_sa_type()
|
|
243
323
|
|
|
244
324
|
def store_name(self) -> str:
|
|
245
325
|
assert self.id is not None
|
|
246
326
|
assert self.is_stored
|
|
247
327
|
return f'col_{self.id}'
|
|
248
328
|
|
|
249
|
-
def
|
|
250
|
-
return f'{self.store_name()}
|
|
251
|
-
|
|
252
|
-
def errortype_store_name(self) -> str:
|
|
253
|
-
return f'{self.store_name()}_errortype'
|
|
329
|
+
def cellmd_store_name(self) -> str:
|
|
330
|
+
return f'{self.store_name()}_cellmd'
|
|
254
331
|
|
|
255
332
|
def __str__(self) -> str:
|
|
256
333
|
return f'{self.name}: {self.col_type}'
|
|
257
334
|
|
|
258
335
|
def __repr__(self) -> str:
|
|
259
|
-
return f'Column({self.id!r}, {self.name!r}, tbl={self.
|
|
336
|
+
return f'Column({self.id!r}, {self.name!r}, tbl={self.get_tbl().name!r})'
|
|
260
337
|
|
|
261
338
|
def __hash__(self) -> int:
|
|
262
339
|
# TODO(aaron-siegel): This and __eq__ do not capture the table version. We need to rethink the Column
|
|
263
340
|
# abstraction (perhaps separating out the version-dependent properties into a different abstraction).
|
|
264
|
-
assert self.
|
|
265
|
-
return hash((self.
|
|
341
|
+
assert self.tbl_handle is not None
|
|
342
|
+
return hash((self.tbl_handle.id, self.id))
|
|
266
343
|
|
|
267
344
|
def __eq__(self, other: object) -> bool:
|
|
268
345
|
if not isinstance(other, Column):
|
|
269
346
|
return False
|
|
270
|
-
assert self.
|
|
271
|
-
assert other.
|
|
272
|
-
return self.
|
|
347
|
+
assert self.tbl_handle is not None
|
|
348
|
+
assert other.tbl_handle is not None
|
|
349
|
+
return self.tbl_handle.id == other.tbl_handle.id and self.id == other.id
|
pixeltable/catalog/dir.py
CHANGED
|
@@ -34,17 +34,15 @@ class Dir(SchemaObject):
|
|
|
34
34
|
dir = cls(dir_record.id, parent_id, name)
|
|
35
35
|
return dir
|
|
36
36
|
|
|
37
|
-
|
|
38
|
-
def _display_name(cls) -> str:
|
|
37
|
+
def _display_name(self) -> str:
|
|
39
38
|
return 'directory'
|
|
40
39
|
|
|
41
|
-
@property
|
|
42
40
|
def _path(self) -> str:
|
|
43
41
|
"""Returns the path to this schema object."""
|
|
44
42
|
if self._dir_id is None:
|
|
45
43
|
# we're the root dir
|
|
46
44
|
return ''
|
|
47
|
-
return super()._path
|
|
45
|
+
return super()._path()
|
|
48
46
|
|
|
49
47
|
def _move(self, new_name: str, new_dir_id: UUID) -> None:
|
|
50
48
|
# print(
|
pixeltable/catalog/globals.py
CHANGED
|
@@ -1,12 +1,10 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
import dataclasses
|
|
4
3
|
import enum
|
|
5
4
|
import itertools
|
|
6
5
|
import logging
|
|
7
|
-
from
|
|
8
|
-
|
|
9
|
-
from typing_extensions import Self
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from uuid import UUID
|
|
10
8
|
|
|
11
9
|
import pixeltable.exceptions as excs
|
|
12
10
|
|
|
@@ -18,45 +16,15 @@ _ROWID_COLUMN_NAME = '_rowid'
|
|
|
18
16
|
|
|
19
17
|
# Set of symbols that are predefined in the `InsertableTable` class (and are therefore not allowed as column names).
|
|
20
18
|
# This will be populated lazily to avoid circular imports.
|
|
21
|
-
_PREDEF_SYMBOLS:
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
@
|
|
25
|
-
class
|
|
26
|
-
"""
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
num_rows: int = 0
|
|
31
|
-
# TODO: disambiguate what this means: # of slots computed or # of columns computed?
|
|
32
|
-
num_computed_values: int = 0
|
|
33
|
-
num_excs: int = 0
|
|
34
|
-
updated_cols: list[str] = dataclasses.field(default_factory=list)
|
|
35
|
-
cols_with_excs: list[str] = dataclasses.field(default_factory=list)
|
|
36
|
-
|
|
37
|
-
def __iadd__(self, other: 'UpdateStatus') -> Self:
|
|
38
|
-
self.num_rows += other.num_rows
|
|
39
|
-
self.num_computed_values += other.num_computed_values
|
|
40
|
-
self.num_excs += other.num_excs
|
|
41
|
-
self.updated_cols = list(dict.fromkeys(self.updated_cols + other.updated_cols))
|
|
42
|
-
self.cols_with_excs = list(dict.fromkeys(self.cols_with_excs + other.cols_with_excs))
|
|
43
|
-
return self
|
|
44
|
-
|
|
45
|
-
@property
|
|
46
|
-
def insert_msg(self) -> str:
|
|
47
|
-
"""Return a message describing the results of an insert operation."""
|
|
48
|
-
if self.num_excs == 0:
|
|
49
|
-
cols_with_excs_str = ''
|
|
50
|
-
else:
|
|
51
|
-
cols_with_excs_str = (
|
|
52
|
-
f' across {len(self.cols_with_excs)} column{"" if len(self.cols_with_excs) == 1 else "s"}'
|
|
53
|
-
)
|
|
54
|
-
cols_with_excs_str += f' ({", ".join(self.cols_with_excs)})'
|
|
55
|
-
msg = (
|
|
56
|
-
f'Inserted {self.num_rows} row{"" if self.num_rows == 1 else "s"} '
|
|
57
|
-
f'with {self.num_excs} error{"" if self.num_excs == 1 else "s"}{cols_with_excs_str}.'
|
|
58
|
-
)
|
|
59
|
-
return msg
|
|
19
|
+
_PREDEF_SYMBOLS: set[str] | None = None
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass(frozen=True)
|
|
23
|
+
class QColumnId:
|
|
24
|
+
"""Qualified column id"""
|
|
25
|
+
|
|
26
|
+
tbl_id: UUID
|
|
27
|
+
col_id: int
|
|
60
28
|
|
|
61
29
|
|
|
62
30
|
class MediaValidation(enum.Enum):
|
|
@@ -100,14 +68,19 @@ class IfNotExistsParam(enum.Enum):
|
|
|
100
68
|
raise excs.Error(f'{param_name} must be one of: [{val_strs}]') from None
|
|
101
69
|
|
|
102
70
|
|
|
103
|
-
def is_valid_identifier(name: str, allow_system_identifiers: bool = False) -> bool:
|
|
104
|
-
|
|
71
|
+
def is_valid_identifier(name: str, *, allow_system_identifiers: bool = False, allow_hyphens: bool = False) -> bool:
|
|
72
|
+
# If allow_hyphens=True, we allow hyphens to appear in the name, but we still do not permit a name to start with
|
|
73
|
+
# one (even if allow_system_identifiers=True)
|
|
74
|
+
adj_name = name.replace('-', '_') if allow_hyphens else name
|
|
75
|
+
return (
|
|
76
|
+
adj_name.isidentifier() and not name.startswith('-') and (allow_system_identifiers or not name.startswith('_'))
|
|
77
|
+
)
|
|
105
78
|
|
|
106
79
|
|
|
107
80
|
def is_valid_path(path: str, empty_is_valid: bool, allow_system_paths: bool = False) -> bool:
|
|
108
81
|
if path == '':
|
|
109
82
|
return empty_is_valid
|
|
110
|
-
return all(is_valid_identifier(part, allow_system_paths) for part in path.split('.'))
|
|
83
|
+
return all(is_valid_identifier(part, allow_system_identifiers=allow_system_paths) for part in path.split('.'))
|
|
111
84
|
|
|
112
85
|
|
|
113
86
|
def is_system_column_name(name: str) -> bool:
|