pixeltable 0.4.0rc3__py3-none-any.whl → 0.4.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +23 -5
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +5 -3
- pixeltable/catalog/catalog.py +1318 -404
- pixeltable/catalog/column.py +186 -115
- pixeltable/catalog/dir.py +1 -2
- pixeltable/catalog/globals.py +11 -43
- pixeltable/catalog/insertable_table.py +167 -79
- pixeltable/catalog/path.py +61 -23
- pixeltable/catalog/schema_object.py +9 -10
- pixeltable/catalog/table.py +626 -308
- pixeltable/catalog/table_metadata.py +101 -0
- pixeltable/catalog/table_version.py +713 -569
- pixeltable/catalog/table_version_handle.py +37 -6
- pixeltable/catalog/table_version_path.py +42 -29
- pixeltable/catalog/tbl_ops.py +50 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +108 -94
- pixeltable/config.py +128 -22
- pixeltable/dataframe.py +188 -100
- pixeltable/env.py +407 -136
- pixeltable/exceptions.py +6 -0
- pixeltable/exec/__init__.py +3 -0
- pixeltable/exec/aggregation_node.py +7 -8
- pixeltable/exec/cache_prefetch_node.py +83 -110
- pixeltable/exec/cell_materialization_node.py +231 -0
- pixeltable/exec/cell_reconstruction_node.py +135 -0
- pixeltable/exec/component_iteration_node.py +4 -3
- pixeltable/exec/data_row_batch.py +8 -65
- pixeltable/exec/exec_context.py +16 -4
- pixeltable/exec/exec_node.py +13 -36
- pixeltable/exec/expr_eval/evaluators.py +7 -6
- pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
- pixeltable/exec/expr_eval/globals.py +8 -5
- pixeltable/exec/expr_eval/row_buffer.py +1 -2
- pixeltable/exec/expr_eval/schedulers.py +190 -30
- pixeltable/exec/globals.py +32 -0
- pixeltable/exec/in_memory_data_node.py +18 -18
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +16 -9
- pixeltable/exec/sql_node.py +206 -101
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/arithmetic_expr.py +27 -22
- pixeltable/exprs/array_slice.py +3 -3
- pixeltable/exprs/column_property_ref.py +34 -30
- pixeltable/exprs/column_ref.py +92 -96
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +5 -4
- pixeltable/exprs/data_row.py +152 -55
- pixeltable/exprs/expr.py +62 -43
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +17 -10
- pixeltable/exprs/function_call.py +75 -37
- pixeltable/exprs/globals.py +1 -2
- pixeltable/exprs/in_predicate.py +4 -4
- pixeltable/exprs/inline_expr.py +10 -27
- pixeltable/exprs/is_null.py +1 -3
- pixeltable/exprs/json_mapper.py +8 -8
- pixeltable/exprs/json_path.py +56 -22
- pixeltable/exprs/literal.py +5 -5
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +127 -53
- pixeltable/exprs/rowid_ref.py +8 -12
- pixeltable/exprs/similarity_expr.py +50 -25
- pixeltable/exprs/sql_element_cache.py +4 -4
- pixeltable/exprs/string_op.py +5 -5
- pixeltable/exprs/type_cast.py +3 -5
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/callable_function.py +9 -9
- pixeltable/func/expr_template_function.py +10 -10
- pixeltable/func/function.py +18 -20
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/globals.py +2 -3
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +20 -18
- pixeltable/func/signature.py +43 -16
- pixeltable/func/tools.py +23 -13
- pixeltable/func/udf.py +18 -20
- pixeltable/functions/__init__.py +6 -0
- pixeltable/functions/anthropic.py +93 -33
- pixeltable/functions/audio.py +114 -10
- pixeltable/functions/bedrock.py +13 -6
- pixeltable/functions/date.py +1 -1
- pixeltable/functions/deepseek.py +20 -9
- pixeltable/functions/fireworks.py +2 -2
- pixeltable/functions/gemini.py +28 -11
- pixeltable/functions/globals.py +13 -13
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1046 -23
- pixeltable/functions/image.py +9 -18
- pixeltable/functions/llama_cpp.py +23 -8
- pixeltable/functions/math.py +3 -4
- pixeltable/functions/mistralai.py +4 -15
- pixeltable/functions/ollama.py +16 -9
- pixeltable/functions/openai.py +104 -82
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +2 -2
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +21 -28
- pixeltable/functions/timestamp.py +13 -14
- pixeltable/functions/together.py +4 -6
- pixeltable/functions/twelvelabs.py +92 -0
- pixeltable/functions/util.py +6 -1
- pixeltable/functions/video.py +1388 -106
- pixeltable/functions/vision.py +7 -7
- pixeltable/functions/whisper.py +15 -7
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/{ext/functions → functions}/yolox.py +2 -4
- pixeltable/globals.py +332 -105
- pixeltable/index/base.py +13 -22
- pixeltable/index/btree.py +23 -22
- pixeltable/index/embedding_index.py +32 -44
- pixeltable/io/__init__.py +4 -2
- pixeltable/io/datarows.py +7 -6
- pixeltable/io/external_store.py +49 -77
- pixeltable/io/fiftyone.py +11 -11
- pixeltable/io/globals.py +29 -28
- pixeltable/io/hf_datasets.py +17 -9
- pixeltable/io/label_studio.py +70 -66
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +12 -11
- pixeltable/io/parquet.py +13 -93
- pixeltable/io/table_data_conduit.py +71 -47
- pixeltable/io/utils.py +3 -3
- pixeltable/iterators/__init__.py +2 -1
- pixeltable/iterators/audio.py +21 -11
- pixeltable/iterators/document.py +116 -55
- pixeltable/iterators/image.py +5 -2
- pixeltable/iterators/video.py +293 -13
- pixeltable/metadata/__init__.py +4 -2
- pixeltable/metadata/converters/convert_18.py +2 -2
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_20.py +2 -2
- pixeltable/metadata/converters/convert_21.py +2 -2
- pixeltable/metadata/converters/convert_22.py +2 -2
- pixeltable/metadata/converters/convert_24.py +2 -2
- pixeltable/metadata/converters/convert_25.py +2 -2
- pixeltable/metadata/converters/convert_26.py +2 -2
- pixeltable/metadata/converters/convert_29.py +4 -4
- pixeltable/metadata/converters/convert_34.py +2 -2
- pixeltable/metadata/converters/convert_36.py +2 -2
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/util.py +13 -12
- pixeltable/metadata/notes.py +4 -0
- pixeltable/metadata/schema.py +79 -42
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +274 -223
- pixeltable/share/__init__.py +1 -1
- pixeltable/share/packager.py +259 -129
- pixeltable/share/protocol/__init__.py +34 -0
- pixeltable/share/protocol/common.py +170 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +109 -0
- pixeltable/share/publish.py +213 -57
- pixeltable/store.py +238 -175
- pixeltable/type_system.py +104 -63
- pixeltable/utils/__init__.py +2 -3
- pixeltable/utils/arrow.py +108 -13
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +305 -0
- pixeltable/utils/code.py +3 -3
- pixeltable/utils/console_output.py +4 -1
- pixeltable/utils/coroutine.py +6 -23
- pixeltable/utils/dbms.py +31 -5
- pixeltable/utils/description_helper.py +4 -5
- pixeltable/utils/documents.py +5 -6
- pixeltable/utils/exception_handler.py +7 -30
- pixeltable/utils/filecache.py +6 -6
- pixeltable/utils/formatter.py +4 -6
- pixeltable/utils/gcs_store.py +283 -0
- pixeltable/utils/http_server.py +2 -3
- pixeltable/utils/iceberg.py +1 -2
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +88 -0
- pixeltable/utils/local_store.py +316 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +528 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +5 -6
- pixeltable/utils/s3_store.py +392 -0
- pixeltable-0.4.20.dist-info/METADATA +587 -0
- pixeltable-0.4.20.dist-info/RECORD +218 -0
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info}/WHEEL +1 -1
- pixeltable-0.4.20.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/ext/__init__.py +0 -17
- pixeltable/ext/functions/__init__.py +0 -11
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/utils/media_store.py +0 -77
- pixeltable/utils/s3.py +0 -17
- pixeltable/utils/sample.py +0 -25
- pixeltable-0.4.0rc3.dist-info/METADATA +0 -435
- pixeltable-0.4.0rc3.dist-info/RECORD +0 -189
- pixeltable-0.4.0rc3.dist-info/entry_points.txt +0 -3
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info/licenses}/LICENSE +0 -0
pixeltable/catalog/column.py
CHANGED
|
@@ -3,18 +3,21 @@ from __future__ import annotations
|
|
|
3
3
|
import logging
|
|
4
4
|
import warnings
|
|
5
5
|
from textwrap import dedent
|
|
6
|
-
from typing import TYPE_CHECKING, Any
|
|
6
|
+
from typing import TYPE_CHECKING, Any
|
|
7
7
|
|
|
8
8
|
import sqlalchemy as sql
|
|
9
9
|
|
|
10
10
|
import pixeltable.exceptions as excs
|
|
11
|
+
import pixeltable.exprs as exprs
|
|
11
12
|
import pixeltable.type_system as ts
|
|
12
|
-
from pixeltable import
|
|
13
|
+
from pixeltable.env import Env
|
|
14
|
+
from pixeltable.metadata import schema
|
|
13
15
|
|
|
14
|
-
from .globals import MediaValidation, is_valid_identifier
|
|
16
|
+
from .globals import MediaValidation, QColumnId, is_valid_identifier
|
|
15
17
|
|
|
16
18
|
if TYPE_CHECKING:
|
|
17
19
|
from .table_version import TableVersion
|
|
20
|
+
from .table_version_handle import ColumnHandle, TableVersionHandle
|
|
18
21
|
from .table_version_path import TableVersionPath
|
|
19
22
|
|
|
20
23
|
_logger = logging.getLogger('pixeltable')
|
|
@@ -25,122 +28,161 @@ class Column:
|
|
|
25
28
|
|
|
26
29
|
A Column contains all the metadata necessary for executing queries and updates against a particular version of a
|
|
27
30
|
table/view.
|
|
31
|
+
|
|
32
|
+
Args:
|
|
33
|
+
name: column name; None for system columns (eg, index columns)
|
|
34
|
+
col_type: column type; can be None if the type can be derived from ``computed_with``
|
|
35
|
+
computed_with: an Expr that computes the column value
|
|
36
|
+
is_pk: if True, this column is part of the primary key
|
|
37
|
+
stored: determines whether a computed column is present in the stored table or recomputed on demand
|
|
38
|
+
destination: An object store reference for persisting computed files
|
|
39
|
+
col_id: column ID (only used internally)
|
|
40
|
+
|
|
41
|
+
Computed columns: those have a non-None ``computed_with`` argument
|
|
42
|
+
- when constructed by the user: ``computed_with`` was constructed explicitly and is passed in;
|
|
43
|
+
col_type is None
|
|
44
|
+
- when loaded from md store: ``computed_with`` is set and col_type is set
|
|
45
|
+
|
|
46
|
+
``stored`` (only valid for computed columns):
|
|
47
|
+
- if True: the column is present in the stored table
|
|
48
|
+
- if False: the column is not present in the stored table and recomputed during a query
|
|
49
|
+
- if None: the system chooses for you (at present, this is always False, but this may change in the future)
|
|
28
50
|
"""
|
|
29
51
|
|
|
30
|
-
name: str
|
|
31
|
-
id:
|
|
52
|
+
name: str | None
|
|
53
|
+
id: int | None
|
|
32
54
|
col_type: ts.ColumnType
|
|
33
55
|
stored: bool
|
|
34
56
|
is_pk: bool
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
# we store a
|
|
47
|
-
|
|
48
|
-
tbl: Optional[TableVersion]
|
|
49
|
-
# tbl: Optional[TableVersionHandle]
|
|
57
|
+
is_iterator_col: bool
|
|
58
|
+
_explicit_destination: str | None # An object store reference for computed files
|
|
59
|
+
_media_validation: MediaValidation | None # if not set, TableVersion.media_validation applies
|
|
60
|
+
schema_version_add: int | None
|
|
61
|
+
schema_version_drop: int | None
|
|
62
|
+
stores_cellmd: bool
|
|
63
|
+
sa_col: sql.schema.Column | None
|
|
64
|
+
sa_col_type: sql.types.TypeEngine
|
|
65
|
+
sa_cellmd_col: sql.schema.Column | None # JSON metadata for the cell, e.g. errortype, errormsg for media columns
|
|
66
|
+
_value_expr: exprs.Expr | None
|
|
67
|
+
value_expr_dict: dict[str, Any] | None
|
|
68
|
+
# we store a handle here in order to allow Column construction before there is a corresponding TableVersion
|
|
69
|
+
tbl_handle: 'TableVersionHandle' | None
|
|
50
70
|
|
|
51
71
|
def __init__(
|
|
52
72
|
self,
|
|
53
|
-
name:
|
|
54
|
-
col_type:
|
|
55
|
-
computed_with:
|
|
73
|
+
name: str | None,
|
|
74
|
+
col_type: ts.ColumnType | None = None,
|
|
75
|
+
computed_with: exprs.Expr | None = None,
|
|
56
76
|
is_pk: bool = False,
|
|
77
|
+
is_iterator_col: bool = False,
|
|
57
78
|
stored: bool = True,
|
|
58
|
-
media_validation:
|
|
59
|
-
col_id:
|
|
60
|
-
schema_version_add:
|
|
61
|
-
schema_version_drop:
|
|
62
|
-
sa_col_type:
|
|
63
|
-
|
|
64
|
-
value_expr_dict:
|
|
79
|
+
media_validation: MediaValidation | None = None,
|
|
80
|
+
col_id: int | None = None,
|
|
81
|
+
schema_version_add: int | None = None,
|
|
82
|
+
schema_version_drop: int | None = None,
|
|
83
|
+
sa_col_type: sql.types.TypeEngine | None = None,
|
|
84
|
+
stores_cellmd: bool | None = None,
|
|
85
|
+
value_expr_dict: dict[str, Any] | None = None,
|
|
86
|
+
tbl_handle: 'TableVersionHandle' | None = None,
|
|
87
|
+
destination: str | None = None,
|
|
65
88
|
):
|
|
66
|
-
"""Column constructor.
|
|
67
|
-
|
|
68
|
-
Args:
|
|
69
|
-
name: column name; None for system columns (eg, index columns)
|
|
70
|
-
col_type: column type; can be None if the type can be derived from ``computed_with``
|
|
71
|
-
computed_with: an Expr that computes the column value
|
|
72
|
-
is_pk: if True, this column is part of the primary key
|
|
73
|
-
stored: determines whether a computed column is present in the stored table or recomputed on demand
|
|
74
|
-
col_id: column ID (only used internally)
|
|
75
|
-
|
|
76
|
-
Computed columns: those have a non-None ``computed_with`` argument
|
|
77
|
-
- when constructed by the user: ``computed_with`` was constructed explicitly and is passed in;
|
|
78
|
-
col_type is None
|
|
79
|
-
- when loaded from md store: ``computed_with`` is set and col_type is set
|
|
80
|
-
|
|
81
|
-
``stored`` (only valid for computed columns):
|
|
82
|
-
- if True: the column is present in the stored table
|
|
83
|
-
- if False: the column is not present in the stored table and recomputed during a query
|
|
84
|
-
- if None: the system chooses for you (at present, this is always False, but this may change in the future)
|
|
85
|
-
"""
|
|
86
89
|
if name is not None and not is_valid_identifier(name):
|
|
87
|
-
raise excs.Error(f
|
|
90
|
+
raise excs.Error(f'Invalid column name: {name}')
|
|
88
91
|
self.name = name
|
|
92
|
+
self.tbl_handle = tbl_handle
|
|
89
93
|
if col_type is None and computed_with is None:
|
|
90
|
-
raise excs.Error(f'Column
|
|
94
|
+
raise excs.Error(f'Column {name!r}: `col_type` is required if `computed_with` is not specified')
|
|
91
95
|
|
|
92
|
-
self._value_expr
|
|
96
|
+
self._value_expr = None
|
|
93
97
|
self.value_expr_dict = value_expr_dict
|
|
94
98
|
if computed_with is not None:
|
|
95
99
|
value_expr = exprs.Expr.from_object(computed_with)
|
|
96
100
|
if value_expr is None:
|
|
101
|
+
# TODO: this shouldn't be a user-facing error
|
|
97
102
|
raise excs.Error(
|
|
98
|
-
f'Column {name}: computed_with needs to be a valid Pixeltable expression, '
|
|
103
|
+
f'Column {name!r}: `computed_with` needs to be a valid Pixeltable expression, '
|
|
99
104
|
f'but it is a {type(computed_with)}'
|
|
100
105
|
)
|
|
101
106
|
else:
|
|
102
107
|
self._value_expr = value_expr.copy()
|
|
103
108
|
self.col_type = self._value_expr.col_type
|
|
109
|
+
if self._value_expr is not None and self.value_expr_dict is None:
|
|
110
|
+
self.value_expr_dict = self._value_expr.as_dict()
|
|
104
111
|
|
|
105
112
|
if col_type is not None:
|
|
106
113
|
self.col_type = col_type
|
|
107
114
|
assert self.col_type is not None
|
|
108
115
|
|
|
109
116
|
self.stored = stored
|
|
110
|
-
self.dependent_cols = set() # cols with value_exprs that reference us; set by TableVersion
|
|
117
|
+
# self.dependent_cols = set() # cols with value_exprs that reference us; set by TableVersion
|
|
111
118
|
self.id = col_id
|
|
112
119
|
self.is_pk = is_pk
|
|
120
|
+
self.is_iterator_col = is_iterator_col
|
|
113
121
|
self._media_validation = media_validation
|
|
114
122
|
self.schema_version_add = schema_version_add
|
|
115
123
|
self.schema_version_drop = schema_version_drop
|
|
116
124
|
|
|
117
|
-
|
|
125
|
+
if stores_cellmd is not None:
|
|
126
|
+
self.stores_cellmd = stores_cellmd
|
|
127
|
+
else:
|
|
128
|
+
self.stores_cellmd = stored and (
|
|
129
|
+
self.is_computed
|
|
130
|
+
or self.col_type.is_media_type()
|
|
131
|
+
or self.col_type.is_json_type()
|
|
132
|
+
or self.col_type.is_array_type()
|
|
133
|
+
)
|
|
118
134
|
|
|
119
135
|
# column in the stored table for the values of this Column
|
|
120
136
|
self.sa_col = None
|
|
121
|
-
self.sa_col_type = sa_col_type
|
|
137
|
+
self.sa_col_type = self.col_type.to_sa_type() if sa_col_type is None else sa_col_type
|
|
122
138
|
|
|
123
139
|
# computed cols also have storage columns for the exception string and type
|
|
124
|
-
self.
|
|
125
|
-
self.
|
|
140
|
+
self.sa_cellmd_col = None
|
|
141
|
+
self._explicit_destination = destination
|
|
142
|
+
|
|
143
|
+
def to_md(self, pos: int | None = None) -> tuple[schema.ColumnMd, schema.SchemaColumn | None]:
|
|
144
|
+
"""Returns the Column and optional SchemaColumn metadata for this Column."""
|
|
145
|
+
assert self.is_pk is not None
|
|
146
|
+
col_md = schema.ColumnMd(
|
|
147
|
+
id=self.id,
|
|
148
|
+
col_type=self.col_type.as_dict(),
|
|
149
|
+
is_pk=self.is_pk,
|
|
150
|
+
schema_version_add=self.schema_version_add,
|
|
151
|
+
schema_version_drop=self.schema_version_drop,
|
|
152
|
+
value_expr=self.value_expr.as_dict() if self.value_expr is not None else None,
|
|
153
|
+
stored=self.stored,
|
|
154
|
+
destination=self._explicit_destination,
|
|
155
|
+
)
|
|
156
|
+
if pos is None:
|
|
157
|
+
return col_md, None
|
|
158
|
+
assert self.name is not None, 'Column name must be set for user-facing columns'
|
|
159
|
+
sch_md = schema.SchemaColumn(
|
|
160
|
+
name=self.name,
|
|
161
|
+
pos=pos,
|
|
162
|
+
media_validation=self._media_validation.name.lower() if self._media_validation is not None else None,
|
|
163
|
+
)
|
|
164
|
+
return col_md, sch_md
|
|
126
165
|
|
|
127
|
-
|
|
166
|
+
def init_value_expr(self, tvp: 'TableVersionPath' | None) -> None:
|
|
167
|
+
"""
|
|
168
|
+
Initialize the value_expr from its dict representation, if necessary.
|
|
128
169
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
# TODO: instantiate expr in the c'tor and add an Expr.prepare() that can create additional state after the
|
|
133
|
-
# catalog has been fully loaded; that way, we encounter bugs in the serialization/deserialization logic earlier
|
|
134
|
-
if self.value_expr_dict is not None and self._value_expr is None:
|
|
135
|
-
from pixeltable import exprs
|
|
170
|
+
If `tvp` is not None, retarget the value_expr to the given TableVersionPath.
|
|
171
|
+
"""
|
|
172
|
+
from pixeltable import exprs
|
|
136
173
|
|
|
174
|
+
if self._value_expr is None and self.value_expr_dict is None:
|
|
175
|
+
return
|
|
176
|
+
|
|
177
|
+
if self._value_expr is None:
|
|
178
|
+
# Instantiate the Expr from its dict
|
|
137
179
|
self._value_expr = exprs.Expr.from_dict(self.value_expr_dict)
|
|
138
180
|
self._value_expr.bind_rel_paths()
|
|
139
181
|
if not self._value_expr.is_valid:
|
|
140
182
|
message = (
|
|
141
183
|
dedent(
|
|
142
184
|
f"""
|
|
143
|
-
The computed column {self.name!r} in table {self.
|
|
185
|
+
The computed column {self.name!r} in table {self.get_tbl().name!r} is no longer valid.
|
|
144
186
|
{{validation_error}}
|
|
145
187
|
You can continue to query existing data from this column, but evaluating it on new data will raise an error.
|
|
146
188
|
""" # noqa: E501
|
|
@@ -149,37 +191,76 @@ class Column:
|
|
|
149
191
|
.format(validation_error=self._value_expr.validation_error)
|
|
150
192
|
)
|
|
151
193
|
warnings.warn(message, category=excs.PixeltableWarning, stacklevel=2)
|
|
194
|
+
|
|
195
|
+
if tvp is not None:
|
|
196
|
+
# Retarget the Expr
|
|
197
|
+
self._value_expr = self._value_expr.retarget(tvp)
|
|
198
|
+
|
|
199
|
+
def get_tbl(self) -> TableVersion:
|
|
200
|
+
tv = self.tbl_handle.get()
|
|
201
|
+
return tv
|
|
202
|
+
|
|
203
|
+
@property
|
|
204
|
+
def destination(self) -> str | None:
|
|
205
|
+
if self._explicit_destination is not None:
|
|
206
|
+
# An expilicit destination was set as part of the column definition
|
|
207
|
+
return self._explicit_destination
|
|
208
|
+
|
|
209
|
+
# Otherwise, if this is a stored media column, use the default destination if one is configured (input
|
|
210
|
+
# destination or output destination, depending on whether this is a computed column)
|
|
211
|
+
# TODO: The `self.name is not None` clause is necessary because index columns currently follow the type of
|
|
212
|
+
# the underlying media column. We should move to using pxt.String as the col_type of index columns; this
|
|
213
|
+
# would be a more robust solution, and then `self.name is not None` could be removed.
|
|
214
|
+
if self.is_stored and self.col_type.is_media_type() and self.name is not None:
|
|
215
|
+
if self.is_computed:
|
|
216
|
+
return Env.get().default_output_media_dest
|
|
217
|
+
else:
|
|
218
|
+
return Env.get().default_input_media_dest
|
|
219
|
+
|
|
220
|
+
return None
|
|
221
|
+
|
|
222
|
+
@property
|
|
223
|
+
def handle(self) -> 'ColumnHandle':
|
|
224
|
+
"""Returns a ColumnHandle for this Column."""
|
|
225
|
+
from .table_version_handle import ColumnHandle
|
|
226
|
+
|
|
227
|
+
assert self.tbl_handle is not None
|
|
228
|
+
assert self.id is not None
|
|
229
|
+
return ColumnHandle(self.tbl_handle, self.id)
|
|
230
|
+
|
|
231
|
+
@property
|
|
232
|
+
def qid(self) -> QColumnId:
|
|
233
|
+
assert self.tbl_handle is not None
|
|
234
|
+
assert self.id is not None
|
|
235
|
+
return QColumnId(self.tbl_handle.id, self.id)
|
|
236
|
+
|
|
237
|
+
@property
|
|
238
|
+
def value_expr(self) -> exprs.Expr | None:
|
|
239
|
+
assert self.value_expr_dict is None or self._value_expr is not None
|
|
152
240
|
return self._value_expr
|
|
153
241
|
|
|
154
242
|
def set_value_expr(self, value_expr: exprs.Expr) -> None:
|
|
155
243
|
self._value_expr = value_expr
|
|
156
|
-
self.value_expr_dict =
|
|
244
|
+
self.value_expr_dict = self._value_expr.as_dict()
|
|
157
245
|
|
|
158
246
|
def check_value_expr(self) -> None:
|
|
159
247
|
assert self._value_expr is not None
|
|
160
|
-
if self.stored
|
|
248
|
+
if not self.stored and self.is_computed and self.has_window_fn_call():
|
|
161
249
|
raise excs.Error(
|
|
162
|
-
f'Column {self.name}: stored={self.stored} not supported for columns
|
|
163
|
-
f'
|
|
250
|
+
f'Column {self.name!r}: `stored={self.stored}` not supported for columns '
|
|
251
|
+
f'computed with window functions:\n{self.value_expr}'
|
|
164
252
|
)
|
|
165
253
|
|
|
166
254
|
def has_window_fn_call(self) -> bool:
|
|
167
|
-
if self.value_expr is None:
|
|
168
|
-
return False
|
|
169
255
|
from pixeltable import exprs
|
|
170
256
|
|
|
257
|
+
if self.value_expr is None:
|
|
258
|
+
return False
|
|
171
259
|
window_fn_calls = list(
|
|
172
260
|
self.value_expr.subexprs(filter=lambda e: isinstance(e, exprs.FunctionCall) and e.is_window_fn_call)
|
|
173
261
|
)
|
|
174
262
|
return len(window_fn_calls) > 0
|
|
175
263
|
|
|
176
|
-
# TODO: This should be moved out of `Column` (its presence in `Column` doesn't anticipate indices being defined on
|
|
177
|
-
# multiple dependents)
|
|
178
|
-
def get_idx_info(self, reference_tbl: Optional['TableVersionPath'] = None) -> dict[str, 'TableVersion.IndexInfo']:
|
|
179
|
-
assert self.tbl is not None
|
|
180
|
-
tbl = reference_tbl.tbl_version.get() if reference_tbl is not None else self.tbl
|
|
181
|
-
return {name: info for name, info in tbl.idxs_by_name.items() if info.col == self}
|
|
182
|
-
|
|
183
264
|
@property
|
|
184
265
|
def is_computed(self) -> bool:
|
|
185
266
|
return self._value_expr is not None or self.value_expr_dict is not None
|
|
@@ -190,25 +271,17 @@ class Column:
|
|
|
190
271
|
assert self.stored is not None
|
|
191
272
|
return self.stored
|
|
192
273
|
|
|
193
|
-
@property
|
|
194
|
-
def records_errors(self) -> bool:
|
|
195
|
-
"""True if this column also stores error information."""
|
|
196
|
-
# default: record errors for computed and media columns
|
|
197
|
-
if self._records_errors is not None:
|
|
198
|
-
return self._records_errors
|
|
199
|
-
return self.is_stored and (self.is_computed or self.col_type.is_media_type())
|
|
200
|
-
|
|
201
274
|
@property
|
|
202
275
|
def qualified_name(self) -> str:
|
|
203
|
-
assert self.
|
|
204
|
-
return f'{self.
|
|
276
|
+
assert self.get_tbl() is not None
|
|
277
|
+
return f'{self.get_tbl().name}.{self.name}'
|
|
205
278
|
|
|
206
279
|
@property
|
|
207
280
|
def media_validation(self) -> MediaValidation:
|
|
208
281
|
if self._media_validation is not None:
|
|
209
282
|
return self._media_validation
|
|
210
|
-
assert self.
|
|
211
|
-
return self.
|
|
283
|
+
assert self.get_tbl() is not None
|
|
284
|
+
return self.get_tbl().media_validation
|
|
212
285
|
|
|
213
286
|
@property
|
|
214
287
|
def is_required_for_insert(self) -> bool:
|
|
@@ -227,48 +300,46 @@ class Column:
|
|
|
227
300
|
|
|
228
301
|
def create_sa_cols(self) -> None:
|
|
229
302
|
"""
|
|
230
|
-
These need to be recreated for every
|
|
303
|
+
These need to be recreated for every sql.Table instance
|
|
231
304
|
"""
|
|
232
305
|
assert self.is_stored
|
|
306
|
+
assert self.stores_cellmd is not None
|
|
233
307
|
# all storage columns are nullable (we deal with null errors in Pixeltable directly)
|
|
234
|
-
self.sa_col = sql.Column(
|
|
235
|
-
|
|
236
|
-
self.
|
|
237
|
-
nullable=True,
|
|
238
|
-
)
|
|
239
|
-
if self.is_computed or self.col_type.is_media_type():
|
|
240
|
-
self.sa_errormsg_col = sql.Column(self.errormsg_store_name(), ts.StringType().to_sa_type(), nullable=True)
|
|
241
|
-
self.sa_errortype_col = sql.Column(self.errortype_store_name(), ts.StringType().to_sa_type(), nullable=True)
|
|
308
|
+
self.sa_col = sql.Column(self.store_name(), self.sa_col_type, nullable=True)
|
|
309
|
+
if self.stores_cellmd:
|
|
310
|
+
self.sa_cellmd_col = sql.Column(self.cellmd_store_name(), self.sa_cellmd_type(), nullable=True)
|
|
242
311
|
|
|
243
|
-
|
|
244
|
-
|
|
312
|
+
@classmethod
|
|
313
|
+
def cellmd_type(cls) -> ts.ColumnType:
|
|
314
|
+
return ts.JsonType(nullable=True)
|
|
315
|
+
|
|
316
|
+
@classmethod
|
|
317
|
+
def sa_cellmd_type(cls) -> sql.types.TypeEngine:
|
|
318
|
+
return cls.cellmd_type().to_sa_type()
|
|
245
319
|
|
|
246
320
|
def store_name(self) -> str:
|
|
247
321
|
assert self.id is not None
|
|
248
322
|
assert self.is_stored
|
|
249
323
|
return f'col_{self.id}'
|
|
250
324
|
|
|
251
|
-
def
|
|
252
|
-
return f'{self.store_name()}
|
|
253
|
-
|
|
254
|
-
def errortype_store_name(self) -> str:
|
|
255
|
-
return f'{self.store_name()}_errortype'
|
|
325
|
+
def cellmd_store_name(self) -> str:
|
|
326
|
+
return f'{self.store_name()}_cellmd'
|
|
256
327
|
|
|
257
328
|
def __str__(self) -> str:
|
|
258
329
|
return f'{self.name}: {self.col_type}'
|
|
259
330
|
|
|
260
331
|
def __repr__(self) -> str:
|
|
261
|
-
return f'Column({self.id!r}, {self.name!r}, tbl={self.
|
|
332
|
+
return f'Column({self.id!r}, {self.name!r}, tbl={self.get_tbl().name!r})'
|
|
262
333
|
|
|
263
334
|
def __hash__(self) -> int:
|
|
264
335
|
# TODO(aaron-siegel): This and __eq__ do not capture the table version. We need to rethink the Column
|
|
265
336
|
# abstraction (perhaps separating out the version-dependent properties into a different abstraction).
|
|
266
|
-
assert self.
|
|
267
|
-
return hash((self.
|
|
337
|
+
assert self.tbl_handle is not None
|
|
338
|
+
return hash((self.tbl_handle.id, self.id))
|
|
268
339
|
|
|
269
340
|
def __eq__(self, other: object) -> bool:
|
|
270
341
|
if not isinstance(other, Column):
|
|
271
342
|
return False
|
|
272
|
-
assert self.
|
|
273
|
-
assert other.
|
|
274
|
-
return self.
|
|
343
|
+
assert self.tbl_handle is not None
|
|
344
|
+
assert other.tbl_handle is not None
|
|
345
|
+
return self.tbl_handle.id == other.tbl_handle.id and self.id == other.id
|
pixeltable/catalog/dir.py
CHANGED
pixeltable/catalog/globals.py
CHANGED
|
@@ -1,12 +1,10 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
import dataclasses
|
|
4
3
|
import enum
|
|
5
4
|
import itertools
|
|
6
5
|
import logging
|
|
7
|
-
from
|
|
8
|
-
|
|
9
|
-
from typing_extensions import Self
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from uuid import UUID
|
|
10
8
|
|
|
11
9
|
import pixeltable.exceptions as excs
|
|
12
10
|
|
|
@@ -18,45 +16,15 @@ _ROWID_COLUMN_NAME = '_rowid'
|
|
|
18
16
|
|
|
19
17
|
# Set of symbols that are predefined in the `InsertableTable` class (and are therefore not allowed as column names).
|
|
20
18
|
# This will be populated lazily to avoid circular imports.
|
|
21
|
-
_PREDEF_SYMBOLS:
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
@
|
|
25
|
-
class
|
|
26
|
-
"""
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
num_rows: int = 0
|
|
31
|
-
# TODO: disambiguate what this means: # of slots computed or # of columns computed?
|
|
32
|
-
num_computed_values: int = 0
|
|
33
|
-
num_excs: int = 0
|
|
34
|
-
updated_cols: list[str] = dataclasses.field(default_factory=list)
|
|
35
|
-
cols_with_excs: list[str] = dataclasses.field(default_factory=list)
|
|
36
|
-
|
|
37
|
-
def __iadd__(self, other: 'UpdateStatus') -> Self:
|
|
38
|
-
self.num_rows += other.num_rows
|
|
39
|
-
self.num_computed_values += other.num_computed_values
|
|
40
|
-
self.num_excs += other.num_excs
|
|
41
|
-
self.updated_cols = list(dict.fromkeys(self.updated_cols + other.updated_cols))
|
|
42
|
-
self.cols_with_excs = list(dict.fromkeys(self.cols_with_excs + other.cols_with_excs))
|
|
43
|
-
return self
|
|
44
|
-
|
|
45
|
-
@property
|
|
46
|
-
def insert_msg(self) -> str:
|
|
47
|
-
"""Return a message describing the results of an insert operation."""
|
|
48
|
-
if self.num_excs == 0:
|
|
49
|
-
cols_with_excs_str = ''
|
|
50
|
-
else:
|
|
51
|
-
cols_with_excs_str = (
|
|
52
|
-
f' across {len(self.cols_with_excs)} column{"" if len(self.cols_with_excs) == 1 else "s"}'
|
|
53
|
-
)
|
|
54
|
-
cols_with_excs_str += f' ({", ".join(self.cols_with_excs)})'
|
|
55
|
-
msg = (
|
|
56
|
-
f'Inserted {self.num_rows} row{"" if self.num_rows == 1 else "s"} '
|
|
57
|
-
f'with {self.num_excs} error{"" if self.num_excs == 1 else "s"}{cols_with_excs_str}.'
|
|
58
|
-
)
|
|
59
|
-
return msg
|
|
19
|
+
_PREDEF_SYMBOLS: set[str] | None = None
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass(frozen=True)
|
|
23
|
+
class QColumnId:
|
|
24
|
+
"""Qualified column id"""
|
|
25
|
+
|
|
26
|
+
tbl_id: UUID
|
|
27
|
+
col_id: int
|
|
60
28
|
|
|
61
29
|
|
|
62
30
|
class MediaValidation(enum.Enum):
|