pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +83 -19
- pixeltable/_query.py +1444 -0
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +7 -4
- pixeltable/catalog/catalog.py +2394 -119
- pixeltable/catalog/column.py +225 -104
- pixeltable/catalog/dir.py +38 -9
- pixeltable/catalog/globals.py +53 -34
- pixeltable/catalog/insertable_table.py +265 -115
- pixeltable/catalog/path.py +80 -17
- pixeltable/catalog/schema_object.py +28 -43
- pixeltable/catalog/table.py +1270 -677
- pixeltable/catalog/table_metadata.py +103 -0
- pixeltable/catalog/table_version.py +1270 -751
- pixeltable/catalog/table_version_handle.py +109 -0
- pixeltable/catalog/table_version_path.py +137 -42
- pixeltable/catalog/tbl_ops.py +53 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +251 -134
- pixeltable/config.py +215 -0
- pixeltable/env.py +736 -285
- pixeltable/exceptions.py +26 -2
- pixeltable/exec/__init__.py +7 -2
- pixeltable/exec/aggregation_node.py +39 -21
- pixeltable/exec/cache_prefetch_node.py +87 -109
- pixeltable/exec/cell_materialization_node.py +268 -0
- pixeltable/exec/cell_reconstruction_node.py +168 -0
- pixeltable/exec/component_iteration_node.py +25 -28
- pixeltable/exec/data_row_batch.py +11 -46
- pixeltable/exec/exec_context.py +26 -11
- pixeltable/exec/exec_node.py +35 -27
- pixeltable/exec/expr_eval/__init__.py +3 -0
- pixeltable/exec/expr_eval/evaluators.py +365 -0
- pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
- pixeltable/exec/expr_eval/globals.py +200 -0
- pixeltable/exec/expr_eval/row_buffer.py +74 -0
- pixeltable/exec/expr_eval/schedulers.py +413 -0
- pixeltable/exec/globals.py +35 -0
- pixeltable/exec/in_memory_data_node.py +35 -27
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +44 -29
- pixeltable/exec/sql_node.py +414 -115
- pixeltable/exprs/__init__.py +8 -5
- pixeltable/exprs/arithmetic_expr.py +79 -45
- pixeltable/exprs/array_slice.py +5 -5
- pixeltable/exprs/column_property_ref.py +40 -26
- pixeltable/exprs/column_ref.py +254 -61
- pixeltable/exprs/comparison.py +14 -9
- pixeltable/exprs/compound_predicate.py +9 -10
- pixeltable/exprs/data_row.py +213 -72
- pixeltable/exprs/expr.py +270 -104
- pixeltable/exprs/expr_dict.py +6 -5
- pixeltable/exprs/expr_set.py +20 -11
- pixeltable/exprs/function_call.py +383 -284
- pixeltable/exprs/globals.py +18 -5
- pixeltable/exprs/in_predicate.py +7 -7
- pixeltable/exprs/inline_expr.py +37 -37
- pixeltable/exprs/is_null.py +8 -4
- pixeltable/exprs/json_mapper.py +120 -54
- pixeltable/exprs/json_path.py +90 -60
- pixeltable/exprs/literal.py +61 -16
- pixeltable/exprs/method_ref.py +7 -6
- pixeltable/exprs/object_ref.py +19 -8
- pixeltable/exprs/row_builder.py +238 -75
- pixeltable/exprs/rowid_ref.py +53 -15
- pixeltable/exprs/similarity_expr.py +65 -50
- pixeltable/exprs/sql_element_cache.py +5 -5
- pixeltable/exprs/string_op.py +107 -0
- pixeltable/exprs/type_cast.py +25 -13
- pixeltable/exprs/variable.py +2 -2
- pixeltable/func/__init__.py +9 -5
- pixeltable/func/aggregate_function.py +197 -92
- pixeltable/func/callable_function.py +119 -35
- pixeltable/func/expr_template_function.py +101 -48
- pixeltable/func/function.py +375 -62
- pixeltable/func/function_registry.py +20 -19
- pixeltable/func/globals.py +6 -5
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +151 -35
- pixeltable/func/signature.py +178 -49
- pixeltable/func/tools.py +164 -0
- pixeltable/func/udf.py +176 -53
- pixeltable/functions/__init__.py +44 -4
- pixeltable/functions/anthropic.py +226 -47
- pixeltable/functions/audio.py +148 -11
- pixeltable/functions/bedrock.py +137 -0
- pixeltable/functions/date.py +188 -0
- pixeltable/functions/deepseek.py +113 -0
- pixeltable/functions/document.py +81 -0
- pixeltable/functions/fal.py +76 -0
- pixeltable/functions/fireworks.py +72 -20
- pixeltable/functions/gemini.py +249 -0
- pixeltable/functions/globals.py +208 -53
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1088 -95
- pixeltable/functions/image.py +155 -84
- pixeltable/functions/json.py +8 -11
- pixeltable/functions/llama_cpp.py +31 -19
- pixeltable/functions/math.py +169 -0
- pixeltable/functions/mistralai.py +50 -75
- pixeltable/functions/net.py +70 -0
- pixeltable/functions/ollama.py +29 -36
- pixeltable/functions/openai.py +548 -160
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +15 -14
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +310 -85
- pixeltable/functions/timestamp.py +37 -19
- pixeltable/functions/together.py +77 -120
- pixeltable/functions/twelvelabs.py +188 -0
- pixeltable/functions/util.py +7 -2
- pixeltable/functions/uuid.py +30 -0
- pixeltable/functions/video.py +1528 -117
- pixeltable/functions/vision.py +26 -26
- pixeltable/functions/voyageai.py +289 -0
- pixeltable/functions/whisper.py +19 -10
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/functions/yolox.py +112 -0
- pixeltable/globals.py +716 -236
- pixeltable/index/__init__.py +3 -1
- pixeltable/index/base.py +17 -21
- pixeltable/index/btree.py +32 -22
- pixeltable/index/embedding_index.py +155 -92
- pixeltable/io/__init__.py +12 -7
- pixeltable/io/datarows.py +140 -0
- pixeltable/io/external_store.py +83 -125
- pixeltable/io/fiftyone.py +24 -33
- pixeltable/io/globals.py +47 -182
- pixeltable/io/hf_datasets.py +96 -127
- pixeltable/io/label_studio.py +171 -156
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +136 -115
- pixeltable/io/parquet.py +40 -153
- pixeltable/io/table_data_conduit.py +702 -0
- pixeltable/io/utils.py +100 -0
- pixeltable/iterators/__init__.py +8 -4
- pixeltable/iterators/audio.py +207 -0
- pixeltable/iterators/base.py +9 -3
- pixeltable/iterators/document.py +144 -87
- pixeltable/iterators/image.py +17 -38
- pixeltable/iterators/string.py +15 -12
- pixeltable/iterators/video.py +523 -127
- pixeltable/metadata/__init__.py +33 -8
- pixeltable/metadata/converters/convert_10.py +2 -3
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_15.py +15 -11
- pixeltable/metadata/converters/convert_16.py +4 -5
- pixeltable/metadata/converters/convert_17.py +4 -5
- pixeltable/metadata/converters/convert_18.py +4 -6
- pixeltable/metadata/converters/convert_19.py +6 -9
- pixeltable/metadata/converters/convert_20.py +3 -6
- pixeltable/metadata/converters/convert_21.py +6 -8
- pixeltable/metadata/converters/convert_22.py +3 -2
- pixeltable/metadata/converters/convert_23.py +33 -0
- pixeltable/metadata/converters/convert_24.py +55 -0
- pixeltable/metadata/converters/convert_25.py +19 -0
- pixeltable/metadata/converters/convert_26.py +23 -0
- pixeltable/metadata/converters/convert_27.py +29 -0
- pixeltable/metadata/converters/convert_28.py +13 -0
- pixeltable/metadata/converters/convert_29.py +110 -0
- pixeltable/metadata/converters/convert_30.py +63 -0
- pixeltable/metadata/converters/convert_31.py +11 -0
- pixeltable/metadata/converters/convert_32.py +15 -0
- pixeltable/metadata/converters/convert_33.py +17 -0
- pixeltable/metadata/converters/convert_34.py +21 -0
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/convert_41.py +12 -0
- pixeltable/metadata/converters/convert_42.py +9 -0
- pixeltable/metadata/converters/convert_43.py +44 -0
- pixeltable/metadata/converters/util.py +44 -18
- pixeltable/metadata/notes.py +21 -0
- pixeltable/metadata/schema.py +185 -42
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +616 -225
- pixeltable/share/__init__.py +3 -0
- pixeltable/share/packager.py +797 -0
- pixeltable/share/protocol/__init__.py +33 -0
- pixeltable/share/protocol/common.py +165 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +119 -0
- pixeltable/share/publish.py +349 -0
- pixeltable/store.py +398 -232
- pixeltable/type_system.py +730 -267
- pixeltable/utils/__init__.py +40 -0
- pixeltable/utils/arrow.py +201 -29
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +346 -0
- pixeltable/utils/coco.py +26 -27
- pixeltable/utils/code.py +4 -4
- pixeltable/utils/console_output.py +46 -0
- pixeltable/utils/coroutine.py +24 -0
- pixeltable/utils/dbms.py +92 -0
- pixeltable/utils/description_helper.py +11 -12
- pixeltable/utils/documents.py +60 -61
- pixeltable/utils/exception_handler.py +36 -0
- pixeltable/utils/filecache.py +38 -22
- pixeltable/utils/formatter.py +88 -51
- pixeltable/utils/gcs_store.py +295 -0
- pixeltable/utils/http.py +133 -0
- pixeltable/utils/http_server.py +14 -13
- pixeltable/utils/iceberg.py +13 -0
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +90 -0
- pixeltable/utils/local_store.py +322 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +573 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +20 -20
- pixeltable/utils/s3_store.py +527 -0
- pixeltable/utils/sql.py +32 -5
- pixeltable/utils/system.py +30 -0
- pixeltable/utils/transactional_directory.py +4 -3
- pixeltable-0.5.7.dist-info/METADATA +579 -0
- pixeltable-0.5.7.dist-info/RECORD +227 -0
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
- pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/catalog/named_function.py +0 -36
- pixeltable/catalog/path_dict.py +0 -141
- pixeltable/dataframe.py +0 -894
- pixeltable/exec/expr_eval_node.py +0 -232
- pixeltable/ext/__init__.py +0 -14
- pixeltable/ext/functions/__init__.py +0 -8
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/ext/functions/yolox.py +0 -157
- pixeltable/tool/create_test_db_dump.py +0 -311
- pixeltable/tool/create_test_video.py +0 -81
- pixeltable/tool/doc_plugins/griffe.py +0 -50
- pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
- pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
- pixeltable/tool/embed_udf.py +0 -9
- pixeltable/tool/mypy_plugin.py +0 -55
- pixeltable/utils/media_store.py +0 -76
- pixeltable/utils/s3.py +0 -16
- pixeltable-0.2.26.dist-info/METADATA +0 -400
- pixeltable-0.2.26.dist-info/RECORD +0 -156
- pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
pixeltable/catalog/column.py
CHANGED
|
@@ -1,146 +1,269 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
|
-
|
|
4
|
+
import warnings
|
|
5
|
+
from textwrap import dedent
|
|
6
|
+
from typing import TYPE_CHECKING, Any
|
|
5
7
|
|
|
8
|
+
import pgvector.sqlalchemy # type: ignore[import-untyped]
|
|
6
9
|
import sqlalchemy as sql
|
|
7
10
|
|
|
8
11
|
import pixeltable.exceptions as excs
|
|
12
|
+
import pixeltable.exprs as exprs
|
|
9
13
|
import pixeltable.type_system as ts
|
|
10
|
-
from pixeltable import
|
|
11
|
-
from .
|
|
14
|
+
from pixeltable.env import Env
|
|
15
|
+
from pixeltable.metadata import schema
|
|
16
|
+
|
|
17
|
+
from .globals import MediaValidation, QColumnId, is_valid_identifier
|
|
12
18
|
|
|
13
19
|
if TYPE_CHECKING:
|
|
14
20
|
from .table_version import TableVersion
|
|
21
|
+
from .table_version_handle import ColumnHandle, TableVersionHandle
|
|
22
|
+
from .table_version_path import TableVersionPath
|
|
15
23
|
|
|
16
24
|
_logger = logging.getLogger('pixeltable')
|
|
17
25
|
|
|
18
26
|
|
|
19
27
|
class Column:
|
|
20
|
-
"""Representation of a column in the schema of a Table/
|
|
28
|
+
"""Representation of a column in the schema of a Table/Query.
|
|
21
29
|
|
|
22
30
|
A Column contains all the metadata necessary for executing queries and updates against a particular version of a
|
|
23
31
|
table/view.
|
|
32
|
+
|
|
33
|
+
Args:
|
|
34
|
+
name: column name; None for system columns (eg, index columns)
|
|
35
|
+
col_type: column type; can be None if the type can be derived from ``computed_with``
|
|
36
|
+
computed_with: an Expr that computes the column value
|
|
37
|
+
is_pk: if True, this column is part of the primary key
|
|
38
|
+
stored: determines whether a computed column is present in the stored table or recomputed on demand
|
|
39
|
+
destination: An object store reference for persisting computed files
|
|
40
|
+
col_id: column ID (only used internally)
|
|
41
|
+
|
|
42
|
+
Computed columns: those have a non-None ``computed_with`` argument
|
|
43
|
+
- when constructed by the user: ``computed_with`` was constructed explicitly and is passed in;
|
|
44
|
+
col_type is None
|
|
45
|
+
- when loaded from md store: ``computed_with`` is set and col_type is set
|
|
46
|
+
|
|
47
|
+
``stored`` (only valid for computed columns):
|
|
48
|
+
- if True: the column is present in the stored table
|
|
49
|
+
- if False: the column is not present in the stored table and recomputed during a query
|
|
50
|
+
- if None: the system chooses for you (at present, this is always False, but this may change in the future)
|
|
24
51
|
"""
|
|
25
|
-
|
|
26
|
-
|
|
52
|
+
|
|
53
|
+
name: str | None
|
|
54
|
+
id: int | None
|
|
27
55
|
col_type: ts.ColumnType
|
|
28
56
|
stored: bool
|
|
29
57
|
is_pk: bool
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
58
|
+
is_iterator_col: bool
|
|
59
|
+
_explicit_destination: str | None # An object store reference for computed files
|
|
60
|
+
_media_validation: MediaValidation | None # if not set, TableVersion.media_validation applies
|
|
61
|
+
schema_version_add: int | None
|
|
62
|
+
schema_version_drop: int | None
|
|
63
|
+
stores_cellmd: bool
|
|
64
|
+
sa_col: sql.schema.Column | None
|
|
65
|
+
sa_col_type: sql.types.TypeEngine
|
|
66
|
+
sa_cellmd_col: sql.schema.Column | None # JSON metadata for the cell, e.g. errortype, errormsg for media columns
|
|
67
|
+
_value_expr: exprs.Expr | None
|
|
68
|
+
value_expr_dict: dict[str, Any] | None
|
|
69
|
+
# we store a handle here in order to allow Column construction before there is a corresponding TableVersion
|
|
70
|
+
tbl_handle: 'TableVersionHandle' | None
|
|
42
71
|
|
|
43
72
|
def __init__(
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
73
|
+
self,
|
|
74
|
+
name: str | None,
|
|
75
|
+
col_type: ts.ColumnType | None = None,
|
|
76
|
+
computed_with: exprs.Expr | None = None,
|
|
77
|
+
is_pk: bool = False,
|
|
78
|
+
is_iterator_col: bool = False,
|
|
79
|
+
stored: bool = True,
|
|
80
|
+
media_validation: MediaValidation | None = None,
|
|
81
|
+
col_id: int | None = None,
|
|
82
|
+
schema_version_add: int | None = None,
|
|
83
|
+
schema_version_drop: int | None = None,
|
|
84
|
+
sa_col_type: sql.types.TypeEngine | None = None,
|
|
85
|
+
stores_cellmd: bool | None = None,
|
|
86
|
+
value_expr_dict: dict[str, Any] | None = None,
|
|
87
|
+
tbl_handle: 'TableVersionHandle' | None = None,
|
|
88
|
+
destination: str | None = None,
|
|
50
89
|
):
|
|
51
|
-
"""Column constructor.
|
|
52
|
-
|
|
53
|
-
Args:
|
|
54
|
-
name: column name; None for system columns (eg, index columns)
|
|
55
|
-
col_type: column type; can be None if the type can be derived from ``computed_with``
|
|
56
|
-
computed_with: an Expr that computes the column value
|
|
57
|
-
is_pk: if True, this column is part of the primary key
|
|
58
|
-
stored: determines whether a computed column is present in the stored table or recomputed on demand
|
|
59
|
-
col_id: column ID (only used internally)
|
|
60
|
-
|
|
61
|
-
Computed columns: those have a non-None ``computed_with`` argument
|
|
62
|
-
- when constructed by the user: ``computed_with`` was constructed explicitly and is passed in;
|
|
63
|
-
col_type is None
|
|
64
|
-
- when loaded from md store: ``computed_with`` is set and col_type is set
|
|
65
|
-
|
|
66
|
-
``stored`` (only valid for computed image columns):
|
|
67
|
-
- if True: the column is present in the stored table
|
|
68
|
-
- if False: the column is not present in the stored table and recomputed during a query
|
|
69
|
-
- if None: the system chooses for you (at present, this is always False, but this may change in the future)
|
|
70
|
-
"""
|
|
71
90
|
if name is not None and not is_valid_identifier(name):
|
|
72
|
-
raise excs.Error(f
|
|
91
|
+
raise excs.Error(f'Invalid column name: {name}')
|
|
73
92
|
self.name = name
|
|
93
|
+
self.tbl_handle = tbl_handle
|
|
74
94
|
if col_type is None and computed_with is None:
|
|
75
|
-
raise excs.Error(f'Column
|
|
95
|
+
raise excs.Error(f'Column {name!r}: `col_type` is required if `computed_with` is not specified')
|
|
76
96
|
|
|
77
|
-
self._value_expr
|
|
97
|
+
self._value_expr = None
|
|
78
98
|
self.value_expr_dict = value_expr_dict
|
|
79
99
|
if computed_with is not None:
|
|
80
100
|
value_expr = exprs.Expr.from_object(computed_with)
|
|
81
101
|
if value_expr is None:
|
|
102
|
+
# TODO: this shouldn't be a user-facing error
|
|
82
103
|
raise excs.Error(
|
|
83
|
-
f'Column {name}: computed_with needs to be a valid Pixeltable expression, '
|
|
84
|
-
f'but it is a {type(computed_with)}'
|
|
104
|
+
f'Column {name!r}: `computed_with` needs to be a valid Pixeltable expression, '
|
|
105
|
+
f'but it is a {type(computed_with)}'
|
|
106
|
+
)
|
|
85
107
|
else:
|
|
86
108
|
self._value_expr = value_expr.copy()
|
|
87
109
|
self.col_type = self._value_expr.col_type
|
|
110
|
+
if self._value_expr is not None and self.value_expr_dict is None:
|
|
111
|
+
self.value_expr_dict = self._value_expr.as_dict()
|
|
88
112
|
|
|
89
113
|
if col_type is not None:
|
|
90
114
|
self.col_type = col_type
|
|
91
115
|
assert self.col_type is not None
|
|
92
116
|
|
|
93
117
|
self.stored = stored
|
|
94
|
-
self.dependent_cols = set() # cols with value_exprs that reference us; set by TableVersion
|
|
118
|
+
# self.dependent_cols = set() # cols with value_exprs that reference us; set by TableVersion
|
|
95
119
|
self.id = col_id
|
|
96
120
|
self.is_pk = is_pk
|
|
121
|
+
self.is_iterator_col = is_iterator_col
|
|
97
122
|
self._media_validation = media_validation
|
|
98
123
|
self.schema_version_add = schema_version_add
|
|
99
124
|
self.schema_version_drop = schema_version_drop
|
|
100
125
|
|
|
101
|
-
|
|
126
|
+
if stores_cellmd is not None:
|
|
127
|
+
self.stores_cellmd = stores_cellmd
|
|
128
|
+
else:
|
|
129
|
+
self.stores_cellmd = stored and (
|
|
130
|
+
self.is_computed or self.col_type.is_media_type() or self.col_type.supports_file_offloading()
|
|
131
|
+
)
|
|
102
132
|
|
|
103
133
|
# column in the stored table for the values of this Column
|
|
104
134
|
self.sa_col = None
|
|
105
|
-
self.sa_col_type = sa_col_type
|
|
135
|
+
self.sa_col_type = self.col_type.to_sa_type() if sa_col_type is None else sa_col_type
|
|
106
136
|
|
|
107
137
|
# computed cols also have storage columns for the exception string and type
|
|
108
|
-
self.
|
|
109
|
-
self.
|
|
138
|
+
self.sa_cellmd_col = None
|
|
139
|
+
self._explicit_destination = destination
|
|
140
|
+
|
|
141
|
+
def to_md(self, pos: int | None = None) -> tuple[schema.ColumnMd, schema.SchemaColumn | None]:
|
|
142
|
+
"""Returns the Column and optional SchemaColumn metadata for this Column."""
|
|
143
|
+
assert self.is_pk is not None
|
|
144
|
+
col_md = schema.ColumnMd(
|
|
145
|
+
id=self.id,
|
|
146
|
+
col_type=self.col_type.as_dict(),
|
|
147
|
+
is_pk=self.is_pk,
|
|
148
|
+
schema_version_add=self.schema_version_add,
|
|
149
|
+
schema_version_drop=self.schema_version_drop,
|
|
150
|
+
value_expr=self.value_expr.as_dict() if self.value_expr is not None else None,
|
|
151
|
+
stored=self.stored,
|
|
152
|
+
destination=self._explicit_destination,
|
|
153
|
+
)
|
|
154
|
+
if pos is None:
|
|
155
|
+
return col_md, None
|
|
156
|
+
assert self.name is not None, 'Column name must be set for user-facing columns'
|
|
157
|
+
sch_md = schema.SchemaColumn(
|
|
158
|
+
name=self.name,
|
|
159
|
+
pos=pos,
|
|
160
|
+
media_validation=self._media_validation.name.lower() if self._media_validation is not None else None,
|
|
161
|
+
)
|
|
162
|
+
return col_md, sch_md
|
|
163
|
+
|
|
164
|
+
def init_value_expr(self, tvp: 'TableVersionPath' | None) -> None:
|
|
165
|
+
"""
|
|
166
|
+
Initialize the value_expr from its dict representation, if necessary.
|
|
110
167
|
|
|
111
|
-
|
|
168
|
+
If `tvp` is not None, retarget the value_expr to the given TableVersionPath.
|
|
169
|
+
"""
|
|
170
|
+
from pixeltable import exprs
|
|
112
171
|
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
if self.value_expr_dict is not None and self._value_expr is None:
|
|
119
|
-
from pixeltable import exprs
|
|
172
|
+
if self._value_expr is None and self.value_expr_dict is None:
|
|
173
|
+
return
|
|
174
|
+
|
|
175
|
+
if self._value_expr is None:
|
|
176
|
+
# Instantiate the Expr from its dict
|
|
120
177
|
self._value_expr = exprs.Expr.from_dict(self.value_expr_dict)
|
|
178
|
+
self._value_expr.bind_rel_paths()
|
|
179
|
+
if not self._value_expr.is_valid:
|
|
180
|
+
message = (
|
|
181
|
+
dedent(
|
|
182
|
+
f"""
|
|
183
|
+
The computed column {self.name!r} in table {self.get_tbl().name!r} is no longer valid.
|
|
184
|
+
{{validation_error}}
|
|
185
|
+
You can continue to query existing data from this column, but evaluating it on new data will raise an error.
|
|
186
|
+
""" # noqa: E501
|
|
187
|
+
)
|
|
188
|
+
.strip()
|
|
189
|
+
.format(validation_error=self._value_expr.validation_error)
|
|
190
|
+
)
|
|
191
|
+
warnings.warn(message, category=excs.PixeltableWarning, stacklevel=2)
|
|
192
|
+
|
|
193
|
+
if tvp is not None:
|
|
194
|
+
# Retarget the Expr
|
|
195
|
+
self._value_expr = self._value_expr.retarget(tvp)
|
|
196
|
+
|
|
197
|
+
def get_tbl(self) -> TableVersion:
|
|
198
|
+
tv = self.tbl_handle.get()
|
|
199
|
+
return tv
|
|
200
|
+
|
|
201
|
+
@property
|
|
202
|
+
def destination(self) -> str | None:
|
|
203
|
+
if self._explicit_destination is not None:
|
|
204
|
+
# An explicit destination was set as part of the column definition
|
|
205
|
+
return self._explicit_destination
|
|
206
|
+
|
|
207
|
+
# Otherwise, if this is a stored media column, use the default destination if one is configured (input
|
|
208
|
+
# destination or output destination, depending on whether this is a computed column)
|
|
209
|
+
# TODO: The `self.name is not None` clause is necessary because index columns currently follow the type of
|
|
210
|
+
# the underlying media column. We should move to using pxt.String as the col_type of index columns; this
|
|
211
|
+
# would be a more robust solution, and then `self.name is not None` could be removed.
|
|
212
|
+
if self.is_stored and self.col_type.is_media_type() and self.name is not None:
|
|
213
|
+
if self.is_computed:
|
|
214
|
+
return Env.get().default_output_media_dest
|
|
215
|
+
else:
|
|
216
|
+
return Env.get().default_input_media_dest
|
|
217
|
+
|
|
218
|
+
return None
|
|
219
|
+
|
|
220
|
+
@property
|
|
221
|
+
def handle(self) -> 'ColumnHandle':
|
|
222
|
+
"""Returns a ColumnHandle for this Column."""
|
|
223
|
+
from .table_version_handle import ColumnHandle
|
|
224
|
+
|
|
225
|
+
assert self.tbl_handle is not None
|
|
226
|
+
assert self.id is not None
|
|
227
|
+
return ColumnHandle(self.tbl_handle, self.id)
|
|
228
|
+
|
|
229
|
+
@property
|
|
230
|
+
def qid(self) -> QColumnId:
|
|
231
|
+
assert self.tbl_handle is not None
|
|
232
|
+
assert self.id is not None
|
|
233
|
+
return QColumnId(self.tbl_handle.id, self.id)
|
|
234
|
+
|
|
235
|
+
@property
|
|
236
|
+
def value_expr(self) -> exprs.Expr | None:
|
|
237
|
+
assert self.value_expr_dict is None or self._value_expr is not None
|
|
121
238
|
return self._value_expr
|
|
122
239
|
|
|
123
240
|
def set_value_expr(self, value_expr: exprs.Expr) -> None:
|
|
124
241
|
self._value_expr = value_expr
|
|
125
|
-
self.value_expr_dict =
|
|
242
|
+
self.value_expr_dict = self._value_expr.as_dict()
|
|
126
243
|
|
|
127
244
|
def check_value_expr(self) -> None:
|
|
128
245
|
assert self._value_expr is not None
|
|
129
|
-
if self.stored
|
|
246
|
+
if not self.stored and self.is_computed and self.has_window_fn_call():
|
|
130
247
|
raise excs.Error(
|
|
131
|
-
f'Column {self.name}: stored={self.stored} not supported for columns
|
|
132
|
-
f'
|
|
248
|
+
f'Column {self.name!r}: `stored={self.stored}` not supported for columns '
|
|
249
|
+
f'computed with window functions:\n{self.value_expr}'
|
|
250
|
+
)
|
|
133
251
|
|
|
134
252
|
def has_window_fn_call(self) -> bool:
|
|
253
|
+
from pixeltable import exprs
|
|
254
|
+
|
|
135
255
|
if self.value_expr is None:
|
|
136
256
|
return False
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
257
|
+
window_fn_calls = list(
|
|
258
|
+
self.value_expr.subexprs(filter=lambda e: isinstance(e, exprs.FunctionCall) and e.is_window_fn_call)
|
|
259
|
+
)
|
|
260
|
+
return len(window_fn_calls) > 0
|
|
140
261
|
|
|
141
|
-
def
|
|
142
|
-
|
|
143
|
-
|
|
262
|
+
def stores_external_array(self) -> bool:
|
|
263
|
+
"""Returns True if this is an Array column that might store its values externally."""
|
|
264
|
+
assert self.sa_col_type is not None
|
|
265
|
+
# Vector: if this is a vector column (ie, used for a vector index), it stores the array itself
|
|
266
|
+
return self.col_type.is_array_type() and not isinstance(self.sa_col_type, pgvector.sqlalchemy.Vector)
|
|
144
267
|
|
|
145
268
|
@property
|
|
146
269
|
def is_computed(self) -> bool:
|
|
@@ -152,77 +275,75 @@ class Column:
|
|
|
152
275
|
assert self.stored is not None
|
|
153
276
|
return self.stored
|
|
154
277
|
|
|
155
|
-
@property
|
|
156
|
-
def records_errors(self) -> bool:
|
|
157
|
-
"""True if this column also stores error information."""
|
|
158
|
-
# default: record errors for computed and media columns
|
|
159
|
-
if self._records_errors is not None:
|
|
160
|
-
return self._records_errors
|
|
161
|
-
return self.is_stored and (self.is_computed or self.col_type.is_media_type())
|
|
162
|
-
|
|
163
278
|
@property
|
|
164
279
|
def qualified_name(self) -> str:
|
|
165
|
-
assert self.
|
|
166
|
-
return f'{self.
|
|
280
|
+
assert self.get_tbl() is not None
|
|
281
|
+
return f'{self.get_tbl().name}.{self.name}'
|
|
167
282
|
|
|
168
283
|
@property
|
|
169
284
|
def media_validation(self) -> MediaValidation:
|
|
170
285
|
if self._media_validation is not None:
|
|
171
286
|
return self._media_validation
|
|
172
|
-
assert self.
|
|
173
|
-
return self.
|
|
287
|
+
assert self.get_tbl() is not None
|
|
288
|
+
return self.get_tbl().media_validation
|
|
289
|
+
|
|
290
|
+
@property
|
|
291
|
+
def is_required_for_insert(self) -> bool:
|
|
292
|
+
"""Returns True if column is required when inserting rows."""
|
|
293
|
+
return not self.col_type.nullable and not self.is_computed
|
|
174
294
|
|
|
175
295
|
def source(self) -> None:
|
|
176
296
|
"""
|
|
177
297
|
If this is a computed col and the top-level expr is a function call, print the source, if possible.
|
|
178
298
|
"""
|
|
179
299
|
from pixeltable import exprs
|
|
300
|
+
|
|
180
301
|
if self.value_expr is None or not isinstance(self.value_expr, exprs.FunctionCall):
|
|
181
302
|
return
|
|
182
303
|
self.value_expr.fn.source()
|
|
183
304
|
|
|
184
305
|
def create_sa_cols(self) -> None:
|
|
185
306
|
"""
|
|
186
|
-
These need to be recreated for every
|
|
307
|
+
These need to be recreated for every sql.Table instance
|
|
187
308
|
"""
|
|
188
309
|
assert self.is_stored
|
|
310
|
+
assert self.stores_cellmd is not None
|
|
189
311
|
# all storage columns are nullable (we deal with null errors in Pixeltable directly)
|
|
190
|
-
self.sa_col = sql.Column(
|
|
191
|
-
|
|
192
|
-
nullable=True)
|
|
193
|
-
if self.is_computed or self.col_type.is_media_type():
|
|
194
|
-
self.sa_errormsg_col = sql.Column(self.errormsg_store_name(), ts.StringType().to_sa_type(), nullable=True)
|
|
195
|
-
self.sa_errortype_col = sql.Column(self.errortype_store_name(), ts.StringType().to_sa_type(), nullable=True)
|
|
312
|
+
self.sa_col = sql.Column(self.store_name(), self.sa_col_type, nullable=True)
|
|
313
|
+
if self.stores_cellmd:
|
|
314
|
+
self.sa_cellmd_col = sql.Column(self.cellmd_store_name(), self.sa_cellmd_type(), nullable=True)
|
|
196
315
|
|
|
197
|
-
|
|
198
|
-
|
|
316
|
+
@classmethod
|
|
317
|
+
def cellmd_type(cls) -> ts.ColumnType:
|
|
318
|
+
return ts.JsonType(nullable=True)
|
|
319
|
+
|
|
320
|
+
@classmethod
|
|
321
|
+
def sa_cellmd_type(cls) -> sql.types.TypeEngine:
|
|
322
|
+
return cls.cellmd_type().to_sa_type()
|
|
199
323
|
|
|
200
324
|
def store_name(self) -> str:
|
|
201
325
|
assert self.id is not None
|
|
202
326
|
assert self.is_stored
|
|
203
327
|
return f'col_{self.id}'
|
|
204
328
|
|
|
205
|
-
def
|
|
206
|
-
return f'{self.store_name()}
|
|
207
|
-
|
|
208
|
-
def errortype_store_name(self) -> str:
|
|
209
|
-
return f'{self.store_name()}_errortype'
|
|
329
|
+
def cellmd_store_name(self) -> str:
|
|
330
|
+
return f'{self.store_name()}_cellmd'
|
|
210
331
|
|
|
211
332
|
def __str__(self) -> str:
|
|
212
333
|
return f'{self.name}: {self.col_type}'
|
|
213
334
|
|
|
214
335
|
def __repr__(self) -> str:
|
|
215
|
-
return f'Column({self.id!r}, {self.name!r}, tbl={self.
|
|
336
|
+
return f'Column({self.id!r}, {self.name!r}, tbl={self.get_tbl().name!r})'
|
|
216
337
|
|
|
217
338
|
def __hash__(self) -> int:
|
|
218
339
|
# TODO(aaron-siegel): This and __eq__ do not capture the table version. We need to rethink the Column
|
|
219
340
|
# abstraction (perhaps separating out the version-dependent properties into a different abstraction).
|
|
220
|
-
assert self.
|
|
221
|
-
return hash((self.
|
|
341
|
+
assert self.tbl_handle is not None
|
|
342
|
+
return hash((self.tbl_handle.id, self.id))
|
|
222
343
|
|
|
223
344
|
def __eq__(self, other: object) -> bool:
|
|
224
345
|
if not isinstance(other, Column):
|
|
225
346
|
return False
|
|
226
|
-
assert self.
|
|
227
|
-
assert other.
|
|
228
|
-
return self.
|
|
347
|
+
assert self.tbl_handle is not None
|
|
348
|
+
assert other.tbl_handle is not None
|
|
349
|
+
return self.tbl_handle.id == other.tbl_handle.id and self.id == other.id
|
pixeltable/catalog/dir.py
CHANGED
|
@@ -1,32 +1,61 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import dataclasses
|
|
4
|
+
import json
|
|
4
5
|
import logging
|
|
5
6
|
from uuid import UUID
|
|
6
7
|
|
|
7
8
|
import sqlalchemy as sql
|
|
8
9
|
|
|
9
|
-
from .schema_object import SchemaObject
|
|
10
10
|
from pixeltable.env import Env
|
|
11
11
|
from pixeltable.metadata import schema
|
|
12
12
|
|
|
13
|
+
from .schema_object import SchemaObject
|
|
13
14
|
|
|
14
15
|
_logger = logging.getLogger('pixeltable')
|
|
15
16
|
|
|
17
|
+
|
|
16
18
|
class Dir(SchemaObject):
    """In-memory catalog object for a directory; its stored form is a `schema.Dir` record."""

    def __init__(self, id: UUID, parent_id: UUID, name: str):
        # SchemaObject takes its arguments in the order (id, name, parent_id)
        super().__init__(id, name, parent_id)

    @classmethod
    def _create(cls, parent_id: UUID, name: str) -> Dir:
        """Add a new `schema.Dir` record under `parent_id` and return the matching `Dir` object.

        The record is added to the current Env session and flushed; after the flush the record is
        expected to carry a UUID id (asserted), which becomes the id of the returned object.
        """
        session = Env.get().session
        user = Env.get().user
        assert session is not None
        md = schema.DirMd(name=name, user=user, additional_md={})
        record = schema.Dir(parent_id=parent_id, md=dataclasses.asdict(md))
        session.add(record)
        session.flush()
        assert isinstance(record.id, UUID)
        return cls(record.id, parent_id, name)

    def _display_name(self) -> str:
        """Returns the display name for this kind of schema object."""
        return 'directory'

    def _path(self) -> str:
        """Returns the path to this schema object; the root dir (no parent id) has the empty path."""
        return '' if self._dir_id is None else super()._path()

    def _move(self, new_name: str, new_dir_id: UUID) -> None:
        """Rename/re-parent this directory, both in memory (via the superclass) and in its stored record.

        The UPDATE statement sets the parent id and overwrites the 'name' key of the md JSON via jsonb_set.
        """
        super()._move(new_name, new_dir_id)
        update_sql = (
            f'UPDATE {schema.Dir.__table__} '
            f'SET {schema.Dir.parent_id.name} = :new_dir_id, '
            f" {schema.Dir.md.name} = jsonb_set({schema.Dir.md.name}, '{{name}}', (:new_name)::jsonb) "
            f'WHERE {schema.Dir.id.name} = :id'
        )
        stmt = sql.text(update_sql)
        conn = Env.get().conn
        # the new name is bound as a JSON-encoded string because the statement casts it to jsonb
        conn.execute(stmt, {'new_dir_id': new_dir_id, 'new_name': json.dumps(new_name), 'id': self._id})
|
pixeltable/catalog/globals.py
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
-
|
|
2
|
+
|
|
3
3
|
import enum
|
|
4
4
|
import itertools
|
|
5
5
|
import logging
|
|
6
|
-
from
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from uuid import UUID
|
|
7
8
|
|
|
8
9
|
import pixeltable.exceptions as excs
|
|
9
10
|
|
|
@@ -15,28 +16,15 @@ _ROWID_COLUMN_NAME = '_rowid'
|
|
|
15
16
|
|
|
16
17
|
# Set of symbols that are predefined in the `InsertableTable` class (and are therefore not allowed as column names).
|
|
17
18
|
# This will be populated lazily to avoid circular imports.
|
|
18
|
-
_PREDEF_SYMBOLS:
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
@
|
|
22
|
-
class
|
|
23
|
-
"""
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
# TODO: disambiguate what this means: # of slots computed or # of columns computed?
|
|
28
|
-
num_computed_values: int = 0
|
|
29
|
-
num_excs: int = 0
|
|
30
|
-
updated_cols: list[str] = dataclasses.field(default_factory=list)
|
|
31
|
-
cols_with_excs: list[str] = dataclasses.field(default_factory=list)
|
|
32
|
-
|
|
33
|
-
def __iadd__(self, other: 'UpdateStatus') -> 'UpdateStatus':
|
|
34
|
-
self.num_rows += other.num_rows
|
|
35
|
-
self.num_computed_values += other.num_computed_values
|
|
36
|
-
self.num_excs += other.num_excs
|
|
37
|
-
self.updated_cols = list(dict.fromkeys(self.updated_cols + other.updated_cols))
|
|
38
|
-
self.cols_with_excs = list(dict.fromkeys(self.cols_with_excs + other.cols_with_excs))
|
|
39
|
-
return self
|
|
19
|
+
_PREDEF_SYMBOLS: set[str] | None = None
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass(frozen=True)
class QColumnId:
    """Qualified column id: a `(tbl_id, col_id)` pair.

    The dataclass is frozen, so instances are immutable, compare by value and are hashable.
    """

    # UUID half of the qualified id
    tbl_id: UUID
    # integer half of the qualified id
    col_id: int
|
|
40
28
|
|
|
41
29
|
|
|
42
30
|
class MediaValidation(enum.Enum):
|
|
@@ -48,26 +36,57 @@ class MediaValidation(enum.Enum):
|
|
|
48
36
|
try:
|
|
49
37
|
return cls[name.upper()]
|
|
50
38
|
except KeyError:
|
|
51
|
-
val_strs = ', '.join(f'{s.lower()!r}' for s in cls.__members__
|
|
52
|
-
raise excs.Error(f'{error_prefix} must be one of: [{val_strs}]')
|
|
39
|
+
val_strs = ', '.join(f'{s.lower()!r}' for s in cls.__members__)
|
|
40
|
+
raise excs.Error(f'{error_prefix} must be one of: [{val_strs}]') from None
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class IfExistsParam(enum.Enum):
    """Allowed values of an 'if exists' parameter; `validated()` parses them from strings."""

    ERROR = 0
    IGNORE = 1
    REPLACE = 2
    REPLACE_FORCE = 3

    @classmethod
    def validated(cls, param_val: str, param_name: str) -> IfExistsParam:
        """Return the member whose name matches `param_val`, ignoring case.

        Raises:
            excs.Error: if `param_val` names no member; the message starts with `param_name` and lists
                the accepted values in lower case.
        """
        member = cls.__members__.get(param_val.upper())
        if member is not None:
            return member
        allowed = ', '.join(repr(key.lower()) for key in cls.__members__)
        raise excs.Error(f'{param_name} must be one of: [{allowed}]') from None
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class IfNotExistsParam(enum.Enum):
    """Allowed values of an 'if not exists' parameter; `validated()` parses them from strings."""

    ERROR = 0
    IGNORE = 1

    @classmethod
    def validated(cls, param_val: str, param_name: str) -> IfNotExistsParam:
        """Return the member whose name matches `param_val`, ignoring case.

        Raises:
            excs.Error: if `param_val` names no member; the message starts with `param_name` and lists
                the accepted values in lower case.
        """
        key = param_val.upper()
        if key in cls.__members__:
            return cls.__members__[key]
        choices = ', '.join(repr(member_name.lower()) for member_name in cls.__members__)
        raise excs.Error(f'{param_name} must be one of: [{choices}]') from None
|
|
69
|
+
|
|
53
70
|
|
|
71
|
+
def is_valid_identifier(name: str, *, allow_system_identifiers: bool = False, allow_hyphens: bool = False) -> bool:
    """Check whether `name` is an acceptable identifier.

    Args:
        name: the candidate name.
        allow_system_identifiers: if True, names starting with an underscore are accepted.
        allow_hyphens: if True, hyphens may appear inside the name (they are treated like underscores
            for the identifier check).

    Returns:
        True if `name` is a Python identifier (after the optional hyphen substitution), does not start
        with a hyphen, and does not start with an underscore unless system identifiers are allowed.
    """
    # a leading hyphen is never permitted, regardless of either flag
    if name.startswith('-'):
        return False
    if name.startswith('_') and not allow_system_identifiers:
        return False
    candidate = name.replace('-', '_') if allow_hyphens else name
    return candidate.isidentifier()
|
|
54
78
|
|
|
55
|
-
def is_valid_identifier(name: str) -> bool:
|
|
56
|
-
return name.isidentifier() and not name.startswith('_')
|
|
57
79
|
|
|
58
|
-
def is_valid_path(path: str, empty_is_valid : bool) -> bool:
|
|
80
|
+
def is_valid_path(path: str, empty_is_valid: bool, allow_system_paths: bool = False) -> bool:
    """Check whether `path` is a dot-separated sequence of valid identifiers.

    Args:
        path: the candidate path, with components separated by '.'.
        empty_is_valid: the result to return for the empty path.
        allow_system_paths: passed to `is_valid_identifier()` as `allow_system_identifiers`.

    Returns:
        `empty_is_valid` for the empty path; otherwise True iff every component is a valid identifier.
    """
    if path == '':
        return empty_is_valid
    for component in path.split('.'):
        if not is_valid_identifier(component, allow_system_identifiers=allow_system_paths):
            return False
    return True
|
|
61
84
|
|
|
62
|
-
for part in path.split('.'):
|
|
63
|
-
if not is_valid_identifier(part):
|
|
64
|
-
return False
|
|
65
|
-
return True
|
|
66
85
|
|
|
67
86
|
def is_system_column_name(name: str) -> bool:
    """Return True if `name` is reserved and therefore unavailable as a user column name.

    A name is reserved if it equals `_POS_COLUMN_NAME` or is an attribute name (as reported by `dir()`)
    of `InsertableTable` or `View`. The attribute-name set is computed on the first call and cached in
    the module-level `_PREDEF_SYMBOLS`.
    """
    # imported inside the function: the module-level comment on _PREDEF_SYMBOLS cites circular imports
    from pixeltable.catalog import InsertableTable, View

    global _PREDEF_SYMBOLS  # noqa: PLW0603
    if _PREDEF_SYMBOLS is None:
        _PREDEF_SYMBOLS = set(dir(InsertableTable)) | set(dir(View))
    if name == _POS_COLUMN_NAME:
        return True
    return name in _PREDEF_SYMBOLS
|