pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +83 -19
- pixeltable/_query.py +1444 -0
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +7 -4
- pixeltable/catalog/catalog.py +2394 -119
- pixeltable/catalog/column.py +225 -104
- pixeltable/catalog/dir.py +38 -9
- pixeltable/catalog/globals.py +53 -34
- pixeltable/catalog/insertable_table.py +265 -115
- pixeltable/catalog/path.py +80 -17
- pixeltable/catalog/schema_object.py +28 -43
- pixeltable/catalog/table.py +1270 -677
- pixeltable/catalog/table_metadata.py +103 -0
- pixeltable/catalog/table_version.py +1270 -751
- pixeltable/catalog/table_version_handle.py +109 -0
- pixeltable/catalog/table_version_path.py +137 -42
- pixeltable/catalog/tbl_ops.py +53 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +251 -134
- pixeltable/config.py +215 -0
- pixeltable/env.py +736 -285
- pixeltable/exceptions.py +26 -2
- pixeltable/exec/__init__.py +7 -2
- pixeltable/exec/aggregation_node.py +39 -21
- pixeltable/exec/cache_prefetch_node.py +87 -109
- pixeltable/exec/cell_materialization_node.py +268 -0
- pixeltable/exec/cell_reconstruction_node.py +168 -0
- pixeltable/exec/component_iteration_node.py +25 -28
- pixeltable/exec/data_row_batch.py +11 -46
- pixeltable/exec/exec_context.py +26 -11
- pixeltable/exec/exec_node.py +35 -27
- pixeltable/exec/expr_eval/__init__.py +3 -0
- pixeltable/exec/expr_eval/evaluators.py +365 -0
- pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
- pixeltable/exec/expr_eval/globals.py +200 -0
- pixeltable/exec/expr_eval/row_buffer.py +74 -0
- pixeltable/exec/expr_eval/schedulers.py +413 -0
- pixeltable/exec/globals.py +35 -0
- pixeltable/exec/in_memory_data_node.py +35 -27
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +44 -29
- pixeltable/exec/sql_node.py +414 -115
- pixeltable/exprs/__init__.py +8 -5
- pixeltable/exprs/arithmetic_expr.py +79 -45
- pixeltable/exprs/array_slice.py +5 -5
- pixeltable/exprs/column_property_ref.py +40 -26
- pixeltable/exprs/column_ref.py +254 -61
- pixeltable/exprs/comparison.py +14 -9
- pixeltable/exprs/compound_predicate.py +9 -10
- pixeltable/exprs/data_row.py +213 -72
- pixeltable/exprs/expr.py +270 -104
- pixeltable/exprs/expr_dict.py +6 -5
- pixeltable/exprs/expr_set.py +20 -11
- pixeltable/exprs/function_call.py +383 -284
- pixeltable/exprs/globals.py +18 -5
- pixeltable/exprs/in_predicate.py +7 -7
- pixeltable/exprs/inline_expr.py +37 -37
- pixeltable/exprs/is_null.py +8 -4
- pixeltable/exprs/json_mapper.py +120 -54
- pixeltable/exprs/json_path.py +90 -60
- pixeltable/exprs/literal.py +61 -16
- pixeltable/exprs/method_ref.py +7 -6
- pixeltable/exprs/object_ref.py +19 -8
- pixeltable/exprs/row_builder.py +238 -75
- pixeltable/exprs/rowid_ref.py +53 -15
- pixeltable/exprs/similarity_expr.py +65 -50
- pixeltable/exprs/sql_element_cache.py +5 -5
- pixeltable/exprs/string_op.py +107 -0
- pixeltable/exprs/type_cast.py +25 -13
- pixeltable/exprs/variable.py +2 -2
- pixeltable/func/__init__.py +9 -5
- pixeltable/func/aggregate_function.py +197 -92
- pixeltable/func/callable_function.py +119 -35
- pixeltable/func/expr_template_function.py +101 -48
- pixeltable/func/function.py +375 -62
- pixeltable/func/function_registry.py +20 -19
- pixeltable/func/globals.py +6 -5
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +151 -35
- pixeltable/func/signature.py +178 -49
- pixeltable/func/tools.py +164 -0
- pixeltable/func/udf.py +176 -53
- pixeltable/functions/__init__.py +44 -4
- pixeltable/functions/anthropic.py +226 -47
- pixeltable/functions/audio.py +148 -11
- pixeltable/functions/bedrock.py +137 -0
- pixeltable/functions/date.py +188 -0
- pixeltable/functions/deepseek.py +113 -0
- pixeltable/functions/document.py +81 -0
- pixeltable/functions/fal.py +76 -0
- pixeltable/functions/fireworks.py +72 -20
- pixeltable/functions/gemini.py +249 -0
- pixeltable/functions/globals.py +208 -53
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1088 -95
- pixeltable/functions/image.py +155 -84
- pixeltable/functions/json.py +8 -11
- pixeltable/functions/llama_cpp.py +31 -19
- pixeltable/functions/math.py +169 -0
- pixeltable/functions/mistralai.py +50 -75
- pixeltable/functions/net.py +70 -0
- pixeltable/functions/ollama.py +29 -36
- pixeltable/functions/openai.py +548 -160
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +15 -14
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +310 -85
- pixeltable/functions/timestamp.py +37 -19
- pixeltable/functions/together.py +77 -120
- pixeltable/functions/twelvelabs.py +188 -0
- pixeltable/functions/util.py +7 -2
- pixeltable/functions/uuid.py +30 -0
- pixeltable/functions/video.py +1528 -117
- pixeltable/functions/vision.py +26 -26
- pixeltable/functions/voyageai.py +289 -0
- pixeltable/functions/whisper.py +19 -10
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/functions/yolox.py +112 -0
- pixeltable/globals.py +716 -236
- pixeltable/index/__init__.py +3 -1
- pixeltable/index/base.py +17 -21
- pixeltable/index/btree.py +32 -22
- pixeltable/index/embedding_index.py +155 -92
- pixeltable/io/__init__.py +12 -7
- pixeltable/io/datarows.py +140 -0
- pixeltable/io/external_store.py +83 -125
- pixeltable/io/fiftyone.py +24 -33
- pixeltable/io/globals.py +47 -182
- pixeltable/io/hf_datasets.py +96 -127
- pixeltable/io/label_studio.py +171 -156
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +136 -115
- pixeltable/io/parquet.py +40 -153
- pixeltable/io/table_data_conduit.py +702 -0
- pixeltable/io/utils.py +100 -0
- pixeltable/iterators/__init__.py +8 -4
- pixeltable/iterators/audio.py +207 -0
- pixeltable/iterators/base.py +9 -3
- pixeltable/iterators/document.py +144 -87
- pixeltable/iterators/image.py +17 -38
- pixeltable/iterators/string.py +15 -12
- pixeltable/iterators/video.py +523 -127
- pixeltable/metadata/__init__.py +33 -8
- pixeltable/metadata/converters/convert_10.py +2 -3
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_15.py +15 -11
- pixeltable/metadata/converters/convert_16.py +4 -5
- pixeltable/metadata/converters/convert_17.py +4 -5
- pixeltable/metadata/converters/convert_18.py +4 -6
- pixeltable/metadata/converters/convert_19.py +6 -9
- pixeltable/metadata/converters/convert_20.py +3 -6
- pixeltable/metadata/converters/convert_21.py +6 -8
- pixeltable/metadata/converters/convert_22.py +3 -2
- pixeltable/metadata/converters/convert_23.py +33 -0
- pixeltable/metadata/converters/convert_24.py +55 -0
- pixeltable/metadata/converters/convert_25.py +19 -0
- pixeltable/metadata/converters/convert_26.py +23 -0
- pixeltable/metadata/converters/convert_27.py +29 -0
- pixeltable/metadata/converters/convert_28.py +13 -0
- pixeltable/metadata/converters/convert_29.py +110 -0
- pixeltable/metadata/converters/convert_30.py +63 -0
- pixeltable/metadata/converters/convert_31.py +11 -0
- pixeltable/metadata/converters/convert_32.py +15 -0
- pixeltable/metadata/converters/convert_33.py +17 -0
- pixeltable/metadata/converters/convert_34.py +21 -0
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/convert_41.py +12 -0
- pixeltable/metadata/converters/convert_42.py +9 -0
- pixeltable/metadata/converters/convert_43.py +44 -0
- pixeltable/metadata/converters/util.py +44 -18
- pixeltable/metadata/notes.py +21 -0
- pixeltable/metadata/schema.py +185 -42
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +616 -225
- pixeltable/share/__init__.py +3 -0
- pixeltable/share/packager.py +797 -0
- pixeltable/share/protocol/__init__.py +33 -0
- pixeltable/share/protocol/common.py +165 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +119 -0
- pixeltable/share/publish.py +349 -0
- pixeltable/store.py +398 -232
- pixeltable/type_system.py +730 -267
- pixeltable/utils/__init__.py +40 -0
- pixeltable/utils/arrow.py +201 -29
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +346 -0
- pixeltable/utils/coco.py +26 -27
- pixeltable/utils/code.py +4 -4
- pixeltable/utils/console_output.py +46 -0
- pixeltable/utils/coroutine.py +24 -0
- pixeltable/utils/dbms.py +92 -0
- pixeltable/utils/description_helper.py +11 -12
- pixeltable/utils/documents.py +60 -61
- pixeltable/utils/exception_handler.py +36 -0
- pixeltable/utils/filecache.py +38 -22
- pixeltable/utils/formatter.py +88 -51
- pixeltable/utils/gcs_store.py +295 -0
- pixeltable/utils/http.py +133 -0
- pixeltable/utils/http_server.py +14 -13
- pixeltable/utils/iceberg.py +13 -0
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +90 -0
- pixeltable/utils/local_store.py +322 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +573 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +20 -20
- pixeltable/utils/s3_store.py +527 -0
- pixeltable/utils/sql.py +32 -5
- pixeltable/utils/system.py +30 -0
- pixeltable/utils/transactional_directory.py +4 -3
- pixeltable-0.5.7.dist-info/METADATA +579 -0
- pixeltable-0.5.7.dist-info/RECORD +227 -0
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
- pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/catalog/named_function.py +0 -36
- pixeltable/catalog/path_dict.py +0 -141
- pixeltable/dataframe.py +0 -894
- pixeltable/exec/expr_eval_node.py +0 -232
- pixeltable/ext/__init__.py +0 -14
- pixeltable/ext/functions/__init__.py +0 -8
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/ext/functions/yolox.py +0 -157
- pixeltable/tool/create_test_db_dump.py +0 -311
- pixeltable/tool/create_test_video.py +0 -81
- pixeltable/tool/doc_plugins/griffe.py +0 -50
- pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
- pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
- pixeltable/tool/embed_udf.py +0 -9
- pixeltable/tool/mypy_plugin.py +0 -55
- pixeltable/utils/media_store.py +0 -76
- pixeltable/utils/s3.py +0 -16
- pixeltable-0.2.26.dist-info/METADATA +0 -400
- pixeltable-0.2.26.dist-info/RECORD +0 -156
- pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
|
@@ -1,4 +1,6 @@
|
|
|
1
|
-
from
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING, Any
|
|
2
4
|
|
|
3
5
|
import sqlalchemy as sql
|
|
4
6
|
|
|
@@ -12,77 +14,90 @@ from .literal import Literal
|
|
|
12
14
|
from .row_builder import RowBuilder
|
|
13
15
|
from .sql_element_cache import SqlElementCache
|
|
14
16
|
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from pixeltable.catalog.table_version import TableVersion
|
|
19
|
+
|
|
15
20
|
|
|
16
21
|
class SimilarityExpr(Expr):
|
|
22
|
+
"""
|
|
23
|
+
A similarity expression against an embedding index.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
idx_id: int
|
|
27
|
+
idx_name: str
|
|
28
|
+
|
|
29
|
+
def __init__(self, col_ref: ColumnRef, item: Expr, idx_name: str | None = None):
|
|
30
|
+
from pixeltable.index import EmbeddingIndex
|
|
17
31
|
|
|
18
|
-
def __init__(self, col_ref: ColumnRef, item: Any, idx_name: Optional[str] = None):
|
|
19
32
|
super().__init__(ts.FloatType())
|
|
20
|
-
item_expr = Expr.from_object(item)
|
|
21
|
-
if item_expr is None or not(item_expr.col_type.is_string_type() or item_expr.col_type.is_image_type()):
|
|
22
|
-
raise excs.Error(f'similarity(): requires a string or a PIL.Image.Image object, not a {type(item)}')
|
|
23
|
-
assert item_expr.col_type.is_string_type() or item_expr.col_type.is_image_type()
|
|
24
33
|
|
|
25
|
-
self.components = [col_ref,
|
|
26
|
-
self.id = self._create_id()
|
|
34
|
+
self.components = [col_ref, item]
|
|
27
35
|
|
|
28
36
|
# determine index to use
|
|
29
|
-
idx_info = col_ref.
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
if len(embedding_idx_info) > 1:
|
|
39
|
-
if idx_name is None:
|
|
40
|
-
raise excs.Error(
|
|
41
|
-
f'Column {col_ref.col.name!r} has multiple indices; use the index name to disambiguate: '
|
|
42
|
-
f'`{col_ref.col.name}.similarity(..., idx=<name>)`')
|
|
43
|
-
self.idx_info = embedding_idx_info[idx_name]
|
|
44
|
-
else:
|
|
45
|
-
self.idx_info = next(iter(embedding_idx_info.values()))
|
|
46
|
-
idx = self.idx_info.idx
|
|
47
|
-
assert isinstance(idx, index.EmbeddingIndex)
|
|
48
|
-
|
|
49
|
-
if item_expr.col_type.is_string_type() and idx.string_embed is None:
|
|
50
|
-
raise excs.Error(
|
|
51
|
-
f'Embedding index {self.idx_info.name!r} on column {self.idx_info.col.name!r} was created without the '
|
|
52
|
-
f"'string_embed' parameter and does not support string queries")
|
|
53
|
-
if item_expr.col_type.is_image_type() and idx.image_embed is None:
|
|
37
|
+
idx_info = col_ref.tbl.get().get_idx(col_ref.col, idx_name, EmbeddingIndex)
|
|
38
|
+
self.idx_id = idx_info.id
|
|
39
|
+
self.idx_name = idx_info.name
|
|
40
|
+
idx = idx_info.idx
|
|
41
|
+
assert isinstance(idx, EmbeddingIndex)
|
|
42
|
+
|
|
43
|
+
if item.col_type._type not in idx.embeddings:
|
|
44
|
+
type_str = item.col_type._type.name.lower()
|
|
45
|
+
article = 'an' if type_str[0] in 'aeiou' else 'a'
|
|
54
46
|
raise excs.Error(
|
|
55
|
-
f'Embedding index {
|
|
56
|
-
f
|
|
47
|
+
f'Embedding index {idx_info.name!r} on column {idx_info.col.name!r} does not have {article} '
|
|
48
|
+
f'{type_str} embedding and does not support {type_str} queries'
|
|
49
|
+
)
|
|
50
|
+
self.id = self._create_id()
|
|
57
51
|
|
|
58
52
|
def __repr__(self) -> str:
|
|
59
53
|
return f'{self.components[0]}.similarity({self.components[1]})'
|
|
60
54
|
|
|
55
|
+
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
56
|
+
return [*super()._id_attrs(), ('idx_id', self.idx_id)]
|
|
57
|
+
|
|
61
58
|
def default_column_name(self) -> str:
|
|
62
59
|
return 'similarity'
|
|
63
60
|
|
|
64
|
-
def sql_expr(self, _: SqlElementCache) ->
|
|
61
|
+
def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
|
|
62
|
+
from pixeltable.index import EmbeddingIndex
|
|
63
|
+
|
|
64
|
+
# check for a literal here, instead of the c'tor: needed for ExprTemplateFunctions
|
|
65
65
|
if not isinstance(self.components[1], Literal):
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
66
|
+
raise excs.Error('similarity(): requires a value, not an expression')
|
|
67
|
+
idx_info = self._resolve_idx()
|
|
68
|
+
assert isinstance(idx_info.idx, EmbeddingIndex)
|
|
69
|
+
return idx_info.idx.similarity_clause(idx_info.val_col, self.components[1])
|
|
70
|
+
|
|
71
|
+
def as_order_by_clause(self, is_asc: bool) -> sql.ColumnElement | None:
|
|
72
|
+
from pixeltable.index import EmbeddingIndex
|
|
71
73
|
|
|
72
|
-
|
|
74
|
+
# check for a literal here, instead of the c'tor: needed for ExprTemplateFunctions
|
|
73
75
|
if not isinstance(self.components[1], Literal):
|
|
74
|
-
raise excs.Error(
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
76
|
+
raise excs.Error('similarity(): requires a value, not an expression')
|
|
77
|
+
idx_info = self._resolve_idx()
|
|
78
|
+
assert isinstance(idx_info.idx, EmbeddingIndex)
|
|
79
|
+
return idx_info.idx.order_by_clause(idx_info.val_col, self.components[1], is_asc)
|
|
80
|
+
|
|
81
|
+
def _resolve_idx(self) -> 'TableVersion.IndexInfo':
|
|
82
|
+
from pixeltable.index import EmbeddingIndex
|
|
83
|
+
|
|
84
|
+
# resolve idx_id
|
|
85
|
+
col_ref = self.components[0]
|
|
86
|
+
if self.idx_id not in col_ref.tbl.get().idxs:
|
|
87
|
+
raise excs.Error(f'Index {self.idx_name!r} not found')
|
|
88
|
+
idx_info = col_ref.tbl.get().idxs[self.idx_id]
|
|
89
|
+
assert isinstance(idx_info.idx, EmbeddingIndex)
|
|
90
|
+
return idx_info
|
|
79
91
|
|
|
80
92
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
81
|
-
|
|
82
|
-
|
|
93
|
+
raise excs.Error('similarity(): cannot be used in a computed column')
|
|
94
|
+
|
|
95
|
+
def _as_dict(self) -> dict:
|
|
96
|
+
return {'idx_name': self.idx_name, **super()._as_dict()}
|
|
83
97
|
|
|
84
98
|
@classmethod
|
|
85
99
|
def _from_dict(cls, d: dict, components: list[Expr]) -> 'SimilarityExpr':
|
|
100
|
+
idx_name = d.get('idx_name')
|
|
86
101
|
assert len(components) == 2
|
|
87
102
|
assert isinstance(components[0], ColumnRef)
|
|
88
|
-
return cls(components[0], components[1])
|
|
103
|
+
return cls(components[0], components[1], idx_name=idx_name)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Iterable
|
|
1
|
+
from typing import Iterable
|
|
2
2
|
|
|
3
3
|
import sqlalchemy as sql
|
|
4
4
|
|
|
@@ -9,19 +9,19 @@ from .expr_dict import ExprDict
|
|
|
9
9
|
class SqlElementCache:
|
|
10
10
|
"""Cache of sql.ColumnElements for exprs"""
|
|
11
11
|
|
|
12
|
-
cache: dict[int,
|
|
12
|
+
cache: dict[int, sql.ColumnElement | None] # key: Expr.id
|
|
13
13
|
|
|
14
|
-
def __init__(self, elements:
|
|
14
|
+
def __init__(self, elements: ExprDict[sql.ColumnElement] | None = None):
|
|
15
15
|
self.cache = {}
|
|
16
16
|
if elements is not None:
|
|
17
17
|
for e, el in elements.items():
|
|
18
18
|
self.cache[e.id] = el
|
|
19
19
|
|
|
20
|
-
def extend(self, elements: ExprDict[sql.ColumnElement]):
|
|
20
|
+
def extend(self, elements: ExprDict[sql.ColumnElement]) -> None:
|
|
21
21
|
for e, el in elements.items():
|
|
22
22
|
self.cache[e.id] = el
|
|
23
23
|
|
|
24
|
-
def get(self, e: Expr) ->
|
|
24
|
+
def get(self, e: Expr) -> sql.ColumnElement | None:
|
|
25
25
|
"""Returns the sql.ColumnElement for the given Expr, or None if Expr.to_sql() returns None."""
|
|
26
26
|
try:
|
|
27
27
|
return self.cache[e.id]
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
import sqlalchemy as sql
|
|
6
|
+
|
|
7
|
+
import pixeltable.exceptions as excs
|
|
8
|
+
import pixeltable.type_system as ts
|
|
9
|
+
|
|
10
|
+
from .data_row import DataRow
|
|
11
|
+
from .expr import Expr
|
|
12
|
+
from .globals import StringOperator
|
|
13
|
+
from .row_builder import RowBuilder
|
|
14
|
+
from .sql_element_cache import SqlElementCache
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class StringOp(Expr):
|
|
18
|
+
"""
|
|
19
|
+
Allows operations on strings
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
operator: StringOperator
|
|
23
|
+
|
|
24
|
+
def __init__(self, operator: StringOperator, op1: Expr, op2: Expr):
|
|
25
|
+
super().__init__(ts.StringType(nullable=op1.col_type.nullable))
|
|
26
|
+
self.operator = operator
|
|
27
|
+
self.components = [op1, op2]
|
|
28
|
+
assert op1.col_type.is_string_type()
|
|
29
|
+
if operator in (StringOperator.CONCAT, StringOperator.REPEAT):
|
|
30
|
+
if operator == StringOperator.CONCAT and not op2.col_type.is_string_type():
|
|
31
|
+
raise excs.Error(
|
|
32
|
+
f'{self}: {operator} on strings requires string type, but {op2} has type {op2.col_type}'
|
|
33
|
+
)
|
|
34
|
+
if operator == StringOperator.REPEAT and not op2.col_type.is_int_type():
|
|
35
|
+
raise excs.Error(f'{self}: {operator} on strings requires int type, but {op2} has type {op2.col_type}')
|
|
36
|
+
else:
|
|
37
|
+
raise excs.Error(
|
|
38
|
+
f'{self}: invalid operation {operator} on strings; '
|
|
39
|
+
f'only operators {StringOperator.CONCAT} and {StringOperator.REPEAT} are supported'
|
|
40
|
+
)
|
|
41
|
+
self.id = self._create_id()
|
|
42
|
+
|
|
43
|
+
@property
|
|
44
|
+
def _op1(self) -> Expr:
|
|
45
|
+
return self.components[0]
|
|
46
|
+
|
|
47
|
+
@property
|
|
48
|
+
def _op2(self) -> Expr:
|
|
49
|
+
return self.components[1]
|
|
50
|
+
|
|
51
|
+
def __repr__(self) -> str:
|
|
52
|
+
# add parentheses around operands that are StringOpExpr to express precedence
|
|
53
|
+
op1_str = f'({self._op1})' if isinstance(self._op1, StringOp) else str(self._op1)
|
|
54
|
+
op2_str = f'({self._op2})' if isinstance(self._op2, StringOp) else str(self._op2)
|
|
55
|
+
return f'{op1_str} {self.operator} {op2_str}'
|
|
56
|
+
|
|
57
|
+
def _equals(self, other: StringOp) -> bool:
|
|
58
|
+
return self.operator == other.operator
|
|
59
|
+
|
|
60
|
+
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
61
|
+
return [*super()._id_attrs(), ('operator', self.operator.value)]
|
|
62
|
+
|
|
63
|
+
def sql_expr(self, sql_elements: SqlElementCache) -> sql.ColumnElement | None:
|
|
64
|
+
left = sql_elements.get(self._op1)
|
|
65
|
+
right = sql_elements.get(self._op2)
|
|
66
|
+
if left is None or right is None:
|
|
67
|
+
return None
|
|
68
|
+
if self.operator == StringOperator.CONCAT:
|
|
69
|
+
return left.concat(right)
|
|
70
|
+
if self.operator == StringOperator.REPEAT:
|
|
71
|
+
return sql.func.repeat(left.cast(sql.String), right.cast(sql.Integer))
|
|
72
|
+
return None
|
|
73
|
+
|
|
74
|
+
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
75
|
+
op1_val = data_row[self._op1.slot_idx]
|
|
76
|
+
op2_val = data_row[self._op2.slot_idx]
|
|
77
|
+
data_row[self.slot_idx] = self.eval_nullable(op1_val, op2_val)
|
|
78
|
+
|
|
79
|
+
def eval_nullable(self, op1_val: str | None, op2_val: int | str | None) -> str | None:
|
|
80
|
+
"""
|
|
81
|
+
Return the result of evaluating the expression on two nullable int/float operands,
|
|
82
|
+
None is interpreted as SQL NULL
|
|
83
|
+
"""
|
|
84
|
+
if op1_val is None or op2_val is None:
|
|
85
|
+
return None
|
|
86
|
+
return self.eval_non_null(op1_val, op2_val)
|
|
87
|
+
|
|
88
|
+
def eval_non_null(self, op1_val: str, op2_val: int | str) -> str:
|
|
89
|
+
"""
|
|
90
|
+
Return the result of evaluating the expression on two int/float operands
|
|
91
|
+
"""
|
|
92
|
+
assert self.operator in (StringOperator.CONCAT, StringOperator.REPEAT)
|
|
93
|
+
if self.operator == StringOperator.CONCAT:
|
|
94
|
+
assert isinstance(op2_val, str)
|
|
95
|
+
return op1_val + op2_val
|
|
96
|
+
else:
|
|
97
|
+
assert isinstance(op2_val, int)
|
|
98
|
+
return op1_val * op2_val
|
|
99
|
+
|
|
100
|
+
def _as_dict(self) -> dict:
|
|
101
|
+
return {'operator': self.operator.value, **super()._as_dict()}
|
|
102
|
+
|
|
103
|
+
@classmethod
|
|
104
|
+
def _from_dict(cls, d: dict, components: list[Expr]) -> StringOp:
|
|
105
|
+
assert 'operator' in d
|
|
106
|
+
assert len(components) == 2
|
|
107
|
+
return cls(StringOperator(d['operator']), components[0], components[1])
|
pixeltable/exprs/type_cast.py
CHANGED
|
@@ -1,10 +1,9 @@
|
|
|
1
|
-
from typing import Any, Optional
|
|
2
|
-
|
|
3
1
|
import sqlalchemy as sql
|
|
4
2
|
|
|
5
|
-
import
|
|
3
|
+
from pixeltable import type_system as ts
|
|
6
4
|
|
|
7
5
|
from .expr import DataRow, Expr
|
|
6
|
+
from .literal import Literal
|
|
8
7
|
from .row_builder import RowBuilder
|
|
9
8
|
from .sql_element_cache import SqlElementCache
|
|
10
9
|
|
|
@@ -14,23 +13,21 @@ class TypeCast(Expr):
|
|
|
14
13
|
An `Expr` that represents a type conversion from an underlying `Expr` to
|
|
15
14
|
a specified `ColumnType`.
|
|
16
15
|
"""
|
|
16
|
+
|
|
17
17
|
def __init__(self, underlying: Expr, new_type: ts.ColumnType):
|
|
18
18
|
super().__init__(new_type)
|
|
19
19
|
self.components: list[Expr] = [underlying]
|
|
20
|
-
self.id:
|
|
21
|
-
|
|
22
|
-
@property
|
|
23
|
-
def _underlying(self):
|
|
24
|
-
return self.components[0]
|
|
20
|
+
self.id: int | None = self._create_id()
|
|
25
21
|
|
|
26
22
|
def _equals(self, other: 'TypeCast') -> bool:
|
|
27
23
|
# `TypeCast` has no properties beyond those captured by `Expr`.
|
|
28
24
|
return True
|
|
29
25
|
|
|
30
|
-
|
|
31
|
-
|
|
26
|
+
@property
|
|
27
|
+
def _op1(self) -> Expr:
|
|
28
|
+
return self.components[0]
|
|
32
29
|
|
|
33
|
-
def sql_expr(self, _: SqlElementCache) ->
|
|
30
|
+
def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
|
|
34
31
|
"""
|
|
35
32
|
sql_expr() is unimplemented for now, in order to sidestep potentially thorny
|
|
36
33
|
questions about consistency of doing type conversions in both Python and Postgres.
|
|
@@ -38,9 +35,24 @@ class TypeCast(Expr):
|
|
|
38
35
|
return None
|
|
39
36
|
|
|
40
37
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
41
|
-
original_val = data_row[self.
|
|
38
|
+
original_val = data_row[self._op1.slot_idx]
|
|
42
39
|
data_row[self.slot_idx] = self.col_type.create_literal(original_val)
|
|
43
40
|
|
|
41
|
+
def as_literal(self) -> Literal | None:
|
|
42
|
+
op1_lit = self._op1.as_literal()
|
|
43
|
+
if op1_lit is None:
|
|
44
|
+
return None
|
|
45
|
+
if not (
|
|
46
|
+
self.col_type.is_numeric_type() and (op1_lit.col_type.is_numeric_type() or op1_lit.col_type.is_bool_type())
|
|
47
|
+
):
|
|
48
|
+
return None
|
|
49
|
+
|
|
50
|
+
op1_val = op1_lit.val
|
|
51
|
+
if self.col_type.is_int_type():
|
|
52
|
+
return Literal(int(op1_val), self.col_type)
|
|
53
|
+
elif self.col_type.is_float_type():
|
|
54
|
+
return Literal(float(op1_val), self.col_type)
|
|
55
|
+
return None
|
|
44
56
|
|
|
45
57
|
def _as_dict(self) -> dict:
|
|
46
58
|
return {'new_type': self.col_type.as_dict(), **super()._as_dict()}
|
|
@@ -52,4 +64,4 @@ class TypeCast(Expr):
|
|
|
52
64
|
return cls(components[0], ts.ColumnType.from_dict(d['new_type']))
|
|
53
65
|
|
|
54
66
|
def __repr__(self) -> str:
|
|
55
|
-
return f'{self.
|
|
67
|
+
return f'{self._op1}.astype({self.col_type._to_str(as_schema=True)})'
|
pixeltable/exprs/variable.py
CHANGED
|
@@ -2,7 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
from typing import Any, NoReturn
|
|
4
4
|
|
|
5
|
-
import
|
|
5
|
+
from pixeltable import type_system as ts
|
|
6
6
|
|
|
7
7
|
from .data_row import DataRow
|
|
8
8
|
from .expr import Expr
|
|
@@ -22,7 +22,7 @@ class Variable(Expr):
|
|
|
22
22
|
self.id = self._create_id()
|
|
23
23
|
|
|
24
24
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
25
|
-
return super()._id_attrs()
|
|
25
|
+
return [*super()._id_attrs(), ('name', self.name)]
|
|
26
26
|
|
|
27
27
|
def default_column_name(self) -> NoReturn:
|
|
28
28
|
raise NotImplementedError()
|
pixeltable/func/__init__.py
CHANGED
|
@@ -1,8 +1,12 @@
|
|
|
1
|
-
|
|
1
|
+
# ruff: noqa: F401
|
|
2
|
+
|
|
3
|
+
from .aggregate_function import AggregateFunction, Aggregator, uda
|
|
2
4
|
from .callable_function import CallableFunction
|
|
3
5
|
from .expr_template_function import ExprTemplateFunction
|
|
4
|
-
from .function import Function
|
|
6
|
+
from .function import Function, InvalidFunction
|
|
5
7
|
from .function_registry import FunctionRegistry
|
|
6
|
-
from .
|
|
7
|
-
from .
|
|
8
|
-
from .
|
|
8
|
+
from .mcp import mcp_udfs
|
|
9
|
+
from .query_template_function import QueryTemplateFunction, query, retrieval_udf
|
|
10
|
+
from .signature import Batch, Parameter, Signature
|
|
11
|
+
from .tools import Tool, ToolChoice, Tools
|
|
12
|
+
from .udf import expr_udf, make_function, udf
|