pixeltable 0.4.17__py3-none-any.whl → 0.4.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +1 -1
- pixeltable/_version.py +1 -0
- pixeltable/catalog/catalog.py +144 -118
- pixeltable/catalog/column.py +104 -115
- pixeltable/catalog/globals.py +1 -2
- pixeltable/catalog/insertable_table.py +44 -49
- pixeltable/catalog/path.py +3 -4
- pixeltable/catalog/schema_object.py +4 -4
- pixeltable/catalog/table.py +139 -124
- pixeltable/catalog/table_metadata.py +6 -6
- pixeltable/catalog/table_version.py +315 -246
- pixeltable/catalog/table_version_handle.py +4 -4
- pixeltable/catalog/table_version_path.py +9 -10
- pixeltable/catalog/tbl_ops.py +9 -3
- pixeltable/catalog/view.py +34 -28
- pixeltable/config.py +14 -10
- pixeltable/dataframe.py +69 -78
- pixeltable/env.py +78 -64
- pixeltable/exec/aggregation_node.py +6 -6
- pixeltable/exec/cache_prefetch_node.py +10 -10
- pixeltable/exec/data_row_batch.py +3 -3
- pixeltable/exec/exec_context.py +16 -4
- pixeltable/exec/exec_node.py +5 -5
- pixeltable/exec/expr_eval/evaluators.py +6 -6
- pixeltable/exec/expr_eval/expr_eval_node.py +8 -7
- pixeltable/exec/expr_eval/globals.py +6 -6
- pixeltable/exec/expr_eval/row_buffer.py +1 -2
- pixeltable/exec/expr_eval/schedulers.py +11 -11
- pixeltable/exec/in_memory_data_node.py +2 -2
- pixeltable/exec/object_store_save_node.py +14 -17
- pixeltable/exec/sql_node.py +28 -27
- pixeltable/exprs/arithmetic_expr.py +4 -4
- pixeltable/exprs/array_slice.py +2 -2
- pixeltable/exprs/column_property_ref.py +3 -3
- pixeltable/exprs/column_ref.py +61 -74
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +3 -3
- pixeltable/exprs/data_row.py +12 -12
- pixeltable/exprs/expr.py +41 -31
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +3 -3
- pixeltable/exprs/function_call.py +14 -14
- pixeltable/exprs/in_predicate.py +4 -4
- pixeltable/exprs/inline_expr.py +8 -8
- pixeltable/exprs/is_null.py +1 -3
- pixeltable/exprs/json_mapper.py +8 -8
- pixeltable/exprs/json_path.py +6 -6
- pixeltable/exprs/literal.py +5 -5
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +14 -14
- pixeltable/exprs/rowid_ref.py +8 -8
- pixeltable/exprs/similarity_expr.py +50 -25
- pixeltable/exprs/sql_element_cache.py +4 -4
- pixeltable/exprs/string_op.py +2 -2
- pixeltable/exprs/type_cast.py +3 -5
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/callable_function.py +9 -9
- pixeltable/func/expr_template_function.py +3 -3
- pixeltable/func/function.py +15 -17
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/globals.py +2 -3
- pixeltable/func/mcp.py +2 -2
- pixeltable/func/query_template_function.py +16 -16
- pixeltable/func/signature.py +14 -14
- pixeltable/func/tools.py +11 -11
- pixeltable/func/udf.py +16 -18
- pixeltable/functions/__init__.py +1 -0
- pixeltable/functions/anthropic.py +7 -7
- pixeltable/functions/audio.py +76 -0
- pixeltable/functions/bedrock.py +6 -6
- pixeltable/functions/deepseek.py +4 -4
- pixeltable/functions/fireworks.py +2 -2
- pixeltable/functions/gemini.py +6 -6
- pixeltable/functions/globals.py +12 -12
- pixeltable/functions/groq.py +4 -4
- pixeltable/functions/huggingface.py +1033 -6
- pixeltable/functions/image.py +7 -10
- pixeltable/functions/llama_cpp.py +7 -7
- pixeltable/functions/math.py +2 -3
- pixeltable/functions/mistralai.py +3 -3
- pixeltable/functions/ollama.py +9 -9
- pixeltable/functions/openai.py +21 -21
- pixeltable/functions/openrouter.py +7 -7
- pixeltable/functions/string.py +21 -28
- pixeltable/functions/timestamp.py +7 -8
- pixeltable/functions/together.py +4 -6
- pixeltable/functions/twelvelabs.py +92 -0
- pixeltable/functions/video.py +36 -31
- pixeltable/functions/vision.py +6 -6
- pixeltable/functions/whisper.py +7 -7
- pixeltable/functions/whisperx.py +16 -16
- pixeltable/globals.py +75 -40
- pixeltable/index/base.py +12 -8
- pixeltable/index/btree.py +19 -22
- pixeltable/index/embedding_index.py +30 -39
- pixeltable/io/datarows.py +3 -3
- pixeltable/io/external_store.py +13 -16
- pixeltable/io/fiftyone.py +5 -5
- pixeltable/io/globals.py +5 -5
- pixeltable/io/hf_datasets.py +4 -4
- pixeltable/io/label_studio.py +12 -12
- pixeltable/io/pandas.py +6 -6
- pixeltable/io/parquet.py +2 -2
- pixeltable/io/table_data_conduit.py +12 -12
- pixeltable/io/utils.py +2 -2
- pixeltable/iterators/audio.py +2 -2
- pixeltable/iterators/document.py +88 -57
- pixeltable/iterators/video.py +66 -37
- pixeltable/metadata/converters/convert_18.py +2 -2
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_20.py +2 -2
- pixeltable/metadata/converters/convert_21.py +2 -2
- pixeltable/metadata/converters/convert_22.py +2 -2
- pixeltable/metadata/converters/convert_24.py +2 -2
- pixeltable/metadata/converters/convert_25.py +2 -2
- pixeltable/metadata/converters/convert_26.py +2 -2
- pixeltable/metadata/converters/convert_29.py +4 -4
- pixeltable/metadata/converters/convert_34.py +2 -2
- pixeltable/metadata/converters/convert_36.py +2 -2
- pixeltable/metadata/converters/convert_38.py +2 -2
- pixeltable/metadata/converters/convert_39.py +1 -2
- pixeltable/metadata/converters/util.py +11 -13
- pixeltable/metadata/schema.py +22 -21
- pixeltable/metadata/utils.py +2 -6
- pixeltable/mypy/mypy_plugin.py +5 -5
- pixeltable/plan.py +32 -34
- pixeltable/share/packager.py +7 -7
- pixeltable/share/publish.py +3 -3
- pixeltable/store.py +126 -41
- pixeltable/type_system.py +43 -46
- pixeltable/utils/__init__.py +1 -2
- pixeltable/utils/arrow.py +4 -4
- pixeltable/utils/av.py +74 -38
- pixeltable/utils/azure_store.py +305 -0
- pixeltable/utils/code.py +1 -2
- pixeltable/utils/dbms.py +15 -19
- pixeltable/utils/description_helper.py +2 -3
- pixeltable/utils/documents.py +5 -6
- pixeltable/utils/exception_handler.py +2 -2
- pixeltable/utils/filecache.py +5 -5
- pixeltable/utils/formatter.py +4 -6
- pixeltable/utils/gcs_store.py +9 -9
- pixeltable/utils/local_store.py +17 -17
- pixeltable/utils/object_stores.py +59 -43
- pixeltable/utils/s3_store.py +35 -30
- {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/METADATA +4 -4
- pixeltable-0.4.19.dist-info/RECORD +213 -0
- pixeltable/__version__.py +0 -3
- pixeltable-0.4.17.dist-info/RECORD +0 -211
- {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/entry_points.txt +0 -0
- {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,4 +1,6 @@
|
|
|
1
|
-
from
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING, Any
|
|
2
4
|
|
|
3
5
|
import sqlalchemy as sql
|
|
4
6
|
|
|
@@ -12,34 +14,43 @@ from .literal import Literal
|
|
|
12
14
|
from .row_builder import RowBuilder
|
|
13
15
|
from .sql_element_cache import SqlElementCache
|
|
14
16
|
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from pixeltable.catalog.table_version import TableVersion
|
|
19
|
+
|
|
15
20
|
|
|
16
21
|
class SimilarityExpr(Expr):
|
|
17
|
-
|
|
22
|
+
"""
|
|
23
|
+
A similarity expression against an embedding index.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
idx_id: int
|
|
27
|
+
idx_name: str
|
|
28
|
+
|
|
29
|
+
def __init__(self, col_ref: ColumnRef, item: Any, idx_name: str | None = None):
|
|
30
|
+
from pixeltable.index import EmbeddingIndex
|
|
31
|
+
|
|
18
32
|
super().__init__(ts.FloatType())
|
|
19
33
|
item_expr = Expr.from_object(item)
|
|
20
34
|
if item_expr is None or not (item_expr.col_type.is_string_type() or item_expr.col_type.is_image_type()):
|
|
21
35
|
raise excs.Error(f'similarity(): requires a string or a PIL.Image.Image object, not a {type(item)}')
|
|
22
|
-
assert item_expr.col_type.is_string_type() or item_expr.col_type.is_image_type()
|
|
23
36
|
|
|
24
37
|
self.components = [col_ref, item_expr]
|
|
25
38
|
|
|
26
|
-
from pixeltable import index
|
|
27
|
-
|
|
28
39
|
# determine index to use
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
self.
|
|
32
|
-
idx =
|
|
33
|
-
assert isinstance(idx,
|
|
40
|
+
idx_info = col_ref.tbl.get().get_idx(col_ref.col, idx_name, EmbeddingIndex)
|
|
41
|
+
self.idx_id = idx_info.id
|
|
42
|
+
self.idx_name = idx_info.name
|
|
43
|
+
idx = idx_info.idx
|
|
44
|
+
assert isinstance(idx, EmbeddingIndex)
|
|
34
45
|
|
|
35
46
|
if item_expr.col_type.is_string_type() and idx.string_embed is None:
|
|
36
47
|
raise excs.Error(
|
|
37
|
-
f'Embedding index {
|
|
48
|
+
f'Embedding index {idx_info.name!r} on column {idx_info.col.name!r} does not have a '
|
|
38
49
|
f'string embedding and does not support string queries'
|
|
39
50
|
)
|
|
40
51
|
if item_expr.col_type.is_image_type() and idx.image_embed is None:
|
|
41
52
|
raise excs.Error(
|
|
42
|
-
f'Embedding index {
|
|
53
|
+
f'Embedding index {idx_info.name!r} on column {idx_info.col.name!r} does not have an '
|
|
43
54
|
f'image embedding and does not support image queries'
|
|
44
55
|
)
|
|
45
56
|
self.id = self._create_id()
|
|
@@ -48,39 +59,53 @@ class SimilarityExpr(Expr):
|
|
|
48
59
|
return f'{self.components[0]}.similarity({self.components[1]})'
|
|
49
60
|
|
|
50
61
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
51
|
-
return [*super()._id_attrs(), ('
|
|
62
|
+
return [*super()._id_attrs(), ('idx_id', self.idx_id)]
|
|
52
63
|
|
|
53
64
|
def default_column_name(self) -> str:
|
|
54
65
|
return 'similarity'
|
|
55
66
|
|
|
56
|
-
def sql_expr(self, _: SqlElementCache) ->
|
|
57
|
-
|
|
67
|
+
def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
|
|
68
|
+
from pixeltable.index import EmbeddingIndex
|
|
69
|
+
|
|
70
|
+
# check for a literal here, instead of the c'tor: needed for ExprTemplateFunctions
|
|
58
71
|
if not isinstance(self.components[1], Literal):
|
|
59
72
|
raise excs.Error('similarity(): requires a string or a PIL.Image.Image object, not an expression')
|
|
60
73
|
item = self.components[1].val
|
|
61
|
-
|
|
74
|
+
idx_info = self._resolve_idx()
|
|
75
|
+
assert isinstance(idx_info.idx, EmbeddingIndex)
|
|
76
|
+
return idx_info.idx.similarity_clause(idx_info.val_col, item)
|
|
62
77
|
|
|
63
|
-
|
|
64
|
-
|
|
78
|
+
def as_order_by_clause(self, is_asc: bool) -> sql.ColumnElement | None:
|
|
79
|
+
from pixeltable.index import EmbeddingIndex
|
|
65
80
|
|
|
66
|
-
|
|
81
|
+
# check for a literal here, instead of the c'tor: needed for ExprTemplateFunctions
|
|
67
82
|
if not isinstance(self.components[1], Literal):
|
|
68
83
|
raise excs.Error('similarity(): requires a string or a PIL.Image.Image object, not an expression')
|
|
69
84
|
item = self.components[1].val
|
|
70
|
-
|
|
85
|
+
idx_info = self._resolve_idx()
|
|
86
|
+
assert isinstance(idx_info.idx, EmbeddingIndex)
|
|
87
|
+
return idx_info.idx.order_by_clause(idx_info.val_col, item, is_asc)
|
|
88
|
+
|
|
89
|
+
def _resolve_idx(self) -> 'TableVersion.IndexInfo':
|
|
90
|
+
from pixeltable.index import EmbeddingIndex
|
|
71
91
|
|
|
72
|
-
|
|
73
|
-
|
|
92
|
+
# resolve idx_id
|
|
93
|
+
col_ref = self.components[0]
|
|
94
|
+
if self.idx_id not in col_ref.tbl.get().idxs:
|
|
95
|
+
raise excs.Error(f'Index {self.idx_name!r} not found')
|
|
96
|
+
idx_info = col_ref.tbl.get().idxs[self.idx_id]
|
|
97
|
+
assert isinstance(idx_info.idx, EmbeddingIndex)
|
|
98
|
+
return idx_info
|
|
74
99
|
|
|
75
100
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
76
101
|
raise excs.Error('similarity(): cannot be used in a computed column')
|
|
77
102
|
|
|
78
103
|
def _as_dict(self) -> dict:
|
|
79
|
-
return {'idx_name': self.
|
|
104
|
+
return {'idx_name': self.idx_name, **super()._as_dict()}
|
|
80
105
|
|
|
81
106
|
@classmethod
|
|
82
107
|
def _from_dict(cls, d: dict, components: list[Expr]) -> 'SimilarityExpr':
|
|
83
|
-
|
|
108
|
+
idx_name = d.get('idx_name')
|
|
84
109
|
assert len(components) == 2
|
|
85
110
|
assert isinstance(components[0], ColumnRef)
|
|
86
|
-
return cls(components[0], components[1], idx_name=
|
|
111
|
+
return cls(components[0], components[1], idx_name=idx_name)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Iterable
|
|
1
|
+
from typing import Iterable
|
|
2
2
|
|
|
3
3
|
import sqlalchemy as sql
|
|
4
4
|
|
|
@@ -9,9 +9,9 @@ from .expr_dict import ExprDict
|
|
|
9
9
|
class SqlElementCache:
|
|
10
10
|
"""Cache of sql.ColumnElements for exprs"""
|
|
11
11
|
|
|
12
|
-
cache: dict[int,
|
|
12
|
+
cache: dict[int, sql.ColumnElement | None] # key: Expr.id
|
|
13
13
|
|
|
14
|
-
def __init__(self, elements:
|
|
14
|
+
def __init__(self, elements: ExprDict[sql.ColumnElement] | None = None):
|
|
15
15
|
self.cache = {}
|
|
16
16
|
if elements is not None:
|
|
17
17
|
for e, el in elements.items():
|
|
@@ -21,7 +21,7 @@ class SqlElementCache:
|
|
|
21
21
|
for e, el in elements.items():
|
|
22
22
|
self.cache[e.id] = el
|
|
23
23
|
|
|
24
|
-
def get(self, e: Expr) ->
|
|
24
|
+
def get(self, e: Expr) -> sql.ColumnElement | None:
|
|
25
25
|
"""Returns the sql.ColumnElement for the given Expr, or None if Expr.to_sql() returns None."""
|
|
26
26
|
try:
|
|
27
27
|
return self.cache[e.id]
|
pixeltable/exprs/string_op.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import Any
|
|
3
|
+
from typing import Any
|
|
4
4
|
|
|
5
5
|
import sqlalchemy as sql
|
|
6
6
|
|
|
@@ -60,7 +60,7 @@ class StringOp(Expr):
|
|
|
60
60
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
61
61
|
return [*super()._id_attrs(), ('operator', self.operator.value)]
|
|
62
62
|
|
|
63
|
-
def sql_expr(self, sql_elements: SqlElementCache) ->
|
|
63
|
+
def sql_expr(self, sql_elements: SqlElementCache) -> sql.ColumnElement | None:
|
|
64
64
|
left = sql_elements.get(self._op1)
|
|
65
65
|
right = sql_elements.get(self._op2)
|
|
66
66
|
if left is None or right is None:
|
pixeltable/exprs/type_cast.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
from typing import Optional
|
|
2
|
-
|
|
3
1
|
import sqlalchemy as sql
|
|
4
2
|
|
|
5
3
|
from pixeltable import type_system as ts
|
|
@@ -19,7 +17,7 @@ class TypeCast(Expr):
|
|
|
19
17
|
def __init__(self, underlying: Expr, new_type: ts.ColumnType):
|
|
20
18
|
super().__init__(new_type)
|
|
21
19
|
self.components: list[Expr] = [underlying]
|
|
22
|
-
self.id:
|
|
20
|
+
self.id: int | None = self._create_id()
|
|
23
21
|
|
|
24
22
|
def _equals(self, other: 'TypeCast') -> bool:
|
|
25
23
|
# `TypeCast` has no properties beyond those captured by `Expr`.
|
|
@@ -29,7 +27,7 @@ class TypeCast(Expr):
|
|
|
29
27
|
def _op1(self) -> Expr:
|
|
30
28
|
return self.components[0]
|
|
31
29
|
|
|
32
|
-
def sql_expr(self, _: SqlElementCache) ->
|
|
30
|
+
def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
|
|
33
31
|
"""
|
|
34
32
|
sql_expr() is unimplemented for now, in order to sidestep potentially thorny
|
|
35
33
|
questions about consistency of doing type conversions in both Python and Postgres.
|
|
@@ -40,7 +38,7 @@ class TypeCast(Expr):
|
|
|
40
38
|
original_val = data_row[self._op1.slot_idx]
|
|
41
39
|
data_row[self.slot_idx] = self.col_type.create_literal(original_val)
|
|
42
40
|
|
|
43
|
-
def as_literal(self) ->
|
|
41
|
+
def as_literal(self) -> Literal | None:
|
|
44
42
|
op1_lit = self._op1.as_literal()
|
|
45
43
|
if op1_lit is None:
|
|
46
44
|
return None
|
|
@@ -2,7 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import abc
|
|
4
4
|
import inspect
|
|
5
|
-
from typing import TYPE_CHECKING, Any, Callable, ClassVar,
|
|
5
|
+
from typing import TYPE_CHECKING, Any, Callable, ClassVar, Sequence, overload
|
|
6
6
|
|
|
7
7
|
import pixeltable.exceptions as excs
|
|
8
8
|
import pixeltable.type_system as ts
|
|
@@ -42,7 +42,7 @@ class AggregateFunction(Function):
|
|
|
42
42
|
def __init__(
|
|
43
43
|
self,
|
|
44
44
|
agg_class: type[Aggregator],
|
|
45
|
-
type_substitutions:
|
|
45
|
+
type_substitutions: Sequence[dict] | None,
|
|
46
46
|
self_path: str,
|
|
47
47
|
requires_order_by: bool,
|
|
48
48
|
allows_std_agg: bool,
|
|
@@ -75,7 +75,7 @@ class AggregateFunction(Function):
|
|
|
75
75
|
self.init_param_names = [self.init_param_names[signature_idx]]
|
|
76
76
|
|
|
77
77
|
def __cls_to_signature(
|
|
78
|
-
self, cls: type[Aggregator], type_substitutions:
|
|
78
|
+
self, cls: type[Aggregator], type_substitutions: dict | None = None
|
|
79
79
|
) -> tuple[Signature, list[str]]:
|
|
80
80
|
"""Inspects the Aggregator class to infer the corresponding function signature. Returns the
|
|
81
81
|
inferred signature along with the list of init_param_names (for downstream error handling).
|
|
@@ -159,7 +159,7 @@ class AggregateFunction(Function):
|
|
|
159
159
|
self.init_param_names.append(init_param_names)
|
|
160
160
|
return self
|
|
161
161
|
|
|
162
|
-
def comment(self) ->
|
|
162
|
+
def comment(self) -> str | None:
|
|
163
163
|
return inspect.getdoc(self.agg_classes[0])
|
|
164
164
|
|
|
165
165
|
def help_str(self) -> str:
|
|
@@ -173,7 +173,7 @@ class AggregateFunction(Function):
|
|
|
173
173
|
from pixeltable import exprs
|
|
174
174
|
|
|
175
175
|
# perform semantic analysis of special parameters 'order_by' and 'group_by'
|
|
176
|
-
order_by_clause:
|
|
176
|
+
order_by_clause: Any | None = None
|
|
177
177
|
if self.ORDER_BY_PARAM in kwargs:
|
|
178
178
|
if self.requires_order_by:
|
|
179
179
|
raise excs.Error(
|
|
@@ -198,7 +198,7 @@ class AggregateFunction(Function):
|
|
|
198
198
|
# don't pass the first parameter on, the Function doesn't get to see it
|
|
199
199
|
args = args[1:]
|
|
200
200
|
|
|
201
|
-
group_by_clause:
|
|
201
|
+
group_by_clause: Any | None = None
|
|
202
202
|
if self.GROUP_BY_PARAM in kwargs:
|
|
203
203
|
if not self.allows_window:
|
|
204
204
|
raise excs.Error(
|
|
@@ -248,7 +248,7 @@ def uda(
|
|
|
248
248
|
requires_order_by: bool = False,
|
|
249
249
|
allows_std_agg: bool = True,
|
|
250
250
|
allows_window: bool = False,
|
|
251
|
-
type_substitutions:
|
|
251
|
+
type_substitutions: Sequence[dict] | None = None,
|
|
252
252
|
) -> Callable[[type[Aggregator]], AggregateFunction]: ...
|
|
253
253
|
|
|
254
254
|
|
|
@@ -302,7 +302,7 @@ def make_aggregator(
|
|
|
302
302
|
requires_order_by: bool = False,
|
|
303
303
|
allows_std_agg: bool = True,
|
|
304
304
|
allows_window: bool = False,
|
|
305
|
-
type_substitutions:
|
|
305
|
+
type_substitutions: Sequence[dict] | None = None,
|
|
306
306
|
) -> AggregateFunction:
|
|
307
307
|
class_path = f'{cls.__module__}.{cls.__qualname__}'
|
|
308
308
|
instance = AggregateFunction(cls, type_substitutions, class_path, requires_order_by, allows_std_agg, allows_window)
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import inspect
|
|
4
|
-
from typing import TYPE_CHECKING, Any, Callable,
|
|
4
|
+
from typing import TYPE_CHECKING, Any, Callable, Sequence
|
|
5
5
|
from uuid import UUID
|
|
6
6
|
|
|
7
7
|
import cloudpickle # type: ignore[import-untyped]
|
|
@@ -25,16 +25,16 @@ class CallableFunction(Function):
|
|
|
25
25
|
"""
|
|
26
26
|
|
|
27
27
|
py_fns: list[Callable]
|
|
28
|
-
self_name:
|
|
29
|
-
batch_size:
|
|
28
|
+
self_name: str | None
|
|
29
|
+
batch_size: int | None
|
|
30
30
|
|
|
31
31
|
def __init__(
|
|
32
32
|
self,
|
|
33
33
|
signatures: list[Signature],
|
|
34
34
|
py_fns: list[Callable],
|
|
35
|
-
self_path:
|
|
36
|
-
self_name:
|
|
37
|
-
batch_size:
|
|
35
|
+
self_path: str | None = None,
|
|
36
|
+
self_name: str | None = None,
|
|
37
|
+
batch_size: int | None = None,
|
|
38
38
|
is_method: bool = False,
|
|
39
39
|
is_property: bool = False,
|
|
40
40
|
):
|
|
@@ -60,7 +60,7 @@ class CallableFunction(Function):
|
|
|
60
60
|
def is_async(self) -> bool:
|
|
61
61
|
return inspect.iscoroutinefunction(self.py_fn)
|
|
62
62
|
|
|
63
|
-
def comment(self) ->
|
|
63
|
+
def comment(self) -> str | None:
|
|
64
64
|
return inspect.getdoc(self.py_fns[0])
|
|
65
65
|
|
|
66
66
|
@property
|
|
@@ -138,7 +138,7 @@ class CallableFunction(Function):
|
|
|
138
138
|
batched_kwargs = {k: v for k, v in kwargs.items() if k not in constant_param_names}
|
|
139
139
|
return constant_kwargs, batched_kwargs
|
|
140
140
|
|
|
141
|
-
def get_batch_size(self, *args: Any, **kwargs: Any) ->
|
|
141
|
+
def get_batch_size(self, *args: Any, **kwargs: Any) -> int | None:
|
|
142
142
|
return self.batch_size
|
|
143
143
|
|
|
144
144
|
@property
|
|
@@ -187,7 +187,7 @@ class CallableFunction(Function):
|
|
|
187
187
|
return md, cloudpickle.dumps(self.py_fn)
|
|
188
188
|
|
|
189
189
|
@classmethod
|
|
190
|
-
def from_store(cls, name:
|
|
190
|
+
def from_store(cls, name: str | None, md: dict, binary_obj: bytes) -> Function:
|
|
191
191
|
py_fn = cloudpickle.loads(binary_obj)
|
|
192
192
|
assert callable(py_fn)
|
|
193
193
|
sig = Signature.from_dict(md['signature'])
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any,
|
|
1
|
+
from typing import Any, Sequence
|
|
2
2
|
|
|
3
3
|
from pixeltable import exceptions as excs, exprs, type_system as ts
|
|
4
4
|
|
|
@@ -41,7 +41,7 @@ class ExprTemplateFunction(Function):
|
|
|
41
41
|
templates: list[ExprTemplate]
|
|
42
42
|
self_name: str
|
|
43
43
|
|
|
44
|
-
def __init__(self, templates: list[ExprTemplate], self_path:
|
|
44
|
+
def __init__(self, templates: list[ExprTemplate], self_path: str | None = None, name: str | None = None):
|
|
45
45
|
self.templates = templates
|
|
46
46
|
self.self_name = name
|
|
47
47
|
|
|
@@ -98,7 +98,7 @@ class ExprTemplateFunction(Function):
|
|
|
98
98
|
)
|
|
99
99
|
return substituted_expr.col_type
|
|
100
100
|
|
|
101
|
-
def comment(self) ->
|
|
101
|
+
def comment(self) -> str | None:
|
|
102
102
|
if isinstance(self.templates[0].expr, exprs.FunctionCall):
|
|
103
103
|
return self.templates[0].expr.fn.comment()
|
|
104
104
|
return None
|
pixeltable/func/function.py
CHANGED
|
@@ -5,7 +5,7 @@ import inspect
|
|
|
5
5
|
import typing
|
|
6
6
|
from abc import ABC, abstractmethod
|
|
7
7
|
from copy import copy
|
|
8
|
-
from typing import TYPE_CHECKING, Any, Callable,
|
|
8
|
+
from typing import TYPE_CHECKING, Any, Callable, Sequence, cast
|
|
9
9
|
|
|
10
10
|
import sqlalchemy as sql
|
|
11
11
|
from typing_extensions import Self
|
|
@@ -30,10 +30,10 @@ class Function(ABC):
|
|
|
30
30
|
"""
|
|
31
31
|
|
|
32
32
|
signatures: list[Signature]
|
|
33
|
-
self_path:
|
|
33
|
+
self_path: str | None
|
|
34
34
|
is_method: bool
|
|
35
35
|
is_property: bool
|
|
36
|
-
_conditional_return_type:
|
|
36
|
+
_conditional_return_type: Callable[..., ts.ColumnType] | None
|
|
37
37
|
|
|
38
38
|
# We cache the overload resolutions in self._resolutions. This ensures that each resolution is represented
|
|
39
39
|
# globally by a single Python object. We do this dynamically rather than pre-constructing them in order to
|
|
@@ -43,17 +43,17 @@ class Function(ABC):
|
|
|
43
43
|
# Translates a call to this function with the given arguments to its SQLAlchemy equivalent.
|
|
44
44
|
# Overridden for specific Function instances via the to_sql() decorator. The override must accept the same
|
|
45
45
|
# parameter names as the original function. Each parameter is going to be of type sql.ColumnElement.
|
|
46
|
-
_to_sql: Callable[...,
|
|
46
|
+
_to_sql: Callable[..., sql.ColumnElement | None]
|
|
47
47
|
|
|
48
48
|
# Returns the resource pool to use for calling this function with the given arguments.
|
|
49
49
|
# Overridden for specific Function instances via the resource_pool() decorator. The override must accept a subset
|
|
50
50
|
# of the parameters of the original function, with the same type.
|
|
51
|
-
_resource_pool: Callable[...,
|
|
51
|
+
_resource_pool: Callable[..., str | None]
|
|
52
52
|
|
|
53
53
|
def __init__(
|
|
54
54
|
self,
|
|
55
55
|
signatures: list[Signature],
|
|
56
|
-
self_path:
|
|
56
|
+
self_path: str | None = None,
|
|
57
57
|
is_method: bool = False,
|
|
58
58
|
is_property: bool = False,
|
|
59
59
|
):
|
|
@@ -105,7 +105,7 @@ class Function(ABC):
|
|
|
105
105
|
@abstractmethod
|
|
106
106
|
def is_async(self) -> bool: ...
|
|
107
107
|
|
|
108
|
-
def comment(self) ->
|
|
108
|
+
def comment(self) -> str | None:
|
|
109
109
|
return None
|
|
110
110
|
|
|
111
111
|
def help_str(self) -> str:
|
|
@@ -175,7 +175,7 @@ class Function(ABC):
|
|
|
175
175
|
|
|
176
176
|
def _bind_to_matching_signature(self, args: Sequence[Any], kwargs: dict[str, Any]) -> tuple[Self, dict[str, Any]]:
|
|
177
177
|
result: int = -1
|
|
178
|
-
bound_args:
|
|
178
|
+
bound_args: dict[str, Any] | None = None
|
|
179
179
|
assert len(self.signatures) > 0
|
|
180
180
|
if len(self.signatures) == 1:
|
|
181
181
|
# Only one signature: call _bind_to_signature() and surface any errors directly
|
|
@@ -206,7 +206,7 @@ class Function(ABC):
|
|
|
206
206
|
self._resolved_fns[signature_idx].validate_call(normalized_args)
|
|
207
207
|
return normalized_args
|
|
208
208
|
|
|
209
|
-
def validate_call(self, bound_args: dict[str,
|
|
209
|
+
def validate_call(self, bound_args: dict[str, 'exprs.Expr' | None]) -> None:
|
|
210
210
|
"""Override this to do custom validation of the arguments"""
|
|
211
211
|
assert not self.is_polymorphic
|
|
212
212
|
self.signature.validate_args(bound_args, context=f'in function {self.name!r}')
|
|
@@ -252,9 +252,7 @@ class Function(ABC):
|
|
|
252
252
|
|
|
253
253
|
return return_type
|
|
254
254
|
|
|
255
|
-
def _assemble_callable_args(
|
|
256
|
-
self, callable: Callable, bound_args: dict[str, 'exprs.Expr']
|
|
257
|
-
) -> Optional[dict[str, Any]]:
|
|
255
|
+
def _assemble_callable_args(self, callable: Callable, bound_args: dict[str, 'exprs.Expr']) -> dict[str, Any] | None:
|
|
258
256
|
"""
|
|
259
257
|
Return the kwargs to pass to callable, given bound_args passed to this function.
|
|
260
258
|
|
|
@@ -286,7 +284,7 @@ class Function(ABC):
|
|
|
286
284
|
return None
|
|
287
285
|
assert isinstance(arg, exprs.Expr)
|
|
288
286
|
|
|
289
|
-
expects_expr:
|
|
287
|
+
expects_expr: type[exprs.Expr] | None = None
|
|
290
288
|
type_hint = callable_type_hints.get(param.name)
|
|
291
289
|
if typing.get_origin(type_hint) is not None:
|
|
292
290
|
type_hint = typing.get_origin(type_hint) # Remove type subscript if one exists
|
|
@@ -410,12 +408,12 @@ class Function(ABC):
|
|
|
410
408
|
"""Execute the function with the given arguments and return the result."""
|
|
411
409
|
raise NotImplementedError()
|
|
412
410
|
|
|
413
|
-
def to_sql(self, fn: Callable[...,
|
|
411
|
+
def to_sql(self, fn: Callable[..., sql.ColumnElement | None]) -> Callable[..., sql.ColumnElement | None]:
|
|
414
412
|
"""Instance decorator for specifying the SQL translation of this function"""
|
|
415
413
|
self._to_sql = fn
|
|
416
414
|
return fn
|
|
417
415
|
|
|
418
|
-
def __default_to_sql(self, *args: Any, **kwargs: Any) ->
|
|
416
|
+
def __default_to_sql(self, *args: Any, **kwargs: Any) -> sql.ColumnElement | None:
|
|
419
417
|
"""The default implementation of SQL translation, which provides no translation"""
|
|
420
418
|
return None
|
|
421
419
|
|
|
@@ -425,7 +423,7 @@ class Function(ABC):
|
|
|
425
423
|
self._resource_pool = fn
|
|
426
424
|
return fn
|
|
427
425
|
|
|
428
|
-
def __default_resource_pool(self) ->
|
|
426
|
+
def __default_resource_pool(self) -> str | None:
|
|
429
427
|
return None
|
|
430
428
|
|
|
431
429
|
def __eq__(self, other: object) -> bool:
|
|
@@ -495,7 +493,7 @@ class Function(ABC):
|
|
|
495
493
|
raise NotImplementedError()
|
|
496
494
|
|
|
497
495
|
@classmethod
|
|
498
|
-
def from_store(cls, name:
|
|
496
|
+
def from_store(cls, name: str | None, md: dict, binary_obj: bytes) -> Function:
|
|
499
497
|
"""
|
|
500
498
|
Create a Function instance from the serialized representation returned by to_store()
|
|
501
499
|
"""
|
|
@@ -4,7 +4,6 @@ import dataclasses
|
|
|
4
4
|
import importlib
|
|
5
5
|
import logging
|
|
6
6
|
import sys
|
|
7
|
-
from typing import Optional
|
|
8
7
|
from uuid import UUID
|
|
9
8
|
|
|
10
9
|
import sqlalchemy as sql
|
|
@@ -23,7 +22,7 @@ class FunctionRegistry:
|
|
|
23
22
|
Functions are loaded from the store on demand.
|
|
24
23
|
"""
|
|
25
24
|
|
|
26
|
-
_instance:
|
|
25
|
+
_instance: FunctionRegistry | None = None
|
|
27
26
|
|
|
28
27
|
@classmethod
|
|
29
28
|
def get(cls) -> FunctionRegistry:
|
|
@@ -94,7 +93,7 @@ class FunctionRegistry:
|
|
|
94
93
|
# stored_fn_md.append(md)
|
|
95
94
|
return list(self.module_fns.values())
|
|
96
95
|
|
|
97
|
-
# def get_function(self, *, id:
|
|
96
|
+
# def get_function(self, *, id: UUID | None = None, fqn: str | None = None) -> Function:
|
|
98
97
|
# assert (id is not None) != (fqn is not None)
|
|
99
98
|
# if id is not None:
|
|
100
99
|
# if id not in self.stored_fns_by_id:
|
|
@@ -143,7 +142,7 @@ class FunctionRegistry:
|
|
|
143
142
|
return list(self.type_methods[base_type].values())
|
|
144
143
|
return []
|
|
145
144
|
|
|
146
|
-
def lookup_type_method(self, base_type: ts.ColumnType.Type, name: str) ->
|
|
145
|
+
def lookup_type_method(self, base_type: ts.ColumnType.Type, name: str) -> Function | None:
|
|
147
146
|
"""
|
|
148
147
|
Look up a method (or property) by name for a given base type. If no such method is registered, return None.
|
|
149
148
|
"""
|
|
@@ -151,8 +150,8 @@ class FunctionRegistry:
|
|
|
151
150
|
return self.type_methods[base_type][name]
|
|
152
151
|
return None
|
|
153
152
|
|
|
154
|
-
# def create_function(self, md: schema.FunctionMd, binary_obj: bytes, dir_id:
|
|
155
|
-
def create_stored_function(self, pxt_fn: Function, dir_id:
|
|
153
|
+
# def create_function(self, md: schema.FunctionMd, binary_obj: bytes, dir_id: UUID | None = None) -> UUID:
|
|
154
|
+
def create_stored_function(self, pxt_fn: Function, dir_id: UUID | None = None) -> UUID:
|
|
156
155
|
fn_md, binary_obj = pxt_fn.to_store()
|
|
157
156
|
md = schema.FunctionMd(name=pxt_fn.name, md=fn_md, py_version=sys.version, class_name=pxt_fn.__class__.__name__)
|
|
158
157
|
with env.Env.get().engine.begin() as conn:
|
|
@@ -184,7 +183,7 @@ class FunctionRegistry:
|
|
|
184
183
|
self.stored_fns_by_id[id] = instance
|
|
185
184
|
return instance
|
|
186
185
|
|
|
187
|
-
# def create_function(self, fn: Function, dir_id:
|
|
186
|
+
# def create_function(self, fn: Function, dir_id: UUID | None = None, name: str | None = None) -> None:
|
|
188
187
|
# with env.Env.get().engine.begin() as conn:
|
|
189
188
|
# _logger.debug(f'Pickling function {name}')
|
|
190
189
|
# eval_fn_str = cloudpickle.dumps(fn.eval_fn) if fn.eval_fn is not None else None
|
pixeltable/func/globals.py
CHANGED
|
@@ -1,14 +1,13 @@
|
|
|
1
1
|
import importlib
|
|
2
2
|
import inspect
|
|
3
3
|
from types import ModuleType
|
|
4
|
-
from typing import Optional
|
|
5
4
|
|
|
6
5
|
import pixeltable.exceptions as excs
|
|
7
6
|
|
|
8
7
|
|
|
9
|
-
def resolve_symbol(symbol_path: str) ->
|
|
8
|
+
def resolve_symbol(symbol_path: str) -> object | None:
|
|
10
9
|
path_elems = symbol_path.split('.')
|
|
11
|
-
module:
|
|
10
|
+
module: ModuleType | None = None
|
|
12
11
|
i = len(path_elems) - 1
|
|
13
12
|
while i > 0 and module is None:
|
|
14
13
|
try:
|
pixeltable/func/mcp.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
import inspect
|
|
3
|
-
from typing import TYPE_CHECKING, Any
|
|
3
|
+
from typing import TYPE_CHECKING, Any
|
|
4
4
|
|
|
5
5
|
import pixeltable as pxt
|
|
6
6
|
from pixeltable import exceptions as excs, type_system as ts
|
|
@@ -18,7 +18,7 @@ async def mcp_udfs_async(url: str) -> list['pxt.func.Function']:
|
|
|
18
18
|
import mcp
|
|
19
19
|
from mcp.client.streamable_http import streamablehttp_client
|
|
20
20
|
|
|
21
|
-
list_tools_result:
|
|
21
|
+
list_tools_result: mcp.types.ListToolsResult | None = None
|
|
22
22
|
async with (
|
|
23
23
|
streamablehttp_client(url) as (read_stream, write_stream, _),
|
|
24
24
|
mcp.ClientSession(read_stream, write_stream) as session,
|
|
@@ -2,7 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import inspect
|
|
4
4
|
from functools import reduce
|
|
5
|
-
from typing import TYPE_CHECKING, Any, Callable, Iterable,
|
|
5
|
+
from typing import TYPE_CHECKING, Any, Callable, Iterable, overload
|
|
6
6
|
|
|
7
7
|
from pixeltable import catalog, exceptions as excs, exprs, func, type_system as ts
|
|
8
8
|
|
|
@@ -16,13 +16,13 @@ if TYPE_CHECKING:
|
|
|
16
16
|
class QueryTemplateFunction(Function):
|
|
17
17
|
"""A parameterized query/DataFrame from which an executable DataFrame is created with a function call."""
|
|
18
18
|
|
|
19
|
-
template_df:
|
|
20
|
-
self_name:
|
|
21
|
-
_comment:
|
|
19
|
+
template_df: 'DataFrame' | None
|
|
20
|
+
self_name: str | None
|
|
21
|
+
_comment: str | None
|
|
22
22
|
|
|
23
23
|
@classmethod
|
|
24
24
|
def create(
|
|
25
|
-
cls, template_callable: Callable, param_types:
|
|
25
|
+
cls, template_callable: Callable, param_types: list[ts.ColumnType] | None, path: str, name: str
|
|
26
26
|
) -> QueryTemplateFunction:
|
|
27
27
|
# we need to construct a template df and a signature
|
|
28
28
|
py_sig = inspect.signature(template_callable)
|
|
@@ -40,11 +40,11 @@ class QueryTemplateFunction(Function):
|
|
|
40
40
|
|
|
41
41
|
def __init__(
|
|
42
42
|
self,
|
|
43
|
-
template_df:
|
|
43
|
+
template_df: 'DataFrame' | None,
|
|
44
44
|
sig: Signature,
|
|
45
|
-
path:
|
|
46
|
-
name:
|
|
47
|
-
comment:
|
|
45
|
+
path: str | None = None,
|
|
46
|
+
name: str | None = None,
|
|
47
|
+
comment: str | None = None,
|
|
48
48
|
):
|
|
49
49
|
assert sig is not None
|
|
50
50
|
super().__init__([sig], self_path=path)
|
|
@@ -82,7 +82,7 @@ class QueryTemplateFunction(Function):
|
|
|
82
82
|
def name(self) -> str:
|
|
83
83
|
return self.self_name
|
|
84
84
|
|
|
85
|
-
def comment(self) ->
|
|
85
|
+
def comment(self) -> str | None:
|
|
86
86
|
return self._comment
|
|
87
87
|
|
|
88
88
|
def _as_dict(self) -> dict:
|
|
@@ -100,11 +100,11 @@ def query(py_fn: Callable) -> QueryTemplateFunction: ...
|
|
|
100
100
|
|
|
101
101
|
|
|
102
102
|
@overload
|
|
103
|
-
def query(*, param_types:
|
|
103
|
+
def query(*, param_types: list[ts.ColumnType] | None = None) -> Callable[[Callable], QueryTemplateFunction]: ...
|
|
104
104
|
|
|
105
105
|
|
|
106
106
|
def query(*args: Any, **kwargs: Any) -> Any:
|
|
107
|
-
def make_query_template(py_fn: Callable, param_types:
|
|
107
|
+
def make_query_template(py_fn: Callable, param_types: list[ts.ColumnType] | None) -> QueryTemplateFunction:
|
|
108
108
|
if py_fn.__module__ != '__main__' and py_fn.__name__.isidentifier():
|
|
109
109
|
# this is a named function in a module
|
|
110
110
|
function_path = f'{py_fn.__module__}.{py_fn.__qualname__}'
|
|
@@ -127,10 +127,10 @@ def query(*args: Any, **kwargs: Any) -> Any:
|
|
|
127
127
|
|
|
128
128
|
def retrieval_udf(
|
|
129
129
|
table: catalog.Table,
|
|
130
|
-
name:
|
|
131
|
-
description:
|
|
132
|
-
parameters:
|
|
133
|
-
limit:
|
|
130
|
+
name: str | None = None,
|
|
131
|
+
description: str | None = None,
|
|
132
|
+
parameters: Iterable[str | exprs.ColumnRef] | None = None,
|
|
133
|
+
limit: int | None = 10,
|
|
134
134
|
) -> func.QueryTemplateFunction:
|
|
135
135
|
"""
|
|
136
136
|
Constructs a retrieval UDF for the given table. The retrieval UDF is a UDF whose parameters are
|