pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +83 -19
- pixeltable/_query.py +1444 -0
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +7 -4
- pixeltable/catalog/catalog.py +2394 -119
- pixeltable/catalog/column.py +225 -104
- pixeltable/catalog/dir.py +38 -9
- pixeltable/catalog/globals.py +53 -34
- pixeltable/catalog/insertable_table.py +265 -115
- pixeltable/catalog/path.py +80 -17
- pixeltable/catalog/schema_object.py +28 -43
- pixeltable/catalog/table.py +1270 -677
- pixeltable/catalog/table_metadata.py +103 -0
- pixeltable/catalog/table_version.py +1270 -751
- pixeltable/catalog/table_version_handle.py +109 -0
- pixeltable/catalog/table_version_path.py +137 -42
- pixeltable/catalog/tbl_ops.py +53 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +251 -134
- pixeltable/config.py +215 -0
- pixeltable/env.py +736 -285
- pixeltable/exceptions.py +26 -2
- pixeltable/exec/__init__.py +7 -2
- pixeltable/exec/aggregation_node.py +39 -21
- pixeltable/exec/cache_prefetch_node.py +87 -109
- pixeltable/exec/cell_materialization_node.py +268 -0
- pixeltable/exec/cell_reconstruction_node.py +168 -0
- pixeltable/exec/component_iteration_node.py +25 -28
- pixeltable/exec/data_row_batch.py +11 -46
- pixeltable/exec/exec_context.py +26 -11
- pixeltable/exec/exec_node.py +35 -27
- pixeltable/exec/expr_eval/__init__.py +3 -0
- pixeltable/exec/expr_eval/evaluators.py +365 -0
- pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
- pixeltable/exec/expr_eval/globals.py +200 -0
- pixeltable/exec/expr_eval/row_buffer.py +74 -0
- pixeltable/exec/expr_eval/schedulers.py +413 -0
- pixeltable/exec/globals.py +35 -0
- pixeltable/exec/in_memory_data_node.py +35 -27
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +44 -29
- pixeltable/exec/sql_node.py +414 -115
- pixeltable/exprs/__init__.py +8 -5
- pixeltable/exprs/arithmetic_expr.py +79 -45
- pixeltable/exprs/array_slice.py +5 -5
- pixeltable/exprs/column_property_ref.py +40 -26
- pixeltable/exprs/column_ref.py +254 -61
- pixeltable/exprs/comparison.py +14 -9
- pixeltable/exprs/compound_predicate.py +9 -10
- pixeltable/exprs/data_row.py +213 -72
- pixeltable/exprs/expr.py +270 -104
- pixeltable/exprs/expr_dict.py +6 -5
- pixeltable/exprs/expr_set.py +20 -11
- pixeltable/exprs/function_call.py +383 -284
- pixeltable/exprs/globals.py +18 -5
- pixeltable/exprs/in_predicate.py +7 -7
- pixeltable/exprs/inline_expr.py +37 -37
- pixeltable/exprs/is_null.py +8 -4
- pixeltable/exprs/json_mapper.py +120 -54
- pixeltable/exprs/json_path.py +90 -60
- pixeltable/exprs/literal.py +61 -16
- pixeltable/exprs/method_ref.py +7 -6
- pixeltable/exprs/object_ref.py +19 -8
- pixeltable/exprs/row_builder.py +238 -75
- pixeltable/exprs/rowid_ref.py +53 -15
- pixeltable/exprs/similarity_expr.py +65 -50
- pixeltable/exprs/sql_element_cache.py +5 -5
- pixeltable/exprs/string_op.py +107 -0
- pixeltable/exprs/type_cast.py +25 -13
- pixeltable/exprs/variable.py +2 -2
- pixeltable/func/__init__.py +9 -5
- pixeltable/func/aggregate_function.py +197 -92
- pixeltable/func/callable_function.py +119 -35
- pixeltable/func/expr_template_function.py +101 -48
- pixeltable/func/function.py +375 -62
- pixeltable/func/function_registry.py +20 -19
- pixeltable/func/globals.py +6 -5
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +151 -35
- pixeltable/func/signature.py +178 -49
- pixeltable/func/tools.py +164 -0
- pixeltable/func/udf.py +176 -53
- pixeltable/functions/__init__.py +44 -4
- pixeltable/functions/anthropic.py +226 -47
- pixeltable/functions/audio.py +148 -11
- pixeltable/functions/bedrock.py +137 -0
- pixeltable/functions/date.py +188 -0
- pixeltable/functions/deepseek.py +113 -0
- pixeltable/functions/document.py +81 -0
- pixeltable/functions/fal.py +76 -0
- pixeltable/functions/fireworks.py +72 -20
- pixeltable/functions/gemini.py +249 -0
- pixeltable/functions/globals.py +208 -53
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1088 -95
- pixeltable/functions/image.py +155 -84
- pixeltable/functions/json.py +8 -11
- pixeltable/functions/llama_cpp.py +31 -19
- pixeltable/functions/math.py +169 -0
- pixeltable/functions/mistralai.py +50 -75
- pixeltable/functions/net.py +70 -0
- pixeltable/functions/ollama.py +29 -36
- pixeltable/functions/openai.py +548 -160
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +15 -14
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +310 -85
- pixeltable/functions/timestamp.py +37 -19
- pixeltable/functions/together.py +77 -120
- pixeltable/functions/twelvelabs.py +188 -0
- pixeltable/functions/util.py +7 -2
- pixeltable/functions/uuid.py +30 -0
- pixeltable/functions/video.py +1528 -117
- pixeltable/functions/vision.py +26 -26
- pixeltable/functions/voyageai.py +289 -0
- pixeltable/functions/whisper.py +19 -10
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/functions/yolox.py +112 -0
- pixeltable/globals.py +716 -236
- pixeltable/index/__init__.py +3 -1
- pixeltable/index/base.py +17 -21
- pixeltable/index/btree.py +32 -22
- pixeltable/index/embedding_index.py +155 -92
- pixeltable/io/__init__.py +12 -7
- pixeltable/io/datarows.py +140 -0
- pixeltable/io/external_store.py +83 -125
- pixeltable/io/fiftyone.py +24 -33
- pixeltable/io/globals.py +47 -182
- pixeltable/io/hf_datasets.py +96 -127
- pixeltable/io/label_studio.py +171 -156
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +136 -115
- pixeltable/io/parquet.py +40 -153
- pixeltable/io/table_data_conduit.py +702 -0
- pixeltable/io/utils.py +100 -0
- pixeltable/iterators/__init__.py +8 -4
- pixeltable/iterators/audio.py +207 -0
- pixeltable/iterators/base.py +9 -3
- pixeltable/iterators/document.py +144 -87
- pixeltable/iterators/image.py +17 -38
- pixeltable/iterators/string.py +15 -12
- pixeltable/iterators/video.py +523 -127
- pixeltable/metadata/__init__.py +33 -8
- pixeltable/metadata/converters/convert_10.py +2 -3
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_15.py +15 -11
- pixeltable/metadata/converters/convert_16.py +4 -5
- pixeltable/metadata/converters/convert_17.py +4 -5
- pixeltable/metadata/converters/convert_18.py +4 -6
- pixeltable/metadata/converters/convert_19.py +6 -9
- pixeltable/metadata/converters/convert_20.py +3 -6
- pixeltable/metadata/converters/convert_21.py +6 -8
- pixeltable/metadata/converters/convert_22.py +3 -2
- pixeltable/metadata/converters/convert_23.py +33 -0
- pixeltable/metadata/converters/convert_24.py +55 -0
- pixeltable/metadata/converters/convert_25.py +19 -0
- pixeltable/metadata/converters/convert_26.py +23 -0
- pixeltable/metadata/converters/convert_27.py +29 -0
- pixeltable/metadata/converters/convert_28.py +13 -0
- pixeltable/metadata/converters/convert_29.py +110 -0
- pixeltable/metadata/converters/convert_30.py +63 -0
- pixeltable/metadata/converters/convert_31.py +11 -0
- pixeltable/metadata/converters/convert_32.py +15 -0
- pixeltable/metadata/converters/convert_33.py +17 -0
- pixeltable/metadata/converters/convert_34.py +21 -0
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/convert_41.py +12 -0
- pixeltable/metadata/converters/convert_42.py +9 -0
- pixeltable/metadata/converters/convert_43.py +44 -0
- pixeltable/metadata/converters/util.py +44 -18
- pixeltable/metadata/notes.py +21 -0
- pixeltable/metadata/schema.py +185 -42
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +616 -225
- pixeltable/share/__init__.py +3 -0
- pixeltable/share/packager.py +797 -0
- pixeltable/share/protocol/__init__.py +33 -0
- pixeltable/share/protocol/common.py +165 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +119 -0
- pixeltable/share/publish.py +349 -0
- pixeltable/store.py +398 -232
- pixeltable/type_system.py +730 -267
- pixeltable/utils/__init__.py +40 -0
- pixeltable/utils/arrow.py +201 -29
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +346 -0
- pixeltable/utils/coco.py +26 -27
- pixeltable/utils/code.py +4 -4
- pixeltable/utils/console_output.py +46 -0
- pixeltable/utils/coroutine.py +24 -0
- pixeltable/utils/dbms.py +92 -0
- pixeltable/utils/description_helper.py +11 -12
- pixeltable/utils/documents.py +60 -61
- pixeltable/utils/exception_handler.py +36 -0
- pixeltable/utils/filecache.py +38 -22
- pixeltable/utils/formatter.py +88 -51
- pixeltable/utils/gcs_store.py +295 -0
- pixeltable/utils/http.py +133 -0
- pixeltable/utils/http_server.py +14 -13
- pixeltable/utils/iceberg.py +13 -0
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +90 -0
- pixeltable/utils/local_store.py +322 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +573 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +20 -20
- pixeltable/utils/s3_store.py +527 -0
- pixeltable/utils/sql.py +32 -5
- pixeltable/utils/system.py +30 -0
- pixeltable/utils/transactional_directory.py +4 -3
- pixeltable-0.5.7.dist-info/METADATA +579 -0
- pixeltable-0.5.7.dist-info/RECORD +227 -0
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
- pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/catalog/named_function.py +0 -36
- pixeltable/catalog/path_dict.py +0 -141
- pixeltable/dataframe.py +0 -894
- pixeltable/exec/expr_eval_node.py +0 -232
- pixeltable/ext/__init__.py +0 -14
- pixeltable/ext/functions/__init__.py +0 -8
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/ext/functions/yolox.py +0 -157
- pixeltable/tool/create_test_db_dump.py +0 -311
- pixeltable/tool/create_test_video.py +0 -81
- pixeltable/tool/doc_plugins/griffe.py +0 -50
- pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
- pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
- pixeltable/tool/embed_udf.py +0 -9
- pixeltable/tool/mypy_plugin.py +0 -55
- pixeltable/utils/media_store.py +0 -76
- pixeltable/utils/s3.py +0 -16
- pixeltable-0.2.26.dist-info/METADATA +0 -400
- pixeltable-0.2.26.dist-info/RECORD +0 -156
- pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
pixeltable/func/udf.py
CHANGED
|
@@ -1,36 +1,49 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
import inspect
|
|
4
|
+
from typing import TYPE_CHECKING, Any, Callable, Sequence, overload
|
|
4
5
|
|
|
5
6
|
import pixeltable.exceptions as excs
|
|
6
7
|
import pixeltable.type_system as ts
|
|
8
|
+
from pixeltable import catalog
|
|
7
9
|
|
|
8
10
|
from .callable_function import CallableFunction
|
|
9
|
-
from .expr_template_function import ExprTemplateFunction
|
|
10
|
-
from .function import Function
|
|
11
|
+
from .expr_template_function import ExprTemplate, ExprTemplateFunction
|
|
11
12
|
from .function_registry import FunctionRegistry
|
|
12
13
|
from .globals import validate_symbol_path
|
|
13
|
-
from .signature import Signature
|
|
14
|
+
from .signature import Parameter, Signature
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from pixeltable import exprs
|
|
14
18
|
|
|
15
19
|
|
|
16
20
|
# Decorator invoked without parentheses: @pxt.udf
|
|
17
21
|
@overload
|
|
18
|
-
def udf(decorated_fn: Callable) ->
|
|
22
|
+
def udf(decorated_fn: Callable) -> CallableFunction: ...
|
|
19
23
|
|
|
20
24
|
|
|
21
25
|
# Decorator schema invoked with parentheses: @pxt.udf(**kwargs)
|
|
22
26
|
@overload
|
|
23
27
|
def udf(
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
28
|
+
*,
|
|
29
|
+
batch_size: int | None = None,
|
|
30
|
+
substitute_fn: Callable | None = None,
|
|
31
|
+
is_method: bool = False,
|
|
32
|
+
is_property: bool = False,
|
|
33
|
+
resource_pool: str | None = None,
|
|
34
|
+
type_substitutions: Sequence[dict] | None = None,
|
|
35
|
+
_force_stored: bool = False,
|
|
36
|
+
) -> Callable[[Callable], CallableFunction]: ...
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# pxt.udf() called explicitly on a Table:
|
|
40
|
+
@overload
|
|
41
|
+
def udf(
|
|
42
|
+
table: catalog.Table, /, *, return_value: Any = None, description: str | None = None
|
|
43
|
+
) -> ExprTemplateFunction: ...
|
|
31
44
|
|
|
32
45
|
|
|
33
|
-
def udf(*args, **kwargs):
|
|
46
|
+
def udf(*args, **kwargs): # type: ignore[no-untyped-def]
|
|
34
47
|
"""A decorator to create a Function from a function definition.
|
|
35
48
|
|
|
36
49
|
Examples:
|
|
@@ -39,33 +52,43 @@ def udf(*args, **kwargs):
|
|
|
39
52
|
... return x + 1
|
|
40
53
|
"""
|
|
41
54
|
if len(args) == 1 and len(kwargs) == 0 and callable(args[0]):
|
|
42
|
-
|
|
43
55
|
# Decorator invoked without parentheses: @pxt.udf
|
|
44
56
|
# Simply call make_function with defaults.
|
|
45
57
|
return make_function(decorated_fn=args[0])
|
|
46
58
|
|
|
47
|
-
|
|
59
|
+
elif len(args) == 1 and isinstance(args[0], catalog.Table):
|
|
60
|
+
# pxt.udf() called explicitly on a Table
|
|
61
|
+
return_value = kwargs.pop('return_value', None)
|
|
62
|
+
description = kwargs.pop('description', None)
|
|
63
|
+
if len(kwargs) > 0:
|
|
64
|
+
raise excs.Error(f'Invalid udf kwargs: {", ".join(kwargs.keys())}')
|
|
65
|
+
return from_table(args[0], return_value, description)
|
|
48
66
|
|
|
67
|
+
else:
|
|
49
68
|
# Decorator schema invoked with parentheses: @pxt.udf(**kwargs)
|
|
50
69
|
# Create a decorator for the specified schema.
|
|
51
70
|
batch_size = kwargs.pop('batch_size', None)
|
|
52
71
|
substitute_fn = kwargs.pop('substitute_fn', None)
|
|
53
72
|
is_method = kwargs.pop('is_method', None)
|
|
54
73
|
is_property = kwargs.pop('is_property', None)
|
|
74
|
+
resource_pool = kwargs.pop('resource_pool', None)
|
|
75
|
+
type_substitutions = kwargs.pop('type_substitutions', None)
|
|
55
76
|
force_stored = kwargs.pop('_force_stored', False)
|
|
56
77
|
if len(kwargs) > 0:
|
|
57
78
|
raise excs.Error(f'Invalid @udf decorator kwargs: {", ".join(kwargs.keys())}')
|
|
58
79
|
if len(args) > 0:
|
|
59
80
|
raise excs.Error('Unexpected @udf decorator arguments.')
|
|
60
81
|
|
|
61
|
-
def decorator(decorated_fn: Callable):
|
|
82
|
+
def decorator(decorated_fn: Callable) -> CallableFunction:
|
|
62
83
|
return make_function(
|
|
63
84
|
decorated_fn,
|
|
64
85
|
batch_size=batch_size,
|
|
65
86
|
substitute_fn=substitute_fn,
|
|
66
87
|
is_method=is_method,
|
|
67
88
|
is_property=is_property,
|
|
68
|
-
|
|
89
|
+
resource_pool=resource_pool,
|
|
90
|
+
type_substitutions=type_substitutions,
|
|
91
|
+
force_stored=force_stored,
|
|
69
92
|
)
|
|
70
93
|
|
|
71
94
|
return decorator
|
|
@@ -73,15 +96,17 @@ def udf(*args, **kwargs):
|
|
|
73
96
|
|
|
74
97
|
def make_function(
|
|
75
98
|
decorated_fn: Callable,
|
|
76
|
-
return_type:
|
|
77
|
-
param_types:
|
|
78
|
-
batch_size:
|
|
79
|
-
substitute_fn:
|
|
99
|
+
return_type: ts.ColumnType | None = None,
|
|
100
|
+
param_types: list[ts.ColumnType] | None = None,
|
|
101
|
+
batch_size: int | None = None,
|
|
102
|
+
substitute_fn: Callable | None = None,
|
|
80
103
|
is_method: bool = False,
|
|
81
104
|
is_property: bool = False,
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
105
|
+
resource_pool: str | None = None,
|
|
106
|
+
type_substitutions: Sequence[dict] | None = None,
|
|
107
|
+
function_name: str | None = None,
|
|
108
|
+
force_stored: bool = False,
|
|
109
|
+
) -> CallableFunction:
|
|
85
110
|
"""
|
|
86
111
|
Constructs a `CallableFunction` from the specified parameters.
|
|
87
112
|
If `substitute_fn` is specified, then `decorated_fn`
|
|
@@ -104,25 +129,44 @@ def make_function(
|
|
|
104
129
|
# Display name to use for error messages
|
|
105
130
|
errmsg_name = function_name if function_path is None else function_path
|
|
106
131
|
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
132
|
+
signatures: list[Signature]
|
|
133
|
+
if type_substitutions is None:
|
|
134
|
+
sig = Signature.create(decorated_fn, param_types, return_type)
|
|
135
|
+
|
|
136
|
+
# batched functions must have a batched return type
|
|
137
|
+
# TODO: remove 'Python' from the error messages when we have full inference with Annotated types
|
|
138
|
+
if batch_size is not None and not sig.is_batched:
|
|
139
|
+
raise excs.Error(f'{errmsg_name}(): batch_size is specified; Python return type must be a `Batch`')
|
|
140
|
+
if batch_size is not None and len(sig.batched_parameters) == 0:
|
|
141
|
+
raise excs.Error(f'{errmsg_name}(): batch_size is specified; at least one Python parameter must be `Batch`')
|
|
142
|
+
if batch_size is None and len(sig.batched_parameters) > 0:
|
|
143
|
+
raise excs.Error(f'{errmsg_name}(): batched parameters in udf, but no `batch_size` given')
|
|
144
|
+
|
|
145
|
+
if is_method and is_property:
|
|
146
|
+
raise excs.Error(f'Cannot specify both `is_method` and `is_property` (in function `{function_name}`)')
|
|
147
|
+
if is_property and len(sig.parameters) != 1:
|
|
148
|
+
raise excs.Error(
|
|
149
|
+
'`is_property=True` expects a UDF with exactly 1 parameter, but '
|
|
150
|
+
f'`{function_name}` has {len(sig.parameters)}'
|
|
151
|
+
)
|
|
152
|
+
if (is_method or is_property) and function_path is None:
|
|
153
|
+
raise excs.Error('Stored functions cannot be declared using `is_method` or `is_property`')
|
|
154
|
+
|
|
155
|
+
signatures = [sig]
|
|
156
|
+
else:
|
|
157
|
+
if function_path is None:
|
|
158
|
+
raise excs.Error(
|
|
159
|
+
f'{errmsg_name}(): type substitutions can only be used with module UDFs (not locally defined UDFs)'
|
|
160
|
+
)
|
|
161
|
+
if batch_size is not None:
|
|
162
|
+
raise excs.Error(f'{errmsg_name}(): type substitutions cannot be used with batched functions')
|
|
163
|
+
if is_method is not None or is_property is not None:
|
|
164
|
+
# TODO: Support this for `is_method`?
|
|
165
|
+
raise excs.Error(f'{errmsg_name}(): type substitutions cannot be used with `is_method` or `is_property`')
|
|
166
|
+
signatures = [
|
|
167
|
+
Signature.create(decorated_fn, param_types, return_type, type_substitutions=subst)
|
|
168
|
+
for subst in type_substitutions
|
|
169
|
+
]
|
|
126
170
|
|
|
127
171
|
if substitute_fn is None:
|
|
128
172
|
py_fn = decorated_fn
|
|
@@ -132,14 +176,16 @@ def make_function(
|
|
|
132
176
|
py_fn = substitute_fn
|
|
133
177
|
|
|
134
178
|
result = CallableFunction(
|
|
135
|
-
|
|
136
|
-
|
|
179
|
+
signatures=signatures,
|
|
180
|
+
py_fns=[py_fn] * len(signatures), # All signatures share the same Python function
|
|
137
181
|
self_path=function_path,
|
|
138
182
|
self_name=function_name,
|
|
139
183
|
batch_size=batch_size,
|
|
140
184
|
is_method=is_method,
|
|
141
|
-
is_property=is_property
|
|
185
|
+
is_property=is_property,
|
|
142
186
|
)
|
|
187
|
+
if resource_pool is not None:
|
|
188
|
+
result.resource_pool(lambda: resource_pool)
|
|
143
189
|
|
|
144
190
|
# If this function is part of a module, register it
|
|
145
191
|
if function_path is not None:
|
|
@@ -149,14 +195,19 @@ def make_function(
|
|
|
149
195
|
|
|
150
196
|
return result
|
|
151
197
|
|
|
198
|
+
|
|
152
199
|
@overload
|
|
153
200
|
def expr_udf(py_fn: Callable) -> ExprTemplateFunction: ...
|
|
154
201
|
|
|
202
|
+
|
|
155
203
|
@overload
|
|
156
|
-
def expr_udf(*, param_types:
|
|
204
|
+
def expr_udf(*, param_types: list[ts.ColumnType] | None = None) -> Callable[[Callable], ExprTemplateFunction]: ...
|
|
205
|
+
|
|
157
206
|
|
|
158
207
|
def expr_udf(*args: Any, **kwargs: Any) -> Any:
|
|
159
|
-
def make_expr_template(py_fn: Callable, param_types:
|
|
208
|
+
def make_expr_template(py_fn: Callable, param_types: list[ts.ColumnType] | None) -> ExprTemplateFunction:
|
|
209
|
+
from pixeltable import exprs
|
|
210
|
+
|
|
160
211
|
if py_fn.__module__ != '__main__' and py_fn.__name__.isidentifier():
|
|
161
212
|
# this is a named function in a module
|
|
162
213
|
function_path = f'{py_fn.__module__}.{py_fn.__qualname__}'
|
|
@@ -168,15 +219,15 @@ def expr_udf(*args: Any, **kwargs: Any) -> Any:
|
|
|
168
219
|
|
|
169
220
|
# construct Signature from the function signature
|
|
170
221
|
sig = Signature.create(py_fn=py_fn, param_types=param_types, return_type=ts.InvalidType())
|
|
171
|
-
|
|
222
|
+
|
|
172
223
|
var_exprs = [exprs.Variable(param.name, param.col_type) for param in sig.parameters.values()]
|
|
173
224
|
# call the function with the parameter expressions to construct an Expr with parameters
|
|
174
|
-
|
|
175
|
-
assert isinstance(
|
|
176
|
-
sig.return_type =
|
|
225
|
+
expr = py_fn(*var_exprs)
|
|
226
|
+
assert isinstance(expr, exprs.Expr)
|
|
227
|
+
sig.return_type = expr.col_type
|
|
177
228
|
if function_path is not None:
|
|
178
229
|
validate_symbol_path(function_path)
|
|
179
|
-
return ExprTemplateFunction(
|
|
230
|
+
return ExprTemplateFunction([ExprTemplate(expr, sig)], self_path=function_path, name=py_fn.__name__)
|
|
180
231
|
|
|
181
232
|
if len(args) == 1:
|
|
182
233
|
assert len(kwargs) == 0 and callable(args[0])
|
|
@@ -184,3 +235,75 @@ def expr_udf(*args: Any, **kwargs: Any) -> Any:
|
|
|
184
235
|
else:
|
|
185
236
|
assert len(args) == 0 and len(kwargs) == 1 and 'param_types' in kwargs
|
|
186
237
|
return lambda py_fn: make_expr_template(py_fn, kwargs['param_types'])
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def from_table(tbl: catalog.Table, return_value: 'exprs.Expr' | None, description: str | None) -> ExprTemplateFunction:
|
|
241
|
+
"""
|
|
242
|
+
Constructs an `ExprTemplateFunction` from a `Table`.
|
|
243
|
+
|
|
244
|
+
The constructed function will have one parameter for each data column in the table, which is optional (with
|
|
245
|
+
default None) if and only if its column type is nullable. The output of the function is a dict of the form
|
|
246
|
+
{
|
|
247
|
+
'data_col_1': Variable('data_col_1', col_type_1),
|
|
248
|
+
'data_col_2': Variable('data_col_2', col_type_2),
|
|
249
|
+
...,
|
|
250
|
+
'computed_col_1': computed_expr_1,
|
|
251
|
+
'computed_col_2': computed_expr_2,
|
|
252
|
+
...
|
|
253
|
+
}
|
|
254
|
+
where the computed expressions correspond to fully substituted expressions for the computed columns of the
|
|
255
|
+
table. In the substitution, ColumnRefs of data columns are replaced by Variable expressions, and ColumnRefs of
|
|
256
|
+
computed columns are replaced by the (previously constructed) expressions for those columns.
|
|
257
|
+
|
|
258
|
+
If an optional `return_value` is specified, then it is used as the return value of the function in place of
|
|
259
|
+
the default dict. The same substitutions will be applied to the `return_value` expression.
|
|
260
|
+
"""
|
|
261
|
+
from pixeltable import exprs
|
|
262
|
+
|
|
263
|
+
ancestors = [tbl, *tbl._get_base_tables()]
|
|
264
|
+
ancestors.reverse() # We must traverse the ancestors in order from base to derived
|
|
265
|
+
|
|
266
|
+
subst: dict[exprs.Expr, exprs.Expr] = {}
|
|
267
|
+
result_dict: dict[str, exprs.Expr] = {}
|
|
268
|
+
params: list[Parameter] = []
|
|
269
|
+
|
|
270
|
+
for t in ancestors:
|
|
271
|
+
for name, col in t._tbl_version.get().cols_by_name.items():
|
|
272
|
+
assert name not in result_dict, f'Column name is not unique: {name}'
|
|
273
|
+
if col.is_computed:
|
|
274
|
+
# Computed column. Apply any existing substitutions and add the new expression to the subst dict.
|
|
275
|
+
new_expr = col.value_expr.copy()
|
|
276
|
+
new_expr.substitute(subst)
|
|
277
|
+
subst[t[name]] = new_expr # Substitute new_expr for ColumnRefs to this column
|
|
278
|
+
result_dict[name] = new_expr
|
|
279
|
+
else:
|
|
280
|
+
# Data column. Include it as a parameter and add a variable expression as the subst dict.
|
|
281
|
+
var = exprs.Variable(name, col.col_type)
|
|
282
|
+
subst[t[name]] = var # Substitute var for ColumnRefs to this column
|
|
283
|
+
result_dict[name] = var
|
|
284
|
+
# Since this is a data column, it becomes a UDF parameter.
|
|
285
|
+
# If the column is nullable, then the parameter will have a default value of None.
|
|
286
|
+
default_value = exprs.Literal(None) if col.col_type.nullable else None
|
|
287
|
+
param = Parameter(name, col.col_type, inspect._ParameterKind.POSITIONAL_OR_KEYWORD, default_value)
|
|
288
|
+
params.append(param)
|
|
289
|
+
|
|
290
|
+
if return_value is None:
|
|
291
|
+
return_value = exprs.InlineDict(result_dict)
|
|
292
|
+
else:
|
|
293
|
+
return_value = exprs.Expr.from_object(return_value)
|
|
294
|
+
return_value = return_value.copy().substitute(subst)
|
|
295
|
+
|
|
296
|
+
if description is None:
|
|
297
|
+
# Default description is the table comment
|
|
298
|
+
description = tbl._get_comment()
|
|
299
|
+
if len(description) == 0:
|
|
300
|
+
description = f"UDF for table '{tbl._name}'"
|
|
301
|
+
|
|
302
|
+
# TODO: Use column comments as parameter descriptions, when we have them
|
|
303
|
+
argstring = '\n'.join(f' {param.name}: of type `{param.col_type}`' for param in params)
|
|
304
|
+
docstring = f'{description}\n\nArgs:\n{argstring}'
|
|
305
|
+
|
|
306
|
+
template = ExprTemplate(return_value, Signature(return_value.col_type, params))
|
|
307
|
+
fn = ExprTemplateFunction([template], name=tbl._name)
|
|
308
|
+
fn.__doc__ = docstring
|
|
309
|
+
return fn
|
pixeltable/functions/__init__.py
CHANGED
|
@@ -1,11 +1,51 @@
|
|
|
1
|
+
"""
|
|
2
|
+
General Pixeltable UDFs.
|
|
3
|
+
|
|
4
|
+
This parent module contains general-purpose UDFs that apply to multiple data types.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
# ruff: noqa: F401
|
|
8
|
+
|
|
1
9
|
from pixeltable.utils.code import local_public_names
|
|
2
10
|
|
|
3
|
-
from . import (
|
|
4
|
-
|
|
5
|
-
|
|
11
|
+
from . import (
|
|
12
|
+
anthropic,
|
|
13
|
+
audio,
|
|
14
|
+
bedrock,
|
|
15
|
+
date,
|
|
16
|
+
deepseek,
|
|
17
|
+
fal,
|
|
18
|
+
fireworks,
|
|
19
|
+
gemini,
|
|
20
|
+
groq,
|
|
21
|
+
huggingface,
|
|
22
|
+
image,
|
|
23
|
+
json,
|
|
24
|
+
llama_cpp,
|
|
25
|
+
math,
|
|
26
|
+
mistralai,
|
|
27
|
+
net,
|
|
28
|
+
ollama,
|
|
29
|
+
openai,
|
|
30
|
+
openrouter,
|
|
31
|
+
replicate,
|
|
32
|
+
reve,
|
|
33
|
+
string,
|
|
34
|
+
timestamp,
|
|
35
|
+
together,
|
|
36
|
+
twelvelabs,
|
|
37
|
+
uuid,
|
|
38
|
+
video,
|
|
39
|
+
vision,
|
|
40
|
+
voyageai,
|
|
41
|
+
whisper,
|
|
42
|
+
whisperx,
|
|
43
|
+
yolox,
|
|
44
|
+
)
|
|
45
|
+
from .globals import count, map, max, mean, min, sum
|
|
6
46
|
|
|
7
47
|
__all__ = local_public_names(__name__, exclude=['globals']) + local_public_names(globals.__name__)
|
|
8
48
|
|
|
9
49
|
|
|
10
|
-
def __dir__():
|
|
50
|
+
def __dir__() -> list[str]:
|
|
11
51
|
return __all__
|