pixeltable-0.2.25-py3-none-any.whl → pixeltable-0.3.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +2 -2
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +1 -1
- pixeltable/catalog/dir.py +6 -0
- pixeltable/catalog/globals.py +25 -0
- pixeltable/catalog/named_function.py +4 -0
- pixeltable/catalog/path_dict.py +37 -11
- pixeltable/catalog/schema_object.py +6 -0
- pixeltable/catalog/table.py +421 -231
- pixeltable/catalog/table_version.py +22 -8
- pixeltable/catalog/view.py +5 -7
- pixeltable/dataframe.py +439 -105
- pixeltable/env.py +19 -5
- pixeltable/exec/__init__.py +1 -1
- pixeltable/exec/exec_node.py +6 -7
- pixeltable/exec/expr_eval_node.py +1 -1
- pixeltable/exec/sql_node.py +92 -45
- pixeltable/exprs/__init__.py +1 -0
- pixeltable/exprs/arithmetic_expr.py +1 -1
- pixeltable/exprs/array_slice.py +1 -1
- pixeltable/exprs/column_property_ref.py +1 -1
- pixeltable/exprs/column_ref.py +29 -2
- pixeltable/exprs/comparison.py +1 -1
- pixeltable/exprs/compound_predicate.py +1 -1
- pixeltable/exprs/expr.py +12 -5
- pixeltable/exprs/expr_set.py +8 -0
- pixeltable/exprs/function_call.py +147 -39
- pixeltable/exprs/in_predicate.py +1 -1
- pixeltable/exprs/inline_expr.py +25 -5
- pixeltable/exprs/is_null.py +1 -1
- pixeltable/exprs/json_mapper.py +1 -1
- pixeltable/exprs/json_path.py +1 -1
- pixeltable/exprs/method_ref.py +1 -1
- pixeltable/exprs/row_builder.py +1 -1
- pixeltable/exprs/rowid_ref.py +1 -1
- pixeltable/exprs/similarity_expr.py +14 -7
- pixeltable/exprs/sql_element_cache.py +4 -0
- pixeltable/exprs/type_cast.py +2 -2
- pixeltable/exprs/variable.py +3 -0
- pixeltable/func/__init__.py +5 -4
- pixeltable/func/aggregate_function.py +151 -68
- pixeltable/func/callable_function.py +48 -16
- pixeltable/func/expr_template_function.py +64 -23
- pixeltable/func/function.py +195 -27
- pixeltable/func/function_registry.py +2 -1
- pixeltable/func/query_template_function.py +51 -9
- pixeltable/func/signature.py +64 -7
- pixeltable/func/tools.py +153 -0
- pixeltable/func/udf.py +57 -35
- pixeltable/functions/__init__.py +2 -2
- pixeltable/functions/anthropic.py +51 -4
- pixeltable/functions/gemini.py +85 -0
- pixeltable/functions/globals.py +54 -34
- pixeltable/functions/huggingface.py +10 -28
- pixeltable/functions/json.py +3 -8
- pixeltable/functions/math.py +67 -0
- pixeltable/functions/ollama.py +8 -8
- pixeltable/functions/openai.py +51 -4
- pixeltable/functions/timestamp.py +1 -1
- pixeltable/functions/video.py +3 -9
- pixeltable/functions/vision.py +1 -1
- pixeltable/globals.py +354 -80
- pixeltable/index/embedding_index.py +106 -34
- pixeltable/io/__init__.py +1 -1
- pixeltable/io/label_studio.py +1 -1
- pixeltable/io/parquet.py +39 -19
- pixeltable/iterators/document.py +12 -0
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_16.py +2 -1
- pixeltable/metadata/converters/convert_17.py +2 -1
- pixeltable/metadata/converters/convert_22.py +17 -0
- pixeltable/metadata/converters/convert_23.py +35 -0
- pixeltable/metadata/converters/convert_24.py +56 -0
- pixeltable/metadata/converters/convert_25.py +19 -0
- pixeltable/metadata/converters/util.py +4 -2
- pixeltable/metadata/notes.py +4 -0
- pixeltable/metadata/schema.py +1 -0
- pixeltable/plan.py +128 -50
- pixeltable/store.py +1 -1
- pixeltable/type_system.py +196 -54
- pixeltable/utils/arrow.py +8 -3
- pixeltable/utils/description_helper.py +89 -0
- pixeltable/utils/documents.py +14 -0
- {pixeltable-0.2.25.dist-info → pixeltable-0.3.0.dist-info}/METADATA +30 -20
- pixeltable-0.3.0.dist-info/RECORD +155 -0
- {pixeltable-0.2.25.dist-info → pixeltable-0.3.0.dist-info}/WHEEL +1 -1
- pixeltable-0.3.0.dist-info/entry_points.txt +3 -0
- pixeltable/tool/create_test_db_dump.py +0 -311
- pixeltable/tool/create_test_video.py +0 -81
- pixeltable/tool/doc_plugins/griffe.py +0 -50
- pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
- pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
- pixeltable/tool/embed_udf.py +0 -9
- pixeltable/tool/mypy_plugin.py +0 -55
- pixeltable-0.2.25.dist-info/RECORD +0 -154
- pixeltable-0.2.25.dist-info/entry_points.txt +0 -3
- {pixeltable-0.2.25.dist-info → pixeltable-0.3.0.dist-info}/LICENSE +0 -0
pixeltable/exprs/function_call.py
CHANGED
|
@@ -15,6 +15,7 @@ import pixeltable.type_system as ts
|
|
|
15
15
|
from .data_row import DataRow
|
|
16
16
|
from .expr import Expr
|
|
17
17
|
from .inline_expr import InlineDict, InlineList
|
|
18
|
+
from .literal import Literal
|
|
18
19
|
from .row_builder import RowBuilder
|
|
19
20
|
from .rowid_ref import RowidRef
|
|
20
21
|
from .sql_element_cache import SqlElementCache
|
|
@@ -34,6 +35,7 @@ class FunctionCall(Expr):
|
|
|
34
35
|
|
|
35
36
|
arg_types: list[ts.ColumnType]
|
|
36
37
|
kwarg_types: dict[str, ts.ColumnType]
|
|
38
|
+
return_type: ts.ColumnType
|
|
37
39
|
group_by_start_idx: int
|
|
38
40
|
group_by_stop_idx: int
|
|
39
41
|
fn_expr_idx: int
|
|
@@ -43,17 +45,25 @@ class FunctionCall(Expr):
|
|
|
43
45
|
current_partition_vals: Optional[list[Any]]
|
|
44
46
|
|
|
45
47
|
def __init__(
|
|
46
|
-
|
|
47
|
-
|
|
48
|
+
self,
|
|
49
|
+
fn: func.Function,
|
|
50
|
+
bound_args: dict[str, Any],
|
|
51
|
+
return_type: ts.ColumnType,
|
|
52
|
+
order_by_clause: Optional[list[Any]] = None,
|
|
53
|
+
group_by_clause: Optional[list[Any]] = None,
|
|
54
|
+
is_method_call: bool = False
|
|
55
|
+
):
|
|
48
56
|
if order_by_clause is None:
|
|
49
57
|
order_by_clause = []
|
|
50
58
|
if group_by_clause is None:
|
|
51
59
|
group_by_clause = []
|
|
52
|
-
|
|
53
|
-
|
|
60
|
+
|
|
61
|
+
assert not fn.is_polymorphic
|
|
62
|
+
|
|
54
63
|
self.fn = fn
|
|
55
64
|
self.is_method_call = is_method_call
|
|
56
|
-
|
|
65
|
+
|
|
66
|
+
signature = fn.signature
|
|
57
67
|
|
|
58
68
|
# If `return_type` is non-nullable, but the function call has a nullable input to any of its non-nullable
|
|
59
69
|
# parameters, then we need to make it nullable. This is because Pixeltable defaults a function output to
|
|
@@ -67,6 +77,8 @@ class FunctionCall(Expr):
|
|
|
67
77
|
return_type = return_type.copy(nullable=True)
|
|
68
78
|
break
|
|
69
79
|
|
|
80
|
+
self.return_type = return_type
|
|
81
|
+
|
|
70
82
|
super().__init__(return_type)
|
|
71
83
|
|
|
72
84
|
self.agg_init_args = {}
|
|
@@ -74,9 +86,9 @@ class FunctionCall(Expr):
|
|
|
74
86
|
# we separate out the init args for the aggregator
|
|
75
87
|
assert isinstance(fn, func.AggregateFunction)
|
|
76
88
|
self.agg_init_args = {
|
|
77
|
-
arg_name: arg for arg_name, arg in bound_args.items() if arg_name in fn.init_param_names
|
|
89
|
+
arg_name: arg for arg_name, arg in bound_args.items() if arg_name in fn.init_param_names[0]
|
|
78
90
|
}
|
|
79
|
-
bound_args = {arg_name: arg for arg_name, arg in bound_args.items() if arg_name not in fn.init_param_names}
|
|
91
|
+
bound_args = {arg_name: arg for arg_name, arg in bound_args.items() if arg_name not in fn.init_param_names[0]}
|
|
80
92
|
|
|
81
93
|
# construct components, args, kwargs
|
|
82
94
|
self.args = []
|
|
@@ -85,8 +97,10 @@ class FunctionCall(Expr):
|
|
|
85
97
|
# we record the types of non-variable parameters for runtime type checks
|
|
86
98
|
self.arg_types = []
|
|
87
99
|
self.kwarg_types = {}
|
|
100
|
+
|
|
88
101
|
# the prefix of parameters that are bound can be passed by position
|
|
89
|
-
|
|
102
|
+
processed_args: set[str] = set()
|
|
103
|
+
for py_param in signature.py_signature.parameters.values():
|
|
90
104
|
if py_param.name not in bound_args or py_param.kind == inspect.Parameter.KEYWORD_ONLY:
|
|
91
105
|
break
|
|
92
106
|
arg = bound_args[py_param.name]
|
|
@@ -97,18 +111,19 @@ class FunctionCall(Expr):
|
|
|
97
111
|
self.args.append((None, arg))
|
|
98
112
|
if py_param.kind != inspect.Parameter.VAR_POSITIONAL and py_param.kind != inspect.Parameter.VAR_KEYWORD:
|
|
99
113
|
self.arg_types.append(signature.parameters[py_param.name].col_type)
|
|
114
|
+
processed_args.add(py_param.name)
|
|
100
115
|
|
|
101
116
|
# the remaining args are passed as keywords
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
117
|
+
for param_name in bound_args.keys():
|
|
118
|
+
if param_name not in processed_args:
|
|
119
|
+
arg = bound_args[param_name]
|
|
120
|
+
if isinstance(arg, Expr):
|
|
121
|
+
self.kwargs[param_name] = (len(self.components), None)
|
|
122
|
+
self.components.append(arg.copy())
|
|
123
|
+
else:
|
|
124
|
+
self.kwargs[param_name] = (None, arg)
|
|
125
|
+
if signature.py_signature.parameters[param_name].kind != inspect.Parameter.VAR_KEYWORD:
|
|
126
|
+
self.kwarg_types[param_name] = signature.parameters[param_name].col_type
|
|
112
127
|
|
|
113
128
|
# window function state:
|
|
114
129
|
# self.components[self.group_by_start_idx:self.group_by_stop_idx] contains group_by exprs
|
|
@@ -126,7 +141,7 @@ class FunctionCall(Expr):
|
|
|
126
141
|
|
|
127
142
|
if isinstance(self.fn, func.ExprTemplateFunction):
|
|
128
143
|
# we instantiate the template to create an Expr that can be evaluated and record that as a component
|
|
129
|
-
fn_expr = self.fn.instantiate(
|
|
144
|
+
fn_expr = self.fn.instantiate([], bound_args)
|
|
130
145
|
self.components.append(fn_expr)
|
|
131
146
|
self.fn_expr_idx = len(self.components) - 1
|
|
132
147
|
else:
|
|
@@ -184,11 +199,6 @@ class FunctionCall(Expr):
|
|
|
184
199
|
pass
|
|
185
200
|
|
|
186
201
|
if not isinstance(arg, Expr):
|
|
187
|
-
# make sure that non-Expr args are json-serializable and are literals of the correct type
|
|
188
|
-
try:
|
|
189
|
-
_ = json.dumps(arg)
|
|
190
|
-
except TypeError:
|
|
191
|
-
raise excs.Error(f'Argument for parameter {param_name!r} is not json-serializable: {arg} (of type {type(arg)})')
|
|
192
202
|
if arg is not None:
|
|
193
203
|
try:
|
|
194
204
|
param_type = param.col_type
|
|
@@ -255,7 +265,7 @@ class FunctionCall(Expr):
|
|
|
255
265
|
('order_by_start_idx', self.order_by_start_idx)
|
|
256
266
|
]
|
|
257
267
|
|
|
258
|
-
def
|
|
268
|
+
def __repr__(self) -> str:
|
|
259
269
|
return self.display_str()
|
|
260
270
|
|
|
261
271
|
def display_str(self, inline: bool = True) -> str:
|
|
@@ -357,7 +367,7 @@ class FunctionCall(Expr):
|
|
|
357
367
|
"""
|
|
358
368
|
assert self.is_agg_fn_call
|
|
359
369
|
assert isinstance(self.fn, func.AggregateFunction)
|
|
360
|
-
self.aggregator = self.fn.
|
|
370
|
+
self.aggregator = self.fn.agg_class(**self.agg_init_args)
|
|
361
371
|
|
|
362
372
|
def update(self, data_row: DataRow) -> None:
|
|
363
373
|
"""
|
|
@@ -429,27 +439,32 @@ class FunctionCall(Expr):
|
|
|
429
439
|
data_row[self.slot_idx] = self.fn.py_fn(*args, **kwargs)
|
|
430
440
|
elif self.is_window_fn_call:
|
|
431
441
|
assert isinstance(self.fn, func.AggregateFunction)
|
|
442
|
+
agg_cls = self.fn.agg_class
|
|
432
443
|
if self.has_group_by():
|
|
433
444
|
if self.current_partition_vals is None:
|
|
434
445
|
self.current_partition_vals = [None] * len(self.group_by)
|
|
435
446
|
partition_vals = [data_row[e.slot_idx] for e in self.group_by]
|
|
436
447
|
if partition_vals != self.current_partition_vals:
|
|
437
448
|
# new partition
|
|
438
|
-
self.aggregator =
|
|
449
|
+
self.aggregator = agg_cls(**self.agg_init_args)
|
|
439
450
|
self.current_partition_vals = partition_vals
|
|
440
451
|
elif self.aggregator is None:
|
|
441
|
-
self.aggregator =
|
|
452
|
+
self.aggregator = agg_cls(**self.agg_init_args)
|
|
442
453
|
self.aggregator.update(*args)
|
|
443
454
|
data_row[self.slot_idx] = self.aggregator.value()
|
|
444
455
|
else:
|
|
445
|
-
data_row[self.slot_idx] = self.fn.exec(
|
|
456
|
+
data_row[self.slot_idx] = self.fn.exec(args, kwargs)
|
|
446
457
|
|
|
447
458
|
def _as_dict(self) -> dict:
|
|
448
459
|
result = {
|
|
449
|
-
'fn': self.fn.as_dict(),
|
|
450
|
-
'
|
|
460
|
+
'fn': self.fn.as_dict(),
|
|
461
|
+
'args': self.args,
|
|
462
|
+
'kwargs': self.kwargs,
|
|
463
|
+
'return_type': self.return_type.as_dict(),
|
|
464
|
+
'group_by_start_idx': self.group_by_start_idx,
|
|
465
|
+
'group_by_stop_idx': self.group_by_stop_idx,
|
|
451
466
|
'order_by_start_idx': self.order_by_start_idx,
|
|
452
|
-
**super()._as_dict()
|
|
467
|
+
**super()._as_dict(),
|
|
453
468
|
}
|
|
454
469
|
return result
|
|
455
470
|
|
|
@@ -458,15 +473,108 @@ class FunctionCall(Expr):
|
|
|
458
473
|
assert 'fn' in d
|
|
459
474
|
assert 'args' in d
|
|
460
475
|
assert 'kwargs' in d
|
|
461
|
-
|
|
476
|
+
|
|
462
477
|
fn = func.Function.from_dict(d['fn'])
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
bound_args.update(
|
|
466
|
-
{param_name: val if idx is None else components[idx] for param_name, (idx, val) in d['kwargs'].items()})
|
|
478
|
+
assert not fn.is_polymorphic
|
|
479
|
+
return_type = ts.ColumnType.from_dict(d['return_type']) if 'return_type' in d else None
|
|
467
480
|
group_by_exprs = components[d['group_by_start_idx']:d['group_by_stop_idx']]
|
|
468
481
|
order_by_exprs = components[d['order_by_start_idx']:]
|
|
482
|
+
|
|
483
|
+
args = [
|
|
484
|
+
expr if idx is None else components[idx]
|
|
485
|
+
for idx, expr in d['args']
|
|
486
|
+
]
|
|
487
|
+
kwargs = {
|
|
488
|
+
param_name: (expr if idx is None else components[idx])
|
|
489
|
+
for param_name, (idx, expr) in d['kwargs'].items()
|
|
490
|
+
}
|
|
491
|
+
|
|
492
|
+
# `Function.from_dict()` does signature matching, so it is safe to assume that `args` and `kwargs` are
|
|
493
|
+
# consistent with its signature.
|
|
494
|
+
|
|
495
|
+
# Reassemble bound_args. Note that args and kwargs represent "already bound arguments": they are not bindable
|
|
496
|
+
# in the Python sense, because variable args (such as *args and **kwargs) have already been condensed.
|
|
497
|
+
param_names = list(fn.signature.parameters.keys())
|
|
498
|
+
bound_args = {param_names[i]: arg for i, arg in enumerate(args)}
|
|
499
|
+
bound_args.update(kwargs.items())
|
|
500
|
+
|
|
501
|
+
# TODO: In order to properly invoke call_return_type, we need to ensure that any InlineLists or InlineDicts
|
|
502
|
+
# in bound_args are unpacked into Python lists/dicts. There is an open task to ensure this is true in general;
|
|
503
|
+
# for now, as a hack, we do the unpacking here for the specific case of an InlineList of Literals (the only
|
|
504
|
+
# case where this is necessary to support existing conditional_return_type implementations). Once the general
|
|
505
|
+
# pattern is implemented, we can remove this hack.
|
|
506
|
+
unpacked_bound_args = {
|
|
507
|
+
param_name: cls.__unpack_bound_arg(arg) for param_name, arg in bound_args.items()
|
|
508
|
+
}
|
|
509
|
+
|
|
510
|
+
# Evaluate the call_return_type as defined in the current codebase.
|
|
511
|
+
call_return_type = fn.call_return_type([], unpacked_bound_args)
|
|
512
|
+
|
|
513
|
+
if return_type is None:
|
|
514
|
+
# Schema versions prior to 25 did not store the return_type in metadata, and there is no obvious way to
|
|
515
|
+
# infer it during DB migration, so we might encounter a stored return_type of None. In that case, we use
|
|
516
|
+
# the call_return_type that we just inferred (which matches the deserialization behavior prior to
|
|
517
|
+
# version 25).
|
|
518
|
+
return_type = call_return_type
|
|
519
|
+
else:
|
|
520
|
+
# There is a return_type stored in metadata (schema version >= 25).
|
|
521
|
+
# Check that the stored return_type of the UDF call matches the column type of the FunctionCall, and
|
|
522
|
+
# fail-fast if it doesn't (otherwise we risk getting downstream database errors).
|
|
523
|
+
# TODO: Handle this more gracefully (instead of failing the DB load, allow the DB load to succeed, but
|
|
524
|
+
# mark this FunctionCall as unusable). It's the same issue as dealing with a renamed UDF or Function
|
|
525
|
+
# signature mismatch.
|
|
526
|
+
if not return_type.is_supertype_of(call_return_type, ignore_nullable=True):
|
|
527
|
+
raise excs.Error(
|
|
528
|
+
f'The return type stored in the database for a UDF call to `{fn.self_path}` no longer matches the '
|
|
529
|
+
f'return type of the UDF as currently defined in the code.\nThis probably means that the code for '
|
|
530
|
+
f'`{fn.self_path}` has changed in a backward-incompatible way.\n'
|
|
531
|
+
f'Return type in database: `{return_type}`\n'
|
|
532
|
+
f'Return type as currently defined: `{call_return_type}`'
|
|
533
|
+
)
|
|
534
|
+
|
|
469
535
|
fn_call = cls(
|
|
470
|
-
|
|
471
|
-
|
|
536
|
+
fn,
|
|
537
|
+
bound_args,
|
|
538
|
+
return_type,
|
|
539
|
+
group_by_clause=group_by_exprs,
|
|
540
|
+
order_by_clause=order_by_exprs
|
|
541
|
+
)
|
|
472
542
|
return fn_call
|
|
543
|
+
|
|
544
|
+
@classmethod
|
|
545
|
+
def __find_matching_signature(cls, fn: func.Function, args: list[Any], kwargs: dict[str, Any]) -> Optional[int]:
|
|
546
|
+
for idx, sig in enumerate(fn.signatures):
|
|
547
|
+
if cls.__signature_matches(sig, args, kwargs):
|
|
548
|
+
return idx
|
|
549
|
+
return None
|
|
550
|
+
|
|
551
|
+
@classmethod
|
|
552
|
+
def __signature_matches(cls, sig: func.Signature, args: list[Any], kwargs: dict[str, Any]) -> bool:
|
|
553
|
+
unbound_parameters = set(sig.parameters.keys())
|
|
554
|
+
for i, arg in enumerate(args):
|
|
555
|
+
if i >= len(sig.parameters_by_pos):
|
|
556
|
+
return False
|
|
557
|
+
param = sig.parameters_by_pos[i]
|
|
558
|
+
arg_type = arg.col_type if isinstance(arg, Expr) else ts.ColumnType.infer_literal_type(arg)
|
|
559
|
+
if param.col_type is not None and not param.col_type.is_supertype_of(arg_type, ignore_nullable=True):
|
|
560
|
+
return False
|
|
561
|
+
unbound_parameters.remove(param.name)
|
|
562
|
+
for param_name, arg in kwargs.items():
|
|
563
|
+
if param_name not in unbound_parameters:
|
|
564
|
+
return False
|
|
565
|
+
param = sig.parameters[param_name]
|
|
566
|
+
arg_type = arg.col_type if isinstance(arg, Expr) else ts.ColumnType.infer_literal_type(arg)
|
|
567
|
+
if param.col_type is not None and not param.col_type.is_supertype_of(arg_type, ignore_nullable=True):
|
|
568
|
+
return False
|
|
569
|
+
unbound_parameters.remove(param_name)
|
|
570
|
+
for param_name in unbound_parameters:
|
|
571
|
+
param = sig.parameters[param_name]
|
|
572
|
+
if not param.has_default:
|
|
573
|
+
return False
|
|
574
|
+
return True
|
|
575
|
+
|
|
576
|
+
@classmethod
|
|
577
|
+
def __unpack_bound_arg(cls, arg: Any) -> Any:
|
|
578
|
+
if isinstance(arg, InlineList) and all(isinstance(el, Literal) for el in arg.components):
|
|
579
|
+
return [el.val for el in arg.components]
|
|
580
|
+
return arg
|
pixeltable/exprs/in_predicate.py
CHANGED
|
@@ -61,7 +61,7 @@ class InPredicate(Expr):
|
|
|
61
61
|
pass
|
|
62
62
|
return result
|
|
63
63
|
|
|
64
|
-
def
|
|
64
|
+
def __repr__(self) -> str:
|
|
65
65
|
if self.value_list is not None:
|
|
66
66
|
return f'{self.components[0]}.isin({self.value_list})'
|
|
67
67
|
return f'{self.components[0]}.isin({self.components[1]})'
|
pixeltable/exprs/inline_expr.py
CHANGED
|
@@ -56,7 +56,7 @@ class InlineArray(Expr):
|
|
|
56
56
|
self.components.extend(exprs)
|
|
57
57
|
self.id = self._create_id()
|
|
58
58
|
|
|
59
|
-
def
|
|
59
|
+
def __repr__(self) -> str:
|
|
60
60
|
elem_strs = [str(expr) for expr in self.components]
|
|
61
61
|
return f'[{", ".join(elem_strs)}]'
|
|
62
62
|
|
|
@@ -101,11 +101,17 @@ class InlineList(Expr):
|
|
|
101
101
|
else:
|
|
102
102
|
exprs.append(Literal(el))
|
|
103
103
|
|
|
104
|
-
|
|
104
|
+
json_schema = {
|
|
105
|
+
'type': 'array',
|
|
106
|
+
'prefixItems': [expr.col_type.to_json_schema() for expr in exprs],
|
|
107
|
+
'items': False # No additional items (fixed length)
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
super().__init__(ts.JsonType(json_schema))
|
|
105
111
|
self.components.extend(exprs)
|
|
106
112
|
self.id = self._create_id()
|
|
107
113
|
|
|
108
|
-
def
|
|
114
|
+
def __repr__(self) -> str:
|
|
109
115
|
elem_strs = [str(expr) for expr in self.components]
|
|
110
116
|
return f'[{", ".join(elem_strs)}]'
|
|
111
117
|
|
|
@@ -149,11 +155,25 @@ class InlineDict(Expr):
|
|
|
149
155
|
else:
|
|
150
156
|
exprs.append(Literal(val))
|
|
151
157
|
|
|
152
|
-
|
|
158
|
+
json_schema: Optional[dict[str, Any]]
|
|
159
|
+
try:
|
|
160
|
+
json_schema = {
|
|
161
|
+
'type': 'object',
|
|
162
|
+
'properties': {
|
|
163
|
+
key: expr.col_type.to_json_schema()
|
|
164
|
+
for key, expr in zip(self.keys, exprs)
|
|
165
|
+
},
|
|
166
|
+
}
|
|
167
|
+
except excs.Error:
|
|
168
|
+
# InlineDicts are used to store iterator arguments, which are not required to be valid JSON types,
|
|
169
|
+
# so we can't always construct a valid schema.
|
|
170
|
+
json_schema = None
|
|
171
|
+
|
|
172
|
+
super().__init__(ts.JsonType(json_schema))
|
|
153
173
|
self.components.extend(exprs)
|
|
154
174
|
self.id = self._create_id()
|
|
155
175
|
|
|
156
|
-
def
|
|
176
|
+
def __repr__(self) -> str:
|
|
157
177
|
item_strs = list(f"'{key}': {str(expr)}" for key, expr in zip(self.keys, self.components))
|
|
158
178
|
return '{' + ', '.join(item_strs) + '}'
|
|
159
179
|
|
pixeltable/exprs/is_null.py
CHANGED
pixeltable/exprs/json_mapper.py
CHANGED
|
@@ -69,7 +69,7 @@ class JsonMapper(Expr):
|
|
|
69
69
|
return False
|
|
70
70
|
return self._src_expr.equals(other._src_expr) and self._target_expr.equals(other._target_expr)
|
|
71
71
|
|
|
72
|
-
def
|
|
72
|
+
def __repr__(self) -> str:
|
|
73
73
|
return f'{str(self._src_expr)} >> {str(self._target_expr)}'
|
|
74
74
|
|
|
75
75
|
@property
|
pixeltable/exprs/json_path.py
CHANGED
|
@@ -42,7 +42,7 @@ class JsonPath(Expr):
|
|
|
42
42
|
# this is not a problem, because _create_id() shouldn't be called after init()
|
|
43
43
|
self.id = self._create_id()
|
|
44
44
|
|
|
45
|
-
def
|
|
45
|
+
def __repr__(self) -> str:
|
|
46
46
|
# else "R": the anchor is RELATIVE_PATH_ROOT
|
|
47
47
|
return (f'{str(self._anchor) if self._anchor is not None else "R"}'
|
|
48
48
|
f'{"." if isinstance(self.path_elements[0], str) else ""}{self._json_path()}')
|
pixeltable/exprs/method_ref.py
CHANGED
pixeltable/exprs/row_builder.py
CHANGED
|
@@ -368,7 +368,7 @@ class RowBuilder:
|
|
|
368
368
|
if not ignore_errors:
|
|
369
369
|
input_vals = [data_row[d.slot_idx] for d in expr.dependencies()]
|
|
370
370
|
raise excs.ExprEvalError(
|
|
371
|
-
expr, f'expression {expr}', data_row.get_exc(expr.slot_idx), exc_tb, input_vals, 0)
|
|
371
|
+
expr, f'expression {expr}', data_row.get_exc(expr.slot_idx), exc_tb, input_vals, 0) from exc
|
|
372
372
|
|
|
373
373
|
def create_table_row(self, data_row: DataRow, exc_col_ids: set[int]) -> tuple[dict[str, Any], int]:
|
|
374
374
|
"""Create a table row from the slots that have an output column assigned
|
pixeltable/exprs/rowid_ref.py
CHANGED
|
@@ -55,7 +55,7 @@ class RowidRef(Expr):
|
|
|
55
55
|
return super()._id_attrs() +\
|
|
56
56
|
[('normalized_base_id', self.normalized_base_id), ('idx', self.rowid_component_idx)]
|
|
57
57
|
|
|
58
|
-
def
|
|
58
|
+
def __repr__(self) -> str:
|
|
59
59
|
# check if this is the pos column of a component view
|
|
60
60
|
tbl = self.tbl if self.tbl is not None else catalog.Catalog.get().tbl_versions[(self.tbl_id, None)]
|
|
61
61
|
if tbl.is_component_view() and self.rowid_component_idx == tbl.store_tbl.pos_col_idx: # type: ignore[attr-defined]
|
pixeltable/exprs/similarity_expr.py
CHANGED
|
@@ -23,7 +23,6 @@ class SimilarityExpr(Expr):
|
|
|
23
23
|
assert item_expr.col_type.is_string_type() or item_expr.col_type.is_image_type()
|
|
24
24
|
|
|
25
25
|
self.components = [col_ref, item_expr]
|
|
26
|
-
self.id = self._create_id()
|
|
27
26
|
|
|
28
27
|
# determine index to use
|
|
29
28
|
idx_info = col_ref.col.get_idx_info()
|
|
@@ -48,16 +47,20 @@ class SimilarityExpr(Expr):
|
|
|
48
47
|
|
|
49
48
|
if item_expr.col_type.is_string_type() and idx.string_embed is None:
|
|
50
49
|
raise excs.Error(
|
|
51
|
-
f'Embedding index {self.idx_info.name!r} on column {self.idx_info.col.name!r}
|
|
52
|
-
f"
|
|
50
|
+
f'Embedding index {self.idx_info.name!r} on column {self.idx_info.col.name!r} does not have a '
|
|
51
|
+
f"string embedding and does not support string queries")
|
|
53
52
|
if item_expr.col_type.is_image_type() and idx.image_embed is None:
|
|
54
53
|
raise excs.Error(
|
|
55
|
-
f'Embedding index {self.idx_info.name!r} on column {self.idx_info.col.name!r}
|
|
56
|
-
f"
|
|
54
|
+
f'Embedding index {self.idx_info.name!r} on column {self.idx_info.col.name!r} does not have an '
|
|
55
|
+
f"image embedding and does not support image queries")
|
|
56
|
+
self.id = self._create_id()
|
|
57
57
|
|
|
58
|
-
def
|
|
58
|
+
def __repr__(self) -> str:
|
|
59
59
|
return f'{self.components[0]}.similarity({self.components[1]})'
|
|
60
60
|
|
|
61
|
+
def _id_attrs(self):
|
|
62
|
+
return super()._id_attrs() + [('idx_name', self.idx_info.name)]
|
|
63
|
+
|
|
61
64
|
def default_column_name(self) -> str:
|
|
62
65
|
return 'similarity'
|
|
63
66
|
|
|
@@ -81,8 +84,12 @@ class SimilarityExpr(Expr):
|
|
|
81
84
|
# this should never get called
|
|
82
85
|
assert False
|
|
83
86
|
|
|
87
|
+
def _as_dict(self) -> dict:
|
|
88
|
+
return {'idx_name': self.idx_info.name, **super()._as_dict()}
|
|
89
|
+
|
|
84
90
|
@classmethod
|
|
85
91
|
def _from_dict(cls, d: dict, components: list[Expr]) -> 'SimilarityExpr':
|
|
92
|
+
iname = d['idx_name'] if 'idx_name' in d else None
|
|
86
93
|
assert len(components) == 2
|
|
87
94
|
assert isinstance(components[0], ColumnRef)
|
|
88
|
-
return cls(components[0], components[1])
|
|
95
|
+
return cls(components[0], components[1], idx_name=iname)
|
pixeltable/exprs/sql_element_cache.py
CHANGED
|
@@ -17,6 +17,10 @@ class SqlElementCache:
|
|
|
17
17
|
for e, el in elements.items():
|
|
18
18
|
self.cache[e.id] = el
|
|
19
19
|
|
|
20
|
+
def extend(self, elements: ExprDict[sql.ColumnElement]):
|
|
21
|
+
for e, el in elements.items():
|
|
22
|
+
self.cache[e.id] = el
|
|
23
|
+
|
|
20
24
|
def get(self, e: Expr) -> Optional[sql.ColumnElement]:
|
|
21
25
|
"""Returns the sql.ColumnElement for the given Expr, or None if Expr.to_sql() returns None."""
|
|
22
26
|
try:
|
pixeltable/exprs/type_cast.py
CHANGED
|
@@ -51,5 +51,5 @@ class TypeCast(Expr):
|
|
|
51
51
|
assert len(components) == 1
|
|
52
52
|
return cls(components[0], ts.ColumnType.from_dict(d['new_type']))
|
|
53
53
|
|
|
54
|
-
def
|
|
55
|
-
return f'{self._underlying}.astype({self.col_type})'
|
|
54
|
+
def __repr__(self) -> str:
|
|
55
|
+
return f'{self._underlying}.astype({self.col_type._to_str(as_schema=True)})'
|
pixeltable/exprs/variable.py
CHANGED
pixeltable/func/__init__.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
|
-
from .aggregate_function import
|
|
1
|
+
from .aggregate_function import AggregateFunction, Aggregator, uda
|
|
2
2
|
from .callable_function import CallableFunction
|
|
3
3
|
from .expr_template_function import ExprTemplateFunction
|
|
4
4
|
from .function import Function
|
|
5
5
|
from .function_registry import FunctionRegistry
|
|
6
|
-
from .query_template_function import QueryTemplateFunction
|
|
7
|
-
from .signature import
|
|
8
|
-
from .
|
|
6
|
+
from .query_template_function import QueryTemplateFunction, query
|
|
7
|
+
from .signature import Batch, Parameter, Signature
|
|
8
|
+
from .tools import Tool, ToolChoice, Tools
|
|
9
|
+
from .udf import expr_udf, make_function, udf
|