pixeltable 0.4.6__py3-none-any.whl → 0.4.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +4 -2
- pixeltable/catalog/__init__.py +1 -1
- pixeltable/catalog/catalog.py +7 -9
- pixeltable/catalog/column.py +49 -0
- pixeltable/catalog/insertable_table.py +0 -7
- pixeltable/catalog/schema_object.py +1 -14
- pixeltable/catalog/table.py +180 -67
- pixeltable/catalog/table_version.py +42 -146
- pixeltable/catalog/table_version_path.py +6 -5
- pixeltable/catalog/view.py +2 -1
- pixeltable/config.py +24 -9
- pixeltable/dataframe.py +5 -6
- pixeltable/env.py +113 -21
- pixeltable/exec/aggregation_node.py +1 -1
- pixeltable/exec/cache_prefetch_node.py +4 -3
- pixeltable/exec/exec_node.py +0 -8
- pixeltable/exec/expr_eval/expr_eval_node.py +2 -2
- pixeltable/exec/expr_eval/globals.py +1 -0
- pixeltable/exec/expr_eval/schedulers.py +52 -19
- pixeltable/exec/in_memory_data_node.py +2 -3
- pixeltable/exprs/array_slice.py +2 -2
- pixeltable/exprs/data_row.py +15 -2
- pixeltable/exprs/expr.py +9 -9
- pixeltable/exprs/function_call.py +61 -23
- pixeltable/exprs/globals.py +1 -2
- pixeltable/exprs/json_path.py +3 -3
- pixeltable/exprs/row_builder.py +25 -21
- pixeltable/exprs/string_op.py +3 -3
- pixeltable/func/expr_template_function.py +6 -3
- pixeltable/func/query_template_function.py +2 -2
- pixeltable/func/signature.py +30 -3
- pixeltable/func/tools.py +2 -2
- pixeltable/functions/anthropic.py +76 -27
- pixeltable/functions/deepseek.py +5 -1
- pixeltable/functions/gemini.py +11 -2
- pixeltable/functions/globals.py +2 -2
- pixeltable/functions/huggingface.py +6 -12
- pixeltable/functions/llama_cpp.py +9 -1
- pixeltable/functions/openai.py +76 -55
- pixeltable/functions/video.py +59 -6
- pixeltable/functions/vision.py +2 -2
- pixeltable/globals.py +86 -13
- pixeltable/io/datarows.py +3 -3
- pixeltable/io/fiftyone.py +7 -7
- pixeltable/io/globals.py +3 -3
- pixeltable/io/hf_datasets.py +4 -4
- pixeltable/io/label_studio.py +2 -1
- pixeltable/io/pandas.py +6 -6
- pixeltable/io/parquet.py +3 -3
- pixeltable/io/table_data_conduit.py +2 -2
- pixeltable/io/utils.py +2 -2
- pixeltable/iterators/audio.py +3 -2
- pixeltable/iterators/document.py +2 -8
- pixeltable/iterators/video.py +49 -9
- pixeltable/plan.py +0 -16
- pixeltable/share/packager.py +51 -42
- pixeltable/share/publish.py +134 -7
- pixeltable/store.py +5 -25
- pixeltable/type_system.py +5 -8
- pixeltable/utils/__init__.py +2 -2
- pixeltable/utils/arrow.py +5 -5
- pixeltable/utils/description_helper.py +3 -3
- pixeltable/utils/iceberg.py +1 -2
- pixeltable/utils/media_store.py +131 -66
- {pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/METADATA +238 -122
- {pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/RECORD +69 -69
- {pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/entry_points.txt +0 -0
- {pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/licenses/LICENSE +0 -0
|
@@ -4,7 +4,7 @@ import inspect
|
|
|
4
4
|
import logging
|
|
5
5
|
import sys
|
|
6
6
|
from textwrap import dedent
|
|
7
|
-
from typing import Any, Optional, Sequence
|
|
7
|
+
from typing import Any, Optional, Sequence
|
|
8
8
|
|
|
9
9
|
import sqlalchemy as sql
|
|
10
10
|
|
|
@@ -36,7 +36,7 @@ class FunctionCall(Expr):
|
|
|
36
36
|
# - a component index, if the parameter is a non-variadic parameter
|
|
37
37
|
# - a list of component indices, if the parameter is a variadic positional parameter
|
|
38
38
|
# - a dict mapping keyword names to component indices, if the parameter is a variadic keyword parameter
|
|
39
|
-
bound_idxs: dict[str,
|
|
39
|
+
bound_idxs: dict[str, int | list[int] | dict[str, int]]
|
|
40
40
|
|
|
41
41
|
return_type: ts.ColumnType
|
|
42
42
|
group_by_start_idx: int
|
|
@@ -115,6 +115,7 @@ class FunctionCall(Expr):
|
|
|
115
115
|
self._validation_error = validation_error
|
|
116
116
|
|
|
117
117
|
if validation_error is not None:
|
|
118
|
+
self.bound_idxs = {}
|
|
118
119
|
self.resource_pool = None
|
|
119
120
|
return
|
|
120
121
|
|
|
@@ -300,8 +301,16 @@ class FunctionCall(Expr):
|
|
|
300
301
|
"""
|
|
301
302
|
res = super().substitute(spec)
|
|
302
303
|
assert res is self
|
|
303
|
-
|
|
304
|
-
|
|
304
|
+
if self.is_valid:
|
|
305
|
+
# If this FunctionCall is valid, re-evaluate the call_return_type of the substituted expression. If the
|
|
306
|
+
# FunctionCall is not valid, it isn't safe to do this. (Really we should be asserting that it *is* valid,
|
|
307
|
+
# but we still need to be able to do substitutions on invalid FunctionCalls, because loading an
|
|
308
|
+
# EmbeddingIndex from the db involves reconstructing the requisite (substituted) FunctionCalls. We could
|
|
309
|
+
# fix this by separately persisting the FunctionCall instances held by EmbeddingIndex to the db. That's
|
|
310
|
+
# probably a good idea, but it's also probably not urgent, since it only affects Functions that have a
|
|
311
|
+
# conditional_return_type implemented.)
|
|
312
|
+
self.return_type = self.fn.call_return_type(self.bound_args)
|
|
313
|
+
self.col_type = self.return_type
|
|
305
314
|
return self
|
|
306
315
|
|
|
307
316
|
def update(self, data_row: DataRow) -> None:
|
|
@@ -480,25 +489,54 @@ class FunctionCall(Expr):
|
|
|
480
489
|
).strip()
|
|
481
490
|
else:
|
|
482
491
|
# Evaluate the call_return_type as defined in the current codebase.
|
|
483
|
-
call_return_type =
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
#
|
|
487
|
-
#
|
|
488
|
-
#
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
492
|
+
call_return_type: Optional[ts.ColumnType] = None
|
|
493
|
+
|
|
494
|
+
if isinstance(resolved_fn, func.ExprTemplateFunction) and not resolved_fn.template.expr.is_valid:
|
|
495
|
+
# The FunctionCall is based on an ExprTemplateFunction, but the template expression is not valid
|
|
496
|
+
# (because it in turn contains an invalid FunctionCall). In this case, inherit the validation error
|
|
497
|
+
# from the template expression.
|
|
498
|
+
validation_error = resolved_fn.template.expr.validation_error
|
|
499
|
+
else:
|
|
500
|
+
try:
|
|
501
|
+
call_return_type = resolved_fn.call_return_type(bound_args)
|
|
502
|
+
except ImportError as exc:
|
|
503
|
+
validation_error = dedent(
|
|
504
|
+
f"""
|
|
505
|
+
A UDF call to {fn.self_path!r} could not be fully resolved, because a module required
|
|
506
|
+
by the UDF could not be imported:
|
|
507
|
+
{exc}
|
|
508
|
+
"""
|
|
509
|
+
)
|
|
510
|
+
|
|
511
|
+
assert (call_return_type is None) != (validation_error is None)
|
|
512
|
+
|
|
513
|
+
if call_return_type is None and return_type is None:
|
|
514
|
+
# Schema versions prior to 25 did not store the return_type in metadata, and there is no obvious
|
|
515
|
+
# way to infer it during DB migration, so we might encounter a stored return_type of None. If the
|
|
516
|
+
# resolution of call_return_type also fails, then we're out of luck; we have no choice but to
|
|
517
|
+
# fail-fast.
|
|
518
|
+
raise excs.Error(validation_error)
|
|
519
|
+
|
|
520
|
+
if call_return_type is not None:
|
|
521
|
+
# call_return_type resolution succeeded.
|
|
522
|
+
if return_type is None:
|
|
523
|
+
# Schema versions prior to 25 did not store the return_type in metadata (as mentioned above), so
|
|
524
|
+
# fall back on the call_return_type.
|
|
525
|
+
return_type = call_return_type
|
|
526
|
+
elif not return_type.is_supertype_of(call_return_type, ignore_nullable=True):
|
|
527
|
+
# There is a return_type stored in metadata (schema version >= 25),
|
|
528
|
+
# and the stored return_type of the UDF call doesn't match the column type of the FunctionCall.
|
|
529
|
+
validation_error = dedent(
|
|
530
|
+
f"""
|
|
531
|
+
The return type stored in the database for a UDF call to {fn.self_path!r} no longer
|
|
532
|
+
matches its return type as currently defined in the code. This probably means that the
|
|
533
|
+
code for {fn.self_path!r} has changed in a backward-incompatible way.
|
|
534
|
+
Return type of UDF call in the database: {return_type}
|
|
535
|
+
Return type of UDF as currently defined in code: {call_return_type}
|
|
536
|
+
"""
|
|
537
|
+
).strip()
|
|
538
|
+
|
|
539
|
+
assert return_type is not None # Guaranteed by the above logic.
|
|
502
540
|
|
|
503
541
|
fn_call = cls(
|
|
504
542
|
resolved_fn,
|
pixeltable/exprs/globals.py
CHANGED
|
@@ -2,10 +2,9 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import datetime
|
|
4
4
|
import enum
|
|
5
|
-
from typing import Union
|
|
6
5
|
|
|
7
6
|
# Python types corresponding to our literal types
|
|
8
|
-
LiteralPythonTypes =
|
|
7
|
+
LiteralPythonTypes = str | int | float | bool | datetime.datetime | datetime.date
|
|
9
8
|
|
|
10
9
|
|
|
11
10
|
def print_slice(s: slice) -> str:
|
pixeltable/exprs/json_path.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import Any, Optional
|
|
3
|
+
from typing import Any, Optional
|
|
4
4
|
|
|
5
5
|
import jmespath
|
|
6
6
|
import sqlalchemy as sql
|
|
@@ -18,7 +18,7 @@ from .sql_element_cache import SqlElementCache
|
|
|
18
18
|
|
|
19
19
|
class JsonPath(Expr):
|
|
20
20
|
def __init__(
|
|
21
|
-
self, anchor: Optional[Expr], path_elements: Optional[list[
|
|
21
|
+
self, anchor: Optional[Expr], path_elements: Optional[list[str | int | slice]] = None, scope_idx: int = 0
|
|
22
22
|
) -> None:
|
|
23
23
|
"""
|
|
24
24
|
anchor can be None, in which case this is a relative JsonPath and the anchor is set later via set_anchor().
|
|
@@ -30,7 +30,7 @@ class JsonPath(Expr):
|
|
|
30
30
|
super().__init__(ts.JsonType(nullable=True)) # JsonPath expressions are always nullable
|
|
31
31
|
if anchor is not None:
|
|
32
32
|
self.components = [anchor]
|
|
33
|
-
self.path_elements: list[
|
|
33
|
+
self.path_elements: list[str | int | slice] = path_elements
|
|
34
34
|
self.compiled_path = jmespath.compile(self._json_path()) if len(path_elements) > 0 else None
|
|
35
35
|
self.scope_idx = scope_idx
|
|
36
36
|
# NOTE: the _create_id() result will change if set_anchor() gets called;
|
pixeltable/exprs/row_builder.py
CHANGED
|
@@ -3,7 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
import sys
|
|
4
4
|
import time
|
|
5
5
|
from dataclasses import dataclass
|
|
6
|
-
from typing import Any, Iterable, Optional, Sequence
|
|
6
|
+
from typing import Any, Iterable, NamedTuple, Optional, Sequence
|
|
7
7
|
from uuid import UUID
|
|
8
8
|
|
|
9
9
|
import numpy as np
|
|
@@ -34,8 +34,7 @@ class ExecProfile:
|
|
|
34
34
|
)
|
|
35
35
|
|
|
36
36
|
|
|
37
|
-
|
|
38
|
-
class ColumnSlotIdx:
|
|
37
|
+
class ColumnSlotIdx(NamedTuple):
|
|
39
38
|
"""Info for how to locate materialized column in DataRow
|
|
40
39
|
TODO: can this be integrated into RowBuilder directly?
|
|
41
40
|
"""
|
|
@@ -87,6 +86,8 @@ class RowBuilder:
|
|
|
87
86
|
img_slot_idxs: list[int] # Indices of image slots
|
|
88
87
|
media_slot_idxs: list[int] # Indices of non-image media slots
|
|
89
88
|
array_slot_idxs: list[int] # Indices of array slots
|
|
89
|
+
stored_img_cols: list[exprs.ColumnSlotIdx]
|
|
90
|
+
stored_media_cols: list[exprs.ColumnSlotIdx]
|
|
90
91
|
|
|
91
92
|
@dataclass
|
|
92
93
|
class EvalCtx:
|
|
@@ -113,6 +114,8 @@ class RowBuilder:
|
|
|
113
114
|
"""
|
|
114
115
|
self.unique_exprs: ExprSet[Expr] = ExprSet() # dependencies precede their dependents
|
|
115
116
|
self.next_slot_idx = 0
|
|
117
|
+
self.stored_img_cols = []
|
|
118
|
+
self.stored_media_cols = []
|
|
116
119
|
|
|
117
120
|
# record input and output exprs; make copies to avoid reusing execution state
|
|
118
121
|
unique_input_exprs = [self._record_unique_expr(e.copy(), recursive=False) for e in input_exprs]
|
|
@@ -127,7 +130,7 @@ class RowBuilder:
|
|
|
127
130
|
)
|
|
128
131
|
|
|
129
132
|
# if init(columns):
|
|
130
|
-
# - we are creating table rows and need to record columns for
|
|
133
|
+
# - we are creating table rows and need to record columns for create_store_table_row()
|
|
131
134
|
# - output_exprs materialize those columns
|
|
132
135
|
# - input_exprs are ColumnRefs of the non-computed columns (ie, what needs to be provided as input)
|
|
133
136
|
# - media validation:
|
|
@@ -247,11 +250,13 @@ class RowBuilder:
|
|
|
247
250
|
def add_table_column(self, col: catalog.Column, slot_idx: int) -> None:
|
|
248
251
|
"""Record a column that is part of the table row"""
|
|
249
252
|
assert self.tbl is not None
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
253
|
+
assert col.is_stored
|
|
254
|
+
info = ColumnSlotIdx(col, slot_idx)
|
|
255
|
+
self.table_columns.append(info)
|
|
256
|
+
if col.col_type.is_media_type():
|
|
257
|
+
self.stored_media_cols.append(info)
|
|
258
|
+
if col.col_type.is_image_type():
|
|
259
|
+
self.stored_img_cols.append(info)
|
|
255
260
|
|
|
256
261
|
@property
|
|
257
262
|
def num_materialized(self) -> int:
|
|
@@ -445,20 +450,20 @@ class RowBuilder:
|
|
|
445
450
|
expr, f'expression {expr}', data_row.get_exc(expr.slot_idx), exc_tb, input_vals, 0
|
|
446
451
|
) from exc
|
|
447
452
|
|
|
448
|
-
def
|
|
453
|
+
def create_store_table_row(
|
|
449
454
|
self, data_row: DataRow, cols_with_excs: Optional[set[int]], pk: tuple[int, ...]
|
|
450
455
|
) -> tuple[list[Any], int]:
|
|
451
|
-
"""Create a table row from the slots that have an output column assigned
|
|
456
|
+
"""Create a store table row from the slots that have an output column assigned
|
|
452
457
|
|
|
453
458
|
Return tuple[list of row values in `self.table_columns` order, # of exceptions]
|
|
454
459
|
This excludes system columns.
|
|
460
|
+
Row values are converted to their store type.
|
|
455
461
|
"""
|
|
456
462
|
from pixeltable.exprs.column_property_ref import ColumnPropertyRef
|
|
457
463
|
|
|
458
464
|
num_excs = 0
|
|
459
465
|
table_row: list[Any] = list(pk)
|
|
460
|
-
for
|
|
461
|
-
col, slot_idx = info.col, info.slot_idx
|
|
466
|
+
for col, slot_idx in self.table_columns:
|
|
462
467
|
if data_row.has_exc(slot_idx):
|
|
463
468
|
exc = data_row.get_exc(slot_idx)
|
|
464
469
|
num_excs += 1
|
|
@@ -469,9 +474,11 @@ class RowBuilder:
|
|
|
469
474
|
# exceptions get stored in the errortype/-msg properties of the cellmd column
|
|
470
475
|
table_row.append(ColumnPropertyRef.create_cellmd_exc(exc))
|
|
471
476
|
else:
|
|
472
|
-
if col.col_type.
|
|
473
|
-
|
|
474
|
-
|
|
477
|
+
if col.col_type.is_media_type():
|
|
478
|
+
if col.col_type.is_image_type() and data_row.file_urls[slot_idx] is None:
|
|
479
|
+
# we have yet to store this image
|
|
480
|
+
data_row.flush_img(slot_idx, col)
|
|
481
|
+
data_row.move_tmp_media_file(slot_idx, col)
|
|
475
482
|
val = data_row.get_stored_val(slot_idx, col.get_sa_col_type())
|
|
476
483
|
table_row.append(val)
|
|
477
484
|
if col.stores_cellmd:
|
|
@@ -479,7 +486,7 @@ class RowBuilder:
|
|
|
479
486
|
|
|
480
487
|
return table_row, num_excs
|
|
481
488
|
|
|
482
|
-
def store_column_names(self) ->
|
|
489
|
+
def store_column_names(self) -> list[str]:
|
|
483
490
|
"""
|
|
484
491
|
Returns the list of store column names corresponding to the table_columns of this RowBuilder.
|
|
485
492
|
The second tuple element of the return value is a dictionary containing all media columns in the
|
|
@@ -487,16 +494,13 @@ class RowBuilder:
|
|
|
487
494
|
"""
|
|
488
495
|
assert self.tbl is not None, self.table_columns
|
|
489
496
|
store_col_names: list[str] = [pk_col.name for pk_col in self.tbl.store_tbl.pk_columns()]
|
|
490
|
-
media_cols: dict[int, catalog.Column] = {}
|
|
491
497
|
|
|
492
498
|
for col in self.table_columns:
|
|
493
|
-
if col.col.col_type.is_media_type():
|
|
494
|
-
media_cols[len(store_col_names)] = col.col
|
|
495
499
|
store_col_names.append(col.col.store_name())
|
|
496
500
|
if col.col.stores_cellmd:
|
|
497
501
|
store_col_names.append(col.col.cellmd_store_name())
|
|
498
502
|
|
|
499
|
-
return store_col_names
|
|
503
|
+
return store_col_names
|
|
500
504
|
|
|
501
505
|
def make_row(self) -> exprs.DataRow:
|
|
502
506
|
"""Creates a new DataRow with the current row_builder's configuration."""
|
pixeltable/exprs/string_op.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import Any, Optional
|
|
3
|
+
from typing import Any, Optional
|
|
4
4
|
|
|
5
5
|
import sqlalchemy as sql
|
|
6
6
|
|
|
@@ -76,7 +76,7 @@ class StringOp(Expr):
|
|
|
76
76
|
op2_val = data_row[self._op2.slot_idx]
|
|
77
77
|
data_row[self.slot_idx] = self.eval_nullable(op1_val, op2_val)
|
|
78
78
|
|
|
79
|
-
def eval_nullable(self, op1_val:
|
|
79
|
+
def eval_nullable(self, op1_val: str | None, op2_val: int | str | None) -> str | None:
|
|
80
80
|
"""
|
|
81
81
|
Return the result of evaluating the expression on two nullable int/float operands,
|
|
82
82
|
None is interpreted as SQL NULL
|
|
@@ -85,7 +85,7 @@ class StringOp(Expr):
|
|
|
85
85
|
return None
|
|
86
86
|
return self.eval_non_null(op1_val, op2_val)
|
|
87
87
|
|
|
88
|
-
def eval_non_null(self, op1_val: str, op2_val:
|
|
88
|
+
def eval_non_null(self, op1_val: str, op2_val: int | str) -> str:
|
|
89
89
|
"""
|
|
90
90
|
Return the result of evaluating the expression on two int/float operands
|
|
91
91
|
"""
|
|
@@ -85,13 +85,16 @@ class ExprTemplateFunction(Function):
|
|
|
85
85
|
conditional_return_type).
|
|
86
86
|
"""
|
|
87
87
|
assert not self.is_polymorphic
|
|
88
|
-
template = self.template
|
|
89
88
|
with_defaults = bound_args.copy()
|
|
90
89
|
with_defaults.update(
|
|
91
|
-
{
|
|
90
|
+
{
|
|
91
|
+
param_name: default
|
|
92
|
+
for param_name, default in self.template.defaults.items()
|
|
93
|
+
if param_name not in bound_args
|
|
94
|
+
}
|
|
92
95
|
)
|
|
93
96
|
substituted_expr = self.template.expr.copy().substitute(
|
|
94
|
-
{template.param_exprs[name]: expr for name, expr in with_defaults.items()}
|
|
97
|
+
{self.template.param_exprs[name]: expr for name, expr in with_defaults.items()}
|
|
95
98
|
)
|
|
96
99
|
return substituted_expr.col_type
|
|
97
100
|
|
|
@@ -2,7 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import inspect
|
|
4
4
|
from functools import reduce
|
|
5
|
-
from typing import TYPE_CHECKING, Any, Callable, Iterable, Optional,
|
|
5
|
+
from typing import TYPE_CHECKING, Any, Callable, Iterable, Optional, overload
|
|
6
6
|
|
|
7
7
|
from pixeltable import catalog, exceptions as excs, exprs, func, type_system as ts
|
|
8
8
|
|
|
@@ -129,7 +129,7 @@ def retrieval_udf(
|
|
|
129
129
|
table: catalog.Table,
|
|
130
130
|
name: Optional[str] = None,
|
|
131
131
|
description: Optional[str] = None,
|
|
132
|
-
parameters: Optional[Iterable[
|
|
132
|
+
parameters: Optional[Iterable[str | exprs.ColumnRef]] = None,
|
|
133
133
|
limit: Optional[int] = 10,
|
|
134
134
|
) -> func.QueryTemplateFunction:
|
|
135
135
|
"""
|
pixeltable/func/signature.py
CHANGED
|
@@ -84,8 +84,28 @@ class Signature:
|
|
|
84
84
|
"""
|
|
85
85
|
|
|
86
86
|
SPECIAL_PARAM_NAMES: ClassVar[list[str]] = ['group_by', 'order_by']
|
|
87
|
-
|
|
88
|
-
|
|
87
|
+
SYSTEM_PARAM_NAMES: ClassVar[list[str]] = ['_runtime_ctx']
|
|
88
|
+
|
|
89
|
+
return_type: ts.ColumnType
|
|
90
|
+
is_batched: bool
|
|
91
|
+
parameters: dict[str, Parameter] # name -> Parameter
|
|
92
|
+
parameters_by_pos: list[Parameter] # ordered by position in the signature
|
|
93
|
+
constant_parameters: list[Parameter] # parameters that are not batched
|
|
94
|
+
batched_parameters: list[Parameter] # parameters that are batched
|
|
95
|
+
required_parameters: list[Parameter] # parameters that do not have a default value
|
|
96
|
+
|
|
97
|
+
# the names of recognized system parameters in the signature; these are excluded from self.parameters
|
|
98
|
+
system_parameters: list[str]
|
|
99
|
+
|
|
100
|
+
py_signature: inspect.Signature
|
|
101
|
+
|
|
102
|
+
def __init__(
|
|
103
|
+
self,
|
|
104
|
+
return_type: ts.ColumnType,
|
|
105
|
+
parameters: list[Parameter],
|
|
106
|
+
is_batched: bool = False,
|
|
107
|
+
system_parameters: Optional[list[str]] = None,
|
|
108
|
+
):
|
|
89
109
|
assert isinstance(return_type, ts.ColumnType)
|
|
90
110
|
self.return_type = return_type
|
|
91
111
|
self.is_batched = is_batched
|
|
@@ -95,6 +115,7 @@ class Signature:
|
|
|
95
115
|
self.constant_parameters = [p for p in parameters if not p.is_batched]
|
|
96
116
|
self.batched_parameters = [p for p in parameters if p.is_batched]
|
|
97
117
|
self.required_parameters = [p for p in parameters if not p.has_default()]
|
|
118
|
+
self.system_parameters = system_parameters if system_parameters is not None else []
|
|
98
119
|
self.py_signature = inspect.Signature([p.to_py_param() for p in self.parameters_by_pos])
|
|
99
120
|
|
|
100
121
|
def get_return_type(self) -> ts.ColumnType:
|
|
@@ -237,6 +258,7 @@ class Signature:
|
|
|
237
258
|
type_substitutions: Optional[dict] = None,
|
|
238
259
|
is_cls_method: bool = False,
|
|
239
260
|
) -> list[Parameter]:
|
|
261
|
+
"""Ignores parameters starting with '_'."""
|
|
240
262
|
from pixeltable import exprs
|
|
241
263
|
|
|
242
264
|
assert (py_fn is None) != (py_params is None)
|
|
@@ -251,6 +273,10 @@ class Signature:
|
|
|
251
273
|
for idx, param in enumerate(py_params):
|
|
252
274
|
if is_cls_method and idx == 0:
|
|
253
275
|
continue # skip 'self' or 'cls' parameter
|
|
276
|
+
if param.name in cls.SYSTEM_PARAM_NAMES:
|
|
277
|
+
continue # skip system parameters
|
|
278
|
+
if param.name.startswith('_'):
|
|
279
|
+
raise excs.Error(f"{param.name!r}: parameters starting with '_' are reserved")
|
|
254
280
|
if param.name in cls.SPECIAL_PARAM_NAMES:
|
|
255
281
|
raise excs.Error(f'{param.name!r} is a reserved parameter name')
|
|
256
282
|
if param.kind in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD):
|
|
@@ -308,5 +334,6 @@ class Signature:
|
|
|
308
334
|
raise excs.Error('Cannot infer pixeltable return type')
|
|
309
335
|
else:
|
|
310
336
|
_, return_is_batched = cls._infer_type(sig.return_annotation)
|
|
337
|
+
system_params = [param_name for param_name in sig.parameters if param_name in cls.SYSTEM_PARAM_NAMES]
|
|
311
338
|
|
|
312
|
-
return Signature(return_type, parameters, return_is_batched)
|
|
339
|
+
return Signature(return_type, parameters, return_is_batched, system_parameters=system_params)
|
pixeltable/func/tools.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import json
|
|
2
|
-
from typing import TYPE_CHECKING, Any, Callable, Optional, TypeVar
|
|
2
|
+
from typing import TYPE_CHECKING, Any, Callable, Optional, TypeVar
|
|
3
3
|
|
|
4
4
|
import pydantic
|
|
5
5
|
|
|
@@ -100,7 +100,7 @@ class Tools(pydantic.BaseModel):
|
|
|
100
100
|
self,
|
|
101
101
|
auto: bool = False,
|
|
102
102
|
required: bool = False,
|
|
103
|
-
tool:
|
|
103
|
+
tool: str | Function | None = None,
|
|
104
104
|
parallel_tool_calls: bool = True,
|
|
105
105
|
) -> ToolChoice:
|
|
106
106
|
if sum([auto, required, tool is not None]) != 1:
|
|
@@ -38,6 +38,53 @@ def _anthropic_client() -> 'anthropic.AsyncAnthropic':
|
|
|
38
38
|
return env.Env.get().get_client('anthropic')
|
|
39
39
|
|
|
40
40
|
|
|
41
|
+
def _get_header_info(
|
|
42
|
+
headers: httpx.Headers,
|
|
43
|
+
) -> tuple[
|
|
44
|
+
Optional[tuple[int, int, datetime.datetime]],
|
|
45
|
+
Optional[tuple[int, int, datetime.datetime]],
|
|
46
|
+
Optional[tuple[int, int, datetime.datetime]],
|
|
47
|
+
]:
|
|
48
|
+
"""Extract rate limit info from Anthropic API response headers."""
|
|
49
|
+
requests_limit_str = headers.get('anthropic-ratelimit-requests-limit')
|
|
50
|
+
requests_limit = int(requests_limit_str) if requests_limit_str is not None else None
|
|
51
|
+
requests_remaining_str = headers.get('anthropic-ratelimit-requests-remaining')
|
|
52
|
+
requests_remaining = int(requests_remaining_str) if requests_remaining_str is not None else None
|
|
53
|
+
requests_reset_str = headers.get('anthropic-ratelimit-requests-reset')
|
|
54
|
+
requests_reset = (
|
|
55
|
+
datetime.datetime.fromisoformat(requests_reset_str.replace('Z', '+00:00')) if requests_reset_str else None
|
|
56
|
+
)
|
|
57
|
+
requests_info = (requests_limit, requests_remaining, requests_reset) if requests_reset else None
|
|
58
|
+
|
|
59
|
+
input_tokens_limit_str = headers.get('anthropic-ratelimit-input-tokens-limit')
|
|
60
|
+
input_tokens_limit = int(input_tokens_limit_str) if input_tokens_limit_str is not None else None
|
|
61
|
+
input_tokens_remaining_str = headers.get('anthropic-ratelimit-input-tokens-remaining')
|
|
62
|
+
input_tokens_remaining = int(input_tokens_remaining_str) if input_tokens_remaining_str is not None else None
|
|
63
|
+
input_tokens_reset_str = headers.get('anthropic-ratelimit-input-tokens-reset')
|
|
64
|
+
input_tokens_reset = (
|
|
65
|
+
datetime.datetime.fromisoformat(input_tokens_reset_str.replace('Z', '+00:00'))
|
|
66
|
+
if input_tokens_reset_str
|
|
67
|
+
else None
|
|
68
|
+
)
|
|
69
|
+
input_tokens_info = (input_tokens_limit, input_tokens_remaining, input_tokens_reset) if input_tokens_reset else None
|
|
70
|
+
|
|
71
|
+
output_tokens_limit_str = headers.get('anthropic-ratelimit-output-tokens-limit')
|
|
72
|
+
output_tokens_limit = int(output_tokens_limit_str) if output_tokens_limit_str is not None else None
|
|
73
|
+
output_tokens_remaining_str = headers.get('anthropic-ratelimit-output-tokens-remaining')
|
|
74
|
+
output_tokens_remaining = int(output_tokens_remaining_str) if output_tokens_remaining_str is not None else None
|
|
75
|
+
output_tokens_reset_str = headers.get('anthropic-ratelimit-output-tokens-reset')
|
|
76
|
+
output_tokens_reset = (
|
|
77
|
+
datetime.datetime.fromisoformat(output_tokens_reset_str.replace('Z', '+00:00'))
|
|
78
|
+
if output_tokens_reset_str
|
|
79
|
+
else None
|
|
80
|
+
)
|
|
81
|
+
output_tokens_info = (
|
|
82
|
+
(output_tokens_limit, output_tokens_remaining, output_tokens_reset) if output_tokens_reset else None
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
return requests_info, input_tokens_info, output_tokens_info
|
|
86
|
+
|
|
87
|
+
|
|
41
88
|
class AnthropicRateLimitsInfo(env.RateLimitsInfo):
|
|
42
89
|
def __init__(self) -> None:
|
|
43
90
|
super().__init__(self._get_request_resources)
|
|
@@ -51,6 +98,27 @@ class AnthropicRateLimitsInfo(env.RateLimitsInfo):
|
|
|
51
98
|
input_len += len(message['content'])
|
|
52
99
|
return {'requests': 1, 'input_tokens': int(input_len / 4), 'output_tokens': max_tokens}
|
|
53
100
|
|
|
101
|
+
def record_exc(self, exc: Exception) -> None:
|
|
102
|
+
import anthropic
|
|
103
|
+
|
|
104
|
+
if (
|
|
105
|
+
not isinstance(exc, anthropic.APIError)
|
|
106
|
+
or not hasattr(exc, 'response')
|
|
107
|
+
or not hasattr(exc.response, 'headers')
|
|
108
|
+
):
|
|
109
|
+
return
|
|
110
|
+
requests_info, input_tokens_info, output_tokens_info = _get_header_info(exc.response.headers)
|
|
111
|
+
_logger.debug(
|
|
112
|
+
f'record_exc(): requests_info={requests_info} input_tokens_info={input_tokens_info} '
|
|
113
|
+
f'output_tokens_info={output_tokens_info}'
|
|
114
|
+
)
|
|
115
|
+
self.record(requests=requests_info, input_tokens=input_tokens_info, output_tokens=output_tokens_info)
|
|
116
|
+
self.has_exc = True
|
|
117
|
+
|
|
118
|
+
retry_after_str = exc.response.headers.get('retry-after')
|
|
119
|
+
if retry_after_str is not None:
|
|
120
|
+
_logger.debug(f'retry-after: {retry_after_str}')
|
|
121
|
+
|
|
54
122
|
def get_retry_delay(self, exc: Exception) -> Optional[float]:
|
|
55
123
|
import anthropic
|
|
56
124
|
|
|
@@ -64,8 +132,7 @@ class AnthropicRateLimitsInfo(env.RateLimitsInfo):
|
|
|
64
132
|
should_retry_str = exc.response.headers.get('x-should-retry', '')
|
|
65
133
|
if should_retry_str.lower() != 'true':
|
|
66
134
|
return None
|
|
67
|
-
|
|
68
|
-
return int(retry_after_str)
|
|
135
|
+
return super().get_retry_delay(exc)
|
|
69
136
|
|
|
70
137
|
|
|
71
138
|
@pxt.udf
|
|
@@ -77,6 +144,7 @@ async def messages(
|
|
|
77
144
|
model_kwargs: Optional[dict[str, Any]] = None,
|
|
78
145
|
tools: Optional[list[dict[str, Any]]] = None,
|
|
79
146
|
tool_choice: Optional[dict[str, Any]] = None,
|
|
147
|
+
_runtime_ctx: Optional[env.RuntimeCtx] = None,
|
|
80
148
|
) -> dict:
|
|
81
149
|
"""
|
|
82
150
|
Create a Message.
|
|
@@ -151,32 +219,13 @@ async def messages(
|
|
|
151
219
|
messages=cast(Iterable[MessageParam], messages), model=model, max_tokens=max_tokens, **model_kwargs
|
|
152
220
|
)
|
|
153
221
|
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
requests_reset = datetime.datetime.fromisoformat(requests_reset_str.replace('Z', '+00:00'))
|
|
160
|
-
input_tokens_limit_str = result.headers.get('anthropic-ratelimit-input-tokens-limit')
|
|
161
|
-
input_tokens_limit = int(input_tokens_limit_str) if input_tokens_limit_str is not None else None
|
|
162
|
-
input_tokens_remaining_str = result.headers.get('anthropic-ratelimit-input-tokens-remaining')
|
|
163
|
-
input_tokens_remaining = int(input_tokens_remaining_str) if input_tokens_remaining_str is not None else None
|
|
164
|
-
input_tokens_reset_str = result.headers.get('anthropic-ratelimit-input-tokens-reset')
|
|
165
|
-
input_tokens_reset = datetime.datetime.fromisoformat(input_tokens_reset_str.replace('Z', '+00:00'))
|
|
166
|
-
output_tokens_limit_str = result.headers.get('anthropic-ratelimit-output-tokens-limit')
|
|
167
|
-
output_tokens_limit = int(output_tokens_limit_str) if output_tokens_limit_str is not None else None
|
|
168
|
-
output_tokens_remaining_str = result.headers.get('anthropic-ratelimit-output-tokens-remaining')
|
|
169
|
-
output_tokens_remaining = int(output_tokens_remaining_str) if output_tokens_remaining_str is not None else None
|
|
170
|
-
output_tokens_reset_str = result.headers.get('anthropic-ratelimit-output-tokens-reset')
|
|
171
|
-
output_tokens_reset = datetime.datetime.fromisoformat(output_tokens_reset_str.replace('Z', '+00:00'))
|
|
172
|
-
retry_after_str = result.headers.get('retry-after')
|
|
173
|
-
if retry_after_str is not None:
|
|
174
|
-
_logger.debug(f'retry-after: {retry_after_str}')
|
|
175
|
-
|
|
222
|
+
requests_info, input_tokens_info, output_tokens_info = _get_header_info(result.headers)
|
|
223
|
+
# retry_after_str = result.headers.get('retry-after')
|
|
224
|
+
# if retry_after_str is not None:
|
|
225
|
+
# _logger.debug(f'retry-after: {retry_after_str}')
|
|
226
|
+
is_retry = _runtime_ctx is not None and _runtime_ctx.is_retry
|
|
176
227
|
rate_limits_info.record(
|
|
177
|
-
requests=
|
|
178
|
-
input_tokens=(input_tokens_limit, input_tokens_remaining, input_tokens_reset),
|
|
179
|
-
output_tokens=(output_tokens_limit, output_tokens_remaining, output_tokens_reset),
|
|
228
|
+
requests=requests_info, input_tokens=input_tokens_info, output_tokens=output_tokens_info, reset_exc=is_retry
|
|
180
229
|
)
|
|
181
230
|
|
|
182
231
|
result_dict = json.loads(result.text)
|
pixeltable/functions/deepseek.py
CHANGED
|
@@ -26,7 +26,7 @@ def _deepseek_client() -> 'openai.AsyncOpenAI':
|
|
|
26
26
|
return env.Env.get().get_client('deepseek')
|
|
27
27
|
|
|
28
28
|
|
|
29
|
-
@pxt.udf
|
|
29
|
+
@pxt.udf(resource_pool='request-rate:deepseek')
|
|
30
30
|
async def chat_completions(
|
|
31
31
|
messages: list,
|
|
32
32
|
*,
|
|
@@ -43,6 +43,10 @@ async def chat_completions(
|
|
|
43
43
|
|
|
44
44
|
Deepseek uses the OpenAI SDK, so you will need to install the `openai` package to use this UDF.
|
|
45
45
|
|
|
46
|
+
Request throttling:
|
|
47
|
+
Applies the rate limit set in the config (section `deepseek`, key `rate_limit`). If no rate
|
|
48
|
+
limit is configured, uses a default of 600 RPM.
|
|
49
|
+
|
|
46
50
|
__Requirements:__
|
|
47
51
|
|
|
48
52
|
- `pip install openai`
|
pixeltable/functions/gemini.py
CHANGED
|
@@ -14,6 +14,7 @@ import PIL.Image
|
|
|
14
14
|
|
|
15
15
|
import pixeltable as pxt
|
|
16
16
|
from pixeltable import env, exceptions as excs, exprs
|
|
17
|
+
from pixeltable.utils.media_store import TempStore
|
|
17
18
|
|
|
18
19
|
if TYPE_CHECKING:
|
|
19
20
|
from google import genai
|
|
@@ -39,7 +40,7 @@ async def generate_content(
|
|
|
39
40
|
<https://ai.google.dev/gemini-api/docs/text-generation>
|
|
40
41
|
|
|
41
42
|
Request throttling:
|
|
42
|
-
Applies the rate limit set in the config (section `gemini
|
|
43
|
+
Applies the rate limit set in the config (section `gemini.rate_limits`; use the model id as the key). If no rate
|
|
43
44
|
limit is configured, uses a default of 600 RPM.
|
|
44
45
|
|
|
45
46
|
__Requirements:__
|
|
@@ -126,6 +127,10 @@ async def generate_images(prompt: str, *, model: str, config: Optional[dict] = N
|
|
|
126
127
|
Generates images based on a text description and configuration. For additional details, see:
|
|
127
128
|
<https://ai.google.dev/gemini-api/docs/image-generation>
|
|
128
129
|
|
|
130
|
+
Request throttling:
|
|
131
|
+
Applies the rate limit set in the config (section `imagen.rate_limits`; use the model id as the key). If no rate
|
|
132
|
+
limit is configured, uses a default of 600 RPM.
|
|
133
|
+
|
|
129
134
|
__Requirements:__
|
|
130
135
|
|
|
131
136
|
- `pip install google-genai`
|
|
@@ -167,6 +172,10 @@ async def generate_videos(
|
|
|
167
172
|
Generates videos based on a text description and configuration. For additional details, see:
|
|
168
173
|
<https://ai.google.dev/gemini-api/docs/video-generation>
|
|
169
174
|
|
|
175
|
+
Request throttling:
|
|
176
|
+
Applies the rate limit set in the config (section `veo.rate_limits`; use the model id as the key). If no rate
|
|
177
|
+
limit is configured, uses a default of 600 RPM.
|
|
178
|
+
|
|
170
179
|
__Requirements:__
|
|
171
180
|
|
|
172
181
|
- `pip install google-genai`
|
|
@@ -215,7 +224,7 @@ async def generate_videos(
|
|
|
215
224
|
assert video_bytes is not None
|
|
216
225
|
|
|
217
226
|
# Create a temporary file to store the video bytes
|
|
218
|
-
output_path =
|
|
227
|
+
output_path = TempStore.create_path(extension='.mp4')
|
|
219
228
|
Path(output_path).write_bytes(video_bytes)
|
|
220
229
|
return str(output_path)
|
|
221
230
|
|
pixeltable/functions/globals.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import builtins
|
|
2
2
|
import typing
|
|
3
|
-
from typing import Any, Callable, Optional
|
|
3
|
+
from typing import Any, Callable, Optional
|
|
4
4
|
|
|
5
5
|
import sqlalchemy as sql
|
|
6
6
|
|
|
@@ -11,7 +11,7 @@ from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
|
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
# TODO: remove and replace calls with astype()
|
|
14
|
-
def cast(expr: exprs.Expr, target_type:
|
|
14
|
+
def cast(expr: exprs.Expr, target_type: ts.ColumnType | type | _GenericAlias) -> exprs.Expr:
|
|
15
15
|
expr.col_type = ts.ColumnType.normalize_type(target_type)
|
|
16
16
|
return expr
|
|
17
17
|
|