pixeltable 0.4.6__py3-none-any.whl → 0.4.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (69)
  1. pixeltable/__init__.py +4 -2
  2. pixeltable/catalog/__init__.py +1 -1
  3. pixeltable/catalog/catalog.py +7 -9
  4. pixeltable/catalog/column.py +49 -0
  5. pixeltable/catalog/insertable_table.py +0 -7
  6. pixeltable/catalog/schema_object.py +1 -14
  7. pixeltable/catalog/table.py +180 -67
  8. pixeltable/catalog/table_version.py +42 -146
  9. pixeltable/catalog/table_version_path.py +6 -5
  10. pixeltable/catalog/view.py +2 -1
  11. pixeltable/config.py +24 -9
  12. pixeltable/dataframe.py +5 -6
  13. pixeltable/env.py +113 -21
  14. pixeltable/exec/aggregation_node.py +1 -1
  15. pixeltable/exec/cache_prefetch_node.py +4 -3
  16. pixeltable/exec/exec_node.py +0 -8
  17. pixeltable/exec/expr_eval/expr_eval_node.py +2 -2
  18. pixeltable/exec/expr_eval/globals.py +1 -0
  19. pixeltable/exec/expr_eval/schedulers.py +52 -19
  20. pixeltable/exec/in_memory_data_node.py +2 -3
  21. pixeltable/exprs/array_slice.py +2 -2
  22. pixeltable/exprs/data_row.py +15 -2
  23. pixeltable/exprs/expr.py +9 -9
  24. pixeltable/exprs/function_call.py +61 -23
  25. pixeltable/exprs/globals.py +1 -2
  26. pixeltable/exprs/json_path.py +3 -3
  27. pixeltable/exprs/row_builder.py +25 -21
  28. pixeltable/exprs/string_op.py +3 -3
  29. pixeltable/func/expr_template_function.py +6 -3
  30. pixeltable/func/query_template_function.py +2 -2
  31. pixeltable/func/signature.py +30 -3
  32. pixeltable/func/tools.py +2 -2
  33. pixeltable/functions/anthropic.py +76 -27
  34. pixeltable/functions/deepseek.py +5 -1
  35. pixeltable/functions/gemini.py +11 -2
  36. pixeltable/functions/globals.py +2 -2
  37. pixeltable/functions/huggingface.py +6 -12
  38. pixeltable/functions/llama_cpp.py +9 -1
  39. pixeltable/functions/openai.py +76 -55
  40. pixeltable/functions/video.py +59 -6
  41. pixeltable/functions/vision.py +2 -2
  42. pixeltable/globals.py +86 -13
  43. pixeltable/io/datarows.py +3 -3
  44. pixeltable/io/fiftyone.py +7 -7
  45. pixeltable/io/globals.py +3 -3
  46. pixeltable/io/hf_datasets.py +4 -4
  47. pixeltable/io/label_studio.py +2 -1
  48. pixeltable/io/pandas.py +6 -6
  49. pixeltable/io/parquet.py +3 -3
  50. pixeltable/io/table_data_conduit.py +2 -2
  51. pixeltable/io/utils.py +2 -2
  52. pixeltable/iterators/audio.py +3 -2
  53. pixeltable/iterators/document.py +2 -8
  54. pixeltable/iterators/video.py +49 -9
  55. pixeltable/plan.py +0 -16
  56. pixeltable/share/packager.py +51 -42
  57. pixeltable/share/publish.py +134 -7
  58. pixeltable/store.py +5 -25
  59. pixeltable/type_system.py +5 -8
  60. pixeltable/utils/__init__.py +2 -2
  61. pixeltable/utils/arrow.py +5 -5
  62. pixeltable/utils/description_helper.py +3 -3
  63. pixeltable/utils/iceberg.py +1 -2
  64. pixeltable/utils/media_store.py +131 -66
  65. {pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/METADATA +238 -122
  66. {pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/RECORD +69 -69
  67. {pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/WHEEL +0 -0
  68. {pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/entry_points.txt +0 -0
  69. {pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/licenses/LICENSE +0 -0
pixeltable/exprs/function_call.py CHANGED
@@ -4,7 +4,7 @@ import inspect
 import logging
 import sys
 from textwrap import dedent
-from typing import Any, Optional, Sequence, Union
+from typing import Any, Optional, Sequence
 
 import sqlalchemy as sql
 
@@ -36,7 +36,7 @@ class FunctionCall(Expr):
     # - a component index, if the parameter is a non-variadic parameter
     # - a list of component indices, if the parameter is a variadic positional parameter
     # - a dict mapping keyword names to component indices, if the parameter is a variadic keyword parameter
-    bound_idxs: dict[str, Union[int, list[int], dict[str, int]]]
+    bound_idxs: dict[str, int | list[int] | dict[str, int]]
 
     return_type: ts.ColumnType
     group_by_start_idx: int
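To make the mapping above concrete, a hypothetical sketch (parameter and argument names invented): for a call `fn(x, y, z, k1=a, k2=b)` bound against `def fn(p, *args, **kwargs)`, the entries would take roughly this shape:

    # Hypothetical illustration of the three bound_idxs value shapes:
    bound_idxs = {
        'p': 0,                        # non-variadic parameter: single component index
        'args': [1, 2],                # variadic positional: list of component indices
        'kwargs': {'k1': 3, 'k2': 4},  # variadic keyword: keyword name -> component index
    }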
@@ -115,6 +115,7 @@ class FunctionCall(Expr):
         self._validation_error = validation_error
 
         if validation_error is not None:
+            self.bound_idxs = {}
             self.resource_pool = None
             return
 
@@ -300,8 +301,16 @@ class FunctionCall(Expr):
         """
         res = super().substitute(spec)
         assert res is self
-        self.return_type = self.fn.call_return_type(self.bound_args)
-        self.col_type = self.return_type
+        if self.is_valid:
+            # If this FunctionCall is valid, re-evaluate the call_return_type of the substituted expression. If the
+            # FunctionCall is not valid, it isn't safe to do this. (Really we should be asserting that it *is* valid,
+            # but we still need to be able to do substitutions on invalid FunctionCalls, because loading an
+            # EmbeddingIndex from the db involves reconstructing the requisite (substituted) FunctionCalls. We could
+            # fix this by separately persisting the FunctionCall instances held by EmbeddingIndex to the db. That's
+            # probably a good idea, but it's also probably not urgent, since it only affects Functions that have a
+            # conditional_return_type implemented.)
+            self.return_type = self.fn.call_return_type(self.bound_args)
+            self.col_type = self.return_type
         return self
 
     def update(self, data_row: DataRow) -> None:
@@ -480,25 +489,54 @@ class FunctionCall(Expr):
             ).strip()
         else:
             # Evaluate the call_return_type as defined in the current codebase.
-            call_return_type = resolved_fn.call_return_type(bound_args)
-            if return_type is None:
-                # Schema versions prior to 25 did not store the return_type in metadata, and there is no obvious way to
-                # infer it during DB migration, so we might encounter a stored return_type of None. In that case, we use
-                # the call_return_type that we just inferred (which matches the deserialization behavior prior to
-                # version 25).
-                return_type = call_return_type
-            elif not return_type.is_supertype_of(call_return_type, ignore_nullable=True):
-                # There is a return_type stored in metadata (schema version >= 25),
-                # and the stored return_type of the UDF call doesn't match the column type of the FunctionCall.
-                validation_error = dedent(
-                    f"""
-                    The return type stored in the database for a UDF call to {fn.self_path!r} no longer
-                    matches its return type as currently defined in the code. This probably means that the
-                    code for {fn.self_path!r} has changed in a backward-incompatible way.
-                    Return type of UDF call in the database: {return_type}
-                    Return type of UDF as currently defined in code: {call_return_type}
-                    """
-                ).strip()
+            call_return_type: Optional[ts.ColumnType] = None
+
+            if isinstance(resolved_fn, func.ExprTemplateFunction) and not resolved_fn.template.expr.is_valid:
+                # The FunctionCall is based on an ExprTemplateFunction, but the template expression is not valid
+                # (because it in turn contains an invalid FunctionCall). In this case, inherit the validation error
+                # from the template expression.
+                validation_error = resolved_fn.template.expr.validation_error
+            else:
+                try:
+                    call_return_type = resolved_fn.call_return_type(bound_args)
+                except ImportError as exc:
+                    validation_error = dedent(
+                        f"""
+                        A UDF call to {fn.self_path!r} could not be fully resolved, because a module required
+                        by the UDF could not be imported:
+                        {exc}
+                        """
+                    )
+
+            assert (call_return_type is None) != (validation_error is None)
+
+            if call_return_type is None and return_type is None:
+                # Schema versions prior to 25 did not store the return_type in metadata, and there is no obvious
+                # way to infer it during DB migration, so we might encounter a stored return_type of None. If the
+                # resolution of call_return_type also fails, then we're out of luck; we have no choice but to
+                # fail-fast.
+                raise excs.Error(validation_error)
+
+            if call_return_type is not None:
+                # call_return_type resolution succeeded.
+                if return_type is None:
+                    # Schema versions prior to 25 did not store the return_type in metadata (as mentioned above), so
+                    # fall back on the call_return_type.
+                    return_type = call_return_type
+                elif not return_type.is_supertype_of(call_return_type, ignore_nullable=True):
+                    # There is a return_type stored in metadata (schema version >= 25),
+                    # and the stored return_type of the UDF call doesn't match the column type of the FunctionCall.
+                    validation_error = dedent(
+                        f"""
+                        The return type stored in the database for a UDF call to {fn.self_path!r} no longer
+                        matches its return type as currently defined in the code. This probably means that the
+                        code for {fn.self_path!r} has changed in a backward-incompatible way.
+                        Return type of UDF call in the database: {return_type}
+                        Return type of UDF as currently defined in code: {call_return_type}
+                        """
+                    ).strip()
+
+        assert return_type is not None  # Guaranteed by the above logic.
 
         fn_call = cls(
             resolved_fn,
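The compatibility test at the core of the new deserialization logic is `is_supertype_of(..., ignore_nullable=True)`. An illustrative sketch of what it tolerates, using types that appear elsewhere in this diff (a sketch, not the exhaustive rules):

    # Nullability drift between the stored return_type and the freshly computed
    # call_return_type is tolerated:
    stored = ts.JsonType(nullable=True)    # return_type recorded in table metadata
    current = ts.JsonType(nullable=False)  # call_return_type per the current code
    assert stored.is_supertype_of(current, ignore_nullable=True)
    # A structural change (e.g. the UDF now returning StringType) fails the check
    # and produces the validation_error constructed above.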
pixeltable/exprs/globals.py CHANGED
@@ -2,10 +2,9 @@ from __future__ import annotations
 
 import datetime
 import enum
-from typing import Union
 
 # Python types corresponding to our literal types
-LiteralPythonTypes = Union[str, int, float, bool, datetime.datetime, datetime.date]
+LiteralPythonTypes = str | int | float | bool | datetime.datetime | datetime.date
 
 
 def print_slice(s: slice) -> str:
pixeltable/exprs/json_path.py CHANGED
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-from typing import Any, Optional, Union
+from typing import Any, Optional
 
 import jmespath
 import sqlalchemy as sql
@@ -18,7 +18,7 @@ from .sql_element_cache import SqlElementCache
 
 class JsonPath(Expr):
     def __init__(
-        self, anchor: Optional[Expr], path_elements: Optional[list[Union[str, int, slice]]] = None, scope_idx: int = 0
+        self, anchor: Optional[Expr], path_elements: Optional[list[str | int | slice]] = None, scope_idx: int = 0
     ) -> None:
         """
         anchor can be None, in which case this is a relative JsonPath and the anchor is set later via set_anchor().
@@ -30,7 +30,7 @@ class JsonPath(Expr):
         super().__init__(ts.JsonType(nullable=True))  # JsonPath expressions are always nullable
         if anchor is not None:
             self.components = [anchor]
-        self.path_elements: list[Union[str, int, slice]] = path_elements
+        self.path_elements: list[str | int | slice] = path_elements
         self.compiled_path = jmespath.compile(self._json_path()) if len(path_elements) > 0 else None
         self.scope_idx = scope_idx
         # NOTE: the _create_id() result will change if set_anchor() gets called;
pixeltable/exprs/row_builder.py CHANGED
@@ -3,7 +3,7 @@ from __future__ import annotations
 import sys
 import time
 from dataclasses import dataclass
-from typing import Any, Iterable, Optional, Sequence
+from typing import Any, Iterable, NamedTuple, Optional, Sequence
 from uuid import UUID
 
 import numpy as np
@@ -34,8 +34,7 @@
     )
 
 
-@dataclass
-class ColumnSlotIdx:
+class ColumnSlotIdx(NamedTuple):
    """Info for how to locate materialized column in DataRow
    TODO: can this be integrated into RowBuilder directly?
    """
@@ -87,6 +86,8 @@ class RowBuilder:
     img_slot_idxs: list[int]  # Indices of image slots
     media_slot_idxs: list[int]  # Indices of non-image media slots
     array_slot_idxs: list[int]  # Indices of array slots
+    stored_img_cols: list[exprs.ColumnSlotIdx]
+    stored_media_cols: list[exprs.ColumnSlotIdx]
 
     @dataclass
     class EvalCtx:
@@ -113,6 +114,8 @@ class RowBuilder:
         """
         self.unique_exprs: ExprSet[Expr] = ExprSet()  # dependencies precede their dependents
         self.next_slot_idx = 0
+        self.stored_img_cols = []
+        self.stored_media_cols = []
 
         # record input and output exprs; make copies to avoid reusing execution state
         unique_input_exprs = [self._record_unique_expr(e.copy(), recursive=False) for e in input_exprs]
@@ -127,7 +130,7 @@ class RowBuilder:
         )
 
         # if init(columns):
-        # - we are creating table rows and need to record columns for create_table_row()
+        # - we are creating table rows and need to record columns for create_store_table_row()
         # - output_exprs materialize those columns
         # - input_exprs are ColumnRefs of the non-computed columns (ie, what needs to be provided as input)
         # - media validation:
@@ -247,11 +250,13 @@ class RowBuilder:
     def add_table_column(self, col: catalog.Column, slot_idx: int) -> None:
         """Record a column that is part of the table row"""
         assert self.tbl is not None
-        self.table_columns.append(ColumnSlotIdx(col, slot_idx))
-
-    def output_slot_idxs(self) -> list[ColumnSlotIdx]:
-        """Return ColumnSlotIdx for output columns"""
-        return self.table_columns
+        assert col.is_stored
+        info = ColumnSlotIdx(col, slot_idx)
+        self.table_columns.append(info)
+        if col.col_type.is_media_type():
+            self.stored_media_cols.append(info)
+        if col.col_type.is_image_type():
+            self.stored_img_cols.append(info)
 
     @property
     def num_materialized(self) -> int:
@@ -445,20 +450,20 @@ class RowBuilder:
                 expr, f'expression {expr}', data_row.get_exc(expr.slot_idx), exc_tb, input_vals, 0
             ) from exc
 
-    def create_table_row(
+    def create_store_table_row(
         self, data_row: DataRow, cols_with_excs: Optional[set[int]], pk: tuple[int, ...]
     ) -> tuple[list[Any], int]:
-        """Create a table row from the slots that have an output column assigned
+        """Create a store table row from the slots that have an output column assigned
 
         Return tuple[list of row values in `self.table_columns` order, # of exceptions]
         This excludes system columns.
+        Row values are converted to their store type.
         """
         from pixeltable.exprs.column_property_ref import ColumnPropertyRef
 
         num_excs = 0
         table_row: list[Any] = list(pk)
-        for info in self.table_columns:
-            col, slot_idx = info.col, info.slot_idx
+        for col, slot_idx in self.table_columns:
             if data_row.has_exc(slot_idx):
                 exc = data_row.get_exc(slot_idx)
                 num_excs += 1
@@ -469,9 +474,11 @@ class RowBuilder:
                 # exceptions get stored in the errortype/-msg properties of the cellmd column
                 table_row.append(ColumnPropertyRef.create_cellmd_exc(exc))
             else:
-                if col.col_type.is_image_type() and data_row.file_urls[slot_idx] is None:
-                    # we have yet to store this image
-                    data_row.flush_img(slot_idx, col)
+                if col.col_type.is_media_type():
+                    if col.col_type.is_image_type() and data_row.file_urls[slot_idx] is None:
+                        # we have yet to store this image
+                        data_row.flush_img(slot_idx, col)
+                    data_row.move_tmp_media_file(slot_idx, col)
                 val = data_row.get_stored_val(slot_idx, col.get_sa_col_type())
                 table_row.append(val)
                 if col.stores_cellmd:
@@ -479,7 +486,7 @@
 
         return table_row, num_excs
 
-    def store_column_names(self) -> tuple[list[str], dict[int, catalog.Column]]:
+    def store_column_names(self) -> list[str]:
         """
         Returns the list of store column names corresponding to the table_columns of this RowBuilder.
         The second tuple element of the return value is a dictionary containing all media columns in the
@@ -487,16 +494,13 @@
         """
         assert self.tbl is not None, self.table_columns
         store_col_names: list[str] = [pk_col.name for pk_col in self.tbl.store_tbl.pk_columns()]
-        media_cols: dict[int, catalog.Column] = {}
 
         for col in self.table_columns:
-            if col.col.col_type.is_media_type():
-                media_cols[len(store_col_names)] = col.col
             store_col_names.append(col.col.store_name())
             if col.col.stores_cellmd:
                 store_col_names.append(col.col.cellmd_store_name())
 
-        return store_col_names, media_cols
+        return store_col_names
 
     def make_row(self) -> exprs.DataRow:
         """Creates a new DataRow with the current row_builder's configuration."""
pixeltable/exprs/string_op.py CHANGED
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-from typing import Any, Optional, Union
+from typing import Any, Optional
 
 import sqlalchemy as sql
 
@@ -76,7 +76,7 @@ class StringOp(Expr):
         op2_val = data_row[self._op2.slot_idx]
         data_row[self.slot_idx] = self.eval_nullable(op1_val, op2_val)
 
-    def eval_nullable(self, op1_val: Union[str, None], op2_val: Union[int, str, None]) -> Union[str, None]:
+    def eval_nullable(self, op1_val: str | None, op2_val: int | str | None) -> str | None:
         """
         Return the result of evaluating the expression on two nullable int/float operands,
         None is interpreted as SQL NULL
@@ -85,7 +85,7 @@ class StringOp(Expr):
             return None
         return self.eval_non_null(op1_val, op2_val)
 
-    def eval_non_null(self, op1_val: str, op2_val: Union[int, str]) -> str:
+    def eval_non_null(self, op1_val: str, op2_val: int | str) -> str:
         """
         Return the result of evaluating the expression on two int/float operands
         """
pixeltable/func/expr_template_function.py CHANGED
@@ -85,13 +85,16 @@ class ExprTemplateFunction(Function):
         conditional_return_type).
         """
         assert not self.is_polymorphic
-        template = self.template
         with_defaults = bound_args.copy()
         with_defaults.update(
-            {param_name: default for param_name, default in template.defaults.items() if param_name not in bound_args}
+            {
+                param_name: default
+                for param_name, default in self.template.defaults.items()
+                if param_name not in bound_args
+            }
         )
         substituted_expr = self.template.expr.copy().substitute(
-            {template.param_exprs[name]: expr for name, expr in with_defaults.items()}
+            {self.template.param_exprs[name]: expr for name, expr in with_defaults.items()}
         )
         return substituted_expr.col_type
 
pixeltable/func/query_template_function.py CHANGED
@@ -2,7 +2,7 @@ from __future__ import annotations
 
 import inspect
 from functools import reduce
-from typing import TYPE_CHECKING, Any, Callable, Iterable, Optional, Union, overload
+from typing import TYPE_CHECKING, Any, Callable, Iterable, Optional, overload
 
 from pixeltable import catalog, exceptions as excs, exprs, func, type_system as ts
 
@@ -129,7 +129,7 @@ def retrieval_udf(
     table: catalog.Table,
     name: Optional[str] = None,
     description: Optional[str] = None,
-    parameters: Optional[Iterable[Union[str, exprs.ColumnRef]]] = None,
+    parameters: Optional[Iterable[str | exprs.ColumnRef]] = None,
     limit: Optional[int] = 10,
 ) -> func.QueryTemplateFunction:
     """
pixeltable/func/signature.py CHANGED
@@ -84,8 +84,28 @@ class Signature:
     """
 
     SPECIAL_PARAM_NAMES: ClassVar[list[str]] = ['group_by', 'order_by']
-
-    def __init__(self, return_type: ts.ColumnType, parameters: list[Parameter], is_batched: bool = False):
+    SYSTEM_PARAM_NAMES: ClassVar[list[str]] = ['_runtime_ctx']
+
+    return_type: ts.ColumnType
+    is_batched: bool
+    parameters: dict[str, Parameter]  # name -> Parameter
+    parameters_by_pos: list[Parameter]  # ordered by position in the signature
+    constant_parameters: list[Parameter]  # parameters that are not batched
+    batched_parameters: list[Parameter]  # parameters that are batched
+    required_parameters: list[Parameter]  # parameters that do not have a default value
+
+    # the names of recognized system parameters in the signature; these are excluded from self.parameters
+    system_parameters: list[str]
+
+    py_signature: inspect.Signature
+
+    def __init__(
+        self,
+        return_type: ts.ColumnType,
+        parameters: list[Parameter],
+        is_batched: bool = False,
+        system_parameters: Optional[list[str]] = None,
+    ):
         assert isinstance(return_type, ts.ColumnType)
         self.return_type = return_type
         self.is_batched = is_batched
@@ -95,6 +115,7 @@ class Signature:
         self.constant_parameters = [p for p in parameters if not p.is_batched]
         self.batched_parameters = [p for p in parameters if p.is_batched]
         self.required_parameters = [p for p in parameters if not p.has_default()]
+        self.system_parameters = system_parameters if system_parameters is not None else []
         self.py_signature = inspect.Signature([p.to_py_param() for p in self.parameters_by_pos])
 
     def get_return_type(self) -> ts.ColumnType:
@@ -237,6 +258,7 @@ class Signature:
         type_substitutions: Optional[dict] = None,
         is_cls_method: bool = False,
     ) -> list[Parameter]:
+        """Ignores parameters starting with '_'."""
        from pixeltable import exprs
 
        assert (py_fn is None) != (py_params is None)
@@ -251,6 +273,10 @@ class Signature:
         for idx, param in enumerate(py_params):
             if is_cls_method and idx == 0:
                 continue  # skip 'self' or 'cls' parameter
+            if param.name in cls.SYSTEM_PARAM_NAMES:
+                continue  # skip system parameters
+            if param.name.startswith('_'):
+                raise excs.Error(f"{param.name!r}: parameters starting with '_' are reserved")
             if param.name in cls.SPECIAL_PARAM_NAMES:
                 raise excs.Error(f'{param.name!r} is a reserved parameter name')
             if param.kind in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD):
@@ -308,5 +334,6 @@ class Signature:
             raise excs.Error('Cannot infer pixeltable return type')
         else:
             _, return_is_batched = cls._infer_type(sig.return_annotation)
+        system_params = [param_name for param_name in sig.parameters if param_name in cls.SYSTEM_PARAM_NAMES]
 
-        return Signature(return_type, parameters, return_is_batched)
+        return Signature(return_type, parameters, return_is_batched, system_parameters=system_params)
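The new SYSTEM_PARAM_NAMES machinery connects to the `_runtime_ctx` argument that anthropic.messages gains further down: leading-underscore names are reserved, and `_runtime_ctx` in particular is recognized, excluded from the user-visible signature, and supplied by the executor. A sketch of the pattern (the UDF body is hypothetical; the parameter spelling comes from this diff):

    @pxt.udf
    async def my_udf(prompt: str, _runtime_ctx: Optional[env.RuntimeCtx] = None) -> str:
        # _runtime_ctx is not part of the signature exposed to callers; the executor
        # fills it in at evaluation time, e.g. to signal that this call is a retry.
        is_retry = _runtime_ctx is not None and _runtime_ctx.is_retry
        ...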
pixeltable/func/tools.py CHANGED
@@ -1,5 +1,5 @@
 import json
-from typing import TYPE_CHECKING, Any, Callable, Optional, TypeVar, Union
+from typing import TYPE_CHECKING, Any, Callable, Optional, TypeVar
 
 import pydantic
 
@@ -100,7 +100,7 @@ class Tools(pydantic.BaseModel):
         self,
         auto: bool = False,
         required: bool = False,
-        tool: Union[str, Function, None] = None,
+        tool: str | Function | None = None,
         parallel_tool_calls: bool = True,
     ) -> ToolChoice:
         if sum([auto, required, tool is not None]) != 1:
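The guard on the last line above works because Python booleans coerce to 0/1 under sum(), so the expression counts how many of the three mutually exclusive selection modes were supplied; anything other than exactly one is rejected:

    sum([True, False, False])   # 1: valid, e.g. auto=True
    sum([True, True, False])    # 2: auto and required together -> error
    sum([False, False, False])  # 0: no mode chosen -> error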
pixeltable/functions/anthropic.py CHANGED
@@ -38,6 +38,53 @@ def _anthropic_client() -> 'anthropic.AsyncAnthropic':
     return env.Env.get().get_client('anthropic')
 
 
+def _get_header_info(
+    headers: httpx.Headers,
+) -> tuple[
+    Optional[tuple[int, int, datetime.datetime]],
+    Optional[tuple[int, int, datetime.datetime]],
+    Optional[tuple[int, int, datetime.datetime]],
+]:
+    """Extract rate limit info from Anthropic API response headers."""
+    requests_limit_str = headers.get('anthropic-ratelimit-requests-limit')
+    requests_limit = int(requests_limit_str) if requests_limit_str is not None else None
+    requests_remaining_str = headers.get('anthropic-ratelimit-requests-remaining')
+    requests_remaining = int(requests_remaining_str) if requests_remaining_str is not None else None
+    requests_reset_str = headers.get('anthropic-ratelimit-requests-reset')
+    requests_reset = (
+        datetime.datetime.fromisoformat(requests_reset_str.replace('Z', '+00:00')) if requests_reset_str else None
+    )
+    requests_info = (requests_limit, requests_remaining, requests_reset) if requests_reset else None
+
+    input_tokens_limit_str = headers.get('anthropic-ratelimit-input-tokens-limit')
+    input_tokens_limit = int(input_tokens_limit_str) if input_tokens_limit_str is not None else None
+    input_tokens_remaining_str = headers.get('anthropic-ratelimit-input-tokens-remaining')
+    input_tokens_remaining = int(input_tokens_remaining_str) if input_tokens_remaining_str is not None else None
+    input_tokens_reset_str = headers.get('anthropic-ratelimit-input-tokens-reset')
+    input_tokens_reset = (
+        datetime.datetime.fromisoformat(input_tokens_reset_str.replace('Z', '+00:00'))
+        if input_tokens_reset_str
+        else None
+    )
+    input_tokens_info = (input_tokens_limit, input_tokens_remaining, input_tokens_reset) if input_tokens_reset else None
+
+    output_tokens_limit_str = headers.get('anthropic-ratelimit-output-tokens-limit')
+    output_tokens_limit = int(output_tokens_limit_str) if output_tokens_limit_str is not None else None
+    output_tokens_remaining_str = headers.get('anthropic-ratelimit-output-tokens-remaining')
+    output_tokens_remaining = int(output_tokens_remaining_str) if output_tokens_remaining_str is not None else None
+    output_tokens_reset_str = headers.get('anthropic-ratelimit-output-tokens-reset')
+    output_tokens_reset = (
+        datetime.datetime.fromisoformat(output_tokens_reset_str.replace('Z', '+00:00'))
+        if output_tokens_reset_str
+        else None
+    )
+    output_tokens_info = (
+        (output_tokens_limit, output_tokens_remaining, output_tokens_reset) if output_tokens_reset else None
+    )
+
+    return requests_info, input_tokens_info, output_tokens_info
+
+
 class AnthropicRateLimitsInfo(env.RateLimitsInfo):
     def __init__(self) -> None:
         super().__init__(self._get_request_resources)
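For reference, the rate-limit headers consumed by `_get_header_info` carry integer counts plus an RFC 3339 reset timestamp. An illustration with invented values, showing only the requests triple present:

    import httpx

    headers = httpx.Headers({
        'anthropic-ratelimit-requests-limit': '1000',
        'anthropic-ratelimit-requests-remaining': '999',
        'anthropic-ratelimit-requests-reset': '2025-01-01T00:00:00Z',
    })
    # _get_header_info(headers) returns ((1000, 999, <reset as tz-aware datetime>), None, None):
    # the token triples are None because their '...-reset' headers are absent.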
@@ -51,6 +98,27 @@ class AnthropicRateLimitsInfo(env.RateLimitsInfo):
             input_len += len(message['content'])
         return {'requests': 1, 'input_tokens': int(input_len / 4), 'output_tokens': max_tokens}
 
+    def record_exc(self, exc: Exception) -> None:
+        import anthropic
+
+        if (
+            not isinstance(exc, anthropic.APIError)
+            or not hasattr(exc, 'response')
+            or not hasattr(exc.response, 'headers')
+        ):
+            return
+        requests_info, input_tokens_info, output_tokens_info = _get_header_info(exc.response.headers)
+        _logger.debug(
+            f'record_exc(): requests_info={requests_info} input_tokens_info={input_tokens_info} '
+            f'output_tokens_info={output_tokens_info}'
+        )
+        self.record(requests=requests_info, input_tokens=input_tokens_info, output_tokens=output_tokens_info)
+        self.has_exc = True
+
+        retry_after_str = exc.response.headers.get('retry-after')
+        if retry_after_str is not None:
+            _logger.debug(f'retry-after: {retry_after_str}')
+
     def get_retry_delay(self, exc: Exception) -> Optional[float]:
         import anthropic
 
@@ -64,8 +132,7 @@ class AnthropicRateLimitsInfo(env.RateLimitsInfo):
         should_retry_str = exc.response.headers.get('x-should-retry', '')
         if should_retry_str.lower() != 'true':
             return None
-        retry_after_str = exc.response.headers.get('retry-after', '1')
-        return int(retry_after_str)
+        return super().get_retry_delay(exc)
 
 
 @pxt.udf
@@ -77,6 +144,7 @@ async def messages(
     model_kwargs: Optional[dict[str, Any]] = None,
     tools: Optional[list[dict[str, Any]]] = None,
     tool_choice: Optional[dict[str, Any]] = None,
+    _runtime_ctx: Optional[env.RuntimeCtx] = None,
 ) -> dict:
     """
     Create a Message.
@@ -151,32 +219,13 @@ async def messages(
         messages=cast(Iterable[MessageParam], messages), model=model, max_tokens=max_tokens, **model_kwargs
     )
 
-    requests_limit_str = result.headers.get('anthropic-ratelimit-requests-limit')
-    requests_limit = int(requests_limit_str) if requests_limit_str is not None else None
-    requests_remaining_str = result.headers.get('anthropic-ratelimit-requests-remaining')
-    requests_remaining = int(requests_remaining_str) if requests_remaining_str is not None else None
-    requests_reset_str = result.headers.get('anthropic-ratelimit-requests-reset')
-    requests_reset = datetime.datetime.fromisoformat(requests_reset_str.replace('Z', '+00:00'))
-    input_tokens_limit_str = result.headers.get('anthropic-ratelimit-input-tokens-limit')
-    input_tokens_limit = int(input_tokens_limit_str) if input_tokens_limit_str is not None else None
-    input_tokens_remaining_str = result.headers.get('anthropic-ratelimit-input-tokens-remaining')
-    input_tokens_remaining = int(input_tokens_remaining_str) if input_tokens_remaining_str is not None else None
-    input_tokens_reset_str = result.headers.get('anthropic-ratelimit-input-tokens-reset')
-    input_tokens_reset = datetime.datetime.fromisoformat(input_tokens_reset_str.replace('Z', '+00:00'))
-    output_tokens_limit_str = result.headers.get('anthropic-ratelimit-output-tokens-limit')
-    output_tokens_limit = int(output_tokens_limit_str) if output_tokens_limit_str is not None else None
-    output_tokens_remaining_str = result.headers.get('anthropic-ratelimit-output-tokens-remaining')
-    output_tokens_remaining = int(output_tokens_remaining_str) if output_tokens_remaining_str is not None else None
-    output_tokens_reset_str = result.headers.get('anthropic-ratelimit-output-tokens-reset')
-    output_tokens_reset = datetime.datetime.fromisoformat(output_tokens_reset_str.replace('Z', '+00:00'))
-    retry_after_str = result.headers.get('retry-after')
-    if retry_after_str is not None:
-        _logger.debug(f'retry-after: {retry_after_str}')
-
+    requests_info, input_tokens_info, output_tokens_info = _get_header_info(result.headers)
+    # retry_after_str = result.headers.get('retry-after')
+    # if retry_after_str is not None:
+    #     _logger.debug(f'retry-after: {retry_after_str}')
+    is_retry = _runtime_ctx is not None and _runtime_ctx.is_retry
     rate_limits_info.record(
-        requests=(requests_limit, requests_remaining, requests_reset),
-        input_tokens=(input_tokens_limit, input_tokens_remaining, input_tokens_reset),
-        output_tokens=(output_tokens_limit, output_tokens_remaining, output_tokens_reset),
+        requests=requests_info, input_tokens=input_tokens_info, output_tokens=output_tokens_info, reset_exc=is_retry
     )
 
     result_dict = json.loads(result.text)
pixeltable/functions/deepseek.py CHANGED
@@ -26,7 +26,7 @@ def _deepseek_client() -> 'openai.AsyncOpenAI':
     return env.Env.get().get_client('deepseek')
 
 
-@pxt.udf
+@pxt.udf(resource_pool='request-rate:deepseek')
 async def chat_completions(
     messages: list,
     *,
@@ -43,6 +43,10 @@ async def chat_completions(
 
     Deepseek uses the OpenAI SDK, so you will need to install the `openai` package to use this UDF.
 
+    Request throttling:
+        Applies the rate limit set in the config (section `deepseek`, key `rate_limit`). If no rate
+        limit is configured, uses a default of 600 RPM.
+
     __Requirements:__
 
     - `pip install openai`
pixeltable/functions/gemini.py CHANGED
@@ -14,6 +14,7 @@ import PIL.Image
 
 import pixeltable as pxt
 from pixeltable import env, exceptions as excs, exprs
+from pixeltable.utils.media_store import TempStore
 
 if TYPE_CHECKING:
     from google import genai
@@ -39,7 +40,7 @@ async def generate_content(
     <https://ai.google.dev/gemini-api/docs/text-generation>
 
     Request throttling:
-        Applies the rate limit set in the config (section `gemini`, key `rate_limit`). If no rate
+        Applies the rate limit set in the config (section `gemini.rate_limits`; use the model id as the key). If no rate
         limit is configured, uses a default of 600 RPM.
 
     __Requirements:__
@@ -126,6 +127,10 @@ async def generate_images(prompt: str, *, model: str, config: Optional[dict] = N
     Generates images based on a text description and configuration. For additional details, see:
     <https://ai.google.dev/gemini-api/docs/image-generation>
 
+    Request throttling:
+        Applies the rate limit set in the config (section `imagen.rate_limits`; use the model id as the key). If no rate
+        limit is configured, uses a default of 600 RPM.
+
     __Requirements:__
 
     - `pip install google-genai`
@@ -167,6 +172,10 @@ async def generate_videos(
     Generates videos based on a text description and configuration. For additional details, see:
     <https://ai.google.dev/gemini-api/docs/video-generation>
 
+    Request throttling:
+        Applies the rate limit set in the config (section `veo.rate_limits`; use the model id as the key). If no rate
+        limit is configured, uses a default of 600 RPM.
+
     __Requirements:__
 
     - `pip install google-genai`
@@ -215,7 +224,7 @@ async def generate_videos(
     assert video_bytes is not None
 
     # Create a temporary file to store the video bytes
-    output_path = env.Env.get().create_tmp_path('.mp4')
+    output_path = TempStore.create_path(extension='.mp4')
     Path(output_path).write_bytes(video_bytes)
     return str(output_path)
 
pixeltable/functions/globals.py CHANGED
@@ -1,6 +1,6 @@
 import builtins
 import typing
-from typing import Any, Callable, Optional, Union
+from typing import Any, Callable, Optional
 
 import sqlalchemy as sql
 
@@ -11,7 +11,7 @@ from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
 
 
 # TODO: remove and replace calls with astype()
-def cast(expr: exprs.Expr, target_type: Union[ts.ColumnType, type, _GenericAlias]) -> exprs.Expr:
+def cast(expr: exprs.Expr, target_type: ts.ColumnType | type | _GenericAlias) -> exprs.Expr:
     expr.col_type = ts.ColumnType.normalize_type(target_type)
     return expr
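Per the TODO, cast() is slated to be replaced at call sites by Expr.astype(). A hedged usage sketch (table and column names invented):

    import pixeltable as pxt

    t = pxt.get_table('films')  # hypothetical existing table
    # astype() coerces the expression's column type, which cast() achieves by
    # mutating col_type directly:
    expr = t.budget.astype(pxt.Float)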