pixeltable 0.4.14__py3-none-any.whl → 0.4.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of pixeltable may warrant closer review before upgrading.

Files changed (64)
  1. pixeltable/__init__.py +6 -1
  2. pixeltable/catalog/catalog.py +107 -45
  3. pixeltable/catalog/column.py +7 -2
  4. pixeltable/catalog/table.py +1 -0
  5. pixeltable/catalog/table_metadata.py +5 -0
  6. pixeltable/catalog/table_version.py +100 -106
  7. pixeltable/catalog/table_version_handle.py +4 -1
  8. pixeltable/catalog/update_status.py +12 -0
  9. pixeltable/config.py +6 -0
  10. pixeltable/dataframe.py +11 -5
  11. pixeltable/env.py +52 -19
  12. pixeltable/exec/__init__.py +2 -0
  13. pixeltable/exec/cell_materialization_node.py +231 -0
  14. pixeltable/exec/cell_reconstruction_node.py +135 -0
  15. pixeltable/exec/exec_node.py +1 -1
  16. pixeltable/exec/expr_eval/evaluators.py +1 -0
  17. pixeltable/exec/expr_eval/expr_eval_node.py +14 -0
  18. pixeltable/exec/expr_eval/globals.py +2 -0
  19. pixeltable/exec/globals.py +32 -0
  20. pixeltable/exec/object_store_save_node.py +1 -4
  21. pixeltable/exec/row_update_node.py +16 -9
  22. pixeltable/exec/sql_node.py +107 -14
  23. pixeltable/exprs/__init__.py +1 -1
  24. pixeltable/exprs/arithmetic_expr.py +10 -11
  25. pixeltable/exprs/column_property_ref.py +10 -10
  26. pixeltable/exprs/column_ref.py +2 -2
  27. pixeltable/exprs/data_row.py +106 -37
  28. pixeltable/exprs/expr.py +9 -0
  29. pixeltable/exprs/expr_set.py +14 -7
  30. pixeltable/exprs/inline_expr.py +2 -19
  31. pixeltable/exprs/json_path.py +45 -12
  32. pixeltable/exprs/row_builder.py +54 -22
  33. pixeltable/functions/__init__.py +1 -0
  34. pixeltable/functions/bedrock.py +7 -0
  35. pixeltable/functions/deepseek.py +11 -4
  36. pixeltable/functions/llama_cpp.py +7 -0
  37. pixeltable/functions/math.py +1 -1
  38. pixeltable/functions/ollama.py +7 -0
  39. pixeltable/functions/openai.py +4 -4
  40. pixeltable/functions/openrouter.py +143 -0
  41. pixeltable/functions/video.py +123 -9
  42. pixeltable/functions/whisperx.py +2 -0
  43. pixeltable/functions/yolox.py +2 -0
  44. pixeltable/globals.py +56 -31
  45. pixeltable/io/__init__.py +1 -0
  46. pixeltable/io/globals.py +16 -15
  47. pixeltable/io/table_data_conduit.py +46 -21
  48. pixeltable/iterators/__init__.py +1 -0
  49. pixeltable/metadata/__init__.py +1 -1
  50. pixeltable/metadata/converters/convert_40.py +73 -0
  51. pixeltable/metadata/notes.py +1 -0
  52. pixeltable/plan.py +175 -46
  53. pixeltable/share/publish.py +0 -1
  54. pixeltable/store.py +2 -2
  55. pixeltable/type_system.py +5 -3
  56. pixeltable/utils/console_output.py +4 -1
  57. pixeltable/utils/exception_handler.py +5 -28
  58. pixeltable/utils/image.py +7 -0
  59. pixeltable/utils/misc.py +5 -0
  60. {pixeltable-0.4.14.dist-info → pixeltable-0.4.16.dist-info}/METADATA +2 -1
  61. {pixeltable-0.4.14.dist-info → pixeltable-0.4.16.dist-info}/RECORD +64 -57
  62. {pixeltable-0.4.14.dist-info → pixeltable-0.4.16.dist-info}/WHEEL +0 -0
  63. {pixeltable-0.4.14.dist-info → pixeltable-0.4.16.dist-info}/entry_points.txt +0 -0
  64. {pixeltable-0.4.14.dist-info → pixeltable-0.4.16.dist-info}/licenses/LICENSE +0 -0
pixeltable/exprs/expr_set.py
@@ -9,26 +9,33 @@ T = TypeVar('T', bound='Expr')
 
 class ExprSet(Generic[T]):
     """
-    A set that also supports indexed lookup (by slot_idx and Expr.id). Exprs are uniquely identified by Expr.id.
+    An ordered set that also supports indexed lookup (by slot_idx and Expr.id). Exprs are uniquely identified by
+    Expr.id.
     """
 
     exprs: dict[int, T]  # key: Expr.id
+    expr_offsets: dict[int, int]  # key: Expr.id, value: offset into self.exprs.keys()
     exprs_by_idx: dict[int, T]  # key: slot_idx
 
     def __init__(self, elements: Optional[Iterable[T]] = None):
         self.exprs = {}
+        self.expr_offsets = {}
         self.exprs_by_idx = {}
         if elements is not None:
             for e in elements:
                 self.add(e)
 
-    def add(self, expr: T) -> None:
-        if expr.id in self.exprs:
-            return
+    def add(self, expr: T) -> int:
+        """Returns offset corresponding to iteration order"""
+        offset = self.expr_offsets.get(expr.id)
+        if offset is not None:
+            return offset
+        offset = len(self.exprs)
         self.exprs[expr.id] = expr
-        if expr.slot_idx is None:
-            return
-        self.exprs_by_idx[expr.slot_idx] = expr
+        self.expr_offsets[expr.id] = offset
+        if expr.slot_idx is not None:
+            self.exprs_by_idx[expr.slot_idx] = expr
+        return offset
 
     def update(self, *others: Iterable[T]) -> None:
         for other in others:
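
The new add() contract is easiest to see in a standalone sketch. The Item class below is a hypothetical stand-in for pixeltable's Expr, used only to show that add() now returns a stable iteration-order offset and that re-adding an element returns its original offset:

import itertools
from dataclasses import dataclass, field

_next_id = itertools.count()

@dataclass
class Item:  # hypothetical stand-in for Expr
    id: int = field(default_factory=lambda: next(_next_id))

class OrderedSet:
    def __init__(self) -> None:
        self.items: dict[int, Item] = {}   # insertion-ordered, like ExprSet.exprs
        self.offsets: dict[int, int] = {}  # like ExprSet.expr_offsets

    def add(self, item: Item) -> int:
        offset = self.offsets.get(item.id)
        if offset is not None:
            return offset  # duplicates keep their original offset
        offset = len(self.items)
        self.items[item.id] = item
        self.offsets[item.id] = offset
        return offset

a, b = Item(), Item()
s = OrderedSet()
assert (s.add(a), s.add(b), s.add(a)) == (0, 1, 0)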
pixeltable/exprs/inline_expr.py
@@ -98,13 +98,7 @@ class InlineList(Expr):
     def __init__(self, elements: Iterable):
         exprs = [Expr.from_object(el) for el in elements]
 
-        json_schema = {
-            'type': 'array',
-            'prefixItems': [expr.col_type.to_json_schema() for expr in exprs],
-            'items': False,  # No additional items (fixed length)
-        }
-
-        super().__init__(ts.JsonType(json_schema))
+        super().__init__(ts.JsonType())
         self.components.extend(exprs)
         self.id = self._create_id()
 
@@ -150,18 +144,7 @@ class InlineDict(Expr):
             self.keys.append(key)
             exprs.append(Expr.from_object(val))
 
-        json_schema: Optional[dict[str, Any]]
-        try:
-            json_schema = {
-                'type': 'object',
-                'properties': {key: expr.col_type.to_json_schema() for key, expr in zip(self.keys, exprs)},
-            }
-        except excs.Error:
-            # InlineDicts are used to store iterator arguments, which are not required to be valid JSON types,
-            # so we can't always construct a valid schema.
-            json_schema = None
-
-        super().__init__(ts.JsonType(json_schema))
+        super().__init__(ts.JsonType())
         self.components.extend(exprs)
         self.id = self._create_id()
 
pixeltable/exprs/json_path.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
 
+import io
+from pathlib import Path
 from typing import Any, Optional
 
 import jmespath
@@ -7,6 +9,7 @@ import sqlalchemy as sql
 
 from pixeltable import catalog, exceptions as excs, type_system as ts
 
+from .column_ref import ColumnRef
 from .data_row import DataRow
 from .expr import Expr
 from .globals import print_slice
@@ -23,6 +26,11 @@ class JsonPath(Expr):
     (0: indicates the immediately preceding JsonMapper, -1: the parent of the immediately preceding mapper, ...)
     """
 
+    path_elements: list[str | int | slice]
+    compiled_path: jmespath.parser.ParsedResult | None
+    scope_idx: int
+    file_handles: dict[Path, io.BufferedReader]  # key: file path
+
     def __init__(
         self, anchor: Optional[Expr], path_elements: Optional[list[str | int | slice]] = None, scope_idx: int = 0
     ) -> None:
@@ -31,16 +39,22 @@ class JsonPath(Expr):
         super().__init__(ts.JsonType(nullable=True))  # JsonPath expressions are always nullable
         if anchor is not None:
             self.components = [anchor]
-        self.path_elements: list[str | int | slice] = path_elements
+        self.path_elements = path_elements
         self.compiled_path = jmespath.compile(self._json_path()) if len(path_elements) > 0 else None
         self.scope_idx = scope_idx
         # NOTE: the _create_id() result will change if set_anchor() gets called;
         # this is not a problem, because _create_id() shouldn't be called after init()
         self.id = self._create_id()
+        self.file_handles = {}
+
+    def release(self) -> None:
+        for fh in self.file_handles.values():
+            fh.close()
+        self.file_handles.clear()
 
     def __repr__(self) -> str:
         # else 'R': the anchor is RELATIVE_PATH_ROOT
-        anchor_str = str(self._anchor) if self._anchor is not None else 'R'
+        anchor_str = str(self.anchor) if self.anchor is not None else 'R'
         if len(self.path_elements) == 0:
             return anchor_str
         return f'{anchor_str}{"." if isinstance(self.path_elements[0], str) else ""}{self._json_path()}'
@@ -67,7 +81,7 @@ class JsonPath(Expr):
         return cls(anchor, path_elements, d['scope_idx'])
 
     @property
-    def _anchor(self) -> Optional[Expr]:
+    def anchor(self) -> Optional[Expr]:
         return None if len(self.components) == 0 else self.components[0]
 
     def set_anchor(self, anchor: Expr) -> None:
@@ -75,7 +89,7 @@ class JsonPath(Expr):
         self.components = [anchor]
 
     def is_relative_path(self) -> bool:
-        return self._anchor is None
+        return self.anchor is None
 
     def _has_relative_path(self) -> bool:
         return self.is_relative_path() or super()._has_relative_path()
@@ -85,7 +99,7 @@ class JsonPath(Expr):
             # TODO: take scope_idx into account
             self.set_anchor(mapper.scope_anchor)
         else:
-            self._anchor._bind_rel_paths(mapper)
+            self.anchor._bind_rel_paths(mapper)
 
     def __call__(self, *args: object, **kwargs: object) -> 'JsonPath':
         """
@@ -99,15 +113,15 @@ class JsonPath(Expr):
 
     def __getattr__(self, name: str) -> 'JsonPath':
         assert isinstance(name, str)
-        return JsonPath(self._anchor, [*self.path_elements, name])
+        return JsonPath(self.anchor, [*self.path_elements, name])
 
     def __getitem__(self, index: object) -> 'JsonPath':
         if isinstance(index, (int, slice, str)):
-            return JsonPath(self._anchor, [*self.path_elements, index])
+            return JsonPath(self.anchor, [*self.path_elements, index])
         raise excs.Error(f'Invalid json list index: {index}')
 
     def default_column_name(self) -> Optional[str]:
-        anchor_name = self._anchor.default_column_name() if self._anchor is not None else ''
+        anchor_name = self.anchor.default_column_name() if self.anchor is not None else ''
         ret_name = f'{anchor_name}.{self._json_path()}'
 
         def cleanup_char(s: str) -> str:
@@ -159,12 +173,31 @@ class JsonPath(Expr):
             result.append(f'[{print_slice(element)}]')
         return ''.join(result)
 
-    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
-        assert self._anchor is not None, self
-        val = data_row[self._anchor.slot_idx]
+    def eval(self, row: DataRow, row_builder: RowBuilder) -> None:
+        assert self.anchor is not None, self
+        val = row[self.anchor.slot_idx]
         if self.compiled_path is not None:
             val = self.compiled_path.search(val)
-        data_row[self.slot_idx] = val
+        row[self.slot_idx] = val
+        if val is None or self.anchor is None or not isinstance(self.anchor, ColumnRef):
+            return
+
+        # the origin of val is a json-typed column, which might store inlined objects
+        if self.anchor.slot_idx not in row.slot_md:
+            # we can infer that there aren't any inlined objects because our execution plan doesn't include
+            # materializing the cellmd (eg, insert plans)
+            # TODO: have the planner pass that fact into ExprEvalNode explicitly to streamline this path a bit more
+            return
+
+        # defer import until it's needed
+        from pixeltable.exec.cell_reconstruction_node import json_has_inlined_objs, reconstruct_json
+
+        cell_md = row.slot_md[self.anchor.slot_idx]
+        if cell_md is None or cell_md.file_urls is None or not json_has_inlined_objs(val):
+            # val doesn't contain inlined objects
+            return
+
+        row.vals[self.slot_idx] = reconstruct_json(val, cell_md.file_urls, self.file_handles)
 
 
 RELATIVE_PATH_ROOT = JsonPath(None)
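
The __getattr__/__getitem__ overloads above are what make JSON path chaining on a column reference work. A hedged usage sketch (the table path 'demo.docs' and the JSON column 'metadata' are made-up names for illustration):

import pixeltable as pxt

tbl = pxt.get_table('demo.docs')         # assumes an existing table with a JSON column 'metadata'
author_name = tbl.metadata.author.name   # each attribute access appends to path_elements
first_tag = tbl.metadata.tags[0]         # __getitem__ handles int/str/slice indices
print(tbl.select(author_name, first_tag).collect())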
pixeltable/exprs/row_builder.py
@@ -1,15 +1,17 @@
 from __future__ import annotations
 
+import dataclasses
 import sys
 import time
-from dataclasses import dataclass
 from typing import Any, Iterable, NamedTuple, Optional, Sequence
 from uuid import UUID
 
 import numpy as np
+import sqlalchemy as sql
 
 from pixeltable import catalog, exceptions as excs, exprs, utils
 from pixeltable.env import Env
+from pixeltable.utils.misc import non_none_dict_factory
 
 from .data_row import DataRow
 from .expr import Expr, ExprScope
@@ -68,7 +70,7 @@ class RowBuilder:
     input_exprs: ExprSet
 
     tbl: Optional[catalog.TableVersion]  # reference table of the RowBuilder; used to identify pk columns for writes
-    table_columns: list[ColumnSlotIdx]
+    table_columns: dict[catalog.Column, int | None]  # value: slot idx, if the result of an expr
     default_eval_ctx: EvalCtx
     unstored_iter_args: dict[UUID, Expr]
@@ -92,10 +94,9 @@ class RowBuilder:
     img_slot_idxs: list[int]  # Indices of image slots
     media_slot_idxs: list[int]  # Indices of non-image media slots
     array_slot_idxs: list[int]  # Indices of array slots
-    stored_img_cols: list[exprs.ColumnSlotIdx]
-    stored_media_cols: list[exprs.ColumnSlotIdx]
+    json_slot_idxs: list[int]  # Indices of json slots
 
-    @dataclass
+    @dataclasses.dataclass
     class EvalCtx:
         """Context for evaluating a set of target exprs"""
@@ -113,8 +114,6 @@
     ):
         self.unique_exprs: ExprSet[Expr] = ExprSet()  # dependencies precede their dependents
         self.next_slot_idx = 0
-        self.stored_img_cols = []
-        self.stored_media_cols = []
 
         # record input and output exprs; make copies to avoid reusing execution state
         unique_input_exprs = [self._record_unique_expr(e.copy(), recursive=False) for e in input_exprs]
@@ -138,7 +137,7 @@
         from .column_ref import ColumnRef
 
         self.tbl = tbl
-        self.table_columns: list[ColumnSlotIdx] = []
+        self.table_columns = {}
         self.input_exprs = ExprSet()
         validating_colrefs: dict[Expr, Expr] = {}  # key: non-validating colref, value: corresp. validating colref
         for col in columns:
@@ -245,17 +244,27 @@ class RowBuilder:
             e.slot_idx for e in self.unique_exprs if e.col_type.is_media_type() and not e.col_type.is_image_type()
         ]
         self.array_slot_idxs = [e.slot_idx for e in self.unique_exprs if e.col_type.is_array_type()]
+        self.json_slot_idxs = [e.slot_idx for e in self.unique_exprs if e.col_type.is_json_type()]
 
     def add_table_column(self, col: catalog.Column, slot_idx: int) -> None:
-        """Record a column that is part of the table row"""
+        """Record an output column for which the value is produced via expr evaluation"""
         assert self.tbl is not None
         assert col.is_stored
-        info = ColumnSlotIdx(col, slot_idx)
-        self.table_columns.append(info)
-        if col.col_type.is_media_type():
-            self.stored_media_cols.append(info)
-        if col.col_type.is_image_type():
-            self.stored_img_cols.append(info)
+        self.table_columns[col] = slot_idx
+
+    def add_table_columns(self, cols: list[catalog.Column]) -> None:
+        """Record output columns whose values are materialized into DataRow.cell_vals"""
+        for col in cols:
+            self.table_columns[col] = None
+
+    @property
+    def media_output_col_info(self) -> list[ColumnSlotIdx]:
+        """Return slot idxs for media output columns whose values are produced by expr evaluation"""
+        return [
+            ColumnSlotIdx(col, slot_idx)
+            for col, slot_idx in self.table_columns.items()
+            if col.col_type.is_media_type() and slot_idx is not None
+        ]
 
     @property
     def num_materialized(self) -> int:
@@ -462,13 +471,30 @@
 
         num_excs = 0
         table_row: list[Any] = list(pk)
-        for col, slot_idx in self.table_columns:
+        # Nulls in JSONB columns need to be stored as sql.sql.null(), otherwise it stores a json 'null'
+        for col, slot_idx in self.table_columns.items():
+            if col.id in data_row.cell_vals:
+                table_row.append(data_row.cell_vals[col.id])
+                if col.stores_cellmd:
+                    if data_row.cell_md[col.id] is None:
+                        table_row.append(sql.sql.null())
+                    else:
+                        # we want to minimize the size of the stored dict and use dict_factory to remove Nones
+                        md = dataclasses.asdict(data_row.cell_md[col.id], dict_factory=non_none_dict_factory)
+                        assert len(md) > 0
+                        table_row.append(md)
+                if slot_idx is not None and data_row.has_exc(slot_idx):
+                    num_excs += 1
+                    if cols_with_excs is not None:
+                        cols_with_excs.add(col.id)
+                continue
+
             if data_row.has_exc(slot_idx):
                 exc = data_row.get_exc(slot_idx)
                 num_excs += 1
                 if cols_with_excs is not None:
                     cols_with_excs.add(col.id)
-                table_row.append(None)
+                table_row.append(sql.sql.null() if col.col_type.is_json_type() else None)
                 if col.stores_cellmd:
                     # exceptions get stored in the errortype/-msg properties of the cellmd column
                     table_row.append(ColumnPropertyRef.create_cellmd_exc(exc))
@@ -476,7 +502,7 @@
                 val = data_row.get_stored_val(slot_idx, col.get_sa_col_type())
                 table_row.append(val)
                 if col.stores_cellmd:
-                    table_row.append(None)  # placeholder for cellmd column
+                    table_row.append(sql.sql.null())  # placeholder for cellmd column
 
         return table_row, num_excs
 
@@ -490,12 +516,18 @@
         store_col_names: list[str] = [pk_col.name for pk_col in self.tbl.store_tbl.pk_columns()]
 
         for col in self.table_columns:
-            store_col_names.append(col.col.store_name())
-            if col.col.stores_cellmd:
-                store_col_names.append(col.col.cellmd_store_name())
+            store_col_names.append(col.store_name())
+            if col.stores_cellmd:
+                store_col_names.append(col.cellmd_store_name())
 
        return store_col_names
 
     def make_row(self) -> exprs.DataRow:
         """Creates a new DataRow with the current row_builder's configuration."""
-        return exprs.DataRow(self.num_materialized, self.img_slot_idxs, self.media_slot_idxs, self.array_slot_idxs)
+        return exprs.DataRow(
+            size=self.num_materialized,
+            img_slot_idxs=self.img_slot_idxs,
+            media_slot_idxs=self.media_slot_idxs,
+            array_slot_idxs=self.array_slot_idxs,
+            json_slot_idxs=self.json_slot_idxs,
+        )
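
The "Nulls in JSONB columns" comment matches standard SQLAlchemy behavior: with the default none_as_null=False, a Python None bound to a JSON/JSONB column is serialized as the JSON value null, while sql.null() produces a SQL NULL. A minimal sketch (table and column names are illustrative, not pixeltable's store schema):

import sqlalchemy as sql
from sqlalchemy.dialects.postgresql import JSONB

md = sql.MetaData()
cells = sql.Table('cells', md, sql.Column('id', sql.Integer, primary_key=True), sql.Column('cellmd', JSONB))

stmt_json_null = sql.insert(cells).values(cellmd=None)       # stored as the JSON value 'null'
stmt_sql_null = sql.insert(cells).values(cellmd=sql.null())  # stored as a SQL NULL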
pixeltable/functions/__init__.py
@@ -19,6 +19,7 @@ from . import (
     mistralai,
     ollama,
     openai,
+    openrouter,
     replicate,
     string,
     timestamp,
pixeltable/functions/bedrock.py
@@ -1,3 +1,10 @@
+"""
+Pixeltable UDFs for AWS Bedrock AI models.
+
+Provides integration with AWS Bedrock for accessing various foundation models
+including Anthropic Claude, Amazon Titan, and other providers.
+"""
+
 import logging
 from typing import TYPE_CHECKING, Any, Optional
 
pixeltable/functions/deepseek.py
@@ -1,3 +1,10 @@
+"""
+Pixeltable UDFs for Deepseek AI models.
+
+Provides integration with Deepseek's language models for chat completions
+and other AI capabilities.
+"""
+
 import json
 from typing import TYPE_CHECKING, Any, Optional
 
@@ -67,10 +74,10 @@ async def chat_completions(
     of the table `tbl`:
 
         >>> messages = [
-            {'role': 'system', 'content': 'You are a helpful assistant.'},
-            {'role': 'user', 'content': tbl.prompt}
-        ]
-        tbl.add_computed_column(response=chat_completions(messages, model='deepseek-chat'))
+        ...     {'role': 'system', 'content': 'You are a helpful assistant.'},
+        ...     {'role': 'user', 'content': tbl.prompt}
+        ... ]
+        >>> tbl.add_computed_column(response=chat_completions(messages, model='deepseek-chat'))
     """
     if model_kwargs is None:
         model_kwargs = {}
pixeltable/functions/llama_cpp.py
@@ -1,3 +1,10 @@
+"""
+Pixeltable UDFs for llama.cpp models.
+
+Provides integration with llama.cpp for running quantized language models locally,
+supporting chat completions and embeddings with GGUF format models.
+"""
+
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Optional
 
pixeltable/functions/math.py
@@ -97,7 +97,7 @@ def _(self: sql.ColumnElement, digits: Optional[sql.ColumnElement] = None) -> sq
     if digits is None:
         return sql.func.round(self)
     else:
-        return sql.func.round(self.cast(sql.Numeric), digits.cast(sql.Integer))
+        return sql.cast(sql.func.round(sql.cast(self, sql.Numeric), sql.cast(digits, sql.Integer)), sql.Float)
 
 
 @pxt.udf(is_method=True)
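
The round() change reflects that PostgreSQL only defines the two-argument round() for numeric arguments (and returns numeric), so both operands are cast and the result is cast back to Float to preserve the UDF's return type. A hedged SQLAlchemy sketch of the generated SQL (literals are placeholders):

import sqlalchemy as sql
from sqlalchemy.dialects import postgresql

expr = sql.cast(
    sql.func.round(sql.cast(sql.literal(2.675), sql.Numeric), sql.cast(sql.literal(2), sql.Integer)),
    sql.Float,
)
# renders roughly as: CAST(round(CAST(2.675 AS NUMERIC), CAST(2 AS INTEGER)) AS FLOAT)
print(expr.compile(dialect=postgresql.dialect(), compile_kwargs={'literal_binds': True}))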
pixeltable/functions/ollama.py
@@ -1,3 +1,10 @@
+"""
+Pixeltable UDFs for Ollama local models.
+
+Provides integration with Ollama for running large language models locally,
+including chat completions and embeddings.
+"""
+
 from typing import TYPE_CHECKING, Optional
 
 import numpy as np
pixeltable/functions/openai.py
@@ -395,10 +395,10 @@ async def chat_completions(
     of the table `tbl`:
 
         >>> messages = [
-            {'role': 'system', 'content': 'You are a helpful assistant.'},
-            {'role': 'user', 'content': tbl.prompt}
-        ]
-        tbl.add_computed_column(response=chat_completions(messages, model='gpt-4o-mini'))
+        ...     {'role': 'system', 'content': 'You are a helpful assistant.'},
+        ...     {'role': 'user', 'content': tbl.prompt}
+        ... ]
+        >>> tbl.add_computed_column(response=chat_completions(messages, model='gpt-4o-mini'))
     """
     if model_kwargs is None:
         model_kwargs = {}
pixeltable/functions/openrouter.py (new)
@@ -0,0 +1,143 @@
+"""
+Pixeltable UDFs that wrap the OpenRouter API.
+
+OpenRouter provides a unified interface to multiple LLM providers. In order to use it,
+you must first sign up at https://openrouter.ai, create an API key, and configure it
+as described in the Working with OpenRouter tutorial.
+"""
+
+from typing import TYPE_CHECKING, Any, Optional
+
+import pixeltable as pxt
+from pixeltable.env import Env, register_client
+from pixeltable.utils.code import local_public_names
+
+if TYPE_CHECKING:
+    import openai
+
+
+@register_client('openrouter')
+def _(api_key: str, site_url: Optional[str] = None, app_name: Optional[str] = None) -> 'openai.AsyncOpenAI':
+    import openai
+
+    # Create default headers for OpenRouter
+    default_headers: dict[str, Any] = {}
+    if site_url:
+        default_headers['HTTP-Referer'] = site_url
+    if app_name:
+        default_headers['X-Title'] = app_name
+
+    return openai.AsyncOpenAI(base_url='https://openrouter.ai/api/v1', api_key=api_key, default_headers=default_headers)
+
+
+def _openrouter_client() -> 'openai.AsyncOpenAI':
+    return Env.get().get_client('openrouter')
+
+
+@pxt.udf(resource_pool='request-rate:openrouter')
+async def chat_completions(
+    messages: list,
+    *,
+    model: str,
+    model_kwargs: Optional[dict[str, Any]] = None,
+    tools: Optional[list[dict[str, Any]]] = None,
+    tool_choice: Optional[dict[str, Any]] = None,
+    provider: Optional[dict[str, Any]] = None,
+    transforms: Optional[list[str]] = None,
+) -> dict:
+    """
+    Chat Completion API via OpenRouter.
+
+    OpenRouter provides access to multiple LLM providers through a unified API.
+    For additional details, see: <https://openrouter.ai/docs>
+
+    Supported models can be found at: <https://openrouter.ai/models>
+
+    Request throttling:
+        Applies the rate limit set in the config (section `openrouter`, key `rate_limit`). If no rate
+        limit is configured, uses a default of 600 RPM.
+
+    __Requirements:__
+
+    - `pip install openai`
+
+    Args:
+        messages: A list of messages comprising the conversation so far.
+        model: ID of the model to use (e.g., 'anthropic/claude-3.5-sonnet', 'openai/gpt-4').
+        model_kwargs: Additional OpenAI-compatible parameters.
+        tools: List of tools available to the model.
+        tool_choice: Controls which (if any) tool is called by the model.
+        provider: OpenRouter-specific provider preferences (e.g., {'order': ['Anthropic', 'OpenAI']}).
+        transforms: List of message transforms to apply (e.g., ['middle-out']).
+
+    Returns:
+        A dictionary containing the response in OpenAI format.
+
+    Examples:
+        Basic chat completion:
+
+        >>> messages = [{'role': 'user', 'content': tbl.prompt}]
+        ... tbl.add_computed_column(
+        ...     response=chat_completions(
+        ...         messages,
+        ...         model='anthropic/claude-3.5-sonnet'
+        ...     )
+        ... )
+
+        With provider routing:
+
+        >>> tbl.add_computed_column(
+        ...     response=chat_completions(
+        ...         messages,
+        ...         model='anthropic/claude-3.5-sonnet',
+        ...         provider={'require_parameters': True, 'order': ['Anthropic']}
+        ...     )
+        ... )
+
+        With transforms:
+
+        >>> tbl.add_computed_column(
+        ...     response=chat_completions(
+        ...         messages,
+        ...         model='openai/gpt-4',
+        ...         transforms=['middle-out']  # Optimize for long contexts
+        ...     )
+        ... )
+    """
+    if model_kwargs is None:
+        model_kwargs = {}
+
+    Env.get().require_package('openai')
+
+    # Handle tools if provided
+    if tools is not None:
+        model_kwargs['tools'] = [{'type': 'function', 'function': tool} for tool in tools]
+
+    if tool_choice is not None:
+        if tool_choice['auto']:
+            model_kwargs['tool_choice'] = 'auto'
+        elif tool_choice['required']:
+            model_kwargs['tool_choice'] = 'required'
+        else:
+            assert tool_choice['tool'] is not None
+            model_kwargs['tool_choice'] = {'type': 'function', 'function': {'name': tool_choice['tool']}}
+
+    # Prepare OpenRouter-specific parameters for extra_body
+    extra_body: dict[str, Any] = {}
+    if provider is not None:
+        extra_body['provider'] = provider
+    if transforms is not None:
+        extra_body['transforms'] = transforms
+
+    # Make the API call
+    result = await _openrouter_client().chat.completions.create(
+        messages=messages, model=model, extra_body=extra_body if extra_body else None, **model_kwargs
+    )
+    return result.model_dump()
+
+
+__all__ = local_public_names(__name__)
+
+
+def __dir__() -> list[str]:
+    return __all__
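
An end-to-end usage sketch for the new openrouter module (the table name and model are illustrative, and an OpenRouter API key is assumed to be configured, e.g. via an OPENROUTER_API_KEY environment variable or the openrouter section of the Pixeltable config):

import pixeltable as pxt
from pixeltable.functions import openrouter

t = pxt.create_table('openrouter_demo', {'prompt': pxt.String}, if_exists='ignore')
t.add_computed_column(
    response=openrouter.chat_completions(
        [{'role': 'user', 'content': t.prompt}],
        model='anthropic/claude-3.5-sonnet',
    )
)
t.insert([{'prompt': 'Say hello in one sentence.'}])
print(t.select(t.response['choices'][0]['message']['content']).collect())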