pixeltable 0.4.14__py3-none-any.whl → 0.4.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +6 -1
- pixeltable/catalog/catalog.py +107 -45
- pixeltable/catalog/column.py +7 -2
- pixeltable/catalog/table.py +1 -0
- pixeltable/catalog/table_metadata.py +5 -0
- pixeltable/catalog/table_version.py +100 -106
- pixeltable/catalog/table_version_handle.py +4 -1
- pixeltable/catalog/update_status.py +12 -0
- pixeltable/config.py +6 -0
- pixeltable/dataframe.py +11 -5
- pixeltable/env.py +52 -19
- pixeltable/exec/__init__.py +2 -0
- pixeltable/exec/cell_materialization_node.py +231 -0
- pixeltable/exec/cell_reconstruction_node.py +135 -0
- pixeltable/exec/exec_node.py +1 -1
- pixeltable/exec/expr_eval/evaluators.py +1 -0
- pixeltable/exec/expr_eval/expr_eval_node.py +14 -0
- pixeltable/exec/expr_eval/globals.py +2 -0
- pixeltable/exec/globals.py +32 -0
- pixeltable/exec/object_store_save_node.py +1 -4
- pixeltable/exec/row_update_node.py +16 -9
- pixeltable/exec/sql_node.py +107 -14
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/arithmetic_expr.py +10 -11
- pixeltable/exprs/column_property_ref.py +10 -10
- pixeltable/exprs/column_ref.py +2 -2
- pixeltable/exprs/data_row.py +106 -37
- pixeltable/exprs/expr.py +9 -0
- pixeltable/exprs/expr_set.py +14 -7
- pixeltable/exprs/inline_expr.py +2 -19
- pixeltable/exprs/json_path.py +45 -12
- pixeltable/exprs/row_builder.py +54 -22
- pixeltable/functions/__init__.py +1 -0
- pixeltable/functions/bedrock.py +7 -0
- pixeltable/functions/deepseek.py +11 -4
- pixeltable/functions/llama_cpp.py +7 -0
- pixeltable/functions/math.py +1 -1
- pixeltable/functions/ollama.py +7 -0
- pixeltable/functions/openai.py +4 -4
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/video.py +123 -9
- pixeltable/functions/whisperx.py +2 -0
- pixeltable/functions/yolox.py +2 -0
- pixeltable/globals.py +56 -31
- pixeltable/io/__init__.py +1 -0
- pixeltable/io/globals.py +16 -15
- pixeltable/io/table_data_conduit.py +46 -21
- pixeltable/iterators/__init__.py +1 -0
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/notes.py +1 -0
- pixeltable/plan.py +175 -46
- pixeltable/share/publish.py +0 -1
- pixeltable/store.py +2 -2
- pixeltable/type_system.py +5 -3
- pixeltable/utils/console_output.py +4 -1
- pixeltable/utils/exception_handler.py +5 -28
- pixeltable/utils/image.py +7 -0
- pixeltable/utils/misc.py +5 -0
- {pixeltable-0.4.14.dist-info → pixeltable-0.4.16.dist-info}/METADATA +2 -1
- {pixeltable-0.4.14.dist-info → pixeltable-0.4.16.dist-info}/RECORD +64 -57
- {pixeltable-0.4.14.dist-info → pixeltable-0.4.16.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.14.dist-info → pixeltable-0.4.16.dist-info}/entry_points.txt +0 -0
- {pixeltable-0.4.14.dist-info → pixeltable-0.4.16.dist-info}/licenses/LICENSE +0 -0
pixeltable/exprs/expr_set.py
CHANGED
|
@@ -9,26 +9,33 @@ T = TypeVar('T', bound='Expr')
|
|
|
9
9
|
|
|
10
10
|
class ExprSet(Generic[T]):
|
|
11
11
|
"""
|
|
12
|
-
|
|
12
|
+
An ordered set that also supports indexed lookup (by slot_idx and Expr.id). Exprs are uniquely identified by
|
|
13
|
+
Expr.id.
|
|
13
14
|
"""
|
|
14
15
|
|
|
15
16
|
exprs: dict[int, T] # key: Expr.id
|
|
17
|
+
expr_offsets: dict[int, int] # key: Expr.id, value: offset into self.exprs.keys()
|
|
16
18
|
exprs_by_idx: dict[int, T] # key: slot_idx
|
|
17
19
|
|
|
18
20
|
def __init__(self, elements: Optional[Iterable[T]] = None):
|
|
19
21
|
self.exprs = {}
|
|
22
|
+
self.expr_offsets = {}
|
|
20
23
|
self.exprs_by_idx = {}
|
|
21
24
|
if elements is not None:
|
|
22
25
|
for e in elements:
|
|
23
26
|
self.add(e)
|
|
24
27
|
|
|
25
|
-
def add(self, expr: T) ->
|
|
26
|
-
|
|
27
|
-
|
|
28
|
+
def add(self, expr: T) -> int:
|
|
29
|
+
"""Returns offset corresponding to iteration order"""
|
|
30
|
+
offset = self.expr_offsets.get(expr.id)
|
|
31
|
+
if offset is not None:
|
|
32
|
+
return offset
|
|
33
|
+
offset = len(self.exprs)
|
|
28
34
|
self.exprs[expr.id] = expr
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
35
|
+
self.expr_offsets[expr.id] = offset
|
|
36
|
+
if expr.slot_idx is not None:
|
|
37
|
+
self.exprs_by_idx[expr.slot_idx] = expr
|
|
38
|
+
return offset
|
|
32
39
|
|
|
33
40
|
def update(self, *others: Iterable[T]) -> None:
|
|
34
41
|
for other in others:
|
pixeltable/exprs/inline_expr.py
CHANGED
|
@@ -98,13 +98,7 @@ class InlineList(Expr):
|
|
|
98
98
|
def __init__(self, elements: Iterable):
|
|
99
99
|
exprs = [Expr.from_object(el) for el in elements]
|
|
100
100
|
|
|
101
|
-
|
|
102
|
-
'type': 'array',
|
|
103
|
-
'prefixItems': [expr.col_type.to_json_schema() for expr in exprs],
|
|
104
|
-
'items': False, # No additional items (fixed length)
|
|
105
|
-
}
|
|
106
|
-
|
|
107
|
-
super().__init__(ts.JsonType(json_schema))
|
|
101
|
+
super().__init__(ts.JsonType())
|
|
108
102
|
self.components.extend(exprs)
|
|
109
103
|
self.id = self._create_id()
|
|
110
104
|
|
|
@@ -150,18 +144,7 @@ class InlineDict(Expr):
|
|
|
150
144
|
self.keys.append(key)
|
|
151
145
|
exprs.append(Expr.from_object(val))
|
|
152
146
|
|
|
153
|
-
|
|
154
|
-
try:
|
|
155
|
-
json_schema = {
|
|
156
|
-
'type': 'object',
|
|
157
|
-
'properties': {key: expr.col_type.to_json_schema() for key, expr in zip(self.keys, exprs)},
|
|
158
|
-
}
|
|
159
|
-
except excs.Error:
|
|
160
|
-
# InlineDicts are used to store iterator arguments, which are not required to be valid JSON types,
|
|
161
|
-
# so we can't always construct a valid schema.
|
|
162
|
-
json_schema = None
|
|
163
|
-
|
|
164
|
-
super().__init__(ts.JsonType(json_schema))
|
|
147
|
+
super().__init__(ts.JsonType())
|
|
165
148
|
self.components.extend(exprs)
|
|
166
149
|
self.id = self._create_id()
|
|
167
150
|
|
pixeltable/exprs/json_path.py
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import io
|
|
4
|
+
from pathlib import Path
|
|
3
5
|
from typing import Any, Optional
|
|
4
6
|
|
|
5
7
|
import jmespath
|
|
@@ -7,6 +9,7 @@ import sqlalchemy as sql
|
|
|
7
9
|
|
|
8
10
|
from pixeltable import catalog, exceptions as excs, type_system as ts
|
|
9
11
|
|
|
12
|
+
from .column_ref import ColumnRef
|
|
10
13
|
from .data_row import DataRow
|
|
11
14
|
from .expr import Expr
|
|
12
15
|
from .globals import print_slice
|
|
@@ -23,6 +26,11 @@ class JsonPath(Expr):
|
|
|
23
26
|
(0: indicates the immediately preceding JsonMapper, -1: the parent of the immediately preceding mapper, ...)
|
|
24
27
|
"""
|
|
25
28
|
|
|
29
|
+
path_elements: list[str | int | slice]
|
|
30
|
+
compiled_path: jmespath.parser.ParsedResult | None
|
|
31
|
+
scope_idx: int
|
|
32
|
+
file_handles: dict[Path, io.BufferedReader] # key: file path
|
|
33
|
+
|
|
26
34
|
def __init__(
|
|
27
35
|
self, anchor: Optional[Expr], path_elements: Optional[list[str | int | slice]] = None, scope_idx: int = 0
|
|
28
36
|
) -> None:
|
|
@@ -31,16 +39,22 @@ class JsonPath(Expr):
|
|
|
31
39
|
super().__init__(ts.JsonType(nullable=True)) # JsonPath expressions are always nullable
|
|
32
40
|
if anchor is not None:
|
|
33
41
|
self.components = [anchor]
|
|
34
|
-
self.path_elements
|
|
42
|
+
self.path_elements = path_elements
|
|
35
43
|
self.compiled_path = jmespath.compile(self._json_path()) if len(path_elements) > 0 else None
|
|
36
44
|
self.scope_idx = scope_idx
|
|
37
45
|
# NOTE: the _create_id() result will change if set_anchor() gets called;
|
|
38
46
|
# this is not a problem, because _create_id() shouldn't be called after init()
|
|
39
47
|
self.id = self._create_id()
|
|
48
|
+
self.file_handles = {}
|
|
49
|
+
|
|
50
|
+
def release(self) -> None:
    """Close every handle held in `self.file_handles` and empty the dict.

    All handles are closed even if closing one of them raises. In that case the
    error propagates only after the remaining handles have been closed and
    `self.file_handles` has been cleared, so no closed or half-released handles
    are left behind for a later lookup.
    """
    # defer import until it's needed
    import contextlib

    with contextlib.ExitStack() as stack:
        # ExitStack runs every registered callback on exit, regardless of earlier failures
        for fh in self.file_handles.values():
            stack.callback(fh.close)
        self.file_handles.clear()
|
|
40
54
|
|
|
41
55
|
def __repr__(self) -> str:
|
|
42
56
|
# else 'R': the anchor is RELATIVE_PATH_ROOT
|
|
43
|
-
anchor_str = str(self.
|
|
57
|
+
anchor_str = str(self.anchor) if self.anchor is not None else 'R'
|
|
44
58
|
if len(self.path_elements) == 0:
|
|
45
59
|
return anchor_str
|
|
46
60
|
return f'{anchor_str}{"." if isinstance(self.path_elements[0], str) else ""}{self._json_path()}'
|
|
@@ -67,7 +81,7 @@ class JsonPath(Expr):
|
|
|
67
81
|
return cls(anchor, path_elements, d['scope_idx'])
|
|
68
82
|
|
|
69
83
|
@property
|
|
70
|
-
def
|
|
84
|
+
def anchor(self) -> Optional[Expr]:
|
|
71
85
|
return None if len(self.components) == 0 else self.components[0]
|
|
72
86
|
|
|
73
87
|
def set_anchor(self, anchor: Expr) -> None:
|
|
@@ -75,7 +89,7 @@ class JsonPath(Expr):
|
|
|
75
89
|
self.components = [anchor]
|
|
76
90
|
|
|
77
91
|
def is_relative_path(self) -> bool:
|
|
78
|
-
return self.
|
|
92
|
+
return self.anchor is None
|
|
79
93
|
|
|
80
94
|
def _has_relative_path(self) -> bool:
|
|
81
95
|
return self.is_relative_path() or super()._has_relative_path()
|
|
@@ -85,7 +99,7 @@ class JsonPath(Expr):
|
|
|
85
99
|
# TODO: take scope_idx into account
|
|
86
100
|
self.set_anchor(mapper.scope_anchor)
|
|
87
101
|
else:
|
|
88
|
-
self.
|
|
102
|
+
self.anchor._bind_rel_paths(mapper)
|
|
89
103
|
|
|
90
104
|
def __call__(self, *args: object, **kwargs: object) -> 'JsonPath':
|
|
91
105
|
"""
|
|
@@ -99,15 +113,15 @@ class JsonPath(Expr):
|
|
|
99
113
|
|
|
100
114
|
def __getattr__(self, name: str) -> 'JsonPath':
|
|
101
115
|
assert isinstance(name, str)
|
|
102
|
-
return JsonPath(self.
|
|
116
|
+
return JsonPath(self.anchor, [*self.path_elements, name])
|
|
103
117
|
|
|
104
118
|
def __getitem__(self, index: object) -> 'JsonPath':
|
|
105
119
|
if isinstance(index, (int, slice, str)):
|
|
106
|
-
return JsonPath(self.
|
|
120
|
+
return JsonPath(self.anchor, [*self.path_elements, index])
|
|
107
121
|
raise excs.Error(f'Invalid json list index: {index}')
|
|
108
122
|
|
|
109
123
|
def default_column_name(self) -> Optional[str]:
|
|
110
|
-
anchor_name = self.
|
|
124
|
+
anchor_name = self.anchor.default_column_name() if self.anchor is not None else ''
|
|
111
125
|
ret_name = f'{anchor_name}.{self._json_path()}'
|
|
112
126
|
|
|
113
127
|
def cleanup_char(s: str) -> str:
|
|
@@ -159,12 +173,31 @@ class JsonPath(Expr):
|
|
|
159
173
|
result.append(f'[{print_slice(element)}]')
|
|
160
174
|
return ''.join(result)
|
|
161
175
|
|
|
162
|
-
def eval(self,
|
|
163
|
-
assert self.
|
|
164
|
-
val =
|
|
176
|
+
def eval(self, row: DataRow, row_builder: RowBuilder) -> None:
|
|
177
|
+
assert self.anchor is not None, self
|
|
178
|
+
val = row[self.anchor.slot_idx]
|
|
165
179
|
if self.compiled_path is not None:
|
|
166
180
|
val = self.compiled_path.search(val)
|
|
167
|
-
|
|
181
|
+
row[self.slot_idx] = val
|
|
182
|
+
if val is None or self.anchor is None or not isinstance(self.anchor, ColumnRef):
|
|
183
|
+
return
|
|
184
|
+
|
|
185
|
+
# the origin of val is a json-typed column, which might store inlined objects
|
|
186
|
+
if self.anchor.slot_idx not in row.slot_md:
|
|
187
|
+
# we can infer that there aren't any inlined objects because our execution plan doesn't include
|
|
188
|
+
# materializing the cellmd (eg, insert plans)
|
|
189
|
+
# TODO: have the planner pass that fact into ExprEvalNode explicitly to streamline this path a bit more
|
|
190
|
+
return
|
|
191
|
+
|
|
192
|
+
# defer import until it's needed
|
|
193
|
+
from pixeltable.exec.cell_reconstruction_node import json_has_inlined_objs, reconstruct_json
|
|
194
|
+
|
|
195
|
+
cell_md = row.slot_md[self.anchor.slot_idx]
|
|
196
|
+
if cell_md is None or cell_md.file_urls is None or not json_has_inlined_objs(val):
|
|
197
|
+
# val doesn't contain inlined objects
|
|
198
|
+
return
|
|
199
|
+
|
|
200
|
+
row.vals[self.slot_idx] = reconstruct_json(val, cell_md.file_urls, self.file_handles)
|
|
168
201
|
|
|
169
202
|
|
|
170
203
|
RELATIVE_PATH_ROOT = JsonPath(None)
|
pixeltable/exprs/row_builder.py
CHANGED
|
@@ -1,15 +1,17 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import dataclasses
|
|
3
4
|
import sys
|
|
4
5
|
import time
|
|
5
|
-
from dataclasses import dataclass
|
|
6
6
|
from typing import Any, Iterable, NamedTuple, Optional, Sequence
|
|
7
7
|
from uuid import UUID
|
|
8
8
|
|
|
9
9
|
import numpy as np
|
|
10
|
+
import sqlalchemy as sql
|
|
10
11
|
|
|
11
12
|
from pixeltable import catalog, exceptions as excs, exprs, utils
|
|
12
13
|
from pixeltable.env import Env
|
|
14
|
+
from pixeltable.utils.misc import non_none_dict_factory
|
|
13
15
|
|
|
14
16
|
from .data_row import DataRow
|
|
15
17
|
from .expr import Expr, ExprScope
|
|
@@ -68,7 +70,7 @@ class RowBuilder:
|
|
|
68
70
|
input_exprs: ExprSet
|
|
69
71
|
|
|
70
72
|
tbl: Optional[catalog.TableVersion] # reference table of the RowBuilder; used to identify pk columns for writes
|
|
71
|
-
table_columns:
|
|
73
|
+
table_columns: dict[catalog.Column, int | None] # value: slot idx, if the result of an expr
|
|
72
74
|
default_eval_ctx: EvalCtx
|
|
73
75
|
unstored_iter_args: dict[UUID, Expr]
|
|
74
76
|
|
|
@@ -92,10 +94,9 @@ class RowBuilder:
|
|
|
92
94
|
img_slot_idxs: list[int] # Indices of image slots
|
|
93
95
|
media_slot_idxs: list[int] # Indices of non-image media slots
|
|
94
96
|
array_slot_idxs: list[int] # Indices of array slots
|
|
95
|
-
|
|
96
|
-
stored_media_cols: list[exprs.ColumnSlotIdx]
|
|
97
|
+
json_slot_idxs: list[int] # Indices of json slots
|
|
97
98
|
|
|
98
|
-
@dataclass
|
|
99
|
+
@dataclasses.dataclass
|
|
99
100
|
class EvalCtx:
|
|
100
101
|
"""Context for evaluating a set of target exprs"""
|
|
101
102
|
|
|
@@ -113,8 +114,6 @@ class RowBuilder:
|
|
|
113
114
|
):
|
|
114
115
|
self.unique_exprs: ExprSet[Expr] = ExprSet() # dependencies precede their dependents
|
|
115
116
|
self.next_slot_idx = 0
|
|
116
|
-
self.stored_img_cols = []
|
|
117
|
-
self.stored_media_cols = []
|
|
118
117
|
|
|
119
118
|
# record input and output exprs; make copies to avoid reusing execution state
|
|
120
119
|
unique_input_exprs = [self._record_unique_expr(e.copy(), recursive=False) for e in input_exprs]
|
|
@@ -138,7 +137,7 @@ class RowBuilder:
|
|
|
138
137
|
from .column_ref import ColumnRef
|
|
139
138
|
|
|
140
139
|
self.tbl = tbl
|
|
141
|
-
self.table_columns
|
|
140
|
+
self.table_columns = {}
|
|
142
141
|
self.input_exprs = ExprSet()
|
|
143
142
|
validating_colrefs: dict[Expr, Expr] = {} # key: non-validating colref, value: corresp. validating colref
|
|
144
143
|
for col in columns:
|
|
@@ -245,17 +244,27 @@ class RowBuilder:
|
|
|
245
244
|
e.slot_idx for e in self.unique_exprs if e.col_type.is_media_type() and not e.col_type.is_image_type()
|
|
246
245
|
]
|
|
247
246
|
self.array_slot_idxs = [e.slot_idx for e in self.unique_exprs if e.col_type.is_array_type()]
|
|
247
|
+
self.json_slot_idxs = [e.slot_idx for e in self.unique_exprs if e.col_type.is_json_type()]
|
|
248
248
|
|
|
249
249
|
def add_table_column(self, col: catalog.Column, slot_idx: int) -> None:
|
|
250
|
-
"""Record
|
|
250
|
+
"""Record an output column for which the value is produced via expr evaluation"""
|
|
251
251
|
assert self.tbl is not None
|
|
252
252
|
assert col.is_stored
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
253
|
+
self.table_columns[col] = slot_idx
|
|
254
|
+
|
|
255
|
+
def add_table_columns(self, cols: list[catalog.Column]) -> None:
    """Register output columns whose values are materialized into DataRow.cell_vals

    Each column is recorded in `self.table_columns` with a slot idx of None.
    """
    # dict.fromkeys() maps every column to None, which is exactly the marker we want to store
    self.table_columns.update(dict.fromkeys(cols))
|
|
259
|
+
|
|
260
|
+
@property
def media_output_col_info(self) -> list[ColumnSlotIdx]:
    """Collect (column, slot idx) pairs for media-typed output columns that have a slot idx

    Columns recorded in `self.table_columns` with a slot idx of None are left out.
    """
    result: list[ColumnSlotIdx] = []
    for col, slot_idx in self.table_columns.items():
        if not col.col_type.is_media_type() or slot_idx is None:
            continue
        result.append(ColumnSlotIdx(col, slot_idx))
    return result
|
|
259
268
|
|
|
260
269
|
@property
|
|
261
270
|
def num_materialized(self) -> int:
|
|
@@ -462,13 +471,30 @@ class RowBuilder:
|
|
|
462
471
|
|
|
463
472
|
num_excs = 0
|
|
464
473
|
table_row: list[Any] = list(pk)
|
|
465
|
-
|
|
474
|
+
# Nulls in JSONB columns need to be stored as sql.sql.null(), otherwise it stores a json 'null'
|
|
475
|
+
for col, slot_idx in self.table_columns.items():
|
|
476
|
+
if col.id in data_row.cell_vals:
|
|
477
|
+
table_row.append(data_row.cell_vals[col.id])
|
|
478
|
+
if col.stores_cellmd:
|
|
479
|
+
if data_row.cell_md[col.id] is None:
|
|
480
|
+
table_row.append(sql.sql.null())
|
|
481
|
+
else:
|
|
482
|
+
# we want to minimize the size of the stored dict and use dict_factory to remove Nones
|
|
483
|
+
md = dataclasses.asdict(data_row.cell_md[col.id], dict_factory=non_none_dict_factory)
|
|
484
|
+
assert len(md) > 0
|
|
485
|
+
table_row.append(md)
|
|
486
|
+
if slot_idx is not None and data_row.has_exc(slot_idx):
|
|
487
|
+
num_excs += 1
|
|
488
|
+
if cols_with_excs is not None:
|
|
489
|
+
cols_with_excs.add(col.id)
|
|
490
|
+
continue
|
|
491
|
+
|
|
466
492
|
if data_row.has_exc(slot_idx):
|
|
467
493
|
exc = data_row.get_exc(slot_idx)
|
|
468
494
|
num_excs += 1
|
|
469
495
|
if cols_with_excs is not None:
|
|
470
496
|
cols_with_excs.add(col.id)
|
|
471
|
-
table_row.append(None)
|
|
497
|
+
table_row.append(sql.sql.null() if col.col_type.is_json_type() else None)
|
|
472
498
|
if col.stores_cellmd:
|
|
473
499
|
# exceptions get stored in the errortype/-msg properties of the cellmd column
|
|
474
500
|
table_row.append(ColumnPropertyRef.create_cellmd_exc(exc))
|
|
@@ -476,7 +502,7 @@ class RowBuilder:
|
|
|
476
502
|
val = data_row.get_stored_val(slot_idx, col.get_sa_col_type())
|
|
477
503
|
table_row.append(val)
|
|
478
504
|
if col.stores_cellmd:
|
|
479
|
-
table_row.append(
|
|
505
|
+
table_row.append(sql.sql.null()) # placeholder for cellmd column
|
|
480
506
|
|
|
481
507
|
return table_row, num_excs
|
|
482
508
|
|
|
@@ -490,12 +516,18 @@ class RowBuilder:
|
|
|
490
516
|
store_col_names: list[str] = [pk_col.name for pk_col in self.tbl.store_tbl.pk_columns()]
|
|
491
517
|
|
|
492
518
|
for col in self.table_columns:
|
|
493
|
-
store_col_names.append(col.
|
|
494
|
-
if col.
|
|
495
|
-
store_col_names.append(col.
|
|
519
|
+
store_col_names.append(col.store_name())
|
|
520
|
+
if col.stores_cellmd:
|
|
521
|
+
store_col_names.append(col.cellmd_store_name())
|
|
496
522
|
|
|
497
523
|
return store_col_names
|
|
498
524
|
|
|
499
525
|
def make_row(self) -> exprs.DataRow:
|
|
500
526
|
"""Creates a new DataRow with the current row_builder's configuration."""
|
|
501
|
-
return exprs.DataRow(
|
|
527
|
+
return exprs.DataRow(
|
|
528
|
+
size=self.num_materialized,
|
|
529
|
+
img_slot_idxs=self.img_slot_idxs,
|
|
530
|
+
media_slot_idxs=self.media_slot_idxs,
|
|
531
|
+
array_slot_idxs=self.array_slot_idxs,
|
|
532
|
+
json_slot_idxs=self.json_slot_idxs,
|
|
533
|
+
)
|
pixeltable/functions/__init__.py
CHANGED
pixeltable/functions/bedrock.py
CHANGED
|
@@ -1,3 +1,10 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pixeltable UDFs for AWS Bedrock AI models.
|
|
3
|
+
|
|
4
|
+
Provides integration with AWS Bedrock for accessing various foundation models
|
|
5
|
+
including Anthropic Claude, Amazon Titan, and other providers.
|
|
6
|
+
"""
|
|
7
|
+
|
|
1
8
|
import logging
|
|
2
9
|
from typing import TYPE_CHECKING, Any, Optional
|
|
3
10
|
|
pixeltable/functions/deepseek.py
CHANGED
|
@@ -1,3 +1,10 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pixeltable UDFs for Deepseek AI models.
|
|
3
|
+
|
|
4
|
+
Provides integration with Deepseek's language models for chat completions
|
|
5
|
+
and other AI capabilities.
|
|
6
|
+
"""
|
|
7
|
+
|
|
1
8
|
import json
|
|
2
9
|
from typing import TYPE_CHECKING, Any, Optional
|
|
3
10
|
|
|
@@ -67,10 +74,10 @@ async def chat_completions(
|
|
|
67
74
|
of the table `tbl`:
|
|
68
75
|
|
|
69
76
|
>>> messages = [
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
77
|
+
... {'role': 'system', 'content': 'You are a helpful assistant.'},
|
|
78
|
+
... {'role': 'user', 'content': tbl.prompt}
|
|
79
|
+
... ]
|
|
80
|
+
>>> tbl.add_computed_column(response=chat_completions(messages, model='deepseek-chat'))
|
|
74
81
|
"""
|
|
75
82
|
if model_kwargs is None:
|
|
76
83
|
model_kwargs = {}
|
|
@@ -1,3 +1,10 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pixeltable UDFs for llama.cpp models.
|
|
3
|
+
|
|
4
|
+
Provides integration with llama.cpp for running quantized language models locally,
|
|
5
|
+
supporting chat completions and embeddings with GGUF format models.
|
|
6
|
+
"""
|
|
7
|
+
|
|
1
8
|
from pathlib import Path
|
|
2
9
|
from typing import TYPE_CHECKING, Any, Optional
|
|
3
10
|
|
pixeltable/functions/math.py
CHANGED
|
@@ -97,7 +97,7 @@ def _(self: sql.ColumnElement, digits: Optional[sql.ColumnElement] = None) -> sq
|
|
|
97
97
|
if digits is None:
|
|
98
98
|
return sql.func.round(self)
|
|
99
99
|
else:
|
|
100
|
-
return sql.func.round(
|
|
100
|
+
return sql.cast(sql.func.round(sql.cast(self, sql.Numeric), sql.cast(digits, sql.Integer)), sql.Float)
|
|
101
101
|
|
|
102
102
|
|
|
103
103
|
@pxt.udf(is_method=True)
|
pixeltable/functions/ollama.py
CHANGED
pixeltable/functions/openai.py
CHANGED
|
@@ -395,10 +395,10 @@ async def chat_completions(
|
|
|
395
395
|
of the table `tbl`:
|
|
396
396
|
|
|
397
397
|
>>> messages = [
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
398
|
+
... {'role': 'system', 'content': 'You are a helpful assistant.'},
|
|
399
|
+
... {'role': 'user', 'content': tbl.prompt}
|
|
400
|
+
... ]
|
|
401
|
+
>>> tbl.add_computed_column(response=chat_completions(messages, model='gpt-4o-mini'))
|
|
402
402
|
"""
|
|
403
403
|
if model_kwargs is None:
|
|
404
404
|
model_kwargs = {}
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pixeltable UDFs that wrap the OpenRouter API.
|
|
3
|
+
|
|
4
|
+
OpenRouter provides a unified interface to multiple LLM providers. In order to use it,
|
|
5
|
+
you must first sign up at https://openrouter.ai, create an API key, and configure it
|
|
6
|
+
as described in the Working with OpenRouter tutorial.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from typing import TYPE_CHECKING, Any, Optional
|
|
10
|
+
|
|
11
|
+
import pixeltable as pxt
|
|
12
|
+
from pixeltable.env import Env, register_client
|
|
13
|
+
from pixeltable.utils.code import local_public_names
|
|
14
|
+
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
import openai
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@register_client('openrouter')
def _(api_key: str, site_url: Optional[str] = None, app_name: Optional[str] = None) -> 'openai.AsyncOpenAI':
    """
    Create the async OpenAI-compatible client used for OpenRouter requests.

    Args:
        api_key: API key passed to the client.
        site_url: If set, sent as the `HTTP-Referer` default header.
        app_name: If set, sent as the `X-Title` default header.

    Returns:
        An `openai.AsyncOpenAI` client whose base URL is the OpenRouter API endpoint.
    """
    import openai

    # OpenRouter-specific default headers; unset/empty values are omitted entirely
    optional_headers = (('HTTP-Referer', site_url), ('X-Title', app_name))
    default_headers: dict[str, Any] = {name: value for name, value in optional_headers if value}

    return openai.AsyncOpenAI(base_url='https://openrouter.ai/api/v1', api_key=api_key, default_headers=default_headers)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _openrouter_client() -> 'openai.AsyncOpenAI':
    """Look up the client registered under the name 'openrouter' in the current Env."""
    env = Env.get()
    return env.get_client('openrouter')
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@pxt.udf(resource_pool='request-rate:openrouter')
async def chat_completions(
    messages: list,
    *,
    model: str,
    model_kwargs: Optional[dict[str, Any]] = None,
    tools: Optional[list[dict[str, Any]]] = None,
    tool_choice: Optional[dict[str, Any]] = None,
    provider: Optional[dict[str, Any]] = None,
    transforms: Optional[list[str]] = None,
) -> dict:
    """
    Chat Completion API via OpenRouter.

    OpenRouter provides access to multiple LLM providers through a unified API.
    For additional details, see: <https://openrouter.ai/docs>

    Supported models can be found at: <https://openrouter.ai/models>

    Request throttling:
    Applies the rate limit set in the config (section `openrouter`, key `rate_limit`). If no rate
    limit is configured, uses a default of 600 RPM.

    __Requirements:__

    - `pip install openai`

    Args:
        messages: A list of messages comprising the conversation so far.
        model: ID of the model to use (e.g., 'anthropic/claude-3.5-sonnet', 'openai/gpt-4').
        model_kwargs: Additional OpenAI-compatible parameters. The dict is not modified.
        tools: List of tools available to the model.
        tool_choice: Controls which (if any) tool is called by the model. The keys `auto`, `required` and `tool`
            are consulted in that order; a missing `auto` or `required` key is treated as unset.
        provider: OpenRouter-specific provider preferences (e.g., {'order': ['Anthropic', 'OpenAI']}).
        transforms: List of message transforms to apply (e.g., ['middle-out']).

    Returns:
        A dictionary containing the response in OpenAI format.

    Examples:
        Basic chat completion:

        >>> messages = [{'role': 'user', 'content': tbl.prompt}]
        >>> tbl.add_computed_column(
        ...     response=chat_completions(
        ...         messages,
        ...         model='anthropic/claude-3.5-sonnet'
        ...     )
        ... )

        With provider routing:

        >>> tbl.add_computed_column(
        ...     response=chat_completions(
        ...         messages,
        ...         model='anthropic/claude-3.5-sonnet',
        ...         provider={'require_parameters': True, 'order': ['Anthropic']}
        ...     )
        ... )

        With transforms:

        >>> tbl.add_computed_column(
        ...     response=chat_completions(
        ...         messages,
        ...         model='openai/gpt-4',
        ...         transforms=['middle-out']  # Optimize for long contexts
        ...     )
        ... )
    """
    # Work on a shallow copy: 'tools'/'tool_choice' are added below, and the caller's dict must not be
    # modified as a side effect of the call.
    request_kwargs: dict[str, Any] = {} if model_kwargs is None else dict(model_kwargs)

    Env.get().require_package('openai')

    # Handle tools if provided
    if tools is not None:
        request_kwargs['tools'] = [{'type': 'function', 'function': tool} for tool in tools]

    if tool_choice is not None:
        # .get(): a tool_choice dict that omits 'auto' or 'required' is treated as not requesting that mode
        if tool_choice.get('auto'):
            request_kwargs['tool_choice'] = 'auto'
        elif tool_choice.get('required'):
            request_kwargs['tool_choice'] = 'required'
        else:
            # neither mode requested: a specific tool must be named
            assert tool_choice['tool'] is not None
            request_kwargs['tool_choice'] = {'type': 'function', 'function': {'name': tool_choice['tool']}}

    # Prepare OpenRouter-specific parameters for extra_body
    extra_body: dict[str, Any] = {}
    if provider is not None:
        extra_body['provider'] = provider
    if transforms is not None:
        extra_body['transforms'] = transforms

    # Make the API call; an empty extra_body is passed as None
    result = await _openrouter_client().chat.completions.create(
        messages=messages, model=model, extra_body=extra_body or None, **request_kwargs
    )
    return result.model_dump()
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
__all__ = local_public_names(__name__)


def __dir__() -> list[str]:
    """Module-level `__dir__` hook: report the names in `__all__`."""
    return __all__
|