pixeltable 0.3.12__py3-none-any.whl → 0.3.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +2 -27
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/catalog.py +9 -7
- pixeltable/catalog/column.py +6 -2
- pixeltable/catalog/dir.py +2 -1
- pixeltable/catalog/insertable_table.py +11 -0
- pixeltable/catalog/schema_object.py +2 -1
- pixeltable/catalog/table.py +27 -38
- pixeltable/catalog/table_version.py +19 -0
- pixeltable/catalog/table_version_path.py +7 -0
- pixeltable/catalog/view.py +31 -0
- pixeltable/dataframe.py +50 -7
- pixeltable/env.py +1 -1
- pixeltable/exceptions.py +20 -2
- pixeltable/exec/aggregation_node.py +14 -0
- pixeltable/exec/cache_prefetch_node.py +1 -1
- pixeltable/exec/expr_eval/evaluators.py +0 -4
- pixeltable/exec/expr_eval/expr_eval_node.py +1 -2
- pixeltable/exec/sql_node.py +3 -2
- pixeltable/exprs/column_ref.py +42 -17
- pixeltable/exprs/data_row.py +3 -0
- pixeltable/exprs/globals.py +1 -1
- pixeltable/exprs/literal.py +11 -1
- pixeltable/exprs/rowid_ref.py +4 -1
- pixeltable/exprs/similarity_expr.py +1 -1
- pixeltable/func/function.py +1 -1
- pixeltable/func/udf.py +1 -1
- pixeltable/functions/__init__.py +2 -0
- pixeltable/functions/anthropic.py +1 -1
- pixeltable/functions/bedrock.py +130 -0
- pixeltable/functions/date.py +185 -0
- pixeltable/functions/gemini.py +22 -20
- pixeltable/functions/globals.py +1 -16
- pixeltable/functions/huggingface.py +7 -6
- pixeltable/functions/image.py +15 -16
- pixeltable/functions/json.py +2 -1
- pixeltable/functions/math.py +40 -0
- pixeltable/functions/mistralai.py +3 -2
- pixeltable/functions/openai.py +9 -8
- pixeltable/functions/string.py +1 -2
- pixeltable/functions/together.py +4 -3
- pixeltable/functions/video.py +2 -2
- pixeltable/globals.py +26 -9
- pixeltable/io/datarows.py +4 -3
- pixeltable/io/hf_datasets.py +2 -2
- pixeltable/io/label_studio.py +17 -17
- pixeltable/io/pandas.py +29 -16
- pixeltable/io/parquet.py +2 -0
- pixeltable/io/table_data_conduit.py +8 -2
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_34.py +21 -0
- pixeltable/metadata/notes.py +1 -0
- pixeltable/plan.py +12 -5
- pixeltable/share/__init__.py +1 -1
- pixeltable/share/packager.py +219 -119
- pixeltable/share/publish.py +61 -16
- pixeltable/store.py +45 -20
- pixeltable/type_system.py +46 -2
- pixeltable/utils/arrow.py +8 -2
- pixeltable/utils/pytorch.py +4 -0
- {pixeltable-0.3.12.dist-info → pixeltable-0.3.14.dist-info}/METADATA +2 -4
- {pixeltable-0.3.12.dist-info → pixeltable-0.3.14.dist-info}/RECORD +66 -63
- {pixeltable-0.3.12.dist-info → pixeltable-0.3.14.dist-info}/WHEEL +1 -1
- {pixeltable-0.3.12.dist-info → pixeltable-0.3.14.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.12.dist-info → pixeltable-0.3.14.dist-info}/entry_points.txt +0 -0
pixeltable/exec/sql_node.py
CHANGED

```diff
@@ -424,6 +424,7 @@ class SqlAggregationNode(SqlNode):
     """
 
     group_by_items: Optional[list[exprs.Expr]]
+    input_cte: Optional[sql.CTE]
 
     def __init__(
         self,
@@ -440,13 +441,13 @@ class SqlAggregationNode(SqlNode):
        group_by_items: list of expressions to group by
        limit: max number of rows to return: None = no limit
        """
-
+        self.input_cte, input_col_map = input.to_cte()
         sql_elements = exprs.SqlElementCache(input_col_map)
         super().__init__(None, row_builder, select_list, sql_elements)
         self.group_by_items = group_by_items
 
     def _create_stmt(self) -> sql.Select:
-        stmt = super()._create_stmt()
+        stmt = super()._create_stmt().select_from(self.input_cte)
         if self.group_by_items is not None:
             sql_group_by_items = [self.sql_elements.get(e) for e in self.group_by_items]
             assert all(e is not None for e in sql_group_by_items)
```
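For context, the new `input_cte` field lets the aggregation statement select from the input query's CTE instead of re-deriving it. The underlying SQLAlchemy pattern looks roughly like this standalone sketch (table and column names are made up; this is not pixeltable internals):

```python
import sqlalchemy as sql

metadata = sql.MetaData()
t = sql.Table('t', metadata, sql.Column('k', sql.Integer), sql.Column('v', sql.Integer))

# Materialize the input query as a CTE, then aggregate over it, mirroring
# _create_stmt()'s select_from(self.input_cte) followed by group_by(...)
input_cte = sql.select(t.c.k, t.c.v).where(t.c.v > 0).cte('input')
stmt = (
    sql.select(input_cte.c.k, sql.func.sum(input_cte.c.v))
    .select_from(input_cte)
    .group_by(input_cte.c.k)
)
print(stmt)  # WITH input AS (SELECT ...) SELECT ... FROM input GROUP BY input.k
```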
pixeltable/exprs/column_ref.py
CHANGED

```diff
@@ -31,12 +31,18 @@ class ColumnRef(Expr):
    - in that case, the ColumnRef also instantiates a second non-validating ColumnRef as a component (= dependency)
    - the non-validating ColumnRef is used for SQL translation
 
+    A ColumnRef may have an optional reference table, which carries the context of the ColumnRef resolution. Thus
+    if `v` is a view of `t` (for example), then `v.my_col` and `t.my_col` refer to the same underlying column, but
+    their reference tables will be `v` and `t`, respectively. This is to ensure correct behavior of expressions such
+    as `v.my_col.head()`.
+
    TODO:
    separate Exprs (like validating ColumnRefs) from the logical expression tree and instead have RowBuilder
    insert them into the EvalCtxs as needed
    """
 
    col: catalog.Column
+    reference_tbl: Optional[catalog.TableVersionPath]
    is_unstored_iter_col: bool
    iter_arg_ctx: Optional[RowBuilder.EvalCtx]
    base_rowid_len: int
@@ -46,10 +52,16 @@
    id: int
    perform_validation: bool  # if True, performs media validation
 
-    def __init__(
+    def __init__(
+        self,
+        col: catalog.Column,
+        reference_tbl: Optional[catalog.TableVersionPath] = None,
+        perform_validation: Optional[bool] = None,
+    ):
        super().__init__(col.col_type)
        assert col.tbl is not None
        self.col = col
+        self.reference_tbl = reference_tbl
        self.is_unstored_iter_col = (
            col.tbl.get().is_component_view and col.tbl.get().is_iterator_column(col) and not col.is_stored
        )
@@ -95,7 +107,7 @@
        target = tbl_versions[self.col.tbl.id]
        assert self.col.id in target.cols_by_id
        col = target.cols_by_id[self.col.id]
-        return ColumnRef(col)
+        return ColumnRef(col, self.reference_tbl)
 
    def __getattr__(self, name: str) -> Expr:
        from .column_property_ref import ColumnPropertyRef
@@ -126,26 +138,26 @@
 
        return super().__getattr__(name)
 
-    @classmethod
    def find_embedding_index(
-
+        self, idx_name: Optional[str], method_name: str
    ) -> dict[str, catalog.TableVersion.IndexInfo]:
        """Return IndexInfo for a column, with an optional given name"""
-        # determine index to use
-        idx_info_dict = col.get_idx_info()
        from pixeltable import index
 
+        # determine index to use
+        idx_info_dict = self.col.get_idx_info(self.reference_tbl)
+
        embedding_idx_info = {
            info: value for info, value in idx_info_dict.items() if isinstance(value.idx, index.EmbeddingIndex)
        }
        if len(embedding_idx_info) == 0:
-            raise excs.Error(f'No indices found for {method_name!r} on column {col.name!r}')
+            raise excs.Error(f'No indices found for {method_name!r} on column {self.col.name!r}')
        if idx_name is not None and idx_name not in embedding_idx_info:
-            raise excs.Error(f'Index {idx_name!r} not found for {method_name!r} on column {col.name!r}')
+            raise excs.Error(f'Index {idx_name!r} not found for {method_name!r} on column {self.col.name!r}')
        if len(embedding_idx_info) > 1:
            if idx_name is None:
                raise excs.Error(
-                    f'Column {col.name!r} has multiple indices; use the index name to disambiguate: '
+                    f'Column {self.col.name!r} has multiple indices; use the index name to disambiguate: '
                    f'`{method_name}(..., idx=<index_name>)`'
                )
            idx_info = {idx_name: embedding_idx_info[idx_name]}
@@ -159,7 +171,7 @@
        return SimilarityExpr(self, item, idx_name=idx)
 
    def embedding(self, *, idx: Optional[str] = None) -> ColumnRef:
-        idx_info =
+        idx_info = self.find_embedding_index(idx, 'embedding')
        assert len(idx_info) == 1
        col = copy.copy(next(iter(idx_info.values())).val_col)
        col.name = f'{self.col.name}_embedding_{idx if idx is not None else ""}'
@@ -167,14 +179,21 @@
        return ColumnRef(col)
 
    def default_column_name(self) -> Optional[str]:
-        return
+        return self.col.name if self.col is not None else None
 
    def _equals(self, other: ColumnRef) -> bool:
        return self.col == other.col and self.perform_validation == other.perform_validation
 
    def _df(self) -> 'pxt.dataframe.DataFrame':
-
-
+        from pixeltable import plan
+
+        if self.reference_tbl is None:
+            # No reference table; use the current version of the table to which the column belongs
+            tbl = catalog.Catalog.get().get_table_by_id(self.col.tbl.id)
+            return tbl.select(self)
+        else:
+            # Explicit reference table; construct a DataFrame directly from it
+            return pxt.DataFrame(plan.FromClause([self.reference_tbl])).select(self)
 
    def show(self, *args: Any, **kwargs: Any) -> 'pxt.dataframe.DataFrameResultSet':
        return self._df().show(*args, **kwargs)
@@ -188,6 +207,10 @@
    def count(self) -> int:
        return self._df().count()
 
+    def distinct(self) -> 'pxt.dataframe.DataFrame':
+        """Return distinct values in this column."""
+        return self._df().distinct()
+
    def __str__(self) -> str:
        if self.col.name is None:
            return f'<unnamed column {self.col.id}>'
@@ -203,7 +226,7 @@
    def _descriptors(self) -> DescriptionHelper:
        tbl = catalog.Catalog.get().get_table_by_id(self.col.tbl.id)
        helper = DescriptionHelper()
-        helper.append(f'Column\n{self.col.name!r}\n(of table {tbl._path
+        helper.append(f'Column\n{self.col.name!r}\n(of table {tbl._path!r})')
        helper.append(tbl._col_descriptor([self.col.name]))
        idxs = tbl._index_descriptor([self.col.name])
        if len(idxs) > 0:
@@ -260,13 +283,14 @@
 
    def _as_dict(self) -> dict:
        tbl = self.col.tbl
-
+        tbl_version = tbl.get().version if tbl.get().is_snapshot else None
        # we omit self.components, even if this is a validating ColumnRef, because init() will recreate the
        # non-validating component ColumnRef
        return {
            'tbl_id': str(tbl.id),
-            'tbl_version':
+            'tbl_version': tbl_version,
            'col_id': self.col.id,
+            'reference_tbl': self.reference_tbl.as_dict() if self.reference_tbl is not None else None,
            'perform_validation': self.perform_validation,
        }
 
@@ -281,5 +305,6 @@
    @classmethod
    def _from_dict(cls, d: dict, _: list[Expr]) -> ColumnRef:
        col = cls.get_column(d)
+        reference_tbl = None if d['reference_tbl'] is None else catalog.TableVersionPath.from_dict(d['reference_tbl'])
        perform_validation = d['perform_validation']
-        return cls(col, perform_validation=perform_validation)
+        return cls(col, reference_tbl, perform_validation=perform_validation)
```
pixeltable/exprs/data_row.py
CHANGED

```diff
@@ -29,10 +29,13 @@ class DataRow:
    - FloatType: float
    - BoolType: bool
    - TimestampType: datetime.datetime
+    - DateType: datetime.date
    - JsonType: json-serializable object
    - ArrayType: numpy.ndarray
    - ImageType: PIL.Image.Image
    - VideoType: local path if available, otherwise url
+    - AudioType: local path if available, otherwise url
+    - DocumentType: local path if available, otherwise url
    """
 
    vals: np.ndarray  # of object
```
pixeltable/exprs/globals.py
CHANGED

```diff
@@ -5,7 +5,7 @@ import enum
 from typing import Union
 
 # Python types corresponding to our literal types
-LiteralPythonTypes = Union[str, int, float, bool, datetime.datetime]
+LiteralPythonTypes = Union[str, int, float, bool, datetime.datetime, datetime.date]
 
 
 def print_slice(s: slice) -> str:
```
pixeltable/exprs/literal.py
CHANGED

```diff
@@ -50,6 +50,9 @@ class Literal(Expr):
            assert isinstance(self.val, datetime.datetime)
            default_tz = Env.get().default_time_zone
            return f"'{self.val.astimezone(default_tz).isoformat()}'"
+        if self.col_type.is_date_type():
+            assert isinstance(self.val, datetime.date)
+            return f"'{self.val.isoformat()}'"
        if self.col_type.is_array_type():
            assert isinstance(self.val, np.ndarray)
            return str(self.val.tolist())
@@ -82,6 +85,10 @@
            # stored as UTC in the database)
            encoded_val = self.val.isoformat()
            return {'val': encoded_val, 'val_t': self.col_type._type.name, **super()._as_dict()}
+        elif self.col_type.is_date_type():
+            assert isinstance(self.val, datetime.date)
+            encoded_val = self.val.isoformat()
+            return {'val': encoded_val, 'val_t': self.col_type._type.name, **super()._as_dict()}
        elif self.col_type.is_array_type():
            assert isinstance(self.val, np.ndarray)
            return {'val': self.val.tolist(), 'val_t': self.col_type._type.name, **super()._as_dict()}
@@ -96,7 +103,10 @@
        assert 'val' in d
        if 'val_t' in d:
            val_t = d['val_t']
-            if val_t == ts.ColumnType.Type.
+            if val_t == ts.ColumnType.Type.DATE.name:
+                dt = datetime.date.fromisoformat(d['val'])
+                return cls(dt)
+            elif val_t == ts.ColumnType.Type.TIMESTAMP.name:
                dt = datetime.datetime.fromisoformat(d['val'])
                assert dt.tzinfo == datetime.timezone.utc  # Must be UTC in the database
                return cls(dt)
```
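With date literals now supported end to end (SQL rendering, serialization, deserialization), a plain `datetime.date` can appear in expressions against a date column. A hedged sketch with an assumed table and column name:

```python
from datetime import date

import pixeltable as pxt

t = pxt.get_table('orders')  # assumed table with a date column 'order_date'

# The Python date becomes a Literal; when pushed down to SQL it renders as '2024-01-01'
t.where(t.order_date >= date(2024, 1, 1)).count()
```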
pixeltable/exprs/rowid_ref.py
CHANGED

```diff
@@ -30,7 +30,7 @@ class RowidRef(Expr):
 
    def __init__(
        self,
-        tbl: catalog.TableVersionHandle,
+        tbl: Optional[catalog.TableVersionHandle],
        idx: int,
        tbl_id: Optional[UUID] = None,
        normalized_base_id: Optional[UUID] = None,
@@ -98,6 +98,9 @@
    def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
        tbl = self.tbl.get() if self.tbl is not None else catalog.Catalog.get().get_tbl_version(self.tbl_id, None)
        rowid_cols = tbl.store_tbl.rowid_columns()
+        assert self.rowid_component_idx <= len(rowid_cols), (
+            f'{self.rowid_component_idx} not consistent with {rowid_cols}'
+        )
        return rowid_cols[self.rowid_component_idx]
 
    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
```

pixeltable/exprs/similarity_expr.py
CHANGED

```diff
@@ -26,7 +26,7 @@ class SimilarityExpr(Expr):
        from pixeltable import index
 
        # determine index to use
-        idx_dict =
+        idx_dict = col_ref.find_embedding_index(idx_name, 'similarity')
        assert len(idx_dict) == 1
        self.idx_info = next(iter(idx_dict.values()))
        idx = self.idx_info.idx
```
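For context, `find_embedding_index` backs the user-facing `similarity()` and `embedding()` calls shown in the column_ref.py hunks above. A hedged usage sketch, assuming a table `'docs'` whose `text` column has an embedding index named `'idx0'` (both names are illustrative):

```python
import pixeltable as pxt

t = pxt.get_table('docs')  # assumed table with an embedding index 'idx0' on t.text

# Both calls resolve the index through find_embedding_index(); the idx argument
# disambiguates when a column has more than one embedding index
t.select(t.text, sim=t.text.similarity('sample query', idx='idx0')).limit(5).collect()
t.select(t.text.embedding(idx='idx0')).limit(1).collect()
```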
pixeltable/func/function.py
CHANGED

```diff
@@ -514,7 +514,7 @@ class InvalidFunction(Function):
    def _as_dict(self) -> dict:
        """
        Here we write out (verbatim) the original metadata that failed to load (and that resulted in the
-        InvalidFunction). Note that the InvalidFunction itself is never
+        InvalidFunction). Note that the InvalidFunction itself is never serialized, so there is no corresponding
        from_dict() method.
        """
        return self.fn_dict
```
pixeltable/func/udf.py
CHANGED

```diff
@@ -262,7 +262,7 @@ def from_table(
    """
    from pixeltable import exprs
 
-    ancestors = [tbl, *tbl.
+    ancestors = [tbl, *tbl._base_tables]
    ancestors.reverse()  # We must traverse the ancestors in order from base to derived
 
    subst: dict[exprs.Expr, exprs.Expr] = {}
```
pixeltable/functions/__init__.py
CHANGED

pixeltable/functions/anthropic.py
CHANGED

```diff
@@ -112,7 +112,7 @@ async def messages(
        to an existing Pixeltable column `tbl.prompt` of the table `tbl`:
 
        >>> msgs = [{'role': 'user', 'content': tbl.prompt}]
-        ... tbl.add_computed_column(response=
+        ... tbl.add_computed_column(response=messages(msgs, model='claude-3-haiku-20240307'))
    """
 
    # it doesn't look like count_tokens() actually exists in the current version of the library
```
pixeltable/functions/bedrock.py
ADDED

```diff
@@ -0,0 +1,130 @@
+import logging
+from typing import TYPE_CHECKING, Any, Optional
+
+import pixeltable as pxt
+from pixeltable import env, exprs
+from pixeltable.func import Tools
+from pixeltable.utils.code import local_public_names
+
+if TYPE_CHECKING:
+    from botocore.client import BaseClient
+
+_logger = logging.getLogger('pixeltable')
+
+
+@env.register_client('bedrock')
+def _() -> 'BaseClient':
+    import boto3
+
+    return boto3.client(service_name='bedrock-runtime')
+
+
+# boto3 typing is weird; type information is dynamically defined, so the best we can do for the static checker is `Any`
+def _bedrock_client() -> Any:
+    return env.Env.get().get_client('bedrock')
+
+
+@pxt.udf
+def converse(
+    messages: list[dict[str, Any]],
+    *,
+    model_id: str,
+    system: Optional[list[dict[str, Any]]] = None,
+    inference_config: Optional[dict] = None,
+    additional_model_request_fields: Optional[dict] = None,
+    tool_config: Optional[list[dict]] = None,
+) -> dict:
+    """
+    Generate a conversation response.
+
+    Equivalent to the AWS Bedrock `converse` API endpoint.
+    For additional details, see: <https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html>
+
+    __Requirements:__
+
+    - `pip install boto3`
+
+    Args:
+        messages: Input messages.
+        model_id: The model that will complete your prompt.
+        system: An optional system prompt.
+        inference_config: Base inference parameters to use.
+        additional_model_request_fields: Additional inference parameters to use.
+
+    For details on the optional parameters, see:
+    <https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html>
+
+    Returns:
+        A dictionary containing the response and other metadata.
+
+    Examples:
+        Add a computed column that applies the model `anthropic.claude-3-haiku-20240307-v1:0`
+        to an existing Pixeltable column `tbl.prompt` of the table `tbl`:
+
+        >>> msgs = [{'role': 'user', 'content': [{'text': tbl.prompt}]}]
+        ... tbl.add_computed_column(response=messages(msgs, model_id='anthropic.claude-3-haiku-20240307-v1:0'))
+    """
+
+    kwargs: dict[str, Any] = {'messages': messages, 'modelId': model_id}
+
+    if system is not None:
+        kwargs['system'] = system
+    if inference_config is not None:
+        kwargs['inferenceConfig'] = inference_config
+    if additional_model_request_fields is not None:
+        kwargs['additionalModelRequestFields'] = additional_model_request_fields
+
+    if tool_config is not None:
+        tool_config_ = {
+            'tools': [
+                {
+                    'toolSpec': {
+                        'name': tool['name'],
+                        'description': tool['description'],
+                        'inputSchema': {
+                            'json': {
+                                'type': 'object',
+                                'properties': tool['parameters']['properties'],
+                                'required': tool['required'],
+                            }
+                        },
+                    }
+                }
+                for tool in tool_config
+            ]
+        }
+        kwargs['toolConfig'] = tool_config_
+
+    return _bedrock_client().converse(**kwargs)
+
+
+def invoke_tools(tools: Tools, response: exprs.Expr) -> exprs.InlineDict:
+    """Converts an Anthropic response dict to Pixeltable tool invocation format and calls `tools._invoke()`."""
+    return tools._invoke(_bedrock_response_to_pxt_tool_calls(response))
+
+
+@pxt.udf
+def _bedrock_response_to_pxt_tool_calls(response: dict) -> Optional[dict]:
+    if response.get('stopReason') != 'tool_use':
+        return None
+
+    pxt_tool_calls: dict[str, list[dict[str, Any]]] = {}
+    for message in response['output']['message']['content']:
+        if 'toolUse' in message:
+            tool_call = message['toolUse']
+            tool_name = tool_call['name']
+            if tool_name not in pxt_tool_calls:
+                pxt_tool_calls[tool_name] = []
+            pxt_tool_calls[tool_name].append({'args': tool_call['input']})
+
+    if len(pxt_tool_calls) == 0:
+        return None
+
+    return pxt_tool_calls
+
+
+__all__ = local_public_names(__name__)
+
+
+def __dir__() -> list[str]:
+    return __all__
```
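A hedged usage sketch for the new Bedrock integration, assuming AWS credentials are already configured and an existing table `'chat'` with a string column `prompt` (note that the docstring example above refers to `messages(...)`, while the UDF defined in this module is `converse`):

```python
import pixeltable as pxt
from pixeltable.functions import bedrock

tbl = pxt.get_table('chat')  # assumed existing table with a string column 'prompt'

msgs = [{'role': 'user', 'content': [{'text': tbl.prompt}]}]
tbl.add_computed_column(
    response=bedrock.converse(msgs, model_id='anthropic.claude-3-haiku-20240307-v1:0')
)
tbl.select(tbl.prompt, tbl.response).collect()
```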
pixeltable/functions/date.py
ADDED

```diff
@@ -0,0 +1,185 @@
+"""
+Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs) for `DateType`.
+
+Usage example:
+```python
+import pixeltable as pxt
+
+t = pxt.get_table(...)
+t.select(t.date_col.year, t.date_col.weekday()).collect()
+```
+"""
+
+from datetime import date, timedelta
+
+import sqlalchemy as sql
+
+import pixeltable as pxt
+from pixeltable.utils.code import local_public_names
+
+_SQL_ZERO = sql.literal(0)
+
+# NOT YET SUPPORTED date +/- integer
+# NOT YET SUPPORTED date1 - date2 -> integer
+# NOT YET SUPPORTED timestamp(date)
+# NOT YET SUPPORTED date(timestamp)
+
+
+@pxt.udf(is_property=True)
+def year(self: date) -> int:
+    """
+    Between [`MINYEAR`](https://docs.python.org/3/library/datetime.html#datetime.MINYEAR) and
+    [`MAXYEAR`](https://docs.python.org/3/library/datetime.html#datetime.MAXYEAR) inclusive.
+
+    Equivalent to [`date.year`](https://docs.python.org/3/library/datetime.html#datetime.date.year).
+    """
+    return self.year
+
+
+@year.to_sql
+def _(self: sql.ColumnElement) -> sql.ColumnElement:
+    return sql.extract('year', self)
+
+
+@pxt.udf(is_property=True)
+def month(self: date) -> int:
+    """
+    Between 1 and 12 inclusive.
+
+    Equivalent to [`date.month`](https://docs.python.org/3/library/datetime.html#datetime.date.month).
+    """
+    return self.month
+
+
+@month.to_sql
+def _(self: sql.ColumnElement) -> sql.ColumnElement:
+    return sql.extract('month', self)
+
+
+@pxt.udf(is_property=True)
+def day(self: date) -> int:
+    """
+    Between 1 and the number of days in the given month of the given year.
+
+    Equivalent to [`date.day`](https://docs.python.org/3/library/datetime.html#datetime.date.day).
+    """
+    return self.day
+
+
+@day.to_sql
+def _(self: sql.ColumnElement) -> sql.ColumnElement:
+    return sql.extract('day', self)
+
+
+@pxt.udf(is_method=True)
+def make_date(year: int, month: int, day: int) -> date:
+    """
+    Create a date.
+
+    Equivalent to [`datetime()`](https://docs.python.org/3/library/datetime.html#datetime.date).
+    """
+    return date(year, month, day)
+
+
+@make_date.to_sql
+def _(year: sql.ColumnElement, month: sql.ColumnElement, day: sql.ColumnElement) -> sql.ColumnElement:
+    return sql.func.make_date(sql.cast(year, sql.Integer), sql.cast(month, sql.Integer), sql.cast(day, sql.Integer))
+
+
+@pxt.udf(is_method=True)
+def weekday(self: date) -> int:
+    """
+    Between 0 (Monday) and 6 (Sunday) inclusive.
+
+    Equivalent to [`date.weekday()`](https://docs.python.org/3/library/datetime.html#datetime.date.weekday).
+    """
+    return self.weekday()
+
+
+@weekday.to_sql
+def _(self: sql.ColumnElement) -> sql.ColumnElement:
+    return sql.extract('isodow', self) - 1
+
+
+@pxt.udf(is_method=True)
+def isoweekday(self: date) -> int:
+    """
+    Return the day of the week as an integer, where Monday is 1 and Sunday is 7.
+
+    Equivalent to [`date.isoweekday()`](https://docs.python.org/3/library/datetime.html#datetime.date.isoweekday).
+    """
+    return self.isoweekday()
+
+
+@isoweekday.to_sql
+def _(self: sql.ColumnElement) -> sql.ColumnElement:
+    return sql.extract('isodow', self)
+
+
+@pxt.udf(is_method=True)
+def isocalendar(self: date) -> dict:
+    """
+    Return a dictionary with three entries: `'year'`, `'week'`, and `'weekday'`.
+
+    Equivalent to
+    [`date.isocalendar()`](https://docs.python.org/3/library/datetime.html#datetime.date.isocalendar).
+    """
+    iso_year, iso_week, iso_weekday = self.isocalendar()
+    return {'year': iso_year, 'week': iso_week, 'weekday': iso_weekday}
+
+
+@pxt.udf(is_method=True)
+def isoformat(self: date, sep: str = 'T', timespec: str = 'auto') -> str:
+    """
+    Return a string representing the date and time in ISO 8601 format.
+
+    Equivalent to [`date.isoformat()`](https://docs.python.org/3/library/datetime.html#datetime.date.isoformat).
+
+    Args:
+        sep: Separator between date and time.
+        timespec: The number of additional terms in the output. See the
+            [`date.isoformat()`](https://docs.python.org/3/library/datetime.html#datetime.date.isoformat)
+            documentation for more details.
+    """
+    return self.isoformat()
+
+
+@pxt.udf(is_method=True)
+def toordinal(self: date) -> int:
+    """
+    Return the proleptic Gregorian ordinal of the date, where January 1 of year 1 has ordinal 1.
+
+    Equivalent to [`date.toordinal()`](https://docs.python.org/3/library/datetime.html#datetime.date.toordinal).
+    """
+    return self.toordinal()
+
+
+@pxt.udf(is_method=True)
+def strftime(self: date, format: str) -> str:
+    """
+    Return a string representing the date and time, controlled by an explicit format string.
+
+    Equivalent to [`date.strftime()`](https://docs.python.org/3/library/datetime.html#datetime.date.strftime).
+
+    Args:
+        format: The format string to control the output. For a complete list of formatting directives, see
+            [`strftime()` and `strptime()` Behavior](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior).
+    """
+    return self.strftime(format)
+
+
+@pxt.udf(is_method=True)
+def add_days(self: date, n: int) -> date:
+    """
+    Add `n` days to the date.
+
+    Equivalent to [`date + timedelta(days=n)`](https://docs.python.org/3/library/datetime.html#datetime.timedelta).
+    """
+    return self + timedelta(days=n)
+
+
+__all__ = local_public_names(__name__)
+
+
+def __dir__() -> list[str]:
+    return __all__
```
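A hedged end-to-end sketch of the new date UDFs, using an illustrative table name and schema (the `pxt.Date` type specifier is assumed from the accompanying `type_system.py` changes):

```python
from datetime import date

import pixeltable as pxt

t = pxt.create_table('events', {'d': pxt.Date})
t.insert([{'d': date(2024, 3, 1)}, {'d': date(2024, 3, 4)}])

# Properties and methods from pixeltable.functions.date are available directly on date columns
t.select(t.d.year, t.d.month, t.d.weekday(), t.d.add_days(7), t.d.strftime('%Y-%m-%d')).collect()
```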