pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +83 -19
- pixeltable/_query.py +1444 -0
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +7 -4
- pixeltable/catalog/catalog.py +2394 -119
- pixeltable/catalog/column.py +225 -104
- pixeltable/catalog/dir.py +38 -9
- pixeltable/catalog/globals.py +53 -34
- pixeltable/catalog/insertable_table.py +265 -115
- pixeltable/catalog/path.py +80 -17
- pixeltable/catalog/schema_object.py +28 -43
- pixeltable/catalog/table.py +1270 -677
- pixeltable/catalog/table_metadata.py +103 -0
- pixeltable/catalog/table_version.py +1270 -751
- pixeltable/catalog/table_version_handle.py +109 -0
- pixeltable/catalog/table_version_path.py +137 -42
- pixeltable/catalog/tbl_ops.py +53 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +251 -134
- pixeltable/config.py +215 -0
- pixeltable/env.py +736 -285
- pixeltable/exceptions.py +26 -2
- pixeltable/exec/__init__.py +7 -2
- pixeltable/exec/aggregation_node.py +39 -21
- pixeltable/exec/cache_prefetch_node.py +87 -109
- pixeltable/exec/cell_materialization_node.py +268 -0
- pixeltable/exec/cell_reconstruction_node.py +168 -0
- pixeltable/exec/component_iteration_node.py +25 -28
- pixeltable/exec/data_row_batch.py +11 -46
- pixeltable/exec/exec_context.py +26 -11
- pixeltable/exec/exec_node.py +35 -27
- pixeltable/exec/expr_eval/__init__.py +3 -0
- pixeltable/exec/expr_eval/evaluators.py +365 -0
- pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
- pixeltable/exec/expr_eval/globals.py +200 -0
- pixeltable/exec/expr_eval/row_buffer.py +74 -0
- pixeltable/exec/expr_eval/schedulers.py +413 -0
- pixeltable/exec/globals.py +35 -0
- pixeltable/exec/in_memory_data_node.py +35 -27
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +44 -29
- pixeltable/exec/sql_node.py +414 -115
- pixeltable/exprs/__init__.py +8 -5
- pixeltable/exprs/arithmetic_expr.py +79 -45
- pixeltable/exprs/array_slice.py +5 -5
- pixeltable/exprs/column_property_ref.py +40 -26
- pixeltable/exprs/column_ref.py +254 -61
- pixeltable/exprs/comparison.py +14 -9
- pixeltable/exprs/compound_predicate.py +9 -10
- pixeltable/exprs/data_row.py +213 -72
- pixeltable/exprs/expr.py +270 -104
- pixeltable/exprs/expr_dict.py +6 -5
- pixeltable/exprs/expr_set.py +20 -11
- pixeltable/exprs/function_call.py +383 -284
- pixeltable/exprs/globals.py +18 -5
- pixeltable/exprs/in_predicate.py +7 -7
- pixeltable/exprs/inline_expr.py +37 -37
- pixeltable/exprs/is_null.py +8 -4
- pixeltable/exprs/json_mapper.py +120 -54
- pixeltable/exprs/json_path.py +90 -60
- pixeltable/exprs/literal.py +61 -16
- pixeltable/exprs/method_ref.py +7 -6
- pixeltable/exprs/object_ref.py +19 -8
- pixeltable/exprs/row_builder.py +238 -75
- pixeltable/exprs/rowid_ref.py +53 -15
- pixeltable/exprs/similarity_expr.py +65 -50
- pixeltable/exprs/sql_element_cache.py +5 -5
- pixeltable/exprs/string_op.py +107 -0
- pixeltable/exprs/type_cast.py +25 -13
- pixeltable/exprs/variable.py +2 -2
- pixeltable/func/__init__.py +9 -5
- pixeltable/func/aggregate_function.py +197 -92
- pixeltable/func/callable_function.py +119 -35
- pixeltable/func/expr_template_function.py +101 -48
- pixeltable/func/function.py +375 -62
- pixeltable/func/function_registry.py +20 -19
- pixeltable/func/globals.py +6 -5
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +151 -35
- pixeltable/func/signature.py +178 -49
- pixeltable/func/tools.py +164 -0
- pixeltable/func/udf.py +176 -53
- pixeltable/functions/__init__.py +44 -4
- pixeltable/functions/anthropic.py +226 -47
- pixeltable/functions/audio.py +148 -11
- pixeltable/functions/bedrock.py +137 -0
- pixeltable/functions/date.py +188 -0
- pixeltable/functions/deepseek.py +113 -0
- pixeltable/functions/document.py +81 -0
- pixeltable/functions/fal.py +76 -0
- pixeltable/functions/fireworks.py +72 -20
- pixeltable/functions/gemini.py +249 -0
- pixeltable/functions/globals.py +208 -53
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1088 -95
- pixeltable/functions/image.py +155 -84
- pixeltable/functions/json.py +8 -11
- pixeltable/functions/llama_cpp.py +31 -19
- pixeltable/functions/math.py +169 -0
- pixeltable/functions/mistralai.py +50 -75
- pixeltable/functions/net.py +70 -0
- pixeltable/functions/ollama.py +29 -36
- pixeltable/functions/openai.py +548 -160
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +15 -14
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +310 -85
- pixeltable/functions/timestamp.py +37 -19
- pixeltable/functions/together.py +77 -120
- pixeltable/functions/twelvelabs.py +188 -0
- pixeltable/functions/util.py +7 -2
- pixeltable/functions/uuid.py +30 -0
- pixeltable/functions/video.py +1528 -117
- pixeltable/functions/vision.py +26 -26
- pixeltable/functions/voyageai.py +289 -0
- pixeltable/functions/whisper.py +19 -10
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/functions/yolox.py +112 -0
- pixeltable/globals.py +716 -236
- pixeltable/index/__init__.py +3 -1
- pixeltable/index/base.py +17 -21
- pixeltable/index/btree.py +32 -22
- pixeltable/index/embedding_index.py +155 -92
- pixeltable/io/__init__.py +12 -7
- pixeltable/io/datarows.py +140 -0
- pixeltable/io/external_store.py +83 -125
- pixeltable/io/fiftyone.py +24 -33
- pixeltable/io/globals.py +47 -182
- pixeltable/io/hf_datasets.py +96 -127
- pixeltable/io/label_studio.py +171 -156
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +136 -115
- pixeltable/io/parquet.py +40 -153
- pixeltable/io/table_data_conduit.py +702 -0
- pixeltable/io/utils.py +100 -0
- pixeltable/iterators/__init__.py +8 -4
- pixeltable/iterators/audio.py +207 -0
- pixeltable/iterators/base.py +9 -3
- pixeltable/iterators/document.py +144 -87
- pixeltable/iterators/image.py +17 -38
- pixeltable/iterators/string.py +15 -12
- pixeltable/iterators/video.py +523 -127
- pixeltable/metadata/__init__.py +33 -8
- pixeltable/metadata/converters/convert_10.py +2 -3
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_15.py +15 -11
- pixeltable/metadata/converters/convert_16.py +4 -5
- pixeltable/metadata/converters/convert_17.py +4 -5
- pixeltable/metadata/converters/convert_18.py +4 -6
- pixeltable/metadata/converters/convert_19.py +6 -9
- pixeltable/metadata/converters/convert_20.py +3 -6
- pixeltable/metadata/converters/convert_21.py +6 -8
- pixeltable/metadata/converters/convert_22.py +3 -2
- pixeltable/metadata/converters/convert_23.py +33 -0
- pixeltable/metadata/converters/convert_24.py +55 -0
- pixeltable/metadata/converters/convert_25.py +19 -0
- pixeltable/metadata/converters/convert_26.py +23 -0
- pixeltable/metadata/converters/convert_27.py +29 -0
- pixeltable/metadata/converters/convert_28.py +13 -0
- pixeltable/metadata/converters/convert_29.py +110 -0
- pixeltable/metadata/converters/convert_30.py +63 -0
- pixeltable/metadata/converters/convert_31.py +11 -0
- pixeltable/metadata/converters/convert_32.py +15 -0
- pixeltable/metadata/converters/convert_33.py +17 -0
- pixeltable/metadata/converters/convert_34.py +21 -0
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/convert_41.py +12 -0
- pixeltable/metadata/converters/convert_42.py +9 -0
- pixeltable/metadata/converters/convert_43.py +44 -0
- pixeltable/metadata/converters/util.py +44 -18
- pixeltable/metadata/notes.py +21 -0
- pixeltable/metadata/schema.py +185 -42
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +616 -225
- pixeltable/share/__init__.py +3 -0
- pixeltable/share/packager.py +797 -0
- pixeltable/share/protocol/__init__.py +33 -0
- pixeltable/share/protocol/common.py +165 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +119 -0
- pixeltable/share/publish.py +349 -0
- pixeltable/store.py +398 -232
- pixeltable/type_system.py +730 -267
- pixeltable/utils/__init__.py +40 -0
- pixeltable/utils/arrow.py +201 -29
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +346 -0
- pixeltable/utils/coco.py +26 -27
- pixeltable/utils/code.py +4 -4
- pixeltable/utils/console_output.py +46 -0
- pixeltable/utils/coroutine.py +24 -0
- pixeltable/utils/dbms.py +92 -0
- pixeltable/utils/description_helper.py +11 -12
- pixeltable/utils/documents.py +60 -61
- pixeltable/utils/exception_handler.py +36 -0
- pixeltable/utils/filecache.py +38 -22
- pixeltable/utils/formatter.py +88 -51
- pixeltable/utils/gcs_store.py +295 -0
- pixeltable/utils/http.py +133 -0
- pixeltable/utils/http_server.py +14 -13
- pixeltable/utils/iceberg.py +13 -0
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +90 -0
- pixeltable/utils/local_store.py +322 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +573 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +20 -20
- pixeltable/utils/s3_store.py +527 -0
- pixeltable/utils/sql.py +32 -5
- pixeltable/utils/system.py +30 -0
- pixeltable/utils/transactional_directory.py +4 -3
- pixeltable-0.5.7.dist-info/METADATA +579 -0
- pixeltable-0.5.7.dist-info/RECORD +227 -0
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
- pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/catalog/named_function.py +0 -36
- pixeltable/catalog/path_dict.py +0 -141
- pixeltable/dataframe.py +0 -894
- pixeltable/exec/expr_eval_node.py +0 -232
- pixeltable/ext/__init__.py +0 -14
- pixeltable/ext/functions/__init__.py +0 -8
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/ext/functions/yolox.py +0 -157
- pixeltable/tool/create_test_db_dump.py +0 -311
- pixeltable/tool/create_test_video.py +0 -81
- pixeltable/tool/doc_plugins/griffe.py +0 -50
- pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
- pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
- pixeltable/tool/embed_udf.py +0 -9
- pixeltable/tool/mypy_plugin.py +0 -55
- pixeltable/utils/media_store.py +0 -76
- pixeltable/utils/s3.py +0 -16
- pixeltable-0.2.26.dist-info/METADATA +0 -400
- pixeltable-0.2.26.dist-info/RECORD +0 -156
- pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pixeltable UDFs for `DateType`.
|
|
3
|
+
|
|
4
|
+
Usage example:
|
|
5
|
+
```python
|
|
6
|
+
import pixeltable as pxt
|
|
7
|
+
|
|
8
|
+
t = pxt.get_table(...)
|
|
9
|
+
t.select(t.date_col.year, t.date_col.weekday()).collect()
|
|
10
|
+
```
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from datetime import date, timedelta
|
|
14
|
+
|
|
15
|
+
import sqlalchemy as sql
|
|
16
|
+
|
|
17
|
+
import pixeltable as pxt
|
|
18
|
+
from pixeltable.utils.code import local_public_names
|
|
19
|
+
|
|
20
|
+
_SQL_ZERO = sql.literal(0)
|
|
21
|
+
|
|
22
|
+
# NOT YET SUPPORTED date +/- integer
|
|
23
|
+
# NOT YET SUPPORTED date1 - date2 -> integer
|
|
24
|
+
# NOT YET SUPPORTED timestamp(date)
|
|
25
|
+
# NOT YET SUPPORTED date(timestamp)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@pxt.udf(is_property=True)
def year(self: date) -> int:
    """
    The year component of the date, between 1 and 9999 inclusive.

    The bounds are [`MINYEAR`](https://docs.python.org/3/library/datetime.html#datetime.MINYEAR) and
    [`MAXYEAR`](https://docs.python.org/3/library/datetime.html#datetime.MAXYEAR) as defined by the Python
    `datetime` library.

    Equivalent to [`date.year`](https://docs.python.org/3/library/datetime.html#datetime.date.year).
    """
    return self.year


@year.to_sql
def _(self: sql.ColumnElement) -> sql.ColumnElement:
    # SQL translation of `year`: extract the 'year' field from the date expression.
    year_field = sql.extract('year', self)
    return year_field
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@pxt.udf(is_property=True)
def month(self: date) -> int:
    """
    The month component of the date, between 1 and 12 inclusive.

    Equivalent to [`date.month`](https://docs.python.org/3/library/datetime.html#datetime.date.month).
    """
    return self.month


@month.to_sql
def _(self: sql.ColumnElement) -> sql.ColumnElement:
    # SQL translation of `month`: extract the 'month' field from the date expression.
    month_field = sql.extract('month', self)
    return month_field
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@pxt.udf(is_property=True)
def day(self: date) -> int:
    """
    The day-of-month component of the date, between 1 and the number of days in the given month of the given year.

    Equivalent to [`date.day`](https://docs.python.org/3/library/datetime.html#datetime.date.day).
    """
    return self.day


@day.to_sql
def _(self: sql.ColumnElement) -> sql.ColumnElement:
    # SQL translation of `day`: extract the 'day' field from the date expression.
    day_field = sql.extract('day', self)
    return day_field
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@pxt.udf(is_method=True)
def make_date(year: int, month: int, day: int) -> date:
    """
    Create a date from its year, month, and day components.

    Equivalent to [`date()`](https://docs.python.org/3/library/datetime.html#datetime.date).

    Args:
        year: The year of the date.
        month: The month of the date.
        day: The day of the month.

    Returns:
        The `date` constructed from the three components.
    """
    return date(year, month, day)


@make_date.to_sql
def _(year: sql.ColumnElement, month: sql.ColumnElement, day: sql.ColumnElement) -> sql.ColumnElement:
    # SQL translation of `make_date`: each component is cast to an integer before
    # being handed to the SQL `make_date` function.
    int_parts = [part.cast(sql.Integer) for part in (year, month, day)]
    return sql.func.make_date(*int_parts)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
@pxt.udf(is_method=True)
def weekday(self: date) -> int:
    """
    The day of the week as an integer between 0 (Monday) and 6 (Sunday) inclusive.

    Equivalent to [`date.weekday()`](https://docs.python.org/3/library/datetime.html#datetime.date.weekday).
    """
    return self.weekday()


@weekday.to_sql
def _(self: sql.ColumnElement) -> sql.ColumnElement:
    # SQL translation of `weekday`: take the 'isodow' field and subtract 1 so that
    # the result lines up with the 0-based numbering documented on the UDF.
    iso_dow = sql.extract('isodow', self)
    return iso_dow - 1
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
@pxt.udf(is_method=True)
def isoweekday(self: date) -> int:
    """
    The day of the week as an integer, where Monday is 1 and Sunday is 7.

    Equivalent to [`date.isoweekday()`](https://docs.python.org/3/library/datetime.html#datetime.date.isoweekday).
    """
    return self.isoweekday()


@isoweekday.to_sql
def _(self: sql.ColumnElement) -> sql.ColumnElement:
    # SQL translation of `isoweekday`: the 'isodow' field is returned unchanged.
    iso_dow = sql.extract('isodow', self)
    return iso_dow
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
@pxt.udf(is_method=True)
def isocalendar(self: date) -> dict:
    """
    Return the ISO calendar components of the date as a dictionary with three entries:
    `'year'`, `'week'`, and `'weekday'`.

    Equivalent to
    [`date.isocalendar()`](https://docs.python.org/3/library/datetime.html#datetime.date.isocalendar).
    """
    # `date.isocalendar()` yields exactly (year, week, weekday), in that order.
    calendar_parts = tuple(self.isocalendar())
    return {'year': calendar_parts[0], 'week': calendar_parts[1], 'weekday': calendar_parts[2]}
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
@pxt.udf(is_method=True)
def isoformat(self: date, sep: str = 'T', timespec: str = 'auto') -> str:
    """
    Return a string representing the date in ISO 8601 format.

    Equivalent to [`date.isoformat()`](https://docs.python.org/3/library/datetime.html#datetime.date.isoformat).

    Args:
        sep: Accepted for signature compatibility only. It is not passed on to `date.isoformat()` and has no
            effect on the result.
        timespec: Accepted for signature compatibility only. It is not passed on to `date.isoformat()` and has
            no effect on the result.

    Returns:
        The string produced by `date.isoformat()`.
    """
    # `sep` and `timespec` are intentionally not forwarded: the call below takes no arguments.
    return self.isoformat()
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
@pxt.udf(is_method=True)
def toordinal(self: date) -> int:
    """
    The proleptic Gregorian ordinal of the date, where January 1 of year 1 has ordinal 1.

    Equivalent to [`date.toordinal()`](https://docs.python.org/3/library/datetime.html#datetime.date.toordinal).
    """
    ordinal = self.toordinal()
    return ordinal
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
@pxt.udf(is_method=True)
def strftime(self: date, format: str) -> str:
    """
    Format the date as a string, controlled by an explicit format string.

    Equivalent to [`date.strftime()`](https://docs.python.org/3/library/datetime.html#datetime.date.strftime).

    Args:
        format: The format string to control the output. For a complete list of formatting directives, see
            [`strftime()` and `strptime()` Behavior](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior).

    Returns:
        The formatted string.
    """
    formatted = self.strftime(format)
    return formatted
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
@pxt.udf(is_method=True)
def add_days(self: date, n: int) -> date:
    """
    Return the date shifted by `n` days.

    Equivalent to [`date + timedelta(days=n)`](https://docs.python.org/3/library/datetime.html#datetime.timedelta).

    Args:
        n: The number of days to add.
    """
    offset = timedelta(days=n)
    return self + offset
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
__all__ = local_public_names(__name__)
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def __dir__() -> list[str]:
    """Module-level `__dir__` hook: report the names listed in `__all__`."""
    exported = __all__
    return exported
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pixeltable UDFs for Deepseek AI models.
|
|
3
|
+
|
|
4
|
+
Provides integration with Deepseek's language models for chat completions
|
|
5
|
+
and other AI capabilities.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
from typing import TYPE_CHECKING, Any
|
|
10
|
+
|
|
11
|
+
import httpx
|
|
12
|
+
|
|
13
|
+
import pixeltable as pxt
|
|
14
|
+
from pixeltable import env
|
|
15
|
+
from pixeltable.utils.code import local_public_names
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
import openai
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@env.register_client('deepseek')
def _(api_key: str) -> 'openai.AsyncOpenAI':
    # Client factory registered under the name 'deepseek'. `openai` is imported
    # here rather than at module level (the top-level import is TYPE_CHECKING-only).
    import openai

    # Connection pool limits for the underlying async HTTP client.
    pool_limits = httpx.Limits(max_keepalive_connections=100, max_connections=500)
    http_client = httpx.AsyncClient(limits=pool_limits)
    return openai.AsyncOpenAI(api_key=api_key, base_url='https://api.deepseek.com', http_client=http_client)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _deepseek_client() -> 'openai.AsyncOpenAI':
    """Look up the client registered under the name 'deepseek' in the current `Env`."""
    current_env = env.Env.get()
    return current_env.get_client('deepseek')
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@pxt.udf(resource_pool='request-rate:deepseek')
async def chat_completions(
    messages: list,
    *,
    model: str,
    model_kwargs: dict[str, Any] | None = None,
    tools: list[dict[str, Any]] | None = None,
    tool_choice: dict[str, Any] | None = None,
) -> dict:
    """
    Creates a model response for the given chat conversation.

    Equivalent to the Deepseek `chat/completions` API endpoint.
    For additional details, see: <https://api-docs.deepseek.com/api/create-chat-completion>

    Deepseek uses the OpenAI SDK, so you will need to install the `openai` package to use this UDF.

    Request throttling:
    Applies the rate limit set in the config (section `deepseek`, key `rate_limit`). If no rate
    limit is configured, uses a default of 600 RPM.

    __Requirements:__

    - `pip install openai`

    Args:
        messages: A list of messages to use for chat completion, as described in the Deepseek API documentation.
        model: The model to use for chat completion.
        model_kwargs: Additional keyword args for the Deepseek `chat/completions` API.
            For details on the available parameters, see: <https://api-docs.deepseek.com/api/create-chat-completion>
            The dictionary passed in is not modified.
        tools: An optional list of Pixeltable tools to use for the request.
        tool_choice: An optional tool choice configuration.

    Returns:
        A dictionary containing the response and other metadata.

    Examples:
        Add a computed column that applies the model `deepseek-chat` to an existing Pixeltable column `tbl.prompt`
        of the table `tbl`:

        >>> messages = [
        ...     {'role': 'system', 'content': 'You are a helpful assistant.'},
        ...     {'role': 'user', 'content': tbl.prompt}
        ... ]
        >>> tbl.add_computed_column(response=chat_completions(messages, model='deepseek-chat'))
    """
    # Work on a shallow copy so that adding 'tools' / 'tool_choice' / 'extra_body'
    # below never leaks into the dictionary supplied by the caller.
    request_kwargs: dict[str, Any] = {} if model_kwargs is None else dict(model_kwargs)

    if tools is not None:
        # Each tool is wrapped in a {'type': 'function', 'function': ...} envelope.
        request_kwargs['tools'] = [{'type': 'function', 'function': tool} for tool in tools]

    if tool_choice is not None:
        if tool_choice['auto']:
            request_kwargs['tool_choice'] = 'auto'
        elif tool_choice['required']:
            request_kwargs['tool_choice'] = 'required'
        else:
            # Neither 'auto' nor 'required': a specific tool must have been named.
            assert tool_choice['tool'] is not None
            request_kwargs['tool_choice'] = {'type': 'function', 'function': {'name': tool_choice['tool']}}

        if not tool_choice['parallel_tool_calls']:
            # Copy any caller-supplied 'extra_body' before adding to it, for the
            # same no-mutation reason as above.
            extra_body = dict(request_kwargs.get('extra_body', {}))
            extra_body['parallel_tool_calls'] = False
            request_kwargs['extra_body'] = extra_body

    result = await _deepseek_client().chat.completions.with_raw_response.create(
        messages=messages, model=model, **request_kwargs
    )

    # The raw response text is parsed as JSON and returned as-is.
    return json.loads(result.text)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
__all__ = local_public_names(__name__)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def __dir__() -> list[str]:
    """Module-level `__dir__` hook: report the names listed in `__all__`."""
    exported = __all__
    return exported
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pixeltable UDFs for `DocumentType`.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from typing import Any, Literal
|
|
6
|
+
|
|
7
|
+
import pixeltable as pxt
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def document_splitter(
    document: Any,
    separators: str,
    *,
    elements: list[Literal['text', 'image']] | None = None,
    limit: int | None = None,
    overlap: int | None = None,
    metadata: str = '',
    skip_tags: list[str] | None = None,
    tiktoken_encoding: str | None = 'cl100k_base',
    tiktoken_target_model: str | None = None,
    image_dpi: int = 300,
    image_format: str = 'png',
) -> tuple[type[pxt.iterators.ComponentIterator], dict[str, Any]]:
    """Iterator over chunks of a document. The document is chunked according to the specified `separators`.

    The iterator yields a `text` field containing the text of the chunk, and it may also
    include additional metadata fields if specified in the `metadata` parameter, as explained below.

    Chunked text will be cleaned with `ftfy.fix_text` to fix up common problems with unicode sequences.

    Args:
        separators: separators to use to chunk the document. Options are:
            `'heading'`, `'paragraph'`, `'sentence'`, `'token_limit'`, `'char_limit'`, `'page'`.
            This may be a comma-separated string, e.g., `'heading,token_limit'`.
        elements: list of elements to extract from the document. Options are:
            `'text'`, `'image'`. Defaults to `['text']` if not specified. The `'image'` element is only supported
            for the `'page'` separator on PDF documents.
        limit: the maximum number of tokens or characters in each chunk, if `'token_limit'`
            or `'char_limit'` is specified.
        overlap: forwarded to the underlying `DocumentSplitter` only when set.
        metadata: additional metadata fields to include in the output. Options are:
            `'title'`, `'heading'` (HTML and Markdown), `'sourceline'` (HTML), `'page'` (PDF), `'bounding_box'`
            (PDF). The input may be a comma-separated string, e.g., `'title,heading,sourceline'`.
        skip_tags: forwarded to the underlying `DocumentSplitter` only when set.
        tiktoken_encoding: forwarded to the underlying `DocumentSplitter` only when it differs from
            `'cl100k_base'`.
        tiktoken_target_model: forwarded to the underlying `DocumentSplitter` only when set.
        image_dpi: DPI to use when extracting images from PDFs. Defaults to 300.
        image_format: format to use when extracting images from PDFs. Defaults to 'png'.

    Examples:
        All these examples assume an existing table `tbl` with a column `doc` of type `pxt.Document`.

        Create a view that splits all documents into chunks of up to 300 tokens:

        >>> pxt.create_view('chunks', tbl, iterator=document_splitter(tbl.doc, separators='token_limit', limit=300))

        Create a view that splits all documents along sentence boundaries, including title and heading metadata:

        >>> pxt.create_view(
        ...     'sentence_chunks',
        ...     tbl,
        ...     iterator=document_splitter(tbl.doc, separators='sentence', metadata='title,heading')
        ... )
    """
    # (name, value, forward?) for every optional argument, in the order in which
    # they are added to the keyword dict. An argument is forwarded only when the
    # caller moved it away from its default.
    candidates: tuple[tuple[str, Any, bool], ...] = (
        ('elements', elements, elements is not None),
        ('limit', limit, limit is not None),
        ('overlap', overlap, overlap is not None),
        ('metadata', metadata, metadata != ''),
        ('skip_tags', skip_tags, skip_tags is not None),
        ('tiktoken_encoding', tiktoken_encoding, tiktoken_encoding != 'cl100k_base'),
        ('tiktoken_target_model', tiktoken_target_model, tiktoken_target_model is not None),
        ('image_dpi', image_dpi, image_dpi != 300),
        ('image_format', image_format, image_format != 'png'),
    )
    kwargs: dict[str, Any] = {name: value for name, value, forward in candidates if forward}
    return pxt.iterators.document.DocumentSplitter._create(document=document, separators=separators, **kwargs)
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pixeltable UDFs
|
|
3
|
+
that wrap various endpoints from the fal.ai API. In order to use them, you must
|
|
4
|
+
first `pip install fal-client` and configure your fal.ai credentials, as described in
|
|
5
|
+
the [Working with fal.ai](https://docs.pixeltable.com/notebooks/integrations/working-with-fal) tutorial.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import TYPE_CHECKING, Any
|
|
9
|
+
|
|
10
|
+
import pixeltable as pxt
|
|
11
|
+
from pixeltable.env import Env, register_client
|
|
12
|
+
from pixeltable.utils.code import local_public_names
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
import fal_client
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@register_client('fal')
def _(api_key: str) -> 'fal_client.AsyncClient':
    # Client factory registered under the name 'fal'. `fal_client` is imported
    # here rather than at module level (the top-level import is TYPE_CHECKING-only).
    import fal_client

    async_client = fal_client.AsyncClient(key=api_key)
    return async_client
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _fal_client() -> 'fal_client.AsyncClient':
    """Look up the client registered under the name 'fal' in the current `Env`."""
    current_env = Env.get()
    return current_env.get_client('fal')
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@pxt.udf(resource_pool='request-rate:fal')
async def run(input: dict[str, Any], *, app: str) -> pxt.Json:
    """
    Run a model on fal.ai.

    Uses fal's queue-based subscribe mechanism for reliable execution.
    For additional details, see: <https://fal.ai/docs>

    Request throttling:
    Applies the rate limit set in the config (section `fal`, key `rate_limit`). If no rate
    limit is configured, uses a default of 600 RPM.

    __Requirements:__

    - `pip install fal-client`

    Args:
        input: The input parameters for the model.
        app: The name or ID of the fal.ai application to run (e.g., 'fal-ai/flux/schnell').

    Returns:
        The output of the model as a JSON object.

    Examples:
        Add a computed column that applies the model `fal-ai/flux/schnell`
        to an existing Pixeltable column `tbl.prompt` of the table `tbl`:

        >>> input = {'prompt': tbl.prompt}
        ... tbl.add_computed_column(response=run(input, app='fal-ai/flux/schnell'))

        Add a computed column that uses the model `fal-ai/fast-sdxl`
        to generate images from an existing Pixeltable column `tbl.prompt`:

        >>> input = {'prompt': tbl.prompt, 'image_size': 'square', 'num_inference_steps': 25}
        ... tbl.add_computed_column(response=run(input, app='fal-ai/fast-sdxl'))
        ... tbl.add_computed_column(image=tbl.response['images'][0]['url'].astype(pxt.Image))
    """
    # Check for the package before the client is looked up.
    Env.get().require_package('fal_client')
    fal = _fal_client()
    # Submit `input` as the arguments for `app` and await the result.
    return await fal.subscribe(app, arguments=input)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
__all__ = local_public_names(__name__)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def __dir__() -> list[str]:
    """Module-level `__dir__` hook: report the names listed in `__all__`."""
    exported = __all__
    return exported
|
|
@@ -1,14 +1,15 @@
|
|
|
1
1
|
"""
|
|
2
|
-
Pixeltable
|
|
2
|
+
Pixeltable UDFs
|
|
3
3
|
that wrap various endpoints from the Fireworks AI API. In order to use them, you must
|
|
4
4
|
first `pip install fireworks-ai` and configure your Fireworks AI credentials, as described in
|
|
5
|
-
the [Working with Fireworks](https://pixeltable.
|
|
5
|
+
the [Working with Fireworks](https://docs.pixeltable.com/notebooks/integrations/working-with-fireworks) tutorial.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
from typing import
|
|
8
|
+
from typing import TYPE_CHECKING, Any
|
|
9
9
|
|
|
10
10
|
import pixeltable as pxt
|
|
11
11
|
from pixeltable import env
|
|
12
|
+
from pixeltable.config import Config
|
|
12
13
|
from pixeltable.utils.code import local_public_names
|
|
13
14
|
|
|
14
15
|
if TYPE_CHECKING:
|
|
@@ -26,21 +27,19 @@ def _fireworks_client() -> 'fireworks.client.Fireworks':
|
|
|
26
27
|
return env.Env.get().get_client('fireworks')
|
|
27
28
|
|
|
28
29
|
|
|
29
|
-
@pxt.udf
|
|
30
|
-
def chat_completions(
|
|
31
|
-
messages: list[dict[str, str]],
|
|
32
|
-
*,
|
|
33
|
-
model: str,
|
|
34
|
-
max_tokens: Optional[int] = None,
|
|
35
|
-
top_k: Optional[int] = None,
|
|
36
|
-
top_p: Optional[float] = None,
|
|
37
|
-
temperature: Optional[float] = None,
|
|
30
|
+
@pxt.udf(resource_pool='request-rate:fireworks')
|
|
31
|
+
async def chat_completions(
|
|
32
|
+
messages: list[dict[str, str]], *, model: str, model_kwargs: dict[str, Any] | None = None
|
|
38
33
|
) -> dict:
|
|
39
34
|
"""
|
|
40
35
|
Creates a model response for the given chat conversation.
|
|
41
36
|
|
|
42
37
|
Equivalent to the Fireworks AI `chat/completions` API endpoint.
|
|
43
|
-
For additional details, see:
|
|
38
|
+
For additional details, see: <https://docs.fireworks.ai/api-reference/post-chatcompletions>
|
|
39
|
+
|
|
40
|
+
Request throttling:
|
|
41
|
+
Applies the rate limit set in the config (section `fireworks`, key `rate_limit`). If no rate
|
|
42
|
+
limit is configured, uses a default of 600 RPM.
|
|
44
43
|
|
|
45
44
|
__Requirements:__
|
|
46
45
|
|
|
@@ -49,8 +48,8 @@ def chat_completions(
|
|
|
49
48
|
Args:
|
|
50
49
|
messages: A list of messages comprising the conversation so far.
|
|
51
50
|
model: The name of the model to use.
|
|
52
|
-
|
|
53
|
-
|
|
51
|
+
model_kwargs: Additional keyword args for the Fireworks `chat_completions` API. For details on the available
|
|
52
|
+
parameters, see: <https://docs.fireworks.ai/api-reference/post-chatcompletions>
|
|
54
53
|
|
|
55
54
|
Returns:
|
|
56
55
|
A dictionary containing the response and other metadata.
|
|
@@ -60,15 +59,68 @@ def chat_completions(
|
|
|
60
59
|
to an existing Pixeltable column `tbl.prompt` of the table `tbl`:
|
|
61
60
|
|
|
62
61
|
>>> messages = [{'role': 'user', 'content': tbl.prompt}]
|
|
63
|
-
... tbl
|
|
62
|
+
... tbl.add_computed_column(
|
|
63
|
+
... response=chat_completions(messages, model='accounts/fireworks/models/mixtral-8x22b-instruct')
|
|
64
|
+
... )
|
|
64
65
|
"""
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
66
|
+
if model_kwargs is None:
|
|
67
|
+
model_kwargs = {}
|
|
68
|
+
|
|
69
|
+
# for debugging purposes:
|
|
70
|
+
# res_sync = _fireworks_client().chat.completions.create(model=model, messages=messages, **kwargs_not_none)
|
|
71
|
+
# res_sync_dict = res_sync.dict()
|
|
72
|
+
|
|
73
|
+
if 'request_timeout' not in model_kwargs:
|
|
74
|
+
model_kwargs['request_timeout'] = Config.get().get_int_value('timeout', section='fireworks') or 600
|
|
75
|
+
# TODO: this timeout doesn't really work, I think it only applies to returning the stream, but not to the timing
|
|
76
|
+
# of the chunks; addressing this would require a timeout for the task running this udf
|
|
77
|
+
stream = _fireworks_client().chat.completions.acreate(model=model, messages=messages, **model_kwargs)
|
|
78
|
+
chunks = []
|
|
79
|
+
async for chunk in stream:
|
|
80
|
+
chunks.append(chunk)
|
|
81
|
+
|
|
82
|
+
res = {
|
|
83
|
+
'id': chunks[0].id,
|
|
84
|
+
'object': 'chat.completion',
|
|
85
|
+
'created': chunks[0].created,
|
|
86
|
+
'model': chunks[0].model,
|
|
87
|
+
'choices': [
|
|
88
|
+
{
|
|
89
|
+
'index': 0,
|
|
90
|
+
'message': {
|
|
91
|
+
'role': None,
|
|
92
|
+
'content': '',
|
|
93
|
+
'tool_calls': None,
|
|
94
|
+
'tool_call_id': None,
|
|
95
|
+
'function': None,
|
|
96
|
+
'name': None,
|
|
97
|
+
},
|
|
98
|
+
'finish_reason': None,
|
|
99
|
+
'logprobs': None,
|
|
100
|
+
'raw_output': None,
|
|
101
|
+
}
|
|
102
|
+
],
|
|
103
|
+
'usage': {},
|
|
104
|
+
}
|
|
105
|
+
for chunk in chunks:
|
|
106
|
+
d = chunk.dict()
|
|
107
|
+
if 'usage' in d and d['usage'] is not None:
|
|
108
|
+
res['usage'] = d['usage']
|
|
109
|
+
if chunk.choices[0].finish_reason is not None:
|
|
110
|
+
res['choices'][0]['finish_reason'] = chunk.choices[0].finish_reason
|
|
111
|
+
if chunk.choices[0].delta.role is not None:
|
|
112
|
+
res['choices'][0]['message']['role'] = chunk.choices[0].delta.role
|
|
113
|
+
if chunk.choices[0].delta.content is not None:
|
|
114
|
+
res['choices'][0]['message']['content'] += chunk.choices[0].delta.content
|
|
115
|
+
if chunk.choices[0].delta.tool_calls is not None:
|
|
116
|
+
res['choices'][0]['message']['tool_calls'] = chunk.choices[0].delta.tool_calls
|
|
117
|
+
if chunk.choices[0].delta.function is not None:
|
|
118
|
+
res['choices'][0]['message']['function'] = chunk.choices[0].delta.function
|
|
119
|
+
return res
|
|
68
120
|
|
|
69
121
|
|
|
70
122
|
__all__ = local_public_names(__name__)
|
|
71
123
|
|
|
72
124
|
|
|
73
|
-
def __dir__():
|
|
125
|
+
def __dir__() -> list[str]:
|
|
74
126
|
return __all__
|