pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +83 -19
- pixeltable/_query.py +1444 -0
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +7 -4
- pixeltable/catalog/catalog.py +2394 -119
- pixeltable/catalog/column.py +225 -104
- pixeltable/catalog/dir.py +38 -9
- pixeltable/catalog/globals.py +53 -34
- pixeltable/catalog/insertable_table.py +265 -115
- pixeltable/catalog/path.py +80 -17
- pixeltable/catalog/schema_object.py +28 -43
- pixeltable/catalog/table.py +1270 -677
- pixeltable/catalog/table_metadata.py +103 -0
- pixeltable/catalog/table_version.py +1270 -751
- pixeltable/catalog/table_version_handle.py +109 -0
- pixeltable/catalog/table_version_path.py +137 -42
- pixeltable/catalog/tbl_ops.py +53 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +251 -134
- pixeltable/config.py +215 -0
- pixeltable/env.py +736 -285
- pixeltable/exceptions.py +26 -2
- pixeltable/exec/__init__.py +7 -2
- pixeltable/exec/aggregation_node.py +39 -21
- pixeltable/exec/cache_prefetch_node.py +87 -109
- pixeltable/exec/cell_materialization_node.py +268 -0
- pixeltable/exec/cell_reconstruction_node.py +168 -0
- pixeltable/exec/component_iteration_node.py +25 -28
- pixeltable/exec/data_row_batch.py +11 -46
- pixeltable/exec/exec_context.py +26 -11
- pixeltable/exec/exec_node.py +35 -27
- pixeltable/exec/expr_eval/__init__.py +3 -0
- pixeltable/exec/expr_eval/evaluators.py +365 -0
- pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
- pixeltable/exec/expr_eval/globals.py +200 -0
- pixeltable/exec/expr_eval/row_buffer.py +74 -0
- pixeltable/exec/expr_eval/schedulers.py +413 -0
- pixeltable/exec/globals.py +35 -0
- pixeltable/exec/in_memory_data_node.py +35 -27
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +44 -29
- pixeltable/exec/sql_node.py +414 -115
- pixeltable/exprs/__init__.py +8 -5
- pixeltable/exprs/arithmetic_expr.py +79 -45
- pixeltable/exprs/array_slice.py +5 -5
- pixeltable/exprs/column_property_ref.py +40 -26
- pixeltable/exprs/column_ref.py +254 -61
- pixeltable/exprs/comparison.py +14 -9
- pixeltable/exprs/compound_predicate.py +9 -10
- pixeltable/exprs/data_row.py +213 -72
- pixeltable/exprs/expr.py +270 -104
- pixeltable/exprs/expr_dict.py +6 -5
- pixeltable/exprs/expr_set.py +20 -11
- pixeltable/exprs/function_call.py +383 -284
- pixeltable/exprs/globals.py +18 -5
- pixeltable/exprs/in_predicate.py +7 -7
- pixeltable/exprs/inline_expr.py +37 -37
- pixeltable/exprs/is_null.py +8 -4
- pixeltable/exprs/json_mapper.py +120 -54
- pixeltable/exprs/json_path.py +90 -60
- pixeltable/exprs/literal.py +61 -16
- pixeltable/exprs/method_ref.py +7 -6
- pixeltable/exprs/object_ref.py +19 -8
- pixeltable/exprs/row_builder.py +238 -75
- pixeltable/exprs/rowid_ref.py +53 -15
- pixeltable/exprs/similarity_expr.py +65 -50
- pixeltable/exprs/sql_element_cache.py +5 -5
- pixeltable/exprs/string_op.py +107 -0
- pixeltable/exprs/type_cast.py +25 -13
- pixeltable/exprs/variable.py +2 -2
- pixeltable/func/__init__.py +9 -5
- pixeltable/func/aggregate_function.py +197 -92
- pixeltable/func/callable_function.py +119 -35
- pixeltable/func/expr_template_function.py +101 -48
- pixeltable/func/function.py +375 -62
- pixeltable/func/function_registry.py +20 -19
- pixeltable/func/globals.py +6 -5
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +151 -35
- pixeltable/func/signature.py +178 -49
- pixeltable/func/tools.py +164 -0
- pixeltable/func/udf.py +176 -53
- pixeltable/functions/__init__.py +44 -4
- pixeltable/functions/anthropic.py +226 -47
- pixeltable/functions/audio.py +148 -11
- pixeltable/functions/bedrock.py +137 -0
- pixeltable/functions/date.py +188 -0
- pixeltable/functions/deepseek.py +113 -0
- pixeltable/functions/document.py +81 -0
- pixeltable/functions/fal.py +76 -0
- pixeltable/functions/fireworks.py +72 -20
- pixeltable/functions/gemini.py +249 -0
- pixeltable/functions/globals.py +208 -53
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1088 -95
- pixeltable/functions/image.py +155 -84
- pixeltable/functions/json.py +8 -11
- pixeltable/functions/llama_cpp.py +31 -19
- pixeltable/functions/math.py +169 -0
- pixeltable/functions/mistralai.py +50 -75
- pixeltable/functions/net.py +70 -0
- pixeltable/functions/ollama.py +29 -36
- pixeltable/functions/openai.py +548 -160
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +15 -14
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +310 -85
- pixeltable/functions/timestamp.py +37 -19
- pixeltable/functions/together.py +77 -120
- pixeltable/functions/twelvelabs.py +188 -0
- pixeltable/functions/util.py +7 -2
- pixeltable/functions/uuid.py +30 -0
- pixeltable/functions/video.py +1528 -117
- pixeltable/functions/vision.py +26 -26
- pixeltable/functions/voyageai.py +289 -0
- pixeltable/functions/whisper.py +19 -10
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/functions/yolox.py +112 -0
- pixeltable/globals.py +716 -236
- pixeltable/index/__init__.py +3 -1
- pixeltable/index/base.py +17 -21
- pixeltable/index/btree.py +32 -22
- pixeltable/index/embedding_index.py +155 -92
- pixeltable/io/__init__.py +12 -7
- pixeltable/io/datarows.py +140 -0
- pixeltable/io/external_store.py +83 -125
- pixeltable/io/fiftyone.py +24 -33
- pixeltable/io/globals.py +47 -182
- pixeltable/io/hf_datasets.py +96 -127
- pixeltable/io/label_studio.py +171 -156
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +136 -115
- pixeltable/io/parquet.py +40 -153
- pixeltable/io/table_data_conduit.py +702 -0
- pixeltable/io/utils.py +100 -0
- pixeltable/iterators/__init__.py +8 -4
- pixeltable/iterators/audio.py +207 -0
- pixeltable/iterators/base.py +9 -3
- pixeltable/iterators/document.py +144 -87
- pixeltable/iterators/image.py +17 -38
- pixeltable/iterators/string.py +15 -12
- pixeltable/iterators/video.py +523 -127
- pixeltable/metadata/__init__.py +33 -8
- pixeltable/metadata/converters/convert_10.py +2 -3
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_15.py +15 -11
- pixeltable/metadata/converters/convert_16.py +4 -5
- pixeltable/metadata/converters/convert_17.py +4 -5
- pixeltable/metadata/converters/convert_18.py +4 -6
- pixeltable/metadata/converters/convert_19.py +6 -9
- pixeltable/metadata/converters/convert_20.py +3 -6
- pixeltable/metadata/converters/convert_21.py +6 -8
- pixeltable/metadata/converters/convert_22.py +3 -2
- pixeltable/metadata/converters/convert_23.py +33 -0
- pixeltable/metadata/converters/convert_24.py +55 -0
- pixeltable/metadata/converters/convert_25.py +19 -0
- pixeltable/metadata/converters/convert_26.py +23 -0
- pixeltable/metadata/converters/convert_27.py +29 -0
- pixeltable/metadata/converters/convert_28.py +13 -0
- pixeltable/metadata/converters/convert_29.py +110 -0
- pixeltable/metadata/converters/convert_30.py +63 -0
- pixeltable/metadata/converters/convert_31.py +11 -0
- pixeltable/metadata/converters/convert_32.py +15 -0
- pixeltable/metadata/converters/convert_33.py +17 -0
- pixeltable/metadata/converters/convert_34.py +21 -0
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/convert_41.py +12 -0
- pixeltable/metadata/converters/convert_42.py +9 -0
- pixeltable/metadata/converters/convert_43.py +44 -0
- pixeltable/metadata/converters/util.py +44 -18
- pixeltable/metadata/notes.py +21 -0
- pixeltable/metadata/schema.py +185 -42
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +616 -225
- pixeltable/share/__init__.py +3 -0
- pixeltable/share/packager.py +797 -0
- pixeltable/share/protocol/__init__.py +33 -0
- pixeltable/share/protocol/common.py +165 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +119 -0
- pixeltable/share/publish.py +349 -0
- pixeltable/store.py +398 -232
- pixeltable/type_system.py +730 -267
- pixeltable/utils/__init__.py +40 -0
- pixeltable/utils/arrow.py +201 -29
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +346 -0
- pixeltable/utils/coco.py +26 -27
- pixeltable/utils/code.py +4 -4
- pixeltable/utils/console_output.py +46 -0
- pixeltable/utils/coroutine.py +24 -0
- pixeltable/utils/dbms.py +92 -0
- pixeltable/utils/description_helper.py +11 -12
- pixeltable/utils/documents.py +60 -61
- pixeltable/utils/exception_handler.py +36 -0
- pixeltable/utils/filecache.py +38 -22
- pixeltable/utils/formatter.py +88 -51
- pixeltable/utils/gcs_store.py +295 -0
- pixeltable/utils/http.py +133 -0
- pixeltable/utils/http_server.py +14 -13
- pixeltable/utils/iceberg.py +13 -0
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +90 -0
- pixeltable/utils/local_store.py +322 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +573 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +20 -20
- pixeltable/utils/s3_store.py +527 -0
- pixeltable/utils/sql.py +32 -5
- pixeltable/utils/system.py +30 -0
- pixeltable/utils/transactional_directory.py +4 -3
- pixeltable-0.5.7.dist-info/METADATA +579 -0
- pixeltable-0.5.7.dist-info/RECORD +227 -0
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
- pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/catalog/named_function.py +0 -36
- pixeltable/catalog/path_dict.py +0 -141
- pixeltable/dataframe.py +0 -894
- pixeltable/exec/expr_eval_node.py +0 -232
- pixeltable/ext/__init__.py +0 -14
- pixeltable/ext/functions/__init__.py +0 -8
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/ext/functions/yolox.py +0 -157
- pixeltable/tool/create_test_db_dump.py +0 -311
- pixeltable/tool/create_test_video.py +0 -81
- pixeltable/tool/doc_plugins/griffe.py +0 -50
- pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
- pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
- pixeltable/tool/embed_udf.py +0 -9
- pixeltable/tool/mypy_plugin.py +0 -55
- pixeltable/utils/media_store.py +0 -76
- pixeltable/utils/s3.py +0 -16
- pixeltable-0.2.26.dist-info/METADATA +0 -400
- pixeltable-0.2.26.dist-info/RECORD +0 -156
- pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pixeltable UDFs for mathematical operations.
|
|
3
|
+
|
|
4
|
+
Example:
|
|
5
|
+
```python
|
|
6
|
+
import pixeltable as pxt
|
|
7
|
+
|
|
8
|
+
t = pxt.get_table(...)
|
|
9
|
+
t.select(t.float_col.floor()).collect()
|
|
10
|
+
```
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import builtins
|
|
14
|
+
import math
|
|
15
|
+
|
|
16
|
+
import sqlalchemy as sql
|
|
17
|
+
|
|
18
|
+
import pixeltable as pxt
|
|
19
|
+
from pixeltable.utils.code import local_public_names
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@pxt.udf(is_method=True)
|
|
23
|
+
def abs(self: float) -> float:
|
|
24
|
+
"""
|
|
25
|
+
Return the absolute value of the given number.
|
|
26
|
+
|
|
27
|
+
Equivalent to Python [`builtins.abs()`](https://docs.python.org/3/library/functions.html#abs).
|
|
28
|
+
"""
|
|
29
|
+
return builtins.abs(self)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@abs.to_sql
|
|
33
|
+
def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
34
|
+
return sql.func.abs(self)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@pxt.udf(is_method=True)
|
|
38
|
+
def ceil(self: float) -> float:
|
|
39
|
+
"""
|
|
40
|
+
Return the ceiling of the given number.
|
|
41
|
+
|
|
42
|
+
Equivalent to Python [`float(math.ceil(self))`](https://docs.python.org/3/library/math.html#math.ceil) if `self`
|
|
43
|
+
is finite, or `self` itself if `self` is infinite. (This is slightly different from the default behavior of
|
|
44
|
+
`math.ceil(self)`, which always returns an `int` and raises an error if `self` is infinite. The behavior in
|
|
45
|
+
Pixeltable generalizes the Python operator and is chosen to align with the SQL standard.)
|
|
46
|
+
"""
|
|
47
|
+
# This ensures the same behavior as SQL
|
|
48
|
+
if math.isfinite(self):
|
|
49
|
+
return float(math.ceil(self))
|
|
50
|
+
else:
|
|
51
|
+
return self
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@ceil.to_sql
|
|
55
|
+
def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
56
|
+
return sql.func.ceiling(self)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@pxt.udf(is_method=True)
|
|
60
|
+
def floor(self: float) -> float:
|
|
61
|
+
"""
|
|
62
|
+
Return the floor of the given number.
|
|
63
|
+
|
|
64
|
+
Equivalent to Python [`float(math.floor(self))`](https://docs.python.org/3/library/math.html#math.floor) if `self`
|
|
65
|
+
is finite, or `self` itself if `self` is infinite. (This is slightly different from the default behavior of
|
|
66
|
+
`math.floor(self)`, which always returns an `int` and raises an error if `self` is infinite. The behavior of
|
|
67
|
+
Pixeltable generalizes the Python operator and is chosen to align with the SQL standard.)
|
|
68
|
+
"""
|
|
69
|
+
# This ensures the same behavior as SQL
|
|
70
|
+
if math.isfinite(self):
|
|
71
|
+
return float(math.floor(self))
|
|
72
|
+
else:
|
|
73
|
+
return self
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
@floor.to_sql
|
|
77
|
+
def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
78
|
+
return sql.func.floor(self)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
@pxt.udf(is_method=True)
|
|
82
|
+
def round(self: float, digits: int | None = None) -> float:
|
|
83
|
+
"""
|
|
84
|
+
Round a number to a given precision in decimal digits.
|
|
85
|
+
|
|
86
|
+
Equivalent to Python [`builtins.round(self, digits or 0)`](https://docs.python.org/3/library/functions.html#round).
|
|
87
|
+
Note that if `digits` is not specified, the behavior matches `builtins.round(self, 0)` rather than
|
|
88
|
+
`builtins.round(self)`; this ensures that the return type is always `float` (as in SQL) rather than `int`.
|
|
89
|
+
"""
|
|
90
|
+
# Set digits explicitly to 0 to guarantee a return type of float; this ensures the same behavior as SQL
|
|
91
|
+
return builtins.round(self, digits or 0)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
@round.to_sql
|
|
95
|
+
def _(self: sql.ColumnElement, digits: sql.ColumnElement | None = None) -> sql.ColumnElement:
|
|
96
|
+
if digits is None:
|
|
97
|
+
return sql.func.round(self)
|
|
98
|
+
else:
|
|
99
|
+
return sql.cast(sql.func.round(sql.cast(self, sql.Numeric), sql.cast(digits, sql.Integer)), sql.Float)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
@pxt.udf(is_method=True)
|
|
103
|
+
def pow(self: int, other: int) -> float:
|
|
104
|
+
"""
|
|
105
|
+
Raise `self` to the power of `other`.
|
|
106
|
+
|
|
107
|
+
Equivalent to Python [`self ** other`](https://docs.python.org/3/library/functions.html#pow).
|
|
108
|
+
"""
|
|
109
|
+
return self**other
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
@pow.to_sql
|
|
113
|
+
def _(self: sql.ColumnElement, other: sql.ColumnElement) -> sql.ColumnElement:
|
|
114
|
+
return sql.func.pow(self, other)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
@pxt.udf(is_method=True)
|
|
118
|
+
def bitwise_and(self: int, other: int) -> int:
|
|
119
|
+
"""
|
|
120
|
+
Bitwise AND of two integers.
|
|
121
|
+
|
|
122
|
+
Equivalent to Python
|
|
123
|
+
[`self & other`](https://docs.python.org/3/library/stdtypes.html#bitwise-operations-on-integer-types).
|
|
124
|
+
"""
|
|
125
|
+
return self & other
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
@bitwise_and.to_sql
|
|
129
|
+
def _(self: sql.ColumnElement, other: sql.ColumnElement) -> sql.ColumnElement:
|
|
130
|
+
return self.bitwise_and(other)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
@pxt.udf(is_method=True)
|
|
134
|
+
def bitwise_or(self: int, other: int) -> int:
|
|
135
|
+
"""
|
|
136
|
+
Bitwise OR of two integers.
|
|
137
|
+
|
|
138
|
+
Equivalent to Python
|
|
139
|
+
[`self | other`](https://docs.python.org/3/library/stdtypes.html#bitwise-operations-on-integer-types).
|
|
140
|
+
"""
|
|
141
|
+
return self | other
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
@bitwise_or.to_sql
|
|
145
|
+
def _(self: sql.ColumnElement, other: sql.ColumnElement) -> sql.ColumnElement:
|
|
146
|
+
return self.bitwise_or(other)
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
@pxt.udf(is_method=True)
|
|
150
|
+
def bitwise_xor(self: int, other: int) -> int:
|
|
151
|
+
"""
|
|
152
|
+
Bitwise XOR of two integers.
|
|
153
|
+
|
|
154
|
+
Equivalent to Python
|
|
155
|
+
[`self ^ other`](https://docs.python.org/3/library/stdtypes.html#bitwise-operations-on-integer-types).
|
|
156
|
+
"""
|
|
157
|
+
return self ^ other
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
@bitwise_xor.to_sql
|
|
161
|
+
def _(self: sql.ColumnElement, other: sql.ColumnElement) -> sql.ColumnElement:
|
|
162
|
+
return self.bitwise_xor(other)
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
__all__ = local_public_names(__name__)
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def __dir__() -> list[str]:
|
|
169
|
+
return __all__
|
|
@@ -1,26 +1,28 @@
|
|
|
1
1
|
"""
|
|
2
|
-
Pixeltable
|
|
2
|
+
Pixeltable UDFs
|
|
3
3
|
that wrap various endpoints from the Mistral AI API. In order to use them, you must
|
|
4
4
|
first `pip install mistralai` and configure your Mistral AI credentials, as described in
|
|
5
|
-
the [Working with Mistral AI](https://pixeltable.
|
|
5
|
+
the [Working with Mistral AI](https://docs.pixeltable.com/notebooks/integrations/working-with-mistralai) tutorial.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
from typing import TYPE_CHECKING,
|
|
8
|
+
from typing import TYPE_CHECKING, Any
|
|
9
9
|
|
|
10
10
|
import numpy as np
|
|
11
11
|
|
|
12
12
|
import pixeltable as pxt
|
|
13
|
+
import pixeltable.type_system as ts
|
|
13
14
|
from pixeltable.env import Env, register_client
|
|
14
15
|
from pixeltable.func.signature import Batch
|
|
15
16
|
from pixeltable.utils.code import local_public_names
|
|
16
17
|
|
|
17
18
|
if TYPE_CHECKING:
|
|
18
|
-
import mistralai
|
|
19
|
+
import mistralai
|
|
19
20
|
|
|
20
21
|
|
|
21
22
|
@register_client('mistral')
|
|
22
23
|
def _(api_key: str) -> 'mistralai.Mistral':
|
|
23
24
|
import mistralai
|
|
25
|
+
|
|
24
26
|
return mistralai.Mistral(api_key=api_key)
|
|
25
27
|
|
|
26
28
|
|
|
@@ -28,18 +30,9 @@ def _mistralai_client() -> 'mistralai.Mistral':
|
|
|
28
30
|
return Env.get().get_client('mistral')
|
|
29
31
|
|
|
30
32
|
|
|
31
|
-
@pxt.udf
|
|
32
|
-
def chat_completions(
|
|
33
|
-
messages: list[dict[str, str]],
|
|
34
|
-
*,
|
|
35
|
-
model: str,
|
|
36
|
-
temperature: Optional[float] = 0.7,
|
|
37
|
-
top_p: Optional[float] = 1.0,
|
|
38
|
-
max_tokens: Optional[int] = None,
|
|
39
|
-
stop: Optional[list[str]] = None,
|
|
40
|
-
random_seed: Optional[int] = None,
|
|
41
|
-
response_format: Optional[dict] = None,
|
|
42
|
-
safe_prompt: Optional[bool] = False,
|
|
33
|
+
@pxt.udf(resource_pool='request-rate:mistral')
|
|
34
|
+
async def chat_completions(
|
|
35
|
+
messages: list[dict[str, str]], *, model: str, model_kwargs: dict[str, Any] | None = None
|
|
43
36
|
) -> dict:
|
|
44
37
|
"""
|
|
45
38
|
Chat Completion API.
|
|
@@ -47,6 +40,10 @@ def chat_completions(
|
|
|
47
40
|
Equivalent to the Mistral AI `chat/completions` API endpoint.
|
|
48
41
|
For additional details, see: <https://docs.mistral.ai/api/#tag/chat>
|
|
49
42
|
|
|
43
|
+
Request throttling:
|
|
44
|
+
Applies the rate limit set in the config (section `mistral`, key `rate_limit`). If no rate
|
|
45
|
+
limit is configured, uses a default of 600 RPM.
|
|
46
|
+
|
|
50
47
|
__Requirements:__
|
|
51
48
|
|
|
52
49
|
- `pip install mistralai`
|
|
@@ -54,8 +51,8 @@ def chat_completions(
|
|
|
54
51
|
Args:
|
|
55
52
|
messages: The prompt(s) to generate completions for.
|
|
56
53
|
model: ID of the model to use. (See overview here: <https://docs.mistral.ai/getting-started/models/>)
|
|
57
|
-
|
|
58
|
-
|
|
54
|
+
model_kwargs: Additional keyword args for the Mistral `chat/completions` API.
|
|
55
|
+
For details on the available parameters, see: <https://docs.mistral.ai/api/#tag/chat>
|
|
59
56
|
|
|
60
57
|
Returns:
|
|
61
58
|
A dictionary containing the response and other metadata.
|
|
@@ -65,41 +62,32 @@ def chat_completions(
|
|
|
65
62
|
to an existing Pixeltable column `tbl.prompt` of the table `tbl`:
|
|
66
63
|
|
|
67
64
|
>>> messages = [{'role': 'user', 'content': tbl.prompt}]
|
|
68
|
-
... tbl
|
|
65
|
+
... tbl.add_computed_column(response=chat_completions(messages, model='mistral-latest-small'))
|
|
69
66
|
"""
|
|
67
|
+
if model_kwargs is None:
|
|
68
|
+
model_kwargs = {}
|
|
69
|
+
|
|
70
70
|
Env.get().require_package('mistralai')
|
|
71
|
-
|
|
71
|
+
result = await _mistralai_client().chat.complete_async(
|
|
72
72
|
messages=messages, # type: ignore[arg-type]
|
|
73
73
|
model=model,
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
).dict()
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
@pxt.udf
|
|
85
|
-
def fim_completions(
|
|
86
|
-
prompt: str,
|
|
87
|
-
*,
|
|
88
|
-
model: str,
|
|
89
|
-
temperature: Optional[float] = 0.7,
|
|
90
|
-
top_p: Optional[float] = 1.0,
|
|
91
|
-
max_tokens: Optional[int] = None,
|
|
92
|
-
min_tokens: Optional[int] = None,
|
|
93
|
-
stop: Optional[list[str]] = None,
|
|
94
|
-
random_seed: Optional[int] = None,
|
|
95
|
-
suffix: Optional[str] = None,
|
|
96
|
-
) -> dict:
|
|
74
|
+
**model_kwargs,
|
|
75
|
+
)
|
|
76
|
+
return result.dict()
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@pxt.udf(resource_pool='request-rate:mistral')
|
|
80
|
+
async def fim_completions(prompt: str, *, model: str, model_kwargs: dict[str, Any] | None = None) -> dict:
|
|
97
81
|
"""
|
|
98
82
|
Fill-in-the-middle Completion API.
|
|
99
83
|
|
|
100
84
|
Equivalent to the Mistral AI `fim/completions` API endpoint.
|
|
101
85
|
For additional details, see: <https://docs.mistral.ai/api/#tag/fim>
|
|
102
86
|
|
|
87
|
+
Request throttling:
|
|
88
|
+
Applies the rate limit set in the config (section `mistral`, key `rate_limit`). If no rate
|
|
89
|
+
limit is configured, uses a default of 600 RPM.
|
|
90
|
+
|
|
103
91
|
__Requirements:__
|
|
104
92
|
|
|
105
93
|
- `pip install mistralai`
|
|
@@ -107,6 +95,8 @@ def fim_completions(
|
|
|
107
95
|
Args:
|
|
108
96
|
prompt: The text/code to complete.
|
|
109
97
|
model: ID of the model to use. (See overview here: <https://docs.mistral.ai/getting-started/models/>)
|
|
98
|
+
model_kwargs: Additional keyword args for the Mistral `fim/completions` API.
|
|
99
|
+
For details on the available parameters, see: <https://docs.mistral.ai/api/#tag/fim>
|
|
110
100
|
|
|
111
101
|
For details on the other parameters, see: <https://docs.mistral.ai/api/#tag/fim>
|
|
112
102
|
|
|
@@ -117,35 +107,31 @@ def fim_completions(
|
|
|
117
107
|
Add a computed column that applies the model `codestral-latest`
|
|
118
108
|
to an existing Pixeltable column `tbl.prompt` of the table `tbl`:
|
|
119
109
|
|
|
120
|
-
>>> tbl
|
|
110
|
+
>>> tbl.add_computed_column(response=fim_completions(tbl.prompt, model='codestral-latest'))
|
|
121
111
|
"""
|
|
112
|
+
if model_kwargs is None:
|
|
113
|
+
model_kwargs = {}
|
|
114
|
+
|
|
122
115
|
Env.get().require_package('mistralai')
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
model=model,
|
|
126
|
-
temperature=temperature,
|
|
127
|
-
top_p=top_p,
|
|
128
|
-
max_tokens=_opt(max_tokens),
|
|
129
|
-
min_tokens=_opt(min_tokens),
|
|
130
|
-
stop=stop,
|
|
131
|
-
random_seed=_opt(random_seed),
|
|
132
|
-
suffix=_opt(suffix)
|
|
133
|
-
).dict()
|
|
116
|
+
result = await _mistralai_client().fim.complete_async(prompt=prompt, model=model, **model_kwargs)
|
|
117
|
+
return result.dict()
|
|
134
118
|
|
|
135
119
|
|
|
136
|
-
_embedding_dimensions_cache: dict[str, int] = {
|
|
137
|
-
'mistral-embed': 1024
|
|
138
|
-
}
|
|
120
|
+
_embedding_dimensions_cache: dict[str, int] = {'mistral-embed': 1024}
|
|
139
121
|
|
|
140
122
|
|
|
141
|
-
@pxt.udf(batch_size=16)
|
|
142
|
-
def embeddings(input: Batch[str], *, model: str) -> Batch[pxt.Array[(None,), pxt.Float]]:
|
|
123
|
+
@pxt.udf(batch_size=16, resource_pool='request-rate:mistral')
|
|
124
|
+
async def embeddings(input: Batch[str], *, model: str) -> Batch[pxt.Array[(None,), pxt.Float]]: # noqa: RUF029
|
|
143
125
|
"""
|
|
144
126
|
Embeddings API.
|
|
145
127
|
|
|
146
128
|
Equivalent to the Mistral AI `embeddings` API endpoint.
|
|
147
129
|
For additional details, see: <https://docs.mistral.ai/api/#tag/embeddings>
|
|
148
130
|
|
|
131
|
+
Request throttling:
|
|
132
|
+
Applies the rate limit set in the config (section `mistral`, key `rate_limit`). If no rate
|
|
133
|
+
limit is configured, uses a default of 600 RPM.
|
|
134
|
+
|
|
149
135
|
__Requirements:__
|
|
150
136
|
|
|
151
137
|
- `pip install mistralai`
|
|
@@ -158,29 +144,18 @@ def embeddings(input: Batch[str], *, model: str) -> Batch[pxt.Array[(None,), pxt
|
|
|
158
144
|
An array representing the application of the given embedding to `input`.
|
|
159
145
|
"""
|
|
160
146
|
Env.get().require_package('mistralai')
|
|
161
|
-
result = _mistralai_client().embeddings.create(
|
|
162
|
-
inputs=input,
|
|
163
|
-
model=model,
|
|
164
|
-
)
|
|
147
|
+
result = _mistralai_client().embeddings.create(inputs=input, model=model)
|
|
165
148
|
return [np.array(data.embedding, dtype=np.float64) for data in result.data]
|
|
166
149
|
|
|
167
150
|
|
|
168
151
|
@embeddings.conditional_return_type
|
|
169
|
-
def _(model: str) ->
|
|
152
|
+
def _(model: str) -> ts.ArrayType:
|
|
170
153
|
dimensions = _embedding_dimensions_cache.get(model) # `None` if unknown model
|
|
171
|
-
return
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
_T = TypeVar('_T')
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
def _opt(arg: Optional[_T]) -> Union[_T, 'mistralai.types.basemodel.Unset']:
|
|
178
|
-
from mistralai.types import UNSET
|
|
179
|
-
return arg if arg is not None else UNSET
|
|
154
|
+
return ts.ArrayType((dimensions,), dtype=ts.FloatType())
|
|
180
155
|
|
|
181
156
|
|
|
182
157
|
__all__ = local_public_names(__name__)
|
|
183
158
|
|
|
184
159
|
|
|
185
|
-
def __dir__():
|
|
160
|
+
def __dir__() -> list[str]:
|
|
186
161
|
return __all__
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pixeltable UDF for converting media file URIs to presigned HTTP URLs.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from pixeltable import exceptions as excs
|
|
6
|
+
from pixeltable.func.udf import udf
|
|
7
|
+
from pixeltable.utils.code import local_public_names
|
|
8
|
+
from pixeltable.utils.object_stores import ObjectOps, ObjectPath, StorageTarget
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@udf
|
|
12
|
+
def presigned_url(uri: str, expiration_seconds: int) -> str:
|
|
13
|
+
"""
|
|
14
|
+
Convert a blob storage URI to a presigned HTTP URL for direct access.
|
|
15
|
+
|
|
16
|
+
Generates a time-limited, publicly accessible URL from cloud storage URIs
|
|
17
|
+
(S3, GCS, Azure, etc.) that can be used to serve media files over HTTP.
|
|
18
|
+
|
|
19
|
+
Note:
|
|
20
|
+
This function uses presigned URLs from storage providers. Provider-specific
|
|
21
|
+
limitations apply:
|
|
22
|
+
|
|
23
|
+
- Google Cloud Storage: maximum 7-day expiration
|
|
24
|
+
- AWS S3: requires proper region configuration
|
|
25
|
+
- Azure: subject to storage account access policies
|
|
26
|
+
|
|
27
|
+
Args:
|
|
28
|
+
uri: The media file URI (e.g., `s3://bucket/path`, `gs://bucket/path`, `azure://container/path`)
|
|
29
|
+
expiration_seconds: How long the URL remains valid
|
|
30
|
+
|
|
31
|
+
Returns:
|
|
32
|
+
A presigned HTTP URL for accessing the file
|
|
33
|
+
|
|
34
|
+
Raises:
|
|
35
|
+
Error: If the URI is a local file:// path
|
|
36
|
+
|
|
37
|
+
Examples:
|
|
38
|
+
Generate a presigned URL for a video column with 1-hour expiration:
|
|
39
|
+
|
|
40
|
+
>>> tbl.select(
|
|
41
|
+
... original_url=tbl.video.fileurl,
|
|
42
|
+
... presigned_url=pxtf.net.presigned_url(tbl.video.fileurl, 3600)
|
|
43
|
+
... ).collect()
|
|
44
|
+
"""
|
|
45
|
+
if not uri:
|
|
46
|
+
return uri
|
|
47
|
+
|
|
48
|
+
# Parse the object storage address from the URI
|
|
49
|
+
soa = ObjectPath.parse_object_storage_addr(uri, allow_obj_name=True)
|
|
50
|
+
|
|
51
|
+
# HTTP/HTTPS URLs are already publicly accessible
|
|
52
|
+
if soa.storage_target == StorageTarget.HTTP_STORE:
|
|
53
|
+
return uri
|
|
54
|
+
|
|
55
|
+
# For file:// URLs, we can't generate presigned URLs
|
|
56
|
+
if soa.storage_target == StorageTarget.LOCAL_STORE:
|
|
57
|
+
raise excs.Error(
|
|
58
|
+
'Cannot generate presigned URL for local file:// URLs. '
|
|
59
|
+
'Please use cloud storage (S3, GCS, Azure) for presigned URLs.'
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
store = ObjectOps.get_store(soa, allow_obj_name=True)
|
|
63
|
+
return store.create_presigned_url(soa, expiration_seconds)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
__all__ = local_public_names(__name__)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def __dir__() -> list[str]:
|
|
70
|
+
return __all__
|
pixeltable/functions/ollama.py
CHANGED
|
@@ -1,4 +1,11 @@
|
|
|
1
|
-
|
|
1
|
+
"""
|
|
2
|
+
Pixeltable UDFs for Ollama local models.
|
|
3
|
+
|
|
4
|
+
Provides integration with Ollama for running large language models locally,
|
|
5
|
+
including chat completions and embeddings.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import TYPE_CHECKING
|
|
2
9
|
|
|
3
10
|
import numpy as np
|
|
4
11
|
|
|
@@ -14,10 +21,11 @@ if TYPE_CHECKING:
|
|
|
14
21
|
@env.register_client('ollama')
|
|
15
22
|
def _(host: str) -> 'ollama.Client':
|
|
16
23
|
import ollama
|
|
24
|
+
|
|
17
25
|
return ollama.Client(host=host)
|
|
18
26
|
|
|
19
27
|
|
|
20
|
-
def _ollama_client() ->
|
|
28
|
+
def _ollama_client() -> 'ollama.Client | None':
|
|
21
29
|
try:
|
|
22
30
|
return env.Env.get().get_client('ollama')
|
|
23
31
|
except Exception:
|
|
@@ -32,10 +40,10 @@ def generate(
|
|
|
32
40
|
suffix: str = '',
|
|
33
41
|
system: str = '',
|
|
34
42
|
template: str = '',
|
|
35
|
-
context:
|
|
43
|
+
context: list[int] | None = None,
|
|
36
44
|
raw: bool = False,
|
|
37
|
-
format: str =
|
|
38
|
-
options:
|
|
45
|
+
format: str | None = None,
|
|
46
|
+
options: dict | None = None,
|
|
39
47
|
) -> dict:
|
|
40
48
|
"""
|
|
41
49
|
Generate a response for a given prompt with a provided model.
|
|
@@ -44,14 +52,14 @@ def generate(
|
|
|
44
52
|
prompt: The prompt to generate a response for.
|
|
45
53
|
model: The model name.
|
|
46
54
|
suffix: The text after the model response.
|
|
47
|
-
format: The format of the response; must be one of `'json'` or `
|
|
55
|
+
format: The format of the response; must be one of `'json'` or `None`.
|
|
48
56
|
system: System message.
|
|
49
57
|
template: Prompt template to use.
|
|
50
58
|
context: The context parameter returned from a previous call to `generate()`.
|
|
51
59
|
raw: If `True`, no formatting will be applied to the prompt.
|
|
52
|
-
options: Additional options
|
|
53
|
-
For details, see the
|
|
54
|
-
[Valid Parameters and Values](https://github.com/ollama/ollama/blob/main/docs/modelfile.
|
|
60
|
+
options: Additional options for the Ollama `generate` call, such as `max_tokens`, `temperature`, `top_p`, and
|
|
61
|
+
`top_k`. For details, see the
|
|
62
|
+
[Valid Parameters and Values](https://github.com/ollama/ollama/blob/main/docs/modelfile.mdx#valid-parameters-and-values)
|
|
55
63
|
section of the Ollama documentation.
|
|
56
64
|
"""
|
|
57
65
|
env.Env.get().require_package('ollama')
|
|
@@ -76,9 +84,9 @@ def chat(
|
|
|
76
84
|
messages: list[dict],
|
|
77
85
|
*,
|
|
78
86
|
model: str,
|
|
79
|
-
tools:
|
|
80
|
-
format: str =
|
|
81
|
-
options:
|
|
87
|
+
tools: list[dict] | None = None,
|
|
88
|
+
format: str | None = None,
|
|
89
|
+
options: dict | None = None,
|
|
82
90
|
) -> dict:
|
|
83
91
|
"""
|
|
84
92
|
Generate the next message in a chat with a provided model.
|
|
@@ -87,32 +95,22 @@ def chat(
|
|
|
87
95
|
messages: The messages of the chat.
|
|
88
96
|
model: The model name.
|
|
89
97
|
tools: Tools for the model to use.
|
|
90
|
-
format: The format of the response; must be one of `'json'` or `
|
|
91
|
-
options: Additional options to pass to the `chat` call, such as `max_tokens`, `temperature`, `top_p`, and
|
|
92
|
-
For details, see the
|
|
93
|
-
[Valid Parameters and Values](https://github.com/ollama/ollama/blob/main/docs/modelfile.
|
|
98
|
+
format: The format of the response; must be one of `'json'` or `None`.
|
|
99
|
+
options: Additional options to pass to the `chat` call, such as `max_tokens`, `temperature`, `top_p`, and
|
|
100
|
+
`top_k`. For details, see the
|
|
101
|
+
[Valid Parameters and Values](https://github.com/ollama/ollama/blob/main/docs/modelfile.mdx#valid-parameters-and-values)
|
|
94
102
|
section of the Ollama documentation.
|
|
95
103
|
"""
|
|
96
104
|
env.Env.get().require_package('ollama')
|
|
97
105
|
import ollama
|
|
98
106
|
|
|
99
107
|
client = _ollama_client() or ollama
|
|
100
|
-
return client.chat(
|
|
101
|
-
model=model,
|
|
102
|
-
messages=messages,
|
|
103
|
-
tools=tools,
|
|
104
|
-
format=format,
|
|
105
|
-
options=options,
|
|
106
|
-
).dict() # type: ignore[call-overload]
|
|
108
|
+
return client.chat(model=model, messages=messages, tools=tools, format=format, options=options).dict() # type: ignore[call-overload]
|
|
107
109
|
|
|
108
110
|
|
|
109
111
|
@pxt.udf(batch_size=16)
|
|
110
112
|
def embed(
|
|
111
|
-
input: Batch[str],
|
|
112
|
-
*,
|
|
113
|
-
model: str,
|
|
114
|
-
truncate: bool = True,
|
|
115
|
-
options: Optional[dict] = None,
|
|
113
|
+
input: Batch[str], *, model: str, truncate: bool = True, options: dict | None = None
|
|
116
114
|
) -> Batch[pxt.Array[(None,), pxt.Float]]:
|
|
117
115
|
"""
|
|
118
116
|
Generate embeddings from a model.
|
|
@@ -124,24 +122,19 @@ def embed(
|
|
|
124
122
|
Returns error if false and context length is exceeded.
|
|
125
123
|
options: Additional options to pass to the `embed` call.
|
|
126
124
|
For details, see the
|
|
127
|
-
[Valid Parameters and Values](https://github.com/ollama/ollama/blob/main/docs/modelfile.
|
|
125
|
+
[Valid Parameters and Values](https://github.com/ollama/ollama/blob/main/docs/modelfile.mdx#valid-parameters-and-values)
|
|
128
126
|
section of the Ollama documentation.
|
|
129
127
|
"""
|
|
130
128
|
env.Env.get().require_package('ollama')
|
|
131
129
|
import ollama
|
|
132
130
|
|
|
133
131
|
client = _ollama_client() or ollama
|
|
134
|
-
results = client.embed(
|
|
135
|
-
model=model,
|
|
136
|
-
input=input,
|
|
137
|
-
truncate=truncate,
|
|
138
|
-
options=options,
|
|
139
|
-
).dict()
|
|
132
|
+
results = client.embed(model=model, input=input, truncate=truncate, options=options).dict()
|
|
140
133
|
return [np.array(data, dtype=np.float64) for data in results['embeddings']]
|
|
141
134
|
|
|
142
135
|
|
|
143
136
|
__all__ = local_public_names(__name__)
|
|
144
137
|
|
|
145
138
|
|
|
146
|
-
def __dir__():
|
|
139
|
+
def __dir__() -> list[str]:
|
|
147
140
|
return __all__
|