pixeltable 0.3.14__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +42 -8
- pixeltable/{dataframe.py → _query.py} +470 -206
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +5 -4
- pixeltable/catalog/catalog.py +1785 -432
- pixeltable/catalog/column.py +190 -113
- pixeltable/catalog/dir.py +2 -4
- pixeltable/catalog/globals.py +19 -46
- pixeltable/catalog/insertable_table.py +191 -98
- pixeltable/catalog/path.py +63 -23
- pixeltable/catalog/schema_object.py +11 -15
- pixeltable/catalog/table.py +843 -436
- pixeltable/catalog/table_metadata.py +103 -0
- pixeltable/catalog/table_version.py +978 -657
- pixeltable/catalog/table_version_handle.py +72 -16
- pixeltable/catalog/table_version_path.py +112 -43
- pixeltable/catalog/tbl_ops.py +53 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +134 -90
- pixeltable/config.py +134 -22
- pixeltable/env.py +471 -157
- pixeltable/exceptions.py +6 -0
- pixeltable/exec/__init__.py +4 -1
- pixeltable/exec/aggregation_node.py +7 -8
- pixeltable/exec/cache_prefetch_node.py +83 -110
- pixeltable/exec/cell_materialization_node.py +268 -0
- pixeltable/exec/cell_reconstruction_node.py +168 -0
- pixeltable/exec/component_iteration_node.py +4 -3
- pixeltable/exec/data_row_batch.py +8 -65
- pixeltable/exec/exec_context.py +16 -4
- pixeltable/exec/exec_node.py +13 -36
- pixeltable/exec/expr_eval/evaluators.py +11 -7
- pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
- pixeltable/exec/expr_eval/globals.py +8 -5
- pixeltable/exec/expr_eval/row_buffer.py +1 -2
- pixeltable/exec/expr_eval/schedulers.py +106 -56
- pixeltable/exec/globals.py +35 -0
- pixeltable/exec/in_memory_data_node.py +19 -19
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +16 -9
- pixeltable/exec/sql_node.py +351 -84
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/arithmetic_expr.py +27 -22
- pixeltable/exprs/array_slice.py +3 -3
- pixeltable/exprs/column_property_ref.py +36 -23
- pixeltable/exprs/column_ref.py +213 -89
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +5 -4
- pixeltable/exprs/data_row.py +164 -54
- pixeltable/exprs/expr.py +70 -44
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +17 -10
- pixeltable/exprs/function_call.py +100 -40
- pixeltable/exprs/globals.py +2 -2
- pixeltable/exprs/in_predicate.py +4 -4
- pixeltable/exprs/inline_expr.py +18 -32
- pixeltable/exprs/is_null.py +7 -3
- pixeltable/exprs/json_mapper.py +8 -8
- pixeltable/exprs/json_path.py +56 -22
- pixeltable/exprs/literal.py +27 -5
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +167 -67
- pixeltable/exprs/rowid_ref.py +25 -10
- pixeltable/exprs/similarity_expr.py +58 -40
- pixeltable/exprs/sql_element_cache.py +4 -4
- pixeltable/exprs/string_op.py +5 -5
- pixeltable/exprs/type_cast.py +3 -5
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/callable_function.py +9 -9
- pixeltable/func/expr_template_function.py +17 -11
- pixeltable/func/function.py +18 -20
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/globals.py +2 -3
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +29 -27
- pixeltable/func/signature.py +46 -19
- pixeltable/func/tools.py +31 -13
- pixeltable/func/udf.py +18 -20
- pixeltable/functions/__init__.py +16 -0
- pixeltable/functions/anthropic.py +123 -77
- pixeltable/functions/audio.py +147 -10
- pixeltable/functions/bedrock.py +13 -6
- pixeltable/functions/date.py +7 -4
- pixeltable/functions/deepseek.py +35 -43
- pixeltable/functions/document.py +81 -0
- pixeltable/functions/fal.py +76 -0
- pixeltable/functions/fireworks.py +11 -20
- pixeltable/functions/gemini.py +195 -39
- pixeltable/functions/globals.py +142 -14
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1056 -24
- pixeltable/functions/image.py +115 -57
- pixeltable/functions/json.py +1 -1
- pixeltable/functions/llama_cpp.py +28 -13
- pixeltable/functions/math.py +67 -5
- pixeltable/functions/mistralai.py +18 -55
- pixeltable/functions/net.py +70 -0
- pixeltable/functions/ollama.py +20 -13
- pixeltable/functions/openai.py +240 -226
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +4 -4
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +239 -69
- pixeltable/functions/timestamp.py +16 -16
- pixeltable/functions/together.py +24 -84
- pixeltable/functions/twelvelabs.py +188 -0
- pixeltable/functions/util.py +6 -1
- pixeltable/functions/uuid.py +30 -0
- pixeltable/functions/video.py +1515 -107
- pixeltable/functions/vision.py +8 -8
- pixeltable/functions/voyageai.py +289 -0
- pixeltable/functions/whisper.py +16 -8
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/{ext/functions → functions}/yolox.py +2 -4
- pixeltable/globals.py +362 -115
- pixeltable/index/base.py +17 -21
- pixeltable/index/btree.py +28 -22
- pixeltable/index/embedding_index.py +100 -118
- pixeltable/io/__init__.py +4 -2
- pixeltable/io/datarows.py +8 -7
- pixeltable/io/external_store.py +56 -105
- pixeltable/io/fiftyone.py +13 -13
- pixeltable/io/globals.py +31 -30
- pixeltable/io/hf_datasets.py +61 -16
- pixeltable/io/label_studio.py +74 -70
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +21 -12
- pixeltable/io/parquet.py +25 -105
- pixeltable/io/table_data_conduit.py +250 -123
- pixeltable/io/utils.py +4 -4
- pixeltable/iterators/__init__.py +2 -1
- pixeltable/iterators/audio.py +26 -25
- pixeltable/iterators/base.py +9 -3
- pixeltable/iterators/document.py +112 -78
- pixeltable/iterators/image.py +12 -15
- pixeltable/iterators/string.py +11 -4
- pixeltable/iterators/video.py +523 -120
- pixeltable/metadata/__init__.py +14 -3
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_18.py +2 -2
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_20.py +2 -2
- pixeltable/metadata/converters/convert_21.py +2 -2
- pixeltable/metadata/converters/convert_22.py +2 -2
- pixeltable/metadata/converters/convert_24.py +2 -2
- pixeltable/metadata/converters/convert_25.py +2 -2
- pixeltable/metadata/converters/convert_26.py +2 -2
- pixeltable/metadata/converters/convert_29.py +4 -4
- pixeltable/metadata/converters/convert_30.py +34 -21
- pixeltable/metadata/converters/convert_34.py +2 -2
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/convert_41.py +12 -0
- pixeltable/metadata/converters/convert_42.py +9 -0
- pixeltable/metadata/converters/convert_43.py +44 -0
- pixeltable/metadata/converters/util.py +20 -31
- pixeltable/metadata/notes.py +9 -0
- pixeltable/metadata/schema.py +140 -53
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +382 -115
- pixeltable/share/__init__.py +1 -1
- pixeltable/share/packager.py +547 -83
- pixeltable/share/protocol/__init__.py +33 -0
- pixeltable/share/protocol/common.py +165 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +119 -0
- pixeltable/share/publish.py +257 -59
- pixeltable/store.py +311 -194
- pixeltable/type_system.py +373 -211
- pixeltable/utils/__init__.py +2 -3
- pixeltable/utils/arrow.py +131 -17
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +346 -0
- pixeltable/utils/coco.py +6 -6
- pixeltable/utils/code.py +3 -3
- pixeltable/utils/console_output.py +4 -1
- pixeltable/utils/coroutine.py +6 -23
- pixeltable/utils/dbms.py +32 -6
- pixeltable/utils/description_helper.py +4 -5
- pixeltable/utils/documents.py +7 -18
- pixeltable/utils/exception_handler.py +7 -30
- pixeltable/utils/filecache.py +6 -6
- pixeltable/utils/formatter.py +86 -48
- pixeltable/utils/gcs_store.py +295 -0
- pixeltable/utils/http.py +133 -0
- pixeltable/utils/http_server.py +2 -3
- pixeltable/utils/iceberg.py +1 -2
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +90 -0
- pixeltable/utils/local_store.py +322 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +573 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +5 -6
- pixeltable/utils/s3_store.py +527 -0
- pixeltable/utils/sql.py +26 -0
- pixeltable/utils/system.py +30 -0
- pixeltable-0.5.7.dist-info/METADATA +579 -0
- pixeltable-0.5.7.dist-info/RECORD +227 -0
- {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
- pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/catalog/named_function.py +0 -40
- pixeltable/ext/__init__.py +0 -17
- pixeltable/ext/functions/__init__.py +0 -11
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/utils/media_store.py +0 -77
- pixeltable/utils/s3.py +0 -17
- pixeltable-0.3.14.dist-info/METADATA +0 -434
- pixeltable-0.3.14.dist-info/RECORD +0 -186
- pixeltable-0.3.14.dist-info/entry_points.txt +0 -3
- {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
pixeltable/functions/together.py
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
"""
|
|
2
|
-
Pixeltable
|
|
2
|
+
Pixeltable UDFs
|
|
3
3
|
that wrap various endpoints from the Together AI API. In order to use them, you must
|
|
4
4
|
first `pip install together` and configure your Together AI credentials, as described in
|
|
5
|
-
the [Working with Together AI](https://pixeltable.
|
|
5
|
+
the [Working with Together AI](https://docs.pixeltable.com/notebooks/integrations/working-with-together-ai) tutorial.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
8
|
import base64
|
|
9
9
|
import io
|
|
10
|
-
from typing import TYPE_CHECKING,
|
|
10
|
+
from typing import TYPE_CHECKING, Any, Callable, TypeVar
|
|
11
11
|
|
|
12
12
|
import numpy as np
|
|
13
13
|
import PIL.Image
|
|
@@ -50,21 +50,7 @@ def _retry(fn: Callable[..., T]) -> Callable[..., T]:
|
|
|
50
50
|
|
|
51
51
|
|
|
52
52
|
@pxt.udf(resource_pool='request-rate:together:chat')
|
|
53
|
-
async def completions(
|
|
54
|
-
prompt: str,
|
|
55
|
-
*,
|
|
56
|
-
model: str,
|
|
57
|
-
max_tokens: Optional[int] = None,
|
|
58
|
-
stop: Optional[list] = None,
|
|
59
|
-
temperature: Optional[float] = None,
|
|
60
|
-
top_p: Optional[float] = None,
|
|
61
|
-
top_k: Optional[int] = None,
|
|
62
|
-
repetition_penalty: Optional[float] = None,
|
|
63
|
-
logprobs: Optional[int] = None,
|
|
64
|
-
echo: Optional[bool] = None,
|
|
65
|
-
n: Optional[int] = None,
|
|
66
|
-
safety_model: Optional[str] = None,
|
|
67
|
-
) -> dict:
|
|
53
|
+
async def completions(prompt: str, *, model: str, model_kwargs: dict[str, Any] | None = None) -> dict:
|
|
68
54
|
"""
|
|
69
55
|
Generate completions based on a given prompt using a specified model.
|
|
70
56
|
|
|
@@ -82,8 +68,8 @@ async def completions(
|
|
|
82
68
|
Args:
|
|
83
69
|
prompt: A string providing context for the model to complete.
|
|
84
70
|
model: The name of the model to query.
|
|
85
|
-
|
|
86
|
-
|
|
71
|
+
model_kwargs: Additional keyword arguments for the Together `completions` API.
|
|
72
|
+
For details on the available parameters, see: <https://docs.together.ai/reference/completions-1>
|
|
87
73
|
|
|
88
74
|
Returns:
|
|
89
75
|
A dictionary containing the response and other metadata.
|
|
@@ -94,41 +80,16 @@ async def completions(
|
|
|
94
80
|
|
|
95
81
|
>>> tbl.add_computed_column(response=completions(tbl.prompt, model='mistralai/Mixtral-8x7B-v0.1'))
|
|
96
82
|
"""
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
stop=stop,
|
|
102
|
-
temperature=temperature,
|
|
103
|
-
top_p=top_p,
|
|
104
|
-
top_k=top_k,
|
|
105
|
-
repetition_penalty=repetition_penalty,
|
|
106
|
-
logprobs=logprobs,
|
|
107
|
-
echo=echo,
|
|
108
|
-
n=n,
|
|
109
|
-
safety_model=safety_model,
|
|
110
|
-
)
|
|
83
|
+
if model_kwargs is None:
|
|
84
|
+
model_kwargs = {}
|
|
85
|
+
|
|
86
|
+
result = await _together_client().completions.create(prompt=prompt, model=model, **model_kwargs)
|
|
111
87
|
return result.dict()
|
|
112
88
|
|
|
113
89
|
|
|
114
90
|
@pxt.udf(resource_pool='request-rate:together:chat')
|
|
115
91
|
async def chat_completions(
|
|
116
|
-
messages: list[dict[str, str]],
|
|
117
|
-
*,
|
|
118
|
-
model: str,
|
|
119
|
-
max_tokens: Optional[int] = None,
|
|
120
|
-
stop: Optional[list[str]] = None,
|
|
121
|
-
temperature: Optional[float] = None,
|
|
122
|
-
top_p: Optional[float] = None,
|
|
123
|
-
top_k: Optional[int] = None,
|
|
124
|
-
repetition_penalty: Optional[float] = None,
|
|
125
|
-
logprobs: Optional[int] = None,
|
|
126
|
-
echo: Optional[bool] = None,
|
|
127
|
-
n: Optional[int] = None,
|
|
128
|
-
safety_model: Optional[str] = None,
|
|
129
|
-
response_format: Optional[dict] = None,
|
|
130
|
-
tools: Optional[dict] = None,
|
|
131
|
-
tool_choice: Optional[dict] = None,
|
|
92
|
+
messages: list[dict[str, str]], *, model: str, model_kwargs: dict[str, Any] | None = None
|
|
132
93
|
) -> dict:
|
|
133
94
|
"""
|
|
134
95
|
Generate chat completions based on a given prompt using a specified model.
|
|
@@ -147,8 +108,8 @@ async def chat_completions(
|
|
|
147
108
|
Args:
|
|
148
109
|
messages: A list of messages comprising the conversation so far.
|
|
149
110
|
model: The name of the model to query.
|
|
150
|
-
|
|
151
|
-
|
|
111
|
+
model_kwargs: Additional keyword arguments for the Together `chat/completions` API.
|
|
112
|
+
For details on the available parameters, see: <https://docs.together.ai/reference/chat-completions-1>
|
|
152
113
|
|
|
153
114
|
Returns:
|
|
154
115
|
A dictionary containing the response and other metadata.
|
|
@@ -160,23 +121,10 @@ async def chat_completions(
|
|
|
160
121
|
>>> messages = [{'role': 'user', 'content': tbl.prompt}]
|
|
161
122
|
... tbl.add_computed_column(response=chat_completions(messages, model='mistralai/Mixtral-8x7B-v0.1'))
|
|
162
123
|
"""
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
stop=stop,
|
|
168
|
-
temperature=temperature,
|
|
169
|
-
top_p=top_p,
|
|
170
|
-
top_k=top_k,
|
|
171
|
-
repetition_penalty=repetition_penalty,
|
|
172
|
-
logprobs=logprobs,
|
|
173
|
-
echo=echo,
|
|
174
|
-
n=n,
|
|
175
|
-
safety_model=safety_model,
|
|
176
|
-
response_format=response_format,
|
|
177
|
-
tools=tools,
|
|
178
|
-
tool_choice=tool_choice,
|
|
179
|
-
)
|
|
124
|
+
if model_kwargs is None:
|
|
125
|
+
model_kwargs = {}
|
|
126
|
+
|
|
127
|
+
result = await _together_client().chat.completions.create(messages=messages, model=model, **model_kwargs)
|
|
180
128
|
return result.dict()
|
|
181
129
|
|
|
182
130
|
|
|
@@ -235,16 +183,7 @@ def _(model: str) -> ts.ArrayType:
|
|
|
235
183
|
|
|
236
184
|
|
|
237
185
|
@pxt.udf(resource_pool='request-rate:together:images')
|
|
238
|
-
async def image_generations(
|
|
239
|
-
prompt: str,
|
|
240
|
-
*,
|
|
241
|
-
model: str,
|
|
242
|
-
steps: Optional[int] = None,
|
|
243
|
-
seed: Optional[int] = None,
|
|
244
|
-
height: Optional[int] = None,
|
|
245
|
-
width: Optional[int] = None,
|
|
246
|
-
negative_prompt: Optional[str] = None,
|
|
247
|
-
) -> PIL.Image.Image:
|
|
186
|
+
async def image_generations(prompt: str, *, model: str, model_kwargs: dict[str, Any] | None = None) -> PIL.Image.Image:
|
|
248
187
|
"""
|
|
249
188
|
Generate images based on a given prompt using a specified model.
|
|
250
189
|
|
|
@@ -262,8 +201,8 @@ async def image_generations(
|
|
|
262
201
|
Args:
|
|
263
202
|
prompt: A description of the desired images.
|
|
264
203
|
model: The model to use for image generation.
|
|
265
|
-
|
|
266
|
-
|
|
204
|
+
model_kwargs: Additional keyword args for the Together `images/generations` API.
|
|
205
|
+
For details on the available parameters, see: <https://docs.together.ai/reference/post_images-generations>
|
|
267
206
|
|
|
268
207
|
Returns:
|
|
269
208
|
The generated image.
|
|
@@ -276,9 +215,10 @@ async def image_generations(
|
|
|
276
215
|
... response=image_generations(tbl.prompt, model='stabilityai/stable-diffusion-xl-base-1.0')
|
|
277
216
|
... )
|
|
278
217
|
"""
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
218
|
+
if model_kwargs is None:
|
|
219
|
+
model_kwargs = {}
|
|
220
|
+
|
|
221
|
+
result = await _together_client().images.generate(prompt=prompt, model=model, **model_kwargs)
|
|
282
222
|
if result.data[0].b64_json is not None:
|
|
283
223
|
b64_bytes = base64.b64decode(result.data[0].b64_json)
|
|
284
224
|
img = PIL.Image.open(io.BytesIO(b64_bytes))
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pixeltable UDFs
|
|
3
|
+
that wrap various endpoints from the TwelveLabs API. In order to use them, you must
|
|
4
|
+
first `pip install twelvelabs` and configure your TwelveLabs credentials, as described in
|
|
5
|
+
the [Working with TwelveLabs](https://docs.pixeltable.com/notebooks/integrations/working-with-twelvelabs) tutorial.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from base64 import b64encode
|
|
9
|
+
from typing import TYPE_CHECKING, Literal
|
|
10
|
+
|
|
11
|
+
import numpy as np
|
|
12
|
+
|
|
13
|
+
import pixeltable as pxt
|
|
14
|
+
from pixeltable import env, type_system as ts
|
|
15
|
+
from pixeltable.utils.code import local_public_names
|
|
16
|
+
from pixeltable.utils.image import to_base64
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from twelvelabs import AsyncTwelveLabs
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@env.register_client('twelvelabs')
|
|
23
|
+
def _(api_key: str) -> 'AsyncTwelveLabs':
|
|
24
|
+
from twelvelabs import AsyncTwelveLabs
|
|
25
|
+
|
|
26
|
+
return AsyncTwelveLabs(api_key=api_key)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _twelvelabs_client() -> 'AsyncTwelveLabs':
|
|
30
|
+
return env.Env.get().get_client('twelvelabs')
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@pxt.udf(resource_pool='request-rate:twelvelabs')
|
|
34
|
+
async def embed(text: str, image: pxt.Image | None = None, *, model_name: str) -> pxt.Array[np.float32] | None:
|
|
35
|
+
"""
|
|
36
|
+
Creates an embedding vector for the given text, audio, image, or video input.
|
|
37
|
+
|
|
38
|
+
Each UDF signature corresponds to one of the four supported input types. If text is specified, it is possible to
|
|
39
|
+
specify an image as well, corresponding to the `text_image` embedding type in the TwelveLabs API. This is
|
|
40
|
+
(currently) the only way to include more than one input type at a time.
|
|
41
|
+
|
|
42
|
+
Equivalent to the TwelveLabs Embed API:
|
|
43
|
+
<https://docs.twelvelabs.io/v1.3/docs/guides/create-embeddings>
|
|
44
|
+
|
|
45
|
+
Request throttling:
|
|
46
|
+
Applies the rate limit set in the config (section `twelvelabs`, key `rate_limit`). If no rate
|
|
47
|
+
limit is configured, uses a default of 600 RPM.
|
|
48
|
+
|
|
49
|
+
__Requirements:__
|
|
50
|
+
|
|
51
|
+
- `pip install twelvelabs`
|
|
52
|
+
|
|
53
|
+
Args:
|
|
54
|
+
model_name: The name of the model to use. Check
|
|
55
|
+
[the TwelveLabs documentation](https://docs.twelvelabs.io/v1.3/sdk-reference/python/create-text-image-and-audio-embeddings)
|
|
56
|
+
for available models.
|
|
57
|
+
text: The text to embed.
|
|
58
|
+
image: If specified, the embedding will be created from both the text and the image.
|
|
59
|
+
|
|
60
|
+
Returns:
|
|
61
|
+
The embedding.
|
|
62
|
+
|
|
63
|
+
Examples:
|
|
64
|
+
Add a computed column `embed` for an embedding of a string column `input`:
|
|
65
|
+
|
|
66
|
+
>>> tbl.add_computed_column(
|
|
67
|
+
... embed=embed(model_name='marengo3.0', text=tbl.input)
|
|
68
|
+
... )
|
|
69
|
+
"""
|
|
70
|
+
env.Env.get().require_package('twelvelabs')
|
|
71
|
+
import twelvelabs
|
|
72
|
+
|
|
73
|
+
cl = _twelvelabs_client()
|
|
74
|
+
res: twelvelabs.EmbeddingSuccessResponse
|
|
75
|
+
if image is None:
|
|
76
|
+
# Text-only
|
|
77
|
+
res = await cl.embed.v_2.create(
|
|
78
|
+
input_type='text', model_name=model_name, text=twelvelabs.TextInputRequest(input_text=text)
|
|
79
|
+
)
|
|
80
|
+
else:
|
|
81
|
+
b64str = to_base64(image, format=('png' if image.has_transparency_data else 'jpeg'))
|
|
82
|
+
res = await cl.embed.v_2.create(
|
|
83
|
+
input_type='text_image',
|
|
84
|
+
model_name=model_name,
|
|
85
|
+
text_image=twelvelabs.TextImageInputRequest(
|
|
86
|
+
media_source=twelvelabs.MediaSource(base_64_string=b64str), input_text=text
|
|
87
|
+
),
|
|
88
|
+
)
|
|
89
|
+
if not res.data:
|
|
90
|
+
raise pxt.Error(f"Didn't receive embedding for text: {text}\n{res}")
|
|
91
|
+
vector = res.data[0].embedding
|
|
92
|
+
return np.array(vector, dtype='float32')
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
@embed.overload
|
|
96
|
+
async def _(image: pxt.Image, *, model_name: str) -> pxt.Array[np.float32] | None:
|
|
97
|
+
env.Env.get().require_package('twelvelabs')
|
|
98
|
+
import twelvelabs
|
|
99
|
+
|
|
100
|
+
cl = _twelvelabs_client()
|
|
101
|
+
b64_str = to_base64(image, format=('png' if image.has_transparency_data else 'jpeg'))
|
|
102
|
+
res = await cl.embed.v_2.create(
|
|
103
|
+
input_type='image',
|
|
104
|
+
model_name=model_name,
|
|
105
|
+
image=twelvelabs.ImageInputRequest(media_source=twelvelabs.MediaSource(base_64_string=b64_str)),
|
|
106
|
+
)
|
|
107
|
+
if not res.data:
|
|
108
|
+
raise pxt.Error(f"Didn't receive embedding for image: {image}\n{res}")
|
|
109
|
+
vector = res.data[0].embedding
|
|
110
|
+
return np.array(vector, dtype='float32')
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
@embed.overload
|
|
114
|
+
async def _(
|
|
115
|
+
audio: pxt.Audio,
|
|
116
|
+
*,
|
|
117
|
+
model_name: str,
|
|
118
|
+
start_sec: float | None = None,
|
|
119
|
+
end_sec: float | None = None,
|
|
120
|
+
embedding_option: list[Literal['audio', 'transcription']] | None = None,
|
|
121
|
+
) -> pxt.Array[np.float32] | None:
|
|
122
|
+
env.Env.get().require_package('twelvelabs')
|
|
123
|
+
import twelvelabs
|
|
124
|
+
|
|
125
|
+
cl = _twelvelabs_client()
|
|
126
|
+
with open(audio, 'rb') as fp:
|
|
127
|
+
b64_str = b64encode(fp.read()).decode('utf-8')
|
|
128
|
+
res = await cl.embed.v_2.create(
|
|
129
|
+
input_type='audio',
|
|
130
|
+
model_name=model_name,
|
|
131
|
+
audio=twelvelabs.AudioInputRequest(
|
|
132
|
+
media_source=twelvelabs.MediaSource(base_64_string=b64_str),
|
|
133
|
+
start_sec=start_sec,
|
|
134
|
+
end_sec=end_sec,
|
|
135
|
+
embedding_option=embedding_option,
|
|
136
|
+
),
|
|
137
|
+
)
|
|
138
|
+
if not res.data:
|
|
139
|
+
raise pxt.Error(f"Didn't receive embedding for audio: {audio}\n{res}")
|
|
140
|
+
vector = res.data[0].embedding
|
|
141
|
+
return np.array(vector, dtype='float32')
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
@embed.overload
|
|
145
|
+
async def _(
|
|
146
|
+
video: pxt.Video,
|
|
147
|
+
*,
|
|
148
|
+
model_name: str,
|
|
149
|
+
start_sec: float | None = None,
|
|
150
|
+
end_sec: float | None = None,
|
|
151
|
+
embedding_option: list[Literal['visual', 'audio', 'transcription']] | None = None,
|
|
152
|
+
) -> pxt.Array[np.float32] | None:
|
|
153
|
+
env.Env.get().require_package('twelvelabs')
|
|
154
|
+
import twelvelabs
|
|
155
|
+
|
|
156
|
+
cl = _twelvelabs_client()
|
|
157
|
+
with open(video, 'rb') as fp:
|
|
158
|
+
b64_str = b64encode(fp.read()).decode('utf-8')
|
|
159
|
+
res = await cl.embed.v_2.create(
|
|
160
|
+
input_type='video',
|
|
161
|
+
model_name=model_name,
|
|
162
|
+
video=twelvelabs.VideoInputRequest(
|
|
163
|
+
media_source=twelvelabs.MediaSource(base_64_string=b64_str),
|
|
164
|
+
start_sec=start_sec,
|
|
165
|
+
end_sec=end_sec,
|
|
166
|
+
embedding_option=embedding_option,
|
|
167
|
+
),
|
|
168
|
+
)
|
|
169
|
+
if not res.data:
|
|
170
|
+
raise pxt.Error(f"Didn't receive embedding for video: {video}\n{res}")
|
|
171
|
+
vector = res.data[0].embedding
|
|
172
|
+
return np.array(vector, dtype='float32')
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
@embed.conditional_return_type
|
|
176
|
+
def _(model_name: str) -> ts.ArrayType:
|
|
177
|
+
if model_name == 'Marengo-retrieval-2.7':
|
|
178
|
+
return ts.ArrayType(shape=(1024,), dtype=np.dtype('float32'))
|
|
179
|
+
if model_name == 'marengo3.0':
|
|
180
|
+
return ts.ArrayType(shape=(512,), dtype=np.dtype('float32'))
|
|
181
|
+
return ts.ArrayType(dtype=np.dtype('float32'))
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
__all__ = local_public_names(__name__)
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def __dir__() -> list[str]:
|
|
188
|
+
return __all__
|
pixeltable/functions/util.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import PIL.Image
|
|
2
2
|
|
|
3
|
+
from pixeltable.config import Config
|
|
3
4
|
from pixeltable.env import Env
|
|
4
5
|
|
|
5
6
|
|
|
@@ -7,10 +8,14 @@ def resolve_torch_device(device: str, allow_mps: bool = True) -> str:
|
|
|
7
8
|
Env.get().require_package('torch')
|
|
8
9
|
import torch
|
|
9
10
|
|
|
11
|
+
mps_enabled = Config.get().get_bool_value('enable_mps')
|
|
12
|
+
if mps_enabled is None:
|
|
13
|
+
mps_enabled = True # Default to True if not set in config
|
|
14
|
+
|
|
10
15
|
if device == 'auto':
|
|
11
16
|
if torch.cuda.is_available():
|
|
12
17
|
return 'cuda'
|
|
13
|
-
if allow_mps and torch.backends.mps.is_available():
|
|
18
|
+
if mps_enabled and allow_mps and torch.backends.mps.is_available():
|
|
14
19
|
return 'mps'
|
|
15
20
|
return 'cpu'
|
|
16
21
|
return device
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pixeltable UDFs for `UUIDType`.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import uuid
|
|
6
|
+
|
|
7
|
+
import sqlalchemy as sql
|
|
8
|
+
|
|
9
|
+
import pixeltable as pxt
|
|
10
|
+
from pixeltable.utils.code import local_public_names
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@pxt.udf
|
|
14
|
+
def uuid4() -> uuid.UUID:
|
|
15
|
+
"""
|
|
16
|
+
Generate a random UUID (version 4).
|
|
17
|
+
"""
|
|
18
|
+
return uuid.uuid4()
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@uuid4.to_sql
|
|
22
|
+
def _() -> sql.ColumnElement:
|
|
23
|
+
return sql.func.gen_random_uuid() # Generates uuid version 4
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
__all__ = local_public_names(__name__)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def __dir__() -> list[str]:
|
|
30
|
+
return __all__
|