pixeltable 0.4.18__py3-none-any.whl → 0.4.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +1 -1
- pixeltable/_version.py +1 -0
- pixeltable/catalog/catalog.py +119 -100
- pixeltable/catalog/column.py +104 -115
- pixeltable/catalog/globals.py +1 -2
- pixeltable/catalog/insertable_table.py +44 -49
- pixeltable/catalog/path.py +3 -4
- pixeltable/catalog/schema_object.py +4 -4
- pixeltable/catalog/table.py +118 -122
- pixeltable/catalog/table_metadata.py +6 -6
- pixeltable/catalog/table_version.py +322 -257
- pixeltable/catalog/table_version_handle.py +4 -4
- pixeltable/catalog/table_version_path.py +9 -10
- pixeltable/catalog/tbl_ops.py +9 -3
- pixeltable/catalog/view.py +34 -28
- pixeltable/config.py +14 -10
- pixeltable/dataframe.py +68 -77
- pixeltable/env.py +74 -64
- pixeltable/exec/aggregation_node.py +6 -6
- pixeltable/exec/cache_prefetch_node.py +10 -10
- pixeltable/exec/data_row_batch.py +3 -3
- pixeltable/exec/exec_context.py +4 -5
- pixeltable/exec/exec_node.py +5 -5
- pixeltable/exec/expr_eval/evaluators.py +6 -6
- pixeltable/exec/expr_eval/expr_eval_node.py +8 -7
- pixeltable/exec/expr_eval/globals.py +6 -6
- pixeltable/exec/expr_eval/row_buffer.py +1 -2
- pixeltable/exec/expr_eval/schedulers.py +11 -11
- pixeltable/exec/in_memory_data_node.py +2 -2
- pixeltable/exec/object_store_save_node.py +14 -17
- pixeltable/exec/sql_node.py +25 -25
- pixeltable/exprs/arithmetic_expr.py +4 -4
- pixeltable/exprs/array_slice.py +2 -2
- pixeltable/exprs/column_property_ref.py +3 -3
- pixeltable/exprs/column_ref.py +61 -74
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +3 -3
- pixeltable/exprs/data_row.py +12 -12
- pixeltable/exprs/expr.py +41 -31
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +3 -3
- pixeltable/exprs/function_call.py +14 -14
- pixeltable/exprs/in_predicate.py +4 -4
- pixeltable/exprs/inline_expr.py +8 -8
- pixeltable/exprs/is_null.py +1 -3
- pixeltable/exprs/json_mapper.py +8 -8
- pixeltable/exprs/json_path.py +6 -6
- pixeltable/exprs/literal.py +5 -5
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +14 -14
- pixeltable/exprs/rowid_ref.py +8 -8
- pixeltable/exprs/similarity_expr.py +50 -25
- pixeltable/exprs/sql_element_cache.py +4 -4
- pixeltable/exprs/string_op.py +2 -2
- pixeltable/exprs/type_cast.py +3 -5
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/callable_function.py +9 -9
- pixeltable/func/expr_template_function.py +3 -3
- pixeltable/func/function.py +15 -17
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/globals.py +2 -3
- pixeltable/func/mcp.py +2 -2
- pixeltable/func/query_template_function.py +16 -16
- pixeltable/func/signature.py +14 -14
- pixeltable/func/tools.py +11 -11
- pixeltable/func/udf.py +16 -18
- pixeltable/functions/__init__.py +1 -0
- pixeltable/functions/anthropic.py +7 -7
- pixeltable/functions/audio.py +76 -0
- pixeltable/functions/bedrock.py +6 -6
- pixeltable/functions/deepseek.py +4 -4
- pixeltable/functions/fireworks.py +2 -2
- pixeltable/functions/gemini.py +6 -6
- pixeltable/functions/globals.py +12 -12
- pixeltable/functions/groq.py +4 -4
- pixeltable/functions/huggingface.py +18 -20
- pixeltable/functions/image.py +7 -10
- pixeltable/functions/llama_cpp.py +7 -7
- pixeltable/functions/math.py +2 -3
- pixeltable/functions/mistralai.py +3 -3
- pixeltable/functions/ollama.py +9 -9
- pixeltable/functions/openai.py +21 -21
- pixeltable/functions/openrouter.py +7 -7
- pixeltable/functions/string.py +21 -28
- pixeltable/functions/timestamp.py +7 -8
- pixeltable/functions/together.py +4 -6
- pixeltable/functions/twelvelabs.py +92 -0
- pixeltable/functions/video.py +2 -24
- pixeltable/functions/vision.py +6 -6
- pixeltable/functions/whisper.py +7 -7
- pixeltable/functions/whisperx.py +16 -16
- pixeltable/globals.py +52 -36
- pixeltable/index/base.py +12 -8
- pixeltable/index/btree.py +19 -22
- pixeltable/index/embedding_index.py +30 -39
- pixeltable/io/datarows.py +3 -3
- pixeltable/io/external_store.py +13 -16
- pixeltable/io/fiftyone.py +5 -5
- pixeltable/io/globals.py +5 -5
- pixeltable/io/hf_datasets.py +4 -4
- pixeltable/io/label_studio.py +12 -12
- pixeltable/io/pandas.py +6 -6
- pixeltable/io/parquet.py +2 -2
- pixeltable/io/table_data_conduit.py +12 -12
- pixeltable/io/utils.py +2 -2
- pixeltable/iterators/audio.py +2 -2
- pixeltable/iterators/video.py +8 -13
- pixeltable/metadata/converters/convert_18.py +2 -2
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_20.py +2 -2
- pixeltable/metadata/converters/convert_21.py +2 -2
- pixeltable/metadata/converters/convert_22.py +2 -2
- pixeltable/metadata/converters/convert_24.py +2 -2
- pixeltable/metadata/converters/convert_25.py +2 -2
- pixeltable/metadata/converters/convert_26.py +2 -2
- pixeltable/metadata/converters/convert_29.py +4 -4
- pixeltable/metadata/converters/convert_34.py +2 -2
- pixeltable/metadata/converters/convert_36.py +2 -2
- pixeltable/metadata/converters/convert_38.py +2 -2
- pixeltable/metadata/converters/convert_39.py +1 -2
- pixeltable/metadata/converters/util.py +11 -13
- pixeltable/metadata/schema.py +22 -21
- pixeltable/metadata/utils.py +2 -6
- pixeltable/mypy/mypy_plugin.py +5 -5
- pixeltable/plan.py +30 -28
- pixeltable/share/packager.py +7 -7
- pixeltable/share/publish.py +3 -3
- pixeltable/store.py +125 -61
- pixeltable/type_system.py +43 -46
- pixeltable/utils/__init__.py +1 -2
- pixeltable/utils/arrow.py +4 -4
- pixeltable/utils/av.py +8 -0
- pixeltable/utils/azure_store.py +305 -0
- pixeltable/utils/code.py +1 -2
- pixeltable/utils/dbms.py +15 -19
- pixeltable/utils/description_helper.py +2 -3
- pixeltable/utils/documents.py +5 -6
- pixeltable/utils/exception_handler.py +2 -2
- pixeltable/utils/filecache.py +5 -5
- pixeltable/utils/formatter.py +4 -6
- pixeltable/utils/gcs_store.py +9 -9
- pixeltable/utils/local_store.py +17 -17
- pixeltable/utils/object_stores.py +59 -43
- pixeltable/utils/s3_store.py +35 -30
- {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/METADATA +1 -1
- pixeltable-0.4.19.dist-info/RECORD +213 -0
- pixeltable/__version__.py +0 -3
- pixeltable-0.4.18.dist-info/RECORD +0 -211
- {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/entry_points.txt +0 -0
- {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/licenses/LICENSE +0 -0
pixeltable/functions/string.py
CHANGED
|
@@ -15,7 +15,7 @@ import builtins
|
|
|
15
15
|
import re
|
|
16
16
|
import textwrap
|
|
17
17
|
from string import whitespace
|
|
18
|
-
from typing import Any
|
|
18
|
+
from typing import Any
|
|
19
19
|
|
|
20
20
|
import sqlalchemy as sql
|
|
21
21
|
|
|
@@ -78,9 +78,7 @@ def contains(self: str, substr: str, case: bool = True) -> bool:
|
|
|
78
78
|
|
|
79
79
|
|
|
80
80
|
@contains.to_sql
|
|
81
|
-
def _(
|
|
82
|
-
self: sql.ColumnElement, substr: sql.ColumnElement, case: Optional[sql.ColumnElement] = None
|
|
83
|
-
) -> sql.ColumnElement:
|
|
81
|
+
def _(self: sql.ColumnElement, substr: sql.ColumnElement, case: sql.ColumnElement | None = None) -> sql.ColumnElement:
|
|
84
82
|
# Replace all occurrences of `%`, `_`, and `\` with escaped versions
|
|
85
83
|
escaped_substr = sql.func.regexp_replace(substr, r'(%|_|\\)', r'\\\1', 'g')
|
|
86
84
|
if case is None:
|
|
@@ -153,7 +151,7 @@ def fill(self: str, width: int, **kwargs: Any) -> str:
|
|
|
153
151
|
|
|
154
152
|
|
|
155
153
|
@pxt.udf(is_method=True)
|
|
156
|
-
def find(self: str, substr: str, start: int = 0, end: Optional[int] = None) -> int:
|
|
154
|
+
def find(self: str, substr: str, start: int = 0, end: int | None = None) -> int:
|
|
157
155
|
"""
|
|
158
156
|
Return the lowest index in string where `substr` is found within the slice `s[start:end]`.
|
|
159
157
|
|
|
@@ -169,10 +167,7 @@ def find(self: str, substr: str, start: int = 0, end: Optional[int] = None) -> i
|
|
|
169
167
|
|
|
170
168
|
@find.to_sql
|
|
171
169
|
def _(
|
|
172
|
-
self: sql.ColumnElement,
|
|
173
|
-
substr: sql.ColumnElement,
|
|
174
|
-
start: sql.ColumnElement,
|
|
175
|
-
end: Optional[sql.ColumnElement] = None,
|
|
170
|
+
self: sql.ColumnElement, substr: sql.ColumnElement, start: sql.ColumnElement, end: sql.ColumnElement | None = None
|
|
176
171
|
) -> sql.ColumnElement:
|
|
177
172
|
sl = pxt.functions.string.slice._to_sql(self, start, end)
|
|
178
173
|
if sl is None:
|
|
@@ -227,7 +222,7 @@ def fullmatch(self: str, pattern: str, case: bool = True, flags: int = 0) -> boo
|
|
|
227
222
|
|
|
228
223
|
|
|
229
224
|
@pxt.udf(is_method=True)
|
|
230
|
-
def index(self: str, substr: str, start: int = 0, end:
|
|
225
|
+
def index(self: str, substr: str, start: int = 0, end: int | None = None) -> int:
|
|
231
226
|
"""
|
|
232
227
|
Return the lowest index in string where `substr` is found within the slice `[start:end]`.
|
|
233
228
|
Raises ValueError if `substr` is not found.
|
|
@@ -413,7 +408,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
|
413
408
|
|
|
414
409
|
|
|
415
410
|
@pxt.udf(is_method=True)
|
|
416
|
-
def lstrip(self: str, chars: Optional[str] = None) -> str:
|
|
411
|
+
def lstrip(self: str, chars: str | None = None) -> str:
|
|
417
412
|
"""
|
|
418
413
|
Return a copy of the string with leading characters removed. The `chars` argument is a string specifying the set of
|
|
419
414
|
characters to be removed. If omitted or `None`, whitespace characters are removed.
|
|
@@ -427,7 +422,7 @@ def lstrip(self: str, chars: Optional[str] = None) -> str:
|
|
|
427
422
|
|
|
428
423
|
|
|
429
424
|
@lstrip.to_sql
|
|
430
|
-
def _(self: sql.ColumnElement, chars:
|
|
425
|
+
def _(self: sql.ColumnElement, chars: sql.ColumnElement | None = None) -> sql.ColumnElement:
|
|
431
426
|
return sql.func.ltrim(self, chars if chars is not None else whitespace)
|
|
432
427
|
|
|
433
428
|
|
|
@@ -535,7 +530,7 @@ def _(self: sql.ColumnElement, n: sql.ColumnElement) -> sql.ColumnElement:
|
|
|
535
530
|
|
|
536
531
|
|
|
537
532
|
@pxt.udf(is_method=True)
|
|
538
|
-
def replace(self: str, substr: str, repl: str, n: Optional[int] = None) -> str:
|
|
533
|
+
def replace(self: str, substr: str, repl: str, n: int | None = None) -> str:
|
|
539
534
|
"""
|
|
540
535
|
Replace occurrences of `substr` with `repl`.
|
|
541
536
|
|
|
@@ -551,7 +546,7 @@ def replace(self: str, substr: str, repl: str, n: Optional[int] = None) -> str:
|
|
|
551
546
|
|
|
552
547
|
@replace.to_sql
|
|
553
548
|
def _(
|
|
554
|
-
self: sql.ColumnElement, substr: sql.ColumnElement, repl: sql.ColumnElement, n:
|
|
549
|
+
self: sql.ColumnElement, substr: sql.ColumnElement, repl: sql.ColumnElement, n: sql.ColumnElement | None = None
|
|
555
550
|
) -> sql.ColumnElement:
|
|
556
551
|
if n is not None:
|
|
557
552
|
return None # SQL does not support bounding the number of replacements
|
|
@@ -560,7 +555,7 @@ def _(
|
|
|
560
555
|
|
|
561
556
|
|
|
562
557
|
@pxt.udf(is_method=True)
|
|
563
|
-
def replace_re(self: str, pattern: str, repl: str, n:
|
|
558
|
+
def replace_re(self: str, pattern: str, repl: str, n: int | None = None, flags: int = 0) -> str:
|
|
564
559
|
"""
|
|
565
560
|
Replace occurrences of a regular expression pattern with `repl`.
|
|
566
561
|
|
|
@@ -591,7 +586,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
|
591
586
|
|
|
592
587
|
|
|
593
588
|
@pxt.udf(is_method=True)
|
|
594
|
-
def rfind(self: str, substr: str, start: Optional[int] = 0, end: Optional[int] = None) -> int:
|
|
589
|
+
def rfind(self: str, substr: str, start: int | None = 0, end: int | None = None) -> int:
|
|
595
590
|
"""
|
|
596
591
|
Return the highest index where `substr` is found, such that `substr` is contained within `[start:end]`.
|
|
597
592
|
|
|
@@ -606,7 +601,7 @@ def rfind(self: str, substr: str, start: Optional[int] = 0, end: Optional[int] =
|
|
|
606
601
|
|
|
607
602
|
|
|
608
603
|
@pxt.udf(is_method=True)
|
|
609
|
-
def rindex(self: str, substr: str, start:
|
|
604
|
+
def rindex(self: str, substr: str, start: int | None = 0, end: int | None = None) -> int:
|
|
610
605
|
"""
|
|
611
606
|
Return the highest index where `substr` is found, such that `substr` is contained within `[start:end]`.
|
|
612
607
|
Raises ValueError if `substr` is not found.
|
|
@@ -643,7 +638,7 @@ def rpartition(self: str, sep: str = ' ') -> list:
|
|
|
643
638
|
|
|
644
639
|
|
|
645
640
|
@pxt.udf(is_method=True)
|
|
646
|
-
def rstrip(self: str, chars: Optional[str] = None) -> str:
|
|
641
|
+
def rstrip(self: str, chars: str | None = None) -> str:
|
|
647
642
|
"""
|
|
648
643
|
Return a copy of string with trailing characters removed.
|
|
649
644
|
|
|
@@ -656,12 +651,12 @@ def rstrip(self: str, chars: Optional[str] = None) -> str:
|
|
|
656
651
|
|
|
657
652
|
|
|
658
653
|
@rstrip.to_sql
|
|
659
|
-
def _(self: sql.ColumnElement, chars:
|
|
654
|
+
def _(self: sql.ColumnElement, chars: sql.ColumnElement | None = None) -> sql.ColumnElement:
|
|
660
655
|
return sql.func.rtrim(self, chars if chars is not None else whitespace)
|
|
661
656
|
|
|
662
657
|
|
|
663
658
|
@pxt.udf(is_method=True)
|
|
664
|
-
def slice(self: str, start: Optional[int] = None, stop: Optional[int] = None, step: Optional[int] = None) -> str:
|
|
659
|
+
def slice(self: str, start: int | None = None, stop: int | None = None, step: int | None = None) -> str:
|
|
665
660
|
"""
|
|
666
661
|
Return a slice.
|
|
667
662
|
|
|
@@ -676,9 +671,9 @@ def slice(self: str, start: Optional[int] = None, stop: Optional[int] = None, st
|
|
|
676
671
|
@slice.to_sql
|
|
677
672
|
def _(
|
|
678
673
|
self: sql.ColumnElement,
|
|
679
|
-
start:
|
|
680
|
-
stop:
|
|
681
|
-
step:
|
|
674
|
+
start: sql.ColumnElement | None = None,
|
|
675
|
+
stop: sql.ColumnElement | None = None,
|
|
676
|
+
step: sql.ColumnElement | None = None,
|
|
682
677
|
) -> sql.ColumnElement:
|
|
683
678
|
if step is not None:
|
|
684
679
|
return None
|
|
@@ -709,9 +704,7 @@ def _(
|
|
|
709
704
|
|
|
710
705
|
|
|
711
706
|
@pxt.udf(is_method=True)
|
|
712
|
-
def slice_replace(
|
|
713
|
-
self: str, start: Optional[int] = None, stop: Optional[int] = None, repl: Optional[str] = None
|
|
714
|
-
) -> str:
|
|
707
|
+
def slice_replace(self: str, start: int | None = None, stop: int | None = None, repl: str | None = None) -> str:
|
|
715
708
|
"""
|
|
716
709
|
Replace a positional slice with another value.
|
|
717
710
|
|
|
@@ -744,7 +737,7 @@ def _(self: sql.ColumnElement, substr: sql.ColumnElement) -> sql.ColumnElement:
|
|
|
744
737
|
|
|
745
738
|
|
|
746
739
|
@pxt.udf(is_method=True)
|
|
747
|
-
def strip(self: str, chars: Optional[str] = None) -> str:
|
|
740
|
+
def strip(self: str, chars: str | None = None) -> str:
|
|
748
741
|
"""
|
|
749
742
|
Return a copy of string with leading and trailing characters removed.
|
|
750
743
|
|
|
@@ -757,7 +750,7 @@ def strip(self: str, chars: Optional[str] = None) -> str:
|
|
|
757
750
|
|
|
758
751
|
|
|
759
752
|
@strip.to_sql
|
|
760
|
-
def _(self: sql.ColumnElement, chars:
|
|
753
|
+
def _(self: sql.ColumnElement, chars: sql.ColumnElement | None = None) -> sql.ColumnElement:
|
|
761
754
|
return sql.func.trim(self, chars if chars is not None else whitespace)
|
|
762
755
|
|
|
763
756
|
|
|
pixeltable/functions/timestamp.py
CHANGED
|
@@ -11,7 +11,6 @@ t.select(t.timestamp_col.year, t.timestamp_col.weekday()).collect()
|
|
|
11
11
|
"""
|
|
12
12
|
|
|
13
13
|
from datetime import datetime
|
|
14
|
-
from typing import Optional
|
|
15
14
|
|
|
16
15
|
import sqlalchemy as sql
|
|
17
16
|
|
|
@@ -271,13 +270,13 @@ def _(
|
|
|
271
270
|
@pxt.udf(is_method=True)
|
|
272
271
|
def replace(
|
|
273
272
|
self: datetime,
|
|
274
|
-
year:
|
|
275
|
-
month:
|
|
276
|
-
day:
|
|
277
|
-
hour:
|
|
278
|
-
minute:
|
|
279
|
-
second:
|
|
280
|
-
microsecond:
|
|
273
|
+
year: int | None = None,
|
|
274
|
+
month: int | None = None,
|
|
275
|
+
day: int | None = None,
|
|
276
|
+
hour: int | None = None,
|
|
277
|
+
minute: int | None = None,
|
|
278
|
+
second: int | None = None,
|
|
279
|
+
microsecond: int | None = None,
|
|
281
280
|
) -> datetime:
|
|
282
281
|
"""
|
|
283
282
|
Return a datetime with the same attributes, except for those attributes given new values by whichever keyword
|
pixeltable/functions/together.py
CHANGED
|
@@ -7,7 +7,7 @@ the [Working with Together AI](https://pixeltable.readme.io/docs/together-ai) tu
|
|
|
7
7
|
|
|
8
8
|
import base64
|
|
9
9
|
import io
|
|
10
|
-
from typing import TYPE_CHECKING, Any, Callable,
|
|
10
|
+
from typing import TYPE_CHECKING, Any, Callable, TypeVar
|
|
11
11
|
|
|
12
12
|
import numpy as np
|
|
13
13
|
import PIL.Image
|
|
@@ -50,7 +50,7 @@ def _retry(fn: Callable[..., T]) -> Callable[..., T]:
|
|
|
50
50
|
|
|
51
51
|
|
|
52
52
|
@pxt.udf(resource_pool='request-rate:together:chat')
|
|
53
|
-
async def completions(prompt: str, *, model: str, model_kwargs: Optional[dict[str, Any]] = None) -> dict:
|
|
53
|
+
async def completions(prompt: str, *, model: str, model_kwargs: dict[str, Any] | None = None) -> dict:
|
|
54
54
|
"""
|
|
55
55
|
Generate completions based on a given prompt using a specified model.
|
|
56
56
|
|
|
@@ -89,7 +89,7 @@ async def completions(prompt: str, *, model: str, model_kwargs: Optional[dict[st
|
|
|
89
89
|
|
|
90
90
|
@pxt.udf(resource_pool='request-rate:together:chat')
|
|
91
91
|
async def chat_completions(
|
|
92
|
-
messages: list[dict[str, str]], *, model: str, model_kwargs:
|
|
92
|
+
messages: list[dict[str, str]], *, model: str, model_kwargs: dict[str, Any] | None = None
|
|
93
93
|
) -> dict:
|
|
94
94
|
"""
|
|
95
95
|
Generate chat completions based on a given prompt using a specified model.
|
|
@@ -183,9 +183,7 @@ def _(model: str) -> ts.ArrayType:
|
|
|
183
183
|
|
|
184
184
|
|
|
185
185
|
@pxt.udf(resource_pool='request-rate:together:images')
|
|
186
|
-
async def image_generations(
|
|
187
|
-
prompt: str, *, model: str, model_kwargs: Optional[dict[str, Any]] = None
|
|
188
|
-
) -> PIL.Image.Image:
|
|
186
|
+
async def image_generations(prompt: str, *, model: str, model_kwargs: dict[str, Any] | None = None) -> PIL.Image.Image:
|
|
189
187
|
"""
|
|
190
188
|
Generate images based on a given prompt using a specified model.
|
|
191
189
|
|
|
pixeltable/functions/twelvelabs.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs)
|
|
3
|
+
that wrap various endpoints from the TwelveLabs API. In order to use them, you must
|
|
4
|
+
first `pip install twelvelabs` and configure your TwelveLabs credentials, as described in
|
|
5
|
+
the [Working with TwelveLabs](https://pixeltable.readme.io/docs/working-with-twelvelabs) tutorial.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import TYPE_CHECKING, Any, Literal
|
|
9
|
+
|
|
10
|
+
import numpy as np
|
|
11
|
+
|
|
12
|
+
import pixeltable as pxt
|
|
13
|
+
from pixeltable import env
|
|
14
|
+
from pixeltable.utils.code import local_public_names
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from twelvelabs import AsyncTwelveLabs
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@env.register_client('twelvelabs')
|
|
21
|
+
def _(api_key: str) -> 'AsyncTwelveLabs':
|
|
22
|
+
from twelvelabs import AsyncTwelveLabs
|
|
23
|
+
|
|
24
|
+
return AsyncTwelveLabs(api_key=api_key)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _twelvelabs_client() -> 'AsyncTwelveLabs':
|
|
28
|
+
return env.Env.get().get_client('twelvelabs')
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@pxt.udf(resource_pool='request-rate:twelvelabs')
|
|
32
|
+
async def embed(
|
|
33
|
+
model_name: str,
|
|
34
|
+
*,
|
|
35
|
+
text: str | None = None,
|
|
36
|
+
text_truncate: Literal['none', 'start', 'end'] | None = None,
|
|
37
|
+
audio: pxt.Audio | None = None,
|
|
38
|
+
# TODO: support images
|
|
39
|
+
# image: pxt.Image | None = None,
|
|
40
|
+
**kwargs: Any,
|
|
41
|
+
) -> pxt.Array[(1024,), pxt.Float]:
|
|
42
|
+
"""
|
|
43
|
+
Creates an embedding vector for the given `text`, `audio`, or `image` parameter. Only one of `text`, `audio`, or
|
|
44
|
+
`image` may be specified.
|
|
45
|
+
|
|
46
|
+
Equivalent to the TwelveLabs Embed API.
|
|
47
|
+
https://docs.twelvelabs.io/v1.3/docs/guides/create-embeddings
|
|
48
|
+
|
|
49
|
+
Request throttling:
|
|
50
|
+
Applies the rate limit set in the config (section `twelvelabs`, key `rate_limit`). If no rate
|
|
51
|
+
limit is configured, uses a default of 600 RPM.
|
|
52
|
+
|
|
53
|
+
__Requirements:__
|
|
54
|
+
|
|
55
|
+
- `pip install twelvelabs`
|
|
56
|
+
|
|
57
|
+
Args:
|
|
58
|
+
model_name: The name of the model to use. Check
|
|
59
|
+
[the TwelveLabs documentation](https://docs.twelvelabs.io/v1.3/sdk-reference/python/create-text-image-and-audio-embeddings)
|
|
60
|
+
for available models.
|
|
61
|
+
text: The text to embed.
|
|
62
|
+
text_truncate: Truncation mode for the text.
|
|
63
|
+
audio: The audio to embed.
|
|
64
|
+
|
|
65
|
+
Returns:
|
|
66
|
+
The embedding.
|
|
67
|
+
|
|
68
|
+
Examples:
|
|
69
|
+
Add a computed column `embed` for an embedding of a string column `input`:
|
|
70
|
+
|
|
71
|
+
>>> tbl.add_computed_column(
|
|
72
|
+
... embed=embed(model_name='Marengo-retrieval-2.7', text=tbl.input)
|
|
73
|
+
... )
|
|
74
|
+
"""
|
|
75
|
+
cl = _twelvelabs_client()
|
|
76
|
+
res = await cl.embed.create(
|
|
77
|
+
model_name=model_name, text=text, text_truncate=text_truncate, audio_file=audio, **kwargs
|
|
78
|
+
)
|
|
79
|
+
if text is not None:
|
|
80
|
+
if res.text_embedding is None:
|
|
81
|
+
raise pxt.Error(f"Didn't receive embedding for text: {text}")
|
|
82
|
+
vector = res.text_embedding.segments[0].float_
|
|
83
|
+
return np.array(vector, dtype=np.float64)
|
|
84
|
+
# TODO: handle audio and image, once we know how to get a non-error response
|
|
85
|
+
return None
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
__all__ = local_public_names(__name__)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def __dir__() -> list[str]:
|
|
92
|
+
return __all__
|
pixeltable/functions/video.py
CHANGED
|
@@ -20,28 +20,6 @@ from pixeltable.utils.code import local_public_names
|
|
|
20
20
|
from pixeltable.utils.local_store import TempStore
|
|
21
21
|
|
|
22
22
|
_logger = logging.getLogger('pixeltable')
|
|
23
|
-
_format_defaults: dict[str, tuple[str, str]] = { # format -> (codec, ext)
|
|
24
|
-
'wav': ('pcm_s16le', 'wav'),
|
|
25
|
-
'mp3': ('libmp3lame', 'mp3'),
|
|
26
|
-
'flac': ('flac', 'flac'),
|
|
27
|
-
# 'mp4': ('aac', 'm4a'),
|
|
28
|
-
}
|
|
29
|
-
|
|
30
|
-
# for mp4:
|
|
31
|
-
# - extract_audio() fails with
|
|
32
|
-
# "Application provided invalid, non monotonically increasing dts to muxer in stream 0: 1146 >= 290"
|
|
33
|
-
# - chatgpt suggests this can be fixed in the following manner
|
|
34
|
-
# for packet in container.demux(audio_stream):
|
|
35
|
-
# packet.pts = None # Reset the PTS and DTS to allow FFmpeg to set them automatically
|
|
36
|
-
# packet.dts = None
|
|
37
|
-
# for frame in packet.decode():
|
|
38
|
-
# frame.pts = None
|
|
39
|
-
# for packet in output_stream.encode(frame):
|
|
40
|
-
# output_container.mux(packet)
|
|
41
|
-
#
|
|
42
|
-
# # Flush remaining packets
|
|
43
|
-
# for packet in output_stream.encode():
|
|
44
|
-
# output_container.mux(packet)
|
|
45
23
|
|
|
46
24
|
|
|
47
25
|
@pxt.uda(requires_order_by=True)
|
|
@@ -150,9 +128,9 @@ def extract_audio(
|
|
|
150
128
|
... extracted_audio=tbl.video_col.extract_audio(format='flac')
|
|
151
129
|
... )
|
|
152
130
|
"""
|
|
153
|
-
if format not in _format_defaults:
|
|
131
|
+
if format not in av_utils.AUDIO_FORMATS:
|
|
154
132
|
raise ValueError(f'extract_audio(): unsupported audio format: {format}')
|
|
155
|
-
default_codec, ext = _format_defaults[format]
|
|
133
|
+
default_codec, ext = av_utils.AUDIO_FORMATS[format]
|
|
156
134
|
|
|
157
135
|
with av.open(video_path) as container:
|
|
158
136
|
if len(container.streams.audio) <= stream_idx:
|
pixeltable/functions/vision.py
CHANGED
|
@@ -14,7 +14,7 @@ t.select(pxtv.draw_bounding_boxes(t.img, boxes=t.boxes, label=t.labels)).collect
|
|
|
14
14
|
import colorsys
|
|
15
15
|
import hashlib
|
|
16
16
|
from collections import defaultdict
|
|
17
|
-
from typing import Any
|
|
17
|
+
from typing import Any
|
|
18
18
|
|
|
19
19
|
import numpy as np
|
|
20
20
|
import PIL.Image
|
|
@@ -293,13 +293,13 @@ def __create_label_colors(labels: list[Any]) -> dict[Any, str]:
|
|
|
293
293
|
def draw_bounding_boxes(
|
|
294
294
|
img: PIL.Image.Image,
|
|
295
295
|
boxes: list[list[int]],
|
|
296
|
-
labels:
|
|
297
|
-
color:
|
|
298
|
-
box_colors:
|
|
296
|
+
labels: list[Any] | None = None,
|
|
297
|
+
color: str | None = None,
|
|
298
|
+
box_colors: list[str] | None = None,
|
|
299
299
|
fill: bool = False,
|
|
300
300
|
width: int = 1,
|
|
301
|
-
font:
|
|
302
|
-
font_size:
|
|
301
|
+
font: str | None = None,
|
|
302
|
+
font_size: int | None = None,
|
|
303
303
|
) -> PIL.Image.Image:
|
|
304
304
|
"""
|
|
305
305
|
Draws bounding boxes on the given image.
|
pixeltable/functions/whisper.py
CHANGED
|
@@ -6,7 +6,7 @@ This UDF will cause Pixeltable to invoke the relevant model locally. In order to
|
|
|
6
6
|
first `pip install openai-whisper`.
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
|
-
from typing import TYPE_CHECKING,
|
|
9
|
+
from typing import TYPE_CHECKING, Sequence
|
|
10
10
|
|
|
11
11
|
import pixeltable as pxt
|
|
12
12
|
from pixeltable.env import Env
|
|
@@ -21,16 +21,16 @@ def transcribe(
|
|
|
21
21
|
audio: pxt.Audio,
|
|
22
22
|
*,
|
|
23
23
|
model: str,
|
|
24
|
-
temperature:
|
|
25
|
-
compression_ratio_threshold:
|
|
26
|
-
logprob_threshold:
|
|
27
|
-
no_speech_threshold:
|
|
24
|
+
temperature: Sequence[float] | None = (0.0, 0.2, 0.4, 0.6, 0.8, 1.0),
|
|
25
|
+
compression_ratio_threshold: float | None = 2.4,
|
|
26
|
+
logprob_threshold: float | None = -1.0,
|
|
27
|
+
no_speech_threshold: float | None = 0.6,
|
|
28
28
|
condition_on_previous_text: bool = True,
|
|
29
|
-
initial_prompt:
|
|
29
|
+
initial_prompt: str | None = None,
|
|
30
30
|
word_timestamps: bool = False,
|
|
31
31
|
prepend_punctuations: str = '"\'“¿([{-',
|
|
32
32
|
append_punctuations: str = '"\'.。,,!!??::”)]}、', # noqa: RUF001
|
|
33
|
-
decode_options:
|
|
33
|
+
decode_options: dict | None = None,
|
|
34
34
|
) -> dict:
|
|
35
35
|
"""
|
|
36
36
|
Transcribe an audio file using Whisper.
|
pixeltable/functions/whisperx.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"""WhisperX audio transcription and diarization functions."""
|
|
2
2
|
|
|
3
|
-
from typing import TYPE_CHECKING, Any
|
|
3
|
+
from typing import TYPE_CHECKING, Any
|
|
4
4
|
|
|
5
5
|
import numpy as np
|
|
6
6
|
|
|
@@ -21,17 +21,17 @@ def transcribe(
|
|
|
21
21
|
*,
|
|
22
22
|
model: str,
|
|
23
23
|
diarize: bool = False,
|
|
24
|
-
compute_type:
|
|
25
|
-
language:
|
|
26
|
-
task:
|
|
27
|
-
chunk_size:
|
|
28
|
-
alignment_model_name:
|
|
29
|
-
interpolate_method:
|
|
30
|
-
return_char_alignments:
|
|
31
|
-
diarization_model_name:
|
|
32
|
-
num_speakers:
|
|
33
|
-
min_speakers:
|
|
34
|
-
max_speakers:
|
|
24
|
+
compute_type: str | None = None,
|
|
25
|
+
language: str | None = None,
|
|
26
|
+
task: str | None = None,
|
|
27
|
+
chunk_size: int | None = None,
|
|
28
|
+
alignment_model_name: str | None = None,
|
|
29
|
+
interpolate_method: str | None = None,
|
|
30
|
+
return_char_alignments: bool | None = None,
|
|
31
|
+
diarization_model_name: str | None = None,
|
|
32
|
+
num_speakers: int | None = None,
|
|
33
|
+
min_speakers: int | None = None,
|
|
34
|
+
max_speakers: int | None = None,
|
|
35
35
|
) -> dict:
|
|
36
36
|
"""
|
|
37
37
|
Transcribe an audio file using WhisperX.
|
|
@@ -144,7 +144,7 @@ def _lookup_transcription_model(model: str, device: str, compute_type: str) -> '
|
|
|
144
144
|
return _model_cache[key]
|
|
145
145
|
|
|
146
146
|
|
|
147
|
-
def _lookup_alignment_model(language_code: str, device: str, model_name: Optional[str]) -> tuple['Wav2Vec2Model', dict]:
|
|
147
|
+
def _lookup_alignment_model(language_code: str, device: str, model_name: str | None) -> tuple['Wav2Vec2Model', dict]:
|
|
148
148
|
import whisperx
|
|
149
149
|
|
|
150
150
|
key = (language_code, device, model_name)
|
|
@@ -154,7 +154,7 @@ def _lookup_alignment_model(language_code: str, device: str, model_name: Optiona
|
|
|
154
154
|
return _alignment_model_cache[key]
|
|
155
155
|
|
|
156
156
|
|
|
157
|
-
def _lookup_diarization_model(device: str, model_name: Optional[str]) -> 'DiarizationPipeline':
|
|
157
|
+
def _lookup_diarization_model(device: str, model_name: str | None) -> 'DiarizationPipeline':
|
|
158
158
|
from whisperx.diarize import DiarizationPipeline
|
|
159
159
|
|
|
160
160
|
key = (device, model_name)
|
|
@@ -168,8 +168,8 @@ def _lookup_diarization_model(device: str, model_name: Optional[str]) -> 'Diariz
|
|
|
168
168
|
|
|
169
169
|
|
|
170
170
|
_model_cache: dict[tuple[str, str, str], 'FasterWhisperPipeline'] = {}
|
|
171
|
-
_alignment_model_cache: dict[tuple[str, str,
|
|
172
|
-
_diarization_model_cache: dict[tuple[str,
|
|
171
|
+
_alignment_model_cache: dict[tuple[str, str, str | None], tuple['Wav2Vec2Model', dict]] = {}
|
|
172
|
+
_diarization_model_cache: dict[tuple[str, str | None], 'DiarizationPipeline'] = {}
|
|
173
173
|
|
|
174
174
|
|
|
175
175
|
__all__ = local_public_names(__name__)
|