pixeltable 0.4.18__py3-none-any.whl → 0.4.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (152)
  1. pixeltable/__init__.py +1 -1
  2. pixeltable/_version.py +1 -0
  3. pixeltable/catalog/catalog.py +119 -100
  4. pixeltable/catalog/column.py +104 -115
  5. pixeltable/catalog/globals.py +1 -2
  6. pixeltable/catalog/insertable_table.py +44 -49
  7. pixeltable/catalog/path.py +3 -4
  8. pixeltable/catalog/schema_object.py +4 -4
  9. pixeltable/catalog/table.py +118 -122
  10. pixeltable/catalog/table_metadata.py +6 -6
  11. pixeltable/catalog/table_version.py +322 -257
  12. pixeltable/catalog/table_version_handle.py +4 -4
  13. pixeltable/catalog/table_version_path.py +9 -10
  14. pixeltable/catalog/tbl_ops.py +9 -3
  15. pixeltable/catalog/view.py +34 -28
  16. pixeltable/config.py +14 -10
  17. pixeltable/dataframe.py +68 -77
  18. pixeltable/env.py +74 -64
  19. pixeltable/exec/aggregation_node.py +6 -6
  20. pixeltable/exec/cache_prefetch_node.py +10 -10
  21. pixeltable/exec/data_row_batch.py +3 -3
  22. pixeltable/exec/exec_context.py +4 -5
  23. pixeltable/exec/exec_node.py +5 -5
  24. pixeltable/exec/expr_eval/evaluators.py +6 -6
  25. pixeltable/exec/expr_eval/expr_eval_node.py +8 -7
  26. pixeltable/exec/expr_eval/globals.py +6 -6
  27. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  28. pixeltable/exec/expr_eval/schedulers.py +11 -11
  29. pixeltable/exec/in_memory_data_node.py +2 -2
  30. pixeltable/exec/object_store_save_node.py +14 -17
  31. pixeltable/exec/sql_node.py +25 -25
  32. pixeltable/exprs/arithmetic_expr.py +4 -4
  33. pixeltable/exprs/array_slice.py +2 -2
  34. pixeltable/exprs/column_property_ref.py +3 -3
  35. pixeltable/exprs/column_ref.py +61 -74
  36. pixeltable/exprs/comparison.py +5 -5
  37. pixeltable/exprs/compound_predicate.py +3 -3
  38. pixeltable/exprs/data_row.py +12 -12
  39. pixeltable/exprs/expr.py +41 -31
  40. pixeltable/exprs/expr_dict.py +3 -3
  41. pixeltable/exprs/expr_set.py +3 -3
  42. pixeltable/exprs/function_call.py +14 -14
  43. pixeltable/exprs/in_predicate.py +4 -4
  44. pixeltable/exprs/inline_expr.py +8 -8
  45. pixeltable/exprs/is_null.py +1 -3
  46. pixeltable/exprs/json_mapper.py +8 -8
  47. pixeltable/exprs/json_path.py +6 -6
  48. pixeltable/exprs/literal.py +5 -5
  49. pixeltable/exprs/method_ref.py +2 -2
  50. pixeltable/exprs/object_ref.py +2 -2
  51. pixeltable/exprs/row_builder.py +14 -14
  52. pixeltable/exprs/rowid_ref.py +8 -8
  53. pixeltable/exprs/similarity_expr.py +50 -25
  54. pixeltable/exprs/sql_element_cache.py +4 -4
  55. pixeltable/exprs/string_op.py +2 -2
  56. pixeltable/exprs/type_cast.py +3 -5
  57. pixeltable/func/aggregate_function.py +8 -8
  58. pixeltable/func/callable_function.py +9 -9
  59. pixeltable/func/expr_template_function.py +3 -3
  60. pixeltable/func/function.py +15 -17
  61. pixeltable/func/function_registry.py +6 -7
  62. pixeltable/func/globals.py +2 -3
  63. pixeltable/func/mcp.py +2 -2
  64. pixeltable/func/query_template_function.py +16 -16
  65. pixeltable/func/signature.py +14 -14
  66. pixeltable/func/tools.py +11 -11
  67. pixeltable/func/udf.py +16 -18
  68. pixeltable/functions/__init__.py +1 -0
  69. pixeltable/functions/anthropic.py +7 -7
  70. pixeltable/functions/audio.py +76 -0
  71. pixeltable/functions/bedrock.py +6 -6
  72. pixeltable/functions/deepseek.py +4 -4
  73. pixeltable/functions/fireworks.py +2 -2
  74. pixeltable/functions/gemini.py +6 -6
  75. pixeltable/functions/globals.py +12 -12
  76. pixeltable/functions/groq.py +4 -4
  77. pixeltable/functions/huggingface.py +18 -20
  78. pixeltable/functions/image.py +7 -10
  79. pixeltable/functions/llama_cpp.py +7 -7
  80. pixeltable/functions/math.py +2 -3
  81. pixeltable/functions/mistralai.py +3 -3
  82. pixeltable/functions/ollama.py +9 -9
  83. pixeltable/functions/openai.py +21 -21
  84. pixeltable/functions/openrouter.py +7 -7
  85. pixeltable/functions/string.py +21 -28
  86. pixeltable/functions/timestamp.py +7 -8
  87. pixeltable/functions/together.py +4 -6
  88. pixeltable/functions/twelvelabs.py +92 -0
  89. pixeltable/functions/video.py +2 -24
  90. pixeltable/functions/vision.py +6 -6
  91. pixeltable/functions/whisper.py +7 -7
  92. pixeltable/functions/whisperx.py +16 -16
  93. pixeltable/globals.py +52 -36
  94. pixeltable/index/base.py +12 -8
  95. pixeltable/index/btree.py +19 -22
  96. pixeltable/index/embedding_index.py +30 -39
  97. pixeltable/io/datarows.py +3 -3
  98. pixeltable/io/external_store.py +13 -16
  99. pixeltable/io/fiftyone.py +5 -5
  100. pixeltable/io/globals.py +5 -5
  101. pixeltable/io/hf_datasets.py +4 -4
  102. pixeltable/io/label_studio.py +12 -12
  103. pixeltable/io/pandas.py +6 -6
  104. pixeltable/io/parquet.py +2 -2
  105. pixeltable/io/table_data_conduit.py +12 -12
  106. pixeltable/io/utils.py +2 -2
  107. pixeltable/iterators/audio.py +2 -2
  108. pixeltable/iterators/video.py +8 -13
  109. pixeltable/metadata/converters/convert_18.py +2 -2
  110. pixeltable/metadata/converters/convert_19.py +2 -2
  111. pixeltable/metadata/converters/convert_20.py +2 -2
  112. pixeltable/metadata/converters/convert_21.py +2 -2
  113. pixeltable/metadata/converters/convert_22.py +2 -2
  114. pixeltable/metadata/converters/convert_24.py +2 -2
  115. pixeltable/metadata/converters/convert_25.py +2 -2
  116. pixeltable/metadata/converters/convert_26.py +2 -2
  117. pixeltable/metadata/converters/convert_29.py +4 -4
  118. pixeltable/metadata/converters/convert_34.py +2 -2
  119. pixeltable/metadata/converters/convert_36.py +2 -2
  120. pixeltable/metadata/converters/convert_38.py +2 -2
  121. pixeltable/metadata/converters/convert_39.py +1 -2
  122. pixeltable/metadata/converters/util.py +11 -13
  123. pixeltable/metadata/schema.py +22 -21
  124. pixeltable/metadata/utils.py +2 -6
  125. pixeltable/mypy/mypy_plugin.py +5 -5
  126. pixeltable/plan.py +30 -28
  127. pixeltable/share/packager.py +7 -7
  128. pixeltable/share/publish.py +3 -3
  129. pixeltable/store.py +125 -61
  130. pixeltable/type_system.py +43 -46
  131. pixeltable/utils/__init__.py +1 -2
  132. pixeltable/utils/arrow.py +4 -4
  133. pixeltable/utils/av.py +8 -0
  134. pixeltable/utils/azure_store.py +305 -0
  135. pixeltable/utils/code.py +1 -2
  136. pixeltable/utils/dbms.py +15 -19
  137. pixeltable/utils/description_helper.py +2 -3
  138. pixeltable/utils/documents.py +5 -6
  139. pixeltable/utils/exception_handler.py +2 -2
  140. pixeltable/utils/filecache.py +5 -5
  141. pixeltable/utils/formatter.py +4 -6
  142. pixeltable/utils/gcs_store.py +9 -9
  143. pixeltable/utils/local_store.py +17 -17
  144. pixeltable/utils/object_stores.py +59 -43
  145. pixeltable/utils/s3_store.py +35 -30
  146. {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/METADATA +1 -1
  147. pixeltable-0.4.19.dist-info/RECORD +213 -0
  148. pixeltable/__version__.py +0 -3
  149. pixeltable-0.4.18.dist-info/RECORD +0 -211
  150. {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/WHEEL +0 -0
  151. {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/entry_points.txt +0 -0
  152. {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/licenses/LICENSE +0 -0
@@ -15,7 +15,7 @@ import builtins
15
15
  import re
16
16
  import textwrap
17
17
  from string import whitespace
18
- from typing import Any, Optional
18
+ from typing import Any
19
19
 
20
20
  import sqlalchemy as sql
21
21
 
@@ -78,9 +78,7 @@ def contains(self: str, substr: str, case: bool = True) -> bool:
78
78
 
79
79
 
80
80
  @contains.to_sql
81
- def _(
82
- self: sql.ColumnElement, substr: sql.ColumnElement, case: Optional[sql.ColumnElement] = None
83
- ) -> sql.ColumnElement:
81
+ def _(self: sql.ColumnElement, substr: sql.ColumnElement, case: sql.ColumnElement | None = None) -> sql.ColumnElement:
84
82
  # Replace all occurrences of `%`, `_`, and `\` with escaped versions
85
83
  escaped_substr = sql.func.regexp_replace(substr, r'(%|_|\\)', r'\\\1', 'g')
86
84
  if case is None:
@@ -153,7 +151,7 @@ def fill(self: str, width: int, **kwargs: Any) -> str:
153
151
 
154
152
 
155
153
  @pxt.udf(is_method=True)
156
- def find(self: str, substr: str, start: int = 0, end: Optional[int] = None) -> int:
154
+ def find(self: str, substr: str, start: int = 0, end: int | None = None) -> int:
157
155
  """
158
156
  Return the lowest index in string where `substr` is found within the slice `s[start:end]`.
159
157
 
@@ -169,10 +167,7 @@ def find(self: str, substr: str, start: int = 0, end: Optional[int] = None) -> i
169
167
 
170
168
  @find.to_sql
171
169
  def _(
172
- self: sql.ColumnElement,
173
- substr: sql.ColumnElement,
174
- start: sql.ColumnElement,
175
- end: Optional[sql.ColumnElement] = None,
170
+ self: sql.ColumnElement, substr: sql.ColumnElement, start: sql.ColumnElement, end: sql.ColumnElement | None = None
176
171
  ) -> sql.ColumnElement:
177
172
  sl = pxt.functions.string.slice._to_sql(self, start, end)
178
173
  if sl is None:
@@ -227,7 +222,7 @@ def fullmatch(self: str, pattern: str, case: bool = True, flags: int = 0) -> boo
227
222
 
228
223
 
229
224
  @pxt.udf(is_method=True)
230
- def index(self: str, substr: str, start: int = 0, end: Optional[int] = None) -> int:
225
+ def index(self: str, substr: str, start: int = 0, end: int | None = None) -> int:
231
226
  """
232
227
  Return the lowest index in string where `substr` is found within the slice `[start:end]`.
233
228
  Raises ValueError if `substr` is not found.
@@ -413,7 +408,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
413
408
 
414
409
 
415
410
  @pxt.udf(is_method=True)
416
- def lstrip(self: str, chars: Optional[str] = None) -> str:
411
+ def lstrip(self: str, chars: str | None = None) -> str:
417
412
  """
418
413
  Return a copy of the string with leading characters removed. The `chars` argument is a string specifying the set of
419
414
  characters to be removed. If omitted or `None`, whitespace characters are removed.
@@ -427,7 +422,7 @@ def lstrip(self: str, chars: Optional[str] = None) -> str:
427
422
 
428
423
 
429
424
  @lstrip.to_sql
430
- def _(self: sql.ColumnElement, chars: Optional[sql.ColumnElement] = None) -> sql.ColumnElement:
425
+ def _(self: sql.ColumnElement, chars: sql.ColumnElement | None = None) -> sql.ColumnElement:
431
426
  return sql.func.ltrim(self, chars if chars is not None else whitespace)
432
427
 
433
428
 
@@ -535,7 +530,7 @@ def _(self: sql.ColumnElement, n: sql.ColumnElement) -> sql.ColumnElement:
535
530
 
536
531
 
537
532
  @pxt.udf(is_method=True)
538
- def replace(self: str, substr: str, repl: str, n: Optional[int] = None) -> str:
533
+ def replace(self: str, substr: str, repl: str, n: int | None = None) -> str:
539
534
  """
540
535
  Replace occurrences of `substr` with `repl`.
541
536
 
@@ -551,7 +546,7 @@ def replace(self: str, substr: str, repl: str, n: Optional[int] = None) -> str:
551
546
 
552
547
  @replace.to_sql
553
548
  def _(
554
- self: sql.ColumnElement, substr: sql.ColumnElement, repl: sql.ColumnElement, n: Optional[sql.ColumnElement] = None
549
+ self: sql.ColumnElement, substr: sql.ColumnElement, repl: sql.ColumnElement, n: sql.ColumnElement | None = None
555
550
  ) -> sql.ColumnElement:
556
551
  if n is not None:
557
552
  return None # SQL does not support bounding the number of replacements
@@ -560,7 +555,7 @@ def _(
560
555
 
561
556
 
562
557
  @pxt.udf(is_method=True)
563
- def replace_re(self: str, pattern: str, repl: str, n: Optional[int] = None, flags: int = 0) -> str:
558
+ def replace_re(self: str, pattern: str, repl: str, n: int | None = None, flags: int = 0) -> str:
564
559
  """
565
560
  Replace occurrences of a regular expression pattern with `repl`.
566
561
 
@@ -591,7 +586,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
591
586
 
592
587
 
593
588
  @pxt.udf(is_method=True)
594
- def rfind(self: str, substr: str, start: Optional[int] = 0, end: Optional[int] = None) -> int:
589
+ def rfind(self: str, substr: str, start: int | None = 0, end: int | None = None) -> int:
595
590
  """
596
591
  Return the highest index where `substr` is found, such that `substr` is contained within `[start:end]`.
597
592
 
@@ -606,7 +601,7 @@ def rfind(self: str, substr: str, start: Optional[int] = 0, end: Optional[int] =
606
601
 
607
602
 
608
603
  @pxt.udf(is_method=True)
609
- def rindex(self: str, substr: str, start: Optional[int] = 0, end: Optional[int] = None) -> int:
604
+ def rindex(self: str, substr: str, start: int | None = 0, end: int | None = None) -> int:
610
605
  """
611
606
  Return the highest index where `substr` is found, such that `substr` is contained within `[start:end]`.
612
607
  Raises ValueError if `substr` is not found.
@@ -643,7 +638,7 @@ def rpartition(self: str, sep: str = ' ') -> list:
643
638
 
644
639
 
645
640
  @pxt.udf(is_method=True)
646
- def rstrip(self: str, chars: Optional[str] = None) -> str:
641
+ def rstrip(self: str, chars: str | None = None) -> str:
647
642
  """
648
643
  Return a copy of string with trailing characters removed.
649
644
 
@@ -656,12 +651,12 @@ def rstrip(self: str, chars: Optional[str] = None) -> str:
656
651
 
657
652
 
658
653
  @rstrip.to_sql
659
- def _(self: sql.ColumnElement, chars: Optional[sql.ColumnElement] = None) -> sql.ColumnElement:
654
+ def _(self: sql.ColumnElement, chars: sql.ColumnElement | None = None) -> sql.ColumnElement:
660
655
  return sql.func.rtrim(self, chars if chars is not None else whitespace)
661
656
 
662
657
 
663
658
  @pxt.udf(is_method=True)
664
- def slice(self: str, start: Optional[int] = None, stop: Optional[int] = None, step: Optional[int] = None) -> str:
659
+ def slice(self: str, start: int | None = None, stop: int | None = None, step: int | None = None) -> str:
665
660
  """
666
661
  Return a slice.
667
662
 
@@ -676,9 +671,9 @@ def slice(self: str, start: Optional[int] = None, stop: Optional[int] = None, st
676
671
  @slice.to_sql
677
672
  def _(
678
673
  self: sql.ColumnElement,
679
- start: Optional[sql.ColumnElement] = None,
680
- stop: Optional[sql.ColumnElement] = None,
681
- step: Optional[sql.ColumnElement] = None,
674
+ start: sql.ColumnElement | None = None,
675
+ stop: sql.ColumnElement | None = None,
676
+ step: sql.ColumnElement | None = None,
682
677
  ) -> sql.ColumnElement:
683
678
  if step is not None:
684
679
  return None
@@ -709,9 +704,7 @@ def _(
709
704
 
710
705
 
711
706
  @pxt.udf(is_method=True)
712
- def slice_replace(
713
- self: str, start: Optional[int] = None, stop: Optional[int] = None, repl: Optional[str] = None
714
- ) -> str:
707
+ def slice_replace(self: str, start: int | None = None, stop: int | None = None, repl: str | None = None) -> str:
715
708
  """
716
709
  Replace a positional slice with another value.
717
710
 
@@ -744,7 +737,7 @@ def _(self: sql.ColumnElement, substr: sql.ColumnElement) -> sql.ColumnElement:
744
737
 
745
738
 
746
739
  @pxt.udf(is_method=True)
747
- def strip(self: str, chars: Optional[str] = None) -> str:
740
+ def strip(self: str, chars: str | None = None) -> str:
748
741
  """
749
742
  Return a copy of string with leading and trailing characters removed.
750
743
 
@@ -757,7 +750,7 @@ def strip(self: str, chars: Optional[str] = None) -> str:
757
750
 
758
751
 
759
752
  @strip.to_sql
760
- def _(self: sql.ColumnElement, chars: Optional[sql.ColumnElement] = None) -> sql.ColumnElement:
753
+ def _(self: sql.ColumnElement, chars: sql.ColumnElement | None = None) -> sql.ColumnElement:
761
754
  return sql.func.trim(self, chars if chars is not None else whitespace)
762
755
 
763
756
 
@@ -11,7 +11,6 @@ t.select(t.timestamp_col.year, t.timestamp_col.weekday()).collect()
11
11
  """
12
12
 
13
13
  from datetime import datetime
14
- from typing import Optional
15
14
 
16
15
  import sqlalchemy as sql
17
16
 
@@ -271,13 +270,13 @@ def _(
271
270
  @pxt.udf(is_method=True)
272
271
  def replace(
273
272
  self: datetime,
274
- year: Optional[int] = None,
275
- month: Optional[int] = None,
276
- day: Optional[int] = None,
277
- hour: Optional[int] = None,
278
- minute: Optional[int] = None,
279
- second: Optional[int] = None,
280
- microsecond: Optional[int] = None,
273
+ year: int | None = None,
274
+ month: int | None = None,
275
+ day: int | None = None,
276
+ hour: int | None = None,
277
+ minute: int | None = None,
278
+ second: int | None = None,
279
+ microsecond: int | None = None,
281
280
  ) -> datetime:
282
281
  """
283
282
  Return a datetime with the same attributes, except for those attributes given new values by whichever keyword
@@ -7,7 +7,7 @@ the [Working with Together AI](https://pixeltable.readme.io/docs/together-ai) tu
7
7
 
8
8
  import base64
9
9
  import io
10
- from typing import TYPE_CHECKING, Any, Callable, Optional, TypeVar
10
+ from typing import TYPE_CHECKING, Any, Callable, TypeVar
11
11
 
12
12
  import numpy as np
13
13
  import PIL.Image
@@ -50,7 +50,7 @@ def _retry(fn: Callable[..., T]) -> Callable[..., T]:
50
50
 
51
51
 
52
52
  @pxt.udf(resource_pool='request-rate:together:chat')
53
- async def completions(prompt: str, *, model: str, model_kwargs: Optional[dict[str, Any]] = None) -> dict:
53
+ async def completions(prompt: str, *, model: str, model_kwargs: dict[str, Any] | None = None) -> dict:
54
54
  """
55
55
  Generate completions based on a given prompt using a specified model.
56
56
 
@@ -89,7 +89,7 @@ async def completions(prompt: str, *, model: str, model_kwargs: Optional[dict[st
89
89
 
90
90
  @pxt.udf(resource_pool='request-rate:together:chat')
91
91
  async def chat_completions(
92
- messages: list[dict[str, str]], *, model: str, model_kwargs: Optional[dict[str, Any]] = None
92
+ messages: list[dict[str, str]], *, model: str, model_kwargs: dict[str, Any] | None = None
93
93
  ) -> dict:
94
94
  """
95
95
  Generate chat completions based on a given prompt using a specified model.
@@ -183,9 +183,7 @@ def _(model: str) -> ts.ArrayType:
183
183
 
184
184
 
185
185
  @pxt.udf(resource_pool='request-rate:together:images')
186
- async def image_generations(
187
- prompt: str, *, model: str, model_kwargs: Optional[dict[str, Any]] = None
188
- ) -> PIL.Image.Image:
186
+ async def image_generations(prompt: str, *, model: str, model_kwargs: dict[str, Any] | None = None) -> PIL.Image.Image:
189
187
  """
190
188
  Generate images based on a given prompt using a specified model.
191
189
 
@@ -0,0 +1,92 @@
1
+ """
2
+ Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs)
3
+ that wrap various endpoints from the TwelveLabs API. In order to use them, you must
4
+ first `pip install twelvelabs` and configure your TwelveLabs credentials, as described in
5
+ the [Working with TwelveLabs](https://pixeltable.readme.io/docs/working-with-twelvelabs) tutorial.
6
+ """
7
+
8
+ from typing import TYPE_CHECKING, Any, Literal
9
+
10
+ import numpy as np
11
+
12
+ import pixeltable as pxt
13
+ from pixeltable import env
14
+ from pixeltable.utils.code import local_public_names
15
+
16
+ if TYPE_CHECKING:
17
+ from twelvelabs import AsyncTwelveLabs
18
+
19
+
20
+ @env.register_client('twelvelabs')
21
+ def _(api_key: str) -> 'AsyncTwelveLabs':
22
+ from twelvelabs import AsyncTwelveLabs
23
+
24
+ return AsyncTwelveLabs(api_key=api_key)
25
+
26
+
27
+ def _twelvelabs_client() -> 'AsyncTwelveLabs':
28
+ return env.Env.get().get_client('twelvelabs')
29
+
30
+
31
+ @pxt.udf(resource_pool='request-rate:twelvelabs')
32
+ async def embed(
33
+ model_name: str,
34
+ *,
35
+ text: str | None = None,
36
+ text_truncate: Literal['none', 'start', 'end'] | None = None,
37
+ audio: pxt.Audio | None = None,
38
+ # TODO: support images
39
+ # image: pxt.Image | None = None,
40
+ **kwargs: Any,
41
+ ) -> pxt.Array[(1024,), pxt.Float]:
42
+ """
43
+ Creates an embedding vector for the given `text`, `audio`, or `image` parameter. Only one of `text`, `audio`, or
44
+ `image` may be specified.
45
+
46
+ Equivalent to the TwelveLabs Embed API.
47
+ https://docs.twelvelabs.io/v1.3/docs/guides/create-embeddings
48
+
49
+ Request throttling:
50
+ Applies the rate limit set in the config (section `twelvelabs`, key `rate_limit`). If no rate
51
+ limit is configured, uses a default of 600 RPM.
52
+
53
+ __Requirements:__
54
+
55
+ - `pip install twelvelabs`
56
+
57
+ Args:
58
+ model_name: The name of the model to use. Check
59
+ [the TwelveLabs documentation](https://docs.twelvelabs.io/v1.3/sdk-reference/python/create-text-image-and-audio-embeddings)
60
+ for available models.
61
+ text: The text to embed.
62
+ text_truncate: Truncation mode for the text.
63
+ audio: The audio to embed.
64
+
65
+ Returns:
66
+ The embedding.
67
+
68
+ Examples:
69
+ Add a computed column `embed` for an embedding of a string column `input`:
70
+
71
+ >>> tbl.add_computed_column(
72
+ ... embed=embed(model_name='Marengo-retrieval-2.7', text=tbl.input)
73
+ ... )
74
+ """
75
+ cl = _twelvelabs_client()
76
+ res = await cl.embed.create(
77
+ model_name=model_name, text=text, text_truncate=text_truncate, audio_file=audio, **kwargs
78
+ )
79
+ if text is not None:
80
+ if res.text_embedding is None:
81
+ raise pxt.Error(f"Didn't receive embedding for text: {text}")
82
+ vector = res.text_embedding.segments[0].float_
83
+ return np.array(vector, dtype=np.float64)
84
+ # TODO: handle audio and image, once we know how to get a non-error response
85
+ return None
86
+
87
+
88
+ __all__ = local_public_names(__name__)
89
+
90
+
91
+ def __dir__() -> list[str]:
92
+ return __all__
@@ -20,28 +20,6 @@ from pixeltable.utils.code import local_public_names
20
20
  from pixeltable.utils.local_store import TempStore
21
21
 
22
22
  _logger = logging.getLogger('pixeltable')
23
- _format_defaults: dict[str, tuple[str, str]] = { # format -> (codec, ext)
24
- 'wav': ('pcm_s16le', 'wav'),
25
- 'mp3': ('libmp3lame', 'mp3'),
26
- 'flac': ('flac', 'flac'),
27
- # 'mp4': ('aac', 'm4a'),
28
- }
29
-
30
- # for mp4:
31
- # - extract_audio() fails with
32
- # "Application provided invalid, non monotonically increasing dts to muxer in stream 0: 1146 >= 290"
33
- # - chatgpt suggests this can be fixed in the following manner
34
- # for packet in container.demux(audio_stream):
35
- # packet.pts = None # Reset the PTS and DTS to allow FFmpeg to set them automatically
36
- # packet.dts = None
37
- # for frame in packet.decode():
38
- # frame.pts = None
39
- # for packet in output_stream.encode(frame):
40
- # output_container.mux(packet)
41
- #
42
- # # Flush remaining packets
43
- # for packet in output_stream.encode():
44
- # output_container.mux(packet)
45
23
 
46
24
 
47
25
  @pxt.uda(requires_order_by=True)
@@ -150,9 +128,9 @@ def extract_audio(
150
128
  ... extracted_audio=tbl.video_col.extract_audio(format='flac')
151
129
  ... )
152
130
  """
153
- if format not in _format_defaults:
131
+ if format not in av_utils.AUDIO_FORMATS:
154
132
  raise ValueError(f'extract_audio(): unsupported audio format: {format}')
155
- default_codec, ext = _format_defaults[format]
133
+ default_codec, ext = av_utils.AUDIO_FORMATS[format]
156
134
 
157
135
  with av.open(video_path) as container:
158
136
  if len(container.streams.audio) <= stream_idx:
@@ -14,7 +14,7 @@ t.select(pxtv.draw_bounding_boxes(t.img, boxes=t.boxes, label=t.labels)).collect
14
14
  import colorsys
15
15
  import hashlib
16
16
  from collections import defaultdict
17
- from typing import Any, Optional
17
+ from typing import Any
18
18
 
19
19
  import numpy as np
20
20
  import PIL.Image
@@ -293,13 +293,13 @@ def __create_label_colors(labels: list[Any]) -> dict[Any, str]:
293
293
  def draw_bounding_boxes(
294
294
  img: PIL.Image.Image,
295
295
  boxes: list[list[int]],
296
- labels: Optional[list[Any]] = None,
297
- color: Optional[str] = None,
298
- box_colors: Optional[list[str]] = None,
296
+ labels: list[Any] | None = None,
297
+ color: str | None = None,
298
+ box_colors: list[str] | None = None,
299
299
  fill: bool = False,
300
300
  width: int = 1,
301
- font: Optional[str] = None,
302
- font_size: Optional[int] = None,
301
+ font: str | None = None,
302
+ font_size: int | None = None,
303
303
  ) -> PIL.Image.Image:
304
304
  """
305
305
  Draws bounding boxes on the given image.
@@ -6,7 +6,7 @@ This UDF will cause Pixeltable to invoke the relevant model locally. In order to
6
6
  first `pip install openai-whisper`.
7
7
  """
8
8
 
9
- from typing import TYPE_CHECKING, Optional, Sequence
9
+ from typing import TYPE_CHECKING, Sequence
10
10
 
11
11
  import pixeltable as pxt
12
12
  from pixeltable.env import Env
@@ -21,16 +21,16 @@ def transcribe(
21
21
  audio: pxt.Audio,
22
22
  *,
23
23
  model: str,
24
- temperature: Optional[Sequence[float]] = (0.0, 0.2, 0.4, 0.6, 0.8, 1.0),
25
- compression_ratio_threshold: Optional[float] = 2.4,
26
- logprob_threshold: Optional[float] = -1.0,
27
- no_speech_threshold: Optional[float] = 0.6,
24
+ temperature: Sequence[float] | None = (0.0, 0.2, 0.4, 0.6, 0.8, 1.0),
25
+ compression_ratio_threshold: float | None = 2.4,
26
+ logprob_threshold: float | None = -1.0,
27
+ no_speech_threshold: float | None = 0.6,
28
28
  condition_on_previous_text: bool = True,
29
- initial_prompt: Optional[str] = None,
29
+ initial_prompt: str | None = None,
30
30
  word_timestamps: bool = False,
31
31
  prepend_punctuations: str = '"\'“¿([{-',
32
32
  append_punctuations: str = '"\'.。,,!!??::”)]}、', # noqa: RUF001
33
- decode_options: Optional[dict] = None,
33
+ decode_options: dict | None = None,
34
34
  ) -> dict:
35
35
  """
36
36
  Transcribe an audio file using Whisper.
@@ -1,6 +1,6 @@
1
1
  """WhisperX audio transcription and diarization functions."""
2
2
 
3
- from typing import TYPE_CHECKING, Any, Optional
3
+ from typing import TYPE_CHECKING, Any
4
4
 
5
5
  import numpy as np
6
6
 
@@ -21,17 +21,17 @@ def transcribe(
21
21
  *,
22
22
  model: str,
23
23
  diarize: bool = False,
24
- compute_type: Optional[str] = None,
25
- language: Optional[str] = None,
26
- task: Optional[str] = None,
27
- chunk_size: Optional[int] = None,
28
- alignment_model_name: Optional[str] = None,
29
- interpolate_method: Optional[str] = None,
30
- return_char_alignments: Optional[bool] = None,
31
- diarization_model_name: Optional[str] = None,
32
- num_speakers: Optional[int] = None,
33
- min_speakers: Optional[int] = None,
34
- max_speakers: Optional[int] = None,
24
+ compute_type: str | None = None,
25
+ language: str | None = None,
26
+ task: str | None = None,
27
+ chunk_size: int | None = None,
28
+ alignment_model_name: str | None = None,
29
+ interpolate_method: str | None = None,
30
+ return_char_alignments: bool | None = None,
31
+ diarization_model_name: str | None = None,
32
+ num_speakers: int | None = None,
33
+ min_speakers: int | None = None,
34
+ max_speakers: int | None = None,
35
35
  ) -> dict:
36
36
  """
37
37
  Transcribe an audio file using WhisperX.
@@ -144,7 +144,7 @@ def _lookup_transcription_model(model: str, device: str, compute_type: str) -> '
144
144
  return _model_cache[key]
145
145
 
146
146
 
147
- def _lookup_alignment_model(language_code: str, device: str, model_name: Optional[str]) -> tuple['Wav2Vec2Model', dict]:
147
+ def _lookup_alignment_model(language_code: str, device: str, model_name: str | None) -> tuple['Wav2Vec2Model', dict]:
148
148
  import whisperx
149
149
 
150
150
  key = (language_code, device, model_name)
@@ -154,7 +154,7 @@ def _lookup_alignment_model(language_code: str, device: str, model_name: Optiona
154
154
  return _alignment_model_cache[key]
155
155
 
156
156
 
157
- def _lookup_diarization_model(device: str, model_name: Optional[str]) -> 'DiarizationPipeline':
157
+ def _lookup_diarization_model(device: str, model_name: str | None) -> 'DiarizationPipeline':
158
158
  from whisperx.diarize import DiarizationPipeline
159
159
 
160
160
  key = (device, model_name)
@@ -168,8 +168,8 @@ def _lookup_diarization_model(device: str, model_name: Optional[str]) -> 'Diariz
168
168
 
169
169
 
170
170
  _model_cache: dict[tuple[str, str, str], 'FasterWhisperPipeline'] = {}
171
- _alignment_model_cache: dict[tuple[str, str, Optional[str]], tuple['Wav2Vec2Model', dict]] = {}
172
- _diarization_model_cache: dict[tuple[str, Optional[str]], 'DiarizationPipeline'] = {}
171
+ _alignment_model_cache: dict[tuple[str, str, str | None], tuple['Wav2Vec2Model', dict]] = {}
172
+ _diarization_model_cache: dict[tuple[str, str | None], 'DiarizationPipeline'] = {}
173
173
 
174
174
 
175
175
  __all__ = local_public_names(__name__)