pixeltable 0.4.0rc3__py3-none-any.whl → 0.4.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +23 -5
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +5 -3
- pixeltable/catalog/catalog.py +1318 -404
- pixeltable/catalog/column.py +186 -115
- pixeltable/catalog/dir.py +1 -2
- pixeltable/catalog/globals.py +11 -43
- pixeltable/catalog/insertable_table.py +167 -79
- pixeltable/catalog/path.py +61 -23
- pixeltable/catalog/schema_object.py +9 -10
- pixeltable/catalog/table.py +626 -308
- pixeltable/catalog/table_metadata.py +101 -0
- pixeltable/catalog/table_version.py +713 -569
- pixeltable/catalog/table_version_handle.py +37 -6
- pixeltable/catalog/table_version_path.py +42 -29
- pixeltable/catalog/tbl_ops.py +50 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +108 -94
- pixeltable/config.py +128 -22
- pixeltable/dataframe.py +188 -100
- pixeltable/env.py +407 -136
- pixeltable/exceptions.py +6 -0
- pixeltable/exec/__init__.py +3 -0
- pixeltable/exec/aggregation_node.py +7 -8
- pixeltable/exec/cache_prefetch_node.py +83 -110
- pixeltable/exec/cell_materialization_node.py +231 -0
- pixeltable/exec/cell_reconstruction_node.py +135 -0
- pixeltable/exec/component_iteration_node.py +4 -3
- pixeltable/exec/data_row_batch.py +8 -65
- pixeltable/exec/exec_context.py +16 -4
- pixeltable/exec/exec_node.py +13 -36
- pixeltable/exec/expr_eval/evaluators.py +7 -6
- pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
- pixeltable/exec/expr_eval/globals.py +8 -5
- pixeltable/exec/expr_eval/row_buffer.py +1 -2
- pixeltable/exec/expr_eval/schedulers.py +190 -30
- pixeltable/exec/globals.py +32 -0
- pixeltable/exec/in_memory_data_node.py +18 -18
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +16 -9
- pixeltable/exec/sql_node.py +206 -101
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/arithmetic_expr.py +27 -22
- pixeltable/exprs/array_slice.py +3 -3
- pixeltable/exprs/column_property_ref.py +34 -30
- pixeltable/exprs/column_ref.py +92 -96
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +5 -4
- pixeltable/exprs/data_row.py +152 -55
- pixeltable/exprs/expr.py +62 -43
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +17 -10
- pixeltable/exprs/function_call.py +75 -37
- pixeltable/exprs/globals.py +1 -2
- pixeltable/exprs/in_predicate.py +4 -4
- pixeltable/exprs/inline_expr.py +10 -27
- pixeltable/exprs/is_null.py +1 -3
- pixeltable/exprs/json_mapper.py +8 -8
- pixeltable/exprs/json_path.py +56 -22
- pixeltable/exprs/literal.py +5 -5
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +127 -53
- pixeltable/exprs/rowid_ref.py +8 -12
- pixeltable/exprs/similarity_expr.py +50 -25
- pixeltable/exprs/sql_element_cache.py +4 -4
- pixeltable/exprs/string_op.py +5 -5
- pixeltable/exprs/type_cast.py +3 -5
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/callable_function.py +9 -9
- pixeltable/func/expr_template_function.py +10 -10
- pixeltable/func/function.py +18 -20
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/globals.py +2 -3
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +20 -18
- pixeltable/func/signature.py +43 -16
- pixeltable/func/tools.py +23 -13
- pixeltable/func/udf.py +18 -20
- pixeltable/functions/__init__.py +6 -0
- pixeltable/functions/anthropic.py +93 -33
- pixeltable/functions/audio.py +114 -10
- pixeltable/functions/bedrock.py +13 -6
- pixeltable/functions/date.py +1 -1
- pixeltable/functions/deepseek.py +20 -9
- pixeltable/functions/fireworks.py +2 -2
- pixeltable/functions/gemini.py +28 -11
- pixeltable/functions/globals.py +13 -13
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1046 -23
- pixeltable/functions/image.py +9 -18
- pixeltable/functions/llama_cpp.py +23 -8
- pixeltable/functions/math.py +3 -4
- pixeltable/functions/mistralai.py +4 -15
- pixeltable/functions/ollama.py +16 -9
- pixeltable/functions/openai.py +104 -82
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +2 -2
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +21 -28
- pixeltable/functions/timestamp.py +13 -14
- pixeltable/functions/together.py +4 -6
- pixeltable/functions/twelvelabs.py +92 -0
- pixeltable/functions/util.py +6 -1
- pixeltable/functions/video.py +1388 -106
- pixeltable/functions/vision.py +7 -7
- pixeltable/functions/whisper.py +15 -7
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/{ext/functions → functions}/yolox.py +2 -4
- pixeltable/globals.py +332 -105
- pixeltable/index/base.py +13 -22
- pixeltable/index/btree.py +23 -22
- pixeltable/index/embedding_index.py +32 -44
- pixeltable/io/__init__.py +4 -2
- pixeltable/io/datarows.py +7 -6
- pixeltable/io/external_store.py +49 -77
- pixeltable/io/fiftyone.py +11 -11
- pixeltable/io/globals.py +29 -28
- pixeltable/io/hf_datasets.py +17 -9
- pixeltable/io/label_studio.py +70 -66
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +12 -11
- pixeltable/io/parquet.py +13 -93
- pixeltable/io/table_data_conduit.py +71 -47
- pixeltable/io/utils.py +3 -3
- pixeltable/iterators/__init__.py +2 -1
- pixeltable/iterators/audio.py +21 -11
- pixeltable/iterators/document.py +116 -55
- pixeltable/iterators/image.py +5 -2
- pixeltable/iterators/video.py +293 -13
- pixeltable/metadata/__init__.py +4 -2
- pixeltable/metadata/converters/convert_18.py +2 -2
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_20.py +2 -2
- pixeltable/metadata/converters/convert_21.py +2 -2
- pixeltable/metadata/converters/convert_22.py +2 -2
- pixeltable/metadata/converters/convert_24.py +2 -2
- pixeltable/metadata/converters/convert_25.py +2 -2
- pixeltable/metadata/converters/convert_26.py +2 -2
- pixeltable/metadata/converters/convert_29.py +4 -4
- pixeltable/metadata/converters/convert_34.py +2 -2
- pixeltable/metadata/converters/convert_36.py +2 -2
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/util.py +13 -12
- pixeltable/metadata/notes.py +4 -0
- pixeltable/metadata/schema.py +79 -42
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +274 -223
- pixeltable/share/__init__.py +1 -1
- pixeltable/share/packager.py +259 -129
- pixeltable/share/protocol/__init__.py +34 -0
- pixeltable/share/protocol/common.py +170 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +109 -0
- pixeltable/share/publish.py +213 -57
- pixeltable/store.py +238 -175
- pixeltable/type_system.py +104 -63
- pixeltable/utils/__init__.py +2 -3
- pixeltable/utils/arrow.py +108 -13
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +305 -0
- pixeltable/utils/code.py +3 -3
- pixeltable/utils/console_output.py +4 -1
- pixeltable/utils/coroutine.py +6 -23
- pixeltable/utils/dbms.py +31 -5
- pixeltable/utils/description_helper.py +4 -5
- pixeltable/utils/documents.py +5 -6
- pixeltable/utils/exception_handler.py +7 -30
- pixeltable/utils/filecache.py +6 -6
- pixeltable/utils/formatter.py +4 -6
- pixeltable/utils/gcs_store.py +283 -0
- pixeltable/utils/http_server.py +2 -3
- pixeltable/utils/iceberg.py +1 -2
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +88 -0
- pixeltable/utils/local_store.py +316 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +528 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +5 -6
- pixeltable/utils/s3_store.py +392 -0
- pixeltable-0.4.20.dist-info/METADATA +587 -0
- pixeltable-0.4.20.dist-info/RECORD +218 -0
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info}/WHEEL +1 -1
- pixeltable-0.4.20.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/ext/__init__.py +0 -17
- pixeltable/ext/functions/__init__.py +0 -11
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/utils/media_store.py +0 -77
- pixeltable/utils/s3.py +0 -17
- pixeltable/utils/sample.py +0 -25
- pixeltable-0.4.0rc3.dist-info/METADATA +0 -435
- pixeltable-0.4.0rc3.dist-info/RECORD +0 -189
- pixeltable-0.4.0rc3.dist-info/entry_points.txt +0 -3
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info/licenses}/LICENSE +0 -0
pixeltable/functions/vision.py
CHANGED
|
@@ -14,7 +14,7 @@ t.select(pxtv.draw_bounding_boxes(t.img, boxes=t.boxes, label=t.labels)).collect
|
|
|
14
14
|
import colorsys
|
|
15
15
|
import hashlib
|
|
16
16
|
from collections import defaultdict
|
|
17
|
-
from typing import Any
|
|
17
|
+
from typing import Any
|
|
18
18
|
|
|
19
19
|
import numpy as np
|
|
20
20
|
import PIL.Image
|
|
@@ -293,13 +293,13 @@ def __create_label_colors(labels: list[Any]) -> dict[Any, str]:
|
|
|
293
293
|
def draw_bounding_boxes(
|
|
294
294
|
img: PIL.Image.Image,
|
|
295
295
|
boxes: list[list[int]],
|
|
296
|
-
labels:
|
|
297
|
-
color:
|
|
298
|
-
box_colors:
|
|
296
|
+
labels: list[Any] | None = None,
|
|
297
|
+
color: str | None = None,
|
|
298
|
+
box_colors: list[str] | None = None,
|
|
299
299
|
fill: bool = False,
|
|
300
300
|
width: int = 1,
|
|
301
|
-
font:
|
|
302
|
-
font_size:
|
|
301
|
+
font: str | None = None,
|
|
302
|
+
font_size: int | None = None,
|
|
303
303
|
) -> PIL.Image.Image:
|
|
304
304
|
"""
|
|
305
305
|
Draws bounding boxes on the given image.
|
|
@@ -352,7 +352,7 @@ def draw_bounding_boxes(
|
|
|
352
352
|
from PIL import ImageColor, ImageDraw, ImageFont
|
|
353
353
|
|
|
354
354
|
# set default font if not provided
|
|
355
|
-
txt_font:
|
|
355
|
+
txt_font: ImageFont.ImageFont | ImageFont.FreeTypeFont = (
|
|
356
356
|
ImageFont.load_default() if font is None else ImageFont.truetype(font=font, size=font_size or 10)
|
|
357
357
|
)
|
|
358
358
|
|
pixeltable/functions/whisper.py
CHANGED
|
@@ -6,10 +6,11 @@ This UDF will cause Pixeltable to invoke the relevant model locally. In order to
|
|
|
6
6
|
first `pip install openai-whisper`.
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
|
-
from typing import TYPE_CHECKING,
|
|
9
|
+
from typing import TYPE_CHECKING, Sequence
|
|
10
10
|
|
|
11
11
|
import pixeltable as pxt
|
|
12
12
|
from pixeltable.env import Env
|
|
13
|
+
from pixeltable.utils.code import local_public_names
|
|
13
14
|
|
|
14
15
|
if TYPE_CHECKING:
|
|
15
16
|
from whisper import Whisper # type: ignore[import-untyped]
|
|
@@ -20,16 +21,16 @@ def transcribe(
|
|
|
20
21
|
audio: pxt.Audio,
|
|
21
22
|
*,
|
|
22
23
|
model: str,
|
|
23
|
-
temperature:
|
|
24
|
-
compression_ratio_threshold:
|
|
25
|
-
logprob_threshold:
|
|
26
|
-
no_speech_threshold:
|
|
24
|
+
temperature: Sequence[float] | None = (0.0, 0.2, 0.4, 0.6, 0.8, 1.0),
|
|
25
|
+
compression_ratio_threshold: float | None = 2.4,
|
|
26
|
+
logprob_threshold: float | None = -1.0,
|
|
27
|
+
no_speech_threshold: float | None = 0.6,
|
|
27
28
|
condition_on_previous_text: bool = True,
|
|
28
|
-
initial_prompt:
|
|
29
|
+
initial_prompt: str | None = None,
|
|
29
30
|
word_timestamps: bool = False,
|
|
30
31
|
prepend_punctuations: str = '"\'“¿([{-',
|
|
31
32
|
append_punctuations: str = '"\'.。,,!!??::”)]}、', # noqa: RUF001
|
|
32
|
-
decode_options:
|
|
33
|
+
decode_options: dict | None = None,
|
|
33
34
|
) -> dict:
|
|
34
35
|
"""
|
|
35
36
|
Transcribe an audio file using Whisper.
|
|
@@ -90,3 +91,10 @@ def _lookup_model(model_id: str, device: str) -> 'Whisper':
|
|
|
90
91
|
|
|
91
92
|
|
|
92
93
|
_model_cache: dict[tuple[str, str], 'Whisper'] = {}
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
__all__ = local_public_names(__name__)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def __dir__() -> list[str]:
|
|
100
|
+
return __all__
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
"""WhisperX audio transcription and diarization functions."""
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING, Any
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
|
|
7
|
+
import pixeltable as pxt
|
|
8
|
+
from pixeltable.config import Config
|
|
9
|
+
from pixeltable.functions.util import resolve_torch_device
|
|
10
|
+
from pixeltable.utils.code import local_public_names
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from transformers import Wav2Vec2Model
|
|
14
|
+
from whisperx.asr import FasterWhisperPipeline # type: ignore[import-untyped]
|
|
15
|
+
from whisperx.diarize import DiarizationPipeline # type: ignore[import-untyped]
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@pxt.udf
|
|
19
|
+
def transcribe(
|
|
20
|
+
audio: pxt.Audio,
|
|
21
|
+
*,
|
|
22
|
+
model: str,
|
|
23
|
+
diarize: bool = False,
|
|
24
|
+
compute_type: str | None = None,
|
|
25
|
+
language: str | None = None,
|
|
26
|
+
task: str | None = None,
|
|
27
|
+
chunk_size: int | None = None,
|
|
28
|
+
alignment_model_name: str | None = None,
|
|
29
|
+
interpolate_method: str | None = None,
|
|
30
|
+
return_char_alignments: bool | None = None,
|
|
31
|
+
diarization_model_name: str | None = None,
|
|
32
|
+
num_speakers: int | None = None,
|
|
33
|
+
min_speakers: int | None = None,
|
|
34
|
+
max_speakers: int | None = None,
|
|
35
|
+
) -> dict:
|
|
36
|
+
"""
|
|
37
|
+
Transcribe an audio file using WhisperX.
|
|
38
|
+
|
|
39
|
+
This UDF runs a transcription model _locally_ using the WhisperX library,
|
|
40
|
+
equivalent to the WhisperX `transcribe` function, as described in the
|
|
41
|
+
[WhisperX library documentation](https://github.com/m-bain/whisperX).
|
|
42
|
+
|
|
43
|
+
If `diarize=True`, then speaker diarization will also be performed. Several of the UDF parameters are only valid if
|
|
44
|
+
`diarize=True`, as documented in the parameters list below.
|
|
45
|
+
|
|
46
|
+
__Requirements:__
|
|
47
|
+
|
|
48
|
+
- `pip install whisperx`
|
|
49
|
+
|
|
50
|
+
Args:
|
|
51
|
+
audio: The audio file to transcribe.
|
|
52
|
+
model: The name of the model to use for transcription.
|
|
53
|
+
diarize: Whether to perform speaker diarization.
|
|
54
|
+
compute_type: The compute type to use for the model (e.g., `'int8'`, `'float16'`). If `None`,
|
|
55
|
+
defaults to `'float16'` on CUDA devices and `'int8'` otherwise.
|
|
56
|
+
language: The language code for the transcription (e.g., `'en'` for English).
|
|
57
|
+
task: The task to perform (e.g., `'transcribe'` or `'translate'`). Defaults to `'transcribe'`.
|
|
58
|
+
chunk_size: The size of the audio chunks to process, in seconds. Defaults to `30`.
|
|
59
|
+
alignment_model_name: The name of the alignment model to use. If `None`, uses the default model for the given
|
|
60
|
+
language. Only valid if `diarize=True`.
|
|
61
|
+
interpolate_method: The method to use for interpolation of the alignment results. If not specified, uses the
|
|
62
|
+
WhisperX default (`'nearest'`). Only valid if `diarize=True`.
|
|
63
|
+
return_char_alignments: Whether to return character-level alignments. Defaults to `False`.
|
|
64
|
+
Only valid if `diarize=True`.
|
|
65
|
+
diarization_model_name: The name of the diarization model to use. Defaults to
|
|
66
|
+
`pyannote/speaker-diarization-3.1`. Only valid if `diarize=True`.
|
|
67
|
+
num_speakers: The number of speakers to expect in the audio. By default, the model will try to detect the
|
|
68
|
+
number of speakers. Only valid if `diarize=True`.
|
|
69
|
+
min_speakers: If specified, the minimum number of speakers to expect in the audio.
|
|
70
|
+
Only valid if `diarize=True`.
|
|
71
|
+
max_speakers: If specified, the maximum number of speakers to expect in the audio.
|
|
72
|
+
Only valid if `diarize=True`.
|
|
73
|
+
|
|
74
|
+
Returns:
|
|
75
|
+
A dictionary containing the audio transcription, diarization (if enabled), and various other metadata.
|
|
76
|
+
|
|
77
|
+
Examples:
|
|
78
|
+
Add a computed column that applies the model `tiny.en` to an existing Pixeltable column `tbl.audio`
|
|
79
|
+
of the table `tbl`:
|
|
80
|
+
|
|
81
|
+
>>> tbl.add_computed_column(result=transcribe(tbl.audio, model='tiny.en'))
|
|
82
|
+
|
|
83
|
+
Add a computed column that applies the model `tiny.en` to an existing Pixeltable column `tbl.audio`
|
|
84
|
+
of the table `tbl`, with speaker diarization enabled, expecting at least 2 speakers:
|
|
85
|
+
|
|
86
|
+
>>> tbl.add_computed_column(
|
|
87
|
+
... result=transcribe(
|
|
88
|
+
... tbl.audio, model='tiny.en', diarize=True, min_speakers=2
|
|
89
|
+
... )
|
|
90
|
+
... )
|
|
91
|
+
"""
|
|
92
|
+
import whisperx # type: ignore[import-untyped]
|
|
93
|
+
|
|
94
|
+
if not diarize:
|
|
95
|
+
args = locals()
|
|
96
|
+
for param in (
|
|
97
|
+
'alignment_model_name',
|
|
98
|
+
'interpolate_method',
|
|
99
|
+
'return_char_alignments',
|
|
100
|
+
'diarization_model_name',
|
|
101
|
+
'num_speakers',
|
|
102
|
+
'min_speakers',
|
|
103
|
+
'max_speakers',
|
|
104
|
+
):
|
|
105
|
+
if args[param] is not None:
|
|
106
|
+
raise pxt.Error(f'`{param}` can only be set if `diarize=True`')
|
|
107
|
+
|
|
108
|
+
device = resolve_torch_device('auto', allow_mps=False)
|
|
109
|
+
compute_type = compute_type or ('float16' if device == 'cuda' else 'int8')
|
|
110
|
+
transcription_model = _lookup_transcription_model(model, device, compute_type)
|
|
111
|
+
audio_array: np.ndarray = whisperx.load_audio(audio)
|
|
112
|
+
kwargs: dict[str, Any] = {'language': language, 'task': task}
|
|
113
|
+
if chunk_size is not None:
|
|
114
|
+
kwargs['chunk_size'] = chunk_size
|
|
115
|
+
result: dict[str, Any] = transcription_model.transcribe(audio_array, batch_size=16, **kwargs)
|
|
116
|
+
|
|
117
|
+
if diarize:
|
|
118
|
+
# Alignment
|
|
119
|
+
alignment_model, metadata = _lookup_alignment_model(result['language'], device, alignment_model_name)
|
|
120
|
+
kwargs = {}
|
|
121
|
+
if interpolate_method is not None:
|
|
122
|
+
kwargs['interpolate_method'] = interpolate_method
|
|
123
|
+
if return_char_alignments is not None:
|
|
124
|
+
kwargs['return_char_alignments'] = return_char_alignments
|
|
125
|
+
result = whisperx.align(result['segments'], alignment_model, metadata, audio_array, device, **kwargs)
|
|
126
|
+
|
|
127
|
+
# Diarization
|
|
128
|
+
diarization_model = _lookup_diarization_model(device, diarization_model_name)
|
|
129
|
+
diarization_segments = diarization_model(
|
|
130
|
+
audio_array, num_speakers=num_speakers, min_speakers=min_speakers, max_speakers=max_speakers
|
|
131
|
+
)
|
|
132
|
+
result = whisperx.assign_word_speakers(diarization_segments, result)
|
|
133
|
+
|
|
134
|
+
return result
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def _lookup_transcription_model(model: str, device: str, compute_type: str) -> 'FasterWhisperPipeline':
|
|
138
|
+
import whisperx
|
|
139
|
+
|
|
140
|
+
key = (model, device, compute_type)
|
|
141
|
+
if key not in _model_cache:
|
|
142
|
+
transcription_model = whisperx.load_model(model, device, compute_type=compute_type)
|
|
143
|
+
_model_cache[key] = transcription_model
|
|
144
|
+
return _model_cache[key]
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def _lookup_alignment_model(language_code: str, device: str, model_name: str | None) -> tuple['Wav2Vec2Model', dict]:
|
|
148
|
+
import whisperx
|
|
149
|
+
|
|
150
|
+
key = (language_code, device, model_name)
|
|
151
|
+
if key not in _alignment_model_cache:
|
|
152
|
+
model, metadata = whisperx.load_align_model(language_code=language_code, device=device, model_name=model_name)
|
|
153
|
+
_alignment_model_cache[key] = (model, metadata)
|
|
154
|
+
return _alignment_model_cache[key]
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _lookup_diarization_model(device: str, model_name: str | None) -> 'DiarizationPipeline':
|
|
158
|
+
from whisperx.diarize import DiarizationPipeline
|
|
159
|
+
|
|
160
|
+
key = (device, model_name)
|
|
161
|
+
if key not in _diarization_model_cache:
|
|
162
|
+
auth_token = Config.get().get_string_value('auth_token', section='hf')
|
|
163
|
+
kwargs: dict[str, Any] = {'device': device, 'use_auth_token': auth_token}
|
|
164
|
+
if model_name is not None:
|
|
165
|
+
kwargs['model_name'] = model_name
|
|
166
|
+
_diarization_model_cache[key] = DiarizationPipeline(**kwargs)
|
|
167
|
+
return _diarization_model_cache[key]
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
_model_cache: dict[tuple[str, str, str], 'FasterWhisperPipeline'] = {}
|
|
171
|
+
_alignment_model_cache: dict[tuple[str, str, str | None], tuple['Wav2Vec2Model', dict]] = {}
|
|
172
|
+
_diarization_model_cache: dict[tuple[str, str | None], 'DiarizationPipeline'] = {}
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
__all__ = local_public_names(__name__)
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def __dir__() -> list[str]:
|
|
179
|
+
return __all__
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
"""YOLOX object detection functions."""
|
|
2
|
+
|
|
1
3
|
import logging
|
|
2
4
|
from typing import TYPE_CHECKING
|
|
3
5
|
|
|
@@ -20,8 +22,6 @@ def yolox(images: Batch[PIL.Image.Image], *, model_id: str, threshold: float = 0
|
|
|
20
22
|
Computes YOLOX object detections for the specified image. `model_id` should reference one of the models
|
|
21
23
|
defined in the [YOLOX documentation](https://github.com/Megvii-BaseDetection/YOLOX).
|
|
22
24
|
|
|
23
|
-
YOLOX is part of the `pixeltable.ext` package: long-term support in Pixeltable is not guaranteed.
|
|
24
|
-
|
|
25
25
|
__Requirements__:
|
|
26
26
|
|
|
27
27
|
- `pip install pixeltable-yolox`
|
|
@@ -55,8 +55,6 @@ def yolo_to_coco(detections: dict) -> list:
|
|
|
55
55
|
"""
|
|
56
56
|
Converts the output of a YOLOX object detection model to COCO format.
|
|
57
57
|
|
|
58
|
-
YOLOX is part of the `pixeltable.ext` package: long-term support in Pixeltable is not guaranteed.
|
|
59
|
-
|
|
60
58
|
Args:
|
|
61
59
|
detections: The output of a YOLOX object detection model, as returned by `yolox`.
|
|
62
60
|
|