pixeltable 0.4.7__py3-none-any.whl → 0.4.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +1 -1
- pixeltable/catalog/catalog.py +4 -6
- pixeltable/catalog/insertable_table.py +125 -28
- pixeltable/catalog/table.py +51 -15
- pixeltable/catalog/table_version.py +12 -8
- pixeltable/catalog/table_version_path.py +6 -5
- pixeltable/config.py +25 -9
- pixeltable/dataframe.py +3 -3
- pixeltable/env.py +89 -20
- pixeltable/exec/aggregation_node.py +1 -1
- pixeltable/exec/cache_prefetch_node.py +4 -3
- pixeltable/exec/exec_node.py +0 -8
- pixeltable/exec/expr_eval/globals.py +1 -0
- pixeltable/exec/expr_eval/schedulers.py +16 -4
- pixeltable/exec/in_memory_data_node.py +2 -3
- pixeltable/exprs/data_row.py +5 -5
- pixeltable/exprs/function_call.py +59 -21
- pixeltable/exprs/row_builder.py +11 -5
- pixeltable/func/expr_template_function.py +6 -3
- pixeltable/functions/__init__.py +2 -0
- pixeltable/functions/anthropic.py +1 -2
- pixeltable/functions/deepseek.py +5 -1
- pixeltable/functions/gemini.py +11 -2
- pixeltable/functions/huggingface.py +6 -12
- pixeltable/functions/openai.py +2 -1
- pixeltable/functions/video.py +5 -5
- pixeltable/functions/whisperx.py +177 -0
- pixeltable/{ext/functions → functions}/yolox.py +0 -4
- pixeltable/globals.py +16 -3
- pixeltable/io/fiftyone.py +3 -3
- pixeltable/io/label_studio.py +2 -1
- pixeltable/iterators/audio.py +3 -2
- pixeltable/iterators/document.py +0 -6
- pixeltable/metadata/__init__.py +3 -1
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +0 -16
- pixeltable/share/packager.py +6 -6
- pixeltable/share/publish.py +134 -7
- pixeltable/type_system.py +20 -4
- pixeltable/utils/media_store.py +131 -66
- pixeltable/utils/pydantic.py +60 -0
- {pixeltable-0.4.7.dist-info → pixeltable-0.4.9.dist-info}/METADATA +186 -121
- {pixeltable-0.4.7.dist-info → pixeltable-0.4.9.dist-info}/RECORD +47 -46
- pixeltable/ext/__init__.py +0 -17
- pixeltable/ext/functions/__init__.py +0 -11
- pixeltable/ext/functions/whisperx.py +0 -77
- {pixeltable-0.4.7.dist-info → pixeltable-0.4.9.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.7.dist-info → pixeltable-0.4.9.dist-info}/entry_points.txt +0 -0
- {pixeltable-0.4.7.dist-info → pixeltable-0.4.9.dist-info}/licenses/LICENSE +0 -0
pixeltable/ext/__init__.py
DELETED
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Extended integrations for Pixeltable. This package contains experimental or demonstration features that
|
|
3
|
-
are not intended for production use. Long-term support cannot be guaranteed, usually because the features
|
|
4
|
-
have dependencies whose future support is unclear.
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
# ruff: noqa: F401
|
|
8
|
-
|
|
9
|
-
from pixeltable.utils.code import local_public_names
|
|
10
|
-
|
|
11
|
-
from . import functions
|
|
12
|
-
|
|
13
|
-
__all__ = local_public_names(__name__)
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
def __dir__() -> list[str]:
|
|
17
|
-
return __all__
|
|
@@ -1,77 +0,0 @@
|
|
|
1
|
-
from typing import TYPE_CHECKING, Optional
|
|
2
|
-
|
|
3
|
-
from pixeltable.utils.code import local_public_names
|
|
4
|
-
|
|
5
|
-
if TYPE_CHECKING:
|
|
6
|
-
from whisperx.asr import FasterWhisperPipeline # type: ignore[import-untyped]
|
|
7
|
-
|
|
8
|
-
import pixeltable as pxt
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
@pxt.udf
|
|
12
|
-
def transcribe(
|
|
13
|
-
audio: pxt.Audio,
|
|
14
|
-
*,
|
|
15
|
-
model: str,
|
|
16
|
-
compute_type: Optional[str] = None,
|
|
17
|
-
language: Optional[str] = None,
|
|
18
|
-
chunk_size: int = 30,
|
|
19
|
-
) -> dict:
|
|
20
|
-
"""
|
|
21
|
-
Transcribe an audio file using WhisperX.
|
|
22
|
-
|
|
23
|
-
This UDF runs a transcription model _locally_ using the WhisperX library,
|
|
24
|
-
equivalent to the WhisperX `transcribe` function, as described in the
|
|
25
|
-
[WhisperX library documentation](https://github.com/m-bain/whisperX).
|
|
26
|
-
|
|
27
|
-
WhisperX is part of the `pixeltable.ext` package: long-term support in Pixeltable is not guaranteed.
|
|
28
|
-
|
|
29
|
-
__Requirements:__
|
|
30
|
-
|
|
31
|
-
- `pip install whisperx`
|
|
32
|
-
|
|
33
|
-
Args:
|
|
34
|
-
audio: The audio file to transcribe.
|
|
35
|
-
model: The name of the model to use for transcription.
|
|
36
|
-
|
|
37
|
-
See the [WhisperX library documentation](https://github.com/m-bain/whisperX) for details
|
|
38
|
-
on the remaining parameters.
|
|
39
|
-
|
|
40
|
-
Returns:
|
|
41
|
-
A dictionary containing the transcription and various other metadata.
|
|
42
|
-
|
|
43
|
-
Examples:
|
|
44
|
-
Add a computed column that applies the model `tiny.en` to an existing Pixeltable column `tbl.audio`
|
|
45
|
-
of the table `tbl`:
|
|
46
|
-
|
|
47
|
-
>>> tbl.add_computed_column(result=transcribe(tbl.audio, model='tiny.en'))
|
|
48
|
-
"""
|
|
49
|
-
import torch
|
|
50
|
-
import whisperx # type: ignore[import-untyped]
|
|
51
|
-
|
|
52
|
-
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
|
53
|
-
compute_type = compute_type or ('float16' if device == 'cuda' else 'int8')
|
|
54
|
-
model = _lookup_model(model, device, compute_type)
|
|
55
|
-
audio_array = whisperx.load_audio(audio)
|
|
56
|
-
result = model.transcribe(audio_array, batch_size=16, language=language, chunk_size=chunk_size)
|
|
57
|
-
return result
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
def _lookup_model(model_id: str, device: str, compute_type: str) -> 'FasterWhisperPipeline':
|
|
61
|
-
import whisperx
|
|
62
|
-
|
|
63
|
-
key = (model_id, device, compute_type)
|
|
64
|
-
if key not in _model_cache:
|
|
65
|
-
model = whisperx.load_model(model_id, device, compute_type=compute_type)
|
|
66
|
-
_model_cache[key] = model
|
|
67
|
-
return _model_cache[key]
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
_model_cache: dict[tuple[str, str, str], 'FasterWhisperPipeline'] = {}
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
__all__ = local_public_names(__name__)
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
def __dir__() -> list[str]:
|
|
77
|
-
return __all__
|
|
File without changes
|
|
File without changes
|
|
File without changes
|