pixeltable 0.4.8__py3-none-any.whl → 0.4.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +1 -1
- pixeltable/catalog/insertable_table.py +125 -28
- pixeltable/catalog/table.py +10 -1
- pixeltable/config.py +1 -0
- pixeltable/env.py +57 -4
- pixeltable/functions/__init__.py +2 -0
- pixeltable/functions/audio.py +2 -1
- pixeltable/functions/gemini.py +8 -0
- pixeltable/functions/video.py +534 -81
- pixeltable/functions/whisper.py +8 -0
- pixeltable/functions/whisperx.py +177 -0
- pixeltable/{ext/functions → functions}/yolox.py +0 -4
- pixeltable/globals.py +3 -1
- pixeltable/iterators/video.py +138 -0
- pixeltable/metadata/__init__.py +3 -1
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/type_system.py +20 -4
- pixeltable/utils/av.py +111 -0
- pixeltable/utils/code.py +2 -1
- pixeltable/utils/pydantic.py +60 -0
- {pixeltable-0.4.8.dist-info → pixeltable-0.4.10.dist-info}/METADATA +1 -1
- {pixeltable-0.4.8.dist-info → pixeltable-0.4.10.dist-info}/RECORD +26 -24
- pixeltable/ext/__init__.py +0 -17
- pixeltable/ext/functions/__init__.py +0 -11
- pixeltable/ext/functions/whisperx.py +0 -77
- {pixeltable-0.4.8.dist-info → pixeltable-0.4.10.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.8.dist-info → pixeltable-0.4.10.dist-info}/entry_points.txt +0 -0
- {pixeltable-0.4.8.dist-info → pixeltable-0.4.10.dist-info}/licenses/LICENSE +0 -0
pixeltable/utils/av.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
3
|
+
import av
|
|
4
|
+
import av.stream
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def get_metadata(path: str) -> dict:
    """
    Collect container-level and per-stream metadata for the media file at `path`.

    Args:
        path: location of the media file; passed to `av.open()`.

    Returns:
        A dict with the keys 'bit_exact', 'bit_rate', 'size', 'metadata' and 'streams'. 'streams' holds one
        dict per stream of the container, as produced by `__get_stream_metadata()`.
    """
    with av.open(path) as media:
        assert isinstance(media, av.container.InputContainer)

        # gather the per-stream summaries first, then assemble the container-level record
        per_stream = []
        for stream in media.streams:
            per_stream.append(__get_stream_metadata(stream))

        return {
            'bit_exact': getattr(media, 'bit_exact', False),  # False if the container lacks the attribute
            'bit_rate': media.bit_rate,
            'size': media.size,
            'metadata': media.metadata,
            'streams': per_stream,
        }
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def __get_stream_metadata(stream: av.stream.Stream) -> dict:
    """
    Summarize a single stream as a plain dict.

    Streams that are neither audio nor video are reported with their 'type' only. For audio and video
    streams the result carries the duration (raw and in seconds), time base, frame count, stream metadata
    and a nested 'codec_context' dict. Audio streams add the channel count to 'codec_context'; video streams
    add the pixel format there, plus frame dimensions and frame rates at the top level.

    Args:
        stream: the stream to describe.

    Returns:
        The metadata dict described above.
    """

    def as_float(value: Any) -> float | None:
        # float conversion that lets a missing value pass through as None
        return None if value is None else float(value)

    kind = stream.type
    if kind != 'audio' and kind != 'video':
        return {'type': kind}  # Currently unsupported

    ctx = stream.codec_context
    codec_info: dict[str, Any] = {}
    codec_info['name'] = ctx.name
    codec_info['codec_tag'] = ctx.codec_tag.encode('unicode-escape').decode('utf-8')
    codec_info['profile'] = ctx.profile

    # the duration in seconds is only defined when both the duration and the time base are known
    duration = stream.duration
    time_base = stream.time_base
    if duration is None or time_base is None:
        seconds = None
    else:
        seconds = float(duration * time_base)

    info = {
        'type': kind,
        'duration': duration,
        'time_base': as_float(time_base),
        'duration_seconds': seconds,
        'frames': stream.frames,
        'metadata': stream.metadata,
        'codec_context': codec_info,
    }

    if kind == 'audio':
        # Additional metadata for audio
        n_channels = getattr(stream.codec_context, 'channels', None)
        codec_info['channels'] = None if n_channels is None else int(n_channels)
        return info

    assert kind == 'video'
    assert isinstance(stream, av.video.stream.VideoStream)
    # Additional metadata for video
    codec_info['pix_fmt'] = getattr(stream.codec_context, 'pix_fmt', None)
    info['width'] = stream.width
    info['height'] = stream.height
    info['frames'] = stream.frames
    info['average_rate'] = as_float(stream.average_rate)
    info['base_rate'] = as_float(stream.base_rate)
    info['guessed_rate'] = as_float(stream.guessed_rate)
    return info
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def get_video_duration(path: str) -> float | None:
    """
    Return the duration of the first video stream of the file at `path`, in seconds.

    The duration recorded for the stream is used when present. Otherwise the stream is demuxed and the
    presentation timestamp (pts) of the last packet that carries one is used instead.

    Args:
        path: location of the media file; passed to `av.open()`.

    Returns:
        The duration in seconds, or None if the file has no video stream, the stream has no time base, or
        no duration could be determined.
    """
    with av.open(path) as container:
        video_streams = container.streams.video
        # Indexing an empty stream collection raises IndexError, so "no video stream" has to be detected
        # before taking element 0.
        if len(video_streams) == 0:
            return None
        video_stream = video_streams[0]

        time_base = video_stream.time_base
        if time_base is None:
            # without a time base, stream timestamps cannot be converted to seconds
            return None

        if video_stream.duration is not None:
            return float(video_stream.duration * time_base)

        # if duration is not in the header, look for it in the last packet
        last_pts: int | None = None
        for packet in container.demux(video_stream):
            if packet.pts is not None:
                last_pts = packet.pts
        if last_pts is not None:
            return float(last_pts * time_base)

        return None
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def has_audio_stream(path: str) -> bool:
    """
    Report whether the media file at `path` contains at least one audio stream.

    The decision is based on the 'type' entry of each stream record returned by `get_metadata()`.
    """
    for stream_md in get_metadata(path)['streams']:
        if stream_md['type'] == 'audio':
            return True
    return False
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def ffmpeg_clip_cmd(input_path: str, output_path: str, start_time: float, duration: float | None = None) -> list[str]:
|
|
93
|
+
# the order of arguments is critical: -ss <start> -t <duration> -i <input>
|
|
94
|
+
cmd = ['ffmpeg', '-ss', str(start_time)]
|
|
95
|
+
if duration is not None:
|
|
96
|
+
cmd.extend(['-t', str(duration)])
|
|
97
|
+
cmd.extend(
|
|
98
|
+
[
|
|
99
|
+
'-i', # Input file
|
|
100
|
+
input_path,
|
|
101
|
+
'-y', # Overwrite output file
|
|
102
|
+
'-loglevel',
|
|
103
|
+
'error', # Only show errors
|
|
104
|
+
'-c',
|
|
105
|
+
'copy', # Stream copy (no re-encoding)
|
|
106
|
+
'-map',
|
|
107
|
+
'0', # Copy all streams from input
|
|
108
|
+
output_path,
|
|
109
|
+
]
|
|
110
|
+
)
|
|
111
|
+
return cmd
|
pixeltable/utils/code.py
CHANGED
|
@@ -21,7 +21,8 @@ def local_public_names(mod_name: str, exclude: Optional[list[str]] = None) -> li
|
|
|
21
21
|
for obj in mod.__dict__.values():
|
|
22
22
|
if isinstance(obj, Function):
|
|
23
23
|
# Pixeltable function
|
|
24
|
-
|
|
24
|
+
if not obj.name.startswith('_'):
|
|
25
|
+
names.append(obj.name)
|
|
25
26
|
elif isinstance(obj, types.FunctionType):
|
|
26
27
|
# Python function
|
|
27
28
|
if obj.__module__ == mod.__name__ and not obj.__name__.startswith('_'):
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import typing
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from enum import Enum
|
|
4
|
+
from types import UnionType
|
|
5
|
+
from typing import Any, Union
|
|
6
|
+
|
|
7
|
+
import pydantic
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def is_json_convertible(model: type[pydantic.BaseModel]) -> bool:
    """
    Report whether every type hint of a Pydantic model is JSON-compatible.

    The hints are resolved with `typing.get_type_hints()` and each one is judged by
    `_type_is_json_convertible()`. A model without any hints is considered convertible.
    """
    for field_type in typing.get_type_hints(model).values():
        if not _type_is_json_convertible(field_type):
            return False
    return True
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _type_is_json_convertible(type_hint: Any) -> bool:
|
|
20
|
+
"""
|
|
21
|
+
Recursively check if a type hint represents a JSON-compatible type.
|
|
22
|
+
|
|
23
|
+
TODO: also allow ndarrays and PIL.Image.Image, once we support those within json structures.
|
|
24
|
+
"""
|
|
25
|
+
if type_hint is type(None):
|
|
26
|
+
return True
|
|
27
|
+
if type_hint is Any:
|
|
28
|
+
return False
|
|
29
|
+
|
|
30
|
+
if type_hint in (str, int, float, bool, datetime):
|
|
31
|
+
return True
|
|
32
|
+
|
|
33
|
+
if isinstance(type_hint, type) and issubclass(type_hint, Enum):
|
|
34
|
+
return all(isinstance(member.value, (str, int, float, bool, type(None))) for member in type_hint)
|
|
35
|
+
|
|
36
|
+
if isinstance(type_hint, type) and issubclass(type_hint, pydantic.BaseModel):
|
|
37
|
+
return is_json_convertible(type_hint)
|
|
38
|
+
|
|
39
|
+
origin = typing.get_origin(type_hint)
|
|
40
|
+
args = typing.get_args(type_hint)
|
|
41
|
+
|
|
42
|
+
if origin in (Union, UnionType):
|
|
43
|
+
return all(_type_is_json_convertible(arg) for arg in args)
|
|
44
|
+
|
|
45
|
+
if origin in (list, tuple):
|
|
46
|
+
return all(_type_is_json_convertible(arg) for arg in args) if len(args) > 0 else False
|
|
47
|
+
|
|
48
|
+
if origin is dict:
|
|
49
|
+
if len(args) != 2:
|
|
50
|
+
# we can't tell what this is
|
|
51
|
+
return False
|
|
52
|
+
key_type, value_type = args
|
|
53
|
+
# keys must be strings, values must be json-convertible
|
|
54
|
+
return key_type is str and _type_is_json_convertible(value_type)
|
|
55
|
+
|
|
56
|
+
# Literal types are json-convertible if their values are
|
|
57
|
+
if origin is typing.Literal:
|
|
58
|
+
return all(isinstance(val, (str, int, float, bool, type(None))) for val in args)
|
|
59
|
+
|
|
60
|
+
return False
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pixeltable
|
|
3
|
-
Version: 0.4.8
|
|
3
|
+
Version: 0.4.10
|
|
4
4
|
Summary: AI Data Infrastructure: Declarative, Multimodal, and Incremental
|
|
5
5
|
Project-URL: homepage, https://pixeltable.com/
|
|
6
6
|
Project-URL: repository, https://github.com/pixeltable/pixeltable
|
|
@@ -1,24 +1,24 @@
|
|
|
1
|
-
pixeltable/__init__.py,sha256=
|
|
1
|
+
pixeltable/__init__.py,sha256=wJ_4oQdkBAaaVKM8XiZKKSsWPnoemZxh34o6_5vDcxk,1562
|
|
2
2
|
pixeltable/__version__.py,sha256=LnMIuAxx6nAQDMev_jnZyUdgsaiE3F8lulfXQBRl9qQ,112
|
|
3
|
-
pixeltable/config.py,sha256
|
|
3
|
+
pixeltable/config.py,sha256=-aoSVF0Aak83IC-u-XANw3if76TDq5VnnWNWoFDR5Hc,8390
|
|
4
4
|
pixeltable/dataframe.py,sha256=I6iEJGD4pivUN-cPVFq_rcniZN7C55xpr37sMJ2BIdE,62986
|
|
5
|
-
pixeltable/env.py,sha256=
|
|
5
|
+
pixeltable/env.py,sha256=vmqDgsfonYLYubsR1N4n5H7aSo4MXtlnBN1Z8xFOeFI,44443
|
|
6
6
|
pixeltable/exceptions.py,sha256=Gm8d3TL2iiv6Pj2DLd29wp_j41qNBhxXL9iTQnL4Nk4,1116
|
|
7
|
-
pixeltable/globals.py,sha256=
|
|
7
|
+
pixeltable/globals.py,sha256=8NijkEmtjY5me6J8zF4G-t1v5_z4q7btOK2yjUREUak,39118
|
|
8
8
|
pixeltable/plan.py,sha256=4yAe7ExAqaSvkFxwK7LPH_HpmoumwqoLeOo7czJ8CyQ,48001
|
|
9
9
|
pixeltable/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
10
|
pixeltable/store.py,sha256=CneWUmgN-EwaPYLcizlAxONC7WYwMr8SNpSFeNBBmOA,22885
|
|
11
|
-
pixeltable/type_system.py,sha256=
|
|
11
|
+
pixeltable/type_system.py,sha256=UfPZZy4zJ2kGvdHXI9rqxOGAjgIxCZ9QGvvidPWcq-M,56153
|
|
12
12
|
pixeltable/catalog/__init__.py,sha256=zw6hiyAIjMBxCExtsr7G51ul2XQ9fTQQKcs45rIy7xA,682
|
|
13
13
|
pixeltable/catalog/catalog.py,sha256=gaq10XFwkr6jyv8yVi5xV3_oiDkPvqVe55vxOo14W6k,93853
|
|
14
14
|
pixeltable/catalog/column.py,sha256=MXa5o3ku94T8ZFEL7wnAvqvlk65fOmmHPqIvrUVf3uo,13514
|
|
15
15
|
pixeltable/catalog/dir.py,sha256=VYTscPlKR6XhupPTXlJ8txAHxS5GSpPJ3LIleDJagVQ,2047
|
|
16
16
|
pixeltable/catalog/globals.py,sha256=uMIDsbeDzFxZbcgKDTOiT5plC1gAKgz1oxxdh1odIPw,2648
|
|
17
|
-
pixeltable/catalog/insertable_table.py,sha256=
|
|
17
|
+
pixeltable/catalog/insertable_table.py,sha256=VUuJ8z7OtMqgy_LMzkn1KzeLXdR-9poTttClscQ_uaU,13899
|
|
18
18
|
pixeltable/catalog/named_function.py,sha256=vZ-j7P4HugWh9OmUzBMwyRYvO3tQn9jWyJz_1stPavU,1210
|
|
19
19
|
pixeltable/catalog/path.py,sha256=O3FfxrvyX2crijBhp_2k4-3mG3BFxwba-tlPB74QtJQ,3780
|
|
20
20
|
pixeltable/catalog/schema_object.py,sha256=rQ6-3rzqnOHyEEHi97kai2S7BO3D9AkH7rirnfbGc14,1785
|
|
21
|
-
pixeltable/catalog/table.py,sha256=
|
|
21
|
+
pixeltable/catalog/table.py,sha256=NLo8mcM8SKM8jC4uzRT5elhrC0XTGunjQYznqrCz3w0,81315
|
|
22
22
|
pixeltable/catalog/table_version.py,sha256=jTYKzAdQuHBrknQLADBqjLhKwwqeVxmAPosMKsL051Q,64983
|
|
23
23
|
pixeltable/catalog/table_version_handle.py,sha256=FTPRqcGY-h-POcWyZbd9b8P2D5zIw5OSUvwF_dbyCGo,3608
|
|
24
24
|
pixeltable/catalog/table_version_path.py,sha256=IaFVDH06_6ZMuBv5eLNCRTlWizpvz95jgAzqp4OVx_o,9713
|
|
@@ -69,10 +69,6 @@ pixeltable/exprs/sql_element_cache.py,sha256=c7Q6vFK4xnf9vmcRYnXiAcwPBBwmw0dolft
|
|
|
69
69
|
pixeltable/exprs/string_op.py,sha256=PGWRH1yUaqj7xszdumIBOTHzVkXE0k831jXxIeFPDog,4131
|
|
70
70
|
pixeltable/exprs/type_cast.py,sha256=_nDzTxg5kXVGLewI0FrH2zmwJzgptdxYd5Jvuyig0UI,2322
|
|
71
71
|
pixeltable/exprs/variable.py,sha256=UwWwaNECbtwyC8v0g8iqCa3a6mO8z9lK7ta5NrlCwvs,1493
|
|
72
|
-
pixeltable/ext/__init__.py,sha256=UgDXWzGWiQIrwOuEvWTePLBcR2kecllPAE7gp-42Awg,457
|
|
73
|
-
pixeltable/ext/functions/__init__.py,sha256=Ox3kUHn5IslVEmEKsjrHfkHDrUkmLl9RCO2YkrPJkgc,193
|
|
74
|
-
pixeltable/ext/functions/whisperx.py,sha256=qda6kFQSvZTY2asfrYPwHb1cvSa03LbhJ-Wf9b7qPhw,2355
|
|
75
|
-
pixeltable/ext/functions/yolox.py,sha256=dX22nMb-0n2hZi7WhZ1Y4LIpFk5loyeXXuSUcc2Fgrg,3724
|
|
76
72
|
pixeltable/func/__init__.py,sha256=SQPtGr_9dZNyXzxaZQcP3oVLKnbbs4UqV6sg8XUQHxQ,535
|
|
77
73
|
pixeltable/func/aggregate_function.py,sha256=5_MgqHAlMaacX2sPIHv_auTvYXtqR5MIZy_WqYQSdho,13264
|
|
78
74
|
pixeltable/func/callable_function.py,sha256=g_pA-g631YcFGLix9PpHYfgjOeS2qF0Csm1VxX8fah0,9278
|
|
@@ -85,14 +81,14 @@ pixeltable/func/query_template_function.py,sha256=aX6GgANSdDTQwrObEV-B_la_oVRVky
|
|
|
85
81
|
pixeltable/func/signature.py,sha256=LdHbdim14Zu7Xt1pMhOCzl6Xn2fq5CQQpwSXmu28umw,14988
|
|
86
82
|
pixeltable/func/tools.py,sha256=2_M_u0Jiy5-uToZziB4O54aTuJeaytPmh71q3I2ydNw,6062
|
|
87
83
|
pixeltable/func/udf.py,sha256=6tKpMt37t3BmXwRyA5fFAd6OM4D5EPEd2KuAr7DQhr0,13231
|
|
88
|
-
pixeltable/functions/__init__.py,sha256=
|
|
84
|
+
pixeltable/functions/__init__.py,sha256=ZeRB7ksbzjdrvePXtd_mNxyP2RhjvN0ayl5nv7TdWcQ,613
|
|
89
85
|
pixeltable/functions/anthropic.py,sha256=2Ja-pryC_3Yd1sXW-pibRuvKjgyfYqOhhl6nBWNOBt0,10504
|
|
90
|
-
pixeltable/functions/audio.py,sha256=
|
|
86
|
+
pixeltable/functions/audio.py,sha256=S9xSg45Fx5kmB4NxOTSG99_5Kxc8kFfxuawV7qjMeS8,1660
|
|
91
87
|
pixeltable/functions/bedrock.py,sha256=lTCFHjYunF3minBGWcjXR90yJ8resFjXr4niyKhfxms,4217
|
|
92
88
|
pixeltable/functions/date.py,sha256=qs1svJ9FVod3OTa5hQNKIuashb6tVhW_2EAEXYGQX74,5308
|
|
93
89
|
pixeltable/functions/deepseek.py,sha256=iw59TKKcw3VqbHMHB2ugtcTPeTVKuHp_3pfkjF6DYmE,3550
|
|
94
90
|
pixeltable/functions/fireworks.py,sha256=q7eWlYfiWbA0d9r3CB_NAe1fw3q-Z7qsw2gyGJNgWLQ,4786
|
|
95
|
-
pixeltable/functions/gemini.py,sha256=
|
|
91
|
+
pixeltable/functions/gemini.py,sha256=igtpGBiVekkaWtVE6X04pQ7C9md8nY42W7xU_XuMayE,8924
|
|
96
92
|
pixeltable/functions/globals.py,sha256=OyPJUJ4S6VWyzxstxIzk3xzYBGIEMwgk1RmSTWTZzdI,5106
|
|
97
93
|
pixeltable/functions/groq.py,sha256=FpR_LJpfZfzyhEvoBMMbQpQ-VQSRzBsS9U21qaINwww,3593
|
|
98
94
|
pixeltable/functions/huggingface.py,sha256=Y-io3EungSs5ibr43vLEXs4dz_Ej20F1nglD0fyLrXA,20371
|
|
@@ -108,9 +104,11 @@ pixeltable/functions/string.py,sha256=LdBNOna5PUSPmM5VlJ_qhmwzyFhumW0k6Dvx2rXSZt
|
|
|
108
104
|
pixeltable/functions/timestamp.py,sha256=3GVCVIWdry96Qk5XXuvbJ58Tp30iY5snBibzl2CHjQc,9143
|
|
109
105
|
pixeltable/functions/together.py,sha256=A8J19BXywyWQ6a2_n05-8uIG5jquOBGqPmW3mb-NrIc,8842
|
|
110
106
|
pixeltable/functions/util.py,sha256=uQNkyBSkTVMe1wbUI2Q0nz-mM3qPVTF86yK8c9OFIcE,954
|
|
111
|
-
pixeltable/functions/video.py,sha256=
|
|
107
|
+
pixeltable/functions/video.py,sha256=0Hgfi3PHA2BPVpgWEQ3RffFtvc2YkjX3UX3dXSzrEJk,27009
|
|
112
108
|
pixeltable/functions/vision.py,sha256=17h9bOm3NJyQzFMBwXDHMqnkcuCspyQJgHdBOXV1Ip8,15380
|
|
113
|
-
pixeltable/functions/whisper.py,sha256=
|
|
109
|
+
pixeltable/functions/whisper.py,sha256=u2QcDU7JdtgLIImCkFPkzjWEjLTJIrlSkAWqeITyIJw,3103
|
|
110
|
+
pixeltable/functions/whisperx.py,sha256=BT9gwXEf5V1lgDxynkrrH6gsuCLqjCzfMJKj5DaOtSM,7661
|
|
111
|
+
pixeltable/functions/yolox.py,sha256=ZdYr6WIqTCHOJoZSoXe4CbME54dYeeeOhkOi1I7VtcE,3518
|
|
114
112
|
pixeltable/index/__init__.py,sha256=97aFuxiP_oz1ldn5iq8IWApkOV8XG6ZIBW5-9rkS0vM,122
|
|
115
113
|
pixeltable/index/base.py,sha256=200s7v3Zy810bRlbSAYzxxaEjVssl6r8esTHiSvWRwQ,1704
|
|
116
114
|
pixeltable/index/btree.py,sha256=8B06D67ay0DFUtEBC5q4bLjxMq7ILpKyyoLAiSaamzA,2503
|
|
@@ -132,8 +130,8 @@ pixeltable/iterators/base.py,sha256=ZC0ZvXL4iw6AmT8cu-Mdx-T2UG9nmJYV1C6LK4efAfw,
|
|
|
132
130
|
pixeltable/iterators/document.py,sha256=7NIN5W5jHVm4v5_FzGsH0XJigtPCm8DfXJUc3_hEtHQ,20073
|
|
133
131
|
pixeltable/iterators/image.py,sha256=RrFdf5cnFIQzWKJk4uYi1m1p2qAiz909THYhRQ27DbY,3603
|
|
134
132
|
pixeltable/iterators/string.py,sha256=URj5edWp-CsorjN_8nnfWGvtIFs_Zh4VPm6htlJbFkU,1257
|
|
135
|
-
pixeltable/iterators/video.py,sha256=
|
|
136
|
-
pixeltable/metadata/__init__.py,sha256=
|
|
133
|
+
pixeltable/iterators/video.py,sha256=aKT2YxZGUsAifkWK434RDnqZj_gGtcQ1waN9AV98fMA,16105
|
|
134
|
+
pixeltable/metadata/__init__.py,sha256=oTO9kN6h4xJ2lsk4a2bq6ejAD-4wToy7b5_i3Pq1Qnc,3289
|
|
137
135
|
pixeltable/metadata/notes.py,sha256=3fdZDFpL1-b194Ejv0Y0YP-vbnV-XvVP9wOmZM9XARA,1545
|
|
138
136
|
pixeltable/metadata/schema.py,sha256=fs9W2SLh32Ehxc9AChVH7YCtlSSnQkgGMbEyOh0B4W0,13416
|
|
139
137
|
pixeltable/metadata/utils.py,sha256=NJQXWhhK1hdOZ4H3hh9N0mqbl-I9JqMUqrfA6OWLflE,2682
|
|
@@ -167,13 +165,16 @@ pixeltable/metadata/converters/convert_37.py,sha256=IVZGtKFaaYMGBs39V_H_okWvpxxa
|
|
|
167
165
|
pixeltable/metadata/converters/convert_38.py,sha256=YyNyocwzzdJRcI0YSCo_70Q4hSk63235iE4IxhwSEzs,1169
|
|
168
166
|
pixeltable/metadata/converters/convert_39.py,sha256=YaEfgStxtYGRbuRLFw8wTAZVJRzIU6zL6nPU2zuDcEU,4658
|
|
169
167
|
pixeltable/metadata/converters/util.py,sha256=QUBOj2F_6rCAdIo0lgD1IVgAM15Vmq7ikQspB4s0eQ8,7732
|
|
168
|
+
pixeltable/mypy/__init__.py,sha256=cD_oHXClR_bDM8qVNIfaOAgRhQjPfcWvLcinz79ua6o,54
|
|
169
|
+
pixeltable/mypy/mypy_plugin.py,sha256=KCjzKOeKW5CBqJOq9Ch7ZJ25ICPc4nlTB49DxtC6oDM,5460
|
|
170
170
|
pixeltable/share/__init__.py,sha256=AtR4nS6YkfkFRkXA-zZXFTK5pSQjHry8MnxdVLUk5SA,68
|
|
171
171
|
pixeltable/share/packager.py,sha256=5rSKnQCs3YP5h48d79bXEK4L8tLUSeTSbXaB8X9SmBI,31265
|
|
172
172
|
pixeltable/share/publish.py,sha256=KS_R59AuVkHWkXHwELP74xgSHs8Z5z8SBPMcjzttt44,11469
|
|
173
173
|
pixeltable/utils/__init__.py,sha256=45qEM20L2VuIe-Cc3BTKWFqQb-S7A8qDtmmgl77zYK0,1728
|
|
174
174
|
pixeltable/utils/arrow.py,sha256=Rooa02GL5k--D2utlKATtYKrrlsHbbi6JmkarXMux1M,6384
|
|
175
|
+
pixeltable/utils/av.py,sha256=omJufz62dzaTTwlR7quKfcT7apf8KkBLJ9cQ9240dt0,4016
|
|
175
176
|
pixeltable/utils/coco.py,sha256=Y1DWVYguZD4VhKyf7JruYfHWvhkJLq39fzbiSm5cdyY,7304
|
|
176
|
-
pixeltable/utils/code.py,sha256=
|
|
177
|
+
pixeltable/utils/code.py,sha256=3CZMVJm69JIG5sxmd56mjB4Fo4L-s0_Y8YvQeJIj0F0,1280
|
|
177
178
|
pixeltable/utils/console_output.py,sha256=x23iDnNwUbsr7Ec20BQ7BLATTsrQZflxc9NucAt_sVU,1150
|
|
178
179
|
pixeltable/utils/coroutine.py,sha256=d87kLlkVIZq2u0kTE7kJ5Tc_yjEkdGi5sXAuxjLLxXY,896
|
|
179
180
|
pixeltable/utils/dbms.py,sha256=cuQqlzLF7WON_mkJZ4QWlfX6lCxA97V32lhtMcOlDLg,2018
|
|
@@ -185,12 +186,13 @@ pixeltable/utils/formatter.py,sha256=tbMxE9rBw6wdKUnJhNZ8h9uAF8dZKcihQ2KesqAag9A
|
|
|
185
186
|
pixeltable/utils/http_server.py,sha256=6khOAtpVj1lDIm9Dx8VIECLm87cFEp4IFbAg8T92A2o,2441
|
|
186
187
|
pixeltable/utils/iceberg.py,sha256=COeNqqy5RRMkDGLS8CTnaUeAccG10x2fwP3e1veuqIA,522
|
|
187
188
|
pixeltable/utils/media_store.py,sha256=HVOuK5JTTvgSH_st0lsapv39Lnu29QGpkKUtZQybBTA,10560
|
|
189
|
+
pixeltable/utils/pydantic.py,sha256=-ztUsuRXA7B6bywb5Yy1h5pNQ2DnsT1d0oHMxqtK3WY,2011
|
|
188
190
|
pixeltable/utils/pytorch.py,sha256=564VHRdDHwD9h0v5lBHEDTJ8c6zx8wuzWYx8ZYjBxlI,3621
|
|
189
191
|
pixeltable/utils/s3.py,sha256=pxip2MlCqd2Qon2dzJXzfxvwtZyc-BAsjAnLL4J_OXY,587
|
|
190
192
|
pixeltable/utils/sql.py,sha256=Sa4Lh-VGe8GToU5W7DRiWf2lMl9B6saPqemiT0ZdHEc,806
|
|
191
193
|
pixeltable/utils/transactional_directory.py,sha256=OFKmu90oP7KwBAljwjnzP_w8euGdAXob3y4Nx9SCNHA,1357
|
|
192
|
-
pixeltable-0.4.
|
|
193
|
-
pixeltable-0.4.
|
|
194
|
-
pixeltable-0.4.
|
|
195
|
-
pixeltable-0.4.
|
|
196
|
-
pixeltable-0.4.
|
|
194
|
+
pixeltable-0.4.10.dist-info/METADATA,sha256=I3iYbF6fjvaQwBtbUNqF_KUbvzirqnv21npnqAJmxjc,24248
|
|
195
|
+
pixeltable-0.4.10.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
196
|
+
pixeltable-0.4.10.dist-info/entry_points.txt,sha256=rrKugZmxDtGnXCnEQ5UJMaaSYY7-g1cLjUZ4W1moIhM,98
|
|
197
|
+
pixeltable-0.4.10.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
198
|
+
pixeltable-0.4.10.dist-info/RECORD,,
|
pixeltable/ext/__init__.py
DELETED
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Extended integrations for Pixeltable. This package contains experimental or demonstration features that
|
|
3
|
-
are not intended for production use. Long-term support cannot be guaranteed, usually because the features
|
|
4
|
-
have dependencies whose future support is unclear.
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
# ruff: noqa: F401
|
|
8
|
-
|
|
9
|
-
from pixeltable.utils.code import local_public_names
|
|
10
|
-
|
|
11
|
-
from . import functions
|
|
12
|
-
|
|
13
|
-
__all__ = local_public_names(__name__)
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
def __dir__() -> list[str]:
    """Return the module's `__all__` list, so that `dir()` on the module shows only those names."""
    public_names = __all__
    return public_names
|
|
@@ -1,77 +0,0 @@
|
|
|
1
|
-
from typing import TYPE_CHECKING, Optional
|
|
2
|
-
|
|
3
|
-
from pixeltable.utils.code import local_public_names
|
|
4
|
-
|
|
5
|
-
if TYPE_CHECKING:
|
|
6
|
-
from whisperx.asr import FasterWhisperPipeline # type: ignore[import-untyped]
|
|
7
|
-
|
|
8
|
-
import pixeltable as pxt
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
@pxt.udf
def transcribe(
    audio: pxt.Audio,
    *,
    model: str,
    compute_type: Optional[str] = None,
    language: Optional[str] = None,
    chunk_size: int = 30,
) -> dict:
    """
    Transcribe an audio file with WhisperX.

    The transcription model is run _locally_ through the WhisperX library; this UDF corresponds to the
    WhisperX `transcribe` function described in the
    [WhisperX library documentation](https://github.com/m-bain/whisperX).

    WhisperX belongs to the `pixeltable.ext` package, for which long-term support in Pixeltable is not
    guaranteed.

    __Requirements:__

    - `pip install whisperx`

    Args:
        audio: The audio file to transcribe.
        model: The name of the model to use for transcription.

    The remaining parameters are explained in the
    [WhisperX library documentation](https://github.com/m-bain/whisperX).

    Returns:
        A dictionary containing the transcription and various other metadata.

    Examples:
        Add a computed column that applies the model `tiny.en` to an existing Pixeltable column `tbl.audio`
        of the table `tbl`:

        >>> tbl.add_computed_column(result=transcribe(tbl.audio, model='tiny.en'))
    """
    import torch
    import whisperx  # type: ignore[import-untyped]

    # use the GPU when torch reports one
    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'

    # when the caller does not choose a compute type, pick one based on the device
    if not compute_type:
        compute_type = 'float16' if device == 'cuda' else 'int8'

    pipeline = _lookup_model(model, device, compute_type)
    waveform = whisperx.load_audio(audio)
    return pipeline.transcribe(waveform, batch_size=16, language=language, chunk_size=chunk_size)
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
def _lookup_model(model_id: str, device: str, compute_type: str) -> 'FasterWhisperPipeline':
    """
    Return the WhisperX model for the given (model_id, device, compute_type) combination.

    Models are kept in the module-level `_model_cache`; `whisperx.load_model()` is only called for a
    combination that is not in the cache yet.
    """
    import whisperx

    cache_key = (model_id, device, compute_type)
    if cache_key in _model_cache:
        return _model_cache[cache_key]

    loaded = whisperx.load_model(model_id, device, compute_type=compute_type)
    _model_cache[cache_key] = loaded
    return loaded
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
_model_cache: dict[tuple[str, str, str], 'FasterWhisperPipeline'] = {}
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
__all__ = local_public_names(__name__)
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
def __dir__() -> list[str]:
    """Return the module's `__all__` list, so that `dir()` on the module shows only those names."""
    public_names = __all__
    return public_names
|
|
File without changes
|
|
File without changes
|
|
File without changes
|