livekit-plugins-speechmatics 1.0.23__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- livekit/plugins/speechmatics/__init__.py +1 -1
- livekit/plugins/speechmatics/stt.py +15 -12
- livekit/plugins/speechmatics/types.py +4 -1
- livekit/plugins/speechmatics/utils.py +3 -3
- livekit/plugins/speechmatics/version.py +1 -1
- {livekit_plugins_speechmatics-1.0.23.dist-info → livekit_plugins_speechmatics-1.1.0.dist-info}/METADATA +2 -2
- livekit_plugins_speechmatics-1.1.0.dist-info/RECORD +10 -0
- livekit_plugins_speechmatics-1.0.23.dist-info/RECORD +0 -10
- {livekit_plugins_speechmatics-1.0.23.dist-info → livekit_plugins_speechmatics-1.1.0.dist-info}/WHEEL +0 -0
@@ -69,6 +69,7 @@ class STT(stt.STT):
|
|
69
69
|
operating_point="enhanced",
|
70
70
|
enable_partials=True,
|
71
71
|
max_delay=0.7,
|
72
|
+
speaker_diarization_config={"max_speakers": 2},
|
72
73
|
)
|
73
74
|
if not is_given(connection_settings):
|
74
75
|
connection_settings = ConnectionSettings( # noqa: B008
|
@@ -105,13 +106,13 @@ class STT(stt.STT):
|
|
105
106
|
language: NotGivenOr[str] = NOT_GIVEN,
|
106
107
|
conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
|
107
108
|
) -> SpeechStream:
|
108
|
-
|
109
|
+
transcription_config = dataclasses.replace(self._transcription_config)
|
109
110
|
if is_given(language):
|
110
|
-
|
111
|
+
transcription_config.language = language
|
111
112
|
stream = SpeechStream(
|
112
113
|
stt=self,
|
113
|
-
transcription_config=
|
114
|
-
audio_settings=
|
114
|
+
transcription_config=transcription_config,
|
115
|
+
audio_settings=self._audio_settings,
|
115
116
|
connection_settings=self._connection_settings,
|
116
117
|
conn_options=conn_options,
|
117
118
|
http_session=self.session,
|
@@ -145,10 +146,10 @@ class SpeechStream(stt.SpeechStream):
|
|
145
146
|
self._recognition_started = asyncio.Event()
|
146
147
|
self._seq_no = 0
|
147
148
|
|
148
|
-
async def _run(self):
|
149
|
+
async def _run(self) -> None:
|
149
150
|
closing_ws = False
|
150
151
|
|
151
|
-
async def send_task(ws: aiohttp.ClientWebSocketResponse):
|
152
|
+
async def send_task(ws: aiohttp.ClientWebSocketResponse) -> None:
|
152
153
|
nonlocal closing_ws
|
153
154
|
|
154
155
|
start_recognition_msg = {
|
@@ -186,7 +187,7 @@ class SpeechStream(stt.SpeechStream):
|
|
186
187
|
)
|
187
188
|
)
|
188
189
|
|
189
|
-
async def recv_task(ws: aiohttp.ClientWebSocketResponse):
|
190
|
+
async def recv_task(ws: aiohttp.ClientWebSocketResponse) -> None:
|
190
191
|
nonlocal closing_ws
|
191
192
|
while True:
|
192
193
|
msg = await ws.receive()
|
@@ -221,9 +222,9 @@ class SpeechStream(stt.SpeechStream):
|
|
221
222
|
|
222
223
|
try:
|
223
224
|
done, _ = await asyncio.wait(
|
224
|
-
|
225
|
+
(tasks_group, wait_reconnect_task),
|
225
226
|
return_when=asyncio.FIRST_COMPLETED,
|
226
|
-
)
|
227
|
+
)
|
227
228
|
for task in done:
|
228
229
|
if task != wait_reconnect_task:
|
229
230
|
task.result()
|
@@ -316,6 +317,7 @@ def live_transcription_to_speech_data(data: dict) -> list[stt.SpeechData]:
|
|
316
317
|
alt.get("confidence", 1.0),
|
317
318
|
alt.get("language", "en"),
|
318
319
|
)
|
320
|
+
speaker = alt.get("speaker")
|
319
321
|
|
320
322
|
if not content:
|
321
323
|
continue
|
@@ -326,8 +328,9 @@ def live_transcription_to_speech_data(data: dict) -> list[stt.SpeechData]:
|
|
326
328
|
elif speech_data and start_time == speech_data[-1].end_time:
|
327
329
|
speech_data[-1].text += " " + content
|
328
330
|
else:
|
329
|
-
|
330
|
-
|
331
|
-
|
331
|
+
sd = stt.SpeechData(language, content, start_time, end_time, confidence)
|
332
|
+
if speaker is not None:
|
333
|
+
sd.speaker_id = speaker
|
334
|
+
speech_data.append(sd)
|
332
335
|
|
333
336
|
return speech_data
|
@@ -49,6 +49,9 @@ class TranscriptionConfig:
|
|
49
49
|
transcript_filtering_config: Optional[dict] = None
|
50
50
|
"""Removes disfluencies with the remove_disfluencies setting."""
|
51
51
|
|
52
|
+
speaker_diarization_config: Optional[dict] = None
|
53
|
+
"""Options for speaker diarization such as ``max_speakers``."""
|
54
|
+
|
52
55
|
def asdict(self) -> dict[Any, Any]:
|
53
56
|
"""Returns model as a dict while excluding None values recursively."""
|
54
57
|
return asdict(self, dict_factory=lambda x: {k: v for (k, v) in x if v is not None})
|
@@ -65,7 +68,7 @@ class AudioSettings:
|
|
65
68
|
sample_rate: int = 16000
|
66
69
|
"""Sampling rate in hertz."""
|
67
70
|
|
68
|
-
def asdict(self):
|
71
|
+
def asdict(self) -> dict[str, Any]:
|
69
72
|
return {
|
70
73
|
"type": "raw",
|
71
74
|
"encoding": self.encoding,
|
@@ -16,7 +16,7 @@ async def get_access_token(api_key: str) -> str:
|
|
16
16
|
if resp.status == 201:
|
17
17
|
try:
|
18
18
|
data = await resp.json()
|
19
|
-
return data["key_value"]
|
19
|
+
return data["key_value"] # type: ignore
|
20
20
|
except (ValueError, KeyError) as e:
|
21
21
|
raise Exception(
|
22
22
|
f"Failed to parse Speechmatics access token response: {e}"
|
@@ -29,12 +29,12 @@ async def get_access_token(api_key: str) -> str:
|
|
29
29
|
)
|
30
30
|
|
31
31
|
|
32
|
-
def get_sdk_version():
|
32
|
+
def get_sdk_version() -> str:
|
33
33
|
version = importlib.metadata.version("livekit-plugins-speechmatics")
|
34
34
|
return f"livekit-plugins-{version}"
|
35
35
|
|
36
36
|
|
37
|
-
def sanitize_url(url, language):
|
37
|
+
def sanitize_url(url: str, language: str) -> str:
|
38
38
|
from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
|
39
39
|
|
40
40
|
parsed_url = urlparse(url)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: livekit-plugins-speechmatics
|
3
|
-
Version: 1.0.23
|
3
|
+
Version: 1.1.0
|
4
4
|
Summary: Agent Framework plugin for Speechmatics
|
5
5
|
Project-URL: Documentation, https://docs.livekit.io
|
6
6
|
Project-URL: Website, https://livekit.io/
|
@@ -18,7 +18,7 @@ Classifier: Topic :: Multimedia :: Sound/Audio
|
|
18
18
|
Classifier: Topic :: Multimedia :: Video
|
19
19
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
20
20
|
Requires-Python: >=3.9.0
|
21
|
-
Requires-Dist: livekit-agents>=1.0.23
|
21
|
+
Requires-Dist: livekit-agents>=1.1.0
|
22
22
|
Description-Content-Type: text/markdown
|
23
23
|
|
24
24
|
# Speechmatics STT plugin for LiveKit Agents
|
@@ -0,0 +1,10 @@
|
|
1
|
+
livekit/plugins/speechmatics/__init__.py,sha256=LpY2NZfiSfEeqmjB37anBHdPMbNkSiNsnJ9GZ6Jf6Ac,1236
|
2
|
+
livekit/plugins/speechmatics/log.py,sha256=O1iyAF7cHUu_iMXh6l7KRwwWeDB5QyABI_qzAb0cs04,75
|
3
|
+
livekit/plugins/speechmatics/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
|
+
livekit/plugins/speechmatics/stt.py,sha256=10SRGs9j_8m0Enh3uw7tKvszNNErK90h4bD0mEHTO58,12000
|
5
|
+
livekit/plugins/speechmatics/types.py,sha256=x9Jl5yABOU4hkN0adKEIhv0WLR32QWnVRo53qvyIPkY,4970
|
6
|
+
livekit/plugins/speechmatics/utils.py,sha256=qVg7MenwUvp_f2AngntQlmZ4xb9b8c8NB5ftqGk1Wr0,1907
|
7
|
+
livekit/plugins/speechmatics/version.py,sha256=lFVJxRoJYH4JZgg7iJQF0_dlrbfVbRYYWQzbHnv_p-w,600
|
8
|
+
livekit_plugins_speechmatics-1.1.0.dist-info/METADATA,sha256=JaJtVnuE9UB6eFJj2wtdbiz64Qrwz8Q2lZk1xsPisEg,2049
|
9
|
+
livekit_plugins_speechmatics-1.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
10
|
+
livekit_plugins_speechmatics-1.1.0.dist-info/RECORD,,
|
@@ -1,10 +0,0 @@
|
|
1
|
-
livekit/plugins/speechmatics/__init__.py,sha256=mHlhJcJJc0aIhQxHx6fUJC9epfGjw1E8b-FdR0eUzMQ,1228
|
2
|
-
livekit/plugins/speechmatics/log.py,sha256=O1iyAF7cHUu_iMXh6l7KRwwWeDB5QyABI_qzAb0cs04,75
|
3
|
-
livekit/plugins/speechmatics/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
|
-
livekit/plugins/speechmatics/stt.py,sha256=NrK3cljwbx5dEgslepG3ROszvok-hZadanNHRNo7oFk,11775
|
5
|
-
livekit/plugins/speechmatics/types.py,sha256=LG9ZL_r9f0LMmKPaWqCNzOU6wQJmXp8w7S23xuQaHO8,4829
|
6
|
-
livekit/plugins/speechmatics/utils.py,sha256=9pbI8yrB8e06WY4NOH_RaoTSmY6JJc-P12-Sm8pW51U,1867
|
7
|
-
livekit/plugins/speechmatics/version.py,sha256=O5JviWLJVBpC-DAakcGUwagiTFI5ZD7HTTTp2woaBmo,601
|
8
|
-
livekit_plugins_speechmatics-1.0.23.dist-info/METADATA,sha256=MUNHq8MNBRHJkQCo8yBl42rfQrMxl5oocVxszMNPq14,2051
|
9
|
-
livekit_plugins_speechmatics-1.0.23.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
10
|
-
livekit_plugins_speechmatics-1.0.23.dist-info/RECORD,,
|
{livekit_plugins_speechmatics-1.0.23.dist-info → livekit_plugins_speechmatics-1.1.0.dist-info}/WHEEL
RENAMED
File without changes
|