livekit-plugins-speechmatics 1.0.23__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -30,7 +30,7 @@ from livekit.agents import Plugin
30
30
 
31
31
 
32
32
  class SpeechmaticsPlugin(Plugin):
33
- def __init__(self):
33
+ def __init__(self) -> None:
34
34
  super().__init__(__name__, __version__, __package__)
35
35
 
36
36
 
@@ -69,6 +69,7 @@ class STT(stt.STT):
69
69
  operating_point="enhanced",
70
70
  enable_partials=True,
71
71
  max_delay=0.7,
72
+ speaker_diarization_config={"max_speakers": 2},
72
73
  )
73
74
  if not is_given(connection_settings):
74
75
  connection_settings = ConnectionSettings( # noqa: B008
@@ -105,13 +106,13 @@ class STT(stt.STT):
105
106
  language: NotGivenOr[str] = NOT_GIVEN,
106
107
  conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
107
108
  ) -> SpeechStream:
108
- config = dataclasses.replace(self._audio_settings)
109
+ transcription_config = dataclasses.replace(self._transcription_config)
109
110
  if is_given(language):
110
- config.language = language
111
+ transcription_config.language = language
111
112
  stream = SpeechStream(
112
113
  stt=self,
113
- transcription_config=self._transcription_config,
114
- audio_settings=config,
114
+ transcription_config=transcription_config,
115
+ audio_settings=self._audio_settings,
115
116
  connection_settings=self._connection_settings,
116
117
  conn_options=conn_options,
117
118
  http_session=self.session,
@@ -145,10 +146,10 @@ class SpeechStream(stt.SpeechStream):
145
146
  self._recognition_started = asyncio.Event()
146
147
  self._seq_no = 0
147
148
 
148
- async def _run(self):
149
+ async def _run(self) -> None:
149
150
  closing_ws = False
150
151
 
151
- async def send_task(ws: aiohttp.ClientWebSocketResponse):
152
+ async def send_task(ws: aiohttp.ClientWebSocketResponse) -> None:
152
153
  nonlocal closing_ws
153
154
 
154
155
  start_recognition_msg = {
@@ -186,7 +187,7 @@ class SpeechStream(stt.SpeechStream):
186
187
  )
187
188
  )
188
189
 
189
- async def recv_task(ws: aiohttp.ClientWebSocketResponse):
190
+ async def recv_task(ws: aiohttp.ClientWebSocketResponse) -> None:
190
191
  nonlocal closing_ws
191
192
  while True:
192
193
  msg = await ws.receive()
@@ -221,9 +222,9 @@ class SpeechStream(stt.SpeechStream):
221
222
 
222
223
  try:
223
224
  done, _ = await asyncio.wait(
224
- [tasks_group, wait_reconnect_task],
225
+ (tasks_group, wait_reconnect_task),
225
226
  return_when=asyncio.FIRST_COMPLETED,
226
- ) # type: ignore
227
+ )
227
228
  for task in done:
228
229
  if task != wait_reconnect_task:
229
230
  task.result()
@@ -316,6 +317,7 @@ def live_transcription_to_speech_data(data: dict) -> list[stt.SpeechData]:
316
317
  alt.get("confidence", 1.0),
317
318
  alt.get("language", "en"),
318
319
  )
320
+ speaker = alt.get("speaker")
319
321
 
320
322
  if not content:
321
323
  continue
@@ -326,8 +328,9 @@ def live_transcription_to_speech_data(data: dict) -> list[stt.SpeechData]:
326
328
  elif speech_data and start_time == speech_data[-1].end_time:
327
329
  speech_data[-1].text += " " + content
328
330
  else:
329
- speech_data.append(
330
- stt.SpeechData(language, content, start_time, end_time, confidence)
331
- )
331
+ sd = stt.SpeechData(language, content, start_time, end_time, confidence)
332
+ if speaker is not None:
333
+ sd.speaker_id = speaker
334
+ speech_data.append(sd)
332
335
 
333
336
  return speech_data
@@ -49,6 +49,9 @@ class TranscriptionConfig:
49
49
  transcript_filtering_config: Optional[dict] = None
50
50
  """Removes disfluencies with the remove_disfluencies setting."""
51
51
 
52
+ speaker_diarization_config: Optional[dict] = None
53
+ """Options for speaker diarization such as ``max_speakers``."""
54
+
52
55
  def asdict(self) -> dict[Any, Any]:
53
56
  """Returns model as a dict while excluding None values recursively."""
54
57
  return asdict(self, dict_factory=lambda x: {k: v for (k, v) in x if v is not None})
@@ -65,7 +68,7 @@ class AudioSettings:
65
68
  sample_rate: int = 16000
66
69
  """Sampling rate in hertz."""
67
70
 
68
- def asdict(self):
71
+ def asdict(self) -> dict[str, Any]:
69
72
  return {
70
73
  "type": "raw",
71
74
  "encoding": self.encoding,
@@ -16,7 +16,7 @@ async def get_access_token(api_key: str) -> str:
16
16
  if resp.status == 201:
17
17
  try:
18
18
  data = await resp.json()
19
- return data["key_value"]
19
+ return data["key_value"] # type: ignore
20
20
  except (ValueError, KeyError) as e:
21
21
  raise Exception(
22
22
  f"Failed to parse Speechmatics access token response: {e}"
@@ -29,12 +29,12 @@ async def get_access_token(api_key: str) -> str:
29
29
  )
30
30
 
31
31
 
32
- def get_sdk_version():
32
+ def get_sdk_version() -> str:
33
33
  version = importlib.metadata.version("livekit-plugins-speechmatics")
34
34
  return f"livekit-plugins-{version}"
35
35
 
36
36
 
37
- def sanitize_url(url, language):
37
+ def sanitize_url(url: str, language: str) -> str:
38
38
  from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
39
39
 
40
40
  parsed_url = urlparse(url)
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "1.0.23"
15
+ __version__ = "1.1.0"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: livekit-plugins-speechmatics
3
- Version: 1.0.23
3
+ Version: 1.1.0
4
4
  Summary: Agent Framework plugin for Speechmatics
5
5
  Project-URL: Documentation, https://docs.livekit.io
6
6
  Project-URL: Website, https://livekit.io/
@@ -18,7 +18,7 @@ Classifier: Topic :: Multimedia :: Sound/Audio
18
18
  Classifier: Topic :: Multimedia :: Video
19
19
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
20
  Requires-Python: >=3.9.0
21
- Requires-Dist: livekit-agents>=1.0.23
21
+ Requires-Dist: livekit-agents>=1.1.0
22
22
  Description-Content-Type: text/markdown
23
23
 
24
24
  # Speechmatics STT plugin for LiveKit Agents
@@ -0,0 +1,10 @@
1
+ livekit/plugins/speechmatics/__init__.py,sha256=LpY2NZfiSfEeqmjB37anBHdPMbNkSiNsnJ9GZ6Jf6Ac,1236
2
+ livekit/plugins/speechmatics/log.py,sha256=O1iyAF7cHUu_iMXh6l7KRwwWeDB5QyABI_qzAb0cs04,75
3
+ livekit/plugins/speechmatics/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ livekit/plugins/speechmatics/stt.py,sha256=10SRGs9j_8m0Enh3uw7tKvszNNErK90h4bD0mEHTO58,12000
5
+ livekit/plugins/speechmatics/types.py,sha256=x9Jl5yABOU4hkN0adKEIhv0WLR32QWnVRo53qvyIPkY,4970
6
+ livekit/plugins/speechmatics/utils.py,sha256=qVg7MenwUvp_f2AngntQlmZ4xb9b8c8NB5ftqGk1Wr0,1907
7
+ livekit/plugins/speechmatics/version.py,sha256=lFVJxRoJYH4JZgg7iJQF0_dlrbfVbRYYWQzbHnv_p-w,600
8
+ livekit_plugins_speechmatics-1.1.0.dist-info/METADATA,sha256=JaJtVnuE9UB6eFJj2wtdbiz64Qrwz8Q2lZk1xsPisEg,2049
9
+ livekit_plugins_speechmatics-1.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
10
+ livekit_plugins_speechmatics-1.1.0.dist-info/RECORD,,
@@ -1,10 +0,0 @@
1
- livekit/plugins/speechmatics/__init__.py,sha256=mHlhJcJJc0aIhQxHx6fUJC9epfGjw1E8b-FdR0eUzMQ,1228
2
- livekit/plugins/speechmatics/log.py,sha256=O1iyAF7cHUu_iMXh6l7KRwwWeDB5QyABI_qzAb0cs04,75
3
- livekit/plugins/speechmatics/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- livekit/plugins/speechmatics/stt.py,sha256=NrK3cljwbx5dEgslepG3ROszvok-hZadanNHRNo7oFk,11775
5
- livekit/plugins/speechmatics/types.py,sha256=LG9ZL_r9f0LMmKPaWqCNzOU6wQJmXp8w7S23xuQaHO8,4829
6
- livekit/plugins/speechmatics/utils.py,sha256=9pbI8yrB8e06WY4NOH_RaoTSmY6JJc-P12-Sm8pW51U,1867
7
- livekit/plugins/speechmatics/version.py,sha256=O5JviWLJVBpC-DAakcGUwagiTFI5ZD7HTTTp2woaBmo,601
8
- livekit_plugins_speechmatics-1.0.23.dist-info/METADATA,sha256=MUNHq8MNBRHJkQCo8yBl42rfQrMxl5oocVxszMNPq14,2051
9
- livekit_plugins_speechmatics-1.0.23.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
10
- livekit_plugins_speechmatics-1.0.23.dist-info/RECORD,,