PyPI - livekit-plugins-speechmatics - Versions diffs - 1.0.23__py3-none-any.whl → 1.1.0__py3-none-any.whl - Mend

livekit-plugins-speechmatics 1.0.23__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

livekit/plugins/speechmatics/__init__.py CHANGED Viewed

@@ -30,7 +30,7 @@ from livekit.agents import Plugin
 class SpeechmaticsPlugin(Plugin):
-    def __init__(self):
+    def __init__(self) -> None:
         super().__init__(__name__, __version__, __package__)

livekit/plugins/speechmatics/stt.py CHANGED Viewed

@@ -69,6 +69,7 @@ class STT(stt.STT):
                 operating_point="enhanced",
                 enable_partials=True,
                 max_delay=0.7,
+                speaker_diarization_config={"max_speakers": 2},
             )
         if not is_given(connection_settings):
             connection_settings = ConnectionSettings(  # noqa: B008
@@ -105,13 +106,13 @@ class STT(stt.STT):
         language: NotGivenOr[str] = NOT_GIVEN,
         conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
     ) -> SpeechStream:
-        config = dataclasses.replace(self._audio_settings)
+        transcription_config = dataclasses.replace(self._transcription_config)
         if is_given(language):
-            config.language = language
+            transcription_config.language = language
         stream = SpeechStream(
             stt=self,
-            transcription_config=self._transcription_config,
-            audio_settings=config,
+            transcription_config=transcription_config,
+            audio_settings=self._audio_settings,
             connection_settings=self._connection_settings,
             conn_options=conn_options,
             http_session=self.session,
@@ -145,10 +146,10 @@ class SpeechStream(stt.SpeechStream):
         self._recognition_started = asyncio.Event()
         self._seq_no = 0
-    async def _run(self):
+    async def _run(self) -> None:
         closing_ws = False
-        async def send_task(ws: aiohttp.ClientWebSocketResponse):
+        async def send_task(ws: aiohttp.ClientWebSocketResponse) -> None:
             nonlocal closing_ws
             start_recognition_msg = {
@@ -186,7 +187,7 @@ class SpeechStream(stt.SpeechStream):
                 )
             )
-        async def recv_task(ws: aiohttp.ClientWebSocketResponse):
+        async def recv_task(ws: aiohttp.ClientWebSocketResponse) -> None:
             nonlocal closing_ws
             while True:
                 msg = await ws.receive()
@@ -221,9 +222,9 @@ class SpeechStream(stt.SpeechStream):
                 try:
                     done, _ = await asyncio.wait(
-                        [tasks_group, wait_reconnect_task],
+                        (tasks_group, wait_reconnect_task),
                         return_when=asyncio.FIRST_COMPLETED,
-                    )  # type: ignore
+                    )
                     for task in done:
                         if task != wait_reconnect_task:
                             task.result()
@@ -316,6 +317,7 @@ def live_transcription_to_speech_data(data: dict) -> list[stt.SpeechData]:
                 alt.get("confidence", 1.0),
                 alt.get("language", "en"),
             )
+            speaker = alt.get("speaker")
             if not content:
                 continue
@@ -326,8 +328,9 @@ def live_transcription_to_speech_data(data: dict) -> list[stt.SpeechData]:
             elif speech_data and start_time == speech_data[-1].end_time:
                 speech_data[-1].text += " " + content
             else:
-                speech_data.append(
-                    stt.SpeechData(language, content, start_time, end_time, confidence)
-                )
+                sd = stt.SpeechData(language, content, start_time, end_time, confidence)
+                if speaker is not None:
+                    sd.speaker_id = speaker
+                speech_data.append(sd)
     return speech_data

livekit/plugins/speechmatics/types.py CHANGED Viewed

@@ -49,6 +49,9 @@ class TranscriptionConfig:
     transcript_filtering_config: Optional[dict] = None
     """Removes disfluencies with the remove_disfluencies setting."""
+    speaker_diarization_config: Optional[dict] = None
+    """Options for speaker diarization such as ``max_speakers``."""
     def asdict(self) -> dict[Any, Any]:
         """Returns model as a dict while excluding None values recursively."""
         return asdict(self, dict_factory=lambda x: {k: v for (k, v) in x if v is not None})
@@ -65,7 +68,7 @@ class AudioSettings:
     sample_rate: int = 16000
     """Sampling rate in hertz."""
-    def asdict(self):
+    def asdict(self) -> dict[str, Any]:
         return {
             "type": "raw",
             "encoding": self.encoding,

livekit/plugins/speechmatics/utils.py CHANGED Viewed

@@ -16,7 +16,7 @@ async def get_access_token(api_key: str) -> str:
             if resp.status == 201:
                 try:
                     data = await resp.json()
-                    return data["key_value"]
+                    return data["key_value"]  # type: ignore
                 except (ValueError, KeyError) as e:
                     raise Exception(
                         f"Failed to parse Speechmatics access token response: {e}"
@@ -29,12 +29,12 @@ async def get_access_token(api_key: str) -> str:
                 )
-def get_sdk_version():
+def get_sdk_version() -> str:
     version = importlib.metadata.version("livekit-plugins-speechmatics")
     return f"livekit-plugins-{version}"
-def sanitize_url(url, language):
+def sanitize_url(url: str, language: str) -> str:
     from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
     parsed_url = urlparse(url)

livekit/plugins/speechmatics/version.py CHANGED Viewed

@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__ = "1.0.23"
+__version__ = "1.1.0"

{livekit_plugins_speechmatics-1.0.23.dist-info → livekit_plugins_speechmatics-1.1.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: livekit-plugins-speechmatics
-Version: 1.0.23
+Version: 1.1.0
 Summary: Agent Framework plugin for Speechmatics
 Project-URL: Documentation, https://docs.livekit.io
 Project-URL: Website, https://livekit.io/
@@ -18,7 +18,7 @@ Classifier: Topic :: Multimedia :: Sound/Audio
 Classifier: Topic :: Multimedia :: Video
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.9.0
-Requires-Dist: livekit-agents>=1.0.23
+Requires-Dist: livekit-agents>=1.1.0
 Description-Content-Type: text/markdown
 # Speechmatics STT plugin for LiveKit Agents

livekit_plugins_speechmatics-1.1.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,10 @@
+livekit/plugins/speechmatics/__init__.py,sha256=LpY2NZfiSfEeqmjB37anBHdPMbNkSiNsnJ9GZ6Jf6Ac,1236
+livekit/plugins/speechmatics/log.py,sha256=O1iyAF7cHUu_iMXh6l7KRwwWeDB5QyABI_qzAb0cs04,75
+livekit/plugins/speechmatics/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+livekit/plugins/speechmatics/stt.py,sha256=10SRGs9j_8m0Enh3uw7tKvszNNErK90h4bD0mEHTO58,12000
+livekit/plugins/speechmatics/types.py,sha256=x9Jl5yABOU4hkN0adKEIhv0WLR32QWnVRo53qvyIPkY,4970
+livekit/plugins/speechmatics/utils.py,sha256=qVg7MenwUvp_f2AngntQlmZ4xb9b8c8NB5ftqGk1Wr0,1907
+livekit/plugins/speechmatics/version.py,sha256=lFVJxRoJYH4JZgg7iJQF0_dlrbfVbRYYWQzbHnv_p-w,600
+livekit_plugins_speechmatics-1.1.0.dist-info/METADATA,sha256=JaJtVnuE9UB6eFJj2wtdbiz64Qrwz8Q2lZk1xsPisEg,2049
+livekit_plugins_speechmatics-1.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+livekit_plugins_speechmatics-1.1.0.dist-info/RECORD,,

livekit_plugins_speechmatics-1.0.23.dist-info/RECORD DELETED Viewed

@@ -1,10 +0,0 @@
-livekit/plugins/speechmatics/__init__.py,sha256=mHlhJcJJc0aIhQxHx6fUJC9epfGjw1E8b-FdR0eUzMQ,1228
-livekit/plugins/speechmatics/log.py,sha256=O1iyAF7cHUu_iMXh6l7KRwwWeDB5QyABI_qzAb0cs04,75
-livekit/plugins/speechmatics/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-livekit/plugins/speechmatics/stt.py,sha256=NrK3cljwbx5dEgslepG3ROszvok-hZadanNHRNo7oFk,11775
-livekit/plugins/speechmatics/types.py,sha256=LG9ZL_r9f0LMmKPaWqCNzOU6wQJmXp8w7S23xuQaHO8,4829
-livekit/plugins/speechmatics/utils.py,sha256=9pbI8yrB8e06WY4NOH_RaoTSmY6JJc-P12-Sm8pW51U,1867
-livekit/plugins/speechmatics/version.py,sha256=O5JviWLJVBpC-DAakcGUwagiTFI5ZD7HTTTp2woaBmo,601
-livekit_plugins_speechmatics-1.0.23.dist-info/METADATA,sha256=MUNHq8MNBRHJkQCo8yBl42rfQrMxl5oocVxszMNPq14,2051
-livekit_plugins_speechmatics-1.0.23.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-livekit_plugins_speechmatics-1.0.23.dist-info/RECORD,,

{livekit_plugins_speechmatics-1.0.23.dist-info → livekit_plugins_speechmatics-1.1.0.dist-info}/WHEEL RENAMED Viewed

File without changes

livekit-plugins-speechmatics 1.0.23__py3-none-any.whl → 1.1.0__py3-none-any.whl

livekit-plugins-speechmatics 1.0.23__py3-none-any.whl → 1.1.0__py3-none-any.whl