PyPI - livekit-plugins-google - Versions diffs - 0.7.1__py3-none-any.whl → 0.7.3__py3-none-any.whl - Mend

livekit-plugins-google 0.7.1 (py3-none-any.whl) → 0.7.3 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

livekit/plugins/google/__init__.py CHANGED Viewed

@@ -29,3 +29,12 @@ class GooglePlugin(Plugin):
 Plugin.register_plugin(GooglePlugin())
+# Cleanup docs of unexported modules
+_module = dir()
+NOT_IN_ALL = [m for m in _module if m not in __all__]
+__pdoc__ = {}
+for n in NOT_IN_ALL:
+    __pdoc__[n] = False

livekit/plugins/google/stt.py CHANGED Viewed

@@ -20,8 +20,15 @@ from dataclasses import dataclass
 from typing import AsyncIterable, List, Union
 from livekit import agents, rtc
-from livekit.agents import stt, utils
+from livekit.agents import (
+    APIConnectionError,
+    APIStatusError,
+    APITimeoutError,
+    stt,
+    utils,
+)
+from google.api_core.exceptions import Aborted, DeadlineExceeded, GoogleAPICallError
 from google.auth import default as gauth_default
 from google.auth.exceptions import DefaultCredentialsError
 from google.cloud.speech_v2 import SpeechAsyncClient
@@ -43,6 +50,25 @@ class STTOptions:
     punctuate: bool
     spoken_punctuation: bool
     model: SpeechModels
+    keywords: List[tuple[str, float]] | None
+    def build_adaptation(self) -> cloud_speech.SpeechAdaptation | None:
+        if self.keywords:
+            return cloud_speech.SpeechAdaptation(
+                phrase_sets=[
+                    cloud_speech.SpeechAdaptation.AdaptationPhraseSet(
+                        inline_phrase_set=cloud_speech.PhraseSet(
+                            phrases=[
+                                cloud_speech.PhraseSet.Phrase(
+                                    value=keyword, boost=boost
+                                )
+                                for keyword, boost in self.keywords
+                            ]
+                        )
+                    )
+                ]
+            )
+        return None
 class STT(stt.STT):
@@ -57,6 +83,7 @@ class STT(stt.STT):
         model: SpeechModels = "long",
         credentials_info: dict | None = None,
         credentials_file: str | None = None,
+        keywords: List[tuple[str, float]] | None = None,
     ):
         """
         Create a new instance of Google STT.
@@ -93,6 +120,7 @@ class STT(stt.STT):
             punctuate=punctuate,
             spoken_punctuation=spoken_punctuation,
             model=model,
+            keywords=keywords,
         )
     def _ensure_client(self) -> SpeechAsyncClient:
@@ -141,7 +169,7 @@ class STT(stt.STT):
         return config
-    async def recognize(
+    async def _recognize_impl(
         self,
         buffer: utils.AudioBuffer,
         *,
@@ -156,6 +184,7 @@ class STT(stt.STT):
                 sample_rate_hertz=frame.sample_rate,
                 audio_channel_count=frame.num_channels,
             ),
+            adaptation=config.build_adaptation(),
             features=cloud_speech.RecognitionFeatures(
                 enable_automatic_punctuation=config.punctuate,
                 enable_spoken_punctuation=config.spoken_punctuation,
@@ -165,23 +194,39 @@ class STT(stt.STT):
             language_codes=config.languages,
         )
-        raw = await self._ensure_client().recognize(
-            cloud_speech.RecognizeRequest(
-                recognizer=self._recognizer, config=config, content=frame.data.tobytes()
+        try:
+            raw = await self._ensure_client().recognize(
+                cloud_speech.RecognizeRequest(
+                    recognizer=self._recognizer,
+                    config=config,
+                    content=frame.data.tobytes(),
+                )
             )
-        )
-        return _recognize_response_to_speech_event(raw)
+            return _recognize_response_to_speech_event(raw)
+        except DeadlineExceeded:
+            raise APITimeoutError()
+        except GoogleAPICallError as e:
+            raise APIStatusError(
+                e.message,
+                status_code=e.code or -1,
+                request_id=None,
+                body=None,
+            )
+        except Exception as e:
+            raise APIConnectionError() from e
     def stream(
         self, *, language: SpeechLanguages | str | None = None
     ) -> "SpeechStream":
         config = self._sanitize_options(language=language)
-        return SpeechStream(self._ensure_client(), self._recognizer, config)
+        return SpeechStream(self, self._ensure_client(), self._recognizer, config)
 class SpeechStream(stt.SpeechStream):
     def __init__(
         self,
+        stt: STT,
         client: SpeechAsyncClient,
         recognizer: str,
         config: STTOptions,
@@ -189,7 +234,7 @@ class SpeechStream(stt.SpeechStream):
         num_channels: int = 1,
         max_retry: int = 32,
     ) -> None:
-        super().__init__()
+        super().__init__(stt)
         self._client = client
         self._recognizer = recognizer
@@ -205,6 +250,7 @@ class SpeechStream(stt.SpeechStream):
                     sample_rate_hertz=self._sample_rate,
                     audio_channel_count=self._num_channels,
                 ),
+                adaptation=config.build_adaptation(),
                 language_codes=self._config.languages,
                 model=self._config.model,
                 features=cloud_speech.RecognitionFeatures(
@@ -257,6 +303,9 @@ class SpeechStream(stt.SpeechStream):
                 retry_count = 0  # connection successful, reset retry count
                 await self._run_stream(stream)
+            except Aborted:
+                logger.error("google stt connection aborted")
+                break
             except Exception as e:
                 if retry_count >= max_retry:
                     logger.error(

livekit/plugins/google/tts.py CHANGED Viewed

@@ -15,21 +15,22 @@
 from __future__ import annotations
 from dataclasses import dataclass
-from typing import Union
 from livekit import rtc
-from livekit.agents import tts, utils
+from livekit.agents import (
+    APIConnectionError,
+    APIStatusError,
+    APITimeoutError,
+    tts,
+    utils,
+)
+from google.api_core.exceptions import DeadlineExceeded, GoogleAPICallError
 from google.cloud import texttospeech
 from google.cloud.texttospeech_v1.types import SsmlVoiceGender, SynthesizeSpeechResponse
-from .log import logger
 from .models import AudioEncoding, Gender, SpeechLanguages
-LgType = Union[SpeechLanguages, str]
-GenderType = Union[Gender, str]
-AudioEncodingType = Union[AudioEncoding, str]
 @dataclass
 class _TTSOptions:
@@ -41,11 +42,13 @@ class TTS(tts.TTS):
     def __init__(
         self,
         *,
-        language: LgType = "en-US",
-        gender: GenderType = "neutral",
+        language: SpeechLanguages | str = "en-US",
+        gender: Gender | str = "neutral",
         voice_name: str = "",  # Not required
-        encoding: AudioEncodingType = "linear16",
+        encoding: AudioEncoding | str = "linear16",
         sample_rate: int = 24000,
+        pitch: int = 0,
+        effects_profile_id: str = "",
         speaking_rate: float = 1.0,
         credentials_info: dict | None = None,
         credentials_file: str | None = None,
@@ -56,6 +59,18 @@ class TTS(tts.TTS):
         Credentials must be provided, either by using the ``credentials_info`` dict, or reading
         from the file specified in ``credentials_file`` or the ``GOOGLE_APPLICATION_CREDENTIALS``
         environmental variable.
+        Args:
+            language (SpeechLanguages | str, optional): Language code (e.g., "en-US"). Default is "en-US".
+            gender (Gender | str, optional): Voice gender ("male", "female", "neutral"). Default is "neutral".
+            voice_name (str, optional): Specific voice name. Default is an empty string.
+            encoding (AudioEncoding | str, optional): Audio encoding format (e.g., "linear16"). Default is "linear16".
+            sample_rate (int, optional): Audio sample rate in Hz. Default is 24000.
+            pitch (float, optional): Speaking pitch, ranging from -20.0 to 20.0 semitones relative to the original pitch. Default is 0.
+            effects_profile_id (str): Optional identifier for selecting audio effects profiles to apply to the synthesized speech.
+            speaking_rate (float, optional): Speed of speech. Default is 1.0.
+            credentials_info (dict, optional): Dictionary containing Google Cloud credentials. Default is None.
+            credentials_file (str, optional): Path to the Google Cloud credentials JSON file. Default is None.
         """
         super().__init__(
@@ -70,14 +85,10 @@ class TTS(tts.TTS):
         self._credentials_info = credentials_info
         self._credentials_file = credentials_file
-        ssml_gender = SsmlVoiceGender.NEUTRAL
-        if gender == "male":
-            ssml_gender = SsmlVoiceGender.MALE
-        elif gender == "female":
-            ssml_gender = SsmlVoiceGender.FEMALE
         voice = texttospeech.VoiceSelectionParams(
-            name=voice_name, language_code=language, ssml_gender=ssml_gender
+            name=voice_name,
+            language_code=language,
+            ssml_gender=_gender_from_str(gender),
         )
         if encoding == "linear16" or encoding == "wav":
@@ -92,10 +103,36 @@ class TTS(tts.TTS):
             audio_config=texttospeech.AudioConfig(
                 audio_encoding=_audio_encoding,
                 sample_rate_hertz=sample_rate,
+                pitch=pitch,
+                effects_profile_id=effects_profile_id,
                 speaking_rate=speaking_rate,
             ),
         )
+    def update_options(
+        self,
+        *,
+        language: SpeechLanguages | str = "en-US",
+        gender: Gender | str = "neutral",
+        voice_name: str = "",  # Not required
+        speaking_rate: float = 1.0,
+    ) -> None:
+        """
+        Update the TTS options.
+        Args:
+            language (SpeechLanguages | str, optional): Language code (e.g., "en-US"). Default is "en-US".
+            gender (Gender | str, optional): Voice gender ("male", "female", "neutral"). Default is "neutral".
+            voice_name (str, optional): Specific voice name. Default is an empty string.
+            speaking_rate (float, optional): Speed of speech. Default is 1.0.
+        """
+        self._opts.voice = texttospeech.VoiceSelectionParams(
+            name=voice_name,
+            language_code=language,
+            ssml_gender=_gender_from_str(gender),
+        )
+        self._opts.audio_config.speaking_rate = speaking_rate
     def _ensure_client(self) -> texttospeech.TextToSpeechAsyncClient:
         if not self._client:
             if self._credentials_info:
@@ -118,57 +155,79 @@ class TTS(tts.TTS):
         return self._client
     def synthesize(self, text: str) -> "ChunkedStream":
-        return ChunkedStream(text, self._opts, self._ensure_client())
+        return ChunkedStream(self, text, self._opts, self._ensure_client())
 class ChunkedStream(tts.ChunkedStream):
     def __init__(
-        self, text: str, opts: _TTSOptions, client: texttospeech.TextToSpeechAsyncClient
+        self,
+        tts: TTS,
+        text: str,
+        opts: _TTSOptions,
+        client: texttospeech.TextToSpeechAsyncClient,
     ) -> None:
-        super().__init__()
-        self._text, self._opts, self._client = text, opts, client
+        super().__init__(tts, text)
+        self._opts, self._client = opts, client
-    @utils.log_exceptions(logger=logger)
     async def _main_task(self) -> None:
         request_id = utils.shortuuid()
-        segment_id = utils.shortuuid()
-        response: SynthesizeSpeechResponse = await self._client.synthesize_speech(
-            input=texttospeech.SynthesisInput(text=self._text),
-            voice=self._opts.voice,
-            audio_config=self._opts.audio_config,
-        )
-        data = response.audio_content
-        if self._opts.audio_config.audio_encoding == "mp3":
-            decoder = utils.codecs.Mp3StreamDecoder()
-            bstream = utils.audio.AudioByteStream(
-                sample_rate=self._opts.audio_config.sample_rate_hertz, num_channels=1
+        try:
+            response: SynthesizeSpeechResponse = await self._client.synthesize_speech(
+                input=texttospeech.SynthesisInput(text=self._input_text),
+                voice=self._opts.voice,
+                audio_config=self._opts.audio_config,
             )
-            for frame in decoder.decode_chunk(data):
-                for frame in bstream.write(frame.data):
-                    self._event_ch.send_nowait(
-                        tts.SynthesizedAudio(
-                            request_id=request_id, segment_id=segment_id, frame=frame
+            data = response.audio_content
+            if self._opts.audio_config.audio_encoding == "mp3":
+                decoder = utils.codecs.Mp3StreamDecoder()
+                bstream = utils.audio.AudioByteStream(
+                    sample_rate=self._opts.audio_config.sample_rate_hertz,
+                    num_channels=1,
+                )
+                for frame in decoder.decode_chunk(data):
+                    for frame in bstream.write(frame.data.tobytes()):
+                        self._event_ch.send_nowait(
+                            tts.SynthesizedAudio(request_id=request_id, frame=frame)
                         )
-                    )
-            for frame in bstream.flush():
+                for frame in bstream.flush():
+                    self._event_ch.send_nowait(
+                        tts.SynthesizedAudio(request_id=request_id, frame=frame)
+                    )
+            else:
+                data = data[44:]  # skip WAV header
                 self._event_ch.send_nowait(
                     tts.SynthesizedAudio(
-                        request_id=request_id, segment_id=segment_id, frame=frame
+                        request_id=request_id,
+                        frame=rtc.AudioFrame(
+                            data=data,
+                            sample_rate=self._opts.audio_config.sample_rate_hertz,
+                            num_channels=1,
+                            samples_per_channel=len(data) // 2,  # 16-bit
+                        ),
                     )
                 )
-        else:
-            data = data[44:]  # skip WAV header
-            self._event_ch.send_nowait(
-                tts.SynthesizedAudio(
-                    request_id=request_id,
-                    segment_id=segment_id,
-                    frame=rtc.AudioFrame(
-                        data=data,
-                        sample_rate=self._opts.audio_config.sample_rate_hertz,
-                        num_channels=1,
-                        samples_per_channel=len(data) // 2,  # 16-bit
-                    ),
-                )
+        except DeadlineExceeded:
+            raise APITimeoutError()
+        except GoogleAPICallError as e:
+            raise APIStatusError(
+                e.message,
+                status_code=e.code or -1,
+                request_id=None,
+                body=None,
             )
+        except Exception as e:
+            raise APIConnectionError() from e
+def _gender_from_str(gender: str) -> SsmlVoiceGender:
+    ssml_gender = SsmlVoiceGender.NEUTRAL
+    if gender == "male":
+        ssml_gender = SsmlVoiceGender.MALE
+    elif gender == "female":
+        ssml_gender = SsmlVoiceGender.FEMALE
+    return ssml_gender  # type: ignore

livekit/plugins/google/version.py CHANGED Viewed

@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__ = "0.7.1"
+__version__ = "0.7.3"

{livekit_plugins_google-0.7.1.dist-info → livekit_plugins_google-0.7.3.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: livekit-plugins-google
-Version: 0.7.1
+Version: 0.7.3
 Summary: Agent Framework plugin for services from Google Cloud
 Home-page: https://github.com/livekit/agents
 License: Apache-2.0
@@ -22,7 +22,7 @@ Description-Content-Type: text/markdown
 Requires-Dist: google-auth <3,>=2
 Requires-Dist: google-cloud-speech <3,>=2
 Requires-Dist: google-cloud-texttospeech <3,>=2
-Requires-Dist: livekit-agents >=0.8.0.dev0
+Requires-Dist: livekit-agents >=0.11
 # LiveKit Plugins Google

livekit_plugins_google-0.7.3.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,11 @@
+livekit/plugins/google/__init__.py,sha256=rqV6C5mFNDFlrA2IcGJrsebr2VxQwMzoDUjY1JhMBZM,1117
+livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
+livekit/plugins/google/models.py,sha256=n8pgTJ7xyJpPCZJ_y0GzaQq6LqYknL6K6trpi07-AxM,1307
+livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+livekit/plugins/google/stt.py,sha256=WjeqYsunW8jY-WHlnNeks7gR-TiojMRR7LYdAVdCxqY,15268
+livekit/plugins/google/tts.py,sha256=hRN8ul1lDXU8LPVEfbTszgBiRYsifZXCPMwk-Pv2KeA,8793
+livekit/plugins/google/version.py,sha256=yJeG0VwiekDJAk7GHcIAe43ebagJgloe-ZsqEGZnqzE,600
+livekit_plugins_google-0.7.3.dist-info/METADATA,sha256=8UvORpoVunOTq0xKxHEk8M3sexKFnBnu66DkEJCnrRY,1647
+livekit_plugins_google-0.7.3.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
+livekit_plugins_google-0.7.3.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
+livekit_plugins_google-0.7.3.dist-info/RECORD,,

{livekit_plugins_google-0.7.1.dist-info → livekit_plugins_google-0.7.3.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (75.1.0)
+Generator: setuptools (75.5.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

livekit_plugins_google-0.7.1.dist-info/RECORD DELETED Viewed

@@ -1,11 +0,0 @@
-livekit/plugins/google/__init__.py,sha256=CYbSmm5fEw71F_r_4pEApGaWQ_r15Y3ZEocH88a4yc8,948
-livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
-livekit/plugins/google/models.py,sha256=n8pgTJ7xyJpPCZJ_y0GzaQq6LqYknL6K6trpi07-AxM,1307
-livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-livekit/plugins/google/stt.py,sha256=XXDOISg-8U1MzVu543xLEB3-mr_NFKJp9qo1-ya2-Hc,13569
-livekit/plugins/google/tts.py,sha256=T9AHsxofwo3XaMciJPWh9O7lTZqDVYdQQlnFPiGWVbQ,6170
-livekit/plugins/google/version.py,sha256=JOBYrlKcxbTTRXkUKH0921GsmV-i71_KHczg2cgQiLc,600
-livekit_plugins_google-0.7.1.dist-info/METADATA,sha256=MyDLqZp1DC52KWx_Re3Hj0kO75l-Dg9z9IfiihtH4KY,1653
-livekit_plugins_google-0.7.1.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
-livekit_plugins_google-0.7.1.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
-livekit_plugins_google-0.7.1.dist-info/RECORD,,

{livekit_plugins_google-0.7.1.dist-info → livekit_plugins_google-0.7.3.dist-info}/top_level.txt RENAMED Viewed

File without changes

livekit-plugins-google 0.7.1__py3-none-any.whl → 0.7.3__py3-none-any.whl

livekit-plugins-google 0.7.1 (py3-none-any.whl) → 0.7.3 (py3-none-any.whl)