PyPI - livekit-plugins-google - Versions diffs - 1.0.21__py3-none-any.whl → 1.0.22__py3-none-any.whl - Mend

livekit-plugins-google 1.0.21__py3-none-any.whl → 1.0.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

livekit/plugins/google/tts.py CHANGED Viewed

@@ -14,6 +14,8 @@
 from __future__ import annotations
+import asyncio
+import weakref
 from dataclasses import dataclass
 from google.api_core.client_options import ClientOptions
@@ -25,6 +27,7 @@ from livekit.agents import (
     APIConnectOptions,
     APIStatusError,
     APITimeoutError,
+    tokenize,
     tts,
     utils,
 )
@@ -35,13 +38,21 @@ from livekit.agents.types import (
 )
 from livekit.agents.utils import is_given
+from .log import logger
 from .models import Gender, SpeechLanguages
+BUFFERED_WORDS_COUNT = 8
+NUM_CHANNELS = 1
+DEFAULT_VOICE_NAME = "en-US-Chirp3-HD-Charon"
+DEFAULT_LANGUAGE = "en-US"
+DEFAULT_GENDER = "neutral"
 @dataclass
 class _TTSOptions:
     voice: texttospeech.VoiceSelectionParams
     audio_config: texttospeech.AudioConfig
+    tokenizer: tokenize.SentenceTokenizer
 class TTS(tts.TTS):
@@ -59,6 +70,8 @@ class TTS(tts.TTS):
         audio_encoding: texttospeech.AudioEncoding = texttospeech.AudioEncoding.PCM,
         credentials_info: NotGivenOr[dict] = NOT_GIVEN,
         credentials_file: NotGivenOr[str] = NOT_GIVEN,
+        tokenizer: NotGivenOr[tokenize.SentenceTokenizer] = NOT_GIVEN,
+        use_streaming: NotGivenOr[bool] = NOT_GIVEN,
     ) -> None:
         """
         Create a new instance of Google TTS.
@@ -78,12 +91,14 @@ class TTS(tts.TTS):
             speaking_rate (float, optional): Speed of speech. Default is 1.0.
             credentials_info (dict, optional): Dictionary containing Google Cloud credentials. Default is None.
             credentials_file (str, optional): Path to the Google Cloud credentials JSON file. Default is None.
+            tokenizer (tokenize.SentenceTokenizer, optional): Tokenizer for the TTS. Default is a basic sentence tokenizer.
+            use_streaming (bool, optional): Whether to use streaming synthesis. Default is True.
         """  # noqa: E501
+        if not is_given(use_streaming):
+            use_streaming = True
         super().__init__(
-            capabilities=tts.TTSCapabilities(
-                streaming=False,
-            ),
+            capabilities=tts.TTSCapabilities(streaming=use_streaming),
             sample_rate=sample_rate,
             num_channels=1,
         )
@@ -93,15 +108,17 @@ class TTS(tts.TTS):
         self._credentials_file = credentials_file
         self._location = location
-        lang = language if is_given(language) else "en-US"
-        ssml_gender = _gender_from_str("neutral" if not is_given(gender) else gender)
-        name = "" if not is_given(voice_name) else voice_name
+        lang = language if is_given(language) else DEFAULT_LANGUAGE
+        ssml_gender = _gender_from_str(DEFAULT_GENDER if not is_given(gender) else gender)
+        name = DEFAULT_VOICE_NAME if not is_given(voice_name) else voice_name
         voice_params = texttospeech.VoiceSelectionParams(
             name=name,
             language_code=lang,
             ssml_gender=ssml_gender,
         )
+        if not is_given(tokenizer):
+            tokenizer = tokenize.basic.SentenceTokenizer(min_sentence_len=BUFFERED_WORDS_COUNT)
         self._opts = _TTSOptions(
             voice=voice_params,
@@ -112,7 +129,9 @@ class TTS(tts.TTS):
                 effects_profile_id=effects_profile_id,
                 speaking_rate=speaking_rate,
             ),
+            tokenizer=tokenizer,
         )
+        self._streams = weakref.WeakSet[SynthesizeStream]()
     def update_options(
         self,
@@ -168,6 +187,18 @@ class TTS(tts.TTS):
         assert self._client is not None
         return self._client
+    def stream(
+        self, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
+    ) -> SynthesizeStream:
+        stream = SynthesizeStream(
+            tts=self,
+            opts=self._opts,
+            client=self._ensure_client(),
+            conn_options=conn_options,
+        )
+        self._streams.add(stream)
+        return stream
     def synthesize(
         self,
         text: str,
@@ -182,6 +213,12 @@ class TTS(tts.TTS):
             client=self._ensure_client(),
         )
+    async def aclose(self) -> None:
+        for stream in list(self._streams):
+            await stream.aclose()
+        self._streams.clear()
+        await super().aclose()
 class ChunkedStream(tts.ChunkedStream):
     def __init__(
@@ -230,8 +267,105 @@ class ChunkedStream(tts.ChunkedStream):
             raise APITimeoutError() from None
         except GoogleAPICallError as e:
             raise APIStatusError(
-                e.message, status_code=e.code or -1, request_id=None, body=None
-            ) from None
+                f"{e.message} {e.details}", status_code=e.code or -1, request_id=None, body=None
+            ) from e
+        except Exception as e:
+            raise APIConnectionError() from e
+class SynthesizeStream(tts.SynthesizeStream):
+    def __init__(
+        self,
+        *,
+        tts: TTS,
+        opts: _TTSOptions,
+        client: texttospeech.TextToSpeechAsyncClient,
+        conn_options: APIConnectOptions,
+    ):
+        super().__init__(tts=tts, conn_options=conn_options)
+        self._opts, self._client = opts, client
+        self._segments_ch = utils.aio.Chan[tokenize.SentenceStream]()
+    async def _run(self) -> None:
+        request_id = utils.shortuuid()
+        @utils.log_exceptions(logger=logger)
+        async def _tokenize_input():
+            input_stream = None
+            async for input in self._input_ch:
+                if isinstance(input, str):
+                    if input_stream is None:
+                        input_stream = self._opts.tokenizer.stream()
+                        self._segments_ch.send_nowait(input_stream)
+                    input_stream.push_text(input)
+                elif isinstance(input, self._FlushSentinel):
+                    if input_stream:
+                        input_stream.end_input()
+                    input_stream = None
+            self._segments_ch.close()
+        @utils.log_exceptions(logger=logger)
+        async def _run_segments():
+            async for input_stream in self._segments_ch:
+                await self._run_stream(input_stream, request_id)
+        tasks = [
+            asyncio.create_task(_tokenize_input()),
+            asyncio.create_task(_run_segments()),
+        ]
+        try:
+            await asyncio.gather(*tasks)
+        except Exception as e:
+            raise APIConnectionError() from e
+    async def _run_stream(self, input_stream, request_id):
+        streaming_config = texttospeech.StreamingSynthesizeConfig(
+            voice=self._opts.voice,
+            streaming_audio_config=texttospeech.StreamingAudioConfig(
+                audio_encoding=texttospeech.AudioEncoding.PCM
+            ),
+        )
+        emitter = tts.SynthesizedAudioEmitter(event_ch=self._event_ch, request_id=request_id)
+        audio_bstream = utils.audio.AudioByteStream(
+            sample_rate=self._opts.audio_config.sample_rate_hertz,
+            num_channels=NUM_CHANNELS,
+        )
+        @utils.log_exceptions(logger=logger)
+        async def input_generator():
+            try:
+                yield texttospeech.StreamingSynthesizeRequest(streaming_config=streaming_config)
+                async for input in input_stream:
+                    self._mark_started()
+                    yield texttospeech.StreamingSynthesizeRequest(
+                        input=texttospeech.StreamingSynthesisInput(text=input.token)
+                    )
+            except Exception:
+                logger.exception("an error occurred while streaming input to google TTS")
+        try:
+            stream = await self._client.streaming_synthesize(
+                input_generator(),
+                timeout=self._conn_options.timeout,
+            )
+            async for resp in stream:
+                for frame in audio_bstream.write(resp.audio_content):
+                    emitter.push(frame)
+            for frame in audio_bstream.flush():
+                emitter.push(frame)
+            emitter.flush()
+        except DeadlineExceeded as e:
+            logger.debug(f"google tts deadline exceeded: {e}")
+            pass
+        except GoogleAPICallError as e:
+            raise APIStatusError(
+                f"{e.message} {e.details}",
+                status_code=e.code or -1,
+                request_id=request_id,
+                body=None,
+            ) from e
         except Exception as e:
             raise APIConnectionError() from e

livekit/plugins/google/version.py CHANGED Viewed

@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__ = "1.0.21"
+__version__ = "1.0.22"

{livekit_plugins_google-1.0.21.dist-info → livekit_plugins_google-1.0.22.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: livekit-plugins-google
-Version: 1.0.21
+Version: 1.0.22
 Summary: Agent Framework plugin for services from Google Cloud
 Project-URL: Documentation, https://docs.livekit.io
 Project-URL: Website, https://livekit.io/
@@ -22,7 +22,7 @@ Requires-Dist: google-auth<3,>=2
 Requires-Dist: google-cloud-speech<3,>=2
 Requires-Dist: google-cloud-texttospeech<3,>=2.24
 Requires-Dist: google-genai>=1.14.0
-Requires-Dist: livekit-agents>=1.0.21
+Requires-Dist: livekit-agents>=1.0.22
 Description-Content-Type: text/markdown
 # Google AI plugin for LiveKit Agents

{livekit_plugins_google-1.0.21.dist-info → livekit_plugins_google-1.0.22.dist-info}/RECORD RENAMED Viewed

@@ -4,13 +4,13 @@ livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA
 livekit/plugins/google/models.py,sha256=maGlEM3hK4-5hMnH9UQMJewA7BZMrnStsFLBNoNVySg,1531
 livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 livekit/plugins/google/stt.py,sha256=2jk-1fHiBT8UW_n3CZsIEdMp2iBnUAlTnmefdUd8rAM,23620
-livekit/plugins/google/tts.py,sha256=29R0ieV5sRPBf5Yi0SPFQk7ZZMbELF30bIL9K_j_Wcg,9100
+livekit/plugins/google/tts.py,sha256=FfhNfGtW8drmYDDfLLZDjaIp2GvNiIdoovgtZq4t_l8,14211
 livekit/plugins/google/utils.py,sha256=UBAbddYk7G8Nojg6bSC7_xN2pdl9qhs86HGhKYFuf9M,10509
-livekit/plugins/google/version.py,sha256=5lzQkS1jEPqreexacwMd18b2EOx7R5m8AQMKtQRBgC4,601
+livekit/plugins/google/version.py,sha256=-8dkOE2vDSF9WN8VoBrSwU2sb5YBGFuwPnSQXQ-uaYM,601
 livekit/plugins/google/beta/__init__.py,sha256=5PnoG3Ux24bjzMSzmTeSVljE9EINivGcbWUEV6egGnM,216
 livekit/plugins/google/beta/realtime/__init__.py,sha256=_fW2NMN22F-hnQ4xAJ_g5lPbR7CvM_xXzSWlUQY-E-U,188
 livekit/plugins/google/beta/realtime/api_proto.py,sha256=Fyrejs3SG0EjOPCCFLEnWXKEUxCff47PMWk2VsKJm5E,594
 livekit/plugins/google/beta/realtime/realtime_api.py,sha256=yYB5fKXl_aaMH_ZSpfUlfOTUg4eRqqRENLTZhZMfBMc,36253
-livekit_plugins_google-1.0.21.dist-info/METADATA,sha256=mQA8BfvWhAjp3V9GJA5OsZLzP_Q03UuDbRX2HbcEgtY,1908
-livekit_plugins_google-1.0.21.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-livekit_plugins_google-1.0.21.dist-info/RECORD,,
+livekit_plugins_google-1.0.22.dist-info/METADATA,sha256=S4bQZr4NhWrAI6vyJi299sh5lsD5eVMNfxvN9__xAMY,1908
+livekit_plugins_google-1.0.22.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+livekit_plugins_google-1.0.22.dist-info/RECORD,,

{livekit_plugins_google-1.0.21.dist-info → livekit_plugins_google-1.0.22.dist-info}/WHEEL RENAMED Viewed

File without changes

livekit-plugins-google 1.0.21__py3-none-any.whl → 1.0.22__py3-none-any.whl

livekit-plugins-google 1.0.21__py3-none-any.whl → 1.0.22__py3-none-any.whl