PyPI - livekit-plugins-google - Versions diffs - 1.0.21__py3-none-any.whl → 1.0.23__py3-none-any.whl - Mend

livekit-plugins-google 1.0.21__py3-none-any.whl → 1.0.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

livekit/plugins/google/beta/realtime/api_proto.py CHANGED Viewed

@@ -5,9 +5,15 @@ from typing import Literal, Union
 from google.genai import types
-LiveAPIModels = Literal["gemini-2.0-flash-exp", "gemini-2.0-flash-live-001"]
+LiveAPIModels = Literal[
+    "gemini-2.0-flash-exp",
+    # models supported on Gemini API
+    "gemini-2.0-flash-live-001",
+    "gemini-2.5-flash-preview-native-audio-dialog",
+    "gemini-2.5-flash-exp-native-audio-thinking-dialog",
+]
-Voice = Literal["Puck", "Charon", "Kore", "Fenrir", "Aoede"]
+Voice = Literal["Puck", "Charon", "Kore", "Fenrir", "Aoede", "Leda", "Oru", "Zephyr"]
 ClientEvents = Union[

livekit/plugins/google/beta/realtime/realtime_api.py CHANGED Viewed

@@ -13,7 +13,6 @@ from google import genai
 from google.genai.live import AsyncSession
 from google.genai.types import (
     AudioTranscriptionConfig,
-    AutomaticActivityDetection,
     Blob,
     Content,
     FunctionDeclaration,
@@ -86,6 +85,9 @@ class _RealtimeOptions:
     input_audio_transcription: AudioTranscriptionConfig | None
     output_audio_transcription: AudioTranscriptionConfig | None
     image_encode_options: NotGivenOr[images.EncodeOptions]
+    enable_affective_dialog: NotGivenOr[bool] = NOT_GIVEN
+    proactivity: NotGivenOr[bool] = NOT_GIVEN
+    realtime_input_config: NotGivenOr[RealtimeInputConfig] = NOT_GIVEN
 @dataclass
@@ -131,6 +133,9 @@ class RealtimeModel(llm.RealtimeModel):
         input_audio_transcription: NotGivenOr[AudioTranscriptionConfig | None] = NOT_GIVEN,
         output_audio_transcription: NotGivenOr[AudioTranscriptionConfig | None] = NOT_GIVEN,
         image_encode_options: NotGivenOr[images.EncodeOptions] = NOT_GIVEN,
+        enable_affective_dialog: NotGivenOr[bool] = NOT_GIVEN,
+        proactivity: NotGivenOr[bool] = NOT_GIVEN,
+        realtime_input_config: NotGivenOr[RealtimeInputConfig] = NOT_GIVEN,
     ) -> None:
         """
         Initializes a RealtimeModel instance for interacting with Google's Realtime API.
@@ -161,6 +166,9 @@ class RealtimeModel(llm.RealtimeModel):
             input_audio_transcription (AudioTranscriptionConfig | None, optional): The configuration for input audio transcription. Defaults to None.)
             output_audio_transcription (AudioTranscriptionConfig | None, optional): The configuration for output audio transcription. Defaults to AudioTranscriptionConfig().
             image_encode_options (images.EncodeOptions, optional): The configuration for image encoding. Defaults to DEFAULT_ENCODE_OPTIONS.
+            enable_affective_dialog (bool, optional): Whether to enable affective dialog. Defaults to False.
+            proactivity (bool, optional): Whether to enable proactive audio. Defaults to False.
+            realtime_input_config (RealtimeInputConfig, optional): The configuration for realtime input. Defaults to None.
         Raises:
             ValueError: If the API key is required but not found.
@@ -232,6 +240,9 @@ class RealtimeModel(llm.RealtimeModel):
             output_audio_transcription=output_audio_transcription,
             language=language,
             image_encode_options=image_encode_options,
+            enable_affective_dialog=enable_affective_dialog,
+            proactivity=proactivity,
+            realtime_input_config=realtime_input_config,
         )
         self._sessions = weakref.WeakSet[RealtimeSession]()
@@ -583,7 +594,7 @@ class RealtimeSession(llm.RealtimeSession):
     def _build_connect_config(self) -> LiveConnectConfig:
         temp = self._opts.temperature if is_given(self._opts.temperature) else None
-        return LiveConnectConfig(
+        conf = LiveConnectConfig(
             response_modalities=self._opts.response_modalities
             if is_given(self._opts.response_modalities)
             else [Modality.AUDIO],
@@ -615,11 +626,18 @@ class RealtimeSession(llm.RealtimeSession):
             input_audio_transcription=self._opts.input_audio_transcription,
             output_audio_transcription=self._opts.output_audio_transcription,
             session_resumption=SessionResumptionConfig(handle=self._session_resumption_handle),
-            realtime_input_config=RealtimeInputConfig(
-                automatic_activity_detection=AutomaticActivityDetection(),
-            ),
+            realtime_input_config=self._opts.realtime_input_config,
         )
+        if is_given(self._opts.proactivity):
+            conf.proactivity = {"proactive_audio": self._opts.proactivity}
+        if is_given(self._opts.enable_affective_dialog):
+            conf.enable_affective_dialog = self._opts.enable_affective_dialog
+        if is_given(self._opts.realtime_input_config):
+            conf.realtime_input_config = self._opts.realtime_input_config
+        return conf
     def _start_new_generation(self):
         if self._current_generation and not self._current_generation._done:
             logger.warning("starting new generation while another is active. Finalizing previous.")
@@ -789,6 +807,9 @@ class RealtimeSession(llm.RealtimeSession):
                 return token_details_map
             for token_detail in token_details:
+                if not token_detail.token_count:
+                    continue
                 if token_detail.modality == Modality.AUDIO:
                     token_details_map["audio_tokens"] += token_detail.token_count
                 elif token_detail.modality == Modality.TEXT:

livekit/plugins/google/llm.py CHANGED Viewed

@@ -304,11 +304,8 @@ class LLMStream(llm.LLMStream):
                     or not response.candidates[0].content
                     or not response.candidates[0].content.parts
                 ):
-                    raise APIStatusError(
-                        "No candidates in the response",
-                        retryable=True,
-                        request_id=request_id,
-                    )
+                    logger.warning(f"no candidates in the response: {response}")
+                    continue
                 if len(response.candidates) > 1:
                     logger.warning(

livekit/plugins/google/models.py CHANGED Viewed

@@ -97,6 +97,7 @@ Gender = Literal["male", "female", "neutral"]
 ChatModels = Literal[
     "gemini-2.5-pro-preview-05-06",
     "gemini-2.5-flash-preview-04-17",
+    "gemini-2.5-flash-preview-05-20",
     "gemini-2.0-flash-001",
     "gemini-2.0-flash-lite-preview-02-05",
     "gemini-2.0-pro-exp-02-05",

livekit/plugins/google/tts.py CHANGED Viewed

@@ -14,6 +14,8 @@
 from __future__ import annotations
+import asyncio
+import weakref
 from dataclasses import dataclass
 from google.api_core.client_options import ClientOptions
@@ -25,6 +27,7 @@ from livekit.agents import (
     APIConnectOptions,
     APIStatusError,
     APITimeoutError,
+    tokenize,
     tts,
     utils,
 )
@@ -35,13 +38,21 @@ from livekit.agents.types import (
 )
 from livekit.agents.utils import is_given
+from .log import logger
 from .models import Gender, SpeechLanguages
+BUFFERED_WORDS_COUNT = 8
+NUM_CHANNELS = 1
+DEFAULT_VOICE_NAME = "en-US-Chirp3-HD-Charon"
+DEFAULT_LANGUAGE = "en-US"
+DEFAULT_GENDER = "neutral"
 @dataclass
 class _TTSOptions:
     voice: texttospeech.VoiceSelectionParams
     audio_config: texttospeech.AudioConfig
+    tokenizer: tokenize.SentenceTokenizer
 class TTS(tts.TTS):
@@ -59,6 +70,8 @@ class TTS(tts.TTS):
         audio_encoding: texttospeech.AudioEncoding = texttospeech.AudioEncoding.PCM,
         credentials_info: NotGivenOr[dict] = NOT_GIVEN,
         credentials_file: NotGivenOr[str] = NOT_GIVEN,
+        tokenizer: NotGivenOr[tokenize.SentenceTokenizer] = NOT_GIVEN,
+        use_streaming: NotGivenOr[bool] = NOT_GIVEN,
     ) -> None:
         """
         Create a new instance of Google TTS.
@@ -78,12 +91,14 @@ class TTS(tts.TTS):
             speaking_rate (float, optional): Speed of speech. Default is 1.0.
             credentials_info (dict, optional): Dictionary containing Google Cloud credentials. Default is None.
             credentials_file (str, optional): Path to the Google Cloud credentials JSON file. Default is None.
+            tokenizer (tokenize.SentenceTokenizer, optional): Tokenizer for the TTS. Default is a basic sentence tokenizer.
+            use_streaming (bool, optional): Whether to use streaming synthesis. Default is True.
         """  # noqa: E501
+        if not is_given(use_streaming):
+            use_streaming = True
         super().__init__(
-            capabilities=tts.TTSCapabilities(
-                streaming=False,
-            ),
+            capabilities=tts.TTSCapabilities(streaming=use_streaming),
             sample_rate=sample_rate,
             num_channels=1,
         )
@@ -93,15 +108,17 @@ class TTS(tts.TTS):
         self._credentials_file = credentials_file
         self._location = location
-        lang = language if is_given(language) else "en-US"
-        ssml_gender = _gender_from_str("neutral" if not is_given(gender) else gender)
-        name = "" if not is_given(voice_name) else voice_name
+        lang = language if is_given(language) else DEFAULT_LANGUAGE
+        ssml_gender = _gender_from_str(DEFAULT_GENDER if not is_given(gender) else gender)
+        name = DEFAULT_VOICE_NAME if not is_given(voice_name) else voice_name
         voice_params = texttospeech.VoiceSelectionParams(
             name=name,
             language_code=lang,
             ssml_gender=ssml_gender,
         )
+        if not is_given(tokenizer):
+            tokenizer = tokenize.basic.SentenceTokenizer(min_sentence_len=BUFFERED_WORDS_COUNT)
         self._opts = _TTSOptions(
             voice=voice_params,
@@ -112,7 +129,9 @@ class TTS(tts.TTS):
                 effects_profile_id=effects_profile_id,
                 speaking_rate=speaking_rate,
             ),
+            tokenizer=tokenizer,
         )
+        self._streams = weakref.WeakSet[SynthesizeStream]()
     def update_options(
         self,
@@ -168,6 +187,18 @@ class TTS(tts.TTS):
         assert self._client is not None
         return self._client
+    def stream(
+        self, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
+    ) -> SynthesizeStream:
+        stream = SynthesizeStream(
+            tts=self,
+            opts=self._opts,
+            client=self._ensure_client(),
+            conn_options=conn_options,
+        )
+        self._streams.add(stream)
+        return stream
     def synthesize(
         self,
         text: str,
@@ -182,6 +213,12 @@ class TTS(tts.TTS):
             client=self._ensure_client(),
         )
+    async def aclose(self) -> None:
+        for stream in list(self._streams):
+            await stream.aclose()
+        self._streams.clear()
+        await super().aclose()
 class ChunkedStream(tts.ChunkedStream):
     def __init__(
@@ -230,8 +267,105 @@ class ChunkedStream(tts.ChunkedStream):
             raise APITimeoutError() from None
         except GoogleAPICallError as e:
             raise APIStatusError(
-                e.message, status_code=e.code or -1, request_id=None, body=None
-            ) from None
+                f"{e.message} {e.details}", status_code=e.code or -1, request_id=None, body=None
+            ) from e
+        except Exception as e:
+            raise APIConnectionError() from e
+class SynthesizeStream(tts.SynthesizeStream):
+    def __init__(
+        self,
+        *,
+        tts: TTS,
+        opts: _TTSOptions,
+        client: texttospeech.TextToSpeechAsyncClient,
+        conn_options: APIConnectOptions,
+    ):
+        super().__init__(tts=tts, conn_options=conn_options)
+        self._opts, self._client = opts, client
+        self._segments_ch = utils.aio.Chan[tokenize.SentenceStream]()
+    async def _run(self) -> None:
+        request_id = utils.shortuuid()
+        @utils.log_exceptions(logger=logger)
+        async def _tokenize_input():
+            input_stream = None
+            async for input in self._input_ch:
+                if isinstance(input, str):
+                    if input_stream is None:
+                        input_stream = self._opts.tokenizer.stream()
+                        self._segments_ch.send_nowait(input_stream)
+                    input_stream.push_text(input)
+                elif isinstance(input, self._FlushSentinel):
+                    if input_stream:
+                        input_stream.end_input()
+                    input_stream = None
+            self._segments_ch.close()
+        @utils.log_exceptions(logger=logger)
+        async def _run_segments():
+            async for input_stream in self._segments_ch:
+                await self._run_stream(input_stream, request_id)
+        tasks = [
+            asyncio.create_task(_tokenize_input()),
+            asyncio.create_task(_run_segments()),
+        ]
+        try:
+            await asyncio.gather(*tasks)
+        except Exception as e:
+            raise APIConnectionError() from e
+    async def _run_stream(self, input_stream, request_id):
+        streaming_config = texttospeech.StreamingSynthesizeConfig(
+            voice=self._opts.voice,
+            streaming_audio_config=texttospeech.StreamingAudioConfig(
+                audio_encoding=texttospeech.AudioEncoding.PCM
+            ),
+        )
+        emitter = tts.SynthesizedAudioEmitter(event_ch=self._event_ch, request_id=request_id)
+        audio_bstream = utils.audio.AudioByteStream(
+            sample_rate=self._opts.audio_config.sample_rate_hertz,
+            num_channels=NUM_CHANNELS,
+        )
+        @utils.log_exceptions(logger=logger)
+        async def input_generator():
+            try:
+                yield texttospeech.StreamingSynthesizeRequest(streaming_config=streaming_config)
+                async for input in input_stream:
+                    self._mark_started()
+                    yield texttospeech.StreamingSynthesizeRequest(
+                        input=texttospeech.StreamingSynthesisInput(text=input.token)
+                    )
+            except Exception:
+                logger.exception("an error occurred while streaming input to google TTS")
+        try:
+            stream = await self._client.streaming_synthesize(
+                input_generator(),
+                timeout=self._conn_options.timeout,
+            )
+            async for resp in stream:
+                for frame in audio_bstream.write(resp.audio_content):
+                    emitter.push(frame)
+            for frame in audio_bstream.flush():
+                emitter.push(frame)
+            emitter.flush()
+        except DeadlineExceeded as e:
+            logger.debug(f"google tts deadline exceeded: {e}")
+            pass
+        except GoogleAPICallError as e:
+            raise APIStatusError(
+                f"{e.message} {e.details}",
+                status_code=e.code or -1,
+                request_id=request_id,
+                body=None,
+            ) from e
         except Exception as e:
             raise APIConnectionError() from e

livekit/plugins/google/version.py CHANGED Viewed

@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__ = "1.0.21"
+__version__ = "1.0.23"

{livekit_plugins_google-1.0.21.dist-info → livekit_plugins_google-1.0.23.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: livekit-plugins-google
-Version: 1.0.21
+Version: 1.0.23
 Summary: Agent Framework plugin for services from Google Cloud
 Project-URL: Documentation, https://docs.livekit.io
 Project-URL: Website, https://livekit.io/
@@ -21,8 +21,8 @@ Requires-Python: >=3.9.0
 Requires-Dist: google-auth<3,>=2
 Requires-Dist: google-cloud-speech<3,>=2
 Requires-Dist: google-cloud-texttospeech<3,>=2.24
-Requires-Dist: google-genai>=1.14.0
-Requires-Dist: livekit-agents>=1.0.21
+Requires-Dist: google-genai>=v1.16.1
+Requires-Dist: livekit-agents>=1.0.23
 Description-Content-Type: text/markdown
 # Google AI plugin for LiveKit Agents

livekit_plugins_google-1.0.23.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,16 @@
+livekit/plugins/google/__init__.py,sha256=xain2qUzU-YWhYWsLBkW8Q-szV-htpnzHTqymMPo-j0,1364
+livekit/plugins/google/llm.py,sha256=E1T_7cugMVN13dyAbXHVS5sC1lxRPNUemwJdV29-CPk,16206
+livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
+livekit/plugins/google/models.py,sha256=hOpfbN_qdQ1ZTpCN9m9dvG2eb6WgQ3KE3WRpIeeM_T0,1569
+livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+livekit/plugins/google/stt.py,sha256=2jk-1fHiBT8UW_n3CZsIEdMp2iBnUAlTnmefdUd8rAM,23620
+livekit/plugins/google/tts.py,sha256=FfhNfGtW8drmYDDfLLZDjaIp2GvNiIdoovgtZq4t_l8,14211
+livekit/plugins/google/utils.py,sha256=UBAbddYk7G8Nojg6bSC7_xN2pdl9qhs86HGhKYFuf9M,10509
+livekit/plugins/google/version.py,sha256=BRUqwxRBnPVqEcIODJdaZHGAanu4zkwM4NsAQjNtUEM,601
+livekit/plugins/google/beta/__init__.py,sha256=5PnoG3Ux24bjzMSzmTeSVljE9EINivGcbWUEV6egGnM,216
+livekit/plugins/google/beta/realtime/__init__.py,sha256=_fW2NMN22F-hnQ4xAJ_g5lPbR7CvM_xXzSWlUQY-E-U,188
+livekit/plugins/google/beta/realtime/api_proto.py,sha256=NfE7xr2N3JOu7gVfWbAmDcEhs8vuZgMRu5vpScPJzsg,776
+livekit/plugins/google/beta/realtime/realtime_api.py,sha256=fgN2InMMCQL8JAHm-6J-SekzS5ymeH-hMRLzSW86Qkw,37477
+livekit_plugins_google-1.0.23.dist-info/METADATA,sha256=69J1PJEwdaM6jWeMUXpbaU8A0quqi3UjDb5884qG9mI,1909
+livekit_plugins_google-1.0.23.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+livekit_plugins_google-1.0.23.dist-info/RECORD,,

livekit_plugins_google-1.0.21.dist-info/RECORD DELETED Viewed

@@ -1,16 +0,0 @@
-livekit/plugins/google/__init__.py,sha256=xain2qUzU-YWhYWsLBkW8Q-szV-htpnzHTqymMPo-j0,1364
-livekit/plugins/google/llm.py,sha256=Kr9qeBZ5Dd0WCCBR_-gM3WWsVRZPCSteK8NpBsg2C5Y,16304
-livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
-livekit/plugins/google/models.py,sha256=maGlEM3hK4-5hMnH9UQMJewA7BZMrnStsFLBNoNVySg,1531
-livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-livekit/plugins/google/stt.py,sha256=2jk-1fHiBT8UW_n3CZsIEdMp2iBnUAlTnmefdUd8rAM,23620
-livekit/plugins/google/tts.py,sha256=29R0ieV5sRPBf5Yi0SPFQk7ZZMbELF30bIL9K_j_Wcg,9100
-livekit/plugins/google/utils.py,sha256=UBAbddYk7G8Nojg6bSC7_xN2pdl9qhs86HGhKYFuf9M,10509
-livekit/plugins/google/version.py,sha256=5lzQkS1jEPqreexacwMd18b2EOx7R5m8AQMKtQRBgC4,601
-livekit/plugins/google/beta/__init__.py,sha256=5PnoG3Ux24bjzMSzmTeSVljE9EINivGcbWUEV6egGnM,216
-livekit/plugins/google/beta/realtime/__init__.py,sha256=_fW2NMN22F-hnQ4xAJ_g5lPbR7CvM_xXzSWlUQY-E-U,188
-livekit/plugins/google/beta/realtime/api_proto.py,sha256=Fyrejs3SG0EjOPCCFLEnWXKEUxCff47PMWk2VsKJm5E,594
-livekit/plugins/google/beta/realtime/realtime_api.py,sha256=yYB5fKXl_aaMH_ZSpfUlfOTUg4eRqqRENLTZhZMfBMc,36253
-livekit_plugins_google-1.0.21.dist-info/METADATA,sha256=mQA8BfvWhAjp3V9GJA5OsZLzP_Q03UuDbRX2HbcEgtY,1908
-livekit_plugins_google-1.0.21.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-livekit_plugins_google-1.0.21.dist-info/RECORD,,

{livekit_plugins_google-1.0.21.dist-info → livekit_plugins_google-1.0.23.dist-info}/WHEEL RENAMED Viewed

File without changes

livekit-plugins-google 1.0.21__py3-none-any.whl → 1.0.23__py3-none-any.whl

livekit-plugins-google 1.0.21__py3-none-any.whl → 1.0.23__py3-none-any.whl