PyPI - livekit-plugins-google - Versions diffs - 1.1.0__py3-none-any.whl → 1.1.2__py3-none-any.whl - Mend

livekit-plugins-google 1.1.0__py3-none-any.whl → 1.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

livekit/plugins/google/beta/realtime/realtime_api.py CHANGED Viewed

@@ -68,6 +68,7 @@ class _RealtimeOptions:
     output_audio_transcription: types.AudioTranscriptionConfig | None
     image_encode_options: NotGivenOr[images.EncodeOptions]
     conn_options: APIConnectOptions
+    http_options: NotGivenOr[types.HttpOptions]
     enable_affective_dialog: NotGivenOr[bool] = NOT_GIVEN
     proactivity: NotGivenOr[bool] = NOT_GIVEN
     realtime_input_config: NotGivenOr[types.RealtimeInputConfig] = NOT_GIVEN
@@ -136,6 +137,7 @@ class RealtimeModel(llm.RealtimeModel):
         context_window_compression: NotGivenOr[types.ContextWindowCompressionConfig] = NOT_GIVEN,
         api_version: NotGivenOr[str] = NOT_GIVEN,
         conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
+        http_options: NotGivenOr[types.HttpOptions] = NOT_GIVEN,
         _gemini_tools: NotGivenOr[list[_LLMTool]] = NOT_GIVEN,
     ) -> None:
         """
@@ -259,6 +261,7 @@ class RealtimeModel(llm.RealtimeModel):
             api_version=api_version,
             gemini_tools=_gemini_tools,
             conn_options=conn_options,
+            http_options=http_options,
         )
         self._sessions = weakref.WeakSet[RealtimeSession]()
@@ -319,7 +322,9 @@ class RealtimeSession(llm.RealtimeSession):
         if not api_version and (self._opts.enable_affective_dialog or self._opts.proactivity):
             api_version = "v1alpha"
-        http_options = types.HttpOptions(timeout=int(self._opts.conn_options.timeout * 1000))
+        http_options = self._opts.http_options or types.HttpOptions(
+            timeout=int(self._opts.conn_options.timeout * 1000)
+        )
         if api_version:
             http_options.api_version = api_version
@@ -902,6 +907,9 @@ class RealtimeSession(llm.RealtimeSession):
             )
         if not gen.text_ch.closed:
+            if self._opts.output_audio_transcription is None:
+                # close the text data of transcription synchronizer
+                gen.text_ch.send_nowait("")
             gen.text_ch.close()
         if not gen.audio_ch.closed:
             gen.audio_ch.close()

livekit/plugins/google/llm.py CHANGED Viewed

@@ -60,6 +60,7 @@ class _LLMOptions:
     frequency_penalty: NotGivenOr[float]
     thinking_config: NotGivenOr[types.ThinkingConfigOrDict]
     gemini_tools: NotGivenOr[list[_LLMTool]]
+    http_options: NotGivenOr[types.HttpOptions]
 class LLM(llm.LLM):
@@ -80,6 +81,7 @@ class LLM(llm.LLM):
         tool_choice: NotGivenOr[ToolChoice] = NOT_GIVEN,
         thinking_config: NotGivenOr[types.ThinkingConfigOrDict] = NOT_GIVEN,
         gemini_tools: NotGivenOr[list[_LLMTool]] = NOT_GIVEN,
+        http_options: NotGivenOr[types.HttpOptions] = NOT_GIVEN,
     ) -> None:
         """
         Create a new instance of Google GenAI LLM.
@@ -106,6 +108,7 @@ class LLM(llm.LLM):
             tool_choice (ToolChoice, optional): Specifies whether to use tools during response generation. Defaults to "auto".
             thinking_config (ThinkingConfigOrDict, optional): The thinking configuration for response generation. Defaults to None.
             gemini_tools (list[LLMTool], optional): The Gemini-specific tools to use for the session.
+            http_options (HttpOptions, optional): The HTTP options to use for the session.
         """  # noqa: E501
         super().__init__()
         gcp_project = project if is_given(project) else os.environ.get("GOOGLE_CLOUD_PROJECT")
@@ -166,6 +169,7 @@ class LLM(llm.LLM):
             frequency_penalty=frequency_penalty,
             thinking_config=thinking_config,
             gemini_tools=gemini_tools,
+            http_options=http_options,
         )
         self._client = Client(
             api_key=gemini_api_key,
@@ -312,8 +316,9 @@ class LLMStream(llm.LLMStream):
                     if extra_data.system_messages
                     else None
                 ),
-                http_options=types.HttpOptions(
-                    timeout=int(self._conn_options.timeout * 1000),
+                http_options=(
+                    self._llm._opts.http_options
+                    or types.HttpOptions(timeout=int(self._conn_options.timeout * 1000))
                 ),
                 **self._extra_kwargs,
             )

livekit/plugins/google/stt.py CHANGED Viewed

@@ -381,11 +381,14 @@ class SpeechStream(stt.SpeechStream):
         self._reconnect_event.set()
     async def _run(self) -> None:
+        audio_pushed = False
         # google requires a async generator when calling streaming_recognize
         # this function basically convert the queue into a async generator
         async def input_generator(
             client: SpeechAsyncClient, should_stop: asyncio.Event
         ) -> AsyncGenerator[cloud_speech.StreamingRecognizeRequest, None]:
+            nonlocal audio_pushed
             try:
                 # first request should contain the config
                 yield cloud_speech.StreamingRecognizeRequest(
@@ -402,6 +405,8 @@ class SpeechStream(stt.SpeechStream):
                     if isinstance(frame, rtc.AudioFrame):
                         yield cloud_speech.StreamingRecognizeRequest(audio=frame.data.tobytes())
+                        if not audio_pushed:
+                            audio_pushed = True
             except Exception:
                 logger.exception("an error occurred while streaming input to google STT")
@@ -470,6 +475,7 @@ class SpeechStream(stt.SpeechStream):
                     has_started = False
         while True:
+            audio_pushed = False
             try:
                 async with self._pool.connection(timeout=self._conn_options.timeout) as client:
                     self._streaming_config = cloud_speech.StreamingRecognitionConfig(
@@ -514,13 +520,21 @@ class SpeechStream(stt.SpeechStream):
                             break
                         self._reconnect_event.clear()
                     finally:
-                        await utils.aio.gracefully_cancel(process_stream_task, wait_reconnect_task)
                         should_stop.set()
+                        if not process_stream_task.done() and not wait_reconnect_task.done():
+                            # try to gracefully stop the process_stream_task
+                            try:
+                                await asyncio.wait_for(process_stream_task, timeout=1.0)
+                            except asyncio.TimeoutError:
+                                pass
+                        await utils.aio.gracefully_cancel(process_stream_task, wait_reconnect_task)
             except DeadlineExceeded:
                 raise APITimeoutError() from None
             except GoogleAPICallError as e:
                 if e.code == 409:
-                    logger.debug("stream timed out, restarting.")
+                    if audio_pushed:
+                        logger.debug("stream timed out, restarting.")
                 else:
                     raise APIStatusError(
                         f"{e.message} {e.details}", status_code=e.code or -1

livekit/plugins/google/tts.py CHANGED Viewed

@@ -46,6 +46,8 @@ class _TTSOptions:
     effects_profile_id: str
     speaking_rate: float
     tokenizer: tokenize.SentenceTokenizer
+    volume_gain_db: float
+    enable_ssml: bool
 class TTS(tts.TTS):
@@ -59,12 +61,14 @@ class TTS(tts.TTS):
         pitch: int = 0,
         effects_profile_id: str = "",
         speaking_rate: float = 1.0,
+        volume_gain_db: float = 0.0,
         location: str = "global",
         audio_encoding: texttospeech.AudioEncoding = texttospeech.AudioEncoding.OGG_OPUS,  # type: ignore
         credentials_info: NotGivenOr[dict] = NOT_GIVEN,
         credentials_file: NotGivenOr[str] = NOT_GIVEN,
         tokenizer: NotGivenOr[tokenize.SentenceTokenizer] = NOT_GIVEN,
         use_streaming: bool = True,
+        enable_ssml: bool = False,
     ) -> None:
         """
         Create a new instance of Google TTS.
@@ -82,10 +86,12 @@ class TTS(tts.TTS):
             pitch (float, optional): Speaking pitch, ranging from -20.0 to 20.0 semitones relative to the original pitch. Default is 0.
             effects_profile_id (str): Optional identifier for selecting audio effects profiles to apply to the synthesized speech.
             speaking_rate (float, optional): Speed of speech. Default is 1.0.
+            volume_gain_db (float, optional): Volume gain in decibels. Default is 0.0. In the range [-96.0, 16.0]. Strongly recommended not to exceed +10 (dB).
             credentials_info (dict, optional): Dictionary containing Google Cloud credentials. Default is None.
             credentials_file (str, optional): Path to the Google Cloud credentials JSON file. Default is None.
             tokenizer (tokenize.SentenceTokenizer, optional): Tokenizer for the TTS. Default is a basic sentence tokenizer.
             use_streaming (bool, optional): Whether to use streaming synthesis. Default is True.
+            enable_ssml (bool, optional): Whether to enable SSML support. Default is False.
         """  # noqa: E501
         super().__init__(
             capabilities=tts.TTSCapabilities(streaming=use_streaming),
@@ -93,6 +99,9 @@ class TTS(tts.TTS):
             num_channels=1,
         )
+        if enable_ssml and use_streaming:
+            raise ValueError("SSML support is not available for streaming synthesis")
         self._client: texttospeech.TextToSpeechAsyncClient | None = None
         self._credentials_info = credentials_info
         self._credentials_file = credentials_file
@@ -118,6 +127,8 @@ class TTS(tts.TTS):
             effects_profile_id=effects_profile_id,
             speaking_rate=speaking_rate,
             tokenizer=tokenizer,
+            volume_gain_db=volume_gain_db,
+            enable_ssml=enable_ssml,
         )
         self._streams = weakref.WeakSet[SynthesizeStream]()
@@ -128,6 +139,7 @@ class TTS(tts.TTS):
         gender: NotGivenOr[Gender | str] = NOT_GIVEN,
         voice_name: NotGivenOr[str] = NOT_GIVEN,
         speaking_rate: NotGivenOr[float] = NOT_GIVEN,
+        volume_gain_db: NotGivenOr[float] = NOT_GIVEN,
     ) -> None:
         """
         Update the TTS options.
@@ -137,6 +149,7 @@ class TTS(tts.TTS):
             gender (Gender | str, optional): Voice gender ("male", "female", "neutral").
             voice_name (str, optional): Specific voice name.
             speaking_rate (float, optional): Speed of speech.
+            volume_gain_db (float, optional): Volume gain in decibels.
         """
         params = {}
         if is_given(language):
@@ -151,6 +164,8 @@ class TTS(tts.TTS):
         if is_given(speaking_rate):
             self._opts.speaking_rate = speaking_rate
+        if is_given(volume_gain_db):
+            self._opts.volume_gain_db = volume_gain_db
     def _ensure_client(self) -> texttospeech.TextToSpeechAsyncClient:
         api_endpoint = "texttospeech.googleapis.com"
@@ -199,10 +214,21 @@ class ChunkedStream(tts.ChunkedStream):
         self._tts: TTS = tts
         self._opts = replace(tts._opts)
+    def _build_ssml(self) -> str:
+        ssml = "<speak>"
+        ssml += self._input_text
+        ssml += "</speak>"
+        return ssml
     async def _run(self, output_emitter: tts.AudioEmitter) -> None:
         try:
+            input = (
+                texttospeech.SynthesisInput(ssml=self._build_ssml())
+                if self._opts.enable_ssml
+                else texttospeech.SynthesisInput(text=self._input_text)
+            )
             response: SynthesizeSpeechResponse = await self._tts._ensure_client().synthesize_speech(
-                input=texttospeech.SynthesisInput(text=self._input_text),
+                input=input,
                 voice=self._opts.voice,
                 audio_config=texttospeech.AudioConfig(
                     audio_encoding=self._opts.encoding,
@@ -210,6 +236,7 @@ class ChunkedStream(tts.ChunkedStream):
                     pitch=self._opts.pitch,
                     effects_profile_id=self._opts.effects_profile_id,
                     speaking_rate=self._opts.speaking_rate,
+                    volume_gain_db=self._opts.volume_gain_db,
                 ),
                 timeout=self._conn_options.timeout,
             )
@@ -256,7 +283,9 @@ class SynthesizeStream(tts.SynthesizeStream):
         streaming_config = texttospeech.StreamingSynthesizeConfig(
             voice=self._opts.voice,
             streaming_audio_config=texttospeech.StreamingAudioConfig(
-                audio_encoding=encoding, sample_rate_hertz=self._opts.sample_rate
+                audio_encoding=encoding,
+                sample_rate_hertz=self._opts.sample_rate,
+                speaking_rate=self._opts.speaking_rate,
             ),
         )

livekit/plugins/google/version.py CHANGED Viewed

@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__ = "1.1.0"
+__version__ = "1.1.2"

{livekit_plugins_google-1.1.0.dist-info → livekit_plugins_google-1.1.2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: livekit-plugins-google
-Version: 1.1.0
+Version: 1.1.2
 Summary: Agent Framework plugin for services from Google Cloud
 Project-URL: Documentation, https://docs.livekit.io
 Project-URL: Website, https://livekit.io/
@@ -20,9 +20,9 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.9.0
 Requires-Dist: google-auth<3,>=2
 Requires-Dist: google-cloud-speech<3,>=2
-Requires-Dist: google-cloud-texttospeech<3,>=2.24
-Requires-Dist: google-genai>=v1.16.1
-Requires-Dist: livekit-agents>=1.1.0
+Requires-Dist: google-cloud-texttospeech<3,>=2.27
+Requires-Dist: google-genai>=v1.21.1
+Requires-Dist: livekit-agents>=1.1.2
 Description-Content-Type: text/markdown
 # Google AI plugin for LiveKit Agents

{livekit_plugins_google-1.1.0.dist-info → livekit_plugins_google-1.1.2.dist-info}/RECORD RENAMED Viewed

@@ -1,17 +1,17 @@
 livekit/plugins/google/__init__.py,sha256=XIyZ-iFnRBpaLtOJgVwojlB-a8GjdDugVFcjBpMEww8,1412
-livekit/plugins/google/llm.py,sha256=MIi-6kk8AZQxcf5y4zB3HwwEQHAJSCIdX79yf9QMAvI,17835
+livekit/plugins/google/llm.py,sha256=Feb2ixNN9YoDt3aPXkQNeVx2c-wkmrf-mv4r3vggY1s,18131
 livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
 livekit/plugins/google/models.py,sha256=hOpfbN_qdQ1ZTpCN9m9dvG2eb6WgQ3KE3WRpIeeM_T0,1569
 livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-livekit/plugins/google/stt.py,sha256=SddM50w6g2rNkjaF5OtrPwEH-qqq36sa-v_6ogKoBYg,24077
+livekit/plugins/google/stt.py,sha256=ssDMH5U1vQOLA44XMlovYWIR4UqVtZSge3YFN-zZ7Iw,24696
 livekit/plugins/google/tools.py,sha256=tD5HVDHO5JfUF029Cx3axHMJec0Gxalkl7s1FDgxLzI,259
-livekit/plugins/google/tts.py,sha256=PzDfEfvQfj-uSHYOUelFnwYK0Wu2-5Mp8PID0b4I5kc,14293
+livekit/plugins/google/tts.py,sha256=YTfce55MWNJyDH4k8U1O2giOcrtccTs8vrkiW9GuBR0,15541
 livekit/plugins/google/utils.py,sha256=-4z6wrjVaZPtFRowkpwaA2acBRfqtzTk4r2xrPDUdCk,8609
-livekit/plugins/google/version.py,sha256=7SjyflIFTjH0djSotKGIRoRykPCqMpVYetIlvHMFuh0,600
+livekit/plugins/google/version.py,sha256=gqaIRup9hxsq6YNsBlKPmS5PL-B8yqSRTd8wRfj8zoQ,600
 livekit/plugins/google/beta/__init__.py,sha256=5PnoG3Ux24bjzMSzmTeSVljE9EINivGcbWUEV6egGnM,216
 livekit/plugins/google/beta/realtime/__init__.py,sha256=_fW2NMN22F-hnQ4xAJ_g5lPbR7CvM_xXzSWlUQY-E-U,188
 livekit/plugins/google/beta/realtime/api_proto.py,sha256=NfE7xr2N3JOu7gVfWbAmDcEhs8vuZgMRu5vpScPJzsg,776
-livekit/plugins/google/beta/realtime/realtime_api.py,sha256=Mt-f7mkwVd7Aq84HPh_AdIOaB4ye8d6TTllcEjKO5TY,45918
-livekit_plugins_google-1.1.0.dist-info/METADATA,sha256=HeQoxgYu0-hOIOawXsvtwHeESXj1U2Oo5GpwEUEx-W8,1907
-livekit_plugins_google-1.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-livekit_plugins_google-1.1.0.dist-info/RECORD,,
+livekit/plugins/google/beta/realtime/realtime_api.py,sha256=tlAsTFsumqOavC9JT2SuQi_3eGYygZ3bbS-nEM7ea8Q,46293
+livekit_plugins_google-1.1.2.dist-info/METADATA,sha256=cAk_E0o73mOJ1wFsuUFzmzW4vZ2B_lbM2O3aZeHoHq4,1907
+livekit_plugins_google-1.1.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+livekit_plugins_google-1.1.2.dist-info/RECORD,,

{livekit_plugins_google-1.1.0.dist-info → livekit_plugins_google-1.1.2.dist-info}/WHEEL RENAMED Viewed

File without changes

livekit-plugins-google 1.1.0__py3-none-any.whl → 1.1.2__py3-none-any.whl

livekit-plugins-google 1.1.0__py3-none-any.whl → 1.1.2__py3-none-any.whl