PyPI - livekit-plugins-google - Versions diffs - 1.0.17__py3-none-any.whl → 1.0.19__py3-none-any.whl - Mend

livekit-plugins-google 1.0.17__py3-none-any.whl → 1.0.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

livekit/plugins/google/beta/realtime/realtime_api.py CHANGED Viewed

@@ -9,6 +9,7 @@ from collections.abc import Iterator
 from dataclasses import dataclass
 from google import genai
+from google.genai.live import AsyncSession
 from google.genai.types import (
     AudioTranscriptionConfig,
     Blob,
@@ -17,6 +18,7 @@ from google.genai.types import (
     GenerationConfig,
     LiveClientContent,
     LiveClientRealtimeInput,
+    LiveClientToolResponse,
     LiveConnectConfig,
     LiveServerContent,
     LiveServerGoAway,
@@ -25,6 +27,7 @@ from google.genai.types import (
     Modality,
     Part,
     PrebuiltVoiceConfig,
+    SessionResumptionConfig,
     SpeechConfig,
     Tool,
     UsageMetadata,
@@ -62,6 +65,7 @@ class _RealtimeOptions:
     model: LiveAPIModels | str
     api_key: str | None
     voice: Voice | str
+    language: NotGivenOr[str]
     response_modalities: NotGivenOr[list[Modality]]
     vertexai: bool
     project: str | None
@@ -101,6 +105,7 @@ class RealtimeModel(llm.RealtimeModel):
         model: LiveAPIModels | str = "gemini-2.0-flash-live-001",
         api_key: NotGivenOr[str] = NOT_GIVEN,
         voice: Voice | str = "Puck",
+        language: NotGivenOr[str] = NOT_GIVEN,
         modalities: NotGivenOr[list[Modality]] = NOT_GIVEN,
         vertexai: bool = False,
         project: NotGivenOr[str] = NOT_GIVEN,
@@ -131,6 +136,7 @@ class RealtimeModel(llm.RealtimeModel):
             modalities (list[Modality], optional): Modalities to use, such as ["TEXT", "AUDIO"]. Defaults to ["AUDIO"].
             model (str, optional): The name of the model to use. Defaults to "gemini-2.0-flash-live-001".
             voice (api_proto.Voice, optional): Voice setting for audio outputs. Defaults to "Puck".
+            language (str, optional): The language(BCP-47 Code) to use for the API. supported languages - https://ai.google.dev/gemini-api/docs/live#supported-languages
             temperature (float, optional): Sampling temperature for response generation. Defaults to 0.8.
             vertexai (bool, optional): Whether to use VertexAI for the API. Defaults to False.
                 project (str, optional): The project id to use for the API. Defaults to None. (for vertexai)
@@ -195,6 +201,7 @@ class RealtimeModel(llm.RealtimeModel):
             instructions=instructions,
             input_audio_transcription=input_audio_transcription,
             output_audio_transcription=output_audio_transcription,
+            language=language,
         )
         self._sessions = weakref.WeakSet[RealtimeSession]()
@@ -247,12 +254,14 @@ class RealtimeSession(llm.RealtimeSession):
         self._main_atask = asyncio.create_task(self._main_task(), name="gemini-realtime-session")
         self._current_generation: _ResponseGeneration | None = None
-        self._active_session: genai.LiveSession | None = None
+        self._active_session: AsyncSession | None = None
         # indicates if the underlying session should end
         self._session_should_close = asyncio.Event()
         self._response_created_futures: dict[str, asyncio.Future[llm.GenerationCreatedEvent]] = {}
         self._pending_generation_fut: asyncio.Future[llm.GenerationCreatedEvent] | None = None
+        self._session_resumption_handle: str | None = None
         self._update_lock = asyncio.Lock()
         self._session_lock = asyncio.Lock()
@@ -465,7 +474,7 @@ class RealtimeSession(llm.RealtimeSession):
             finally:
                 await self._close_active_session()
-    async def _send_task(self, session: genai.LiveSession):
+    async def _send_task(self, session: AsyncSession):
         try:
             async for msg in self._msg_ch:
                 async with self._session_lock:
@@ -473,11 +482,18 @@ class RealtimeSession(llm.RealtimeSession):
                         not self._active_session or self._active_session != session
                     ):
                         break
                 if isinstance(msg, LiveClientContent):
-                    await session.send(input=msg)
+                    await session.send_client_content(
+                        turns=msg.turns, turn_complete=msg.turn_complete
+                    )
+                elif isinstance(msg, LiveClientToolResponse):
+                    await session.send_tool_response(function_responses=msg.function_responses)
+                elif isinstance(msg, LiveClientRealtimeInput):
+                    for media_chunk in msg.media_chunks:
+                        await session.send_realtime_input(media=media_chunk)
                 else:
-                    await session.send(input=msg)
+                    logger.warning(f"Warning: Received unhandled message type: {type(msg)}")
         except Exception as e:
             if not self._session_should_close.is_set():
                 logger.error(f"error in send task: {e}", exc_info=e)
@@ -485,7 +501,7 @@ class RealtimeSession(llm.RealtimeSession):
         finally:
             logger.debug("send task finished.")
-    async def _recv_task(self, session: genai.LiveSession):
+    async def _recv_task(self, session: AsyncSession):
         try:
             while True:
                 async with self._session_lock:
@@ -501,6 +517,15 @@ class RealtimeSession(llm.RealtimeSession):
                     ):
                         self._start_new_generation()
+                    if response.session_resumption_update:
+                        if (
+                            response.session_resumption_update.resumable
+                            and response.session_resumption_update.new_handle
+                        ):
+                            self._session_resumption_handle = (
+                                response.session_resumption_update.new_handle
+                            )
                     if response.server_content:
                         self._handle_server_content(response.server_content)
                     if response.tool_call:
@@ -548,11 +573,13 @@ class RealtimeSession(llm.RealtimeSession):
             speech_config=SpeechConfig(
                 voice_config=VoiceConfig(
                     prebuilt_voice_config=PrebuiltVoiceConfig(voice_name=self._opts.voice)
-                )
+                ),
+                language_code=self._opts.language if is_given(self._opts.language) else None,
             ),
             tools=[Tool(function_declarations=self._gemini_declarations)],
             input_audio_transcription=self._opts.input_audio_transcription,
             output_audio_transcription=self._opts.output_audio_transcription,
+            session_resumption=SessionResumptionConfig(handle=self._session_resumption_handle),
         )
     def _start_new_generation(self):

livekit/plugins/google/llm.py CHANGED Viewed

@@ -270,7 +270,7 @@ class LLMStream(llm.LLMStream):
         request_id = utils.shortuuid()
         try:
-            turns, system_instruction = to_chat_ctx(self._chat_ctx, id(self._llm))
+            turns, system_instruction = to_chat_ctx(self._chat_ctx, id(self._llm), generate=True)
             function_declarations = to_fnc_ctx(self._tools)
             if function_declarations:
                 self._extra_kwargs["tools"] = [

livekit/plugins/google/stt.py CHANGED Viewed

@@ -54,7 +54,7 @@ LanguageCode = Union[LgType, list[LgType]]
 _max_session_duration = 240
 # Google is very sensitive to background noise, so we'll ignore results with low confidence
-_min_confidence = 0.65
+_default_min_confidence = 0.65
 # This class is only be used internally to encapsulate the options
@@ -67,6 +67,7 @@ class STTOptions:
     spoken_punctuation: bool
     model: SpeechModels | str
     sample_rate: int
+    min_confidence_threshold: float
     keywords: NotGivenOr[list[tuple[str, float]]] = NOT_GIVEN
     def build_adaptation(self) -> cloud_speech.SpeechAdaptation | None:
@@ -98,6 +99,7 @@ class STT(stt.STT):
         model: SpeechModels | str = "latest_long",
         location: str = "global",
         sample_rate: int = 16000,
+        min_confidence_threshold: float = _default_min_confidence,
         credentials_info: NotGivenOr[dict] = NOT_GIVEN,
         credentials_file: NotGivenOr[str] = NOT_GIVEN,
         keywords: NotGivenOr[list[tuple[str, float]]] = NOT_GIVEN,
@@ -118,6 +120,8 @@ class STT(stt.STT):
             model(SpeechModels): the model to use for recognition default: "latest_long"
             location(str): the location to use for recognition default: "global"
             sample_rate(int): the sample rate of the audio default: 16000
+            min_confidence_threshold(float): minimum confidence threshold for recognition
+            (default: 0.65)
             credentials_info(dict): the credentials info to use for recognition (default: None)
             credentials_file(str): the credentials file to use for recognition (default: None)
             keywords(List[tuple[str, float]]): list of keywords to recognize (default: None)
@@ -149,6 +153,7 @@ class STT(stt.STT):
             spoken_punctuation=spoken_punctuation,
             model=model,
             sample_rate=sample_rate,
+            min_confidence_threshold=min_confidence_threshold,
             keywords=keywords,
         )
         self._streams = weakref.WeakSet[SpeechStream]()
@@ -343,6 +348,7 @@ class SpeechStream(stt.SpeechStream):
         punctuate: NotGivenOr[bool] = NOT_GIVEN,
         spoken_punctuation: NotGivenOr[bool] = NOT_GIVEN,
         model: NotGivenOr[SpeechModels] = NOT_GIVEN,
+        min_confidence_threshold: NotGivenOr[float] = NOT_GIVEN,
         keywords: NotGivenOr[list[tuple[str, float]]] = NOT_GIVEN,
     ):
         if is_given(languages):
@@ -359,6 +365,8 @@ class SpeechStream(stt.SpeechStream):
             self._config.spoken_punctuation = spoken_punctuation
         if is_given(model):
             self._config.model = model
+        if is_given(min_confidence_threshold):
+            self._config.min_confidence_threshold = min_confidence_threshold
         if is_given(keywords):
             self._config.keywords = keywords
@@ -405,7 +413,10 @@ class SpeechStream(stt.SpeechStream):
                     == cloud_speech.StreamingRecognizeResponse.SpeechEventType.SPEECH_EVENT_TYPE_UNSPECIFIED  # noqa: E501
                 ):
                     result = resp.results[0]
-                    speech_data = _streaming_recognize_response_to_speech_data(resp)
+                    speech_data = _streaming_recognize_response_to_speech_data(
+                        resp,
+                        min_confidence_threshold=self._config.min_confidence_threshold,
+                    )
                     if speech_data is None:
                         continue
@@ -530,6 +541,8 @@ def _recognize_response_to_speech_event(
 def _streaming_recognize_response_to_speech_data(
     resp: cloud_speech.StreamingRecognizeResponse,
+    *,
+    min_confidence_threshold: float,
 ) -> stt.SpeechData | None:
     text = ""
     confidence = 0.0
@@ -542,7 +555,7 @@ def _streaming_recognize_response_to_speech_data(
     confidence /= len(resp.results)
     lg = resp.results[0].language_code
-    if confidence < _min_confidence:
+    if confidence < min_confidence_threshold:
         return None
     if text == "":
         return None

livekit/plugins/google/tts.py CHANGED Viewed

@@ -56,6 +56,7 @@ class TTS(tts.TTS):
         effects_profile_id: str = "",
         speaking_rate: float = 1.0,
         location: str = "global",
+        audio_encoding: texttospeech.AudioEncoding = texttospeech.AudioEncoding.PCM,
         credentials_info: NotGivenOr[dict] = NOT_GIVEN,
         credentials_file: NotGivenOr[str] = NOT_GIVEN,
     ) -> None:
@@ -105,7 +106,7 @@ class TTS(tts.TTS):
         self._opts = _TTSOptions(
             voice=voice_params,
             audio_config=texttospeech.AudioConfig(
-                audio_encoding=texttospeech.AudioEncoding.OGG_OPUS,
+                audio_encoding=audio_encoding,
                 sample_rate_hertz=sample_rate,
                 pitch=pitch,
                 effects_profile_id=effects_profile_id,
@@ -132,11 +133,11 @@ class TTS(tts.TTS):
         """  # noqa: E501
         params = {}
         if is_given(language):
-            params["language"] = language
+            params["language_code"] = str(language)
         if is_given(gender):
-            params["gender"] = gender
+            params["ssml_gender"] = _gender_from_str(str(gender))
         if is_given(voice_name):
-            params["voice_name"] = voice_name
+            params["name"] = voice_name
         if params:
             self._opts.voice = texttospeech.VoiceSelectionParams(**params)

livekit/plugins/google/utils.py CHANGED Viewed

@@ -39,7 +39,10 @@ def get_tool_results_for_realtime(chat_ctx: llm.ChatContext) -> types.LiveClient
 def to_chat_ctx(
-    chat_ctx: llm.ChatContext, cache_key: Any, ignore_functions: bool = False
+    chat_ctx: llm.ChatContext,
+    cache_key: Any,
+    ignore_functions: bool = False,
+    generate: bool = False,
 ) -> tuple[list[types.Content], types.Content | None]:
     turns: list[types.Content] = []
     system_instruction: types.Content | None = None
@@ -99,10 +102,9 @@ def to_chat_ctx(
     if current_role is not None and parts:
         turns.append(types.Content(role=current_role, parts=parts))
-    # # Gemini requires the last message to end with user's turn before they can generate
-    # # currently not used because to_chat_ctx should not be used to force a new generation
-    # if current_role != "user":
-    #     turns.append(types.Content(role="user", parts=[types.Part(text=".")]))
+    # Gemini requires the last message to end with user's turn before they can generate
+    if generate and current_role != "user":
+        turns.append(types.Content(role="user", parts=[types.Part(text=".")]))
     return turns, system_instruction

livekit/plugins/google/version.py CHANGED Viewed

@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__ = "1.0.17"
+__version__ = "1.0.19"

{livekit_plugins_google-1.0.17.dist-info → livekit_plugins_google-1.0.19.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: livekit-plugins-google
-Version: 1.0.17
+Version: 1.0.19
 Summary: Agent Framework plugin for services from Google Cloud
 Project-URL: Documentation, https://docs.livekit.io
 Project-URL: Website, https://livekit.io/
@@ -21,8 +21,8 @@ Requires-Python: >=3.9.0
 Requires-Dist: google-auth<3,>=2
 Requires-Dist: google-cloud-speech<3,>=2
 Requires-Dist: google-cloud-texttospeech<3,>=2
-Requires-Dist: google-genai>=1.11.0
-Requires-Dist: livekit-agents>=1.0.17
+Requires-Dist: google-genai>=1.12.1
+Requires-Dist: livekit-agents>=1.0.19
 Description-Content-Type: text/markdown
 # LiveKit Plugins Google

livekit_plugins_google-1.0.19.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,16 @@
+livekit/plugins/google/__init__.py,sha256=e_kSlFNmKhyyeliz7f4WOKc_Y0-y39QjO5nCWuguhss,1171
+livekit/plugins/google/llm.py,sha256=NaaT4Zaw6o98VcUHNrQcZZRkD7DPREd76O8fG9IOpXQ,16190
+livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
+livekit/plugins/google/models.py,sha256=SGjAumdDK97NNLwMFcqZdKR68f1NoGB2Rk1UP2-imG0,1457
+livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+livekit/plugins/google/stt.py,sha256=MADnkh0YKWY4bLRgBwFv4emu4YFO-7EVnhxO--dPTlI,23082
+livekit/plugins/google/tts.py,sha256=29R0ieV5sRPBf5Yi0SPFQk7ZZMbELF30bIL9K_j_Wcg,9100
+livekit/plugins/google/utils.py,sha256=sPZZg5VHf60kSILUIHGIZyN2CWYwnCGNYICn8Mhcv9g,9534
+livekit/plugins/google/version.py,sha256=UDC8ahmGgRkv-qMQUY3QibuuVevGMQ9Fd4yIhcQBZwA,601
+livekit/plugins/google/beta/__init__.py,sha256=AxRYc7NGG62Tv1MmcZVCDHNvlhbC86hM-_yP01Qb28k,47
+livekit/plugins/google/beta/realtime/__init__.py,sha256=_fW2NMN22F-hnQ4xAJ_g5lPbR7CvM_xXzSWlUQY-E-U,188
+livekit/plugins/google/beta/realtime/api_proto.py,sha256=Fyrejs3SG0EjOPCCFLEnWXKEUxCff47PMWk2VsKJm5E,594
+livekit/plugins/google/beta/realtime/realtime_api.py,sha256=yk202S604Eogp_ssBX2BSbAXV67uUyQzVO-bzLnScrs,31423
+livekit_plugins_google-1.0.19.dist-info/METADATA,sha256=HuRBvpT9dX3Mz7YOVhZhgQLm3-qQa2vAf2SRDQ5u1vM,3492
+livekit_plugins_google-1.0.19.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+livekit_plugins_google-1.0.19.dist-info/RECORD,,

livekit_plugins_google-1.0.17.dist-info/RECORD DELETED Viewed

@@ -1,16 +0,0 @@
-livekit/plugins/google/__init__.py,sha256=e_kSlFNmKhyyeliz7f4WOKc_Y0-y39QjO5nCWuguhss,1171
-livekit/plugins/google/llm.py,sha256=SqNGg6-wlrIUo9uaismP7QW5XztkXyDivJXLVgOIZMI,16175
-livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
-livekit/plugins/google/models.py,sha256=SGjAumdDK97NNLwMFcqZdKR68f1NoGB2Rk1UP2-imG0,1457
-livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-livekit/plugins/google/stt.py,sha256=AG_lh2fuuduJi0jFbA_QKFXLJ6NUdF1W_FfkLUJML_Q,22413
-livekit/plugins/google/tts.py,sha256=xhINokqY8UutXn85N-cbzq68eptbM6TTtIXmLktE_RM,9004
-livekit/plugins/google/utils.py,sha256=TjjTwMbdJdxr3bZjUXxs-J_fipTTM00goW2-d9KWX6w,9582
-livekit/plugins/google/version.py,sha256=GOfJB-DKZur-i3hrjFbzgpC2NHE96dnWhGLziW1e0_E,601
-livekit/plugins/google/beta/__init__.py,sha256=AxRYc7NGG62Tv1MmcZVCDHNvlhbC86hM-_yP01Qb28k,47
-livekit/plugins/google/beta/realtime/__init__.py,sha256=_fW2NMN22F-hnQ4xAJ_g5lPbR7CvM_xXzSWlUQY-E-U,188
-livekit/plugins/google/beta/realtime/api_proto.py,sha256=Fyrejs3SG0EjOPCCFLEnWXKEUxCff47PMWk2VsKJm5E,594
-livekit/plugins/google/beta/realtime/realtime_api.py,sha256=2_nPBvPttVudoQswhf19ieJ6wxvHquGJgALJ09afQms,29873
-livekit_plugins_google-1.0.17.dist-info/METADATA,sha256=cKeNSFwiM2A-MJeNA6zNeX7ioqbvkEZO3aFfR8Run2c,3492
-livekit_plugins_google-1.0.17.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-livekit_plugins_google-1.0.17.dist-info/RECORD,,

{livekit_plugins_google-1.0.17.dist-info → livekit_plugins_google-1.0.19.dist-info}/WHEEL RENAMED Viewed

File without changes

livekit-plugins-google 1.0.17__py3-none-any.whl → 1.0.19__py3-none-any.whl

livekit-plugins-google 1.0.17__py3-none-any.whl → 1.0.19__py3-none-any.whl