livekit-plugins-google 1.0.18 → 1.0.20 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
livekit/plugins/google/__init__.py
@@ -12,6 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+"""Google AI plugin for LiveKit Agents
+
+Supports Gemini, Cloud Speech-to-Text, and Cloud Text-to-Speech.
+
+See https://docs.livekit.io/agents/integrations/stt/google/ for more information.
+"""
+
 from . import beta
 from .llm import LLM
 from .stt import STT, SpeechStream
livekit/plugins/google/beta/__init__.py
@@ -1,3 +1,12 @@
 from . import realtime
 
 __all__ = ["realtime"]
+
+# Cleanup docs of unexported modules
+_module = dir()
+NOT_IN_ALL = [m for m in _module if m not in __all__]
+
+__pdoc__ = {}
+
+for n in NOT_IN_ALL:
+    __pdoc__[n] = False
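
`__pdoc__` is the module-level mapping that pdoc consults when rendering documentation; any name mapped to `False` is hidden. As a rough sketch (not part of the package), the loop above collapses to a single comprehension:

```python
__all__ = ["realtime"]

# Hide every module-level name that isn't explicitly exported.
__pdoc__ = {name: False for name in dir() if name not in __all__}
```
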
livekit/plugins/google/beta/realtime/realtime_api.py
@@ -18,6 +18,7 @@ from google.genai.types import (
     GenerationConfig,
     LiveClientContent,
     LiveClientRealtimeInput,
+    LiveClientToolResponse,
     LiveConnectConfig,
     LiveServerContent,
     LiveServerGoAway,
@@ -101,7 +102,7 @@ class RealtimeModel(llm.RealtimeModel):
         self,
         *,
         instructions: NotGivenOr[str] = NOT_GIVEN,
-        model: LiveAPIModels | str = "gemini-2.0-flash-live-001",
+        model: NotGivenOr[LiveAPIModels | str] = NOT_GIVEN,
         api_key: NotGivenOr[str] = NOT_GIVEN,
         voice: Voice | str = "Puck",
         language: NotGivenOr[str] = NOT_GIVEN,
@@ -133,7 +134,7 @@ class RealtimeModel(llm.RealtimeModel):
             instructions (str, optional): Initial system instructions for the model. Defaults to "".
             api_key (str, optional): Google Gemini API key. If None, will attempt to read from the environment variable GOOGLE_API_KEY.
             modalities (list[Modality], optional): Modalities to use, such as ["TEXT", "AUDIO"]. Defaults to ["AUDIO"].
-            model (str, optional): The name of the model to use. Defaults to "gemini-2.0-flash-live-001".
+            model (str, optional): The name of the model to use. Defaults to "gemini-2.0-flash-live-001" or "gemini-2.0-flash-exp" (vertexai).
             voice (api_proto.Voice, optional): Voice setting for audio outputs. Defaults to "Puck".
             language (str, optional): The language(BCP-47 Code) to use for the API. supported languages - https://ai.google.dev/gemini-api/docs/live#supported-languages
             temperature (float, optional): Sampling temperature for response generation. Defaults to 0.8.
@@ -159,14 +160,24 @@ class RealtimeModel(llm.RealtimeModel):
             )
         )
 
+        if not is_given(model):
+            if vertexai:
+                model = "gemini-2.0-flash-exp"
+            else:
+                model = "gemini-2.0-flash-live-001"
+
         gemini_api_key = api_key if is_given(api_key) else os.environ.get("GOOGLE_API_KEY")
         gcp_project = project if is_given(project) else os.environ.get("GOOGLE_CLOUD_PROJECT")
-        gcp_location = location if is_given(location) else os.environ.get("GOOGLE_CLOUD_LOCATION")
+        gcp_location = (
+            location
+            if is_given(location)
+            else os.environ.get("GOOGLE_CLOUD_LOCATION") or "us-central1"
+        )
 
         if vertexai:
             if not gcp_project or not gcp_location:
                 raise ValueError(
-                    "Project and location are required for VertexAI either via project and location or GOOGLE_CLOUD_PROJECT and GOOGLE_CLOUD_LOCATION environment variables"  # noqa: E501
+                    "Project is required for VertexAI via project kwarg or GOOGLE_CLOUD_PROJECT environment variable"  # noqa: E501
                 )
             gemini_api_key = None  # VertexAI does not require an API key
         else:
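
Taken together, the new defaults mean both backends can now be constructed without naming a model. A hedged usage sketch (construction only; `my-gcp-project` is a placeholder):

```python
from livekit.plugins import google

# Gemini API: model now defaults to "gemini-2.0-flash-live-001";
# the key is read from GOOGLE_API_KEY when api_key is not passed.
model = google.beta.realtime.RealtimeModel()

# Vertex AI: model defaults to "gemini-2.0-flash-exp" and the location
# falls back to "us-central1"; only the project remains required.
vertex_model = google.beta.realtime.RealtimeModel(
    vertexai=True,
    project="my-gcp-project",  # placeholder; or set GOOGLE_CLOUD_PROJECT
)
```
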
livekit/plugins/google/beta/realtime/realtime_api.py (continued)
@@ -310,7 +321,9 @@ class RealtimeSession(llm.RealtimeSession):
         async with self._update_lock:
             self._chat_ctx = chat_ctx.copy()
             turns, _ = to_chat_ctx(self._chat_ctx, id(self), ignore_functions=True)
-            tool_results = get_tool_results_for_realtime(self._chat_ctx)
+            tool_results = get_tool_results_for_realtime(
+                self._chat_ctx, vertexai=self._opts.vertexai
+            )
             # TODO(dz): need to compute delta and then either append or recreate session
             if turns:
                 self._send_client_event(LiveClientContent(turns=turns, turn_complete=False))
@@ -481,11 +494,18 @@ class RealtimeSession(llm.RealtimeSession):
                     not self._active_session or self._active_session != session
                 ):
                     break
-
                 if isinstance(msg, LiveClientContent):
-                    await session.send(input=msg)
+                    await session.send_client_content(
+                        turns=msg.turns, turn_complete=msg.turn_complete
+                    )
+                elif isinstance(msg, LiveClientToolResponse):
+                    await session.send_tool_response(function_responses=msg.function_responses)
+                elif isinstance(msg, LiveClientRealtimeInput):
+                    for media_chunk in msg.media_chunks:
+                        await session.send_realtime_input(media=media_chunk)
                 else:
-                    await session.send(input=msg)
+                    logger.warning(f"Warning: Received unhandled message type: {type(msg)}")
+
         except Exception as e:
             if not self._session_should_close.is_set():
                 logger.error(f"error in send task: {e}", exc_info=e)
livekit/plugins/google/llm.py
@@ -241,7 +241,7 @@ class LLM(llm.LLM):
             client=self._client,
             model=self._opts.model,
             chat_ctx=chat_ctx,
-            tools=tools,
+            tools=tools or [],
             conn_options=conn_options,
             extra_kwargs=extra,
         )
@@ -256,7 +256,7 @@ class LLMStream(llm.LLMStream):
         model: str | ChatModels,
         chat_ctx: llm.ChatContext,
         conn_options: APIConnectOptions,
-        tools: list[FunctionTool] | None,
+        tools: list[FunctionTool],
         extra_kwargs: dict[str, Any],
     ) -> None:
         super().__init__(llm, chat_ctx=chat_ctx, tools=tools, conn_options=conn_options)
@@ -270,7 +270,7 @@ class LLMStream(llm.LLMStream):
         request_id = utils.shortuuid()
 
         try:
-            turns, system_instruction = to_chat_ctx(self._chat_ctx, id(self._llm))
+            turns, system_instruction = to_chat_ctx(self._chat_ctx, id(self._llm), generate=True)
             function_declarations = to_fnc_ctx(self._tools)
             if function_declarations:
                 self._extra_kwargs["tools"] = [
livekit/plugins/google/stt.py
@@ -54,7 +54,7 @@ LanguageCode = Union[LgType, list[LgType]]
 _max_session_duration = 240
 
 # Google is very sensitive to background noise, so we'll ignore results with low confidence
-_min_confidence = 0.65
+_default_min_confidence = 0.65
 
 
 # This class is only be used internally to encapsulate the options
@@ -67,6 +67,7 @@ class STTOptions:
     spoken_punctuation: bool
     model: SpeechModels | str
     sample_rate: int
+    min_confidence_threshold: float
     keywords: NotGivenOr[list[tuple[str, float]]] = NOT_GIVEN
 
     def build_adaptation(self) -> cloud_speech.SpeechAdaptation | None:
@@ -98,9 +99,11 @@ class STT(stt.STT):
         model: SpeechModels | str = "latest_long",
         location: str = "global",
         sample_rate: int = 16000,
+        min_confidence_threshold: float = _default_min_confidence,
         credentials_info: NotGivenOr[dict] = NOT_GIVEN,
         credentials_file: NotGivenOr[str] = NOT_GIVEN,
         keywords: NotGivenOr[list[tuple[str, float]]] = NOT_GIVEN,
+        use_streaming: NotGivenOr[bool] = NOT_GIVEN,
     ):
         """
         Create a new instance of Google STT.
@@ -118,11 +121,18 @@ class STT(stt.STT):
             model(SpeechModels): the model to use for recognition default: "latest_long"
             location(str): the location to use for recognition default: "global"
             sample_rate(int): the sample rate of the audio default: 16000
+            min_confidence_threshold(float): minimum confidence threshold for recognition
+                (default: 0.65)
             credentials_info(dict): the credentials info to use for recognition (default: None)
             credentials_file(str): the credentials file to use for recognition (default: None)
             keywords(List[tuple[str, float]]): list of keywords to recognize (default: None)
+            use_streaming(bool): whether to use streaming for recognition (default: True)
         """
-        super().__init__(capabilities=stt.STTCapabilities(streaming=True, interim_results=True))
+        if not is_given(use_streaming):
+            use_streaming = True
+        super().__init__(
+            capabilities=stt.STTCapabilities(streaming=use_streaming, interim_results=True)
+        )
 
         self._location = location
         self._credentials_info = credentials_info
@@ -149,6 +159,7 @@ class STT(stt.STT):
             spoken_punctuation=spoken_punctuation,
             model=model,
             sample_rate=sample_rate,
+            min_confidence_threshold=min_confidence_threshold,
             keywords=keywords,
         )
         self._streams = weakref.WeakSet[SpeechStream]()
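
A hedged usage sketch of the two new constructor options (assumes ambient Google Cloud credentials; the effect of `use_streaming=False` is inferred from the capability flag, under which the agents framework would treat this STT as non-streaming):

```python
from livekit.plugins import google

# Default behavior: streaming recognition with the 0.65 confidence floor.
stt = google.STT()

# Accept lower-confidence results and advertise non-streaming capability.
batch_stt = google.STT(
    min_confidence_threshold=0.5,
    use_streaming=False,
)
```
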
livekit/plugins/google/stt.py (continued)
@@ -246,7 +257,7 @@ class STT(stt.STT):
         except DeadlineExceeded:
             raise APITimeoutError() from None
         except GoogleAPICallError as e:
-            raise APIStatusError(e.message, status_code=e.code or -1) from None
+            raise APIStatusError(f"{e.message} {e.details}", status_code=e.code or -1) from e
         except Exception as e:
             raise APIConnectionError() from e
 
@@ -343,6 +354,7 @@ class SpeechStream(stt.SpeechStream):
         punctuate: NotGivenOr[bool] = NOT_GIVEN,
         spoken_punctuation: NotGivenOr[bool] = NOT_GIVEN,
         model: NotGivenOr[SpeechModels] = NOT_GIVEN,
+        min_confidence_threshold: NotGivenOr[float] = NOT_GIVEN,
         keywords: NotGivenOr[list[tuple[str, float]]] = NOT_GIVEN,
     ):
         if is_given(languages):
@@ -359,6 +371,8 @@ class SpeechStream(stt.SpeechStream):
             self._config.spoken_punctuation = spoken_punctuation
         if is_given(model):
             self._config.model = model
+        if is_given(min_confidence_threshold):
+            self._config.min_confidence_threshold = min_confidence_threshold
         if is_given(keywords):
             self._config.keywords = keywords
 
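The same threshold can now be adjusted on an open stream. A minimal sketch, assuming credentials are configured and `stream()` is called with its defaults:

```python
from livekit.plugins import google

stt = google.STT(min_confidence_threshold=0.5)
stream = stt.stream()
# Tighten the confidence floor mid-session without recreating the stream.
stream.update_options(min_confidence_threshold=0.8)
```
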
livekit/plugins/google/stt.py (continued)
@@ -405,7 +419,10 @@ class SpeechStream(stt.SpeechStream):
                     == cloud_speech.StreamingRecognizeResponse.SpeechEventType.SPEECH_EVENT_TYPE_UNSPECIFIED  # noqa: E501
                 ):
                     result = resp.results[0]
-                    speech_data = _streaming_recognize_response_to_speech_data(resp)
+                    speech_data = _streaming_recognize_response_to_speech_data(
+                        resp,
+                        min_confidence_threshold=self._config.min_confidence_threshold,
+                    )
                     if speech_data is None:
                         continue
 
@@ -461,6 +478,7 @@ class SpeechStream(stt.SpeechStream):
                 features=cloud_speech.RecognitionFeatures(
                     enable_automatic_punctuation=self._config.punctuate,
                     enable_word_time_offsets=True,
+                    enable_spoken_punctuation=self._config.spoken_punctuation,
                 ),
             ),
             streaming_features=cloud_speech.StreamingRecognitionFeatures(
@@ -494,7 +512,12 @@ class SpeechStream(stt.SpeechStream):
         except DeadlineExceeded:
             raise APITimeoutError() from None
         except GoogleAPICallError as e:
-            raise APIStatusError(e.message, status_code=e.code or -1) from None
+            if e.code == 409:
+                logger.debug("stream timed out, restarting.")
+            else:
+                raise APIStatusError(
+                    f"{e.message} {e.details}", status_code=e.code or -1
+                ) from e
         except Exception as e:
             raise APIConnectionError() from e
 
@@ -530,6 +553,8 @@ def _recognize_response_to_speech_event(
 
 def _streaming_recognize_response_to_speech_data(
     resp: cloud_speech.StreamingRecognizeResponse,
+    *,
+    min_confidence_threshold: float,
 ) -> stt.SpeechData | None:
     text = ""
     confidence = 0.0
@@ -542,7 +567,7 @@ def _streaming_recognize_response_to_speech_data(
     confidence /= len(resp.results)
     lg = resp.results[0].language_code
 
-    if confidence < _min_confidence:
+    if confidence < min_confidence_threshold:
         return None
     if text == "":
         return None
livekit/plugins/google/tts.py
@@ -56,6 +56,7 @@ class TTS(tts.TTS):
         effects_profile_id: str = "",
         speaking_rate: float = 1.0,
         location: str = "global",
+        audio_encoding: texttospeech.AudioEncoding = texttospeech.AudioEncoding.PCM,
         credentials_info: NotGivenOr[dict] = NOT_GIVEN,
         credentials_file: NotGivenOr[str] = NOT_GIVEN,
     ) -> None:
@@ -105,7 +106,7 @@ class TTS(tts.TTS):
         self._opts = _TTSOptions(
             voice=voice_params,
             audio_config=texttospeech.AudioConfig(
-                audio_encoding=texttospeech.AudioEncoding.PCM,
+                audio_encoding=audio_encoding,
                 sample_rate_hertz=sample_rate,
                 pitch=pitch,
                 effects_profile_id=effects_profile_id,
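
A hedged construction sketch for the new `audio_encoding` parameter: the value is forwarded unchanged into `texttospeech.AudioConfig`, and `MP3` is one alternative member of the same enum.

```python
from google.cloud import texttospeech

from livekit.plugins import google

# Default stays AudioEncoding.PCM; any other enum member is passed through.
tts = google.TTS(audio_encoding=texttospeech.AudioEncoding.MP3)
```
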
livekit/plugins/google/utils.py
@@ -20,17 +20,21 @@ def to_fnc_ctx(fncs: list[FunctionTool]) -> list[types.FunctionDeclaration]:
     return [_build_gemini_fnc(fnc) for fnc in fncs]
 
 
-def get_tool_results_for_realtime(chat_ctx: llm.ChatContext) -> types.LiveClientToolResponse | None:
+def get_tool_results_for_realtime(
+    chat_ctx: llm.ChatContext, *, vertexai: bool = False
+) -> types.LiveClientToolResponse | None:
     function_responses: list[types.FunctionResponse] = []
     for msg in chat_ctx.items:
         if msg.type == "function_call_output":
-            function_responses.append(
-                types.FunctionResponse(
-                    id=msg.call_id,
-                    name=msg.name,
-                    response={"output": msg.output},
-                )
+            res = types.FunctionResponse(
+                name=msg.name,
+                response={"output": msg.output},
             )
+            if not vertexai:
+                # vertexai does not support id in FunctionResponse
+                # see: https://github.com/googleapis/python-genai/blob/85e00bc/google/genai/_live_converters.py#L1435
+                res.id = msg.call_id
+            function_responses.append(res)
     return (
         types.LiveClientToolResponse(function_responses=function_responses)
         if function_responses
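
The payload difference between the two backends, sketched with hypothetical values (`call_123` and `get_weather` are illustrative only):

```python
from google.genai import types

# Gemini API: the tool result carries the originating call id.
gemini_res = types.FunctionResponse(
    id="call_123",  # hypothetical call id
    name="get_weather",
    response={"output": "sunny"},
)

# Vertex AI: id is omitted, since the live converters reject it.
vertex_res = types.FunctionResponse(
    name="get_weather",
    response={"output": "sunny"},
)
```
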
livekit/plugins/google/utils.py (continued)
@@ -39,7 +43,10 @@ def get_tool_results_for_realtime(chat_ctx: llm.ChatContext) -> types.LiveClient
 
 
 def to_chat_ctx(
-    chat_ctx: llm.ChatContext, cache_key: Any, ignore_functions: bool = False
+    chat_ctx: llm.ChatContext,
+    cache_key: Any,
+    ignore_functions: bool = False,
+    generate: bool = False,
 ) -> tuple[list[types.Content], types.Content | None]:
     turns: list[types.Content] = []
     system_instruction: types.Content | None = None
@@ -99,10 +106,9 @@ def to_chat_ctx(
     if current_role is not None and parts:
         turns.append(types.Content(role=current_role, parts=parts))
 
-    # # Gemini requires the last message to end with user's turn before they can generate
-    # # currently not used because to_chat_ctx should not be used to force a new generation
-    # if current_role != "user":
-    #     turns.append(types.Content(role="user", parts=[types.Part(text=".")]))
+    # Gemini requires the last message to end with user's turn before they can generate
+    if generate and current_role != "user":
+        turns.append(types.Content(role="user", parts=[types.Part(text=".")]))
 
     return turns, system_instruction
 
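An illustrative sketch of what `generate=True` changes: if the converted context ends on a model turn, a minimal placeholder user turn is appended so Gemini will start a new generation.

```python
from google.genai import types

turns = [
    types.Content(role="user", parts=[types.Part(text="hi")]),
    types.Content(role="model", parts=[types.Part(text="hello!")]),
]

# With generate=True, to_chat_ctx effectively performs this last step:
if turns[-1].role != "user":
    turns.append(types.Content(role="user", parts=[types.Part(text=".")]))
```
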
livekit/plugins/google/utils.py (continued)
@@ -173,6 +179,15 @@ class _GeminiJsonSchema:
         schema.pop("title", None)
         schema.pop("default", None)
         schema.pop("additionalProperties", None)
+        schema.pop("$schema", None)
+
+        if (const := schema.pop("const", None)) is not None:
+            # Gemini doesn't support const, but it does support enum with a single value
+            schema["enum"] = [const]
+
+        schema.pop("discriminator", None)
+        schema.pop("examples", None)
+
         if ref := schema.pop("$ref", None):
             key = re.sub(r"^#/\$defs/", "", ref)
             if key in refs_stack:
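
A before/after sketch of the schema cleanup (illustrative dicts, not the private API): a `Literal`-typed field serializes to JSON Schema `const`, which Gemini rejects, so it is rewritten as a one-value `enum`, while unsupported keys such as `$schema`, `discriminator`, and `examples` are dropped.

```python
before = {
    "type": "string",
    "const": "celsius",
    "examples": ["celsius", "fahrenheit"],
}

# After the cleanup pass (equivalent transformation):
after = {
    "type": "string",
    "enum": ["celsius"],
}
```
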
livekit/plugins/google/version.py
@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = "1.0.18"
+__version__ = "1.0.20"
livekit_plugins_google-1.0.20.dist-info/METADATA (new file)
@@ -0,0 +1,47 @@
+Metadata-Version: 2.4
+Name: livekit-plugins-google
+Version: 1.0.20
+Summary: Agent Framework plugin for services from Google Cloud
+Project-URL: Documentation, https://docs.livekit.io
+Project-URL: Website, https://livekit.io/
+Project-URL: Source, https://github.com/livekit/agents
+Author: LiveKit
+License-Expression: Apache-2.0
+Keywords: audio,livekit,realtime,video,webrtc
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3 :: Only
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Topic :: Multimedia :: Sound/Audio
+Classifier: Topic :: Multimedia :: Video
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Requires-Python: >=3.9.0
+Requires-Dist: google-auth<3,>=2
+Requires-Dist: google-cloud-speech<3,>=2
+Requires-Dist: google-cloud-texttospeech<3,>=2
+Requires-Dist: google-genai>=1.12.1
+Requires-Dist: livekit-agents>=1.0.20
+Description-Content-Type: text/markdown
+
+# Google AI plugin for LiveKit Agents
+
+Support for Gemini, Gemini Live, Cloud Speech-to-Text, and Cloud Text-to-Speech.
+
+See [https://docs.livekit.io/agents/integrations/google/](https://docs.livekit.io/agents/integrations/google/) for more information.
+
+## Installation
+
+```bash
+pip install livekit-plugins-google
+```
+
+## Pre-requisites
+
+For credentials, you'll need a Google Cloud account and obtain the correct credentials. Credentials can be passed directly or via Application Default Credentials as specified in [How Application Default Credentials works](https://cloud.google.com/docs/authentication/application-default-credentials).
+
+To use the STT and TTS API, you'll need to enable the respective services for your Google Cloud project.
+
+- Cloud Speech-to-Text API
+- Cloud Text-to-Speech API
livekit_plugins_google-1.0.20.dist-info/RECORD (new file)
@@ -0,0 +1,16 @@
+livekit/plugins/google/__init__.py,sha256=xain2qUzU-YWhYWsLBkW8Q-szV-htpnzHTqymMPo-j0,1364
+livekit/plugins/google/llm.py,sha256=m_lRoUw4RIO1d-LtNYugl99LUNcA1y4NQ17wX7Vv5j0,16189
+livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
+livekit/plugins/google/models.py,sha256=SGjAumdDK97NNLwMFcqZdKR68f1NoGB2Rk1UP2-imG0,1457
+livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+livekit/plugins/google/stt.py,sha256=2jk-1fHiBT8UW_n3CZsIEdMp2iBnUAlTnmefdUd8rAM,23620
+livekit/plugins/google/tts.py,sha256=29R0ieV5sRPBf5Yi0SPFQk7ZZMbELF30bIL9K_j_Wcg,9100
+livekit/plugins/google/utils.py,sha256=zPzmnR-Rs2I87mT_k5S-PVbbuJMH8S-Hp5QcM4wv8vA,10067
+livekit/plugins/google/version.py,sha256=t4KmPVTpEy1pOJ2GRCA-GNJfCQq_-zHNDBxGj4GKfVk,601
+livekit/plugins/google/beta/__init__.py,sha256=5PnoG3Ux24bjzMSzmTeSVljE9EINivGcbWUEV6egGnM,216
+livekit/plugins/google/beta/realtime/__init__.py,sha256=_fW2NMN22F-hnQ4xAJ_g5lPbR7CvM_xXzSWlUQY-E-U,188
+livekit/plugins/google/beta/realtime/api_proto.py,sha256=Fyrejs3SG0EjOPCCFLEnWXKEUxCff47PMWk2VsKJm5E,594
+livekit/plugins/google/beta/realtime/realtime_api.py,sha256=K_YD2CND3PMGV7c3gJY2UdReeLfsOPtIWDys5EU2T_A,31699
+livekit_plugins_google-1.0.20.dist-info/METADATA,sha256=govmSaj6few3t11vreVNKlH9Ki2YzbRGnN3b3il2f20,1905
+livekit_plugins_google-1.0.20.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+livekit_plugins_google-1.0.20.dist-info/RECORD,,
livekit_plugins_google-1.0.18.dist-info/METADATA (removed)
@@ -1,99 +0,0 @@
-Metadata-Version: 2.4
-Name: livekit-plugins-google
-Version: 1.0.18
-Summary: Agent Framework plugin for services from Google Cloud
-Project-URL: Documentation, https://docs.livekit.io
-Project-URL: Website, https://livekit.io/
-Project-URL: Source, https://github.com/livekit/agents
-Author: LiveKit
-License-Expression: Apache-2.0
-Keywords: audio,livekit,realtime,video,webrtc
-Classifier: Intended Audience :: Developers
-Classifier: License :: OSI Approved :: Apache Software License
-Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3 :: Only
-Classifier: Programming Language :: Python :: 3.9
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Topic :: Multimedia :: Sound/Audio
-Classifier: Topic :: Multimedia :: Video
-Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
-Requires-Python: >=3.9.0
-Requires-Dist: google-auth<3,>=2
-Requires-Dist: google-cloud-speech<3,>=2
-Requires-Dist: google-cloud-texttospeech<3,>=2
-Requires-Dist: google-genai>=1.12.1
-Requires-Dist: livekit-agents>=1.0.18
-Description-Content-Type: text/markdown
-
-# LiveKit Plugins Google
-
-Agent Framework plugin for services from Google Cloud. Currently supporting Google's [Speech-to-Text](https://cloud.google.com/speech-to-text) API.
-
-## Installation
-
-```bash
-pip install livekit-plugins-google
-```
-
-## Pre-requisites
-
-For credentials, you'll need a Google Cloud account and obtain the correct credentials. Credentials can be passed directly or via Application Default Credentials as specified in [How Application Default Credentials works](https://cloud.google.com/docs/authentication/application-default-credentials).
-
-To use the STT and TTS API, you'll need to enable the respective services for your Google Cloud project.
-
-- Cloud Speech-to-Text API
-- Cloud Text-to-Speech API
-
-
-## Gemini Multimodal Live
-
-Gemini Multimodal Live can be used with the `MultimodalAgent` class. See examples/multimodal_agent/gemini_agent.py for an example.
-
-### Live Video Input (experimental)
-
-You can push video frames to your Gemini Multimodal Live session alongside the audio automatically handled by the `MultimodalAgent`. The basic approach is to subscribe to the video track, create a video stream, sample frames at a suitable frame rate, and push them into the RealtimeSession:
-
-```
-# Make sure you subscribe to audio and video tracks
-await ctx.connect(auto_subscribe=AutoSubscribe.SUBSCRIBE_ALL)
-
-# Create your RealtimeModel and store a reference
-model = google.beta.realtime.RealtimeModel(
-    # ...
-)
-
-# Create your MultimodalAgent as usual
-agent = MultimodalAgent(
-    model=model,
-    # ...
-)
-
-# Async method to process the video track and push frames to Gemini
-async def _process_video_track(self, track: Track):
-    video_stream = VideoStream(track)
-    last_frame_time = 0
-
-    async for event in video_stream:
-        current_time = asyncio.get_event_loop().time()
-
-        # Sample at 1 FPS
-        if current_time - last_frame_time < 1.0:
-            continue
-
-        last_frame_time = current_time
-        frame = event.frame
-
-        # Push the frame into the RealtimeSession
-        model.sessions[0].push_video(frame)
-
-    await video_stream.aclose()
-
-# Subscribe to new tracks and process them
-@ctx.room.on("track_subscribed")
-def _on_track_subscribed(track: Track, pub, participant):
-    if track.kind == TrackKind.KIND_VIDEO:
-        asyncio.create_task(self._process_video_track(track))
-```
-
-
-
livekit_plugins_google-1.0.18.dist-info/RECORD (removed)
@@ -1,16 +0,0 @@
-livekit/plugins/google/__init__.py,sha256=e_kSlFNmKhyyeliz7f4WOKc_Y0-y39QjO5nCWuguhss,1171
-livekit/plugins/google/llm.py,sha256=SqNGg6-wlrIUo9uaismP7QW5XztkXyDivJXLVgOIZMI,16175
-livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
-livekit/plugins/google/models.py,sha256=SGjAumdDK97NNLwMFcqZdKR68f1NoGB2Rk1UP2-imG0,1457
-livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-livekit/plugins/google/stt.py,sha256=AG_lh2fuuduJi0jFbA_QKFXLJ6NUdF1W_FfkLUJML_Q,22413
-livekit/plugins/google/tts.py,sha256=fmQwW9a1kPsEsrTvIo8fqw479RxWEx0SIc3oTVaj41U,9031
-livekit/plugins/google/utils.py,sha256=TjjTwMbdJdxr3bZjUXxs-J_fipTTM00goW2-d9KWX6w,9582
-livekit/plugins/google/version.py,sha256=cnPu9FVKZV9tFmmz7lEvftrO3B_nWJVFghi3j6UcJLs,601
-livekit/plugins/google/beta/__init__.py,sha256=AxRYc7NGG62Tv1MmcZVCDHNvlhbC86hM-_yP01Qb28k,47
-livekit/plugins/google/beta/realtime/__init__.py,sha256=_fW2NMN22F-hnQ4xAJ_g5lPbR7CvM_xXzSWlUQY-E-U,188
-livekit/plugins/google/beta/realtime/api_proto.py,sha256=Fyrejs3SG0EjOPCCFLEnWXKEUxCff47PMWk2VsKJm5E,594
-livekit/plugins/google/beta/realtime/realtime_api.py,sha256=sXp2oHnTlHrAp5wFmcXj0bRtQKixBYedfbufcbjVHxk,30897
-livekit_plugins_google-1.0.18.dist-info/METADATA,sha256=Vqt0FoqibcKzX_jFXlyFkn-mT7iPC16JlH61VS0fbuw,3492
-livekit_plugins_google-1.0.18.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-livekit_plugins_google-1.0.18.dist-info/RECORD,,