PyPI - livekit-plugins-google - Versions diffs - 0.11.1__tar.gz → 1.0.0.dev5__tar.gz - Mend

livekit-plugins-google 0.11.1 (tar.gz) → 1.0.0.dev5 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

livekit_plugins_google-1.0.0.dev5/.gitignore ADDED Viewed

@@ -0,0 +1,168 @@
+**/.vscode
+**/.DS_Store
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+.idea/
+node_modules
+credentials.json
+pyrightconfig.json

{livekit_plugins_google-0.11.1 → livekit_plugins_google-1.0.0.dev5}/PKG-INFO RENAMED Viewed

@@ -1,39 +1,29 @@
-Metadata-Version: 2.2
+Metadata-Version: 2.4
 Name: livekit-plugins-google
-Version: 0.11.1
+Version: 1.0.0.dev5
 Summary: Agent Framework plugin for services from Google Cloud
-Home-page: https://github.com/livekit/agents
-License: Apache-2.0
 Project-URL: Documentation, https://docs.livekit.io
 Project-URL: Website, https://livekit.io/
 Project-URL: Source, https://github.com/livekit/agents
-Keywords: webrtc,realtime,audio,video,livekit
+Author: LiveKit
+License-Expression: Apache-2.0
+Keywords: audio,livekit,realtime,video,webrtc
 Classifier: Intended Audience :: Developers
 Classifier: License :: OSI Approved :: Apache Software License
-Classifier: Topic :: Multimedia :: Sound/Audio
-Classifier: Topic :: Multimedia :: Video
-Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3 :: Only
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3 :: Only
+Classifier: Topic :: Multimedia :: Sound/Audio
+Classifier: Topic :: Multimedia :: Video
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.9.0
-Description-Content-Type: text/markdown
 Requires-Dist: google-auth<3,>=2
 Requires-Dist: google-cloud-speech<3,>=2
 Requires-Dist: google-cloud-texttospeech<3,>=2
-Requires-Dist: google-genai==1.3.0
-Requires-Dist: livekit-agents<1.0.0,>=0.12.16
-Dynamic: classifier
-Dynamic: description
-Dynamic: description-content-type
-Dynamic: home-page
-Dynamic: keywords
-Dynamic: license
-Dynamic: project-url
-Dynamic: requires-dist
-Dynamic: requires-python
-Dynamic: summary
+Requires-Dist: google-genai==1.5.0
+Requires-Dist: livekit-agents>=1.0.0.dev5
+Description-Content-Type: text/markdown
 # LiveKit Plugins Google

{livekit_plugins_google-0.11.1 → livekit_plugins_google-1.0.0.dev5}/livekit/plugins/google/beta/realtime/__init__.py RENAMED Viewed

@@ -1,8 +1,4 @@
-from .api_proto import (
-    ClientEvents,
-    LiveAPIModels,
-    Voice,
-)
+from .api_proto import ClientEvents, LiveAPIModels, Voice
 from .realtime_api import RealtimeModel
 __all__ = [

{livekit_plugins_google-0.11.1 → livekit_plugins_google-1.0.0.dev5}/livekit/plugins/google/beta/realtime/api_proto.py RENAMED Viewed

@@ -1,12 +1,13 @@
 from __future__ import annotations
-from typing import Literal, Sequence, Union
+from collections.abc import Sequence
+from typing import Literal, Union
 from google.genai import types
 from ..._utils import _build_gemini_ctx, _build_tools
-LiveAPIModels = Literal["gemini-2.0-flash-exp"]
+LiveAPIModels = Literal["gemini-2.0-flash-001",]
 Voice = Literal["Puck", "Charon", "Kore", "Fenrir", "Aoede"]

{livekit_plugins_google-0.11.1 → livekit_plugins_google-1.0.0.dev5}/livekit/plugins/google/beta/realtime/realtime_api.py RENAMED Viewed

@@ -3,21 +3,17 @@ from __future__ import annotations
 import asyncio
 import json
 import os
+from collections.abc import AsyncIterable
 from dataclasses import dataclass
-from typing import AsyncIterable, Literal
-from livekit import rtc
-from livekit.agents import llm, utils
-from livekit.agents.llm.function_context import _create_ai_function_info
-from livekit.agents.utils import images
+from typing import Literal
 from google import genai
+from google.genai._api_client import HttpOptions
 from google.genai.types import (
     Blob,
     Content,
     FunctionResponse,
     GenerationConfig,
-    HttpOptions,
     LiveClientContent,
     LiveClientRealtimeInput,
     LiveClientToolResponse,
@@ -29,15 +25,13 @@ from google.genai.types import (
     Tool,
     VoiceConfig,
 )
+from livekit import rtc
+from livekit.agents import llm, utils
+from livekit.agents.llm.function_context import _create_ai_function_info
+from livekit.agents.utils import images
 from ...log import logger
-from .api_proto import (
-    ClientEvents,
-    LiveAPIModels,
-    Voice,
-    _build_gemini_ctx,
-    _build_tools,
-)
+from .api_proto import ClientEvents, LiveAPIModels, Voice, _build_gemini_ctx, _build_tools
 from .transcriber import ModelTranscriber, TranscriberSession, TranscriptionContent
 EventTypes = Literal[
@@ -83,7 +77,6 @@ class Capabilities:
 class ModelOptions:
     model: LiveAPIModels | str
     api_key: str | None
-    api_version: str
     voice: Voice | str
     response_modalities: list[Modality] | None
     vertexai: bool
@@ -108,9 +101,8 @@ class RealtimeModel:
         instructions: str | None = None,
         model: LiveAPIModels | str = "gemini-2.0-flash-exp",
         api_key: str | None = None,
-        api_version: str = "v1alpha",
         voice: Voice | str = "Puck",
-        modalities: list[Modality] = [Modality.AUDIO],
+        modalities: list[Modality] = None,
         enable_user_audio_transcription: bool = True,
         enable_agent_audio_transcription: bool = True,
         vertexai: bool = False,
@@ -138,7 +130,6 @@ class RealtimeModel:
         Args:
             instructions (str, optional): Initial system instructions for the model. Defaults to "".
             api_key (str or None, optional): Google Gemini API key. If None, will attempt to read from the environment variable GOOGLE_API_KEY.
-            api_version (str, optional): The version of the API to use. Defaults to "v1alpha".
             modalities (list[Modality], optional): Modalities to use, such as ["TEXT", "AUDIO"]. Defaults to ["AUDIO"].
             model (str or None, optional): The name of the model to use. Defaults to "gemini-2.0-flash-exp".
             voice (api_proto.Voice, optional): Voice setting for audio outputs. Defaults to "Puck".
@@ -158,6 +149,8 @@ class RealtimeModel:
         Raises:
             ValueError: If the API key is not provided and cannot be found in environment variables.
         """
+        if modalities is None:
+            modalities = ["AUDIO"]
         super().__init__()
         self._capabilities = Capabilities(
             supports_truncate=False,
@@ -183,14 +176,11 @@ class RealtimeModel:
                     "API key is required for Google API either via api_key or GOOGLE_API_KEY environment variable"
                 )
-        instructions_content = (
-            Content(parts=[Part(text=instructions)]) if instructions else None
-        )
+        instructions_content = Content(parts=[Part(text=instructions)]) if instructions else None
         self._rt_sessions: list[GeminiRealtimeSession] = []
         self._opts = ModelOptions(
             model=model,
-            api_version=api_version,
             api_key=self._api_key,
             voice=voice,
             enable_user_audio_transcription=enable_user_audio_transcription,
@@ -263,8 +253,6 @@ class GeminiRealtimeSession(utils.EventEmitter[EventTypes]):
         self._fnc_ctx = fnc_ctx
         self._fnc_tasks = utils.aio.TaskSet()
         self._is_interrupted = False
-        self._playout_complete = asyncio.Event()
-        self._playout_complete.set()
         tools = []
         if self._fnc_ctx is not None:
@@ -285,32 +273,24 @@ class GeminiRealtimeSession(utils.EventEmitter[EventTypes]):
             system_instruction=self._opts.instructions,
             speech_config=SpeechConfig(
                 voice_config=VoiceConfig(
-                    prebuilt_voice_config=PrebuiltVoiceConfig(
-                        voice_name=self._opts.voice
-                    )
+                    prebuilt_voice_config=PrebuiltVoiceConfig(voice_name=self._opts.voice)
                 )
             ),
             tools=tools,
         )
         self._client = genai.Client(
-            http_options=HttpOptions(api_version=self._opts.api_version),
+            http_options=HttpOptions(api_version="v1alpha"),
             api_key=self._opts.api_key,
             vertexai=self._opts.vertexai,
             project=self._opts.project,
             location=self._opts.location,
         )
-        self._main_atask = asyncio.create_task(
-            self._main_task(), name="gemini-realtime-session"
-        )
+        self._main_atask = asyncio.create_task(self._main_task(), name="gemini-realtime-session")
         if self._opts.enable_user_audio_transcription:
-            self._transcriber = TranscriberSession(
-                client=self._client, model=self._opts.model
-            )
+            self._transcriber = TranscriberSession(client=self._client, model=self._opts.model)
             self._transcriber.on("input_speech_done", self._on_input_speech_done)
         if self._opts.enable_agent_audio_transcription:
-            self._agent_transcriber = ModelTranscriber(
-                client=self._client, model=self._opts.model
-            )
+            self._agent_transcriber = ModelTranscriber(client=self._client, model=self._opts.model)
             self._agent_transcriber.on("input_speech_done", self._on_agent_speech_done)
         # init dummy task
         self._init_sync_task = asyncio.create_task(asyncio.sleep(0))
@@ -324,10 +304,6 @@ class GeminiRealtimeSession(utils.EventEmitter[EventTypes]):
         self._send_ch.close()
         await self._main_atask
-    @property
-    def playout_complete(self) -> asyncio.Event | None:
-        return self._playout_complete
     @property
     def fnc_ctx(self) -> llm.FunctionContext | None:
         return self._fnc_ctx
@@ -345,9 +321,7 @@ class GeminiRealtimeSession(utils.EventEmitter[EventTypes]):
     DEFAULT_ENCODE_OPTIONS = images.EncodeOptions(
         format="JPEG",
         quality=75,
-        resize_options=images.ResizeOptions(
-            width=1024, height=1024, strategy="scale_aspect_fit"
-        ),
+        resize_options=images.ResizeOptions(width=1024, height=1024, strategy="scale_aspect_fit"),
     )
     def push_video(
@@ -397,9 +371,7 @@ class GeminiRealtimeSession(utils.EventEmitter[EventTypes]):
     def create_response(
         self,
-        on_duplicate: Literal[
-            "cancel_existing", "cancel_new", "keep_both"
-        ] = "keep_both",
+        on_duplicate: Literal["cancel_existing", "cancel_new", "keep_both"] = "keep_both",
     ) -> None:
         turns, _ = _build_gemini_ctx(self._chat_ctx, id(self))
         ctx = [self._opts.instructions] + turns if self._opts.instructions else turns
@@ -485,8 +457,7 @@ class GeminiRealtimeSession(utils.EventEmitter[EventTypes]):
                                         data=part.inline_data.data,
                                         sample_rate=24000,
                                         num_channels=1,
-                                        samples_per_channel=len(part.inline_data.data)
-                                        // 2,
+                                        samples_per_channel=len(part.inline_data.data) // 2,
                                     )
                                     if self._opts.enable_agent_audio_transcription:
                                         content.audio.append(frame)
@@ -529,12 +500,12 @@ class GeminiRealtimeSession(utils.EventEmitter[EventTypes]):
                         logger.warning(
                             "function call cancelled",
                             extra={
-                                "function_call_ids": response.tool_call_cancellation.ids,
+                                "function_call_ids": response.tool_call_cancellation.function_call_ids,
                             },
                         )
                         self.emit(
                             "function_calls_cancelled",
-                            response.tool_call_cancellation.ids,
+                            response.tool_call_cancellation.function_call_ids,
                         )
         async with self._client.aio.live.connect(

{livekit_plugins_google-0.11.1 → livekit_plugins_google-1.0.0.dev5}/livekit/plugins/google/beta/realtime/transcriber.py RENAMED Viewed

@@ -6,12 +6,12 @@ from dataclasses import dataclass
 from typing import Literal
 import websockets
-from livekit import rtc
-from livekit.agents import APIConnectionError, APIStatusError, utils
 from google import genai
 from google.genai import types
 from google.genai.errors import APIError, ClientError, ServerError
+from livekit import rtc
+from livekit.agents import APIConnectionError, APIStatusError, utils
 from ...log import logger
 from .api_proto import ClientEvents, LiveAPIModels
@@ -51,11 +51,9 @@ class TranscriberSession(utils.EventEmitter[EventTypes]):
         self._needed_sr = 16000
         self._closed = False
-        system_instructions = types.Content(
-            parts=[types.Part(text=SYSTEM_INSTRUCTIONS)]
-        )
+        system_instructions = types.Content(parts=[types.Part(text=SYSTEM_INSTRUCTIONS)])
         self._config = types.LiveConnectConfig(
-            response_modalities=[types.Modality.TEXT],
+            response_modalities=["TEXT"],
             system_instruction=system_instructions,
             generation_config=types.GenerationConfig(temperature=0.0),
         )
@@ -81,17 +79,13 @@ class TranscriberSession(utils.EventEmitter[EventTypes]):
             for f in self._resampler.push(frame):
                 self._queue_msg(
                     types.LiveClientRealtimeInput(
-                        media_chunks=[
-                            types.Blob(data=f.data.tobytes(), mime_type="audio/pcm")
-                        ]
+                        media_chunks=[types.Blob(data=f.data.tobytes(), mime_type="audio/pcm")]
                     )
                 )
         else:
             self._queue_msg(
                 types.LiveClientRealtimeInput(
-                    media_chunks=[
-                        types.Blob(data=frame.data.tobytes(), mime_type="audio/pcm")
-                    ]
+                    media_chunks=[types.Blob(data=frame.data.tobytes(), mime_type="audio/pcm")]
                 )
             )
@@ -157,17 +151,11 @@ class TranscriberSession(utils.EventEmitter[EventTypes]):
                 logger.exception(f"Uncaught error in transcriber _recv_task: {e}")
                 self._closed = True
-        async with self._client.aio.live.connect(
-            model=self._model, config=self._config
-        ) as session:
+        async with self._client.aio.live.connect(model=self._model, config=self._config) as session:
             self._session = session
             tasks = [
-                asyncio.create_task(
-                    _send_task(), name="gemini-realtime-transcriber-send"
-                ),
-                asyncio.create_task(
-                    _recv_task(), name="gemini-realtime-transcriber-recv"
-                ),
+                asyncio.create_task(_send_task(), name="gemini-realtime-transcriber-send"),
+                asyncio.create_task(_recv_task(), name="gemini-realtime-transcriber-recv"),
             ]
             try:
@@ -187,9 +175,7 @@ class ModelTranscriber(utils.EventEmitter[EventTypes]):
         self._client = client
         self._model = model
         self._needed_sr = 16000
-        self._system_instructions = types.Content(
-            parts=[types.Part(text=SYSTEM_INSTRUCTIONS)]
-        )
+        self._system_instructions = types.Content(parts=[types.Part(text=SYSTEM_INSTRUCTIONS)])
         self._config = types.GenerateContentConfig(
             temperature=0.0,
             system_instruction=self._system_instructions,
@@ -198,9 +184,7 @@ class ModelTranscriber(utils.EventEmitter[EventTypes]):
         self._resampler: rtc.AudioResampler | None = None
         self._buffer: rtc.AudioFrame | None = None
         self._audio_ch = utils.aio.Chan[rtc.AudioFrame]()
-        self._main_atask = asyncio.create_task(
-            self._main_task(), name="gemini-model-transcriber"
-        )
+        self._main_atask = asyncio.create_task(self._main_task(), name="gemini-model-transcriber")
     async def aclose(self) -> None:
         if self._audio_ch.closed:

livekit-plugins-google 0.11.1 (tar.gz) → 1.0.0.dev5 (tar.gz)

livekit-plugins-google 0.11.1 (tar.gz) → 1.0.0.dev5 (tar.gz)