PyPI - livekit-plugins-google - Versions diffs - 1.2.13__tar.gz → 1.2.15__tar.gz - Mend

livekit-plugins-google 1.2.13 (tar.gz) → 1.2.15 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of livekit-plugins-google might be problematic. Click here for more details.

Files changed (19)

{livekit_plugins_google-1.2.13 → livekit_plugins_google-1.2.15}/.gitignore RENAMED Viewed

@@ -172,4 +172,8 @@ pyrightconfig.json
 docs/
 # Database files
-*.db
+*.db
+# Examples for development
+examples/dev/*

{livekit_plugins_google-1.2.13 → livekit_plugins_google-1.2.15}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: livekit-plugins-google
-Version: 1.2.13
+Version: 1.2.15
 Summary: Agent Framework plugin for services from Google Cloud
 Project-URL: Documentation, https://docs.livekit.io
 Project-URL: Website, https://livekit.io/
@@ -22,7 +22,7 @@ Requires-Dist: google-auth<3,>=2
 Requires-Dist: google-cloud-speech<3,>=2
 Requires-Dist: google-cloud-texttospeech<3,>=2.27
 Requires-Dist: google-genai>=v1.23.0
-Requires-Dist: livekit-agents>=1.2.13
+Requires-Dist: livekit-agents>=1.2.15
 Description-Content-Type: text/markdown
 # Google AI plugin for LiveKit Agents

{livekit_plugins_google-1.2.13 → livekit_plugins_google-1.2.15}/livekit/plugins/google/__init__.py RENAMED Viewed

@@ -19,14 +19,14 @@ Supports Gemini, Cloud Speech-to-Text, and Cloud Text-to-Speech.
 See https://docs.livekit.io/agents/integrations/stt/google/ for more information.
 """
-from . import beta
+from . import beta, realtime
 from .llm import LLM
 from .stt import STT, SpeechStream
 from .tools import _LLMTool
 from .tts import TTS
 from .version import __version__
-__all__ = ["STT", "TTS", "SpeechStream", "__version__", "beta", "LLM", "_LLMTool"]
+__all__ = ["STT", "TTS", "realtime", "SpeechStream", "__version__", "beta", "LLM", "_LLMTool"]
 from livekit.agents import Plugin
 from .log import logger

{livekit_plugins_google-1.2.13 → livekit_plugins_google-1.2.15}/livekit/plugins/google/beta/__init__.py RENAMED Viewed

@@ -1,7 +1,7 @@
-from . import realtime
+from .. import realtime
 from .gemini_tts import TTS as GeminiTTS
-__all__ = ["realtime", "GeminiTTS"]
+__all__ = ["GeminiTTS", "realtime"]
 # Cleanup docs of unexported modules
 _module = dir()

{livekit_plugins_google-1.2.13 → livekit_plugins_google-1.2.15}/livekit/plugins/google/models.py RENAMED Viewed

@@ -10,6 +10,7 @@ SpeechModels = Literal[
     "medical_conversation",
     "chirp",
     "chirp_2",
+    "chirp_3",
     "latest_long",
     "latest_short",
 ]

{livekit_plugins_google-1.2.13/livekit/plugins/google/beta → livekit_plugins_google-1.2.15/livekit/plugins/google}/realtime/api_proto.py RENAMED Viewed

@@ -10,11 +10,43 @@ LiveAPIModels = Literal[
     # models supported on Gemini API
     "gemini-2.0-flash-live-001",
     "gemini-live-2.5-flash-preview",
+    "gemini-2.5-flash-native-audio-preview-09-2025",
     "gemini-2.5-flash-preview-native-audio-dialog",
     "gemini-2.5-flash-exp-native-audio-thinking-dialog",
 ]
-Voice = Literal["Puck", "Charon", "Kore", "Fenrir", "Aoede", "Leda", "Orus", "Zephyr"]
+Voice = Literal[
+    "Achernar",
+    "Achird",
+    "Algenib",
+    "Algieba",
+    "Alnilam",
+    "Aoede",
+    "Autonoe",
+    "Callirrhoe",
+    "Charon",
+    "Despina",
+    "Enceladus",
+    "Erinome",
+    "Fenrir",
+    "Gacrux",
+    "Iapetus",
+    "Kore",
+    "Laomedeia",
+    "Leda",
+    "Orus",
+    "Pulcherrima",
+    "Puck",
+    "Rasalgethi",
+    "Sadachbia",
+    "Sadaltager",
+    "Schedar",
+    "Sulafat",
+    "Umbriel",
+    "Vindemiatrix",
+    "Zephyr",
+    "Zubenelgenubi",
+]
 ClientEvents = Union[

{livekit_plugins_google-1.2.13/livekit/plugins/google/beta → livekit_plugins_google-1.2.15/livekit/plugins/google}/realtime/realtime_api.py RENAMED Viewed

@@ -23,11 +23,11 @@ from livekit.agents.types import (
     NotGivenOr,
 )
 from livekit.agents.utils import audio as audio_utils, images, is_given
-from livekit.plugins.google.beta.realtime.api_proto import ClientEvents, LiveAPIModels, Voice
+from livekit.plugins.google.realtime.api_proto import ClientEvents, LiveAPIModels, Voice
-from ...log import logger
-from ...tools import _LLMTool
-from ...utils import create_tools_config, get_tool_results_for_realtime, to_fnc_ctx
+from ..log import logger
+from ..tools import _LLMTool
+from ..utils import create_tools_config, get_tool_results_for_realtime, to_fnc_ctx
 INPUT_AUDIO_SAMPLE_RATE = 16000
 INPUT_AUDIO_CHANNELS = 1
@@ -78,6 +78,7 @@ class _RealtimeOptions:
     gemini_tools: NotGivenOr[list[_LLMTool]] = NOT_GIVEN
     tool_behavior: NotGivenOr[types.Behavior] = NOT_GIVEN
     tool_response_scheduling: NotGivenOr[types.FunctionResponseScheduling] = NOT_GIVEN
+    thinking_config: NotGivenOr[types.ThinkingConfig] = NOT_GIVEN
 @dataclass
@@ -144,6 +145,7 @@ class RealtimeModel(llm.RealtimeModel):
         conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
         http_options: NotGivenOr[types.HttpOptions] = NOT_GIVEN,
         _gemini_tools: NotGivenOr[list[_LLMTool]] = NOT_GIVEN,
+        thinking_config: NotGivenOr[types.ThinkingConfig] = NOT_GIVEN,
     ) -> None:
         """
         Initializes a RealtimeModel instance for interacting with Google's Realtime API.
@@ -180,6 +182,7 @@ class RealtimeModel(llm.RealtimeModel):
             context_window_compression (ContextWindowCompressionConfig, optional): The configuration for context window compression. Defaults to None.
             tool_behavior (Behavior, optional): The behavior for tool call. Default behavior is BLOCK in Gemini Realtime API.
             tool_response_scheduling (FunctionResponseScheduling, optional): The scheduling for tool response. Default scheduling is WHEN_IDLE.
+            thinking_config (ThinkingConfig, optional): Native audio thinking configuration.
             conn_options (APIConnectOptions, optional): The configuration for the API connection. Defaults to DEFAULT_API_CONNECT_OPTIONS.
             _gemini_tools (list[LLMTool], optional): Gemini-specific tools to use for the session. This parameter is experimental and may change.
@@ -274,6 +277,7 @@ class RealtimeModel(llm.RealtimeModel):
             tool_behavior=tool_behavior,
             conn_options=conn_options,
             http_options=http_options,
+            thinking_config=thinking_config,
         )
         self._sessions = weakref.WeakSet[RealtimeSession]()
@@ -510,7 +514,12 @@ class RealtimeSession(llm.RealtimeSession):
         for f in self._resample_audio(frame):
             for nf in self._bstream.write(f.data.tobytes()):
                 realtime_input = types.LiveClientRealtimeInput(
-                    media_chunks=[types.Blob(data=nf.data.tobytes(), mime_type="audio/pcm")]
+                    media_chunks=[
+                        types.Blob(
+                            data=nf.data.tobytes(),
+                            mime_type=f"audio/pcm;rate={INPUT_AUDIO_SAMPLE_RATE}",
+                        )
+                    ]
                 )
                 self._send_client_event(realtime_input)
@@ -814,6 +823,9 @@ class RealtimeSession(llm.RealtimeSession):
                 frequency_penalty=self._opts.frequency_penalty
                 if is_given(self._opts.frequency_penalty)
                 else None,
+                thinking_config=self._opts.thinking_config
+                if is_given(self._opts.thinking_config)
+                else None,
             ),
             system_instruction=types.Content(parts=[types.Part(text=self._opts.instructions)])
             if is_given(self._opts.instructions)

{livekit_plugins_google-1.2.13 → livekit_plugins_google-1.2.15}/livekit/plugins/google/stt.py RENAMED Viewed

@@ -618,17 +618,28 @@ def _streaming_recognize_response_to_speech_data(
 ) -> stt.SpeechData | None:
     text = ""
     confidence = 0.0
+    final_result = None
     for result in resp.results:
         if len(result.alternatives) == 0:
             continue
-        text += result.alternatives[0].transcript
-        confidence += result.alternatives[0].confidence
-    confidence /= len(resp.results)
-    lg = resp.results[0].language_code
+        else:
+            if result.is_final:
+                final_result = result
+                break
+            else:
+                text += result.alternatives[0].transcript
+                confidence += result.alternatives[0].confidence
+    if final_result is not None:
+        text = final_result.alternatives[0].transcript
+        confidence = final_result.alternatives[0].confidence
+        lg = final_result.language_code
+    else:
+        confidence /= len(resp.results)
+        if confidence < min_confidence_threshold:
+            return None
+        lg = resp.results[0].language_code
-    if confidence < min_confidence_threshold:
-        return None
     if text == "":
         return None

{livekit_plugins_google-1.2.13 → livekit_plugins_google-1.2.15}/livekit/plugins/google/version.py RENAMED Viewed

@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__ = "1.2.13"
+__version__ = "1.2.15"

{livekit_plugins_google-1.2.13 → livekit_plugins_google-1.2.15}/pyproject.toml RENAMED Viewed

@@ -27,7 +27,7 @@ dependencies = [
     "google-cloud-speech >= 2, < 3",
     "google-cloud-texttospeech >= 2.27, < 3",
     "google-genai >= v1.23.0",
-    "livekit-agents>=1.2.13",
+    "livekit-agents>=1.2.15",
 ]
 [project.urls]