livekit-plugins-google 1.3.8__py3-none-any.whl → 1.3.11__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, exactly as they were published to their public registry. It is provided for informational purposes only.
@@ -19,14 +19,22 @@ Supports Gemini, Cloud Speech-to-Text, and Cloud Text-to-Speech.
 See https://docs.livekit.io/agents/integrations/stt/google/ for more information.
 """
 
-from . import beta, realtime
+from . import beta, realtime, tools
 from .llm import LLM
 from .stt import STT, SpeechStream
-from .tools import _LLMTool
 from .tts import TTS
 from .version import __version__
 
-__all__ = ["STT", "TTS", "realtime", "SpeechStream", "__version__", "beta", "LLM", "_LLMTool"]
+__all__ = [
+    "STT",
+    "TTS",
+    "realtime",
+    "SpeechStream",
+    "__version__",
+    "beta",
+    "LLM",
+    "tools",
+]
 from livekit.agents import Plugin
 
 from .log import logger
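The private `_LLMTool` export is dropped from the package `__init__` in favor of exposing the `tools` module itself. A minimal sketch of the resulting import surface (illustrative; the member names inside `tools` are not shown in this diff):

    # Sketch: the plugin now re-exports the `tools` submodule instead of `_LLMTool`.
    from livekit.plugins import google

    google.LLM    # unchanged public export
    google.tools  # new public export; the private `_LLMTool` symbol is gone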
@@ -24,13 +24,7 @@ from google.auth._default_async import default_async
 from google.genai import Client, types
 from google.genai.errors import APIError, ClientError, ServerError
 from livekit.agents import APIConnectionError, APIStatusError, llm, utils
-from livekit.agents.llm import FunctionTool, RawFunctionTool, ToolChoice, utils as llm_utils
-from livekit.agents.llm.tool_context import (
-    get_function_info,
-    get_raw_function_info,
-    is_function_tool,
-    is_raw_function_tool,
-)
+from livekit.agents.llm import ToolChoice, utils as llm_utils
 from livekit.agents.types import (
     DEFAULT_API_CONNECT_OPTIONS,
     NOT_GIVEN,
@@ -41,11 +35,20 @@ from livekit.agents.utils import is_given
 
 from .log import logger
 from .models import ChatModels
-from .tools import _LLMTool
-from .utils import create_tools_config, to_fnc_ctx, to_response_format
+from .utils import create_tools_config, to_response_format
 from .version import __version__
 
 
+def _is_gemini_3_model(model: str) -> bool:
+    """Check if model is Gemini 3 series"""
+    return "gemini-3" in model.lower() or model.lower().startswith("gemini-3")
+
+
+def _is_gemini_3_flash_model(model: str) -> bool:
+    """Check if model is Gemini 3 Flash"""
+    return "gemini-3-flash" in model.lower() or model.lower().startswith("gemini-3-flash")
+
+
 @dataclass
 class _LLMOptions:
     model: ChatModels | str
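These two helpers gate all of the Gemini 3 specific behavior later in this diff. They match by substring, so dated and preview variants qualify as well (the `startswith` clause is redundant with the `in` check, but harmless). Expected behavior on the model names added to `ChatModels` in this release:

    _is_gemini_3_model("gemini-3-pro-preview")          # True
    _is_gemini_3_flash_model("gemini-3-flash-preview")  # True
    _is_gemini_3_model("gemini-2.5-flash")              # False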
@@ -60,8 +63,8 @@ class _LLMOptions:
     presence_penalty: NotGivenOr[float]
     frequency_penalty: NotGivenOr[float]
     thinking_config: NotGivenOr[types.ThinkingConfigOrDict]
+    retrieval_config: NotGivenOr[types.RetrievalConfigOrDict]
     automatic_function_calling_config: NotGivenOr[types.AutomaticFunctionCallingConfigOrDict]
-    gemini_tools: NotGivenOr[list[_LLMTool]]
     http_options: NotGivenOr[types.HttpOptions]
     seed: NotGivenOr[int]
     safety_settings: NotGivenOr[list[types.SafetySettingOrDict]]
@@ -81,7 +84,7 @@ class LLM(llm.LLM):
     def __init__(
         self,
         *,
-        model: ChatModels | str = "gemini-2.0-flash-001",
+        model: ChatModels | str = "gemini-2.5-flash",
         api_key: NotGivenOr[str] = NOT_GIVEN,
         vertexai: NotGivenOr[bool] = NOT_GIVEN,
         project: NotGivenOr[str] = NOT_GIVEN,
@@ -94,10 +97,10 @@ class LLM(llm.LLM):
         frequency_penalty: NotGivenOr[float] = NOT_GIVEN,
         tool_choice: NotGivenOr[ToolChoice] = NOT_GIVEN,
         thinking_config: NotGivenOr[types.ThinkingConfigOrDict] = NOT_GIVEN,
+        retrieval_config: NotGivenOr[types.RetrievalConfigOrDict] = NOT_GIVEN,
         automatic_function_calling_config: NotGivenOr[
             types.AutomaticFunctionCallingConfigOrDict
         ] = NOT_GIVEN,
-        gemini_tools: NotGivenOr[list[_LLMTool]] = NOT_GIVEN,
         http_options: NotGivenOr[types.HttpOptions] = NOT_GIVEN,
         seed: NotGivenOr[int] = NOT_GIVEN,
         safety_settings: NotGivenOr[list[types.SafetySettingOrDict]] = NOT_GIVEN,
@@ -126,8 +129,8 @@ class LLM(llm.LLM):
             frequency_penalty (float, optional): Penalizes the model for repeating words. Defaults to None.
             tool_choice (ToolChoice, optional): Specifies whether to use tools during response generation. Defaults to "auto".
             thinking_config (ThinkingConfigOrDict, optional): The thinking configuration for response generation. Defaults to None.
+            retrieval_config (RetrievalConfigOrDict, optional): The retrieval configuration for response generation. Defaults to None.
             automatic_function_calling_config (AutomaticFunctionCallingConfigOrDict, optional): The automatic function calling configuration for response generation. Defaults to None.
-            gemini_tools (list[LLMTool], optional): The Gemini-specific tools to use for the session.
             http_options (HttpOptions, optional): The HTTP options to use for the session.
             seed (int, optional): Random seed for reproducible generation. Defaults to None.
             safety_settings (list[SafetySettingOrDict], optional): Safety settings for content filtering. Defaults to None.
@@ -168,10 +171,13 @@ class LLM(llm.LLM):
         # Validate thinking_config
         if is_given(thinking_config):
             _thinking_budget = None
+            _thinking_level = None
             if isinstance(thinking_config, dict):
                 _thinking_budget = thinking_config.get("thinking_budget")
+                _thinking_level = thinking_config.get("thinking_level")
             elif isinstance(thinking_config, types.ThinkingConfig):
                 _thinking_budget = thinking_config.thinking_budget
+                _thinking_level = getattr(thinking_config, "thinking_level", None)
 
             if _thinking_budget is not None:
                 if not isinstance(_thinking_budget, int):
@@ -190,8 +196,8 @@ class LLM(llm.LLM):
             presence_penalty=presence_penalty,
             frequency_penalty=frequency_penalty,
             thinking_config=thinking_config,
+            retrieval_config=retrieval_config,
             automatic_function_calling_config=automatic_function_calling_config,
-            gemini_tools=gemini_tools,
             http_options=http_options,
             seed=seed,
             safety_settings=safety_settings,
@@ -202,6 +208,8 @@ class LLM(llm.LLM):
             project=gcp_project,
             location=gcp_location,
         )
+        # Store thought_signatures for Gemini 3 multi-turn function calling
+        self._thought_signatures: dict[str, bytes] = {}
 
     @property
     def model(self) -> str:
@@ -218,7 +226,7 @@ class LLM(llm.LLM):
         self,
         *,
         chat_ctx: llm.ChatContext,
-        tools: list[FunctionTool | RawFunctionTool] | None = None,
+        tools: list[llm.Tool] | None = None,
         conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
         parallel_tool_calls: NotGivenOr[bool] = NOT_GIVEN,
         tool_choice: NotGivenOr[ToolChoice] = NOT_GIVEN,
@@ -226,7 +234,6 @@ class LLM(llm.LLM):
             types.SchemaUnion | type[llm_utils.ResponseFormatT]
         ] = NOT_GIVEN,
         extra_kwargs: NotGivenOr[dict[str, Any]] = NOT_GIVEN,
-        gemini_tools: NotGivenOr[list[_LLMTool]] = NOT_GIVEN,
     ) -> LLMStream:
         extra = {}
 
@@ -236,6 +243,12 @@ class LLM(llm.LLM):
         tool_choice = (
             cast(ToolChoice, tool_choice) if is_given(tool_choice) else self._opts.tool_choice
         )
+        retrieval_config = (
+            self._opts.retrieval_config if is_given(self._opts.retrieval_config) else None
+        )
+        if isinstance(retrieval_config, dict):
+            retrieval_config = types.RetrievalConfig.model_validate(retrieval_config)
+
         if is_given(tool_choice):
             gemini_tool_choice: types.ToolConfig
             if isinstance(tool_choice, dict) and tool_choice.get("type") == "function":
@@ -243,38 +256,44 @@ class LLM(llm.LLM):
                 gemini_tool_choice = types.ToolConfig(
                     function_calling_config=types.FunctionCallingConfig(
                         mode=types.FunctionCallingConfigMode.ANY,
                         allowed_function_names=[tool_choice["function"]["name"]],
-                    )
+                    ),
+                    retrieval_config=retrieval_config,
                 )
                 extra["tool_config"] = gemini_tool_choice
             elif tool_choice == "required":
                 tool_names = []
                 for tool in tools or []:
-                    if is_function_tool(tool):
-                        tool_names.append(get_function_info(tool).name)
-                    elif is_raw_function_tool(tool):
-                        tool_names.append(get_raw_function_info(tool).name)
+                    if isinstance(tool, (llm.FunctionTool, llm.RawFunctionTool)):
+                        tool_names.append(tool.info.name)
 
                 gemini_tool_choice = types.ToolConfig(
                     function_calling_config=types.FunctionCallingConfig(
                         mode=types.FunctionCallingConfigMode.ANY,
                         allowed_function_names=tool_names or None,
-                    )
+                    ),
+                    retrieval_config=retrieval_config,
                 )
                 extra["tool_config"] = gemini_tool_choice
             elif tool_choice == "auto":
                 gemini_tool_choice = types.ToolConfig(
                     function_calling_config=types.FunctionCallingConfig(
                         mode=types.FunctionCallingConfigMode.AUTO,
-                    )
+                    ),
+                    retrieval_config=retrieval_config,
                 )
                 extra["tool_config"] = gemini_tool_choice
             elif tool_choice == "none":
                 gemini_tool_choice = types.ToolConfig(
                     function_calling_config=types.FunctionCallingConfig(
                         mode=types.FunctionCallingConfigMode.NONE,
-                    )
+                    ),
+                    retrieval_config=retrieval_config,
                 )
                 extra["tool_config"] = gemini_tool_choice
+        elif retrieval_config:
+            extra["tool_config"] = types.ToolConfig(
+                retrieval_config=retrieval_config,
+            )
 
         if is_given(response_format):
             extra["response_schema"] = to_response_format(response_format)  # type: ignore
@@ -295,9 +314,51 @@ class LLM(llm.LLM):
         if is_given(self._opts.seed):
             extra["seed"] = self._opts.seed
 
-        # Add thinking config if thinking_budget is provided
+        # Handle thinking_config based on model version
         if is_given(self._opts.thinking_config):
-            extra["thinking_config"] = self._opts.thinking_config
+            is_gemini_3 = _is_gemini_3_model(self._opts.model)
+            is_gemini_3_flash = _is_gemini_3_flash_model(self._opts.model)
+            thinking_cfg = self._opts.thinking_config
+
+            # Extract both parameters
+            _budget = None
+            _level = None
+            if isinstance(thinking_cfg, dict):
+                _budget = thinking_cfg.get("thinking_budget")
+                _level = thinking_cfg.get("thinking_level")
+            elif isinstance(thinking_cfg, types.ThinkingConfig):
+                _budget = thinking_cfg.thinking_budget
+                _level = getattr(thinking_cfg, "thinking_level", None)
+
+            if is_gemini_3:
+                # Gemini 3: only support thinking_level
+                if _budget is not None and _level is None:
+                    logger.warning(
+                        f"Model {self._opts.model} is Gemini 3 which does not support thinking_budget. "
+                        "Please use thinking_level ('low' or 'high') instead. Ignoring thinking_budget."
+                    )
+                if _level is None:
+                    # If no thinking_level is provided, use the fastest thinking level
+                    if is_gemini_3_flash:
+                        _level = "minimal"
+                    else:
+                        _level = "low"
+                # Use thinking_level only (pass as dict since SDK may not have this field yet)
+                extra["thinking_config"] = {"thinking_level": _level}
+
+            else:
+                # Gemini 2.5 and earlier: only support thinking_budget
+                if _level is not None and _budget is None:
+                    raise ValueError(
+                        f"Model {self._opts.model} does not support thinking_level. "
+                        "Please use thinking_budget (int) instead for Gemini 2.5 and earlier models."
+                    )
+                if _budget is not None:
+                    # Use thinking_budget only
+                    extra["thinking_config"] = types.ThinkingConfig(thinking_budget=_budget)
+                else:
+                    # Pass through original config if no specific handling needed
+                    extra["thinking_config"] = self._opts.thinking_config
 
         if is_given(self._opts.automatic_function_calling_config):
             extra["automatic_function_calling"] = self._opts.automatic_function_calling_config
@@ -305,8 +366,6 @@ class LLM(llm.LLM):
         if is_given(self._opts.safety_settings):
             extra["safety_settings"] = self._opts.safety_settings
 
-        gemini_tools = gemini_tools if is_given(gemini_tools) else self._opts.gemini_tools
-
         return LLMStream(
             self,
             client=self._client,
@@ -314,7 +373,6 @@ class LLM(llm.LLM):
             chat_ctx=chat_ctx,
             tools=tools or [],
             conn_options=conn_options,
-            gemini_tools=gemini_tools,
             extra_kwargs=extra,
         )
 
@@ -322,35 +380,38 @@
 class LLMStream(llm.LLMStream):
     def __init__(
         self,
-        llm: LLM,
+        llm_v: LLM,
         *,
         client: Client,
         model: str | ChatModels,
         chat_ctx: llm.ChatContext,
         conn_options: APIConnectOptions,
-        tools: list[FunctionTool | RawFunctionTool],
+        tools: list[llm.Tool],
         extra_kwargs: dict[str, Any],
-        gemini_tools: NotGivenOr[list[_LLMTool]] = NOT_GIVEN,
     ) -> None:
-        super().__init__(llm, chat_ctx=chat_ctx, tools=tools, conn_options=conn_options)
+        super().__init__(llm_v, chat_ctx=chat_ctx, tools=tools, conn_options=conn_options)
         self._client = client
         self._model = model
-        self._llm: LLM = llm
+        self._llm: LLM = llm_v
         self._extra_kwargs = extra_kwargs
-        self._gemini_tools = gemini_tools
+        self._tool_ctx = llm.ToolContext(tools)
 
     async def _run(self) -> None:
         retryable = True
         request_id = utils.shortuuid()
 
         try:
-            turns_dict, extra_data = self._chat_ctx.to_provider_format(format="google")
-            turns = [types.Content.model_validate(turn) for turn in turns_dict]
-            function_declarations = to_fnc_ctx(self._tools)
-            tools_config = create_tools_config(
-                function_tools=function_declarations,
-                gemini_tools=self._gemini_tools if is_given(self._gemini_tools) else None,
+            # Pass thought_signatures for Gemini 3 multi-turn function calling
+            thought_sigs = (
+                self._llm._thought_signatures if _is_gemini_3_model(self._model) else None
             )
+            turns_dict, extra_data = self._chat_ctx.to_provider_format(
+                format="google", thought_signatures=thought_sigs
+            )
+
+            turns = [types.Content.model_validate(turn) for turn in turns_dict]
+            tool_context = llm.ToolContext(self._tools)
+            tools_config = create_tools_config(tool_context, _only_single_type=True)
             if tools_config:
                 self._extra_kwargs["tools"] = tools_config
             http_options = self._llm._opts.http_options or types.HttpOptions(
@@ -368,31 +429,25 @@ class LLMStream(llm.LLMStream):
                 http_options=http_options,
                 **self._extra_kwargs,
             )
+
             stream = await self._client.aio.models.generate_content_stream(
                 model=self._model,
                 contents=cast(types.ContentListUnion, turns),
                 config=config,
            )
 
+            response_generated = False
+            finish_reason: types.FinishReason | None = None
             async for response in stream:
                 if response.prompt_feedback:
                     raise APIStatusError(
-                        response.prompt_feedback.json(),
+                        response.prompt_feedback.model_dump_json(),
                         retryable=False,
                         request_id=request_id,
                     )
 
-                if (
-                    not response.candidates
-                    or not response.candidates[0].content
-                    or not response.candidates[0].content.parts
-                ):
-                    logger.warning(f"no content in the response: {response}")
-                    raise APIStatusError(
-                        "no content in the response",
-                        retryable=True,
-                        request_id=request_id,
-                    )
+                if not response.candidates:
+                    continue
 
                 if len(response.candidates) > 1:
                     logger.warning(
@@ -401,35 +456,25 @@ class LLMStream(llm.LLMStream):
 
                 candidate = response.candidates[0]
 
-                if candidate.finish_reason in BLOCKED_REASONS:
-                    raise APIStatusError(
-                        f"generation blocked by gemini: {candidate.finish_reason}",
-                        retryable=False,
-                        request_id=request_id,
-                    )
-
                 if not candidate.content or not candidate.content.parts:
-                    raise APIStatusError(
-                        "no content in the response",
-                        retryable=retryable,
-                        request_id=request_id,
-                    )
+                    continue
+
+                if candidate.finish_reason is not None:
+                    finish_reason = candidate.finish_reason
+                    if candidate.finish_reason in BLOCKED_REASONS:
+                        raise APIStatusError(
+                            f"generation blocked by gemini: {candidate.finish_reason}",
+                            retryable=False,
+                            request_id=request_id,
+                        )
 
-                chunks_yielded = False
                 for part in candidate.content.parts:
                     chat_chunk = self._parse_part(request_id, part)
+                    response_generated = True
                     if chat_chunk is not None:
-                        chunks_yielded = True
                         retryable = False
                         self._event_ch.send_nowait(chat_chunk)
 
-                if candidate.finish_reason == types.FinishReason.STOP and not chunks_yielded:
-                    raise APIStatusError(
-                        "no response generated",
-                        retryable=retryable,
-                        request_id=request_id,
-                    )
-
                 if response.usage_metadata is not None:
                     usage = response.usage_metadata
                     self._event_ch.send_nowait(
@@ -444,6 +489,14 @@ class LLMStream(llm.LLMStream):
                         )
                     )
 
+            if not response_generated:
+                raise APIStatusError(
+                    "no response generated",
+                    retryable=retryable,
+                    request_id=request_id,
+                    body=f"finish reason: {finish_reason}",
+                )
+
         except ClientError as e:
             raise APIStatusError(
                 "gemini llm: client error",
@@ -476,17 +529,25 @@
 
     def _parse_part(self, id: str, part: types.Part) -> llm.ChatChunk | None:
         if part.function_call:
+            tool_call = llm.FunctionToolCall(
+                arguments=json.dumps(part.function_call.args),
+                name=part.function_call.name,
+                call_id=part.function_call.id or utils.shortuuid("function_call_"),
+            )
+
+            # Store thought_signature for Gemini 3 multi-turn function calling
+            if (
+                _is_gemini_3_model(self._model)
+                and hasattr(part, "thought_signature")
+                and part.thought_signature
+            ):
+                self._llm._thought_signatures[tool_call.call_id] = part.thought_signature
+
             chat_chunk = llm.ChatChunk(
                 id=id,
                 delta=llm.ChoiceDelta(
                     role="assistant",
-                    tool_calls=[
-                        llm.FunctionToolCall(
-                            arguments=json.dumps(part.function_call.args),
-                            name=part.function_call.name,
-                            call_id=part.function_call.id or utils.shortuuid("function_call_"),
-                        )
-                    ],
+                    tool_calls=[tool_call],
                     content=part.text,
                 ),
            )
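Together with the `_run` changes above, this completes the thought-signature round trip for Gemini 3: signatures arriving on streamed function-call parts are cached on the `LLM` instance by `call_id`, then handed back to `to_provider_format` on the next request so the serialized history can carry them. A condensed sketch of the cache's lifecycle, with names taken from the diff (not a runnable excerpt of the plugin):

    # While streaming (in _parse_part): capture the opaque signature per tool call.
    self._llm._thought_signatures[tool_call.call_id] = part.thought_signature  # dict[str, bytes]

    # On the next turn (in _run): hand the cache back when serializing history.
    thought_sigs = self._llm._thought_signatures if _is_gemini_3_model(self._model) else None
    turns_dict, extra_data = self._chat_ctx.to_provider_format(
        format="google", thought_signatures=thought_sigs
    )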
@@ -1,6 +1,6 @@
 from typing import Literal
 
-# Speech to Text v2
+# Speech to Text (v1 and v2)
 
 SpeechModels = Literal[
     "long",
@@ -14,6 +14,13 @@ SpeechModels = Literal[
     "latest_long",
     "latest_short",
 ]
+# https://docs.cloud.google.com/speech-to-text/docs/transcription-model
+
+SpeechModelsV2 = Literal[
+    "telephony",
+    "chirp_2",
+    "chirp_3",
+]
 
 SpeechLanguages = Literal[
     "af-ZA",
@@ -189,6 +196,9 @@ SpeechLanguages = Literal[
 Gender = Literal["male", "female", "neutral"]
 
 ChatModels = Literal[
+    "gemini-3-pro-preview",
+    "gemini-3-flash-preview",
+    "gemini-2.5-flash",
     "gemini-2.5-pro-preview-05-06",
     "gemini-2.5-flash-preview-04-17",
     "gemini-2.5-flash-preview-05-20",
@@ -197,3 +207,7 @@ ChatModels = Literal[
     "gemini-2.0-pro-exp-02-05",
     "gemini-1.5-pro",
 ]
+
+GeminiTTSModels = Literal[
+    "gemini-2.5-flash-tts", "gemini-2.5-flash-lite-preview-tts", "gemini-2.5-pro-tts"
+]
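These `Literal` aliases are typing aids only; the corresponding parameters are annotated like `ChatModels | str`, so unlisted model names still pass at runtime. A quick sketch of how such an alias constrains annotations:

    from typing import Literal

    GeminiTTSModels = Literal[
        "gemini-2.5-flash-tts", "gemini-2.5-flash-lite-preview-tts", "gemini-2.5-pro-tts"
    ]

    def pick_tts_model(model: GeminiTTSModels) -> str:
        return model

    pick_tts_model("gemini-2.5-pro-tts")  # OK
    # pick_tts_model("gemini-1.0-tts")    # rejected by mypy/pyright: not in the Literal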
@@ -5,19 +5,21 @@ from typing import Literal, Union
 
 from google.genai import types
 
+# Gemini API deprecations: https://ai.google.dev/gemini-api/docs/deprecations
+# Gemini API release notes with preview deprecations: https://ai.google.dev/gemini-api/docs/changelog
+# live models: https://docs.cloud.google.com/vertex-ai/generative-ai/docs/live-api
+# VertexAI retirement: https://docs.cloud.google.com/vertex-ai/generative-ai/docs/learn/model-versions#retired-models
+# Additional references:
+# 1. https://github.com/kazunori279/adk-streaming-test/blob/main/test_report.md
 LiveAPIModels = Literal[
     # VertexAI models
-    "gemini-live-2.5-flash-native-audio",
-    "gemini-live-2.5-flash-preview-native-audio",
-    # deprecated vertexai models
-    "gemini-2.0-flash-exp",
-    "gemini-live-2.5-flash-preview-native-audio-09-2025",
+    "gemini-live-2.5-flash-native-audio",  # GA https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-flash-live-api#live-2.5-flash
+    "gemini-live-2.5-flash-preview-native-audio-09-2025",  # Public preview https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-flash-live-api#live-2.5-flash-preview
+    "gemini-live-2.5-flash-preview-native-audio",  # still works, possibly an alias, but not mentioned in any docs or changelog
     # Gemini API models
-    "gemini-2.5-flash-native-audio-preview-12-2025",
-    "gemini-live-2.5-flash-preview",
-    # deprecated Gemini API models
-    "gemini-2.0-flash-live-001",
-    "gemini-2.5-flash-native-audio-preview-09-2025",
+    "gemini-2.5-flash-native-audio-preview-12-2025",  # https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-live
+    "gemini-2.5-flash-native-audio-preview-09-2025",  # https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-live
+    "gemini-2.0-flash-exp",  # still works in Gemini API but not VertexAI
 ]
 
 Voice = Literal[