livekit-plugins-google 0.11.3__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,45 +15,43 @@
 
  from __future__ import annotations
 
- import asyncio
  import json
  import os
  from dataclasses import dataclass
- from typing import Any, Literal, MutableSet, Union, cast
-
- from livekit.agents import (
-     APIConnectionError,
-     APIStatusError,
-     llm,
-     utils,
- )
- from livekit.agents.llm import LLMCapabilities, ToolChoice, _create_ai_function_info
- from livekit.agents.types import DEFAULT_API_CONNECT_OPTIONS, APIConnectOptions
+ from typing import Any, cast
 
  from google import genai
  from google.auth._default_async import default_async
  from google.genai import types
  from google.genai.errors import APIError, ClientError, ServerError
+ from livekit.agents import APIConnectionError, APIStatusError, llm, utils
+ from livekit.agents.llm import FunctionTool, ToolChoice, utils as llm_utils
+ from livekit.agents.types import (
+     DEFAULT_API_CONNECT_OPTIONS,
+     NOT_GIVEN,
+     APIConnectOptions,
+     NotGivenOr,
+ )
+ from livekit.agents.utils import is_given
 
- from ._utils import _build_gemini_ctx, _build_tools
  from .log import logger
  from .models import ChatModels
+ from .utils import to_chat_ctx, to_fnc_ctx, to_response_format
 
 
  @dataclass
- class LLMOptions:
+ class _LLMOptions:
      model: ChatModels | str
-     temperature: float | None
-     tool_choice: Union[ToolChoice, Literal["auto", "required", "none"]] = "auto"
-     vertexai: bool = False
-     project: str | None = None
-     location: str | None = None
-     candidate_count: int = 1
-     max_output_tokens: int | None = None
-     top_p: float | None = None
-     top_k: float | None = None
-     presence_penalty: float | None = None
-     frequency_penalty: float | None = None
+     temperature: NotGivenOr[float]
+     tool_choice: NotGivenOr[ToolChoice]
+     vertexai: NotGivenOr[bool]
+     project: NotGivenOr[str]
+     location: NotGivenOr[str]
+     max_output_tokens: NotGivenOr[int]
+     top_p: NotGivenOr[float]
+     top_k: NotGivenOr[float]
+     presence_penalty: NotGivenOr[float]
+     frequency_penalty: NotGivenOr[float]
 
 
  class LLM(llm.LLM):
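
The 0.11 options used plain `None` defaults; 1.0 switches every optional field to the `NotGivenOr`/`NOT_GIVEN` sentinel so an omitted argument can be distinguished from an explicitly passed value. A minimal sketch of the pattern, reusing only helpers this module already imports (the `resolve_project` function is illustrative, not part of the package):

```python
from __future__ import annotations

import os

from livekit.agents.types import NOT_GIVEN, NotGivenOr
from livekit.agents.utils import is_given


def resolve_project(project: NotGivenOr[str] = NOT_GIVEN) -> str | None:
    # NOT_GIVEN means "the caller passed nothing", so the environment fallback
    # applies only when the argument was truly omitted -- the same pattern the
    # new __init__ uses for project, location, and api_key below.
    return project if is_given(project) else os.environ.get("GOOGLE_CLOUD_PROJECT")
```
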
@@ -61,18 +59,17 @@ class LLM(llm.LLM):
          self,
          *,
          model: ChatModels | str = "gemini-2.0-flash-001",
-         api_key: str | None = None,
-         vertexai: bool = False,
-         project: str | None = None,
-         location: str | None = None,
-         candidate_count: int = 1,
-         temperature: float = 0.8,
-         max_output_tokens: int | None = None,
-         top_p: float | None = None,
-         top_k: float | None = None,
-         presence_penalty: float | None = None,
-         frequency_penalty: float | None = None,
-         tool_choice: Union[ToolChoice, Literal["auto", "required", "none"]] = "auto",
+         api_key: NotGivenOr[str] = NOT_GIVEN,
+         vertexai: NotGivenOr[bool] = False,
+         project: NotGivenOr[str] = NOT_GIVEN,
+         location: NotGivenOr[str] = NOT_GIVEN,
+         temperature: NotGivenOr[float] = NOT_GIVEN,
+         max_output_tokens: NotGivenOr[int] = NOT_GIVEN,
+         top_p: NotGivenOr[float] = NOT_GIVEN,
+         top_k: NotGivenOr[float] = NOT_GIVEN,
+         presence_penalty: NotGivenOr[float] = NOT_GIVEN,
+         frequency_penalty: NotGivenOr[float] = NOT_GIVEN,
+         tool_choice: NotGivenOr[ToolChoice] = NOT_GIVEN,
      ) -> None:
          """
          Create a new instance of Google GenAI LLM.
@@ -90,55 +87,46 @@ class LLM(llm.LLM):
              vertexai (bool, optional): Whether to use VertexAI. Defaults to False.
              project (str, optional): The Google Cloud project to use (only for VertexAI). Defaults to None.
              location (str, optional): The location to use for VertexAI API requests. Defaults value is "us-central1".
-             candidate_count (int, optional): Number of candidate responses to generate. Defaults to 1.
              temperature (float, optional): Sampling temperature for response generation. Defaults to 0.8.
              max_output_tokens (int, optional): Maximum number of tokens to generate in the output. Defaults to None.
              top_p (float, optional): The nucleus sampling probability for response generation. Defaults to None.
              top_k (int, optional): The top-k sampling value for response generation. Defaults to None.
              presence_penalty (float, optional): Penalizes the model for generating previously mentioned concepts. Defaults to None.
              frequency_penalty (float, optional): Penalizes the model for repeating words. Defaults to None.
-             tool_choice (ToolChoice or Literal["auto", "required", "none"], optional): Specifies whether to use tools during response generation. Defaults to "auto".
-         """
-         super().__init__(
-             capabilities=LLMCapabilities(
-                 supports_choices_on_int=False,
-                 requires_persistent_functions=False,
-             )
-         )
-         self._project_id = project or os.environ.get("GOOGLE_CLOUD_PROJECT", None)
-         self._location = location or os.environ.get(
-             "GOOGLE_CLOUD_LOCATION", "us-central1"
-         )
-         self._api_key = api_key or os.environ.get("GOOGLE_API_KEY", None)
+             tool_choice (ToolChoice, optional): Specifies whether to use tools during response generation. Defaults to "auto".
+         """  # noqa: E501
+         super().__init__()
+         gcp_project = project if is_given(project) else os.environ.get("GOOGLE_CLOUD_PROJECT")
+         gcp_location = location if is_given(location) else os.environ.get("GOOGLE_CLOUD_LOCATION")
+         gemini_api_key = api_key if is_given(api_key) else os.environ.get("GOOGLE_API_KEY")
          _gac = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")
          if _gac is None:
              logger.warning(
-                 "`GOOGLE_APPLICATION_CREDENTIALS` environment variable is not set. please set it to the path of the service account key file. Otherwise, use any of the other Google Cloud auth methods."
+                 "`GOOGLE_APPLICATION_CREDENTIALS` environment variable is not set. please set it to the path of the service account key file. Otherwise, use any of the other Google Cloud auth methods."  # noqa: E501
              )
 
-         if vertexai:
-             if not self._project_id:
-                 _, self._project_id = default_async(
+         if is_given(vertexai) and vertexai:
+             if not gcp_project:
+                 _, gcp_project = default_async(
                      scopes=["https://www.googleapis.com/auth/cloud-platform"]
                  )
-             self._api_key = None  # VertexAI does not require an API key
+             gemini_api_key = None  # VertexAI does not require an API key
 
          else:
-             self._project_id = None
-             self._location = None
-             if not self._api_key:
+             gcp_project = None
+             gcp_location = None
+             if not gemini_api_key:
                  raise ValueError(
-                     "API key is required for Google API either via api_key or GOOGLE_API_KEY environment variable"
+                     "API key is required for Google API either via api_key or GOOGLE_API_KEY environment variable"  # noqa: E501
                  )
 
-         self._opts = LLMOptions(
+         self._opts = _LLMOptions(
              model=model,
              temperature=temperature,
              tool_choice=tool_choice,
              vertexai=vertexai,
              project=project,
              location=location,
-             candidate_count=candidate_count,
              max_output_tokens=max_output_tokens,
              top_p=top_p,
              top_k=top_k,
@@ -146,46 +134,89 @@ class LLM(llm.LLM):
              frequency_penalty=frequency_penalty,
          )
          self._client = genai.Client(
-             api_key=self._api_key,
-             vertexai=vertexai,
-             project=self._project_id,
-             location=self._location,
+             api_key=gemini_api_key,
+             vertexai=is_given(vertexai) and vertexai,
+             project=gcp_project,
+             location=gcp_location,
          )
-         self._running_fncs: MutableSet[asyncio.Task[Any]] = set()
 
      def chat(
          self,
          *,
          chat_ctx: llm.ChatContext,
+         tools: list[FunctionTool] | None = None,
          conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
-         fnc_ctx: llm.FunctionContext | None = None,
-         temperature: float | None = None,
-         n: int | None = 1,
-         parallel_tool_calls: bool | None = None,
-         tool_choice: Union[ToolChoice, Literal["auto", "required", "none"]]
-         | None = None,
-     ) -> "LLMStream":
-         if tool_choice is None:
-             tool_choice = self._opts.tool_choice
-
-         if temperature is None:
-             temperature = self._opts.temperature
+         parallel_tool_calls: NotGivenOr[bool] = NOT_GIVEN,
+         tool_choice: NotGivenOr[ToolChoice] = NOT_GIVEN,
+         response_format: NotGivenOr[
+             types.SchemaUnion | type[llm_utils.ResponseFormatT]
+         ] = NOT_GIVEN,
+         extra_kwargs: NotGivenOr[dict[str, Any]] = NOT_GIVEN,
+     ) -> LLMStream:
+         extra = {}
+
+         if is_given(extra_kwargs):
+             extra.update(extra_kwargs)
+
+         tool_choice = tool_choice if is_given(tool_choice) else self._opts.tool_choice
+         if is_given(tool_choice):
+             gemini_tool_choice: types.ToolConfig
+             if isinstance(tool_choice, dict) and tool_choice.get("type") == "function":
+                 gemini_tool_choice = types.ToolConfig(
+                     function_calling_config=types.FunctionCallingConfig(
+                         mode="ANY",
+                         allowed_function_names=[tool_choice["function"]["name"]],
+                     )
+                 )
+                 extra["tool_config"] = gemini_tool_choice
+             elif tool_choice == "required":
+                 gemini_tool_choice = types.ToolConfig(
+                     function_calling_config=types.FunctionCallingConfig(
+                         mode="ANY",
+                         allowed_function_names=[fnc.name for fnc in tools],
+                     )
+                 )
+                 extra["tool_config"] = gemini_tool_choice
+             elif tool_choice == "auto":
+                 gemini_tool_choice = types.ToolConfig(
+                     function_calling_config=types.FunctionCallingConfig(
+                         mode="AUTO",
+                     )
+                 )
+                 extra["tool_config"] = gemini_tool_choice
+             elif tool_choice == "none":
+                 gemini_tool_choice = types.ToolConfig(
+                     function_calling_config=types.FunctionCallingConfig(
+                         mode="NONE",
+                     )
+                 )
+                 extra["tool_config"] = gemini_tool_choice
+
+         if is_given(response_format):
+             extra["response_schema"] = to_response_format(response_format)
+             extra["response_mime_type"] = "application/json"
+
+         if is_given(self._opts.temperature):
+             extra["temperature"] = self._opts.temperature
+         if is_given(self._opts.max_output_tokens):
+             extra["max_output_tokens"] = self._opts.max_output_tokens
+         if is_given(self._opts.top_p):
+             extra["top_p"] = self._opts.top_p
+         if is_given(self._opts.top_k):
+             extra["top_k"] = self._opts.top_k
+         if is_given(self._opts.presence_penalty):
+             extra["presence_penalty"] = self._opts.presence_penalty
+         if is_given(self._opts.frequency_penalty):
+             extra["frequency_penalty"] = self._opts.frequency_penalty
 
          return LLMStream(
              self,
              client=self._client,
              model=self._opts.model,
-             max_output_tokens=self._opts.max_output_tokens,
-             top_p=self._opts.top_p,
-             top_k=self._opts.top_k,
-             presence_penalty=self._opts.presence_penalty,
-             frequency_penalty=self._opts.frequency_penalty,
              chat_ctx=chat_ctx,
-             fnc_ctx=fnc_ctx,
+             tools=tools,
              conn_options=conn_options,
-             n=n,
-             temperature=temperature,
-             tool_choice=tool_choice,
+             extra_kwargs=extra,
          )
 
 
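
With 1.0, the tool_choice handling moves out of `LLMStream._run` and into `chat()`, where it is translated into a google-genai `ToolConfig` and stashed in the `extra` kwargs that are later splatted into `types.GenerateContentConfig`. A short sketch of the "call this specific function" case, using only the types shown in the diff (the `get_weather` name is made up for illustration):

```python
from google.genai import types

# A named-function tool_choice, in the dict form chat() checks for above.
tool_choice = {"type": "function", "function": {"name": "get_weather"}}

tool_config = types.ToolConfig(
    function_calling_config=types.FunctionCallingConfig(
        mode="ANY",  # force the model to call one of the allowed functions
        allowed_function_names=[tool_choice["function"]["name"]],
    )
)
# chat() stores this under extra["tool_config"]; _run() later expands the dict
# with types.GenerateContentConfig(system_instruction=..., **extra).
```
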
@@ -198,96 +229,38 @@ class LLMStream(llm.LLMStream):
          model: str | ChatModels,
          chat_ctx: llm.ChatContext,
          conn_options: APIConnectOptions,
-         fnc_ctx: llm.FunctionContext | None,
-         temperature: float | None,
-         n: int | None,
-         max_output_tokens: int | None,
-         top_p: float | None,
-         top_k: float | None,
-         presence_penalty: float | None,
-         frequency_penalty: float | None,
-         tool_choice: Union[ToolChoice, Literal["auto", "required", "none"]],
+         tools: list[FunctionTool] | None,
+         extra_kwargs: dict[str, Any],
      ) -> None:
-         super().__init__(
-             llm, chat_ctx=chat_ctx, fnc_ctx=fnc_ctx, conn_options=conn_options
-         )
+         super().__init__(llm, chat_ctx=chat_ctx, tools=tools, conn_options=conn_options)
          self._client = client
          self._model = model
          self._llm: LLM = llm
-         self._max_output_tokens = max_output_tokens
-         self._top_p = top_p
-         self._top_k = top_k
-         self._presence_penalty = presence_penalty
-         self._frequency_penalty = frequency_penalty
-         self._temperature = temperature
-         self._n = n
-         self._tool_choice = tool_choice
+         self._extra_kwargs = extra_kwargs
 
      async def _run(self) -> None:
          retryable = True
          request_id = utils.shortuuid()
 
          try:
-             opts: dict[str, Any] = dict()
-             turns, system_instruction = _build_gemini_ctx(self._chat_ctx, id(self))
-
-             if self._fnc_ctx and len(self._fnc_ctx.ai_functions) > 0:
-                 functions = _build_tools(self._fnc_ctx)
-                 opts["tools"] = [types.Tool(function_declarations=functions)]
-
-             if self._tool_choice is not None:
-                 if isinstance(self._tool_choice, ToolChoice):
-                     # specific function
-                     tool_config = types.ToolConfig(
-                         function_calling_config=types.FunctionCallingConfig(
-                             mode=types.FunctionCallingConfigMode.ANY,
-                             allowed_function_names=[self._tool_choice.name],
-                         )
-                     )
-                 elif self._tool_choice == "required":
-                     # model must call any function
-                     tool_config = types.ToolConfig(
-                         function_calling_config=types.FunctionCallingConfig(
-                             mode=types.FunctionCallingConfigMode.ANY,
-                             allowed_function_names=[
-                                 fnc.name
-                                 for fnc in self._fnc_ctx.ai_functions.values()
-                             ],
-                         )
-                     )
-                 elif self._tool_choice == "auto":
-                     # model can call any function
-                     tool_config = types.ToolConfig(
-                         function_calling_config=types.FunctionCallingConfig(
-                             mode=types.FunctionCallingConfigMode.AUTO
-                         )
-                     )
-                 elif self._tool_choice == "none":
-                     # model cannot call any function
-                     tool_config = types.ToolConfig(
-                         function_calling_config=types.FunctionCallingConfig(
-                             mode=types.FunctionCallingConfigMode.NONE,
-                         )
-                     )
-                 opts["tool_config"] = tool_config
-
+             turns, system_instruction = to_chat_ctx(self._chat_ctx, id(self._llm))
+             function_declarations = to_fnc_ctx(self._tools)
+             if function_declarations:
+                 self._extra_kwargs["tools"] = [
+                     types.Tool(function_declarations=function_declarations)
+                 ]
              config = types.GenerateContentConfig(
-                 candidate_count=self._n,
-                 temperature=self._temperature,
-                 max_output_tokens=self._max_output_tokens,
-                 top_p=self._top_p,
-                 top_k=self._top_k,
-                 presence_penalty=self._presence_penalty,
-                 frequency_penalty=self._frequency_penalty,
                  system_instruction=system_instruction,
-                 **opts,
+                 **self._extra_kwargs,
              )
+
              stream = await self._client.aio.models.generate_content_stream(
                  model=self._model,
                  contents=cast(types.ContentListUnion, turns),
                  config=config,
              )
-             async for response in stream:  # type: ignore
+
+             async for response in stream:
                  if response.prompt_feedback:
                      raise APIStatusError(
                          response.prompt_feedback.json(),
@@ -308,11 +281,11 @@ class LLMStream(llm.LLMStream):
 
                  if len(response.candidates) > 1:
                      logger.warning(
-                         "gemini llm: there are multiple candidates in the response, returning response from the first one."
+                         "gemini llm: there are multiple candidates in the response, returning response from the first one."  # noqa: E501
                      )
 
-                 for index, part in enumerate(response.candidates[0].content.parts):
-                     chat_chunk = self._parse_part(request_id, index, part)
+                 for part in response.candidates[0].content.parts:
+                     chat_chunk = self._parse_part(request_id, part)
                      if chat_chunk is not None:
                          retryable = False
                          self._event_ch.send_nowait(chat_chunk)
@@ -321,7 +294,7 @@ class LLMStream(llm.LLMStream):
                  usage = response.usage_metadata
                  self._event_ch.send_nowait(
                      llm.ChatChunk(
-                         request_id=request_id,
+                         id=request_id,
                          usage=llm.CompletionUsage(
                              completion_tokens=usage.candidates_token_count or 0,
                              prompt_tokens=usage.prompt_token_count or 0,
@@ -329,11 +302,12 @@ class LLMStream(llm.LLMStream):
                          ),
                      )
                  )
+
          except ClientError as e:
              raise APIStatusError(
                  "gemini llm: client error",
                  status_code=e.code,
-                 body=e.message,
+                 body=e.message + e.status,
                  request_id=request_id,
                  retryable=False if e.code != 429 else True,
              ) from e
@@ -341,7 +315,7 @@ class LLMStream(llm.LLMStream):
              raise APIStatusError(
                  "gemini llm: server error",
                  status_code=e.code,
-                 body=e.message,
+                 body=e.message + e.status,
                  request_id=request_id,
                  retryable=retryable,
              ) from e
@@ -349,71 +323,35 @@ class LLMStream(llm.LLMStream):
              raise APIStatusError(
                  "gemini llm: api error",
                  status_code=e.code,
-                 body=e.message,
+                 body=e.message + e.status,
                  request_id=request_id,
                  retryable=retryable,
              ) from e
          except Exception as e:
              raise APIConnectionError(
-                 "gemini llm: error generating content",
+                 f"gemini llm: error generating content {str(e)}",
                  retryable=retryable,
              ) from e
 
-     def _parse_part(
-         self, id: str, index: int, part: types.Part
-     ) -> llm.ChatChunk | None:
+     def _parse_part(self, id: str, part: types.Part) -> llm.ChatChunk | None:
          if part.function_call:
-             return self._try_build_function(id, index, part)
-
-         return llm.ChatChunk(
-             request_id=id,
-             choices=[
-                 llm.Choice(
-                     delta=llm.ChoiceDelta(content=part.text, role="assistant"),
-                     index=index,
-                 )
-             ],
-         )
-
-     def _try_build_function(
-         self, id: str, index: int, part: types.Part
-     ) -> llm.ChatChunk | None:
-         if part.function_call is None:
-             logger.warning("gemini llm: no function call in the response")
-             return None
-
-         if part.function_call.name is None:
-             logger.warning("gemini llm: no function name in the response")
-             return None
-
-         if part.function_call.id is None:
-             part.function_call.id = utils.shortuuid()
-
-         if self._fnc_ctx is None:
-             logger.warning(
-                 "google stream tried to run function without function context"
+             chat_chunk = llm.ChatChunk(
+                 id=id,
+                 delta=llm.ChoiceDelta(
+                     role="assistant",
+                     tool_calls=[
+                         llm.FunctionToolCall(
+                             arguments=json.dumps(part.function_call.args),
+                             name=part.function_call.name,
+                             call_id=part.function_call.id or utils.shortuuid("function_call_"),
+                         )
+                     ],
+                     content=part.text,
+                 ),
              )
-             return None
-
-         fnc_info = _create_ai_function_info(
-             self._fnc_ctx,
-             part.function_call.id,
-             part.function_call.name,
-             json.dumps(part.function_call.args),
-         )
-
-         self._function_calls_info.append(fnc_info)
+             return chat_chunk
 
          return llm.ChatChunk(
-             request_id=id,
-             choices=[
-                 llm.Choice(
-                     delta=llm.ChoiceDelta(
-                         role="assistant",
-                         tool_calls=[fnc_info],
-                         content=part.text,
-                     ),
-                     index=index,
-                 )
-             ],
+             id=id,
+             delta=llm.ChoiceDelta(content=part.text, role="assistant"),
          )
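
The chunk shape changes with 1.0: `request_id` and the `choices[...].delta`/`index` nesting are replaced by a top-level `id` and a single `delta`, and function calls arrive as `llm.FunctionToolCall` entries on that delta. A hedged sketch of what a consumer of the rewritten `_parse_part` output might look like (it assumes `LLMStream` is async-iterable and that usage-only chunks carry no delta, which this diff does not itself show):

```python
async def collect_text(stream) -> str:
    # Iterate the 1.0-style chunks emitted via LLMStream._event_ch.
    parts: list[str] = []
    async for chunk in stream:
        if chunk.delta is None:
            continue  # e.g. the trailing usage-only chunk
        if chunk.delta.content:
            parts.append(chunk.delta.content)
        for call in chunk.delta.tool_calls or []:
            # Fields set by _parse_part above: name, arguments (JSON), call_id.
            print(f"tool call {call.call_id}: {call.name}({call.arguments})")
    return "".join(parts)
```
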