PyPI - livekit-plugins-google - Versions diffs - 0.11.3__py3-none-any.whl → 1.0.0__py3-none-any.whl - Mend

livekit-plugins-google: livekit_plugins_google-0.11.3-py3-none-any.whl → livekit_plugins_google-1.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

livekit/plugins/google/beta/realtime/__init__.py +1 -5
livekit/plugins/google/beta/realtime/api_proto.py +2 -4
livekit/plugins/google/beta/realtime/realtime_api.py +407 -449
livekit/plugins/google/llm.py +158 -220
livekit/plugins/google/stt.py +80 -115
livekit/plugins/google/tts.py +40 -56
livekit/plugins/google/utils.py +251 -0
livekit/plugins/google/version.py +1 -1
{livekit_plugins_google-0.11.3.dist-info → livekit_plugins_google-1.0.0.dist-info}/METADATA +11 -21
livekit_plugins_google-1.0.0.dist-info/RECORD +16 -0
{livekit_plugins_google-0.11.3.dist-info → livekit_plugins_google-1.0.0.dist-info}/WHEEL +1 -2
livekit/plugins/google/_utils.py +0 -199
livekit/plugins/google/beta/realtime/transcriber.py +0 -270
livekit_plugins_google-0.11.3.dist-info/RECORD +0 -18
livekit_plugins_google-0.11.3.dist-info/top_level.txt +0 -1

livekit/plugins/google/utils.py ADDED

@@ -0,0 +1,251 @@
+from __future__ import annotations
+import json
+import re
+from copy import deepcopy
+from typing import Any
+from pydantic import TypeAdapter
+from google.genai import types
+from livekit.agents import llm
+from livekit.agents.llm import FunctionTool, utils as llm_utils
+from .log import logger
+__all__ = ["to_chat_ctx", "to_fnc_ctx"]
+def to_fnc_ctx(fncs: list[FunctionTool]) -> list[types.FunctionDeclaration]:
+    return [_build_gemini_fnc(fnc) for fnc in fncs]
+def get_tool_results_for_realtime(chat_ctx: llm.ChatContext) -> types.LiveClientToolResponse | None:
+    function_responses: list[types.FunctionResponse] = []
+    for msg in chat_ctx.items:
+        if msg.type == "function_call_output":
+            function_responses.append(
+                types.FunctionResponse(
+                    id=msg.call_id,
+                    name=msg.name,
+                    response={"text": msg.output},
+                )
+            )
+    return (
+        types.LiveClientToolResponse(function_responses=function_responses)
+        if function_responses
+        else None
+    )
+def to_chat_ctx(
+    chat_ctx: llm.ChatContext, cache_key: Any, ignore_functions: bool = False
+) -> tuple[list[types.Content], types.Content | None]:
+    turns: list[types.Content] = []
+    system_instruction: types.Content | None = None
+    current_role: str | None = None
+    parts: list[types.Part] = []
+    for msg in chat_ctx.items:
+        if msg.type == "message" and msg.role == "system":
+            sys_parts = []
+            for content in msg.content:
+                if content and isinstance(content, str):
+                    sys_parts.append(types.Part(text=content))
+            system_instruction = types.Content(parts=sys_parts)
+            continue
+        if msg.type == "message":
+            role = "model" if msg.role == "assistant" else "user"
+        elif msg.type == "function_call":
+            role = "model"
+        elif msg.type == "function_call_output":
+            role = "user"
+        # if the effective role changed, finalize the previous turn.
+        if role != current_role:
+            if current_role is not None and parts:
+                turns.append(types.Content(role=current_role, parts=parts))
+            parts = []
+            current_role = role
+        if msg.type == "message":
+            for content in msg.content:
+                if content and isinstance(content, str):
+                    parts.append(types.Part(text=content))
+                elif content and isinstance(content, dict):
+                    parts.append(types.Part(text=json.dumps(content)))
+                elif isinstance(content, llm.ImageContent):
+                    parts.append(_to_image_part(content, cache_key))
+        elif msg.type == "function_call" and not ignore_functions:
+            parts.append(
+                types.Part(
+                    function_call=types.FunctionCall(
+                        name=msg.name,
+                        args=json.loads(msg.arguments),
+                    )
+                )
+            )
+        elif msg.type == "function_call_output" and not ignore_functions:
+            parts.append(
+                types.Part(
+                    function_response=types.FunctionResponse(
+                        name=msg.name,
+                        response={"text": msg.output},
+                    )
+                )
+            )
+    if current_role is not None and parts:
+        turns.append(types.Content(role=current_role, parts=parts))
+    if not turns:
+        # if no turns, add a user message with a placeholder
+        turns = [types.Content(role="user", parts=[types.Part(text=".")])]
+    return turns, system_instruction
+def _to_image_part(image: llm.ImageContent, cache_key: Any) -> types.Part:
+    img = llm.utils.serialize_image(image)
+    if img.external_url:
+        if img.mime_type:
+            mime_type = img.mime_type
+        else:
+            logger.debug("No media type provided for image, defaulting to image/jpeg.")
+            mime_type = "image/jpeg"
+        return types.Part.from_uri(file_uri=img.external_url, mime_type=mime_type)
+    if cache_key not in image._cache:
+        image._cache[cache_key] = img.data_bytes
+    return types.Part.from_bytes(data=image._cache[cache_key], mime_type=img.mime_type)
+def _build_gemini_fnc(function_tool: FunctionTool) -> types.FunctionDeclaration:
+    fnc = llm.utils.build_legacy_openai_schema(function_tool, internally_tagged=True)
+    json_schema = _GeminiJsonSchema(fnc["parameters"]).simplify()
+    return types.FunctionDeclaration(
+        name=fnc["name"],
+        description=fnc["description"],
+        parameters=json_schema,
+    )
+def to_response_format(response_format: type | dict) -> types.SchemaUnion:
+    _, json_schema_type = llm_utils.to_response_format_param(response_format)
+    if isinstance(json_schema_type, TypeAdapter):
+        schema = json_schema_type.json_schema()
+    else:
+        schema = json_schema_type.model_json_schema()
+    return _GeminiJsonSchema(schema).simplify()
+class _GeminiJsonSchema:
+    """
+    Transforms the JSON Schema from Pydantic to be suitable for Gemini.
+    based on pydantic-ai implementation
+    https://github.com/pydantic/pydantic-ai/blob/085a9542a7360b7e388ce575323ce189b397d7ad/pydantic_ai_slim/pydantic_ai/models/gemini.py#L809
+    """
+    # Type mapping from JSON Schema to Gemini Schema
+    TYPE_MAPPING: dict[str, types.Type] = {
+        "string": types.Type.STRING,
+        "number": types.Type.NUMBER,
+        "integer": types.Type.INTEGER,
+        "boolean": types.Type.BOOLEAN,
+        "array": types.Type.ARRAY,
+        "object": types.Type.OBJECT,
+    }
+    def __init__(self, schema: dict[str, Any]):
+        self.schema = deepcopy(schema)
+        self.defs = self.schema.pop("$defs", {})
+    def simplify(self) -> dict[str, Any] | None:
+        self._simplify(self.schema, refs_stack=())
+        # If the schema is an OBJECT with no properties, return None.
+        if self.schema.get("type") == types.Type.OBJECT and not self.schema.get("properties"):
+            return None
+        return self.schema
+    def _simplify(self, schema: dict[str, Any], refs_stack: tuple[str, ...]) -> None:
+        schema.pop("title", None)
+        schema.pop("default", None)
+        schema.pop("additionalProperties", None)
+        if ref := schema.pop("$ref", None):
+            key = re.sub(r"^#/\$defs/", "", ref)
+            if key in refs_stack:
+                raise ValueError("Recursive `$ref`s in JSON Schema are not supported by Gemini")
+            refs_stack += (key,)
+            schema_def = self.defs[key]
+            self._simplify(schema_def, refs_stack)
+            schema.update(schema_def)
+            return
+        # Convert type value to Gemini format
+        if "type" in schema and schema["type"] != "null":
+            json_type = schema["type"]
+            if json_type in self.TYPE_MAPPING:
+                schema["type"] = self.TYPE_MAPPING[json_type]
+            elif isinstance(json_type, types.Type):
+                schema["type"] = json_type
+            else:
+                raise ValueError(f"Unsupported type in JSON Schema: {json_type}")
+        # Map field names that differ between JSON Schema and Gemini
+        self._map_field_names(schema)
+        # Handle anyOf - map to any_of
+        if any_of := schema.pop("anyOf", None):
+            if any_of:
+                mapped_any_of = []
+                has_null = False
+                non_null_schema = None
+                for item_schema in any_of:
+                    self._simplify(item_schema, refs_stack)
+                    if item_schema == {"type": "null"}:
+                        has_null = True
+                    else:
+                        non_null_schema = item_schema
+                        mapped_any_of.append(item_schema)
+                if has_null and len(any_of) == 2 and non_null_schema:
+                    schema.update(non_null_schema)
+                    schema["nullable"] = True
+                else:
+                    schema["any_of"] = mapped_any_of
+        type_ = schema.get("type")
+        if type_ == types.Type.OBJECT:
+            self._object(schema, refs_stack)
+        elif type_ == types.Type.ARRAY:
+            self._array(schema, refs_stack)
+    def _map_field_names(self, schema: dict[str, Any]) -> None:
+        """Map JSON Schema field names to Gemini Schema field names."""
+        mappings = {
+            "minLength": "min_length",
+            "maxLength": "max_length",
+            "minItems": "min_items",
+            "maxItems": "max_items",
+            "minProperties": "min_properties",
+            "maxProperties": "max_properties",
+        }
+        for json_name, gemini_name in mappings.items():
+            if json_name in schema:
+                schema[gemini_name] = schema.pop(json_name)
+    def _object(self, schema: dict[str, Any], refs_stack: tuple[str, ...]) -> None:
+        if properties := schema.get("properties"):
+            for value in properties.values():
+                self._simplify(value, refs_stack)
+    def _array(self, schema: dict[str, Any], refs_stack: tuple[str, ...]) -> None:
+        if prefix_items := schema.get("prefixItems"):
+            for prefix_item in prefix_items:
+                self._simplify(prefix_item, refs_stack)
+        if items_schema := schema.get("items"):
+            self._simplify(items_schema, refs_stack)

livekit/plugins/google/version.py CHANGED

@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__ = "0.11.3"
+__version__ = "1.0.0"

{livekit_plugins_google-0.11.3.dist-info → livekit_plugins_google-1.0.0.dist-info}/METADATA RENAMED

@@ -1,39 +1,29 @@
 Metadata-Version: 2.4
 Name: livekit-plugins-google
-Version: 0.11.3
+Version: 1.0.0
 Summary: Agent Framework plugin for services from Google Cloud
-Home-page: https://github.com/livekit/agents
-License: Apache-2.0
 Project-URL: Documentation, https://docs.livekit.io
 Project-URL: Website, https://livekit.io/
 Project-URL: Source, https://github.com/livekit/agents
-Keywords: webrtc,realtime,audio,video,livekit
+Author: LiveKit
+License-Expression: Apache-2.0
+Keywords: audio,livekit,realtime,video,webrtc
 Classifier: Intended Audience :: Developers
 Classifier: License :: OSI Approved :: Apache Software License
-Classifier: Topic :: Multimedia :: Sound/Audio
-Classifier: Topic :: Multimedia :: Video
-Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3 :: Only
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3 :: Only
+Classifier: Topic :: Multimedia :: Sound/Audio
+Classifier: Topic :: Multimedia :: Video
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.9.0
-Description-Content-Type: text/markdown
 Requires-Dist: google-auth<3,>=2
 Requires-Dist: google-cloud-speech<3,>=2
 Requires-Dist: google-cloud-texttospeech<3,>=2
-Requires-Dist: google-genai==1.3.0
-Requires-Dist: livekit-agents<1.0.0,>=0.12.16
-Dynamic: classifier
-Dynamic: description
-Dynamic: description-content-type
-Dynamic: home-page
-Dynamic: keywords
-Dynamic: license
-Dynamic: project-url
-Dynamic: requires-dist
-Dynamic: requires-python
-Dynamic: summary
+Requires-Dist: google-genai==1.5.0
+Requires-Dist: livekit-agents>=1.0.0
+Description-Content-Type: text/markdown
 # LiveKit Plugins Google

livekit_plugins_google-1.0.0.dist-info/RECORD ADDED

@@ -0,0 +1,16 @@
+livekit/plugins/google/__init__.py,sha256=e_kSlFNmKhyyeliz7f4WOKc_Y0-y39QjO5nCWuguhss,1171
+livekit/plugins/google/llm.py,sha256=81LCCJPmpMOkApX0S0a-zu5xIvcm2Pk8lTTz-PoK5m0,14740
+livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
+livekit/plugins/google/models.py,sha256=SGjAumdDK97NNLwMFcqZdKR68f1NoGB2Rk1UP2-imG0,1457
+livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+livekit/plugins/google/stt.py,sha256=AG_lh2fuuduJi0jFbA_QKFXLJ6NUdF1W_FfkLUJML_Q,22413
+livekit/plugins/google/tts.py,sha256=P8Zu2s0TfmyzlrNxzDIqyn3sGiNSW0n3nB_JlO_ojiM,7985
+livekit/plugins/google/utils.py,sha256=pbLSOAdQxInWhgI2Yhsrr9KvgvpFXYDdU2yx2p03pFg,9437
+livekit/plugins/google/version.py,sha256=nW89L_U9N4ukT3wAO3BeTqOaa87zLUOsEFz8TkiKIP8,600
+livekit/plugins/google/beta/__init__.py,sha256=AxRYc7NGG62Tv1MmcZVCDHNvlhbC86hM-_yP01Qb28k,47
+livekit/plugins/google/beta/realtime/__init__.py,sha256=_fW2NMN22F-hnQ4xAJ_g5lPbR7CvM_xXzSWlUQY-E-U,188
+livekit/plugins/google/beta/realtime/api_proto.py,sha256=cwpFOYjN_3v5PMY0TnzoHhJoASfZ7Qt9IO281ZhJ7Ww,565
+livekit/plugins/google/beta/realtime/realtime_api.py,sha256=ubF2Ha9zCD28gQrrjTcX3MWgMBs7bC3rI0DUdaHAa_Q,22021
+livekit_plugins_google-1.0.0.dist-info/METADATA,sha256=UfiGiFsqqXKhw8HcQKW6WhltzxgeNz5qr7Vo6PoxU58,3489
+livekit_plugins_google-1.0.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+livekit_plugins_google-1.0.0.dist-info/RECORD,,

{livekit_plugins_google-0.11.3.dist-info → livekit_plugins_google-1.0.0.dist-info}/WHEEL RENAMED

@@ -1,5 +1,4 @@
 Wheel-Version: 1.0
-Generator: setuptools (78.1.0)
+Generator: hatchling 1.27.0
 Root-Is-Purelib: true
 Tag: py3-none-any

livekit/plugins/google/_utils.py DELETED

@@ -1,199 +0,0 @@
-from __future__ import annotations
-import base64
-import inspect
-import json
-from typing import Any, Dict, List, Optional, get_args, get_origin
-from livekit import rtc
-from livekit.agents import llm, utils
-from livekit.agents.llm.function_context import _is_optional_type
-from google.genai import types
-from google.genai.types import Type as GenaiType
-JSON_SCHEMA_TYPE_MAP: dict[type, GenaiType] = {
-    str: GenaiType.STRING,
-    int: GenaiType.INTEGER,
-    float: GenaiType.NUMBER,
-    bool: GenaiType.BOOLEAN,
-    dict: GenaiType.OBJECT,
-    list: GenaiType.ARRAY,
-}
-__all__ = ["_build_gemini_ctx", "_build_tools"]
-def _build_parameters(arguments: Dict[str, Any]) -> types.Schema | None:
-    properties: Dict[str, types.Schema] = {}
-    required: List[str] = []
-    for arg_name, arg_info in arguments.items():
-        prop = types.Schema()
-        if arg_info.description:
-            prop.description = arg_info.description
-        _, py_type = _is_optional_type(arg_info.type)
-        origin = get_origin(py_type)
-        if origin is list:
-            item_type = get_args(py_type)[0]
-            if item_type not in JSON_SCHEMA_TYPE_MAP:
-                raise ValueError(f"Unsupported type: {item_type}")
-            prop.type = GenaiType.ARRAY
-            prop.items = types.Schema(type=JSON_SCHEMA_TYPE_MAP[item_type])
-            if arg_info.choices:
-                prop.items.enum = arg_info.choices
-        else:
-            if py_type not in JSON_SCHEMA_TYPE_MAP:
-                raise ValueError(f"Unsupported type: {py_type}")
-            prop.type = JSON_SCHEMA_TYPE_MAP[py_type]
-            if arg_info.choices:
-                prop.enum = arg_info.choices
-                if py_type is int:
-                    raise ValueError(
-                        f"Parameter '{arg_info.name}' uses integer choices, not supported by this model."
-                    )
-        properties[arg_name] = prop
-        if arg_info.default is inspect.Parameter.empty:
-            required.append(arg_name)
-    if properties:
-        parameters = types.Schema(type=GenaiType.OBJECT, properties=properties)
-        if required:
-            parameters.required = required
-        return parameters
-    return None
-def _build_tools(fnc_ctx: Any) -> List[types.FunctionDeclaration]:
-    function_declarations: List[types.FunctionDeclaration] = []
-    for fnc_info in fnc_ctx.ai_functions.values():
-        parameters = _build_parameters(fnc_info.arguments)
-        func_decl = types.FunctionDeclaration(
-            name=fnc_info.name,
-            description=fnc_info.description,
-            parameters=parameters,
-        )
-        function_declarations.append(func_decl)
-    return function_declarations
-def _build_gemini_ctx(
-    chat_ctx: llm.ChatContext, cache_key: Any
-) -> tuple[list[types.Content], Optional[types.Content]]:
-    turns: list[types.Content] = []
-    system_instruction: Optional[types.Content] = None
-    current_role: Optional[str] = None
-    parts: list[types.Part] = []
-    for msg in chat_ctx.messages:
-        if msg.role == "system":
-            if isinstance(msg.content, str):
-                system_instruction = types.Content(parts=[types.Part(text=msg.content)])
-            continue
-        if msg.role == "assistant":
-            role = "model"
-        elif msg.role == "tool":
-            role = "user"
-        else:
-            role = "user"
-        # If role changed, finalize previous parts into a turn
-        if role != current_role:
-            if current_role is not None and parts:
-                turns.append(types.Content(role=current_role, parts=parts))
-            current_role = role
-            parts = []
-        if msg.tool_calls:
-            for fnc in msg.tool_calls:
-                parts.append(
-                    types.Part(
-                        function_call=types.FunctionCall(
-                            name=fnc.function_info.name,
-                            args=fnc.arguments,
-                        )
-                    )
-                )
-        if msg.role == "tool":
-            if msg.content:
-                if isinstance(msg.content, dict):
-                    parts.append(
-                        types.Part(
-                            function_response=types.FunctionResponse(
-                                name=msg.name,
-                                response=msg.content,
-                            )
-                        )
-                    )
-                elif isinstance(msg.content, str):
-                    parts.append(
-                        types.Part(
-                            function_response=types.FunctionResponse(
-                                name=msg.name,
-                                response={"result": msg.content},
-                            )
-                        )
-                    )
-        else:
-            if msg.content:
-                if isinstance(msg.content, str):
-                    parts.append(types.Part(text=msg.content))
-                elif isinstance(msg.content, dict):
-                    parts.append(types.Part(text=json.dumps(msg.content)))
-                elif isinstance(msg.content, list):
-                    for item in msg.content:
-                        if isinstance(item, str):
-                            parts.append(types.Part(text=item))
-                        elif isinstance(item, llm.ChatImage):
-                            parts.append(_build_gemini_image_part(item, cache_key))
-    # Finalize last role's parts if any remain
-    if current_role is not None and parts:
-        turns.append(types.Content(role=current_role, parts=parts))
-    return turns, system_instruction
-def _build_gemini_image_part(image: llm.ChatImage, cache_key: Any) -> types.Part:
-    if isinstance(image.image, str):
-        # Check if the string is a Data URL
-        if image.image.startswith("data:image/jpeg;base64,"):
-            # Extract the base64 part after the comma
-            base64_data = image.image.split(",", 1)[1]
-            try:
-                image_bytes = base64.b64decode(base64_data)
-            except Exception as e:
-                raise ValueError("Invalid base64 data in image URL") from e
-            return types.Part.from_bytes(data=image_bytes, mime_type="image/jpeg")
-        else:
-            # Assume it's a regular URL
-            return types.Part.from_uri(file_uri=image.image, mime_type="image/jpeg")
-    elif isinstance(image.image, rtc.VideoFrame):
-        if cache_key not in image._cache:
-            opts = utils.images.EncodeOptions()
-            if image.inference_width and image.inference_height:
-                opts.resize_options = utils.images.ResizeOptions(
-                    width=image.inference_width,
-                    height=image.inference_height,
-                    strategy="scale_aspect_fit",
-                )
-            image._cache[cache_key] = utils.images.encode(image.image, opts)
-        return types.Part.from_bytes(
-            data=image._cache[cache_key], mime_type="image/jpeg"
-        )
-    raise ValueError(f"Unsupported image type: {type(image.image)}")

livekit-plugins-google 0.11.3__py3-none-any.whl → 1.0.0__py3-none-any.whl

livekit-plugins-google: livekit_plugins_google-0.11.3-py3-none-any.whl → livekit_plugins_google-1.0.0-py3-none-any.whl