livekit-plugins-google 0.3.0__py3-none-any.whl → 1.3.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,220 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from copy import deepcopy
5
+ from typing import Any
6
+
7
+ from pydantic import TypeAdapter
8
+
9
+ from google.genai import types
10
+ from livekit.agents import llm
11
+ from livekit.agents.llm import utils as llm_utils
12
+ from livekit.agents.types import NOT_GIVEN, NotGivenOr
13
+ from livekit.agents.utils import is_given
14
+
15
+ from .tools import GeminiTool
16
+
17
+ __all__ = ["create_tools_config"]
18
+
19
+
20
def create_tools_config(
    tool_ctx: llm.ToolContext,
    *,
    tool_behavior: NotGivenOr[types.Behavior] = NOT_GIVEN,
    _only_single_type: bool = False,
) -> list[types.Tool]:
    """Build the list of Gemini ``types.Tool`` entries for a tool context.

    Function tools from ``tool_ctx`` are validated into
    ``types.FunctionDeclaration`` objects and grouped into a single
    ``types.Tool``. Provider tools that are ``GeminiTool`` instances are then
    appended via their ``to_tool_config()``.

    Args:
        tool_ctx: Source of the function tools and provider tools.
        tool_behavior: When truthy, its ``.value`` is forwarded to
            ``parse_function_tools``; otherwise ``None`` is forwarded.
        _only_single_type: When true and at least one function tool exists,
            return the function tools alone and skip the provider tools.

    Returns:
        The assembled tool list; empty when there is nothing to declare.
    """
    behavior_value = tool_behavior.value if tool_behavior else None
    declarations = [
        types.FunctionDeclaration.model_validate(raw_schema)
        for raw_schema in tool_ctx.parse_function_tools("google", tool_behavior=behavior_value)
    ]

    tools: list[types.Tool] = []
    if declarations:
        tools.append(types.Tool(function_declarations=declarations))

    # Some Google LLMs do not support multiple tool types (either function tools or builtin tools).
    if _only_single_type and tools:
        return tools

    tools.extend(
        provider_tool.to_tool_config()
        for provider_tool in tool_ctx.provider_tools
        if isinstance(provider_tool, GeminiTool)
    )
    return tools
46
+
47
+
48
def get_tool_results_for_realtime(
    chat_ctx: llm.ChatContext,
    *,
    vertexai: bool = False,
    tool_response_scheduling: NotGivenOr[types.FunctionResponseScheduling] = NOT_GIVEN,
) -> types.LiveClientToolResponse | None:
    """Collect function-call outputs from a chat context as a live tool response.

    Args:
        chat_ctx: Chat context whose items are scanned for entries of type
            ``"function_call_output"``.
        vertexai: When true, the call id is not attached to the responses.
        tool_response_scheduling: Scheduling value set on every response when
            given.

    Returns:
        A ``types.LiveClientToolResponse`` wrapping one ``FunctionResponse``
        per function-call output, or ``None`` when there are none.
    """
    apply_scheduling = is_given(tool_response_scheduling)
    attach_call_id = not vertexai

    responses: list[types.FunctionResponse] = []
    for item in chat_ctx.items:
        if item.type != "function_call_output":
            continue

        response = types.FunctionResponse(
            name=item.name,
            response={"output": item.output},
        )
        if apply_scheduling:
            # vertexai currently doesn't support the scheduling parameter, gemini api defaults to idle
            # it's the user's responsibility to avoid this parameter when using vertexai
            response.scheduling = tool_response_scheduling
        if attach_call_id:
            # vertexai does not support id in FunctionResponse
            # see: https://github.com/googleapis/python-genai/blob/85e00bc/google/genai/_live_converters.py#L1435
            response.id = item.call_id
        responses.append(response)

    if not responses:
        return None
    return types.LiveClientToolResponse(function_responses=responses)
75
+
76
+
77
def to_response_format(response_format: type | dict) -> types.SchemaUnion:
    """Convert a response format into a Gemini-compatible schema.

    The format is resolved through ``llm_utils.to_response_format_param``; the
    JSON Schema is read with ``json_schema()`` when the result is a pydantic
    ``TypeAdapter`` and with ``model_json_schema()`` otherwise, then simplified
    by ``_GeminiJsonSchema``.

    Returns:
        The simplified schema, which is whatever ``_GeminiJsonSchema.simplify``
        returns for it.
    """
    _, schema_source = llm_utils.to_response_format_param(response_format)
    raw_schema = (
        schema_source.json_schema()
        if isinstance(schema_source, TypeAdapter)
        else schema_source.model_json_schema()
    )
    return _GeminiJsonSchema(raw_schema).simplify()
85
+
86
+
87
class _GeminiJsonSchema:
    """
    Transforms the JSON Schema from Pydantic to be suitable for Gemini.
    based on pydantic-ai implementation
    https://github.com/pydantic/pydantic-ai/blob/085a9542a7360b7e388ce575323ce189b397d7ad/pydantic_ai_slim/pydantic_ai/models/gemini.py#L809

    The input schema is deep-copied, so the caller's dict is never mutated.
    ``$defs`` is detached from the copy and used to inline ``$ref`` entries.
    """

    # Type mapping from JSON Schema to Gemini Schema
    TYPE_MAPPING: dict[str, types.Type] = {
        "string": types.Type.STRING,
        "number": types.Type.NUMBER,
        "integer": types.Type.INTEGER,
        "boolean": types.Type.BOOLEAN,
        "array": types.Type.ARRAY,
        "object": types.Type.OBJECT,
    }

    def __init__(self, schema: dict[str, Any]) -> None:
        """Store a private copy of ``schema`` and split off its ``$defs``."""
        self.schema = deepcopy(schema)
        self.defs = self.schema.pop("$defs", {})

    def simplify(self) -> dict[str, Any] | None:
        """Simplify the stored schema in place and return it.

        Returns:
            The simplified schema, or ``None`` when the top-level schema is an
            OBJECT without any properties.

        Raises:
            ValueError: For recursive ``$ref`` chains, unsupported ``type``
                values, or enum values whose type cannot be inferred.
        """
        self._simplify(self.schema, refs_stack=())
        # If the schema is an OBJECT with no properties, return None.
        if self.schema.get("type") == types.Type.OBJECT and not self.schema.get("properties"):
            return None
        return self.schema

    def _simplify(self, schema: dict[str, Any], refs_stack: tuple[str, ...]) -> None:
        """Rewrite one schema node (and its children) in place.

        Args:
            schema: The node to rewrite; mutated directly.
            refs_stack: Names of the ``$defs`` entries currently being
                expanded, used to detect recursive references.

        Raises:
            ValueError: On a recursive ``$ref`` or an unsupported ``type``.
            KeyError: When a ``$ref`` names an entry missing from ``$defs``.
        """
        # Drop keywords that are not carried over to the Gemini schema.
        for dropped in ("title", "default", "additionalProperties", "$schema"):
            schema.pop(dropped, None)

        if (const := schema.pop("const", None)) is not None:
            # Gemini doesn't support const, but it does support enum with a single value
            schema["enum"] = [const]

        schema.pop("discriminator", None)
        schema.pop("examples", None)

        if ref := schema.pop("$ref", None):
            key = re.sub(r"^#/\$defs/", "", ref)
            if key in refs_stack:
                raise ValueError("Recursive `$ref`s in JSON Schema are not supported by Gemini")
            refs_stack += (key,)
            schema_def = self.defs[key]
            # The definition is simplified in place and then inlined over this node.
            self._simplify(schema_def, refs_stack)
            schema.update(schema_def)
            return

        if "enum" in schema and "type" not in schema:
            schema["type"] = self._infer_type(schema["enum"][0])

        # Convert type value to Gemini format
        if "type" in schema and schema["type"] != "null":
            json_type = schema["type"]
            # The isinstance guard keeps unhashable values (e.g. a list of
            # types) out of the dict lookup so they reach the ValueError below
            # instead of failing with an unrelated TypeError.
            if isinstance(json_type, str) and json_type in self.TYPE_MAPPING:
                schema["type"] = self.TYPE_MAPPING[json_type]
            elif isinstance(json_type, types.Type):
                # Already converted, e.g. a shared definition visited twice.
                schema["type"] = json_type
            else:
                raise ValueError(f"Unsupported type in JSON Schema: {json_type}")

        # Map field names that differ between JSON Schema and Gemini
        self._map_field_names(schema)

        # Handle anyOf - map to any_of
        if any_of := schema.pop("anyOf", None):
            mapped_any_of = []
            has_null = False
            non_null_schema = None

            for item_schema in any_of:
                self._simplify(item_schema, refs_stack)
                if item_schema == {"type": "null"}:
                    has_null = True
                else:
                    non_null_schema = item_schema
                    mapped_any_of.append(item_schema)

            if has_null and len(any_of) == 2 and non_null_schema:
                # "X or null" collapses to X marked as nullable.
                schema.update(non_null_schema)
                schema["nullable"] = True
            else:
                # NOTE(review): null members are left out of `any_of` here and
                # `nullable` is not set, so nullability of a 3+ member union is
                # dropped - confirm whether that is intended.
                schema["any_of"] = mapped_any_of

        type_ = schema.get("type")

        if type_ == types.Type.OBJECT:
            self._object(schema, refs_stack)
        elif type_ == types.Type.ARRAY:
            self._array(schema, refs_stack)

    def _infer_type(self, value: Any) -> str:
        """Return the JSON Schema type name for a Python enum value.

        Raises:
            ValueError: When ``value`` is not a bool, int, float or str.
        """
        # bool must be tested before int: bool is a subclass of int, so an
        # int-first check would classify True/False as "integer".
        if isinstance(value, bool):
            return "boolean"
        elif isinstance(value, int):
            return "integer"
        elif isinstance(value, float):
            return "number"
        elif isinstance(value, str):
            return "string"
        else:
            raise ValueError(f"Unsupported type in Schema: {type(value)}")

    def _map_field_names(self, schema: dict[str, Any]) -> None:
        """Map JSON Schema field names to Gemini Schema field names."""
        mappings = {
            "minLength": "min_length",
            "maxLength": "max_length",
            "minItems": "min_items",
            "maxItems": "max_items",
            "minProperties": "min_properties",
            "maxProperties": "max_properties",
        }

        for json_name, gemini_name in mappings.items():
            if json_name in schema:
                schema[gemini_name] = schema.pop(json_name)

    def _object(self, schema: dict[str, Any], refs_stack: tuple[str, ...]) -> None:
        """Simplify every property schema of an OBJECT node in place."""
        if properties := schema.get("properties"):
            for value in properties.values():
                self._simplify(value, refs_stack)

    def _array(self, schema: dict[str, Any], refs_stack: tuple[str, ...]) -> None:
        """Simplify the ``prefixItems`` and ``items`` schemas of an ARRAY node in place."""
        if prefix_items := schema.get("prefixItems"):
            for prefix_item in prefix_items:
                self._simplify(prefix_item, refs_stack)

        if items_schema := schema.get("items"):
            self._simplify(items_schema, refs_stack)
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "0.3.0"
15
+ __version__ = "1.3.11"
@@ -0,0 +1,63 @@
1
+ Metadata-Version: 2.4
2
+ Name: livekit-plugins-google
3
+ Version: 1.3.11
4
+ Summary: Agent Framework plugin for services from Google Cloud
5
+ Project-URL: Documentation, https://docs.livekit.io
6
+ Project-URL: Website, https://livekit.io/
7
+ Project-URL: Source, https://github.com/livekit/agents
8
+ Author: LiveKit
9
+ License-Expression: Apache-2.0
10
+ Keywords: ai,audio,gemini,google,livekit,realtime,video,voice
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: Apache Software License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3 :: Only
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Topic :: Multimedia :: Sound/Audio
18
+ Classifier: Topic :: Multimedia :: Video
19
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
+ Requires-Python: >=3.10
21
+ Requires-Dist: google-auth<3,>=2
22
+ Requires-Dist: google-cloud-speech<3,>=2
23
+ Requires-Dist: google-cloud-texttospeech<3,>=2.32
24
+ Requires-Dist: google-genai>=1.55; python_version >= '3.10'
25
+ Requires-Dist: livekit-agents>=1.3.6
26
+ Description-Content-Type: text/markdown
27
+
28
+ # Google AI plugin for LiveKit Agents
29
+
30
+ Support for Gemini, Gemini Live, Cloud Speech-to-Text, and Cloud Text-to-Speech.
31
+
32
+ See [https://docs.livekit.io/agents/integrations/google/](https://docs.livekit.io/agents/integrations/google/) for more information.
33
+
34
+ ## Installation
35
+
36
+ ```bash
37
+ pip install livekit-plugins-google
38
+ ```
39
+
40
+ ## Pre-requisites
41
+
42
+ For credentials, you'll need a Google Cloud account with the correct credentials. Credentials can be passed directly or via Application Default Credentials as specified in [How Application Default Credentials works](https://cloud.google.com/docs/authentication/application-default-credentials).
43
+
44
+ To use the STT and TTS API, you'll need to enable the respective services for your Google Cloud project.
45
+
46
+ - Cloud Speech-to-Text API
47
+ - Cloud Text-to-Speech API
48
+
49
+ ## Live API model support
50
+
51
+ LiveKit supports the Gemini Live API on both the Gemini Developer API and Vertex AI. However, be aware that the two behave slightly differently and use different model names.
52
+
53
+ The following models are supported by Gemini Developer API:
54
+
55
+ - gemini-2.0-flash-live-001
56
+ - gemini-live-2.5-flash-preview
57
+ - gemini-2.5-flash-native-audio-preview-09-2025
58
+
59
+ And these on Vertex AI:
60
+
61
+ - gemini-2.0-flash-exp
62
+ - gemini-live-2.5-flash-preview-native-audio
63
+ - gemini-live-2.5-flash-preview-native-audio-09-2025
@@ -0,0 +1,18 @@
1
+ livekit/plugins/google/__init__.py,sha256=21ZYfsz4d4a5tP_po9WPtIv552gYh6thg7mcnkYA9vc,1445
2
+ livekit/plugins/google/llm.py,sha256=eLLuXQZ0GMrl8blfb2staHr2ClAsYroJRydt0arT1Uk,24386
3
+ livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
4
+ livekit/plugins/google/models.py,sha256=NOkEJVTmvZ7A6TSVCACoaST-qM84YSPwo_HkM5ct1mY,3171
5
+ livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ livekit/plugins/google/stt.py,sha256=Q3-gEAV5xykTLfmkj8bPq6mHdx4cn6W7_-fQrI7n85o,35246
7
+ livekit/plugins/google/tools.py,sha256=cH8qXkQj6zQ_cWUV-4apjSeQaMhfK6907XKPAs9PVNo,1956
8
+ livekit/plugins/google/tts.py,sha256=dVhGDN0Q-b_tGKdridkb5grl8YhBMcxTc_kIdcg4gQ8,19251
9
+ livekit/plugins/google/utils.py,sha256=MSMsmPBsmxL-rSi5mc8d7ViI5bHgNEOO4Em0yHLhNyQ,8128
10
+ livekit/plugins/google/version.py,sha256=Db2pVVYNC02fj0G6tMcFzTjBbAxJWVcZ5l1gTIq9VU4,601
11
+ livekit/plugins/google/beta/__init__.py,sha256=4q5dx-Y6o9peCDziB03Skf5ngH4PTBsZC86ZawWrgnk,271
12
+ livekit/plugins/google/beta/gemini_tts.py,sha256=SpKorOteQ7GYoGWsxV5YPuGeMexoosmtDXQVz_1ZeLA,8743
13
+ livekit/plugins/google/realtime/__init__.py,sha256=_fW2NMN22F-hnQ4xAJ_g5lPbR7CvM_xXzSWlUQY-E-U,188
14
+ livekit/plugins/google/realtime/api_proto.py,sha256=n6Rb-3qrZyByp8MSkBHA3TIW2E0IGKH1Xj7hKJM029M,2290
15
+ livekit/plugins/google/realtime/realtime_api.py,sha256=_eCRUK2Bi4ypBEPcCzK9ym_isO8BAvmlXkgW1cO4P8w,54723
16
+ livekit_plugins_google-1.3.11.dist-info/METADATA,sha256=sQqXHcC_xM0n6_MNZYlJJq61WRG4y422eYFDy0YeXD4,2467
17
+ livekit_plugins_google-1.3.11.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
18
+ livekit_plugins_google-1.3.11.dist-info/RECORD,,
@@ -1,5 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.43.0)
2
+ Generator: hatchling 1.28.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
-
@@ -1,47 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: livekit-plugins-google
3
- Version: 0.3.0
4
- Summary: Agent Framework plugin for services from Google Cloud
5
- Home-page: https://github.com/livekit/agents
6
- License: Apache-2.0
7
- Project-URL: Documentation, https://docs.livekit.io
8
- Project-URL: Website, https://livekit.io/
9
- Project-URL: Source, https://github.com/livekit/agents
10
- Keywords: webrtc,realtime,audio,video,livekit
11
- Classifier: Intended Audience :: Developers
12
- Classifier: License :: OSI Approved :: Apache Software License
13
- Classifier: Topic :: Multimedia :: Sound/Audio
14
- Classifier: Topic :: Multimedia :: Video
15
- Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
16
- Classifier: Programming Language :: Python :: 3
17
- Classifier: Programming Language :: Python :: 3.7
18
- Classifier: Programming Language :: Python :: 3.8
19
- Classifier: Programming Language :: Python :: 3.9
20
- Classifier: Programming Language :: Python :: 3.10
21
- Classifier: Programming Language :: Python :: 3 :: Only
22
- Requires-Python: >=3.7.0
23
- Description-Content-Type: text/markdown
24
- Requires-Dist: numpy <2,>=1
25
- Requires-Dist: google-api-core <3,>=2
26
- Requires-Dist: google-auth <3,>=2
27
- Requires-Dist: google-cloud-core <3,>=2
28
- Requires-Dist: google-cloud-speech <3,>=2
29
- Requires-Dist: google-cloud-texttospeech <3,>=2
30
- Requires-Dist: google-cloud-translate <4,>=3
31
- Requires-Dist: googleapis-common-protos <2,>=1
32
- Requires-Dist: livekit >=0.9.2
33
- Requires-Dist: livekit-agents ~=0.5.dev0
34
-
35
- # LiveKit Plugins Google
36
-
37
- Agent Framework plugin for services from Google Cloud. Currently supporting Google's [Speech-to-Text](https://cloud.google.com/speech-to-text) API.
38
-
39
- ## Installation
40
-
41
- ```bash
42
- pip install livekit-plugins-google
43
- ```
44
-
45
- ## Pre-requisites
46
-
47
- For credentials, you'll need a Google Cloud account and obtain the correct credentials. Credentials can be passed directly or set as [GOOGLE_APPLICATION_CREDENTIALS](https://cloud.google.com/docs/authentication/application-default-credentials) environment variable.
@@ -1,9 +0,0 @@
1
- livekit/plugins/google/__init__.py,sha256=snPMHNLrurYbLWQOkV_o6qG1CEWsOCZ8ZfPMvmh5ejY,931
2
- livekit/plugins/google/models.py,sha256=DgiXOvGDO8D9rfCKHJL28lbyQR8mXXB2kpku-szXLRs,1185
3
- livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- livekit/plugins/google/stt.py,sha256=lYA8hlkxG3YSw1Q34j8hgs4us5Ij-TLBQTRwtGPN9MY,15025
5
- livekit/plugins/google/version.py,sha256=G5iYozum4q7UpHwW43F7QfhzUfwcncPxBZ0gmUGsd5I,600
6
- livekit_plugins_google-0.3.0.dist-info/METADATA,sha256=sPd3OZxViD0Aq1uF1qJpbsYeqLAlq8tB720JXk-_RKw,1945
7
- livekit_plugins_google-0.3.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
8
- livekit_plugins_google-0.3.0.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
9
- livekit_plugins_google-0.3.0.dist-info/RECORD,,
@@ -1 +0,0 @@
1
- livekit