livekit-plugins-google 0.11.3__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,251 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import re
5
+ from copy import deepcopy
6
+ from typing import Any
7
+
8
+ from pydantic import TypeAdapter
9
+
10
+ from google.genai import types
11
+ from livekit.agents import llm
12
+ from livekit.agents.llm import FunctionTool, utils as llm_utils
13
+
14
+ from .log import logger
15
+
16
+ __all__ = ["to_chat_ctx", "to_fnc_ctx"]
17
+
18
+
19
def to_fnc_ctx(fncs: list[FunctionTool]) -> list[types.FunctionDeclaration]:
    """Convert function tools into Gemini function declarations.

    Each tool in ``fncs`` is passed through ``_build_gemini_fnc``; the returned
    list keeps the order of the input list.
    """
    declarations: list[types.FunctionDeclaration] = []
    for tool in fncs:
        declarations.append(_build_gemini_fnc(tool))
    return declarations
21
+
22
+
23
def get_tool_results_for_realtime(chat_ctx: llm.ChatContext) -> types.LiveClientToolResponse | None:
    """Bundle the function-call outputs found in ``chat_ctx`` into one tool response.

    Every item whose ``type`` is ``"function_call_output"`` becomes a
    ``types.FunctionResponse`` carrying the item's call id, name, and its
    output wrapped as ``{"text": output}``.

    Returns:
        A ``types.LiveClientToolResponse`` holding the collected responses, or
        ``None`` when the context contains no function-call output.
    """
    responses = [
        types.FunctionResponse(
            id=item.call_id,
            name=item.name,
            response={"text": item.output},
        )
        for item in chat_ctx.items
        if item.type == "function_call_output"
    ]
    if not responses:
        return None
    return types.LiveClientToolResponse(function_responses=responses)
39
+
40
+
41
def to_chat_ctx(
    chat_ctx: llm.ChatContext, cache_key: Any, ignore_functions: bool = False
) -> tuple[list[types.Content], types.Content | None]:
    """Convert a chat context into Gemini turns plus an optional system instruction.

    Consecutive items that map to the same Gemini role ("model" for assistant
    messages and function calls, "user" for everything else) are merged into a
    single ``types.Content`` turn.

    Args:
        chat_ctx: The chat context whose ``items`` are converted.
        cache_key: Key under which encoded image bytes are cached on each
            image content object (forwarded to ``_to_image_part``).
        ignore_functions: When true, function calls and function-call outputs
            are left out entirely.

    Returns:
        A ``(turns, system_instruction)`` tuple. ``system_instruction`` is built
        from the string content of the last system message seen, or ``None`` if
        there is none. ``turns`` is never empty: if nothing was converted it
        holds a single placeholder user turn.
    """
    turns: list[types.Content] = []
    system_instruction: types.Content | None = None
    current_role: str | None = None
    parts: list[types.Part] = []

    for msg in chat_ctx.items:
        if msg.type == "message" and msg.role == "system":
            sys_parts = [
                types.Part(text=content)
                for content in msg.content
                if content and isinstance(content, str)
            ]
            system_instruction = types.Content(parts=sys_parts)
            continue

        if msg.type == "message":
            role = "model" if msg.role == "assistant" else "user"
        elif msg.type == "function_call":
            # Skip ignored items *before* they can affect the current role;
            # otherwise they would split adjacent same-role messages into
            # separate turns even though they contribute no parts.
            if ignore_functions:
                continue
            role = "model"
        elif msg.type == "function_call_output":
            if ignore_functions:
                continue
            role = "user"
        else:
            # Unknown item type: nothing to convert, and no role to derive.
            continue

        # if the effective role changed, finalize the previous turn.
        if role != current_role:
            if current_role is not None and parts:
                turns.append(types.Content(role=current_role, parts=parts))
            parts = []
            current_role = role

        if msg.type == "message":
            for content in msg.content:
                if content and isinstance(content, str):
                    parts.append(types.Part(text=content))
                elif content and isinstance(content, dict):
                    parts.append(types.Part(text=json.dumps(content)))
                elif isinstance(content, llm.ImageContent):
                    parts.append(_to_image_part(content, cache_key))
        elif msg.type == "function_call":
            parts.append(
                types.Part(
                    function_call=types.FunctionCall(
                        name=msg.name,
                        args=json.loads(msg.arguments),
                    )
                )
            )
        else:  # function_call_output
            parts.append(
                types.Part(
                    function_response=types.FunctionResponse(
                        name=msg.name,
                        response={"text": msg.output},
                    )
                )
            )

    if current_role is not None and parts:
        turns.append(types.Content(role=current_role, parts=parts))

    if not turns:
        # if no turns, add a user message with a placeholder
        turns = [types.Content(role="user", parts=[types.Part(text=".")])]
    return turns, system_instruction
106
+
107
+
108
def _to_image_part(image: llm.ImageContent, cache_key: Any) -> types.Part:
    """Turn an image content item into a Gemini ``types.Part``.

    Images that serialize to an external URL are referenced by URI, falling
    back to ``image/jpeg`` when no MIME type is known. All other images are
    sent as inline bytes, which are stored on ``image._cache`` under
    ``cache_key`` and reused on later calls.
    """
    # NOTE(review): serialization runs before the cache lookup, so the cache
    # only pins the bytes that are sent; it does not skip the serialize call.
    serialized = llm.utils.serialize_image(image)

    url = serialized.external_url
    if url:
        mime_type = serialized.mime_type
        if not mime_type:
            logger.debug("No media type provided for image, defaulting to image/jpeg.")
            mime_type = "image/jpeg"
        return types.Part.from_uri(file_uri=url, mime_type=mime_type)

    cache = image._cache
    if cache_key not in cache:
        cache[cache_key] = serialized.data_bytes
    return types.Part.from_bytes(data=cache[cache_key], mime_type=serialized.mime_type)
120
+
121
+
122
def _build_gemini_fnc(function_tool: FunctionTool) -> types.FunctionDeclaration:
    """Build the Gemini function declaration for a single function tool.

    The tool is first described with the legacy OpenAI schema builder; its
    ``parameters`` schema is then reduced by ``_GeminiJsonSchema`` before being
    attached to the declaration.
    """
    legacy_schema = llm.utils.build_legacy_openai_schema(function_tool, internally_tagged=True)
    parameters = _GeminiJsonSchema(legacy_schema["parameters"]).simplify()
    return types.FunctionDeclaration(
        name=legacy_schema["name"],
        description=legacy_schema["description"],
        parameters=parameters,
    )
130
+
131
+
132
def to_response_format(response_format: type | dict) -> types.SchemaUnion:
    """Produce a Gemini-compatible schema for a structured response format.

    The JSON Schema is obtained from the object returned by
    ``llm_utils.to_response_format_param`` (via ``json_schema()`` for a
    ``TypeAdapter``, otherwise via ``model_json_schema()``) and then reduced
    by ``_GeminiJsonSchema``.
    """
    _unused, schema_source = llm_utils.to_response_format_param(response_format)
    schema = (
        schema_source.json_schema()
        if isinstance(schema_source, TypeAdapter)
        else schema_source.model_json_schema()
    )
    return _GeminiJsonSchema(schema).simplify()
140
+
141
+
142
class _GeminiJsonSchema:
    """
    Transforms the JSON Schema from Pydantic to be suitable for Gemini.
    based on pydantic-ai implementation
    https://github.com/pydantic/pydantic-ai/blob/085a9542a7360b7e388ce575323ce189b397d7ad/pydantic_ai_slim/pydantic_ai/models/gemini.py#L809

    The schema handed to the constructor is deep-copied, so the caller's dict
    is never mutated; ``simplify()`` rewrites that private copy in place.
    """

    # Type mapping from JSON Schema to Gemini Schema
    TYPE_MAPPING: dict[str, types.Type] = {
        "string": types.Type.STRING,
        "number": types.Type.NUMBER,
        "integer": types.Type.INTEGER,
        "boolean": types.Type.BOOLEAN,
        "array": types.Type.ARRAY,
        "object": types.Type.OBJECT,
    }

    # JSON Schema keywords that are renamed to snake_case for Gemini.
    _FIELD_NAME_MAPPING: dict[str, str] = {
        "minLength": "min_length",
        "maxLength": "max_length",
        "minItems": "min_items",
        "maxItems": "max_items",
        "minProperties": "min_properties",
        "maxProperties": "max_properties",
    }

    def __init__(self, schema: dict[str, Any]):
        self.schema = deepcopy(schema)
        # Definitions are detached from the schema and inlined wherever a
        # `$ref` points at them.
        self.defs = self.schema.pop("$defs", {})

    def simplify(self) -> dict[str, Any] | None:
        """Return the simplified schema, or ``None`` for an object without properties."""
        self._simplify(self.schema, refs_stack=())
        # If the schema is an OBJECT with no properties, return None.
        if self.schema.get("type") == types.Type.OBJECT and not self.schema.get("properties"):
            return None
        return self.schema

    def _simplify(self, schema: dict[str, Any], refs_stack: tuple[str, ...]) -> None:
        """Rewrite ``schema`` in place, recursing into nested schemas.

        Args:
            schema: The (sub)schema to rewrite.
            refs_stack: Names of the ``$defs`` entries currently being
                expanded, used to detect recursive references.

        Raises:
            ValueError: If a ``$ref`` is recursive or a ``type`` value is not
                one Gemini supports.
        """
        schema.pop("title", None)
        schema.pop("default", None)
        schema.pop("additionalProperties", None)
        if ref := schema.pop("$ref", None):
            key = re.sub(r"^#/\$defs/", "", ref)
            if key in refs_stack:
                raise ValueError("Recursive `$ref`s in JSON Schema are not supported by Gemini")
            refs_stack += (key,)
            schema_def = self.defs[key]
            # The definition itself is simplified in place, so a definition
            # referenced several times is already converted on later visits.
            self._simplify(schema_def, refs_stack)
            schema.update(schema_def)
            return

        # Convert type value to Gemini format
        if "type" in schema and schema["type"] != "null":
            json_type = schema["type"]
            # The `str` guard keeps non-hashable values (e.g. a list of types)
            # out of the dict lookup so they reach the ValueError below
            # instead of failing with an unrelated TypeError.
            if isinstance(json_type, str) and json_type in self.TYPE_MAPPING:
                schema["type"] = self.TYPE_MAPPING[json_type]
            elif not isinstance(json_type, types.Type):
                raise ValueError(f"Unsupported type in JSON Schema: {json_type}")
            # else: already a Gemini type (definition visited before); keep it.

        # Map field names that differ between JSON Schema and Gemini
        self._map_field_names(schema)

        # Handle anyOf - map to any_of
        if any_of := schema.pop("anyOf", None):
            mapped_any_of = []
            has_null = False
            non_null_schema = None

            for item_schema in any_of:
                self._simplify(item_schema, refs_stack)
                if item_schema == {"type": "null"}:
                    has_null = True
                else:
                    non_null_schema = item_schema
                    mapped_any_of.append(item_schema)

            if has_null and len(any_of) == 2 and non_null_schema:
                # `X | None` collapses to X marked as nullable.
                schema.update(non_null_schema)
                schema["nullable"] = True
            else:
                schema["any_of"] = mapped_any_of

        type_ = schema.get("type")

        if type_ == types.Type.OBJECT:
            self._object(schema, refs_stack)
        elif type_ == types.Type.ARRAY:
            self._array(schema, refs_stack)

    def _map_field_names(self, schema: dict[str, Any]) -> None:
        """Map JSON Schema field names to Gemini Schema field names."""
        for json_name, gemini_name in self._FIELD_NAME_MAPPING.items():
            if json_name in schema:
                schema[gemini_name] = schema.pop(json_name)

    def _object(self, schema: dict[str, Any], refs_stack: tuple[str, ...]) -> None:
        """Simplify every property schema of an object schema."""
        if properties := schema.get("properties"):
            for value in properties.values():
                self._simplify(value, refs_stack)

    def _array(self, schema: dict[str, Any], refs_stack: tuple[str, ...]) -> None:
        """Simplify the ``prefixItems`` and ``items`` schemas of an array schema."""
        if prefix_items := schema.get("prefixItems"):
            for prefix_item in prefix_items:
                self._simplify(prefix_item, refs_stack)

        if items_schema := schema.get("items"):
            self._simplify(items_schema, refs_stack)
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "0.11.3"
15
+ __version__ = "1.0.0"
@@ -1,39 +1,29 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: livekit-plugins-google
3
- Version: 0.11.3
3
+ Version: 1.0.0
4
4
  Summary: Agent Framework plugin for services from Google Cloud
5
- Home-page: https://github.com/livekit/agents
6
- License: Apache-2.0
7
5
  Project-URL: Documentation, https://docs.livekit.io
8
6
  Project-URL: Website, https://livekit.io/
9
7
  Project-URL: Source, https://github.com/livekit/agents
10
- Keywords: webrtc,realtime,audio,video,livekit
8
+ Author: LiveKit
9
+ License-Expression: Apache-2.0
10
+ Keywords: audio,livekit,realtime,video,webrtc
11
11
  Classifier: Intended Audience :: Developers
12
12
  Classifier: License :: OSI Approved :: Apache Software License
13
- Classifier: Topic :: Multimedia :: Sound/Audio
14
- Classifier: Topic :: Multimedia :: Video
15
- Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
16
13
  Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3 :: Only
17
15
  Classifier: Programming Language :: Python :: 3.9
18
16
  Classifier: Programming Language :: Python :: 3.10
19
- Classifier: Programming Language :: Python :: 3 :: Only
17
+ Classifier: Topic :: Multimedia :: Sound/Audio
18
+ Classifier: Topic :: Multimedia :: Video
19
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
20
  Requires-Python: >=3.9.0
21
- Description-Content-Type: text/markdown
22
21
  Requires-Dist: google-auth<3,>=2
23
22
  Requires-Dist: google-cloud-speech<3,>=2
24
23
  Requires-Dist: google-cloud-texttospeech<3,>=2
25
- Requires-Dist: google-genai==1.3.0
26
- Requires-Dist: livekit-agents<1.0.0,>=0.12.16
27
- Dynamic: classifier
28
- Dynamic: description
29
- Dynamic: description-content-type
30
- Dynamic: home-page
31
- Dynamic: keywords
32
- Dynamic: license
33
- Dynamic: project-url
34
- Dynamic: requires-dist
35
- Dynamic: requires-python
36
- Dynamic: summary
24
+ Requires-Dist: google-genai==1.5.0
25
+ Requires-Dist: livekit-agents>=1.0.0
26
+ Description-Content-Type: text/markdown
37
27
 
38
28
  # LiveKit Plugins Google
39
29
 
@@ -0,0 +1,16 @@
1
+ livekit/plugins/google/__init__.py,sha256=e_kSlFNmKhyyeliz7f4WOKc_Y0-y39QjO5nCWuguhss,1171
2
+ livekit/plugins/google/llm.py,sha256=81LCCJPmpMOkApX0S0a-zu5xIvcm2Pk8lTTz-PoK5m0,14740
3
+ livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
4
+ livekit/plugins/google/models.py,sha256=SGjAumdDK97NNLwMFcqZdKR68f1NoGB2Rk1UP2-imG0,1457
5
+ livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ livekit/plugins/google/stt.py,sha256=AG_lh2fuuduJi0jFbA_QKFXLJ6NUdF1W_FfkLUJML_Q,22413
7
+ livekit/plugins/google/tts.py,sha256=P8Zu2s0TfmyzlrNxzDIqyn3sGiNSW0n3nB_JlO_ojiM,7985
8
+ livekit/plugins/google/utils.py,sha256=pbLSOAdQxInWhgI2Yhsrr9KvgvpFXYDdU2yx2p03pFg,9437
9
+ livekit/plugins/google/version.py,sha256=nW89L_U9N4ukT3wAO3BeTqOaa87zLUOsEFz8TkiKIP8,600
10
+ livekit/plugins/google/beta/__init__.py,sha256=AxRYc7NGG62Tv1MmcZVCDHNvlhbC86hM-_yP01Qb28k,47
11
+ livekit/plugins/google/beta/realtime/__init__.py,sha256=_fW2NMN22F-hnQ4xAJ_g5lPbR7CvM_xXzSWlUQY-E-U,188
12
+ livekit/plugins/google/beta/realtime/api_proto.py,sha256=cwpFOYjN_3v5PMY0TnzoHhJoASfZ7Qt9IO281ZhJ7Ww,565
13
+ livekit/plugins/google/beta/realtime/realtime_api.py,sha256=ubF2Ha9zCD28gQrrjTcX3MWgMBs7bC3rI0DUdaHAa_Q,22021
14
+ livekit_plugins_google-1.0.0.dist-info/METADATA,sha256=UfiGiFsqqXKhw8HcQKW6WhltzxgeNz5qr7Vo6PoxU58,3489
15
+ livekit_plugins_google-1.0.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
16
+ livekit_plugins_google-1.0.0.dist-info/RECORD,,
@@ -1,5 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (78.1.0)
2
+ Generator: hatchling 1.27.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
-
@@ -1,199 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import base64
4
- import inspect
5
- import json
6
- from typing import Any, Dict, List, Optional, get_args, get_origin
7
-
8
- from livekit import rtc
9
- from livekit.agents import llm, utils
10
- from livekit.agents.llm.function_context import _is_optional_type
11
-
12
- from google.genai import types
13
- from google.genai.types import Type as GenaiType
14
-
15
- JSON_SCHEMA_TYPE_MAP: dict[type, GenaiType] = {
16
- str: GenaiType.STRING,
17
- int: GenaiType.INTEGER,
18
- float: GenaiType.NUMBER,
19
- bool: GenaiType.BOOLEAN,
20
- dict: GenaiType.OBJECT,
21
- list: GenaiType.ARRAY,
22
- }
23
-
24
- __all__ = ["_build_gemini_ctx", "_build_tools"]
25
-
26
-
27
def _build_parameters(arguments: Dict[str, Any]) -> types.Schema | None:
    """Build the Gemini parameter schema for a function's arguments.

    Args:
        arguments: Mapping of argument name to its argument info (description,
            type, choices and default are read from each value).

    Returns:
        An OBJECT schema describing every argument, or ``None`` when
        ``arguments`` is empty.

    Raises:
        ValueError: If an argument (or list item) type has no Gemini
            equivalent, or a non-list integer argument declares choices.
    """
    properties: Dict[str, types.Schema] = {}
    required: List[str] = []

    for arg_name, arg_info in arguments.items():
        schema = types.Schema()
        if arg_info.description:
            schema.description = arg_info.description

        _, py_type = _is_optional_type(arg_info.type)
        if get_origin(py_type) is list:
            item_type = get_args(py_type)[0]
            if item_type not in JSON_SCHEMA_TYPE_MAP:
                raise ValueError(f"Unsupported type: {item_type}")
            schema.type = GenaiType.ARRAY
            schema.items = types.Schema(type=JSON_SCHEMA_TYPE_MAP[item_type])

            if arg_info.choices:
                schema.items.enum = arg_info.choices
        else:
            if py_type not in JSON_SCHEMA_TYPE_MAP:
                raise ValueError(f"Unsupported type: {py_type}")

            schema.type = JSON_SCHEMA_TYPE_MAP[py_type]

            if arg_info.choices:
                schema.enum = arg_info.choices
                if py_type is int:
                    raise ValueError(
                        f"Parameter '{arg_info.name}' uses integer choices, not supported by this model."
                    )

        properties[arg_name] = schema

        # Arguments without a default are mandatory.
        if arg_info.default is inspect.Parameter.empty:
            required.append(arg_name)

    if not properties:
        return None

    parameters = types.Schema(type=GenaiType.OBJECT, properties=properties)
    if required:
        parameters.required = required
    return parameters
73
-
74
-
75
def _build_tools(fnc_ctx: Any) -> List[types.FunctionDeclaration]:
    """Create one Gemini function declaration per AI function in ``fnc_ctx``.

    Declarations follow the iteration order of ``fnc_ctx.ai_functions``.
    """
    return [
        types.FunctionDeclaration(
            name=fnc_info.name,
            description=fnc_info.description,
            parameters=_build_parameters(fnc_info.arguments),
        )
        for fnc_info in fnc_ctx.ai_functions.values()
    ]
88
-
89
-
90
def _build_gemini_ctx(
    chat_ctx: llm.ChatContext, cache_key: Any
) -> tuple[list[types.Content], Optional[types.Content]]:
    """Convert chat messages into Gemini turns plus an optional system instruction.

    Assistant messages map to the "model" role and every other non-system
    message to "user"; consecutive messages with the same role are merged
    into one turn. A system message contributes the system instruction only
    when its content is a string.
    """
    turns: list[types.Content] = []
    system_instruction: Optional[types.Content] = None
    active_role: Optional[str] = None
    pending: list[types.Part] = []

    for message in chat_ctx.messages:
        if message.role == "system":
            if isinstance(message.content, str):
                system_instruction = types.Content(parts=[types.Part(text=message.content)])
            continue

        turn_role = "model" if message.role == "assistant" else "user"

        # A role switch closes the turn collected so far.
        if turn_role != active_role:
            if active_role is not None and pending:
                turns.append(types.Content(role=active_role, parts=pending))
            active_role, pending = turn_role, []

        for call in message.tool_calls or ():
            pending.append(
                types.Part(
                    function_call=types.FunctionCall(
                        name=call.function_info.name,
                        args=call.arguments,
                    )
                )
            )

        content = message.content
        if not content:
            continue

        if message.role == "tool":
            # Tool results: dicts are passed through, strings are wrapped.
            if isinstance(content, dict):
                payload = content
            elif isinstance(content, str):
                payload = {"result": content}
            else:
                continue
            pending.append(
                types.Part(
                    function_response=types.FunctionResponse(
                        name=message.name,
                        response=payload,
                    )
                )
            )
        elif isinstance(content, str):
            pending.append(types.Part(text=content))
        elif isinstance(content, dict):
            pending.append(types.Part(text=json.dumps(content)))
        elif isinstance(content, list):
            for item in content:
                if isinstance(item, str):
                    pending.append(types.Part(text=item))
                elif isinstance(item, llm.ChatImage):
                    pending.append(_build_gemini_image_part(item, cache_key))

    # Flush whatever is left for the last role.
    if active_role is not None and pending:
        turns.append(types.Content(role=active_role, parts=pending))

    return turns, system_instruction
167
-
168
-
169
def _build_gemini_image_part(image: llm.ChatImage, cache_key: Any) -> types.Part:
    """Turn a chat image into a Gemini ``types.Part``.

    String images are either base64 data URLs, which are decoded and sent
    inline with the MIME type declared in the URL, or anything else, which is
    referenced by URI as ``image/jpeg``. Video frames are JPEG-encoded once
    per ``cache_key`` (optionally resized to the image's inference size) and
    sent inline.

    Raises:
        ValueError: If a data URL carries invalid base64, or ``image.image``
            is neither a string nor a video frame.
    """
    if isinstance(image.image, str):
        header, sep, base64_data = image.image.partition(",")
        # Accept any `data:<mime>[;param...];base64,<payload>` URL rather than
        # only JPEG, so e.g. PNG data URLs are not mislabelled as remote
        # `image/jpeg` URIs.
        if sep and header.startswith("data:") and header.endswith(";base64"):
            declared = header[len("data:") : -len(";base64")].split(";", 1)[0]
            mime_type = declared or "image/jpeg"
            try:
                image_bytes = base64.b64decode(base64_data)
            except ValueError as e:  # binascii.Error is a ValueError subclass
                raise ValueError("Invalid base64 data in image URL") from e

            return types.Part.from_bytes(data=image_bytes, mime_type=mime_type)

        # Assume it's a regular URL
        return types.Part.from_uri(file_uri=image.image, mime_type="image/jpeg")

    if isinstance(image.image, rtc.VideoFrame):
        if cache_key not in image._cache:
            opts = utils.images.EncodeOptions()
            if image.inference_width and image.inference_height:
                opts.resize_options = utils.images.ResizeOptions(
                    width=image.inference_width,
                    height=image.inference_height,
                    strategy="scale_aspect_fit",
                )
            image._cache[cache_key] = utils.images.encode(image.image, opts)

        return types.Part.from_bytes(data=image._cache[cache_key], mime_type="image/jpeg")

    raise ValueError(f"Unsupported image type: {type(image.image)}")