livekit-plugins-google 0.11.1__py3-none-any.whl → 1.0.0.dev4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- livekit/plugins/google/beta/realtime/__init__.py +1 -5
- livekit/plugins/google/beta/realtime/api_proto.py +3 -2
- livekit/plugins/google/beta/realtime/realtime_api.py +22 -51
- livekit/plugins/google/beta/realtime/transcriber.py +11 -27
- livekit/plugins/google/llm.py +127 -197
- livekit/plugins/google/stt.py +28 -58
- livekit/plugins/google/tts.py +10 -16
- livekit/plugins/google/utils.py +213 -0
- livekit/plugins/google/version.py +1 -1
- {livekit_plugins_google-0.11.1.dist-info → livekit_plugins_google-1.0.0.dev4.dist-info}/METADATA +12 -22
- livekit_plugins_google-1.0.0.dev4.dist-info/RECORD +17 -0
- {livekit_plugins_google-0.11.1.dist-info → livekit_plugins_google-1.0.0.dev4.dist-info}/WHEEL +1 -2
- livekit/plugins/google/_utils.py +0 -199
- livekit_plugins_google-0.11.1.dist-info/RECORD +0 -18
- livekit_plugins_google-0.11.1.dist-info/top_level.txt +0 -1
livekit/plugins/google/tts.py
CHANGED
@@ -15,8 +15,10 @@
|
|
15
15
|
from __future__ import annotations
|
16
16
|
|
17
17
|
from dataclasses import dataclass
|
18
|
-
from typing import Optional
|
19
18
|
|
19
|
+
from google.api_core.exceptions import DeadlineExceeded, GoogleAPICallError
|
20
|
+
from google.cloud import texttospeech
|
21
|
+
from google.cloud.texttospeech_v1.types import SsmlVoiceGender, SynthesizeSpeechResponse
|
20
22
|
from livekit.agents import (
|
21
23
|
APIConnectionError,
|
22
24
|
APIConnectOptions,
|
@@ -26,10 +28,6 @@ from livekit.agents import (
|
|
26
28
|
utils,
|
27
29
|
)
|
28
30
|
|
29
|
-
from google.api_core.exceptions import DeadlineExceeded, GoogleAPICallError
|
30
|
-
from google.cloud import texttospeech
|
31
|
-
from google.cloud.texttospeech_v1.types import SsmlVoiceGender, SynthesizeSpeechResponse
|
32
|
-
|
33
31
|
from .models import Gender, SpeechLanguages
|
34
32
|
|
35
33
|
|
@@ -128,17 +126,13 @@ class TTS(tts.TTS):
|
|
128
126
|
def _ensure_client(self) -> texttospeech.TextToSpeechAsyncClient:
|
129
127
|
if self._client is None:
|
130
128
|
if self._credentials_info:
|
131
|
-
self._client = (
|
132
|
-
|
133
|
-
self._credentials_info
|
134
|
-
)
|
129
|
+
self._client = texttospeech.TextToSpeechAsyncClient.from_service_account_info(
|
130
|
+
self._credentials_info
|
135
131
|
)
|
136
132
|
|
137
133
|
elif self._credentials_file:
|
138
|
-
self._client = (
|
139
|
-
|
140
|
-
self._credentials_file
|
141
|
-
)
|
134
|
+
self._client = texttospeech.TextToSpeechAsyncClient.from_service_account_file(
|
135
|
+
self._credentials_file
|
142
136
|
)
|
143
137
|
else:
|
144
138
|
self._client = texttospeech.TextToSpeechAsyncClient()
|
@@ -150,8 +144,8 @@ class TTS(tts.TTS):
|
|
150
144
|
self,
|
151
145
|
text: str,
|
152
146
|
*,
|
153
|
-
conn_options:
|
154
|
-
) ->
|
147
|
+
conn_options: APIConnectOptions | None = None,
|
148
|
+
) -> ChunkedStream:
|
155
149
|
return ChunkedStream(
|
156
150
|
tts=self,
|
157
151
|
input_text=text,
|
@@ -169,7 +163,7 @@ class ChunkedStream(tts.ChunkedStream):
|
|
169
163
|
input_text: str,
|
170
164
|
opts: _TTSOptions,
|
171
165
|
client: texttospeech.TextToSpeechAsyncClient,
|
172
|
-
conn_options:
|
166
|
+
conn_options: APIConnectOptions | None = None,
|
173
167
|
) -> None:
|
174
168
|
super().__init__(tts=tts, input_text=input_text, conn_options=conn_options)
|
175
169
|
self._opts, self._client = opts, client
|
@@ -0,0 +1,213 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import json
|
4
|
+
import re
|
5
|
+
from copy import deepcopy
|
6
|
+
from typing import Any
|
7
|
+
|
8
|
+
from google.genai import types
|
9
|
+
from livekit.agents import llm
|
10
|
+
from livekit.agents.llm import FunctionTool
|
11
|
+
|
12
|
+
__all__ = ["to_chat_ctx", "to_fnc_ctx"]
|
13
|
+
|
14
|
+
|
15
|
+
def to_fnc_ctx(fncs: list[FunctionTool]) -> list[types.FunctionDeclaration]:
    """Convert LiveKit function tools into Gemini function declarations.

    Each tool is translated with ``_build_gemini_fnc``; the returned list
    keeps the order of ``fncs``.
    """
    declarations: list[types.FunctionDeclaration] = []
    for tool in fncs:
        declarations.append(_build_gemini_fnc(tool))
    return declarations
|
17
|
+
|
18
|
+
|
19
|
+
def to_chat_ctx(
    chat_ctx: llm.ChatContext, cache_key: Any
) -> tuple[list[types.Content], types.Content | None]:
    """Convert a LiveKit chat context into Gemini turns and a system instruction.

    Items are visited in order. Consecutive items that map to the same Gemini
    role are merged into a single ``types.Content`` turn:

    * ``message`` items map to ``"model"`` when their role is ``"assistant"``,
      otherwise to ``"user"``;
    * ``function_call`` items map to ``"model"``;
    * ``function_call_output`` items map to ``"user"``.

    System messages are not emitted as turns. Their string content becomes the
    system instruction; when several system messages carry text, the last one
    wins.

    Args:
        chat_ctx: Chat context whose ``items`` are converted.
        cache_key: Key forwarded to ``_to_image_part`` for image caching.

    Returns:
        ``(turns, system_instruction)``. ``system_instruction`` is ``None`` when
        no system message with string content was seen.

    Raises:
        json.JSONDecodeError: If a function call carries non-empty arguments
            that are not valid JSON.
    """
    turns: list[types.Content] = []
    system_instruction: types.Content | None = None
    current_role: str | None = None
    parts: list[types.Part] = []

    for msg in chat_ctx.items:
        if msg.type == "message" and msg.role == "system":
            sys_parts = [
                types.Part(text=content) for content in msg.content if isinstance(content, str)
            ]
            # A system message without any text must not replace an earlier
            # instruction with an empty Content.
            if sys_parts:
                system_instruction = types.Content(parts=sys_parts)
            continue

        if msg.type == "message":
            role = "model" if msg.role == "assistant" else "user"
        elif msg.type == "function_call":
            role = "model"
        elif msg.type == "function_call_output":
            role = "user"
        else:
            # Unknown item type: skip it instead of reusing a stale `role`
            # (or hitting an unbound local when it is the first item).
            continue

        # if the effective role changed, finalize the previous turn.
        if role != current_role:
            if current_role is not None and parts:
                turns.append(types.Content(role=current_role, parts=parts))
            # Rebind rather than clear(): the finished turn keeps the old list.
            parts = []
            current_role = role

        if msg.type == "message":
            for content in msg.content:
                if isinstance(content, str):
                    parts.append(types.Part(text=content))
                elif isinstance(content, dict):
                    parts.append(types.Part(text=json.dumps(content)))
                elif isinstance(content, llm.ImageContent):
                    parts.append(_to_image_part(content, cache_key))
        elif msg.type == "function_call":
            parts.append(
                types.Part(
                    function_call=types.FunctionCall(
                        name=msg.name,
                        # json.loads("") raises; treat missing/empty
                        # arguments as a call without arguments.
                        args=json.loads(msg.arguments or "{}"),
                    )
                )
            )
        else:  # function_call_output
            parts.append(
                types.Part(
                    function_response=types.FunctionResponse(
                        name=msg.name,
                        response={"text": msg.output},
                    )
                )
            )

    if current_role is not None and parts:
        turns.append(types.Content(role=current_role, parts=parts))
    return turns, system_instruction
|
80
|
+
|
81
|
+
|
82
|
+
def _to_image_part(image: llm.ImageContent, cache_key: Any) -> types.Part:
    """Build an inline-bytes Gemini part for ``image``.

    The image is serialized with ``llm.utils.serialize_image`` on every call.
    Its bytes are stored in ``image._cache`` under ``cache_key`` the first time
    that key is seen, and the cached bytes are what the part is built from.
    The MIME type always comes from the fresh serialization.
    """
    serialized = llm.utils.serialize_image(image)
    cache = image._cache
    if cache_key not in cache:
        cache[cache_key] = serialized.data_bytes
    cached_bytes = cache[cache_key]
    return types.Part.from_bytes(data=cached_bytes, mime_type=serialized.media_type)
|
87
|
+
|
88
|
+
|
89
|
+
def _build_gemini_fnc(function_tool: FunctionTool) -> types.FunctionDeclaration:
    """Translate one LiveKit function tool into a Gemini function declaration.

    The tool is first rendered with ``llm.utils.build_legacy_openai_schema``.
    Its ``parameters`` JSON Schema is then rewritten by ``_GeminiJsonSchema``,
    which may return ``None`` for a parameterless function.
    """
    legacy_schema = llm.utils.build_legacy_openai_schema(function_tool, internally_tagged=True)
    gemini_parameters = _GeminiJsonSchema(legacy_schema["parameters"]).simplify()
    declaration = types.FunctionDeclaration(
        name=legacy_schema["name"],
        description=legacy_schema["description"],
        parameters=gemini_parameters,
    )
    return declaration
|
97
|
+
|
98
|
+
|
99
|
+
class _GeminiJsonSchema:
    """
    Transforms the JSON Schema from Pydantic to be suitable for Gemini.
    based on pydantic-ai implementation
    https://github.com/pydantic/pydantic-ai/blob/085a9542a7360b7e388ce575323ce189b397d7ad/pydantic_ai_slim/pydantic_ai/models/gemini.py#L809

    The constructor deep-copies the input schema, so the caller's dict is never
    modified. ``simplify()`` then rewrites that copy in place:

    * ``title`` and ``default`` are dropped;
    * ``$ref`` entries are inlined from ``$defs`` (recursive refs are rejected);
    * JSON Schema type names are replaced by ``types.Type`` members;
    * camelCase keywords are renamed to their snake_case Gemini names;
    * ``anyOf`` becomes ``any_of``, with null branches expressed via ``nullable``.
    """

    # Type mapping from JSON Schema to Gemini Schema
    TYPE_MAPPING: dict[str, types.Type] = {
        "string": types.Type.STRING,
        "number": types.Type.NUMBER,
        "integer": types.Type.INTEGER,
        "boolean": types.Type.BOOLEAN,
        "array": types.Type.ARRAY,
        "object": types.Type.OBJECT,
    }

    # JSON Schema keywords whose Gemini field name differs.
    _FIELD_NAME_MAPPING: dict[str, str] = {
        "minLength": "min_length",
        "maxLength": "max_length",
        "minItems": "min_items",
        "maxItems": "max_items",
        "minProperties": "min_properties",
        "maxProperties": "max_properties",
        "additionalProperties": "additional_properties",
    }

    def __init__(self, schema: dict[str, Any]):
        """Copy ``schema`` and detach its ``$defs`` table for ``$ref`` lookups."""
        self.schema = deepcopy(schema)
        self.defs = self.schema.pop("$defs", {})

    def simplify(self) -> dict[str, Any] | None:
        """Rewrite the schema for Gemini and return it.

        Returns:
            The rewritten schema, or ``None`` when it is an OBJECT without any
            properties (i.e. the function takes no parameters).

        Raises:
            ValueError: For recursive ``$ref``s, unsupported types, or truthy
                ``additionalProperties``.
            KeyError: If a ``$ref`` points at a name missing from ``$defs``.
        """
        self._simplify(self.schema, refs_stack=())
        # If the schema is an OBJECT with no properties, return None.
        if self.schema.get("type") == types.Type.OBJECT and not self.schema.get("properties"):
            return None
        return self.schema

    def _simplify(self, schema: dict[str, Any], refs_stack: tuple[str, ...]) -> None:
        """Rewrite ``schema`` in place; ``refs_stack`` holds the ``$ref`` keys being expanded."""
        schema.pop("title", None)
        schema.pop("default", None)
        if ref := schema.pop("$ref", None):
            key = re.sub(r"^#/\$defs/", "", ref)
            if key in refs_stack:
                raise ValueError("Recursive `$ref`s in JSON Schema are not supported by Gemini")
            refs_stack += (key,)
            # The shared definition is simplified in place and then merged
            # into the referencing schema.
            schema_def = self.defs[key]
            self._simplify(schema_def, refs_stack)
            schema.update(schema_def)
            return

        # Convert type value to Gemini format
        if "type" in schema and schema["type"] != "null":
            json_type = schema["type"]
            # The isinstance(str) guard keeps unhashable values (e.g. a list of
            # types) from raising TypeError in the dict lookup; they fall
            # through to the ValueError below instead.
            if isinstance(json_type, str) and json_type in self.TYPE_MAPPING:
                schema["type"] = self.TYPE_MAPPING[json_type]
            elif isinstance(json_type, types.Type):
                # Already converted (a shared definition visited a second time).
                schema["type"] = json_type
            else:
                raise ValueError(f"Unsupported type in JSON Schema: {json_type}")

        # Map field names that differ between JSON Schema and Gemini
        self._map_field_names(schema)

        # Handle anyOf - map to any_of
        if any_of := schema.pop("anyOf", None):
            for item_schema in any_of:
                self._simplify(item_schema, refs_stack)

            # TYPE_MAPPING has no entry for "null", so a null branch is
            # expressed through `nullable` rather than kept as an alternative.
            null_schema = {"type": "null"}
            non_null = [item for item in any_of if item != null_schema]
            has_null = len(non_null) != len(any_of)

            if has_null and len(any_of) == 2 and non_null and non_null[0]:
                # Optional[X]: collapse to X itself, marked nullable.
                schema.update(non_null[0])
                schema["nullable"] = True
            else:
                schema["any_of"] = non_null
                if has_null:
                    schema["nullable"] = True

        type_ = schema.get("type")

        if type_ == types.Type.OBJECT:
            self._object(schema, refs_stack)
        elif type_ == types.Type.ARRAY:
            self._array(schema, refs_stack)

    def _map_field_names(self, schema: dict[str, Any]) -> None:
        """Map JSON Schema field names to Gemini Schema field names."""
        for json_name, gemini_name in self._FIELD_NAME_MAPPING.items():
            if json_name in schema:
                schema[gemini_name] = schema.pop(json_name)

    def _object(self, schema: dict[str, Any], refs_stack: tuple[str, ...]) -> None:
        """Simplify every property of an object schema.

        Raises:
            ValueError: If the schema carries truthy additional properties.
        """
        # Gemini doesn't support additionalProperties
        # (the key was already renamed by _map_field_names; a falsy value is
        # simply dropped).
        ad_props = schema.pop("additional_properties", None)
        if ad_props:
            raise ValueError("Additional properties in JSON Schema are not supported by Gemini")

        if properties := schema.get("properties"):
            for value in properties.values():
                self._simplify(value, refs_stack)

    def _array(self, schema: dict[str, Any], refs_stack: tuple[str, ...]) -> None:
        """Simplify the ``prefixItems`` and ``items`` sub-schemas of an array schema."""
        if prefix_items := schema.get("prefixItems"):
            for prefix_item in prefix_items:
                self._simplify(prefix_item, refs_stack)

        if items_schema := schema.get("items"):
            self._simplify(items_schema, refs_stack)
|
{livekit_plugins_google-0.11.1.dist-info → livekit_plugins_google-1.0.0.dev4.dist-info}/METADATA
RENAMED
@@ -1,39 +1,29 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.4
|
2
2
|
Name: livekit-plugins-google
|
3
|
-
Version: 0.
|
3
|
+
Version: 1.0.0.dev4
|
4
4
|
Summary: Agent Framework plugin for services from Google Cloud
|
5
|
-
Home-page: https://github.com/livekit/agents
|
6
|
-
License: Apache-2.0
|
7
5
|
Project-URL: Documentation, https://docs.livekit.io
|
8
6
|
Project-URL: Website, https://livekit.io/
|
9
7
|
Project-URL: Source, https://github.com/livekit/agents
|
10
|
-
|
8
|
+
Author: LiveKit
|
9
|
+
License-Expression: Apache-2.0
|
10
|
+
Keywords: audio,livekit,realtime,video,webrtc
|
11
11
|
Classifier: Intended Audience :: Developers
|
12
12
|
Classifier: License :: OSI Approved :: Apache Software License
|
13
|
-
Classifier: Topic :: Multimedia :: Sound/Audio
|
14
|
-
Classifier: Topic :: Multimedia :: Video
|
15
|
-
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
16
13
|
Classifier: Programming Language :: Python :: 3
|
14
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
17
15
|
Classifier: Programming Language :: Python :: 3.9
|
18
16
|
Classifier: Programming Language :: Python :: 3.10
|
19
|
-
Classifier:
|
17
|
+
Classifier: Topic :: Multimedia :: Sound/Audio
|
18
|
+
Classifier: Topic :: Multimedia :: Video
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
20
20
|
Requires-Python: >=3.9.0
|
21
|
-
Description-Content-Type: text/markdown
|
22
21
|
Requires-Dist: google-auth<3,>=2
|
23
22
|
Requires-Dist: google-cloud-speech<3,>=2
|
24
23
|
Requires-Dist: google-cloud-texttospeech<3,>=2
|
25
|
-
Requires-Dist: google-genai==1.
|
26
|
-
Requires-Dist: livekit-agents
|
27
|
-
|
28
|
-
Dynamic: description
|
29
|
-
Dynamic: description-content-type
|
30
|
-
Dynamic: home-page
|
31
|
-
Dynamic: keywords
|
32
|
-
Dynamic: license
|
33
|
-
Dynamic: project-url
|
34
|
-
Dynamic: requires-dist
|
35
|
-
Dynamic: requires-python
|
36
|
-
Dynamic: summary
|
24
|
+
Requires-Dist: google-genai==1.5.0
|
25
|
+
Requires-Dist: livekit-agents>=1.0.0.dev4
|
26
|
+
Description-Content-Type: text/markdown
|
37
27
|
|
38
28
|
# LiveKit Plugins Google
|
39
29
|
|
@@ -0,0 +1,17 @@
|
|
1
|
+
livekit/plugins/google/__init__.py,sha256=e_kSlFNmKhyyeliz7f4WOKc_Y0-y39QjO5nCWuguhss,1171
|
2
|
+
livekit/plugins/google/llm.py,sha256=-GksCFbQB5r3Cg2Zousf0etTrj1N-FRwIoJolepTupQ,14259
|
3
|
+
livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
|
4
|
+
livekit/plugins/google/models.py,sha256=SGjAumdDK97NNLwMFcqZdKR68f1NoGB2Rk1UP2-imG0,1457
|
5
|
+
livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
|
+
livekit/plugins/google/stt.py,sha256=c3YRAiFYpLuw1Prb6LnQNCuHyHf8TWbqBbiBWBKNDCI,22227
|
7
|
+
livekit/plugins/google/tts.py,sha256=AOv3kbvqezUbZguK55fAKpaLhUd1FMv61NMUjwbixAA,7875
|
8
|
+
livekit/plugins/google/utils.py,sha256=SfuQLJSXSV708VMn5_TVB93ginUCX2izqT9r2seraSQ,8040
|
9
|
+
livekit/plugins/google/version.py,sha256=koM_bT4QbztrKQ60Gjg7V4oe99CuxgGcpuUtWMOEKqU,605
|
10
|
+
livekit/plugins/google/beta/__init__.py,sha256=AxRYc7NGG62Tv1MmcZVCDHNvlhbC86hM-_yP01Qb28k,47
|
11
|
+
livekit/plugins/google/beta/realtime/__init__.py,sha256=_fW2NMN22F-hnQ4xAJ_g5lPbR7CvM_xXzSWlUQY-E-U,188
|
12
|
+
livekit/plugins/google/beta/realtime/api_proto.py,sha256=f7YllxvIp-xefZQ-XrMjcu8SVbISUQYWlgs_1w-P9IM,685
|
13
|
+
livekit/plugins/google/beta/realtime/realtime_api.py,sha256=em3aPaxCtZCQ1zgKc8xfFfvqGtgYxjOC0zwpvkTvv60,22479
|
14
|
+
livekit/plugins/google/beta/realtime/transcriber.py,sha256=AHKIbJdX2SIj_s2QQwo9aE6xbQjSZ9YGp8Y204EuOq0,9532
|
15
|
+
livekit_plugins_google-1.0.0.dev4.dist-info/METADATA,sha256=FudaI58iWf7o0d5HE6qYiNj9GyoBzZqBHfGSBA1Y9Hk,3499
|
16
|
+
livekit_plugins_google-1.0.0.dev4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
17
|
+
livekit_plugins_google-1.0.0.dev4.dist-info/RECORD,,
|
livekit/plugins/google/_utils.py
DELETED
@@ -1,199 +0,0 @@
|
|
1
|
-
from __future__ import annotations
|
2
|
-
|
3
|
-
import base64
|
4
|
-
import inspect
|
5
|
-
import json
|
6
|
-
from typing import Any, Dict, List, Optional, get_args, get_origin
|
7
|
-
|
8
|
-
from livekit import rtc
|
9
|
-
from livekit.agents import llm, utils
|
10
|
-
from livekit.agents.llm.function_context import _is_optional_type
|
11
|
-
|
12
|
-
from google.genai import types
|
13
|
-
from google.genai.types import Type as GenaiType
|
14
|
-
|
15
|
-
JSON_SCHEMA_TYPE_MAP: dict[type, GenaiType] = {
|
16
|
-
str: GenaiType.STRING,
|
17
|
-
int: GenaiType.INTEGER,
|
18
|
-
float: GenaiType.NUMBER,
|
19
|
-
bool: GenaiType.BOOLEAN,
|
20
|
-
dict: GenaiType.OBJECT,
|
21
|
-
list: GenaiType.ARRAY,
|
22
|
-
}
|
23
|
-
|
24
|
-
__all__ = ["_build_gemini_ctx", "_build_tools"]
|
25
|
-
|
26
|
-
|
27
|
-
def _build_parameters(arguments: Dict[str, Any]) -> types.Schema | None:
    """Build the Gemini OBJECT schema describing a function's arguments.

    Each argument becomes one property. ``list[...]`` arguments become ARRAY
    schemas whose item type is looked up in ``JSON_SCHEMA_TYPE_MAP``; any other
    argument type is looked up directly. Arguments whose default is
    ``inspect.Parameter.empty`` are listed as required.

    Returns:
        The OBJECT schema, or ``None`` when ``arguments`` is empty.

    Raises:
        ValueError: If an argument (or list item) type is not in
            ``JSON_SCHEMA_TYPE_MAP``, or a non-list ``int`` argument declares
            choices.
    """
    schemas: Dict[str, types.Schema] = {}
    required_names: List[str] = []

    for name, info in arguments.items():
        schema = types.Schema()
        if info.description:
            schema.description = info.description

        # Only the unwrapped type is used; the first element of the returned
        # pair is ignored.
        _, unwrapped = _is_optional_type(info.type)

        if get_origin(unwrapped) is list:
            element_type = get_args(unwrapped)[0]
            if element_type not in JSON_SCHEMA_TYPE_MAP:
                raise ValueError(f"Unsupported type: {element_type}")
            schema.type = GenaiType.ARRAY
            schema.items = types.Schema(type=JSON_SCHEMA_TYPE_MAP[element_type])
            if info.choices:
                # For arrays the choices constrain the items, not the array.
                schema.items.enum = info.choices
        else:
            if unwrapped not in JSON_SCHEMA_TYPE_MAP:
                raise ValueError(f"Unsupported type: {unwrapped}")
            schema.type = JSON_SCHEMA_TYPE_MAP[unwrapped]
            if info.choices:
                schema.enum = info.choices
                if unwrapped is int:
                    raise ValueError(
                        f"Parameter '{info.name}' uses integer choices, not supported by this model."
                    )

        schemas[name] = schema

        if info.default is inspect.Parameter.empty:
            required_names.append(name)

    if not schemas:
        return None

    parameters = types.Schema(type=GenaiType.OBJECT, properties=schemas)
    if required_names:
        parameters.required = required_names
    return parameters
|
73
|
-
|
74
|
-
|
75
|
-
def _build_tools(fnc_ctx: Any) -> List[types.FunctionDeclaration]:
    """Create one Gemini function declaration per entry in ``fnc_ctx.ai_functions``.

    The parameter schema of each function comes from ``_build_parameters`` and
    may be ``None`` for a function without arguments.
    """
    declarations: List[types.FunctionDeclaration] = []
    for info in fnc_ctx.ai_functions.values():
        schema = _build_parameters(info.arguments)
        declarations.append(
            types.FunctionDeclaration(
                name=info.name,
                description=info.description,
                parameters=schema,
            )
        )
    return declarations
|
88
|
-
|
89
|
-
|
90
|
-
def _build_gemini_ctx(
    chat_ctx: llm.ChatContext, cache_key: Any
) -> tuple[list[types.Content], Optional[types.Content]]:
    """Convert chat messages into Gemini turns plus an optional system instruction.

    Assistant messages map to the ``"model"`` role and every other non-system
    message (including tool results) to ``"user"``. Consecutive messages with
    the same mapped role are merged into one ``types.Content``. A system
    message contributes only when its content is a plain string; the last such
    message wins.
    """
    turns: list[types.Content] = []
    system_instruction: Optional[types.Content] = None
    active_role: Optional[str] = None
    pending: list[types.Part] = []

    for msg in chat_ctx.messages:
        if msg.role == "system":
            if isinstance(msg.content, str):
                system_instruction = types.Content(parts=[types.Part(text=msg.content)])
            continue

        role = "model" if msg.role == "assistant" else "user"

        # A role switch closes the turn collected so far.
        if role != active_role:
            if active_role is not None and pending:
                turns.append(types.Content(role=active_role, parts=pending))
            active_role = role
            pending = []

        for call in msg.tool_calls or ():
            function_call = types.FunctionCall(
                name=call.function_info.name,
                args=call.arguments,
            )
            pending.append(types.Part(function_call=function_call))

        content = msg.content
        if not content:
            continue

        if msg.role == "tool":
            # Tool results: dicts are passed through, strings are wrapped.
            if isinstance(content, dict):
                payload = content
            elif isinstance(content, str):
                payload = {"result": content}
            else:
                continue
            pending.append(
                types.Part(
                    function_response=types.FunctionResponse(
                        name=msg.name,
                        response=payload,
                    )
                )
            )
        elif isinstance(content, str):
            pending.append(types.Part(text=content))
        elif isinstance(content, dict):
            pending.append(types.Part(text=json.dumps(content)))
        elif isinstance(content, list):
            for item in content:
                if isinstance(item, str):
                    pending.append(types.Part(text=item))
                elif isinstance(item, llm.ChatImage):
                    pending.append(_build_gemini_image_part(item, cache_key))

    # Flush whatever is left for the last role.
    if active_role is not None and pending:
        turns.append(types.Content(role=active_role, parts=pending))

    return turns, system_instruction
|
167
|
-
|
168
|
-
|
169
|
-
def _build_gemini_image_part(image: llm.ChatImage, cache_key: Any) -> types.Part:
    """Build a Gemini part from a chat image.

    * A base64 ``data:image/...`` URL is decoded and sent inline with the MIME
      type named in the URL (previously only ``image/jpeg`` was recognised).
    * Any other string is passed on as a file URI labelled ``image/jpeg``.
    * An ``rtc.VideoFrame`` is encoded with ``utils.images.encode`` (resized
      when both inference dimensions are set) and cached in ``image._cache``
      under ``cache_key``.

    Raises:
        ValueError: If the data URL payload cannot be base64-decoded, or
            ``image.image`` is neither a string nor an ``rtc.VideoFrame``.
    """
    source = image.image

    if isinstance(source, str):
        if source.startswith("data:"):
            # data:<media type>[;param...];base64,<payload>
            header, comma, payload = source.partition(",")
            media_type = header[len("data:"):].split(";", 1)[0]
            if comma and header.endswith(";base64") and media_type.startswith("image/"):
                try:
                    image_bytes = base64.b64decode(payload)
                except Exception as e:
                    raise ValueError("Invalid base64 data in image URL") from e

                return types.Part.from_bytes(data=image_bytes, mime_type=media_type)

        # Assume it's a regular URL
        return types.Part.from_uri(file_uri=source, mime_type="image/jpeg")

    elif isinstance(source, rtc.VideoFrame):
        if cache_key not in image._cache:
            opts = utils.images.EncodeOptions()
            if image.inference_width and image.inference_height:
                opts.resize_options = utils.images.ResizeOptions(
                    width=image.inference_width,
                    height=image.inference_height,
                    strategy="scale_aspect_fit",
                )
            image._cache[cache_key] = utils.images.encode(source, opts)

        return types.Part.from_bytes(data=image._cache[cache_key], mime_type="image/jpeg")

    raise ValueError(f"Unsupported image type: {type(image.image)}")
|
@@ -1,18 +0,0 @@
|
|
1
|
-
livekit/plugins/google/__init__.py,sha256=e_kSlFNmKhyyeliz7f4WOKc_Y0-y39QjO5nCWuguhss,1171
|
2
|
-
livekit/plugins/google/_utils.py,sha256=FG1_26nlWGcI6onPleQQcmGBMfb4QNYgis1B5BMJxWA,7131
|
3
|
-
livekit/plugins/google/llm.py,sha256=LZaHsrkjfboRZLWm7L2G0mw62q2sXBNj4YeeV2Sk2uU,16717
|
4
|
-
livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
|
5
|
-
livekit/plugins/google/models.py,sha256=SGjAumdDK97NNLwMFcqZdKR68f1NoGB2Rk1UP2-imG0,1457
|
6
|
-
livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
|
-
livekit/plugins/google/stt.py,sha256=96GJmGDAIBdCpDECArwIXpj2s1xlcA_zuvTnwsvq4xA,22854
|
8
|
-
livekit/plugins/google/tts.py,sha256=pG9_pibO3NDGEMa4huU5S9lbeyI3daQyrS17SuTKfZI,8008
|
9
|
-
livekit/plugins/google/version.py,sha256=LeUJJQ9jwADplJbF46ClzVjYAClwJEhZMCToNJN9lWc,601
|
10
|
-
livekit/plugins/google/beta/__init__.py,sha256=AxRYc7NGG62Tv1MmcZVCDHNvlhbC86hM-_yP01Qb28k,47
|
11
|
-
livekit/plugins/google/beta/realtime/__init__.py,sha256=sGTn6JFNyA30QUXBZ_BV3l2eHpGAzR35ByXxg77vWNU,205
|
12
|
-
livekit/plugins/google/beta/realtime/api_proto.py,sha256=9EhmwgeIgKDqdSijv5Q9pgx7UhAakK02ZDwbnUsra_o,657
|
13
|
-
livekit/plugins/google/beta/realtime/realtime_api.py,sha256=8JdWUMUheGhy1ia6JbN3_U2_cL7CNs8-1fTOAgW4I38,22999
|
14
|
-
livekit/plugins/google/beta/realtime/transcriber.py,sha256=rjXO0cSPr3HATxrSfv1MX7IbrjmiTvnLPF280BfRBL8,9809
|
15
|
-
livekit_plugins_google-0.11.1.dist-info/METADATA,sha256=m7B07abY9wTbEJVa3dmdsgfatxYwJFwDNQYhyJgIPJU,3732
|
16
|
-
livekit_plugins_google-0.11.1.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
|
17
|
-
livekit_plugins_google-0.11.1.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
|
18
|
-
livekit_plugins_google-0.11.1.dist-info/RECORD,,
|
@@ -1 +0,0 @@
|
|
1
|
-
livekit
|