livekit-plugins-google 0.9.0__py3-none-any.whl → 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- livekit/plugins/google/__init__.py +2 -1
- livekit/plugins/google/_utils.py +202 -0
- livekit/plugins/google/beta/realtime/__init__.py +0 -2
- livekit/plugins/google/beta/realtime/api_proto.py +5 -60
- livekit/plugins/google/beta/realtime/realtime_api.py +168 -42
- livekit/plugins/google/beta/realtime/transcriber.py +173 -0
- livekit/plugins/google/llm.py +414 -0
- livekit/plugins/google/models.py +2 -0
- livekit/plugins/google/stt.py +64 -10
- livekit/plugins/google/version.py +1 -1
- {livekit_plugins_google-0.9.0.dist-info → livekit_plugins_google-0.10.0.dist-info}/METADATA +13 -3
- livekit_plugins_google-0.10.0.dist-info/RECORD +18 -0
- {livekit_plugins_google-0.9.0.dist-info → livekit_plugins_google-0.10.0.dist-info}/WHEEL +1 -1
- livekit_plugins_google-0.9.0.dist-info/RECORD +0 -15
- {livekit_plugins_google-0.9.0.dist-info → livekit_plugins_google-0.10.0.dist-info}/top_level.txt +0 -0
@@ -13,11 +13,12 @@
|
|
13
13
|
# limitations under the License.
|
14
14
|
|
15
15
|
from . import beta
|
16
|
+
from .llm import LLM
|
16
17
|
from .stt import STT, SpeechStream
|
17
18
|
from .tts import TTS
|
18
19
|
from .version import __version__
|
19
20
|
|
20
|
-
__all__ = ["STT", "TTS", "SpeechStream", "__version__", "beta"]
|
21
|
+
__all__ = ["STT", "TTS", "SpeechStream", "__version__", "beta", "LLM"]
|
21
22
|
from livekit.agents import Plugin
|
22
23
|
|
23
24
|
from .log import logger
|
@@ -0,0 +1,202 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import base64
|
4
|
+
import inspect
|
5
|
+
import json
|
6
|
+
from typing import Any, Dict, List, Optional, get_args, get_origin
|
7
|
+
|
8
|
+
from livekit import rtc
|
9
|
+
from livekit.agents import llm, utils
|
10
|
+
from livekit.agents.llm.function_context import _is_optional_type
|
11
|
+
|
12
|
+
from google.genai import types
|
13
|
+
|
14
|
+
# Lookup table from Python builtin types to the upper-case schema type names
# assigned to ``types.Schema.type`` when building function parameter schemas.
# Types missing from this table are rejected with ``ValueError`` by
# ``_build_parameters``.
JSON_SCHEMA_TYPE_MAP: dict[type, types.Type] = {
    str: "STRING",
    int: "INTEGER",
    float: "NUMBER",
    bool: "BOOLEAN",
    dict: "OBJECT",
    list: "ARRAY",
}

# Names exported by ``from ._utils import *``; both are underscore-prefixed
# helpers, so they must be listed explicitly to be exported at all.
__all__ = ["_build_gemini_ctx", "_build_tools"]
|
24
|
+
|
25
|
+
|
26
|
+
def _build_parameters(arguments: Dict[str, Any]) -> types.Schema | None:
    """Translate function-argument metadata into an ``OBJECT`` schema.

    Args:
        arguments: Mapping of argument name to an info object exposing
            ``name``, ``type``, ``description``, ``choices`` and ``default``.

    Returns:
        A ``types.Schema`` of type ``OBJECT`` with one property per argument
        (and a ``required`` list for arguments without a default), or
        ``None`` when ``arguments`` is empty.

    Raises:
        ValueError: If an argument (or list item) type is not in
            ``JSON_SCHEMA_TYPE_MAP``, if a list annotation carries no item
            type, or if a scalar integer argument declares ``choices``.
    """
    properties: Dict[str, types.Schema] = {}
    required: List[str] = []

    for arg_name, arg_info in arguments.items():
        prop = types.Schema()
        if arg_info.description:
            prop.description = arg_info.description

        # Only the second element of the helper's result is used here;
        # presumably it is the annotation with any Optional[...] wrapper
        # removed -- confirm against livekit.agents.
        _, py_type = _is_optional_type(arg_info.type)

        if get_origin(py_type) is list:
            item_args = get_args(py_type)
            # A bare ``typing.List`` has a ``list`` origin but no type
            # arguments; report it like any other unsupported type instead
            # of failing with an IndexError.
            if not item_args:
                raise ValueError(f"Unsupported type: {py_type}")

            item_type = item_args[0]
            if item_type not in JSON_SCHEMA_TYPE_MAP:
                raise ValueError(f"Unsupported type: {item_type}")

            prop.type = "ARRAY"
            prop.items = types.Schema(type=JSON_SCHEMA_TYPE_MAP[item_type])

            # For arrays the allowed values constrain the items, not the
            # array itself.
            if arg_info.choices:
                prop.items.enum = arg_info.choices
        else:
            if py_type not in JSON_SCHEMA_TYPE_MAP:
                raise ValueError(f"Unsupported type: {py_type}")

            prop.type = JSON_SCHEMA_TYPE_MAP[py_type]

            if arg_info.choices:
                # Validate before mutating the schema so a rejected argument
                # never leaves a half-populated property behind.
                if py_type is int:
                    raise ValueError(
                        f"Parameter '{arg_info.name}' uses integer choices, not supported by this model."
                    )
                prop.enum = arg_info.choices

        properties[arg_name] = prop

        # No default value means the caller must always supply the argument.
        if arg_info.default is inspect.Parameter.empty:
            required.append(arg_name)

    if not properties:
        return None

    parameters = types.Schema(type="OBJECT", properties=properties)
    if required:
        parameters.required = required
    return parameters
|
72
|
+
|
73
|
+
|
74
|
+
def _build_tools(fnc_ctx: Any) -> List[types.FunctionDeclaration]:
    """Create one function declaration per AI function in ``fnc_ctx``.

    Each entry of ``fnc_ctx.ai_functions`` contributes its ``name``,
    ``description`` and a parameter schema derived from its ``arguments``.
    """
    return [
        types.FunctionDeclaration(
            name=info.name,
            description=info.description,
            parameters=_build_parameters(info.arguments),
        )
        for info in fnc_ctx.ai_functions.values()
    ]
|
87
|
+
|
88
|
+
|
89
|
+
def _build_gemini_ctx(
    chat_ctx: llm.ChatContext, cache_key: Any
) -> tuple[list[types.Content], Optional[types.Content]]:
    """Convert a chat context into Gemini turns plus a system instruction.

    Consecutive messages that map to the same Gemini role are merged into a
    single ``types.Content``. ``assistant`` messages become role ``model``;
    every other non-system role (including ``tool``) becomes ``user``.

    Args:
        chat_ctx: Context whose ``messages`` are converted in order.
        cache_key: Forwarded to ``_build_gemini_image_part`` for image items.

    Returns:
        ``(turns, system_instruction)``. ``system_instruction`` is built from
        the last system message whose content is a string, or is ``None``.
    """
    contents: list[types.Content] = []
    system_content: Optional[types.Content] = None
    active_role: Optional[str] = None
    pending: list[types.Part] = []

    for message in chat_ctx.messages:
        if message.role == "system":
            # System messages never become turns; non-string content is ignored.
            if isinstance(message.content, str):
                system_content = types.Content(
                    parts=[types.Part(text=message.content)]
                )
            continue

        gemini_role = "model" if message.role == "assistant" else "user"

        # A role switch closes the turn being accumulated (if it has parts).
        if gemini_role != active_role:
            if active_role is not None and pending:
                contents.append(types.Content(role=active_role, parts=pending))
            active_role, pending = gemini_role, []

        for call in message.tool_calls or ():
            pending.append(
                types.Part(
                    function_call=types.FunctionCall(
                        id=call.tool_call_id,
                        name=call.function_info.name,
                        args=call.arguments,
                    )
                )
            )

        payload = message.content
        if not payload:
            continue

        if message.role == "tool":
            # Tool output must be a mapping; plain strings are wrapped, and
            # any other content type is skipped.
            if isinstance(payload, dict):
                response = payload
            elif isinstance(payload, str):
                response = {"result": payload}
            else:
                continue
            pending.append(
                types.Part(
                    function_response=types.FunctionResponse(
                        id=message.tool_call_id,
                        name=message.name,
                        response=response,
                    )
                )
            )
        elif isinstance(payload, str):
            pending.append(types.Part(text=payload))
        elif isinstance(payload, dict):
            pending.append(types.Part(text=json.dumps(payload)))
        elif isinstance(payload, list):
            for entry in payload:
                if isinstance(entry, str):
                    pending.append(types.Part(text=entry))
                elif isinstance(entry, llm.ChatImage):
                    pending.append(_build_gemini_image_part(entry, cache_key))

    # Emit whatever is left for the final role.
    if active_role is not None and pending:
        contents.append(types.Content(role=active_role, parts=pending))

    return contents, system_content
|
169
|
+
|
170
|
+
|
171
|
+
def _build_gemini_image_part(image: llm.ChatImage, cache_key: Any) -> types.Part:
    """Build a ``types.Part`` carrying the given chat image.

    Args:
        image: Chat image whose ``image`` attribute is either a string
            (data URL or regular URL) or an ``rtc.VideoFrame``.
        cache_key: Key under which the encoded frame is stored in
            ``image._cache`` so a frame is encoded only once per key.

    Returns:
        An inline-bytes part for base64 ``image/*`` data URLs and video
        frames, or a URI part for any other string.

    Raises:
        ValueError: If a data URL carries invalid base64, or ``image.image``
            is neither a string nor an ``rtc.VideoFrame``.
    """
    source = image.image

    if isinstance(source, str):
        if source.startswith("data:"):
            header, comma, payload = source.partition(",")
            # "data:image/png;base64" -> "image/png"
            mime_type = header[len("data:"):].split(";", 1)[0]
            if comma and header.endswith(";base64") and mime_type.startswith("image/"):
                try:
                    image_bytes = base64.b64decode(payload)
                except ValueError as e:  # binascii.Error is a ValueError
                    raise ValueError("Invalid base64 data in image URL") from e

                # Keep the MIME type the URL declares rather than assuming JPEG.
                return types.Part.from_bytes(data=image_bytes, mime_type=mime_type)

        # Anything else is treated as a regular URL.
        return types.Part.from_uri(file_uri=source, mime_type="image/jpeg")

    if isinstance(source, rtc.VideoFrame):
        if cache_key not in image._cache:
            opts = utils.images.EncodeOptions()
            if image.inference_width and image.inference_height:
                opts.resize_options = utils.images.ResizeOptions(
                    width=image.inference_width,
                    height=image.inference_height,
                    strategy="scale_aspect_fit",
                )
            # Cache the raw encoded bytes: Part.from_bytes takes bytes, not
            # base64 text.
            image._cache[cache_key] = utils.images.encode(source, opts)

        cached = image._cache[cache_key]
        if isinstance(cached, str):
            # Tolerate base64 text stored under this key by older code.
            cached = base64.b64decode(cached)

        # NOTE(review): the encoder output is assumed to be JPEG, matching
        # the MIME type this function has always reported -- confirm.
        return types.Part.from_bytes(data=cached, mime_type="image/jpeg")

    raise ValueError(f"Unsupported image type: {type(image.image)}")
|
@@ -1,7 +1,6 @@
|
|
1
1
|
from .api_proto import (
|
2
2
|
ClientEvents,
|
3
3
|
LiveAPIModels,
|
4
|
-
ResponseModality,
|
5
4
|
Voice,
|
6
5
|
)
|
7
6
|
from .realtime_api import RealtimeModel
|
@@ -10,6 +9,5 @@ __all__ = [
|
|
10
9
|
"RealtimeModel",
|
11
10
|
"ClientEvents",
|
12
11
|
"LiveAPIModels",
|
13
|
-
"ResponseModality",
|
14
12
|
"Voice",
|
15
13
|
]
|
@@ -1,15 +1,16 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
-
import
|
4
|
-
from typing import Any, Dict, List, Literal, Sequence, Union
|
3
|
+
from typing import Literal, Sequence, Union
|
5
4
|
|
6
|
-
from google.genai import types
|
5
|
+
from google.genai import types
|
6
|
+
|
7
|
+
from ..._utils import _build_gemini_ctx, _build_tools
|
7
8
|
|
8
9
|
LiveAPIModels = Literal["gemini-2.0-flash-exp"]
|
9
10
|
|
10
11
|
Voice = Literal["Puck", "Charon", "Kore", "Fenrir", "Aoede"]
|
11
|
-
ResponseModality = Literal["AUDIO", "TEXT"]
|
12
12
|
|
13
|
+
__all__ = ["_build_tools", "ClientEvents", "_build_gemini_ctx"]
|
13
14
|
|
14
15
|
ClientEvents = Union[
|
15
16
|
types.ContentListUnion,
|
@@ -21,59 +22,3 @@ ClientEvents = Union[
|
|
21
22
|
types.FunctionResponseOrDict,
|
22
23
|
Sequence[types.FunctionResponseOrDict],
|
23
24
|
]
|
24
|
-
|
25
|
-
|
26
|
-
JSON_SCHEMA_TYPE_MAP = {
|
27
|
-
str: "string",
|
28
|
-
int: "integer",
|
29
|
-
float: "number",
|
30
|
-
bool: "boolean",
|
31
|
-
dict: "object",
|
32
|
-
list: "array",
|
33
|
-
}
|
34
|
-
|
35
|
-
|
36
|
-
def _build_parameters(arguments: Dict[str, Any]) -> types.SchemaDict:
    """Describe function arguments as an ``object`` schema dictionary.

    Every argument becomes a property holding its mapped type and its
    description, plus an ``enum`` when choices are declared. Arguments with
    no default are collected under ``required``.

    Raises:
        ValueError: If an argument type is not in ``JSON_SCHEMA_TYPE_MAP``.
    """
    schema_by_name: Dict[str, types.SchemaDict] = {}
    mandatory: List[str] = []

    for name, info in arguments.items():
        annotation = info.type
        json_type = JSON_SCHEMA_TYPE_MAP.get(annotation)
        if json_type is None:
            raise ValueError(f"Unsupported type: {annotation}")

        entry: types.SchemaDict = {
            "type": json_type,
            "description": info.description,
        }
        if info.choices:
            entry["enum"] = info.choices
        schema_by_name[name] = entry

        # Arguments without a default value are mandatory.
        if info.default is inspect.Parameter.empty:
            mandatory.append(name)

    schema: types.SchemaDict = {"type": "object", "properties": schema_by_name}
    if mandatory:
        schema["required"] = mandatory
    return schema
|
64
|
-
|
65
|
-
|
66
|
-
def _build_tools(fnc_ctx: Any) -> List[types.FunctionDeclarationDict]:
    """Return one declaration dictionary per AI function in ``fnc_ctx``.

    Each dictionary carries the function's ``name``, ``description`` and the
    parameter schema built from its ``arguments``.
    """
    return [
        {
            "name": info.name,
            "description": info.description,
            "parameters": _build_parameters(info.arguments),
        }
        for info in fnc_ctx.ai_functions.values()
    ]
|