livekit-plugins-google 0.9.1__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
@@ -13,11 +13,12 @@
13
13
  # limitations under the License.
14
14
 
15
15
  from . import beta
16
+ from .llm import LLM
16
17
  from .stt import STT, SpeechStream
17
18
  from .tts import TTS
18
19
  from .version import __version__
19
20
 
20
- __all__ = ["STT", "TTS", "SpeechStream", "__version__", "beta"]
21
+ __all__ = ["STT", "TTS", "SpeechStream", "__version__", "beta", "LLM"]
21
22
  from livekit.agents import Plugin
22
23
 
23
24
  from .log import logger
@@ -0,0 +1,202 @@
1
+ from __future__ import annotations
2
+
3
+ import base64
4
+ import inspect
5
+ import json
6
+ from typing import Any, Dict, List, Optional, get_args, get_origin
7
+
8
+ from livekit import rtc
9
+ from livekit.agents import llm, utils
10
+ from livekit.agents.llm.function_context import _is_optional_type
11
+
12
+ from google.genai import types
13
+
14
# Maps Python types to the Gemini API's uppercase JSON-schema type names.
# NOTE(review): values are written as plain strings; the annotation says
# types.Type — presumably the SDK accepts these names directly, confirm
# against the installed google-genai version.
JSON_SCHEMA_TYPE_MAP: dict[type, types.Type] = {
    str: "STRING",
    int: "INTEGER",
    float: "NUMBER",
    bool: "BOOLEAN",
    dict: "OBJECT",
    list: "ARRAY",
}

# Helpers re-exported for use by the realtime / LLM modules.
__all__ = ["_build_gemini_ctx", "_build_tools"]
24
+
25
+
26
def _build_parameters(arguments: Dict[str, Any]) -> types.Schema | None:
    """Convert a function-context argument mapping into a Gemini ``types.Schema``.

    Args:
        arguments: Mapping of argument name to its metadata object
            (``type``, ``description``, ``choices``, ``default``, ``name``).

    Returns:
        An OBJECT schema describing all arguments, or ``None`` when the
        mapping produces no properties.

    Raises:
        ValueError: If an argument (or list item) type is not in
            ``JSON_SCHEMA_TYPE_MAP``, or if integer ``choices`` are used —
            the model does not support integer enums.
    """
    properties: Dict[str, types.Schema] = {}
    required: List[str] = []

    for arg_name, arg_info in arguments.items():
        prop = types.Schema()
        if arg_info.description:
            prop.description = arg_info.description

        # Unwrap Optional[...] down to the concrete type before mapping.
        _, py_type = _is_optional_type(arg_info.type)
        origin = get_origin(py_type)
        if origin is list:
            item_type = get_args(py_type)[0]
            if item_type not in JSON_SCHEMA_TYPE_MAP:
                raise ValueError(f"Unsupported type: {item_type}")
            prop.type = "ARRAY"
            prop.items = types.Schema(type=JSON_SCHEMA_TYPE_MAP[item_type])

            if arg_info.choices:
                prop.items.enum = arg_info.choices
                # Consistency fix: the scalar branch below rejects integer
                # choices (unsupported by the model); apply the same rule
                # to list-item enums instead of silently accepting them.
                if item_type is int:
                    raise ValueError(
                        f"Parameter '{arg_info.name}' uses integer choices, not supported by this model."
                    )
        else:
            if py_type not in JSON_SCHEMA_TYPE_MAP:
                raise ValueError(f"Unsupported type: {py_type}")

            prop.type = JSON_SCHEMA_TYPE_MAP[py_type]

            if arg_info.choices:
                prop.enum = arg_info.choices
                if py_type is int:
                    raise ValueError(
                        f"Parameter '{arg_info.name}' uses integer choices, not supported by this model."
                    )

        properties[arg_name] = prop

        # Arguments with no default are required.
        if arg_info.default is inspect.Parameter.empty:
            required.append(arg_name)

    if properties:
        parameters = types.Schema(type="OBJECT", properties=properties)
        if required:
            parameters.required = required

        return parameters

    return None
72
+
73
+
74
def _build_tools(fnc_ctx: Any) -> List[types.FunctionDeclaration]:
    """Build a Gemini ``FunctionDeclaration`` for every AI function in *fnc_ctx*.

    Each declaration carries the function's name, description, and a schema
    derived from its arguments via ``_build_parameters``.
    """
    return [
        types.FunctionDeclaration(
            name=fnc_info.name,
            description=fnc_info.description,
            parameters=_build_parameters(fnc_info.arguments),
        )
        for fnc_info in fnc_ctx.ai_functions.values()
    ]
87
+
88
+
89
def _build_gemini_ctx(
    chat_ctx: llm.ChatContext, cache_key: Any
) -> tuple[list[types.Content], Optional[types.Content]]:
    """Convert a livekit ``ChatContext`` into Gemini ``Content`` turns.

    Consecutive messages that map to the same effective role are merged
    into a single ``Content`` turn. System messages are lifted out into a
    separate system instruction instead of appearing in the turn list.

    Args:
        chat_ctx: The chat history to convert.
        cache_key: Key used when encoding ``ChatImage`` items (passed
            through to ``_build_gemini_image_part``).

    Returns:
        A tuple ``(turns, system_instruction)``; ``system_instruction`` is
        ``None`` if no string-content system message was seen.
    """
    turns: list[types.Content] = []
    system_instruction: Optional[types.Content] = None
    current_role: Optional[str] = None
    parts: list[types.Part] = []

    for msg in chat_ctx.messages:
        if msg.role == "system":
            # Only string system content is captured; non-string system
            # messages are skipped. Later system messages overwrite
            # earlier ones.
            if isinstance(msg.content, str):
                system_instruction = types.Content(parts=[types.Part(text=msg.content)])
            continue

        if msg.role == "assistant":
            role = "model"
        elif msg.role == "tool":
            # Tool responses are sent under the "user" role.
            role = "user"
        else:
            role = "user"

        # If role changed, finalize previous parts into a turn
        if role != current_role:
            if current_role is not None and parts:
                turns.append(types.Content(role=current_role, parts=parts))
            current_role = role
            parts = []

        # Emit one function_call part per pending tool call on the message.
        if msg.tool_calls:
            for fnc in msg.tool_calls:
                parts.append(
                    types.Part(
                        function_call=types.FunctionCall(
                            id=fnc.tool_call_id,
                            name=fnc.function_info.name,
                            args=fnc.arguments,
                        )
                    )
                )

        if msg.role == "tool":
            if msg.content:
                if isinstance(msg.content, dict):
                    parts.append(
                        types.Part(
                            function_response=types.FunctionResponse(
                                id=msg.tool_call_id,
                                name=msg.name,
                                response=msg.content,
                            )
                        )
                    )
                elif isinstance(msg.content, str):
                    # Plain-string tool output is wrapped in a dict since
                    # the response payload must be a mapping.
                    parts.append(
                        types.Part(
                            function_response=types.FunctionResponse(
                                id=msg.tool_call_id,
                                name=msg.name,
                                response={"result": msg.content},
                            )
                        )
                    )
        else:
            if msg.content:
                if isinstance(msg.content, str):
                    parts.append(types.Part(text=msg.content))
                elif isinstance(msg.content, dict):
                    # Dict content is serialized to JSON text.
                    parts.append(types.Part(text=json.dumps(msg.content)))
                elif isinstance(msg.content, list):
                    # Mixed content: strings become text parts, images are
                    # encoded; other item types are silently dropped.
                    for item in msg.content:
                        if isinstance(item, str):
                            parts.append(types.Part(text=item))
                        elif isinstance(item, llm.ChatImage):
                            parts.append(_build_gemini_image_part(item, cache_key))

    # Finalize last role's parts if any remain
    if current_role is not None and parts:
        turns.append(types.Content(role=current_role, parts=parts))

    return turns, system_instruction
169
+
170
+
171
def _build_gemini_image_part(image: llm.ChatImage, cache_key: Any) -> types.Part:
    """Build a Gemini ``Part`` from a ``ChatImage``.

    Args:
        image: ``image.image`` may be a string (a JPEG data URL or a plain
            URL) or an ``rtc.VideoFrame``.
        cache_key: Key under which the encoded frame is memoized on
            ``image._cache`` so repeated conversions skip re-encoding.

    Returns:
        A ``types.Part`` carrying the image as inline bytes or as a URI.

    Raises:
        ValueError: If the data URL holds invalid base64, or the image
            type is unsupported.
    """
    if isinstance(image.image, str):
        # Check if the string is a Data URL
        # NOTE(review): only "data:image/jpeg;base64," is recognized; any
        # other data: URL falls through to the URI branch below — confirm
        # that is intended.
        if image.image.startswith("data:image/jpeg;base64,"):
            # Extract the base64 part after the comma
            base64_data = image.image.split(",", 1)[1]
            try:
                image_bytes = base64.b64decode(base64_data)
            except Exception as e:
                raise ValueError("Invalid base64 data in image URL") from e

            return types.Part.from_bytes(data=image_bytes, mime_type="image/jpeg")
        else:
            # Assume it's a regular URL
            return types.Part.from_uri(file_uri=image.image, mime_type="image/jpeg")

    elif isinstance(image.image, rtc.VideoFrame):
        if cache_key not in image._cache:
            opts = utils.images.EncodeOptions()
            if image.inference_width and image.inference_height:
                opts.resize_options = utils.images.ResizeOptions(
                    width=image.inference_width,
                    height=image.inference_height,
                    strategy="scale_aspect_fit",
                )
            encoded_data = utils.images.encode(image.image, opts)
            # Cached as a base64-encoded *str* (not raw bytes).
            image._cache[cache_key] = base64.b64encode(encoded_data).decode("utf-8")

        # NOTE(review): from_bytes receives the base64 str from the cache,
        # while the data-URL branch above passes decoded bytes — verify the
        # SDK coerces/decodes the str form correctly, or this mismatch
        # produces corrupt image payloads.
        return types.Part.from_bytes(
            data=image._cache[cache_key], mime_type="image/jpeg"
        )
    raise ValueError(f"Unsupported image type: {type(image.image)}")
@@ -1,7 +1,6 @@
1
1
  from .api_proto import (
2
2
  ClientEvents,
3
3
  LiveAPIModels,
4
- ResponseModality,
5
4
  Voice,
6
5
  )
7
6
  from .realtime_api import RealtimeModel
@@ -10,6 +9,5 @@ __all__ = [
10
9
  "RealtimeModel",
11
10
  "ClientEvents",
12
11
  "LiveAPIModels",
13
- "ResponseModality",
14
12
  "Voice",
15
13
  ]
@@ -1,15 +1,16 @@
1
1
  from __future__ import annotations
2
2
 
3
- import inspect
4
- from typing import Any, Dict, List, Literal, Sequence, Union
3
+ from typing import Literal, Sequence, Union
5
4
 
6
- from google.genai import types # type: ignore
5
+ from google.genai import types
6
+
7
+ from ..._utils import _build_gemini_ctx, _build_tools
7
8
 
8
9
  LiveAPIModels = Literal["gemini-2.0-flash-exp"]
9
10
 
10
11
  Voice = Literal["Puck", "Charon", "Kore", "Fenrir", "Aoede"]
11
- ResponseModality = Literal["AUDIO", "TEXT"]
12
12
 
13
+ __all__ = ["_build_tools", "ClientEvents", "_build_gemini_ctx"]
13
14
 
14
15
  ClientEvents = Union[
15
16
  types.ContentListUnion,
@@ -21,59 +22,3 @@ ClientEvents = Union[
21
22
  types.FunctionResponseOrDict,
22
23
  Sequence[types.FunctionResponseOrDict],
23
24
  ]
24
-
25
-
26
- JSON_SCHEMA_TYPE_MAP = {
27
- str: "string",
28
- int: "integer",
29
- float: "number",
30
- bool: "boolean",
31
- dict: "object",
32
- list: "array",
33
- }
34
-
35
-
36
- def _build_parameters(arguments: Dict[str, Any]) -> types.SchemaDict:
37
- properties: Dict[str, types.SchemaDict] = {}
38
- required: List[str] = []
39
-
40
- for arg_name, arg_info in arguments.items():
41
- py_type = arg_info.type
42
- if py_type not in JSON_SCHEMA_TYPE_MAP:
43
- raise ValueError(f"Unsupported type: {py_type}")
44
-
45
- prop: types.SchemaDict = {
46
- "type": JSON_SCHEMA_TYPE_MAP[py_type],
47
- "description": arg_info.description,
48
- }
49
-
50
- if arg_info.choices:
51
- prop["enum"] = arg_info.choices
52
-
53
- properties[arg_name] = prop
54
-
55
- if arg_info.default is inspect.Parameter.empty:
56
- required.append(arg_name)
57
-
58
- parameters: types.SchemaDict = {"type": "object", "properties": properties}
59
-
60
- if required:
61
- parameters["required"] = required
62
-
63
- return parameters
64
-
65
-
66
- def _build_tools(fnc_ctx: Any) -> List[types.FunctionDeclarationDict]:
67
- function_declarations: List[types.FunctionDeclarationDict] = []
68
- for fnc_info in fnc_ctx.ai_functions.values():
69
- parameters = _build_parameters(fnc_info.arguments)
70
-
71
- func_decl: types.FunctionDeclarationDict = {
72
- "name": fnc_info.name,
73
- "description": fnc_info.description,
74
- "parameters": parameters,
75
- }
76
-
77
- function_declarations.append(func_decl)
78
-
79
- return function_declarations