livekit-plugins-google 1.0.22__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- livekit/plugins/google/__init__.py +3 -2
- livekit/plugins/google/beta/realtime/api_proto.py +8 -2
- livekit/plugins/google/beta/realtime/realtime_api.py +316 -117
- livekit/plugins/google/llm.py +62 -32
- livekit/plugins/google/models.py +1 -0
- livekit/plugins/google/stt.py +19 -12
- livekit/plugins/google/tools.py +11 -0
- livekit/plugins/google/tts.py +109 -136
- livekit/plugins/google/utils.py +39 -88
- livekit/plugins/google/version.py +1 -1
- {livekit_plugins_google-1.0.22.dist-info → livekit_plugins_google-1.1.0.dist-info}/METADATA +3 -3
- livekit_plugins_google-1.1.0.dist-info/RECORD +17 -0
- livekit_plugins_google-1.0.22.dist-info/RECORD +0 -16
- {livekit_plugins_google-1.0.22.dist-info → livekit_plugins_google-1.1.0.dist-info}/WHEEL +0 -0
livekit/plugins/google/tts.py
CHANGED
@@ -16,26 +16,15 @@ from __future__ import annotations
|
|
16
16
|
|
17
17
|
import asyncio
|
18
18
|
import weakref
|
19
|
-
from
|
19
|
+
from collections.abc import AsyncGenerator
|
20
|
+
from dataclasses import dataclass, replace
|
20
21
|
|
21
22
|
from google.api_core.client_options import ClientOptions
|
22
23
|
from google.api_core.exceptions import DeadlineExceeded, GoogleAPICallError
|
23
24
|
from google.cloud import texttospeech
|
24
25
|
from google.cloud.texttospeech_v1.types import SsmlVoiceGender, SynthesizeSpeechResponse
|
25
|
-
from livekit.agents import
|
26
|
-
|
27
|
-
APIConnectOptions,
|
28
|
-
APIStatusError,
|
29
|
-
APITimeoutError,
|
30
|
-
tokenize,
|
31
|
-
tts,
|
32
|
-
utils,
|
33
|
-
)
|
34
|
-
from livekit.agents.types import (
|
35
|
-
DEFAULT_API_CONNECT_OPTIONS,
|
36
|
-
NOT_GIVEN,
|
37
|
-
NotGivenOr,
|
38
|
-
)
|
26
|
+
from livekit.agents import APIConnectOptions, APIStatusError, APITimeoutError, tokenize, tts, utils
|
27
|
+
from livekit.agents.types import DEFAULT_API_CONNECT_OPTIONS, NOT_GIVEN, NotGivenOr
|
39
28
|
from livekit.agents.utils import is_given
|
40
29
|
|
41
30
|
from .log import logger
|
@@ -51,7 +40,11 @@ DEFAULT_GENDER = "neutral"
|
|
51
40
|
@dataclass
|
52
41
|
class _TTSOptions:
|
53
42
|
voice: texttospeech.VoiceSelectionParams
|
54
|
-
|
43
|
+
encoding: texttospeech.AudioEncoding
|
44
|
+
sample_rate: int
|
45
|
+
pitch: float
|
46
|
+
effects_profile_id: str
|
47
|
+
speaking_rate: float
|
55
48
|
tokenizer: tokenize.SentenceTokenizer
|
56
49
|
|
57
50
|
|
@@ -67,11 +60,11 @@ class TTS(tts.TTS):
|
|
67
60
|
effects_profile_id: str = "",
|
68
61
|
speaking_rate: float = 1.0,
|
69
62
|
location: str = "global",
|
70
|
-
audio_encoding: texttospeech.AudioEncoding = texttospeech.AudioEncoding.
|
63
|
+
audio_encoding: texttospeech.AudioEncoding = texttospeech.AudioEncoding.OGG_OPUS, # type: ignore
|
71
64
|
credentials_info: NotGivenOr[dict] = NOT_GIVEN,
|
72
65
|
credentials_file: NotGivenOr[str] = NOT_GIVEN,
|
73
66
|
tokenizer: NotGivenOr[tokenize.SentenceTokenizer] = NOT_GIVEN,
|
74
|
-
use_streaming:
|
67
|
+
use_streaming: bool = True,
|
75
68
|
) -> None:
|
76
69
|
"""
|
77
70
|
Create a new instance of Google TTS.
|
@@ -94,9 +87,6 @@ class TTS(tts.TTS):
|
|
94
87
|
tokenizer (tokenize.SentenceTokenizer, optional): Tokenizer for the TTS. Default is a basic sentence tokenizer.
|
95
88
|
use_streaming (bool, optional): Whether to use streaming synthesis. Default is True.
|
96
89
|
""" # noqa: E501
|
97
|
-
if not is_given(use_streaming):
|
98
|
-
use_streaming = True
|
99
|
-
|
100
90
|
super().__init__(
|
101
91
|
capabilities=tts.TTSCapabilities(streaming=use_streaming),
|
102
92
|
sample_rate=sample_rate,
|
@@ -122,13 +112,11 @@ class TTS(tts.TTS):
|
|
122
112
|
|
123
113
|
self._opts = _TTSOptions(
|
124
114
|
voice=voice_params,
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
speaking_rate=speaking_rate,
|
131
|
-
),
|
115
|
+
encoding=audio_encoding,
|
116
|
+
sample_rate=sample_rate,
|
117
|
+
pitch=pitch,
|
118
|
+
effects_profile_id=effects_profile_id,
|
119
|
+
speaking_rate=speaking_rate,
|
132
120
|
tokenizer=tokenizer,
|
133
121
|
)
|
134
122
|
self._streams = weakref.WeakSet[SynthesizeStream]()
|
@@ -149,7 +137,7 @@ class TTS(tts.TTS):
|
|
149
137
|
gender (Gender | str, optional): Voice gender ("male", "female", "neutral").
|
150
138
|
voice_name (str, optional): Specific voice name.
|
151
139
|
speaking_rate (float, optional): Speed of speech.
|
152
|
-
"""
|
140
|
+
"""
|
153
141
|
params = {}
|
154
142
|
if is_given(language):
|
155
143
|
params["language_code"] = str(language)
|
@@ -162,7 +150,7 @@ class TTS(tts.TTS):
|
|
162
150
|
self._opts.voice = texttospeech.VoiceSelectionParams(**params)
|
163
151
|
|
164
152
|
if is_given(speaking_rate):
|
165
|
-
self._opts.
|
153
|
+
self._opts.speaking_rate = speaking_rate
|
166
154
|
|
167
155
|
def _ensure_client(self) -> texttospeech.TextToSpeechAsyncClient:
|
168
156
|
api_endpoint = "texttospeech.googleapis.com"
|
@@ -190,107 +178,89 @@ class TTS(tts.TTS):
|
|
190
178
|
def stream(
|
191
179
|
self, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
|
192
180
|
) -> SynthesizeStream:
|
193
|
-
stream = SynthesizeStream(
|
194
|
-
tts=self,
|
195
|
-
opts=self._opts,
|
196
|
-
client=self._ensure_client(),
|
197
|
-
conn_options=conn_options,
|
198
|
-
)
|
181
|
+
stream = SynthesizeStream(tts=self, conn_options=conn_options)
|
199
182
|
self._streams.add(stream)
|
200
183
|
return stream
|
201
184
|
|
202
185
|
def synthesize(
|
203
|
-
self,
|
204
|
-
text: str,
|
205
|
-
*,
|
206
|
-
conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
|
186
|
+
self, text: str, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
|
207
187
|
) -> ChunkedStream:
|
208
|
-
return ChunkedStream(
|
209
|
-
tts=self,
|
210
|
-
input_text=text,
|
211
|
-
conn_options=conn_options,
|
212
|
-
opts=self._opts,
|
213
|
-
client=self._ensure_client(),
|
214
|
-
)
|
188
|
+
return ChunkedStream(tts=self, input_text=text, conn_options=conn_options)
|
215
189
|
|
216
190
|
async def aclose(self) -> None:
|
217
191
|
for stream in list(self._streams):
|
218
192
|
await stream.aclose()
|
219
193
|
self._streams.clear()
|
220
|
-
await super().aclose()
|
221
194
|
|
222
195
|
|
223
196
|
class ChunkedStream(tts.ChunkedStream):
|
224
|
-
def __init__(
|
225
|
-
self,
|
226
|
-
*,
|
227
|
-
tts: TTS,
|
228
|
-
input_text: str,
|
229
|
-
opts: _TTSOptions,
|
230
|
-
client: texttospeech.TextToSpeechAsyncClient,
|
231
|
-
conn_options: APIConnectOptions,
|
232
|
-
) -> None:
|
197
|
+
def __init__(self, *, tts: TTS, input_text: str, conn_options: APIConnectOptions) -> None:
|
233
198
|
super().__init__(tts=tts, input_text=input_text, conn_options=conn_options)
|
234
|
-
self.
|
235
|
-
|
236
|
-
async def _run(self) -> None:
|
237
|
-
request_id = utils.shortuuid()
|
199
|
+
self._tts: TTS = tts
|
200
|
+
self._opts = replace(tts._opts)
|
238
201
|
|
202
|
+
async def _run(self, output_emitter: tts.AudioEmitter) -> None:
|
239
203
|
try:
|
240
|
-
response: SynthesizeSpeechResponse = await self.
|
204
|
+
response: SynthesizeSpeechResponse = await self._tts._ensure_client().synthesize_speech(
|
241
205
|
input=texttospeech.SynthesisInput(text=self._input_text),
|
242
206
|
voice=self._opts.voice,
|
243
|
-
audio_config=
|
207
|
+
audio_config=texttospeech.AudioConfig(
|
208
|
+
audio_encoding=self._opts.encoding,
|
209
|
+
sample_rate_hertz=self._opts.sample_rate,
|
210
|
+
pitch=self._opts.pitch,
|
211
|
+
effects_profile_id=self._opts.effects_profile_id,
|
212
|
+
speaking_rate=self._opts.speaking_rate,
|
213
|
+
),
|
244
214
|
timeout=self._conn_options.timeout,
|
245
215
|
)
|
246
216
|
|
247
|
-
|
248
|
-
|
249
|
-
sample_rate=self._opts.
|
217
|
+
output_emitter.initialize(
|
218
|
+
request_id=utils.shortuuid(),
|
219
|
+
sample_rate=self._opts.sample_rate,
|
250
220
|
num_channels=1,
|
221
|
+
mime_type=_encoding_to_mimetype(self._opts.encoding),
|
251
222
|
)
|
252
223
|
|
253
|
-
|
254
|
-
decoder.push(response.audio_content)
|
255
|
-
decoder.end_input()
|
256
|
-
emitter = tts.SynthesizedAudioEmitter(
|
257
|
-
event_ch=self._event_ch,
|
258
|
-
request_id=request_id,
|
259
|
-
)
|
260
|
-
async for frame in decoder:
|
261
|
-
emitter.push(frame)
|
262
|
-
emitter.flush()
|
263
|
-
finally:
|
264
|
-
await decoder.aclose()
|
265
|
-
|
224
|
+
output_emitter.push(response.audio_content)
|
266
225
|
except DeadlineExceeded:
|
267
226
|
raise APITimeoutError() from None
|
268
227
|
except GoogleAPICallError as e:
|
269
|
-
raise APIStatusError(
|
270
|
-
f"{e.message} {e.details}", status_code=e.code or -1, request_id=None, body=None
|
271
|
-
) from e
|
272
|
-
except Exception as e:
|
273
|
-
raise APIConnectionError() from e
|
228
|
+
raise APIStatusError(e.message, status_code=e.code or -1) from e
|
274
229
|
|
275
230
|
|
276
231
|
class SynthesizeStream(tts.SynthesizeStream):
|
277
|
-
def __init__(
|
278
|
-
self,
|
279
|
-
*,
|
280
|
-
tts: TTS,
|
281
|
-
opts: _TTSOptions,
|
282
|
-
client: texttospeech.TextToSpeechAsyncClient,
|
283
|
-
conn_options: APIConnectOptions,
|
284
|
-
):
|
232
|
+
def __init__(self, *, tts: TTS, conn_options: APIConnectOptions):
|
285
233
|
super().__init__(tts=tts, conn_options=conn_options)
|
286
|
-
self.
|
234
|
+
self._tts: TTS = tts
|
235
|
+
self._opts = replace(tts._opts)
|
287
236
|
self._segments_ch = utils.aio.Chan[tokenize.SentenceStream]()
|
288
237
|
|
289
|
-
async def _run(self) -> None:
|
290
|
-
|
238
|
+
async def _run(self, output_emitter: tts.AudioEmitter) -> None:
|
239
|
+
encoding = self._opts.encoding
|
240
|
+
if encoding not in (texttospeech.AudioEncoding.OGG_OPUS, texttospeech.AudioEncoding.PCM):
|
241
|
+
enc_name = texttospeech.AudioEncoding._member_names_[encoding]
|
242
|
+
logger.warning(
|
243
|
+
f"encoding {enc_name} isn't supported by the streaming_synthesize, "
|
244
|
+
"fallbacking to PCM"
|
245
|
+
)
|
246
|
+
encoding = texttospeech.AudioEncoding.PCM # type: ignore
|
291
247
|
|
292
|
-
|
293
|
-
|
248
|
+
output_emitter.initialize(
|
249
|
+
request_id=utils.shortuuid(),
|
250
|
+
sample_rate=self._opts.sample_rate,
|
251
|
+
num_channels=1,
|
252
|
+
mime_type=_encoding_to_mimetype(encoding),
|
253
|
+
stream=True,
|
254
|
+
)
|
255
|
+
|
256
|
+
streaming_config = texttospeech.StreamingSynthesizeConfig(
|
257
|
+
voice=self._opts.voice,
|
258
|
+
streaming_audio_config=texttospeech.StreamingAudioConfig(
|
259
|
+
audio_encoding=encoding, sample_rate_hertz=self._opts.sample_rate
|
260
|
+
),
|
261
|
+
)
|
262
|
+
|
263
|
+
async def _tokenize_input() -> None:
|
294
264
|
input_stream = None
|
295
265
|
async for input in self._input_ch:
|
296
266
|
if isinstance(input, str):
|
@@ -302,12 +272,12 @@ class SynthesizeStream(tts.SynthesizeStream):
|
|
302
272
|
if input_stream:
|
303
273
|
input_stream.end_input()
|
304
274
|
input_stream = None
|
275
|
+
|
305
276
|
self._segments_ch.close()
|
306
277
|
|
307
|
-
|
308
|
-
async def _run_segments():
|
278
|
+
async def _run_segments() -> None:
|
309
279
|
async for input_stream in self._segments_ch:
|
310
|
-
await self._run_stream(input_stream,
|
280
|
+
await self._run_stream(input_stream, output_emitter, streaming_config)
|
311
281
|
|
312
282
|
tasks = [
|
313
283
|
asyncio.create_task(_tokenize_input()),
|
@@ -315,26 +285,22 @@ class SynthesizeStream(tts.SynthesizeStream):
|
|
315
285
|
]
|
316
286
|
try:
|
317
287
|
await asyncio.gather(*tasks)
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
async def _run_stream(self, input_stream, request_id):
|
322
|
-
streaming_config = texttospeech.StreamingSynthesizeConfig(
|
323
|
-
voice=self._opts.voice,
|
324
|
-
streaming_audio_config=texttospeech.StreamingAudioConfig(
|
325
|
-
audio_encoding=texttospeech.AudioEncoding.PCM
|
326
|
-
),
|
327
|
-
)
|
328
|
-
emitter = tts.SynthesizedAudioEmitter(event_ch=self._event_ch, request_id=request_id)
|
329
|
-
audio_bstream = utils.audio.AudioByteStream(
|
330
|
-
sample_rate=self._opts.audio_config.sample_rate_hertz,
|
331
|
-
num_channels=NUM_CHANNELS,
|
332
|
-
)
|
288
|
+
finally:
|
289
|
+
await utils.aio.cancel_and_wait(*tasks)
|
333
290
|
|
291
|
+
async def _run_stream(
|
292
|
+
self,
|
293
|
+
input_stream: tokenize.SentenceStream,
|
294
|
+
output_emitter: tts.AudioEmitter,
|
295
|
+
streaming_config: texttospeech.StreamingSynthesizeConfig,
|
296
|
+
) -> None:
|
334
297
|
@utils.log_exceptions(logger=logger)
|
335
|
-
async def input_generator()
|
298
|
+
async def input_generator() -> AsyncGenerator[
|
299
|
+
texttospeech.StreamingSynthesizeRequest, None
|
300
|
+
]:
|
336
301
|
try:
|
337
302
|
yield texttospeech.StreamingSynthesizeRequest(streaming_config=streaming_config)
|
303
|
+
|
338
304
|
async for input in input_stream:
|
339
305
|
self._mark_started()
|
340
306
|
yield texttospeech.StreamingSynthesizeRequest(
|
@@ -344,30 +310,24 @@ class SynthesizeStream(tts.SynthesizeStream):
|
|
344
310
|
except Exception:
|
345
311
|
logger.exception("an error occurred while streaming input to google TTS")
|
346
312
|
|
313
|
+
input_gen = input_generator()
|
347
314
|
try:
|
348
|
-
stream = await self.
|
349
|
-
|
350
|
-
timeout=self._conn_options.timeout,
|
315
|
+
stream = await self._tts._ensure_client().streaming_synthesize(
|
316
|
+
input_gen, timeout=self._conn_options.timeout
|
351
317
|
)
|
318
|
+
output_emitter.start_segment(segment_id=utils.shortuuid())
|
319
|
+
|
352
320
|
async for resp in stream:
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
except DeadlineExceeded as e:
|
360
|
-
logger.debug(f"google tts deadline exceeded: {e}")
|
361
|
-
pass
|
321
|
+
output_emitter.push(resp.audio_content)
|
322
|
+
|
323
|
+
output_emitter.end_segment()
|
324
|
+
|
325
|
+
except DeadlineExceeded:
|
326
|
+
raise APITimeoutError() from None
|
362
327
|
except GoogleAPICallError as e:
|
363
|
-
raise APIStatusError(
|
364
|
-
|
365
|
-
|
366
|
-
request_id=request_id,
|
367
|
-
body=None,
|
368
|
-
) from e
|
369
|
-
except Exception as e:
|
370
|
-
raise APIConnectionError() from e
|
328
|
+
raise APIStatusError(e.message, status_code=e.code or -1) from e
|
329
|
+
finally:
|
330
|
+
await input_gen.aclose()
|
371
331
|
|
372
332
|
|
373
333
|
def _gender_from_str(gender: str) -> SsmlVoiceGender:
|
@@ -378,3 +338,16 @@ def _gender_from_str(gender: str) -> SsmlVoiceGender:
|
|
378
338
|
ssml_gender = SsmlVoiceGender.FEMALE
|
379
339
|
|
380
340
|
return ssml_gender # type: ignore
|
341
|
+
|
342
|
+
|
343
|
+
def _encoding_to_mimetype(encoding: texttospeech.AudioEncoding) -> str:
|
344
|
+
if encoding == texttospeech.AudioEncoding.PCM:
|
345
|
+
return "audio/pcm"
|
346
|
+
elif encoding == texttospeech.AudioEncoding.LINEAR16:
|
347
|
+
return "audio/wav"
|
348
|
+
elif encoding == texttospeech.AudioEncoding.MP3:
|
349
|
+
return "audio/mp3"
|
350
|
+
elif encoding == texttospeech.AudioEncoding.OGG_OPUS:
|
351
|
+
return "audio/opus"
|
352
|
+
else:
|
353
|
+
raise RuntimeError(f"encoding {encoding} isn't supported")
|
livekit/plugins/google/utils.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
-
import json
|
4
3
|
import re
|
5
4
|
from copy import deepcopy
|
6
5
|
from typing import Any
|
@@ -19,8 +18,9 @@ from livekit.agents.llm.tool_context import (
|
|
19
18
|
)
|
20
19
|
|
21
20
|
from .log import logger
|
21
|
+
from .tools import _LLMTool
|
22
22
|
|
23
|
-
__all__ = ["
|
23
|
+
__all__ = ["to_fnc_ctx"]
|
24
24
|
|
25
25
|
|
26
26
|
def to_fnc_ctx(fncs: list[FunctionTool | RawFunctionTool]) -> list[types.FunctionDeclaration]:
|
@@ -36,6 +36,42 @@ def to_fnc_ctx(fncs: list[FunctionTool | RawFunctionTool]) -> list[types.Functio
|
|
36
36
|
return tools
|
37
37
|
|
38
38
|
|
39
|
+
def create_tools_config(
|
40
|
+
*,
|
41
|
+
function_tools: list[types.FunctionDeclaration] | None = None,
|
42
|
+
gemini_tools: list[_LLMTool] | None = None,
|
43
|
+
) -> list[types.Tool]:
|
44
|
+
tools: list[types.Tool] = []
|
45
|
+
|
46
|
+
if function_tools:
|
47
|
+
tools.append(types.Tool(function_declarations=function_tools))
|
48
|
+
|
49
|
+
if gemini_tools:
|
50
|
+
for tool in gemini_tools:
|
51
|
+
if isinstance(tool, types.GoogleSearchRetrieval):
|
52
|
+
tools.append(types.Tool(google_search_retrieval=tool))
|
53
|
+
elif isinstance(tool, types.ToolCodeExecution):
|
54
|
+
tools.append(types.Tool(code_execution=tool))
|
55
|
+
elif isinstance(tool, types.GoogleSearch):
|
56
|
+
tools.append(types.Tool(google_search=tool))
|
57
|
+
elif isinstance(tool, types.UrlContext):
|
58
|
+
tools.append(types.Tool(url_context=tool))
|
59
|
+
elif isinstance(tool, types.GoogleMaps):
|
60
|
+
tools.append(types.Tool(google_maps=tool))
|
61
|
+
else:
|
62
|
+
logger.warning(f"Warning: Received unhandled tool type: {type(tool)}")
|
63
|
+
continue
|
64
|
+
|
65
|
+
if len(tools) > 1:
|
66
|
+
# https://github.com/google/adk-python/issues/53#issuecomment-2799538041
|
67
|
+
logger.warning(
|
68
|
+
"Multiple kinds of tools are not supported in Gemini. Only the first tool will be used."
|
69
|
+
)
|
70
|
+
tools = tools[:1]
|
71
|
+
|
72
|
+
return tools
|
73
|
+
|
74
|
+
|
39
75
|
def get_tool_results_for_realtime(
|
40
76
|
chat_ctx: llm.ChatContext, *, vertexai: bool = False
|
41
77
|
) -> types.LiveClientToolResponse | None:
|
@@ -58,98 +94,13 @@ def get_tool_results_for_realtime(
|
|
58
94
|
)
|
59
95
|
|
60
96
|
|
61
|
-
def to_chat_ctx(
|
62
|
-
chat_ctx: llm.ChatContext,
|
63
|
-
cache_key: Any,
|
64
|
-
ignore_functions: bool = False,
|
65
|
-
generate: bool = False,
|
66
|
-
) -> tuple[list[types.Content], types.Content | None]:
|
67
|
-
turns: list[types.Content] = []
|
68
|
-
system_instruction: types.Content | None = None
|
69
|
-
current_role: str | None = None
|
70
|
-
parts: list[types.Part] = []
|
71
|
-
|
72
|
-
for msg in chat_ctx.items:
|
73
|
-
if msg.type == "message" and msg.role == "system":
|
74
|
-
sys_parts = []
|
75
|
-
for content in msg.content:
|
76
|
-
if content and isinstance(content, str):
|
77
|
-
sys_parts.append(types.Part(text=content))
|
78
|
-
system_instruction = types.Content(parts=sys_parts)
|
79
|
-
continue
|
80
|
-
|
81
|
-
if msg.type == "message":
|
82
|
-
role = "model" if msg.role == "assistant" else "user"
|
83
|
-
elif msg.type == "function_call":
|
84
|
-
role = "model"
|
85
|
-
elif msg.type == "function_call_output":
|
86
|
-
role = "user"
|
87
|
-
|
88
|
-
# if the effective role changed, finalize the previous turn.
|
89
|
-
if role != current_role:
|
90
|
-
if current_role is not None and parts:
|
91
|
-
turns.append(types.Content(role=current_role, parts=parts))
|
92
|
-
parts = []
|
93
|
-
current_role = role
|
94
|
-
|
95
|
-
if msg.type == "message":
|
96
|
-
for content in msg.content:
|
97
|
-
if content and isinstance(content, str):
|
98
|
-
parts.append(types.Part(text=content))
|
99
|
-
elif content and isinstance(content, dict):
|
100
|
-
parts.append(types.Part(text=json.dumps(content)))
|
101
|
-
elif isinstance(content, llm.ImageContent):
|
102
|
-
parts.append(_to_image_part(content, cache_key))
|
103
|
-
elif msg.type == "function_call" and not ignore_functions:
|
104
|
-
parts.append(
|
105
|
-
types.Part(
|
106
|
-
function_call=types.FunctionCall(
|
107
|
-
name=msg.name,
|
108
|
-
args=json.loads(msg.arguments),
|
109
|
-
)
|
110
|
-
)
|
111
|
-
)
|
112
|
-
elif msg.type == "function_call_output" and not ignore_functions:
|
113
|
-
parts.append(
|
114
|
-
types.Part(
|
115
|
-
function_response=types.FunctionResponse(
|
116
|
-
name=msg.name,
|
117
|
-
response={"text": msg.output},
|
118
|
-
)
|
119
|
-
)
|
120
|
-
)
|
121
|
-
|
122
|
-
if current_role is not None and parts:
|
123
|
-
turns.append(types.Content(role=current_role, parts=parts))
|
124
|
-
|
125
|
-
# Gemini requires the last message to end with user's turn before they can generate
|
126
|
-
if generate and current_role != "user":
|
127
|
-
turns.append(types.Content(role="user", parts=[types.Part(text=".")]))
|
128
|
-
|
129
|
-
return turns, system_instruction
|
130
|
-
|
131
|
-
|
132
|
-
def _to_image_part(image: llm.ImageContent, cache_key: Any) -> types.Part:
|
133
|
-
img = llm.utils.serialize_image(image)
|
134
|
-
if img.external_url:
|
135
|
-
if img.mime_type:
|
136
|
-
mime_type = img.mime_type
|
137
|
-
else:
|
138
|
-
logger.debug("No media type provided for image, defaulting to image/jpeg.")
|
139
|
-
mime_type = "image/jpeg"
|
140
|
-
return types.Part.from_uri(file_uri=img.external_url, mime_type=mime_type)
|
141
|
-
if cache_key not in image._cache:
|
142
|
-
image._cache[cache_key] = img.data_bytes
|
143
|
-
return types.Part.from_bytes(data=image._cache[cache_key], mime_type=img.mime_type)
|
144
|
-
|
145
|
-
|
146
97
|
def _build_gemini_fnc(function_tool: FunctionTool) -> types.FunctionDeclaration:
|
147
98
|
fnc = llm.utils.build_legacy_openai_schema(function_tool, internally_tagged=True)
|
148
99
|
json_schema = _GeminiJsonSchema(fnc["parameters"]).simplify()
|
149
100
|
return types.FunctionDeclaration(
|
150
101
|
name=fnc["name"],
|
151
102
|
description=fnc["description"],
|
152
|
-
parameters=json_schema,
|
103
|
+
parameters=types.Schema.model_validate(json_schema) if json_schema else None,
|
153
104
|
)
|
154
105
|
|
155
106
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: livekit-plugins-google
|
3
|
-
Version: 1.0
|
3
|
+
Version: 1.1.0
|
4
4
|
Summary: Agent Framework plugin for services from Google Cloud
|
5
5
|
Project-URL: Documentation, https://docs.livekit.io
|
6
6
|
Project-URL: Website, https://livekit.io/
|
@@ -21,8 +21,8 @@ Requires-Python: >=3.9.0
|
|
21
21
|
Requires-Dist: google-auth<3,>=2
|
22
22
|
Requires-Dist: google-cloud-speech<3,>=2
|
23
23
|
Requires-Dist: google-cloud-texttospeech<3,>=2.24
|
24
|
-
Requires-Dist: google-genai>=
|
25
|
-
Requires-Dist: livekit-agents>=1.0
|
24
|
+
Requires-Dist: google-genai>=v1.16.1
|
25
|
+
Requires-Dist: livekit-agents>=1.1.0
|
26
26
|
Description-Content-Type: text/markdown
|
27
27
|
|
28
28
|
# Google AI plugin for LiveKit Agents
|
@@ -0,0 +1,17 @@
|
|
1
|
+
livekit/plugins/google/__init__.py,sha256=XIyZ-iFnRBpaLtOJgVwojlB-a8GjdDugVFcjBpMEww8,1412
|
2
|
+
livekit/plugins/google/llm.py,sha256=MIi-6kk8AZQxcf5y4zB3HwwEQHAJSCIdX79yf9QMAvI,17835
|
3
|
+
livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
|
4
|
+
livekit/plugins/google/models.py,sha256=hOpfbN_qdQ1ZTpCN9m9dvG2eb6WgQ3KE3WRpIeeM_T0,1569
|
5
|
+
livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
|
+
livekit/plugins/google/stt.py,sha256=SddM50w6g2rNkjaF5OtrPwEH-qqq36sa-v_6ogKoBYg,24077
|
7
|
+
livekit/plugins/google/tools.py,sha256=tD5HVDHO5JfUF029Cx3axHMJec0Gxalkl7s1FDgxLzI,259
|
8
|
+
livekit/plugins/google/tts.py,sha256=PzDfEfvQfj-uSHYOUelFnwYK0Wu2-5Mp8PID0b4I5kc,14293
|
9
|
+
livekit/plugins/google/utils.py,sha256=-4z6wrjVaZPtFRowkpwaA2acBRfqtzTk4r2xrPDUdCk,8609
|
10
|
+
livekit/plugins/google/version.py,sha256=7SjyflIFTjH0djSotKGIRoRykPCqMpVYetIlvHMFuh0,600
|
11
|
+
livekit/plugins/google/beta/__init__.py,sha256=5PnoG3Ux24bjzMSzmTeSVljE9EINivGcbWUEV6egGnM,216
|
12
|
+
livekit/plugins/google/beta/realtime/__init__.py,sha256=_fW2NMN22F-hnQ4xAJ_g5lPbR7CvM_xXzSWlUQY-E-U,188
|
13
|
+
livekit/plugins/google/beta/realtime/api_proto.py,sha256=NfE7xr2N3JOu7gVfWbAmDcEhs8vuZgMRu5vpScPJzsg,776
|
14
|
+
livekit/plugins/google/beta/realtime/realtime_api.py,sha256=Mt-f7mkwVd7Aq84HPh_AdIOaB4ye8d6TTllcEjKO5TY,45918
|
15
|
+
livekit_plugins_google-1.1.0.dist-info/METADATA,sha256=HeQoxgYu0-hOIOawXsvtwHeESXj1U2Oo5GpwEUEx-W8,1907
|
16
|
+
livekit_plugins_google-1.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
17
|
+
livekit_plugins_google-1.1.0.dist-info/RECORD,,
|
@@ -1,16 +0,0 @@
|
|
1
|
-
livekit/plugins/google/__init__.py,sha256=xain2qUzU-YWhYWsLBkW8Q-szV-htpnzHTqymMPo-j0,1364
|
2
|
-
livekit/plugins/google/llm.py,sha256=Kr9qeBZ5Dd0WCCBR_-gM3WWsVRZPCSteK8NpBsg2C5Y,16304
|
3
|
-
livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
|
4
|
-
livekit/plugins/google/models.py,sha256=maGlEM3hK4-5hMnH9UQMJewA7BZMrnStsFLBNoNVySg,1531
|
5
|
-
livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
|
-
livekit/plugins/google/stt.py,sha256=2jk-1fHiBT8UW_n3CZsIEdMp2iBnUAlTnmefdUd8rAM,23620
|
7
|
-
livekit/plugins/google/tts.py,sha256=FfhNfGtW8drmYDDfLLZDjaIp2GvNiIdoovgtZq4t_l8,14211
|
8
|
-
livekit/plugins/google/utils.py,sha256=UBAbddYk7G8Nojg6bSC7_xN2pdl9qhs86HGhKYFuf9M,10509
|
9
|
-
livekit/plugins/google/version.py,sha256=-8dkOE2vDSF9WN8VoBrSwU2sb5YBGFuwPnSQXQ-uaYM,601
|
10
|
-
livekit/plugins/google/beta/__init__.py,sha256=5PnoG3Ux24bjzMSzmTeSVljE9EINivGcbWUEV6egGnM,216
|
11
|
-
livekit/plugins/google/beta/realtime/__init__.py,sha256=_fW2NMN22F-hnQ4xAJ_g5lPbR7CvM_xXzSWlUQY-E-U,188
|
12
|
-
livekit/plugins/google/beta/realtime/api_proto.py,sha256=Fyrejs3SG0EjOPCCFLEnWXKEUxCff47PMWk2VsKJm5E,594
|
13
|
-
livekit/plugins/google/beta/realtime/realtime_api.py,sha256=yYB5fKXl_aaMH_ZSpfUlfOTUg4eRqqRENLTZhZMfBMc,36253
|
14
|
-
livekit_plugins_google-1.0.22.dist-info/METADATA,sha256=S4bQZr4NhWrAI6vyJi299sh5lsD5eVMNfxvN9__xAMY,1908
|
15
|
-
livekit_plugins_google-1.0.22.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
16
|
-
livekit_plugins_google-1.0.22.dist-info/RECORD,,
|
File without changes
|