livekit-plugins-google 1.0.13__py3-none-any.whl → 1.0.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- livekit/plugins/google/beta/realtime/api_proto.py +1 -1
- livekit/plugins/google/beta/realtime/realtime_api.py +12 -7
- livekit/plugins/google/llm.py +27 -3
- livekit/plugins/google/tts.py +36 -13
- livekit/plugins/google/version.py +1 -1
- {livekit_plugins_google-1.0.13.dist-info → livekit_plugins_google-1.0.14.dist-info}/METADATA +2 -2
- {livekit_plugins_google-1.0.13.dist-info → livekit_plugins_google-1.0.14.dist-info}/RECORD +8 -8
- {livekit_plugins_google-1.0.13.dist-info → livekit_plugins_google-1.0.14.dist-info}/WHEEL +0 -0
@@ -5,7 +5,7 @@ from typing import Literal, Union
|
|
5
5
|
|
6
6
|
from google.genai import types
|
7
7
|
|
8
|
-
LiveAPIModels = Literal["gemini-2.0-flash-exp"]
|
8
|
+
LiveAPIModels = Literal["gemini-2.0-flash-exp", "gemini-2.0-flash-live-001"]
|
9
9
|
|
10
10
|
Voice = Literal["Puck", "Charon", "Kore", "Fenrir", "Aoede"]
|
11
11
|
|
@@ -95,7 +95,7 @@ class RealtimeModel(llm.RealtimeModel):
|
|
95
95
|
self,
|
96
96
|
*,
|
97
97
|
instructions: NotGivenOr[str] = NOT_GIVEN,
|
98
|
-
model: LiveAPIModels | str = "gemini-2.0-flash-exp",
|
98
|
+
model: LiveAPIModels | str = "gemini-2.0-flash-live-001",
|
99
99
|
api_key: NotGivenOr[str] = NOT_GIVEN,
|
100
100
|
voice: Voice | str = "Puck",
|
101
101
|
modalities: NotGivenOr[list[Modality]] = NOT_GIVEN,
|
@@ -357,10 +357,10 @@ class RealtimeSession(llm.RealtimeSession):
|
|
357
357
|
return fut
|
358
358
|
|
359
359
|
def interrupt(self) -> None:
|
360
|
-
|
360
|
+
pass
|
361
361
|
|
362
362
|
def truncate(self, *, message_id: str, audio_end_ms: int) -> None:
|
363
|
-
|
363
|
+
pass
|
364
364
|
|
365
365
|
async def aclose(self) -> None:
|
366
366
|
self._msg_ch.close()
|
@@ -423,8 +423,8 @@ class RealtimeSession(llm.RealtimeSession):
|
|
423
423
|
async for msg in self._msg_ch:
|
424
424
|
if isinstance(msg, LiveClientContent):
|
425
425
|
await session.send(input=msg, end_of_turn=True)
|
426
|
-
|
427
|
-
|
426
|
+
else:
|
427
|
+
await session.send(input=msg)
|
428
428
|
await session.send(input=".", end_of_turn=True)
|
429
429
|
|
430
430
|
@utils.log_exceptions(logger=logger)
|
@@ -543,8 +543,11 @@ class RealtimeSession(llm.RealtimeSession):
|
|
543
543
|
output_transcription = server_content.output_transcription
|
544
544
|
if output_transcription and output_transcription.text:
|
545
545
|
item_generation.text_ch.send_nowait(output_transcription.text)
|
546
|
+
if server_content.interrupted:
|
547
|
+
self._finalize_response()
|
548
|
+
self._handle_input_speech_started()
|
546
549
|
|
547
|
-
if server_content.
|
550
|
+
if server_content.turn_complete:
|
548
551
|
self._finalize_response()
|
549
552
|
|
550
553
|
def _finalize_response(self) -> None:
|
@@ -560,7 +563,9 @@ class RealtimeSession(llm.RealtimeSession):
|
|
560
563
|
self._current_generation = None
|
561
564
|
self._is_interrupted = True
|
562
565
|
self._active_response_id = None
|
563
|
-
|
566
|
+
|
567
|
+
def _handle_input_speech_started(self):
|
568
|
+
self.emit("input_speech_started", llm.InputSpeechStartedEvent())
|
564
569
|
|
565
570
|
def _handle_tool_calls(self, tool_call: LiveServerToolCall):
|
566
571
|
if not self._current_generation:
|
livekit/plugins/google/llm.py
CHANGED
@@ -53,6 +53,7 @@ class _LLMOptions:
|
|
53
53
|
top_k: NotGivenOr[float]
|
54
54
|
presence_penalty: NotGivenOr[float]
|
55
55
|
frequency_penalty: NotGivenOr[float]
|
56
|
+
thinking_config: NotGivenOr[types.ThinkingConfigOrDict]
|
56
57
|
|
57
58
|
|
58
59
|
class LLM(llm.LLM):
|
@@ -71,6 +72,7 @@ class LLM(llm.LLM):
|
|
71
72
|
presence_penalty: NotGivenOr[float] = NOT_GIVEN,
|
72
73
|
frequency_penalty: NotGivenOr[float] = NOT_GIVEN,
|
73
74
|
tool_choice: NotGivenOr[ToolChoice] = NOT_GIVEN,
|
75
|
+
thinking_config: NotGivenOr[types.ThinkingConfigOrDict] = NOT_GIVEN,
|
74
76
|
) -> None:
|
75
77
|
"""
|
76
78
|
Create a new instance of Google GenAI LLM.
|
@@ -95,6 +97,7 @@ class LLM(llm.LLM):
|
|
95
97
|
presence_penalty (float, optional): Penalizes the model for generating previously mentioned concepts. Defaults to None.
|
96
98
|
frequency_penalty (float, optional): Penalizes the model for repeating words. Defaults to None.
|
97
99
|
tool_choice (ToolChoice, optional): Specifies whether to use tools during response generation. Defaults to "auto".
|
100
|
+
thinking_config (ThinkingConfigOrDict, optional): The thinking configuration for response generation. Defaults to None.
|
98
101
|
""" # noqa: E501
|
99
102
|
super().__init__()
|
100
103
|
gcp_project = project if is_given(project) else os.environ.get("GOOGLE_CLOUD_PROJECT")
|
@@ -121,6 +124,22 @@ class LLM(llm.LLM):
|
|
121
124
|
"API key is required for Google API either via api_key or GOOGLE_API_KEY environment variable" # noqa: E501
|
122
125
|
)
|
123
126
|
|
127
|
+
# Validate thinking_config
|
128
|
+
if is_given(thinking_config):
|
129
|
+
_thinking_budget = None
|
130
|
+
if isinstance(thinking_config, dict):
|
131
|
+
_thinking_budget = thinking_config.get("thinking_budget")
|
132
|
+
elif isinstance(thinking_config, types.ThinkingConfig):
|
133
|
+
_thinking_budget = thinking_config.thinking_budget
|
134
|
+
|
135
|
+
if _thinking_budget is not None:
|
136
|
+
if not isinstance(_thinking_budget, int):
|
137
|
+
raise ValueError("thinking_budget inside thinking_config must be an integer")
|
138
|
+
if not (0 <= _thinking_budget <= 24576):
|
139
|
+
raise ValueError(
|
140
|
+
"thinking_budget inside thinking_config must be between 0 and 24576"
|
141
|
+
)
|
142
|
+
|
124
143
|
self._opts = _LLMOptions(
|
125
144
|
model=model,
|
126
145
|
temperature=temperature,
|
@@ -133,6 +152,7 @@ class LLM(llm.LLM):
|
|
133
152
|
top_k=top_k,
|
134
153
|
presence_penalty=presence_penalty,
|
135
154
|
frequency_penalty=frequency_penalty,
|
155
|
+
thinking_config=thinking_config,
|
136
156
|
)
|
137
157
|
self._client = genai.Client(
|
138
158
|
api_key=gemini_api_key,
|
@@ -212,6 +232,10 @@ class LLM(llm.LLM):
|
|
212
232
|
if is_given(self._opts.frequency_penalty):
|
213
233
|
extra["frequency_penalty"] = self._opts.frequency_penalty
|
214
234
|
|
235
|
+
# Add thinking config if thinking_budget is provided
|
236
|
+
if is_given(self._opts.thinking_config):
|
237
|
+
extra["thinking_config"] = self._opts.thinking_config
|
238
|
+
|
215
239
|
return LLMStream(
|
216
240
|
self,
|
217
241
|
client=self._client,
|
@@ -310,7 +334,7 @@ class LLMStream(llm.LLMStream):
|
|
310
334
|
raise APIStatusError(
|
311
335
|
"gemini llm: client error",
|
312
336
|
status_code=e.code,
|
313
|
-
body=e.message
|
337
|
+
body=f"{e.message} {e.status}",
|
314
338
|
request_id=request_id,
|
315
339
|
retryable=False if e.code != 429 else True,
|
316
340
|
) from e
|
@@ -318,7 +342,7 @@ class LLMStream(llm.LLMStream):
|
|
318
342
|
raise APIStatusError(
|
319
343
|
"gemini llm: server error",
|
320
344
|
status_code=e.code,
|
321
|
-
body=e.message
|
345
|
+
body=f"{e.message} {e.status}",
|
322
346
|
request_id=request_id,
|
323
347
|
retryable=retryable,
|
324
348
|
) from e
|
@@ -326,7 +350,7 @@ class LLMStream(llm.LLMStream):
|
|
326
350
|
raise APIStatusError(
|
327
351
|
"gemini llm: api error",
|
328
352
|
status_code=e.code,
|
329
|
-
body=e.message
|
353
|
+
body=f"{e.message} {e.status}",
|
330
354
|
request_id=request_id,
|
331
355
|
retryable=retryable,
|
332
356
|
) from e
|
livekit/plugins/google/tts.py
CHANGED
@@ -35,6 +35,8 @@ from livekit.agents.types import (
|
|
35
35
|
)
|
36
36
|
from livekit.agents.utils import is_given
|
37
37
|
|
38
|
+
from .models import Gender, SpeechLanguages
|
39
|
+
|
38
40
|
|
39
41
|
@dataclass
|
40
42
|
class _TTSOptions:
|
@@ -46,7 +48,9 @@ class TTS(tts.TTS):
|
|
46
48
|
def __init__(
|
47
49
|
self,
|
48
50
|
*,
|
49
|
-
|
51
|
+
language: NotGivenOr[SpeechLanguages | str] = NOT_GIVEN,
|
52
|
+
gender: NotGivenOr[Gender | str] = NOT_GIVEN,
|
53
|
+
voice_name: NotGivenOr[str] = NOT_GIVEN,
|
50
54
|
sample_rate: int = 24000,
|
51
55
|
pitch: int = 0,
|
52
56
|
effects_profile_id: str = "",
|
@@ -63,7 +67,9 @@ class TTS(tts.TTS):
|
|
63
67
|
environmental variable.
|
64
68
|
|
65
69
|
Args:
|
66
|
-
|
70
|
+
language (SpeechLanguages | str, optional): Language code (e.g., "en-US"). Default is "en-US".
|
71
|
+
gender (Gender | str, optional): Voice gender ("male", "female", "neutral"). Default is "neutral".
|
72
|
+
voice_name (str, optional): Specific voice name. Default is an empty string.
|
67
73
|
sample_rate (int, optional): Audio sample rate in Hz. Default is 24000.
|
68
74
|
location (str, optional): Location for the TTS client. Default is "global".
|
69
75
|
pitch (float, optional): Speaking pitch, ranging from -20.0 to 20.0 semitones relative to the original pitch. Default is 0.
|
@@ -85,15 +91,19 @@ class TTS(tts.TTS):
|
|
85
91
|
self._credentials_info = credentials_info
|
86
92
|
self._credentials_file = credentials_file
|
87
93
|
self._location = location
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
+
|
95
|
+
lang = language if is_given(language) else "en-US"
|
96
|
+
ssml_gender = _gender_from_str("neutral" if not is_given(gender) else gender)
|
97
|
+
name = "" if not is_given(voice_name) else voice_name
|
98
|
+
|
99
|
+
voice_params = texttospeech.VoiceSelectionParams(
|
100
|
+
name=name,
|
101
|
+
language_code=lang,
|
102
|
+
ssml_gender=ssml_gender,
|
103
|
+
)
|
94
104
|
|
95
105
|
self._opts = _TTSOptions(
|
96
|
-
voice=
|
106
|
+
voice=voice_params,
|
97
107
|
audio_config=texttospeech.AudioConfig(
|
98
108
|
audio_encoding=texttospeech.AudioEncoding.OGG_OPUS,
|
99
109
|
sample_rate_hertz=sample_rate,
|
@@ -106,18 +116,31 @@ class TTS(tts.TTS):
|
|
106
116
|
def update_options(
|
107
117
|
self,
|
108
118
|
*,
|
109
|
-
|
119
|
+
language: NotGivenOr[SpeechLanguages | str] = NOT_GIVEN,
|
120
|
+
gender: NotGivenOr[Gender | str] = NOT_GIVEN,
|
121
|
+
voice_name: NotGivenOr[str] = NOT_GIVEN,
|
110
122
|
speaking_rate: NotGivenOr[float] = NOT_GIVEN,
|
111
123
|
) -> None:
|
112
124
|
"""
|
113
125
|
Update the TTS options.
|
114
126
|
|
115
127
|
Args:
|
116
|
-
|
128
|
+
language (SpeechLanguages | str, optional): Language code (e.g., "en-US").
|
129
|
+
gender (Gender | str, optional): Voice gender ("male", "female", "neutral").
|
130
|
+
voice_name (str, optional): Specific voice name.
|
117
131
|
speaking_rate (float, optional): Speed of speech.
|
118
132
|
""" # noqa: E501
|
119
|
-
|
120
|
-
|
133
|
+
params = {}
|
134
|
+
if is_given(language):
|
135
|
+
params["language"] = language
|
136
|
+
if is_given(gender):
|
137
|
+
params["gender"] = gender
|
138
|
+
if is_given(voice_name):
|
139
|
+
params["voice_name"] = voice_name
|
140
|
+
|
141
|
+
if params:
|
142
|
+
self._opts.voice = texttospeech.VoiceSelectionParams(**params)
|
143
|
+
|
121
144
|
if is_given(speaking_rate):
|
122
145
|
self._opts.audio_config.speaking_rate = speaking_rate
|
123
146
|
|
{livekit_plugins_google-1.0.13.dist-info → livekit_plugins_google-1.0.14.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: livekit-plugins-google
|
3
|
-
Version: 1.0.13
|
3
|
+
Version: 1.0.14
|
4
4
|
Summary: Agent Framework plugin for services from Google Cloud
|
5
5
|
Project-URL: Documentation, https://docs.livekit.io
|
6
6
|
Project-URL: Website, https://livekit.io/
|
@@ -22,7 +22,7 @@ Requires-Dist: google-auth<3,>=2
|
|
22
22
|
Requires-Dist: google-cloud-speech<3,>=2
|
23
23
|
Requires-Dist: google-cloud-texttospeech<3,>=2
|
24
24
|
Requires-Dist: google-genai>=1.10.0
|
25
|
-
Requires-Dist: livekit-agents>=1.0.13
|
25
|
+
Requires-Dist: livekit-agents>=1.0.14
|
26
26
|
Description-Content-Type: text/markdown
|
27
27
|
|
28
28
|
# LiveKit Plugins Google
|
@@ -1,16 +1,16 @@
|
|
1
1
|
livekit/plugins/google/__init__.py,sha256=e_kSlFNmKhyyeliz7f4WOKc_Y0-y39QjO5nCWuguhss,1171
|
2
|
-
livekit/plugins/google/llm.py,sha256=
|
2
|
+
livekit/plugins/google/llm.py,sha256=SqNGg6-wlrIUo9uaismP7QW5XztkXyDivJXLVgOIZMI,16175
|
3
3
|
livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
|
4
4
|
livekit/plugins/google/models.py,sha256=SGjAumdDK97NNLwMFcqZdKR68f1NoGB2Rk1UP2-imG0,1457
|
5
5
|
livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
6
|
livekit/plugins/google/stt.py,sha256=AG_lh2fuuduJi0jFbA_QKFXLJ6NUdF1W_FfkLUJML_Q,22413
|
7
|
-
livekit/plugins/google/tts.py,sha256=
|
7
|
+
livekit/plugins/google/tts.py,sha256=xhINokqY8UutXn85N-cbzq68eptbM6TTtIXmLktE_RM,9004
|
8
8
|
livekit/plugins/google/utils.py,sha256=pbLSOAdQxInWhgI2Yhsrr9KvgvpFXYDdU2yx2p03pFg,9437
|
9
|
-
livekit/plugins/google/version.py,sha256=
|
9
|
+
livekit/plugins/google/version.py,sha256=_YeHNcjndmG5QY6ec1nkXLdNYsalxmzzc9riXew4isE,601
|
10
10
|
livekit/plugins/google/beta/__init__.py,sha256=AxRYc7NGG62Tv1MmcZVCDHNvlhbC86hM-_yP01Qb28k,47
|
11
11
|
livekit/plugins/google/beta/realtime/__init__.py,sha256=_fW2NMN22F-hnQ4xAJ_g5lPbR7CvM_xXzSWlUQY-E-U,188
|
12
|
-
livekit/plugins/google/beta/realtime/api_proto.py,sha256=
|
13
|
-
livekit/plugins/google/beta/realtime/realtime_api.py,sha256=
|
14
|
-
livekit_plugins_google-1.0.
|
15
|
-
livekit_plugins_google-1.0.
|
16
|
-
livekit_plugins_google-1.0.
|
12
|
+
livekit/plugins/google/beta/realtime/api_proto.py,sha256=Fyrejs3SG0EjOPCCFLEnWXKEUxCff47PMWk2VsKJm5E,594
|
13
|
+
livekit/plugins/google/beta/realtime/realtime_api.py,sha256=HvPYyQXC9OodWaDNxbRt1UAJ8IVdXZGK-PsIEr7UwbY,25078
|
14
|
+
livekit_plugins_google-1.0.14.dist-info/METADATA,sha256=6Cad6bNTLQwTapX6_ByDGHlbRZLbg0FRo2NbF7NfVrw,3492
|
15
|
+
livekit_plugins_google-1.0.14.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
16
|
+
livekit_plugins_google-1.0.14.dist-info/RECORD,,
|
File without changes
|