livekit-plugins-google 1.3.8__py3-none-any.whl → 1.3.11__py3-none-any.whl
This diff compares publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
- livekit/plugins/google/__init__.py +11 -3
- livekit/plugins/google/llm.py +142 -81
- livekit/plugins/google/models.py +15 -1
- livekit/plugins/google/realtime/api_proto.py +12 -10
- livekit/plugins/google/realtime/realtime_api.py +25 -28
- livekit/plugins/google/stt.py +281 -93
- livekit/plugins/google/tools.py +69 -9
- livekit/plugins/google/tts.py +17 -9
- livekit/plugins/google/utils.py +21 -87
- livekit/plugins/google/version.py +1 -1
- {livekit_plugins_google-1.3.8.dist-info → livekit_plugins_google-1.3.11.dist-info}/METADATA +1 -1
- livekit_plugins_google-1.3.11.dist-info/RECORD +18 -0
- livekit_plugins_google-1.3.8.dist-info/RECORD +0 -18
- {livekit_plugins_google-1.3.8.dist-info → livekit_plugins_google-1.3.11.dist-info}/WHEEL +0 -0
livekit/plugins/google/realtime/realtime_api.py

@@ -27,8 +27,7 @@ from livekit.agents.utils import audio as audio_utils, images, is_given
 from livekit.plugins.google.realtime.api_proto import ClientEvents, LiveAPIModels, Voice
 
 from ..log import logger
-from ..
-from ..utils import create_tools_config, get_tool_results_for_realtime, to_fnc_ctx
+from ..utils import create_tools_config, get_tool_results_for_realtime
 from ..version import __version__
 
 INPUT_AUDIO_SAMPLE_RATE = 16000
@@ -79,7 +78,6 @@ class _RealtimeOptions:
     realtime_input_config: NotGivenOr[types.RealtimeInputConfig] = NOT_GIVEN
     context_window_compression: NotGivenOr[types.ContextWindowCompressionConfig] = NOT_GIVEN
     api_version: NotGivenOr[str] = NOT_GIVEN
-    gemini_tools: NotGivenOr[list[_LLMTool]] = NOT_GIVEN
     tool_behavior: NotGivenOr[types.Behavior] = NOT_GIVEN
     tool_response_scheduling: NotGivenOr[types.FunctionResponseScheduling] = NOT_GIVEN
     thinking_config: NotGivenOr[types.ThinkingConfig] = NOT_GIVEN
@@ -150,7 +148,6 @@ class RealtimeModel(llm.RealtimeModel):
         api_version: NotGivenOr[str] = NOT_GIVEN,
         conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
         http_options: NotGivenOr[types.HttpOptions] = NOT_GIVEN,
-        _gemini_tools: NotGivenOr[list[_LLMTool]] = NOT_GIVEN,
         thinking_config: NotGivenOr[types.ThinkingConfig] = NOT_GIVEN,
     ) -> None:
         """
@@ -167,7 +164,7 @@ class RealtimeModel(llm.RealtimeModel):
             instructions (str, optional): Initial system instructions for the model. Defaults to "".
             api_key (str, optional): Google Gemini API key. If None, will attempt to read from the environment variable GOOGLE_API_KEY.
             modalities (list[Modality], optional): Modalities to use, such as ["TEXT", "AUDIO"]. Defaults to ["AUDIO"].
-            model (str, optional): The name of the model to use. Defaults to "gemini-2.
+            model (str, optional): The name of the model to use. Defaults to "gemini-2.5-flash-native-audio-preview-12-2025" or "gemini-live-2.5-flash-native-audio" (vertexai).
             voice (api_proto.Voice, optional): Voice setting for audio outputs. Defaults to "Puck".
             language (str, optional): The language(BCP-47 Code) to use for the API. supported languages - https://ai.google.dev/gemini-api/docs/live#supported-languages
             temperature (float, optional): Sampling temperature for response generation. Defaults to 0.8.
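
A quick usage sketch based on the parameters documented in this docstring; the `livekit.plugins.google` import and the `google.realtime.RealtimeModel` path are assumptions from the package layout, and the values shown are the documented defaults rather than new behavior.

```python
from livekit.plugins import google

# Sketch only: parameter names and defaults are taken from the docstring above.
model = google.realtime.RealtimeModel(
    instructions="You are a friendly voice assistant.",
    voice="Puck",      # documented default voice
    temperature=0.8,   # documented default sampling temperature
    # api_key is read from GOOGLE_API_KEY when omitted; model defaults to
    # "gemini-2.5-flash-native-audio-preview-12-2025" (or
    # "gemini-live-2.5-flash-native-audio" on Vertex AI) as of 1.3.11.
)
```
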
@@ -191,7 +188,6 @@ class RealtimeModel(llm.RealtimeModel):
             session_resumption (SessionResumptionConfig, optional): The configuration for session resumption. Defaults to None.
             thinking_config (ThinkingConfig, optional): Native audio thinking configuration.
             conn_options (APIConnectOptions, optional): The configuration for the API connection. Defaults to DEFAULT_API_CONNECT_OPTIONS.
-            _gemini_tools (list[LLMTool], optional): Gemini-specific tools to use for the session. This parameter is experimental and may change.
 
         Raises:
             ValueError: If the API key is required but not found.
@@ -283,7 +279,6 @@ class RealtimeModel(llm.RealtimeModel):
             realtime_input_config=realtime_input_config,
             context_window_compression=context_window_compression,
             api_version=api_version,
-            gemini_tools=_gemini_tools,
             tool_behavior=tool_behavior,
             tool_response_scheduling=tool_response_scheduling,
             conn_options=conn_options,
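
Together with the earlier hunks, this removes the experimental `_gemini_tools` escape hatch end to end: the `_RealtimeOptions` field, the constructor keyword, its docstring entry, and the wiring shown here. Assuming no other code path still accepts the keyword, callers upgrading from 1.3.8 can expect construction to fail if they keep passing it:

```python
# Hypothetical 1.3.8-era caller code; under 1.3.11 this raises a TypeError
# (unexpected keyword argument '_gemini_tools') because the parameter was
# removed from RealtimeModel.__init__.
from livekit.plugins import google

model = google.realtime.RealtimeModel(_gemini_tools=[])
```
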
@@ -355,7 +350,6 @@ class RealtimeSession(llm.RealtimeSession):
         super().__init__(realtime_model)
         self._opts = realtime_model._opts
         self._tools = llm.ToolContext.empty()
-        self._gemini_declarations: list[types.FunctionDeclaration] = []
         self._chat_ctx = llm.ChatContext.empty()
         self._msg_ch = utils.aio.Chan[ClientEvents]()
         self._input_resampler: rtc.AudioResampler | None = None
@@ -368,7 +362,11 @@ class RealtimeSession(llm.RealtimeSession):
         )
 
         api_version = self._opts.api_version
-        if
+        if (
+            not api_version
+            and (self._opts.enable_affective_dialog or self._opts.proactivity)
+            and not self._opts.vertexai
+        ):
             api_version = "v1alpha"
 
         http_options = self._opts.http_options or types.HttpOptions(
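
The new condition reads as a small predicate: fall back to the `v1alpha` API version only when the caller did not pin a version, an affective-dialog or proactivity feature is enabled, and the session is not running against Vertex AI. The helper below is illustrative only (it is not part of the plugin); the option names are the ones used in the hunk.

```python
def resolve_api_version(
    api_version: str | None,
    *,
    enable_affective_dialog: bool,
    proactivity: bool,
    vertexai: bool,
) -> str | None:
    # Mirrors the added condition: v1alpha is only forced when no explicit
    # version was configured, a preview feature is requested, and the plain
    # Gemini API backend (not Vertex AI) is in use.
    if not api_version and (enable_affective_dialog or proactivity) and not vertexai:
        return "v1alpha"
    return api_version
```
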
@@ -473,9 +471,12 @@ class RealtimeSession(llm.RealtimeSession):
             self._mark_restart_needed()
 
     async def update_chat_ctx(self, chat_ctx: llm.ChatContext) -> None:
+        chat_ctx = chat_ctx.copy(
+            exclude_handoff=True, exclude_instructions=True, exclude_empty_message=True
+        )
         async with self._session_lock:
             if not self._active_session:
-                self._chat_ctx = chat_ctx
+                self._chat_ctx = chat_ctx
                 return
 
             diff_ops = llm.utils.compute_chat_ctx_diff(self._chat_ctx, chat_ctx)
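
`update_chat_ctx()` now sanitizes the incoming context before diffing or storing it, so handoff items, system instructions, and empty messages never become Live API turns; the same exclusions (plus `exclude_function_call`) reappear in the session-start hunk further down. A standalone illustration using only the keyword arguments visible in the diff (the helper name is ours):

```python
from livekit.agents import llm


def sanitize_for_realtime(chat_ctx: llm.ChatContext) -> llm.ChatContext:
    # Drop items the Gemini Live session should never see as conversation turns.
    return chat_ctx.copy(
        exclude_handoff=True,
        exclude_instructions=True,
        exclude_empty_message=True,
    )
```
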
@@ -490,9 +491,9 @@ class RealtimeSession(llm.RealtimeSession):
                     append_ctx.items.append(item)
 
             if append_ctx.items:
-                turns_dict, _ = append_ctx.copy(
-
-                )
+                turns_dict, _ = append_ctx.copy(exclude_function_call=True).to_provider_format(
+                    format="google", inject_dummy_user_message=False
+                )
                 # we are not generating, and do not need to inject
                 turns = [types.Content.model_validate(turn) for turn in turns_dict]
                 tool_results = get_tool_results_for_realtime(
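
The three removed lines collapse into one chained call: copy the appended turns without function-call items, then serialize them to Gemini's provider format without injecting a dummy user message. A hedged, self-contained version of that conversion (only the two chained calls and the `types.Content` validation come from the diff; the wrapper function is ours):

```python
from google.genai import types

from livekit.agents import llm


def to_gemini_turns(append_ctx: llm.ChatContext) -> list[types.Content]:
    # Function calls are synced separately via get_tool_results_for_realtime,
    # so they are excluded before converting to provider-format dicts.
    turns_dict, _ = append_ctx.copy(exclude_function_call=True).to_provider_format(
        format="google", inject_dummy_user_message=False
    )
    return [types.Content.model_validate(turn) for turn in turns_dict]
```
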
@@ -507,19 +508,15 @@ class RealtimeSession(llm.RealtimeSession):
 
         # since we don't have a view of the history on the server side, we'll assume
         # the current state is accurate. this isn't perfect because removals aren't done.
-        self._chat_ctx = chat_ctx
+        self._chat_ctx = chat_ctx
 
-    async def update_tools(self, tools: list[llm.
-
-
-
-        current_tool_names = {f.name for f in self._gemini_declarations}
-        new_tool_names = {f.name for f in new_declarations}
+    async def update_tools(self, tools: list[llm.Tool]) -> None:
+        tool_ctx = llm.ToolContext(tools)
+        if self._tools == tool_ctx:
+            return
 
-
-
-        self._tools = llm.ToolContext(tools)
-        self._mark_restart_needed()
+        self._tools = tool_ctx
+        self._mark_restart_needed()
 
     @property
     def chat_ctx(self) -> llm.ChatContext:
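
The rewritten `update_tools()` drops the declaration-name bookkeeping in favor of a direct `ToolContext` comparison: if the new tool set matches the current one, the live session is left alone; otherwise the context is stored and a restart is scheduled, since tools are supplied through the connect config (see `_build_connect_config` below) and only take effect on a new connection. The excerpt restates the added lines with comments; it is not a separate implementation.

```python
async def update_tools(self, tools: list[llm.Tool]) -> None:
    tool_ctx = llm.ToolContext(tools)
    if self._tools == tool_ctx:
        # Same tool set as the active session: nothing to resend, no restart.
        return

    self._tools = tool_ctx
    # Tools are advertised via the connect config, so a changed tool set
    # requires reconnecting; the session loop picks this flag up.
    self._mark_restart_needed()
```
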
@@ -686,6 +683,9 @@ class RealtimeSession(llm.RealtimeSession):
             self._active_session = session
             turns_dict, _ = self._chat_ctx.copy(
                 exclude_function_call=True,
+                exclude_handoff=True,
+                exclude_instructions=True,
+                exclude_empty_message=True,
             ).to_provider_format(format="google", inject_dummy_user_message=False)
             if turns_dict:
                 turns = [types.Content.model_validate(turn) for turn in turns_dict]
@@ -876,10 +876,7 @@ class RealtimeSession(llm.RealtimeSession):
     def _build_connect_config(self) -> types.LiveConnectConfig:
         temp = self._opts.temperature if is_given(self._opts.temperature) else None
 
-        tools_config = create_tools_config(
-            function_tools=self._gemini_declarations,
-            gemini_tools=self._opts.gemini_tools if is_given(self._opts.gemini_tools) else None,
-        )
+        tools_config = create_tools_config(self._tools, tool_behavior=self._opts.tool_behavior)
         conf = types.LiveConnectConfig(
             response_modalities=self._opts.response_modalities,
             generation_config=types.GenerationConfig(
|