livekit-plugins-google 1.0.0rc9__tar.gz → 1.0.2__tar.gz

This diff shows the changes between two publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.
Files changed (20)
  1. {livekit_plugins_google-1.0.0rc9 → livekit_plugins_google-1.0.2}/PKG-INFO +2 -2
  2. {livekit_plugins_google-1.0.0rc9 → livekit_plugins_google-1.0.2}/livekit/plugins/google/beta/realtime/api_proto.py +0 -3
  3. livekit_plugins_google-1.0.2/livekit/plugins/google/beta/realtime/realtime_api.py +550 -0
  4. {livekit_plugins_google-1.0.0rc9 → livekit_plugins_google-1.0.2}/livekit/plugins/google/stt.py +6 -12
  5. {livekit_plugins_google-1.0.0rc9 → livekit_plugins_google-1.0.2}/livekit/plugins/google/tts.py +4 -7
  6. {livekit_plugins_google-1.0.0rc9 → livekit_plugins_google-1.0.2}/livekit/plugins/google/utils.py +21 -3
  7. {livekit_plugins_google-1.0.0rc9 → livekit_plugins_google-1.0.2}/livekit/plugins/google/version.py +1 -1
  8. {livekit_plugins_google-1.0.0rc9 → livekit_plugins_google-1.0.2}/pyproject.toml +1 -1
  9. livekit_plugins_google-1.0.0rc9/livekit/plugins/google/beta/realtime/realtime_api.py +0 -569
  10. livekit_plugins_google-1.0.0rc9/livekit/plugins/google/beta/realtime/temp.py +0 -10
  11. livekit_plugins_google-1.0.0rc9/livekit/plugins/google/beta/realtime/transcriber.py +0 -254
  12. {livekit_plugins_google-1.0.0rc9 → livekit_plugins_google-1.0.2}/.gitignore +0 -0
  13. {livekit_plugins_google-1.0.0rc9 → livekit_plugins_google-1.0.2}/README.md +0 -0
  14. {livekit_plugins_google-1.0.0rc9 → livekit_plugins_google-1.0.2}/livekit/plugins/google/__init__.py +0 -0
  15. {livekit_plugins_google-1.0.0rc9 → livekit_plugins_google-1.0.2}/livekit/plugins/google/beta/__init__.py +0 -0
  16. {livekit_plugins_google-1.0.0rc9 → livekit_plugins_google-1.0.2}/livekit/plugins/google/beta/realtime/__init__.py +0 -0
  17. {livekit_plugins_google-1.0.0rc9 → livekit_plugins_google-1.0.2}/livekit/plugins/google/llm.py +0 -0
  18. {livekit_plugins_google-1.0.0rc9 → livekit_plugins_google-1.0.2}/livekit/plugins/google/log.py +0 -0
  19. {livekit_plugins_google-1.0.0rc9 → livekit_plugins_google-1.0.2}/livekit/plugins/google/models.py +0 -0
  20. {livekit_plugins_google-1.0.0rc9 → livekit_plugins_google-1.0.2}/livekit/plugins/google/py.typed +0 -0
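
The headline change is the new `realtime_api.py` (file 3, +550 lines), which replaces the rc9 implementation along with `temp.py` and `transcriber.py` (files 9-11). As a reading aid, here is a minimal usage sketch of the new `RealtimeModel`. The constructor arguments are taken from the `__init__` signature in the diff below; the import path and the surrounding wiring are assumptions, not part of this diff:

```python
# Hypothetical sketch: only the constructor arguments are confirmed by the
# diff below; the re-exported import path is an assumption.
from livekit.plugins.google.beta.realtime import RealtimeModel

model = RealtimeModel(
    instructions="You are a helpful voice assistant.",
    model="gemini-2.0-flash-exp",  # default model in the diff
    voice="Puck",                  # one of the Voice literals in api_proto.py
    vertexai=False,                # True requires GOOGLE_CLOUD_PROJECT/GOOGLE_CLOUD_LOCATION
)

# session() returns a RealtimeSession and tracks it in a WeakSet, so later
# model.update_options(...) calls propagate to live sessions.
session = model.session()
```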
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: livekit-plugins-google
-Version: 1.0.0rc9
+Version: 1.0.2
 Summary: Agent Framework plugin for services from Google Cloud
 Project-URL: Documentation, https://docs.livekit.io
 Project-URL: Website, https://livekit.io/
@@ -22,7 +22,7 @@ Requires-Dist: google-auth<3,>=2
 Requires-Dist: google-cloud-speech<3,>=2
 Requires-Dist: google-cloud-texttospeech<3,>=2
 Requires-Dist: google-genai==1.5.0
-Requires-Dist: livekit-agents>=1.0.0.rc9
+Requires-Dist: livekit-agents>=1.0.2
 Description-Content-Type: text/markdown
 
 # LiveKit Plugins Google
@@ -5,13 +5,10 @@ from typing import Literal, Union
 
 from google.genai import types
 
-# from ..._utils import _build_gemini_ctx, _build_tools
-
 LiveAPIModels = Literal["gemini-2.0-flash-exp"]
 
 Voice = Literal["Puck", "Charon", "Kore", "Fenrir", "Aoede"]
 
-# __all__ = ["_build_tools", "ClientEvents", "_build_gemini_ctx"]
 
 ClientEvents = Union[
     types.ContentListUnion,
@@ -0,0 +1,550 @@
+from __future__ import annotations
+
+import asyncio
+import json
+import os
+import weakref
+from dataclasses import dataclass
+
+from google import genai
+from google.genai._api_client import HttpOptions
+from google.genai.types import (
+    Blob,
+    Content,
+    FunctionDeclaration,
+    GenerationConfig,
+    LiveClientContent,
+    LiveClientRealtimeInput,
+    LiveConnectConfig,
+    LiveServerContent,
+    LiveServerToolCall,
+    LiveServerToolCallCancellation,
+    Modality,
+    Part,
+    PrebuiltVoiceConfig,
+    SpeechConfig,
+    Tool,
+    VoiceConfig,
+)
+from livekit import rtc
+from livekit.agents import llm, utils
+from livekit.agents.types import NOT_GIVEN, NotGivenOr
+from livekit.agents.utils import is_given
+
+from ...log import logger
+from ...utils import _build_gemini_fnc, get_tool_results_for_realtime, to_chat_ctx
+from .api_proto import ClientEvents, LiveAPIModels, Voice
+
+INPUT_AUDIO_SAMPLE_RATE = 16000
+OUTPUT_AUDIO_SAMPLE_RATE = 24000
+NUM_CHANNELS = 1
+
+
+@dataclass
+class InputTranscription:
+    item_id: str
+    transcript: str
+
+
+@dataclass
+class _RealtimeOptions:
+    model: LiveAPIModels | str
+    api_key: str | None
+    voice: Voice | str
+    response_modalities: NotGivenOr[list[Modality]]
+    vertexai: bool
+    project: str | None
+    location: str | None
+    candidate_count: int
+    temperature: NotGivenOr[float]
+    max_output_tokens: NotGivenOr[int]
+    top_p: NotGivenOr[float]
+    top_k: NotGivenOr[int]
+    presence_penalty: NotGivenOr[float]
+    frequency_penalty: NotGivenOr[float]
+    instructions: NotGivenOr[str]
+
+
+@dataclass
+class _MessageGeneration:
+    message_id: str
+    text_ch: utils.aio.Chan[str]
+    audio_ch: utils.aio.Chan[rtc.AudioFrame]
+
+
+@dataclass
+class _ResponseGeneration:
+    message_ch: utils.aio.Chan[llm.MessageGeneration]
+    function_ch: utils.aio.Chan[llm.FunctionCall]
+
+    messages: dict[str, _MessageGeneration]
+
+
+class RealtimeModel(llm.RealtimeModel):
+    def __init__(
+        self,
+        *,
+        instructions: NotGivenOr[str] = NOT_GIVEN,
+        model: LiveAPIModels | str = "gemini-2.0-flash-exp",
+        api_key: NotGivenOr[str] = NOT_GIVEN,
+        voice: Voice | str = "Puck",
+        modalities: NotGivenOr[list[Modality]] = NOT_GIVEN,
+        vertexai: bool = False,
+        project: NotGivenOr[str] = NOT_GIVEN,
+        location: NotGivenOr[str] = NOT_GIVEN,
+        candidate_count: int = 1,
+        temperature: NotGivenOr[float] = NOT_GIVEN,
+        max_output_tokens: NotGivenOr[int] = NOT_GIVEN,
+        top_p: NotGivenOr[float] = NOT_GIVEN,
+        top_k: NotGivenOr[int] = NOT_GIVEN,
+        presence_penalty: NotGivenOr[float] = NOT_GIVEN,
+        frequency_penalty: NotGivenOr[float] = NOT_GIVEN,
+    ) -> None:
+        """
+        Initializes a RealtimeModel instance for interacting with Google's Realtime API.
+
+        Environment Requirements:
+        - For VertexAI: Set the `GOOGLE_APPLICATION_CREDENTIALS` environment variable to the path of the service account key file.
+          The Google Cloud project and location can be set via `project` and `location` arguments or the environment variables
+          `GOOGLE_CLOUD_PROJECT` and `GOOGLE_CLOUD_LOCATION`. By default, the project is inferred from the service account key file,
+          and the location defaults to "us-central1".
+        - For Google Gemini API: Set the `api_key` argument or the `GOOGLE_API_KEY` environment variable.
+
+        Args:
+            instructions (str, optional): Initial system instructions for the model. Defaults to "".
+            api_key (str, optional): Google Gemini API key. If None, will attempt to read from the environment variable GOOGLE_API_KEY.
+            modalities (list[Modality], optional): Modalities to use, such as ["TEXT", "AUDIO"]. Defaults to ["AUDIO"].
+            model (str, optional): The name of the model to use. Defaults to "gemini-2.0-flash-exp".
+            voice (api_proto.Voice, optional): Voice setting for audio outputs. Defaults to "Puck".
+            temperature (float, optional): Sampling temperature for response generation. Defaults to 0.8.
+            vertexai (bool, optional): Whether to use VertexAI for the API. Defaults to False.
+            project (str, optional): The project id to use for the API. Defaults to None. (for vertexai)
+            location (str, optional): The location to use for the API. Defaults to None. (for vertexai)
+            candidate_count (int, optional): The number of candidate responses to generate. Defaults to 1.
+            top_p (float, optional): The top-p value for response generation
+            top_k (int, optional): The top-k value for response generation
+            presence_penalty (float, optional): The presence penalty for response generation
+            frequency_penalty (float, optional): The frequency penalty for response generation
+
+        Raises:
+            ValueError: If the API key is required but not found.
+        """  # noqa: E501
+        super().__init__(
+            capabilities=llm.RealtimeCapabilities(
+                message_truncation=False,
+                turn_detection=True,
+                user_transcription=False,
+            )
+        )
+
+        gemini_api_key = api_key if is_given(api_key) else os.environ.get("GOOGLE_API_KEY")
+        gcp_project = project if is_given(project) else os.environ.get("GOOGLE_CLOUD_PROJECT")
+        gcp_location = location if is_given(location) else os.environ.get("GOOGLE_CLOUD_LOCATION")
+        if vertexai:
+            if not gcp_project or not gcp_location:
+                raise ValueError(
+                    "Project and location are required for VertexAI either via project and location or GOOGLE_CLOUD_PROJECT and GOOGLE_CLOUD_LOCATION environment variables"  # noqa: E501
+                )
+            gemini_api_key = None  # VertexAI does not require an API key
+
+        else:
+            gcp_project = None
+            gcp_location = None
+            if not gemini_api_key:
+                raise ValueError(
+                    "API key is required for Google API either via api_key or GOOGLE_API_KEY environment variable"  # noqa: E501
+                )
+
+        self._opts = _RealtimeOptions(
+            model=model,
+            api_key=gemini_api_key,
+            voice=voice,
+            response_modalities=modalities,
+            vertexai=vertexai,
+            project=gcp_project,
+            location=gcp_location,
+            candidate_count=candidate_count,
+            temperature=temperature,
+            max_output_tokens=max_output_tokens,
+            top_p=top_p,
+            top_k=top_k,
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            instructions=instructions,
+        )
+
+        self._sessions = weakref.WeakSet[RealtimeSession]()
+
+    def session(self) -> RealtimeSession:
+        sess = RealtimeSession(self)
+        self._sessions.add(sess)
+        return sess
+
+    def update_options(
+        self, *, voice: NotGivenOr[str] = NOT_GIVEN, temperature: NotGivenOr[float] = NOT_GIVEN
+    ) -> None:
+        if is_given(voice):
+            self._opts.voice = voice
+
+        if is_given(temperature):
+            self._opts.temperature = temperature
+
+        for sess in self._sessions:
+            sess.update_options(voice=self._opts.voice, temperature=self._opts.temperature)
+
+    async def aclose(self) -> None: ...
+
+
+class RealtimeSession(llm.RealtimeSession):
+    def __init__(self, realtime_model: RealtimeModel) -> None:
+        super().__init__(realtime_model)
+        self._opts = realtime_model._opts
+        self._tools = llm.ToolContext.empty()
+        self._chat_ctx = llm.ChatContext.empty()
+        self._msg_ch = utils.aio.Chan[ClientEvents]()
+        self._gemini_tools: list[Tool] = []
+        self._client = genai.Client(
+            http_options=HttpOptions(api_version="v1alpha"),
+            api_key=self._opts.api_key,
+            vertexai=self._opts.vertexai,
+            project=self._opts.project,
+            location=self._opts.location,
+        )
+        self._main_atask = asyncio.create_task(self._main_task(), name="gemini-realtime-session")
+
+        self._current_generation: _ResponseGeneration | None = None
+
+        self._is_interrupted = False
+        self._active_response_id = None
+        self._session = None
+        self._update_chat_ctx_lock = asyncio.Lock()
+        self._update_fnc_ctx_lock = asyncio.Lock()
+        self._response_created_futures: dict[str, asyncio.Future[llm.GenerationCreatedEvent]] = {}
+        self._pending_generation_event_id = None
+
+        self._reconnect_event = asyncio.Event()
+        self._session_lock = asyncio.Lock()
+        self._gemini_close_task: asyncio.Task | None = None
+
+    def _schedule_gemini_session_close(self) -> None:
+        if self._session is not None:
+            self._gemini_close_task = asyncio.create_task(self._close_gemini_session())
+
+    async def _close_gemini_session(self) -> None:
+        async with self._session_lock:
+            if self._session:
+                try:
+                    await self._session.close()
+                finally:
+                    self._session = None
+
+    def update_options(
+        self,
+        *,
+        voice: NotGivenOr[str] = NOT_GIVEN,
+        tool_choice: NotGivenOr[llm.ToolChoice | None] = NOT_GIVEN,
+        temperature: NotGivenOr[float] = NOT_GIVEN,
+    ) -> None:
+        if is_given(voice):
+            self._opts.voice = voice
+
+        if is_given(temperature):
+            self._opts.temperature = temperature
+
+        if self._session:
+            logger.warning("Updating options; triggering Gemini session reconnect.")
+            self._reconnect_event.set()
+            self._schedule_gemini_session_close()
+
+    async def update_instructions(self, instructions: str) -> None:
+        self._opts.instructions = instructions
+        if self._session:
+            logger.warning("Updating instructions; triggering Gemini session reconnect.")
+            self._reconnect_event.set()
+            self._schedule_gemini_session_close()
+
+    async def update_chat_ctx(self, chat_ctx: llm.ChatContext) -> None:
+        async with self._update_chat_ctx_lock:
+            self._chat_ctx = chat_ctx
+            turns, _ = to_chat_ctx(self._chat_ctx, id(self), ignore_functions=True)
+            tool_results = get_tool_results_for_realtime(self._chat_ctx)
+            if turns:
+                self._msg_ch.send_nowait(LiveClientContent(turns=turns, turn_complete=False))
+            if tool_results:
+                self._msg_ch.send_nowait(tool_results)
+
+    async def update_tools(self, tools: list[llm.FunctionTool]) -> None:
+        async with self._update_fnc_ctx_lock:
+            retained_tools: list[llm.FunctionTool] = []
+            gemini_function_declarations: list[FunctionDeclaration] = []
+
+            for tool in tools:
+                gemini_function = _build_gemini_fnc(tool)
+                gemini_function_declarations.append(gemini_function)
+                retained_tools.append(tool)
+
+            self._tools = llm.ToolContext(retained_tools)
+            self._gemini_tools = [Tool(function_declarations=gemini_function_declarations)]
+            if self._session and gemini_function_declarations:
+                logger.warning("Updating tools; triggering Gemini session reconnect.")
+                self._reconnect_event.set()
+                self._schedule_gemini_session_close()
+
+    @property
+    def chat_ctx(self) -> llm.ChatContext:
+        return self._chat_ctx
+
+    @property
+    def tools(self) -> llm.ToolContext:
+        return self._tools
+
+    def push_audio(self, frame: rtc.AudioFrame) -> None:
+        realtime_input = LiveClientRealtimeInput(
+            media_chunks=[Blob(data=frame.data.tobytes(), mime_type="audio/pcm")],
+        )
+        self._msg_ch.send_nowait(realtime_input)
+
+    def generate_reply(
+        self, *, instructions: NotGivenOr[str] = NOT_GIVEN
+    ) -> asyncio.Future[llm.GenerationCreatedEvent]:
+        fut = asyncio.Future()
+
+        event_id = utils.shortuuid("gemini-response-")
+        self._response_created_futures[event_id] = fut
+        self._pending_generation_event_id = event_id
+
+        instructions_content = instructions if is_given(instructions) else "."
+        ctx = [Content(parts=[Part(text=instructions_content)], role="user")]
+        self._msg_ch.send_nowait(LiveClientContent(turns=ctx, turn_complete=True))
+
+        def _on_timeout() -> None:
+            if event_id in self._response_created_futures and not fut.done():
+                fut.set_exception(llm.RealtimeError("generate_reply timed out."))
+                self._response_created_futures.pop(event_id, None)
+                if self._pending_generation_event_id == event_id:
+                    self._pending_generation_event_id = None
+
+        handle = asyncio.get_event_loop().call_later(5.0, _on_timeout)
+        fut.add_done_callback(lambda _: handle.cancel())
+
+        return fut
+
+    def interrupt(self) -> None:
+        logger.warning("interrupt() - no direct cancellation in Gemini")
+
+    def truncate(self, *, message_id: str, audio_end_ms: int) -> None:
+        logger.warning(f"truncate(...) called for {message_id}, ignoring for Gemini")
+
+    async def aclose(self) -> None:
+        self._msg_ch.close()
+
+        for fut in self._response_created_futures.values():
+            if not fut.done():
+                fut.set_exception(llm.RealtimeError("Session closed"))
+
+        if self._main_atask:
+            await utils.aio.cancel_and_wait(self._main_atask)
+
+        if self._gemini_close_task:
+            await utils.aio.cancel_and_wait(self._gemini_close_task)
+
+    @utils.log_exceptions(logger=logger)
+    async def _main_task(self):
+        while True:
+            config = LiveConnectConfig(
+                response_modalities=self._opts.response_modalities
+                if is_given(self._opts.response_modalities)
+                else [Modality.AUDIO],
+                generation_config=GenerationConfig(
+                    candidate_count=self._opts.candidate_count,
+                    temperature=self._opts.temperature
+                    if is_given(self._opts.temperature)
+                    else None,
+                    max_output_tokens=self._opts.max_output_tokens
+                    if is_given(self._opts.max_output_tokens)
+                    else None,
+                    top_p=self._opts.top_p if is_given(self._opts.top_p) else None,
+                    top_k=self._opts.top_k if is_given(self._opts.top_k) else None,
+                    presence_penalty=self._opts.presence_penalty
+                    if is_given(self._opts.presence_penalty)
+                    else None,
+                    frequency_penalty=self._opts.frequency_penalty
+                    if is_given(self._opts.frequency_penalty)
+                    else None,
+                ),
+                system_instruction=Content(parts=[Part(text=self._opts.instructions)])
+                if is_given(self._opts.instructions)
+                else None,
+                speech_config=SpeechConfig(
+                    voice_config=VoiceConfig(
+                        prebuilt_voice_config=PrebuiltVoiceConfig(voice_name=self._opts.voice)
+                    )
+                ),
+                tools=self._gemini_tools,
+            )
+
+            async with self._client.aio.live.connect(
+                model=self._opts.model, config=config
+            ) as session:
+                async with self._session_lock:
+                    self._session = session
+
+                @utils.log_exceptions(logger=logger)
+                async def _send_task():
+                    async for msg in self._msg_ch:
+                        if isinstance(msg, LiveClientContent):
+                            await session.send(input=msg, end_of_turn=True)
+
+                        await session.send(input=msg)
+                    await session.send(input=".", end_of_turn=True)
+
+                @utils.log_exceptions(logger=logger)
+                async def _recv_task():
+                    while True:
+                        async for response in session.receive():
+                            if self._active_response_id is None:
+                                self._start_new_generation()
+                            if response.server_content:
+                                self._handle_server_content(response.server_content)
+                            if response.tool_call:
+                                self._handle_tool_calls(response.tool_call)
+                            if response.tool_call_cancellation:
+                                self._handle_tool_call_cancellation(response.tool_call_cancellation)
+
+                send_task = asyncio.create_task(_send_task(), name="gemini-realtime-send")
+                recv_task = asyncio.create_task(_recv_task(), name="gemini-realtime-recv")
+                reconnect_task = asyncio.create_task(
+                    self._reconnect_event.wait(), name="reconnect-wait"
+                )
+
+                try:
+                    done, _ = await asyncio.wait(
+                        [send_task, recv_task, reconnect_task],
+                        return_when=asyncio.FIRST_COMPLETED,
+                    )
+                    for task in done:
+                        if task != reconnect_task:
+                            task.result()
+
+                    if reconnect_task not in done:
+                        break
+
+                    self._reconnect_event.clear()
+                finally:
+                    await utils.aio.cancel_and_wait(send_task, recv_task, reconnect_task)
+
+    def _start_new_generation(self):
+        self._is_interrupted = False
+        self._active_response_id = utils.shortuuid("gemini-turn-")
+        self._current_generation = _ResponseGeneration(
+            message_ch=utils.aio.Chan[llm.MessageGeneration](),
+            function_ch=utils.aio.Chan[llm.FunctionCall](),
+            messages={},
+        )
+
+        # We'll assume each chunk belongs to a single message ID self._active_response_id
+        item_generation = _MessageGeneration(
+            message_id=self._active_response_id,
+            text_ch=utils.aio.Chan[str](),
+            audio_ch=utils.aio.Chan[rtc.AudioFrame](),
+        )
+
+        self._current_generation.message_ch.send_nowait(
+            llm.MessageGeneration(
+                message_id=self._active_response_id,
+                text_stream=item_generation.text_ch,
+                audio_stream=item_generation.audio_ch,
+            )
+        )
+
+        generation_event = llm.GenerationCreatedEvent(
+            message_stream=self._current_generation.message_ch,
+            function_stream=self._current_generation.function_ch,
+            user_initiated=False,
+        )
+
+        # Resolve any pending future from generate_reply()
+        if self._pending_generation_event_id and (
+            fut := self._response_created_futures.pop(self._pending_generation_event_id, None)
+        ):
+            fut.set_result(generation_event)
+
+        self._pending_generation_event_id = None
+        self.emit("generation_created", generation_event)
+
+        self._current_generation.messages[self._active_response_id] = item_generation
+
+    def _handle_server_content(self, server_content: LiveServerContent):
+        if not self._current_generation or not self._active_response_id:
+            logger.warning(
+                "gemini-realtime-session: No active response ID, skipping server content"
+            )
+            return
+
+        item_generation = self._current_generation.messages[self._active_response_id]
+
+        model_turn = server_content.model_turn
+        if model_turn:
+            for part in model_turn.parts:
+                if part.text:
+                    item_generation.text_ch.send_nowait(part.text)
+                if part.inline_data:
+                    frame_data = part.inline_data.data
+                    frame = rtc.AudioFrame(
+                        data=frame_data,
+                        sample_rate=OUTPUT_AUDIO_SAMPLE_RATE,
+                        num_channels=NUM_CHANNELS,
+                        samples_per_channel=len(frame_data) // 2,
+                    )
+                    item_generation.audio_ch.send_nowait(frame)
+
+        if server_content.interrupted or server_content.turn_complete:
+            self._finalize_response()
+
+    def _finalize_response(self) -> None:
+        if not self._current_generation:
+            return
+
+        for item_generation in self._current_generation.messages.values():
+            item_generation.text_ch.close()
+            item_generation.audio_ch.close()
+
+        self._current_generation.function_ch.close()
+        self._current_generation.message_ch.close()
+        self._current_generation = None
+        self._is_interrupted = True
+        self._active_response_id = None
+        self.emit("agent_speech_stopped")
+
+    def _handle_tool_calls(self, tool_call: LiveServerToolCall):
+        if not self._current_generation:
+            return
+        for fnc_call in tool_call.function_calls:
+            self._current_generation.function_ch.send_nowait(
+                llm.FunctionCall(
+                    call_id=fnc_call.id,
+                    name=fnc_call.name,
+                    arguments=json.dumps(fnc_call.args),
+                )
+            )
+        self._finalize_response()
+
+    def _handle_tool_call_cancellation(
+        self, tool_call_cancellation: LiveServerToolCallCancellation
+    ):
+        logger.warning(
+            "function call cancelled",
+            extra={
+                "function_call_ids": tool_call_cancellation.ids,
+            },
+        )
+        self.emit("function_calls_cancelled", tool_call_cancellation.ids)
+
+    def commit_audio(self) -> None:
+        raise NotImplementedError("commit_audio_buffer is not supported yet")
+
+    def clear_audio(self) -> None:
+        raise NotImplementedError("clear_audio is not supported yet")
+
+    def server_vad_enabled(self) -> bool:
+        return True
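
One pattern worth noting in the file above: `generate_reply()` returns a future that `_start_new_generation()` resolves once the server opens a new turn, and a 5-second timer fails it otherwise. A standalone sketch of that timer-guarded future pattern, with illustrative names:

```python
import asyncio


async def main() -> None:
    loop = asyncio.get_running_loop()
    fut: asyncio.Future[str] = loop.create_future()

    def _on_timeout() -> None:
        # Mirrors generate_reply(): fail the future if nothing resolved it in time.
        if not fut.done():
            fut.set_exception(TimeoutError("generation was never created"))

    handle = loop.call_later(5.0, _on_timeout)
    # Cancel the timer as soon as the future completes either way.
    fut.add_done_callback(lambda _: handle.cancel())

    # In the real session, _start_new_generation() plays this role.
    loop.call_soon(fut.set_result, "generation_created")
    print(await fut)


asyncio.run(main())
```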
@@ -132,11 +132,11 @@ class STT(stt.STT):
             try:
                 gauth_default()
             except DefaultCredentialsError:
-                raise ValueError(  # noqa: B904
+                raise ValueError(
                     "Application default credentials must be available "
                     "when using Google STT without explicitly passing "
                     "credentials through credentials_info or credentials_file."
-                )
+                ) from None
 
         if isinstance(languages, str):
             languages = [languages]
@@ -244,12 +244,9 @@ class STT(stt.STT):
 
             return _recognize_response_to_speech_event(raw)
         except DeadlineExceeded:
-            raise APITimeoutError()  # noqa: B904
+            raise APITimeoutError() from None
         except GoogleAPICallError as e:
-            raise APIStatusError(  # noqa: B904
-                e.message,
-                status_code=e.code or -1,
-            )
+            raise APIStatusError(e.message, status_code=e.code or -1) from None
         except Exception as e:
             raise APIConnectionError() from e
 
@@ -495,12 +492,9 @@ class SpeechStream(stt.SpeechStream):
                 await utils.aio.gracefully_cancel(process_stream_task, wait_reconnect_task)
                 should_stop.set()
         except DeadlineExceeded:
-            raise APITimeoutError()  # noqa: B904
+            raise APITimeoutError() from None
         except GoogleAPICallError as e:
-            raise APIStatusError(  # noqa: B904
-                e.message,
-                status_code=e.code or -1,
-            )
+            raise APIStatusError(e.message, status_code=e.code or -1) from None
         except Exception as e:
             raise APIConnectionError() from e
 
@@ -203,14 +203,11 @@ class ChunkedStream(tts.ChunkedStream):
             await decoder.aclose()
 
         except DeadlineExceeded:
-            raise APITimeoutError()  # noqa: B904
+            raise APITimeoutError() from None
         except GoogleAPICallError as e:
-            raise APIStatusError(  # noqa: B904
-                e.message,
-                status_code=e.code or -1,
-                request_id=None,
-                body=None,
-            )
+            raise APIStatusError(
+                e.message, status_code=e.code or -1, request_id=None, body=None
+            ) from None
         except Exception as e:
             raise APIConnectionError() from e
 
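Every `stt.py` and `tts.py` hunk above makes the same change: instead of silencing flake8's B904 rule with `# noqa`, the re-raises now use an explicit `from None` to suppress the implicit exception chain. A minimal self-contained illustration (the error class is a stand-in, not the livekit-agents one):

```python
class APITimeoutError(Exception):  # stand-in for the livekit-agents error type
    pass


try:
    try:
        raise ValueError("deadline exceeded")  # stand-in for DeadlineExceeded
    except ValueError:
        # Without `from None`, Python would print "During handling of the
        # above exception, another exception occurred" with both tracebacks.
        raise APITimeoutError() from None
except APITimeoutError as e:
    assert e.__suppress_context__  # `from None` sets this flag
    assert e.__cause__ is None
```
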
@@ -20,8 +20,26 @@ def to_fnc_ctx(fncs: list[FunctionTool]) -> list[types.FunctionDeclaration]:
     return [_build_gemini_fnc(fnc) for fnc in fncs]
 
 
+def get_tool_results_for_realtime(chat_ctx: llm.ChatContext) -> types.LiveClientToolResponse | None:
+    function_responses: list[types.FunctionResponse] = []
+    for msg in chat_ctx.items:
+        if msg.type == "function_call_output":
+            function_responses.append(
+                types.FunctionResponse(
+                    id=msg.call_id,
+                    name=msg.name,
+                    response={"text": msg.output},
+                )
+            )
+    return (
+        types.LiveClientToolResponse(function_responses=function_responses)
+        if function_responses
+        else None
+    )
+
+
 def to_chat_ctx(
-    chat_ctx: llm.ChatContext, cache_key: Any
+    chat_ctx: llm.ChatContext, cache_key: Any, ignore_functions: bool = False
 ) -> tuple[list[types.Content], types.Content | None]:
     turns: list[types.Content] = []
     system_instruction: types.Content | None = None
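
The new `get_tool_results_for_realtime` helper gathers every `function_call_output` item into a single `LiveClientToolResponse`, or returns `None` when there is nothing to send back to Gemini. A stand-in demonstration of the same transformation, using a dummy item type rather than a real `llm.ChatContext`:

```python
# Illustrative only: a dummy item stands in for a ChatContext entry.
from dataclasses import dataclass


@dataclass
class FakeItem:
    type: str
    call_id: str
    name: str
    output: str


items = [FakeItem("function_call_output", "call_1", "get_weather", "sunny")]

# Collect tool outputs the same way the helper does, yielding None when empty.
responses = [
    {"id": it.call_id, "name": it.name, "response": {"text": it.output}}
    for it in items
    if it.type == "function_call_output"
]
print(responses if responses else None)
```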
@@ -59,7 +77,7 @@ def to_chat_ctx(
                     parts.append(types.Part(text=json.dumps(content)))
                 elif isinstance(content, llm.ImageContent):
                     parts.append(_to_image_part(content, cache_key))
-        elif msg.type == "function_call":
+        elif msg.type == "function_call" and not ignore_functions:
             parts.append(
                 types.Part(
                     function_call=types.FunctionCall(
@@ -68,7 +86,7 @@ def to_chat_ctx(
                     )
                 )
             )
-        elif msg.type == "function_call_output":
+        elif msg.type == "function_call_output" and not ignore_functions:
             parts.append(
                 types.Part(
                     function_response=types.FunctionResponse(