livekit-plugins-google 1.0.0rc9__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -132,11 +132,11 @@ class STT(stt.STT):
132
132
  try:
133
133
  gauth_default()
134
134
  except DefaultCredentialsError:
135
- raise ValueError( # noqa: B904
135
+ raise ValueError(
136
136
  "Application default credentials must be available "
137
137
  "when using Google STT without explicitly passing "
138
138
  "credentials through credentials_info or credentials_file."
139
- )
139
+ ) from None
140
140
 
141
141
  if isinstance(languages, str):
142
142
  languages = [languages]
@@ -244,12 +244,9 @@ class STT(stt.STT):
244
244
 
245
245
  return _recognize_response_to_speech_event(raw)
246
246
  except DeadlineExceeded:
247
- raise APITimeoutError() # noqa: B904
247
+ raise APITimeoutError() from None
248
248
  except GoogleAPICallError as e:
249
- raise APIStatusError( # noqa: B904
250
- e.message,
251
- status_code=e.code or -1,
252
- )
249
+ raise APIStatusError(e.message, status_code=e.code or -1) from None
253
250
  except Exception as e:
254
251
  raise APIConnectionError() from e
255
252
 
@@ -495,12 +492,9 @@ class SpeechStream(stt.SpeechStream):
495
492
  await utils.aio.gracefully_cancel(process_stream_task, wait_reconnect_task)
496
493
  should_stop.set()
497
494
  except DeadlineExceeded:
498
- raise APITimeoutError() # noqa: B904
495
+ raise APITimeoutError() from None
499
496
  except GoogleAPICallError as e:
500
- raise APIStatusError( # noqa: B904
501
- e.message,
502
- status_code=e.code or -1,
503
- )
497
+ raise APIStatusError(e.message, status_code=e.code or -1) from None
504
498
  except Exception as e:
505
499
  raise APIConnectionError() from e
506
500
 
@@ -203,14 +203,11 @@ class ChunkedStream(tts.ChunkedStream):
203
203
  await decoder.aclose()
204
204
 
205
205
  except DeadlineExceeded:
206
- raise APITimeoutError() # noqa: B904
206
+ raise APITimeoutError() from None
207
207
  except GoogleAPICallError as e:
208
- raise APIStatusError( # noqa: B904
209
- e.message,
210
- status_code=e.code or -1,
211
- request_id=None,
212
- body=None,
213
- )
208
+ raise APIStatusError(
209
+ e.message, status_code=e.code or -1, request_id=None, body=None
210
+ ) from None
214
211
  except Exception as e:
215
212
  raise APIConnectionError() from e
216
213
 
@@ -20,8 +20,26 @@ def to_fnc_ctx(fncs: list[FunctionTool]) -> list[types.FunctionDeclaration]:
20
20
  return [_build_gemini_fnc(fnc) for fnc in fncs]
21
21
 
22
22
 
23
+ def get_tool_results_for_realtime(chat_ctx: llm.ChatContext) -> types.LiveClientToolResponse | None:
24
+ function_responses: list[types.FunctionResponse] = []
25
+ for msg in chat_ctx.items:
26
+ if msg.type == "function_call_output":
27
+ function_responses.append(
28
+ types.FunctionResponse(
29
+ id=msg.call_id,
30
+ name=msg.name,
31
+ response={"text": msg.output},
32
+ )
33
+ )
34
+ return (
35
+ types.LiveClientToolResponse(function_responses=function_responses)
36
+ if function_responses
37
+ else None
38
+ )
39
+
40
+
23
41
  def to_chat_ctx(
24
- chat_ctx: llm.ChatContext, cache_key: Any
42
+ chat_ctx: llm.ChatContext, cache_key: Any, ignore_functions: bool = False
25
43
  ) -> tuple[list[types.Content], types.Content | None]:
26
44
  turns: list[types.Content] = []
27
45
  system_instruction: types.Content | None = None
@@ -59,7 +77,7 @@ def to_chat_ctx(
59
77
  parts.append(types.Part(text=json.dumps(content)))
60
78
  elif isinstance(content, llm.ImageContent):
61
79
  parts.append(_to_image_part(content, cache_key))
62
- elif msg.type == "function_call":
80
+ elif msg.type == "function_call" and not ignore_functions:
63
81
  parts.append(
64
82
  types.Part(
65
83
  function_call=types.FunctionCall(
@@ -68,7 +86,7 @@ def to_chat_ctx(
68
86
  )
69
87
  )
70
88
  )
71
- elif msg.type == "function_call_output":
89
+ elif msg.type == "function_call_output" and not ignore_functions:
72
90
  parts.append(
73
91
  types.Part(
74
92
  function_response=types.FunctionResponse(
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = '1.0.0.rc9'
15
+ __version__ = '1.0.2'
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: livekit-plugins-google
3
- Version: 1.0.0rc9
3
+ Version: 1.0.2
4
4
  Summary: Agent Framework plugin for services from Google Cloud
5
5
  Project-URL: Documentation, https://docs.livekit.io
6
6
  Project-URL: Website, https://livekit.io/
@@ -22,7 +22,7 @@ Requires-Dist: google-auth<3,>=2
22
22
  Requires-Dist: google-cloud-speech<3,>=2
23
23
  Requires-Dist: google-cloud-texttospeech<3,>=2
24
24
  Requires-Dist: google-genai==1.5.0
25
- Requires-Dist: livekit-agents>=1.0.0.rc9
25
+ Requires-Dist: livekit-agents>=1.0.2
26
26
  Description-Content-Type: text/markdown
27
27
 
28
28
  # LiveKit Plugins Google
@@ -0,0 +1,16 @@
1
+ livekit/plugins/google/__init__.py,sha256=e_kSlFNmKhyyeliz7f4WOKc_Y0-y39QjO5nCWuguhss,1171
2
+ livekit/plugins/google/llm.py,sha256=81LCCJPmpMOkApX0S0a-zu5xIvcm2Pk8lTTz-PoK5m0,14740
3
+ livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
4
+ livekit/plugins/google/models.py,sha256=SGjAumdDK97NNLwMFcqZdKR68f1NoGB2Rk1UP2-imG0,1457
5
+ livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ livekit/plugins/google/stt.py,sha256=AG_lh2fuuduJi0jFbA_QKFXLJ6NUdF1W_FfkLUJML_Q,22413
7
+ livekit/plugins/google/tts.py,sha256=P8Zu2s0TfmyzlrNxzDIqyn3sGiNSW0n3nB_JlO_ojiM,7985
8
+ livekit/plugins/google/utils.py,sha256=pbLSOAdQxInWhgI2Yhsrr9KvgvpFXYDdU2yx2p03pFg,9437
9
+ livekit/plugins/google/version.py,sha256=VAosEGj0ByVVgOD0nuevp_anp63XZCFxkxz7t-41kg8,600
10
+ livekit/plugins/google/beta/__init__.py,sha256=AxRYc7NGG62Tv1MmcZVCDHNvlhbC86hM-_yP01Qb28k,47
11
+ livekit/plugins/google/beta/realtime/__init__.py,sha256=_fW2NMN22F-hnQ4xAJ_g5lPbR7CvM_xXzSWlUQY-E-U,188
12
+ livekit/plugins/google/beta/realtime/api_proto.py,sha256=cwpFOYjN_3v5PMY0TnzoHhJoASfZ7Qt9IO281ZhJ7Ww,565
13
+ livekit/plugins/google/beta/realtime/realtime_api.py,sha256=ubF2Ha9zCD28gQrrjTcX3MWgMBs7bC3rI0DUdaHAa_Q,22021
14
+ livekit_plugins_google-1.0.2.dist-info/METADATA,sha256=0sqwsTwIAhKGSWqP4QXvP4GX5LGcGxGIkN0AP6BDoNE,3489
15
+ livekit_plugins_google-1.0.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
16
+ livekit_plugins_google-1.0.2.dist-info/RECORD,,
@@ -1,10 +0,0 @@
1
- def _build_tools():
2
- pass
3
-
4
-
5
- def _create_ai_function_info():
6
- pass
7
-
8
-
9
- def _build_gemini_ctx():
10
- pass
@@ -1,254 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import asyncio
4
- import re
5
- from dataclasses import dataclass
6
- from typing import Literal
7
-
8
- import websockets
9
-
10
- from google import genai
11
- from google.genai import types
12
- from google.genai.errors import APIError, ClientError, ServerError
13
- from livekit import rtc
14
- from livekit.agents import APIConnectionError, APIStatusError, utils
15
-
16
- from ...log import logger
17
- from .api_proto import ClientEvents, LiveAPIModels
18
-
19
- EventTypes = Literal["input_speech_started", "input_speech_done"]
20
-
21
- DEFAULT_LANGUAGE = "English"
22
-
23
- SYSTEM_INSTRUCTIONS = f"""
24
- You are an **Audio Transcriber**. Your task is to convert audio content into accurate and precise text.
25
- - Transcribe verbatim; exclude non-speech sounds.
26
- - Provide only transcription; no extra text or explanations.
27
- - If audio is unclear, respond with: `...`
28
- - Ensure error-free transcription, preserving meaning and context.
29
- - Use proper punctuation and formatting.
30
- - Do not add explanations, comments, or extra information.
31
- - Do not include timestamps, speaker labels, or annotations unless specified.
32
- - Audio Language: {DEFAULT_LANGUAGE}
33
- """ # noqa: E501
34
-
35
-
36
- @dataclass
37
- class TranscriptionContent:
38
- response_id: str
39
- text: str
40
-
41
-
42
- class TranscriberSession(utils.EventEmitter[EventTypes]):
43
- """
44
- Handles live audio transcription using the realtime API.
45
- """
46
-
47
- def __init__(self, *, client: genai.Client, model: LiveAPIModels | str):
48
- super().__init__()
49
- self._client = client
50
- self._model = model
51
- self._needed_sr = 16000
52
- self._closed = False
53
-
54
- system_instructions = types.Content(parts=[types.Part(text=SYSTEM_INSTRUCTIONS)])
55
- self._config = types.LiveConnectConfig(
56
- response_modalities=[types.Modality.TEXT],
57
- system_instruction=system_instructions,
58
- generation_config=types.GenerationConfig(temperature=0.0),
59
- )
60
- self._main_atask = asyncio.create_task(
61
- self._main_task(), name="gemini-realtime-transcriber"
62
- )
63
- self._send_ch = utils.aio.Chan[ClientEvents]()
64
- self._resampler: rtc.AudioResampler | None = None
65
- self._active_response_id = None
66
-
67
- def _push_audio(self, frame: rtc.AudioFrame) -> None:
68
- if self._closed:
69
- return
70
- if frame.sample_rate != self._needed_sr:
71
- if not self._resampler:
72
- self._resampler = rtc.AudioResampler(
73
- frame.sample_rate,
74
- self._needed_sr,
75
- quality=rtc.AudioResamplerQuality.HIGH,
76
- )
77
-
78
- if self._resampler:
79
- for f in self._resampler.push(frame):
80
- self._queue_msg(
81
- types.LiveClientRealtimeInput(
82
- media_chunks=[types.Blob(data=f.data.tobytes(), mime_type="audio/pcm")]
83
- )
84
- )
85
- else:
86
- self._queue_msg(
87
- types.LiveClientRealtimeInput(
88
- media_chunks=[types.Blob(data=frame.data.tobytes(), mime_type="audio/pcm")]
89
- )
90
- )
91
-
92
- def _queue_msg(self, msg: ClientEvents) -> None:
93
- if not self._closed:
94
- self._send_ch.send_nowait(msg)
95
-
96
- async def aclose(self) -> None:
97
- if self._send_ch.closed:
98
- return
99
- self._closed = True
100
- self._send_ch.close()
101
- await self._main_atask
102
-
103
- @utils.log_exceptions(logger=logger)
104
- async def _main_task(self):
105
- @utils.log_exceptions(logger=logger)
106
- async def _send_task():
107
- try:
108
- async for msg in self._send_ch:
109
- if self._closed:
110
- break
111
- await self._session.send(input=msg)
112
- except websockets.exceptions.ConnectionClosedError as e:
113
- logger.exception(f"Transcriber session closed in _send_task: {e}")
114
- self._closed = True
115
- except Exception as e:
116
- logger.exception(f"Uncaught error in transcriber _send_task: {e}")
117
- self._closed = True
118
-
119
- @utils.log_exceptions(logger=logger)
120
- async def _recv_task():
121
- try:
122
- while not self._closed:
123
- async for response in self._session.receive():
124
- if self._closed:
125
- break
126
- if self._active_response_id is None:
127
- self._active_response_id = utils.shortuuid()
128
- content = TranscriptionContent(
129
- response_id=self._active_response_id,
130
- text="",
131
- )
132
- self.emit("input_speech_started", content)
133
-
134
- server_content = response.server_content
135
- if server_content:
136
- model_turn = server_content.model_turn
137
- if model_turn:
138
- for part in model_turn.parts:
139
- if part.text:
140
- content.text += part.text
141
-
142
- if server_content.turn_complete:
143
- content.text = clean_transcription(content.text)
144
- self.emit("input_speech_done", content)
145
- self._active_response_id = None
146
-
147
- except websockets.exceptions.ConnectionClosedError as e:
148
- logger.exception(f"Transcriber session closed in _recv_task: {e}")
149
- self._closed = True
150
- except Exception as e:
151
- logger.exception(f"Uncaught error in transcriber _recv_task: {e}")
152
- self._closed = True
153
-
154
- async with self._client.aio.live.connect(model=self._model, config=self._config) as session:
155
- self._session = session
156
- tasks = [
157
- asyncio.create_task(_send_task(), name="gemini-realtime-transcriber-send"),
158
- asyncio.create_task(_recv_task(), name="gemini-realtime-transcriber-recv"),
159
- ]
160
-
161
- try:
162
- await asyncio.gather(*tasks)
163
- finally:
164
- await utils.aio.gracefully_cancel(*tasks)
165
- await self._session.close()
166
-
167
-
168
- class ModelTranscriber(utils.EventEmitter[EventTypes]):
169
- """
170
- Transcribes agent audio using model generation.
171
- """
172
-
173
- def __init__(self, *, client: genai.Client, model: LiveAPIModels | str):
174
- super().__init__()
175
- self._client = client
176
- self._model = model
177
- self._needed_sr = 16000
178
- self._system_instructions = types.Content(parts=[types.Part(text=SYSTEM_INSTRUCTIONS)])
179
- self._config = types.GenerateContentConfig(
180
- temperature=0.0,
181
- system_instruction=self._system_instructions,
182
- # TODO: add response_schem
183
- )
184
- self._resampler: rtc.AudioResampler | None = None
185
- self._buffer: rtc.AudioFrame | None = None
186
- self._audio_ch = utils.aio.Chan[rtc.AudioFrame]()
187
- self._main_atask = asyncio.create_task(self._main_task(), name="gemini-model-transcriber")
188
-
189
- async def aclose(self) -> None:
190
- if self._audio_ch.closed:
191
- return
192
- self._audio_ch.close()
193
- await self._main_atask
194
-
195
- def _push_audio(self, frames: list[rtc.AudioFrame]) -> None:
196
- if not frames:
197
- return
198
-
199
- buffer = utils.merge_frames(frames)
200
-
201
- if buffer.sample_rate != self._needed_sr:
202
- if self._resampler is None:
203
- self._resampler = rtc.AudioResampler(
204
- input_rate=buffer.sample_rate,
205
- output_rate=self._needed_sr,
206
- quality=rtc.AudioResamplerQuality.HIGH,
207
- )
208
-
209
- buffer = utils.merge_frames(self._resampler.push(buffer))
210
-
211
- self._audio_ch.send_nowait(buffer)
212
-
213
- @utils.log_exceptions(logger=logger)
214
- async def _main_task(self):
215
- request_id = utils.shortuuid()
216
- try:
217
- async for buffer in self._audio_ch:
218
- # TODO: stream content for better latency
219
- response = await self._client.aio.models.generate_content(
220
- model=self._model,
221
- contents=[
222
- types.Content(
223
- parts=[
224
- types.Part(text=SYSTEM_INSTRUCTIONS),
225
- types.Part.from_bytes(
226
- data=buffer.to_wav_bytes(),
227
- mime_type="audio/wav",
228
- ),
229
- ],
230
- role="user",
231
- )
232
- ],
233
- config=self._config,
234
- )
235
- content = TranscriptionContent(
236
- response_id=request_id, text=clean_transcription(response.text)
237
- )
238
- self.emit("input_speech_done", content)
239
-
240
- except (ClientError, ServerError, APIError) as e:
241
- raise APIStatusError(
242
- f"model transcriber error: {e}",
243
- status_code=e.code,
244
- body=e.message,
245
- request_id=request_id,
246
- ) from e
247
- except Exception as e:
248
- raise APIConnectionError("Error generating transcription") from e
249
-
250
-
251
- def clean_transcription(text: str) -> str:
252
- text = text.replace("\n", " ")
253
- text = re.sub(r"\s+", " ", text)
254
- return text.strip()
@@ -1,18 +0,0 @@
1
- livekit/plugins/google/__init__.py,sha256=e_kSlFNmKhyyeliz7f4WOKc_Y0-y39QjO5nCWuguhss,1171
2
- livekit/plugins/google/llm.py,sha256=81LCCJPmpMOkApX0S0a-zu5xIvcm2Pk8lTTz-PoK5m0,14740
3
- livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
4
- livekit/plugins/google/models.py,sha256=SGjAumdDK97NNLwMFcqZdKR68f1NoGB2Rk1UP2-imG0,1457
5
- livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- livekit/plugins/google/stt.py,sha256=fT5JtDM8ck2iMAzRvFKVeyT1oPt_R-bDkqiLa-ysikc,22539
7
- livekit/plugins/google/tts.py,sha256=aA3VuNaMcE6I1M43Sm-2mmvNyA9D2EyqfpyAporMUSg,8042
8
- livekit/plugins/google/utils.py,sha256=dLkq-8lbWDC7AQ7nULd9unWwu_Wv9czdlxchyiJQ2KQ,8740
9
- livekit/plugins/google/version.py,sha256=5bk2f3atP67YoCoyxLdm3aJrB_QkLXroUSkhYmjhT1o,604
10
- livekit/plugins/google/beta/__init__.py,sha256=AxRYc7NGG62Tv1MmcZVCDHNvlhbC86hM-_yP01Qb28k,47
11
- livekit/plugins/google/beta/realtime/__init__.py,sha256=_fW2NMN22F-hnQ4xAJ_g5lPbR7CvM_xXzSWlUQY-E-U,188
12
- livekit/plugins/google/beta/realtime/api_proto.py,sha256=VO6QqOGOrxzsaOLBqnwNd8c-BId0PjwKicdrPTJisy0,688
13
- livekit/plugins/google/beta/realtime/realtime_api.py,sha256=ERM6WvcTtrfIyKpukzoSYrkhd3eYxIY-I09mKWp8vLk,22576
14
- livekit/plugins/google/beta/realtime/temp.py,sha256=an_YueuS_tUw3_QC6xWkkcw5JrJOBQFv2pJh6atpNcc,108
15
- livekit/plugins/google/beta/realtime/transcriber.py,sha256=DD7q894xc25GeeuKDar6-GwH-MxStEwhwBiX-KZ-Jo4,9559
16
- livekit_plugins_google-1.0.0rc9.dist-info/METADATA,sha256=l6sYTKKhQp1_JY5NwEZI5aRe8WvXfhgPx0gDMyicgJg,3496
17
- livekit_plugins_google-1.0.0rc9.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
18
- livekit_plugins_google-1.0.0rc9.dist-info/RECORD,,