livekit-plugins-google 0.11.2__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,270 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import asyncio
4
- import re
5
- from dataclasses import dataclass
6
- from typing import Literal
7
-
8
- import websockets
9
- from livekit import rtc
10
- from livekit.agents import APIConnectionError, APIStatusError, utils
11
-
12
- from google import genai
13
- from google.genai import types
14
- from google.genai.errors import APIError, ClientError, ServerError
15
-
16
- from ...log import logger
17
- from .api_proto import ClientEvents, LiveAPIModels
18
-
19
# Names of the events emitted by the transcriber classes in this module; used as
# the type parameter of utils.EventEmitter.
EventTypes = Literal["input_speech_started", "input_speech_done"]

# Language name interpolated into the transcription prompt below.
DEFAULT_LANGUAGE = "English"

# Prompt handed to the model as the system instruction by both transcribers.
# This is an f-string, so DEFAULT_LANGUAGE is substituted once, when the module
# is imported; rebinding DEFAULT_LANGUAGE later does not change the prompt.
SYSTEM_INSTRUCTIONS = f"""
You are an **Audio Transcriber**. Your task is to convert audio content into accurate and precise text.
- Transcribe verbatim; exclude non-speech sounds.
- Provide only transcription; no extra text or explanations.
- If audio is unclear, respond with: `...`
- Ensure error-free transcription, preserving meaning and context.
- Use proper punctuation and formatting.
- Do not add explanations, comments, or extra information.
- Do not include timestamps, speaker labels, or annotations unless specified.
- Audio Language: {DEFAULT_LANGUAGE}
"""
34
-
35
-
36
@dataclass
class TranscriptionContent:
    """Transcript payload passed to listeners of the transcriber events."""

    # Identifier attached to the transcript by the transcriber that produced it.
    response_id: str
    # Transcribed text; it is empty when "input_speech_started" is emitted.
    text: str
40
-
41
-
42
class TranscriberSession(utils.EventEmitter[EventTypes]):
    """
    Handles live audio transcription using the realtime API.

    Audio frames handed to ``_push_audio`` are resampled to the 16000 sample
    rate when needed and queued for a background task, which forwards them over
    a live session opened with ``client.aio.live.connect``. Text parts received
    from the session are accumulated per turn and published through two events:

    - ``input_speech_started``: emitted with an empty ``TranscriptionContent``
      when the first response of a new turn arrives.
    - ``input_speech_done``: emitted with the cleaned transcript once the
      server marks the turn as complete.

    The constructor schedules the background task with ``asyncio.create_task``,
    so it has to be called while an event loop is running.
    """

    def __init__(self, *, client: genai.Client, model: LiveAPIModels | str):
        """
        Args:
            client: google-genai client used to open the live session.
            model: Identifier of the live model to connect to.
        """
        super().__init__()
        self._client = client
        self._model = model
        self._needed_sr = 16000
        self._closed = False
        self._resampler: rtc.AudioResampler | None = None
        self._active_response_id: str | None = None
        self._send_ch = utils.aio.Chan[ClientEvents]()

        system_instructions = types.Content(
            parts=[types.Part(text=SYSTEM_INSTRUCTIONS)]
        )
        self._config = types.LiveConnectConfig(
            response_modalities=[types.Modality.TEXT],
            system_instruction=system_instructions,
            generation_config=types.GenerationConfig(temperature=0.0),
        )

        # Start the worker last so that every attribute it reads already exists,
        # independent of when the event loop first runs the task.
        self._main_atask = asyncio.create_task(
            self._main_task(), name="gemini-realtime-transcriber"
        )

    def _push_audio(self, frame: rtc.AudioFrame) -> None:
        """Queue one audio frame for transcription; ignored once closed."""
        if self._closed:
            return

        if frame.sample_rate != self._needed_sr and not self._resampler:
            self._resampler = rtc.AudioResampler(
                frame.sample_rate,
                self._needed_sr,
                quality=rtc.AudioResamplerQuality.HIGH,
            )

        # Once a resampler has been created, every later frame goes through it.
        frames = self._resampler.push(frame) if self._resampler else [frame]
        for f in frames:
            self._queue_msg(
                types.LiveClientRealtimeInput(
                    media_chunks=[
                        types.Blob(data=f.data.tobytes(), mime_type="audio/pcm")
                    ]
                )
            )

    def _queue_msg(self, msg: ClientEvents) -> None:
        """Hand ``msg`` to the send task unless the session is closed."""
        if not self._closed:
            self._send_ch.send_nowait(msg)

    async def aclose(self) -> None:
        """Close the send channel and wait for the background task to finish.

        Calling it again after the channel has been closed is a no-op.
        """
        if self._send_ch.closed:
            return
        self._closed = True
        self._send_ch.close()
        await self._main_atask

    @utils.log_exceptions(logger=logger)
    async def _main_task(self):
        """Open the live session and run the send and receive loops inside it."""

        @utils.log_exceptions(logger=logger)
        async def _send_task():
            # Forward queued messages until the channel is closed or an error
            # occurs; any error marks the session as closed.
            try:
                async for msg in self._send_ch:
                    if self._closed:
                        break
                    await self._session.send(input=msg)
            except websockets.exceptions.ConnectionClosedError as e:
                logger.exception(f"Transcriber session closed in _send_task: {e}")
                self._closed = True
            except Exception as e:
                logger.exception(f"Uncaught error in transcriber _send_task: {e}")
                self._closed = True

        @utils.log_exceptions(logger=logger)
        async def _recv_task():
            # Accumulate the text of each turn in `content` and emit the events.
            try:
                while not self._closed:
                    async for response in self._session.receive():
                        if self._closed:
                            break
                        if self._active_response_id is None:
                            # First response of a new turn: start a fresh transcript.
                            self._active_response_id = utils.shortuuid()
                            content = TranscriptionContent(
                                response_id=self._active_response_id,
                                text="",
                            )
                            self.emit("input_speech_started", content)

                        server_content = response.server_content
                        if not server_content:
                            continue

                        model_turn = server_content.model_turn
                        if model_turn:
                            # `parts` is guarded in case the turn carries none.
                            for part in model_turn.parts or []:
                                if part.text:
                                    content.text += part.text

                        if server_content.turn_complete:
                            content.text = clean_transcription(content.text)
                            self.emit("input_speech_done", content)
                            self._active_response_id = None

            except websockets.exceptions.ConnectionClosedError as e:
                logger.exception(f"Transcriber session closed in _recv_task: {e}")
                self._closed = True
            except Exception as e:
                logger.exception(f"Uncaught error in transcriber _recv_task: {e}")
                self._closed = True

        async with self._client.aio.live.connect(
            model=self._model, config=self._config
        ) as session:
            self._session = session
            tasks = [
                asyncio.create_task(
                    _send_task(), name="gemini-realtime-transcriber-send"
                ),
                asyncio.create_task(
                    _recv_task(), name="gemini-realtime-transcriber-recv"
                ),
            ]

            try:
                # Stop as soon as either direction finishes. Waiting for both
                # (asyncio.gather) keeps aclose() blocked on the receive task
                # for as long as the server sends nothing after the send
                # channel has been closed.
                await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED)
            finally:
                await utils.aio.gracefully_cancel(*tasks)
                await self._session.close()
178
-
179
-
180
class ModelTranscriber(utils.EventEmitter[EventTypes]):
    """
    Transcribes agent audio using model generation.

    Each batch of frames handed to ``_push_audio`` is merged into one buffer,
    resampled to the 16000 sample rate when needed, and queued. A background
    task sends every queued buffer as WAV bytes to ``generate_content`` and
    emits ``input_speech_done`` with the cleaned transcript.

    The constructor schedules the background task with ``asyncio.create_task``,
    so it has to be called while an event loop is running.
    """

    def __init__(self, *, client: genai.Client, model: LiveAPIModels | str):
        """
        Args:
            client: google-genai client used for the generation requests.
            model: Identifier of the model that performs the transcription.
        """
        super().__init__()
        self._client = client
        self._model = model
        self._needed_sr = 16000
        self._system_instructions = types.Content(
            parts=[types.Part(text=SYSTEM_INSTRUCTIONS)]
        )
        self._config = types.GenerateContentConfig(
            temperature=0.0,
            system_instruction=self._system_instructions,
            # TODO: add response_schema
        )
        self._resampler: rtc.AudioResampler | None = None
        self._buffer: rtc.AudioFrame | None = None
        self._audio_ch = utils.aio.Chan[rtc.AudioFrame]()
        self._main_atask = asyncio.create_task(
            self._main_task(), name="gemini-model-transcriber"
        )

    async def aclose(self) -> None:
        """Close the audio channel and wait for the background task to finish.

        Calling it again after the channel has been closed is a no-op.
        """
        if self._audio_ch.closed:
            return
        self._audio_ch.close()
        await self._main_atask

    def _push_audio(self, frames: list[rtc.AudioFrame]) -> None:
        """Merge ``frames`` into one buffer and queue it for transcription.

        Nothing is queued for an empty list or after ``aclose`` has closed the
        channel; TranscriberSession drops late audio in the same way.
        """
        if not frames or self._audio_ch.closed:
            return

        buffer = utils.merge_frames(frames)

        if buffer.sample_rate != self._needed_sr:
            if self._resampler is None:
                self._resampler = rtc.AudioResampler(
                    input_rate=buffer.sample_rate,
                    output_rate=self._needed_sr,
                    quality=rtc.AudioResamplerQuality.HIGH,
                )

            resampled = self._resampler.push(buffer)
            if not resampled:
                # NOTE(review): assumes the resampler can return no frames while
                # it is still buffering; an empty list is not passed on to
                # merge_frames. Confirm against the rtc.AudioResampler docs.
                return
            buffer = utils.merge_frames(resampled)

        self._audio_ch.send_nowait(buffer)

    @utils.log_exceptions(logger=logger)
    async def _main_task(self):
        """Transcribe queued buffers one at a time until the channel is closed.

        Raises:
            APIStatusError: when the generation call raises one of the
                google-genai API errors.
            APIConnectionError: when any other exception occurs.
        """
        # Generated once: every transcript emitted by this instance carries the
        # same id.
        request_id = utils.shortuuid()
        try:
            async for buffer in self._audio_ch:
                # TODO: stream content for better latency
                response = await self._client.aio.models.generate_content(
                    model=self._model,
                    contents=[
                        types.Content(
                            parts=[
                                types.Part(text=SYSTEM_INSTRUCTIONS),
                                types.Part.from_bytes(
                                    data=buffer.to_wav_bytes(),
                                    mime_type="audio/wav",
                                ),
                            ],
                            role="user",
                        )
                    ],
                    config=self._config,
                )
                # `response.text` can be None when the response has no text
                # part; treat that as an empty transcript instead of failing.
                content = TranscriptionContent(
                    response_id=request_id,
                    text=clean_transcription(response.text or ""),
                )
                self.emit("input_speech_done", content)

        except (ClientError, ServerError, APIError) as e:
            raise APIStatusError(
                f"model transcriber error: {e}",
                status_code=e.code,
                body=e.message,
                request_id=request_id,
            ) from e
        except Exception as e:
            raise APIConnectionError("Error generating transcription") from e
265
-
266
-
267
def clean_transcription(text: str | None) -> str:
    """Collapse every run of whitespace in *text* to one space and trim the ends.

    Args:
        text: Raw transcript. ``None`` or an empty string is accepted, so a
            model response without text does not raise.

    Returns:
        The normalized transcript, or ``""`` when there is no text.
    """
    if not text:
        return ""
    # `\s+` already matches newlines, so no separate newline replacement is needed.
    return re.sub(r"\s+", " ", text).strip()
@@ -1,18 +0,0 @@
1
- livekit/plugins/google/__init__.py,sha256=e_kSlFNmKhyyeliz7f4WOKc_Y0-y39QjO5nCWuguhss,1171
2
- livekit/plugins/google/_utils.py,sha256=FG1_26nlWGcI6onPleQQcmGBMfb4QNYgis1B5BMJxWA,7131
3
- livekit/plugins/google/llm.py,sha256=LZaHsrkjfboRZLWm7L2G0mw62q2sXBNj4YeeV2Sk2uU,16717
4
- livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
5
- livekit/plugins/google/models.py,sha256=SGjAumdDK97NNLwMFcqZdKR68f1NoGB2Rk1UP2-imG0,1457
6
- livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
- livekit/plugins/google/stt.py,sha256=l6UB9oaM7eFInnI_0t7Ub-edXLVRgvaiyHj-e_gEuwE,22781
8
- livekit/plugins/google/tts.py,sha256=pG9_pibO3NDGEMa4huU5S9lbeyI3daQyrS17SuTKfZI,8008
9
- livekit/plugins/google/version.py,sha256=_06ctkD1XWTWec2BVgcsxun2sFLxqnvJJJs7ZxIBuHA,601
10
- livekit/plugins/google/beta/__init__.py,sha256=AxRYc7NGG62Tv1MmcZVCDHNvlhbC86hM-_yP01Qb28k,47
11
- livekit/plugins/google/beta/realtime/__init__.py,sha256=sGTn6JFNyA30QUXBZ_BV3l2eHpGAzR35ByXxg77vWNU,205
12
- livekit/plugins/google/beta/realtime/api_proto.py,sha256=9EhmwgeIgKDqdSijv5Q9pgx7UhAakK02ZDwbnUsra_o,657
13
- livekit/plugins/google/beta/realtime/realtime_api.py,sha256=8JdWUMUheGhy1ia6JbN3_U2_cL7CNs8-1fTOAgW4I38,22999
14
- livekit/plugins/google/beta/realtime/transcriber.py,sha256=rjXO0cSPr3HATxrSfv1MX7IbrjmiTvnLPF280BfRBL8,9809
15
- livekit_plugins_google-0.11.2.dist-info/METADATA,sha256=MQF9voerbBB1t5fGRw94z7jyfgJOnsM-DmWxtCT10V8,3732
16
- livekit_plugins_google-0.11.2.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
17
- livekit_plugins_google-0.11.2.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
18
- livekit_plugins_google-0.11.2.dist-info/RECORD,,
@@ -1 +0,0 @@
1
- livekit