livekit-plugins-google 0.10.0__py3-none-any.whl → 0.10.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- livekit/plugins/google/beta/realtime/realtime_api.py +12 -22
- livekit/plugins/google/beta/realtime/transcriber.py +26 -6
- livekit/plugins/google/llm.py +2 -2
- livekit/plugins/google/stt.py +3 -3
- livekit/plugins/google/version.py +1 -1
- {livekit_plugins_google-0.10.0.dist-info → livekit_plugins_google-0.10.2.dist-info}/METADATA +2 -2
- {livekit_plugins_google-0.10.0.dist-info → livekit_plugins_google-0.10.2.dist-info}/RECORD +9 -9
- {livekit_plugins_google-0.10.0.dist-info → livekit_plugins_google-0.10.2.dist-info}/WHEEL +0 -0
- {livekit_plugins_google-0.10.0.dist-info → livekit_plugins_google-0.10.2.dist-info}/top_level.txt +0 -0
livekit/plugins/google/beta/realtime/realtime_api.py
CHANGED
@@ -75,6 +75,7 @@ class InputTranscription:
|
|
75
75
|
@dataclass
|
76
76
|
class Capabilities:
|
77
77
|
supports_truncate: bool
|
78
|
+
input_audio_sample_rate: int | None = None
|
78
79
|
|
79
80
|
|
80
81
|
@dataclass
|
@@ -156,6 +157,7 @@ class RealtimeModel:
|
|
156
157
|
super().__init__()
|
157
158
|
self._capabilities = Capabilities(
|
158
159
|
supports_truncate=False,
|
160
|
+
input_audio_sample_rate=16000,
|
159
161
|
)
|
160
162
|
self._model = model
|
161
163
|
self._loop = loop or asyncio.get_event_loop()
|
@@ -307,8 +309,6 @@ class GeminiRealtimeSession(utils.EventEmitter[EventTypes]):
|
|
307
309
|
self._init_sync_task = asyncio.create_task(asyncio.sleep(0))
|
308
310
|
self._send_ch = utils.aio.Chan[ClientEvents]()
|
309
311
|
self._active_response_id = None
|
310
|
-
if chat_ctx:
|
311
|
-
self.generate_reply(chat_ctx)
|
312
312
|
|
313
313
|
async def aclose(self) -> None:
|
314
314
|
if self._send_ch.closed:
|
@@ -336,25 +336,6 @@ class GeminiRealtimeSession(utils.EventEmitter[EventTypes]):
|
|
336
336
|
def _queue_msg(self, msg: ClientEvents) -> None:
|
337
337
|
self._send_ch.send_nowait(msg)
|
338
338
|
|
339
|
-
def generate_reply(
|
340
|
-
self,
|
341
|
-
ctx: llm.ChatContext | llm.ChatMessage,
|
342
|
-
turn_complete: bool = True,
|
343
|
-
) -> None:
|
344
|
-
if isinstance(ctx, llm.ChatMessage) and isinstance(ctx.content, str):
|
345
|
-
new_chat_ctx = llm.ChatContext()
|
346
|
-
new_chat_ctx.append(text=ctx.content, role=ctx.role)
|
347
|
-
elif isinstance(ctx, llm.ChatContext):
|
348
|
-
new_chat_ctx = ctx
|
349
|
-
else:
|
350
|
-
raise ValueError("Invalid chat context")
|
351
|
-
turns, _ = _build_gemini_ctx(new_chat_ctx, id(self))
|
352
|
-
client_content = LiveClientContent(
|
353
|
-
turn_complete=turn_complete,
|
354
|
-
turns=turns,
|
355
|
-
)
|
356
|
-
self._queue_msg(client_content)
|
357
|
-
|
358
339
|
def chat_ctx_copy(self) -> llm.ChatContext:
|
359
340
|
return self._chat_ctx.copy()
|
360
341
|
|
@@ -370,7 +351,16 @@ class GeminiRealtimeSession(utils.EventEmitter[EventTypes]):
|
|
370
351
|
"cancel_existing", "cancel_new", "keep_both"
|
371
352
|
] = "keep_both",
|
372
353
|
) -> None:
|
373
|
-
|
354
|
+
turns, _ = _build_gemini_ctx(self._chat_ctx, id(self))
|
355
|
+
ctx = [self._opts.instructions] + turns if self._opts.instructions else turns
|
356
|
+
|
357
|
+
if not ctx:
|
358
|
+
logger.warning(
|
359
|
+
"gemini-realtime-session: No chat context to send, sending dummy content."
|
360
|
+
)
|
361
|
+
ctx = [Content(parts=[Part(text=".")])]
|
362
|
+
|
363
|
+
self._queue_msg(LiveClientContent(turns=ctx, turn_complete=True))
|
374
364
|
|
375
365
|
def commit_audio_buffer(self) -> None:
|
376
366
|
raise NotImplementedError("commit_audio_buffer is not supported yet")
|
livekit/plugins/google/beta/realtime/transcriber.py
CHANGED
@@ -56,6 +56,7 @@ class TranscriberSession(utils.EventEmitter[EventTypes]):
|
|
56
56
|
super().__init__()
|
57
57
|
self._client = client
|
58
58
|
self._model = model
|
59
|
+
self._needed_sr = 16000
|
59
60
|
self._closed = False
|
60
61
|
system_instructions = types.Content(
|
61
62
|
parts=[types.Part(text=SYSTEM_INSTRUCTIONS)]
|
@@ -72,18 +73,37 @@ class TranscriberSession(utils.EventEmitter[EventTypes]):
|
|
72
73
|
self._main_task(), name="gemini-realtime-transcriber"
|
73
74
|
)
|
74
75
|
self._send_ch = utils.aio.Chan[ClientEvents]()
|
76
|
+
self._resampler: rtc.AudioResampler | None = None
|
75
77
|
self._active_response_id = None
|
76
78
|
|
77
79
|
def _push_audio(self, frame: rtc.AudioFrame) -> None:
|
78
80
|
if self._closed:
|
79
81
|
return
|
80
|
-
self.
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
82
|
+
if frame.sample_rate != self._needed_sr:
|
83
|
+
if not self._resampler:
|
84
|
+
self._resampler = rtc.AudioResampler(
|
85
|
+
frame.sample_rate,
|
86
|
+
self._needed_sr,
|
87
|
+
quality=rtc.AudioResamplerQuality.HIGH,
|
88
|
+
)
|
89
|
+
|
90
|
+
if self._resampler:
|
91
|
+
for f in self._resampler.push(frame):
|
92
|
+
self._queue_msg(
|
93
|
+
types.LiveClientRealtimeInput(
|
94
|
+
media_chunks=[
|
95
|
+
types.Blob(data=f.data.tobytes(), mime_type="audio/pcm")
|
96
|
+
]
|
97
|
+
)
|
98
|
+
)
|
99
|
+
else:
|
100
|
+
self._queue_msg(
|
101
|
+
types.LiveClientRealtimeInput(
|
102
|
+
media_chunks=[
|
103
|
+
types.Blob(data=frame.data.tobytes(), mime_type="audio/pcm")
|
104
|
+
]
|
105
|
+
)
|
85
106
|
)
|
86
|
-
)
|
87
107
|
|
88
108
|
def _queue_msg(self, msg: ClientEvents) -> None:
|
89
109
|
if not self._closed:
|
livekit/plugins/google/llm.py
CHANGED
@@ -108,8 +108,8 @@ class LLM(llm.LLM):
|
|
108
108
|
self._api_key = api_key or os.environ.get("GOOGLE_API_KEY", None)
|
109
109
|
_gac = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")
|
110
110
|
if _gac is None:
|
111
|
-
|
112
|
-
"`GOOGLE_APPLICATION_CREDENTIALS` environment variable is not set. please set it to the path of the service account key file."
|
111
|
+
logger.warning(
|
112
|
+
"`GOOGLE_APPLICATION_CREDENTIALS` environment variable is not set. please set it to the path of the service account key file. Otherwise, use any of the other Google Cloud auth methods."
|
113
113
|
)
|
114
114
|
|
115
115
|
if vertexai:
|
livekit/plugins/google/stt.py
CHANGED
@@ -89,9 +89,9 @@ class STT(stt.STT):
|
|
89
89
|
detect_language: bool = True,
|
90
90
|
interim_results: bool = True,
|
91
91
|
punctuate: bool = True,
|
92
|
-
spoken_punctuation: bool =
|
93
|
-
model: SpeechModels = "
|
94
|
-
location: str = "
|
92
|
+
spoken_punctuation: bool = False,
|
93
|
+
model: SpeechModels = "chirp_2",
|
94
|
+
location: str = "us-central1",
|
95
95
|
sample_rate: int = 16000,
|
96
96
|
credentials_info: dict | None = None,
|
97
97
|
credentials_file: str | None = None,
|
{livekit_plugins_google-0.10.0.dist-info → livekit_plugins_google-0.10.2.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: livekit-plugins-google
|
3
|
-
Version: 0.10.
|
3
|
+
Version: 0.10.2
|
4
4
|
Summary: Agent Framework plugin for services from Google Cloud
|
5
5
|
Home-page: https://github.com/livekit/agents
|
6
6
|
License: Apache-2.0
|
@@ -23,7 +23,7 @@ Requires-Dist: google-auth<3,>=2
|
|
23
23
|
Requires-Dist: google-cloud-speech<3,>=2
|
24
24
|
Requires-Dist: google-cloud-texttospeech<3,>=2
|
25
25
|
Requires-Dist: google-genai==0.5.0
|
26
|
-
Requires-Dist: livekit-agents>=0.12.
|
26
|
+
Requires-Dist: livekit-agents>=0.12.11
|
27
27
|
Dynamic: classifier
|
28
28
|
Dynamic: description
|
29
29
|
Dynamic: description-content-type
|
{livekit_plugins_google-0.10.0.dist-info → livekit_plugins_google-0.10.2.dist-info}/RECORD
RENAMED
@@ -1,18 +1,18 @@
|
|
1
1
|
livekit/plugins/google/__init__.py,sha256=e_kSlFNmKhyyeliz7f4WOKc_Y0-y39QjO5nCWuguhss,1171
|
2
2
|
livekit/plugins/google/_utils.py,sha256=mjsqblhGMgAZ2MNPisAVkNsqq4gfO6vvprEKzAGoVwE,7248
|
3
|
-
livekit/plugins/google/llm.py,sha256=
|
3
|
+
livekit/plugins/google/llm.py,sha256=o9EJBv3rS5vKRq7m5YjSSqOxtH6pPekxRS_lra35hzk,16445
|
4
4
|
livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
|
5
5
|
livekit/plugins/google/models.py,sha256=w_qmOk5y86vjtszDiGpP9p0ctjQeaB8-UzqprxgpvCY,1407
|
6
6
|
livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
|
-
livekit/plugins/google/stt.py,sha256=
|
7
|
+
livekit/plugins/google/stt.py,sha256=FA6Lpeb8QvRXLzkQ7cjsoMxHdtEGwHWkpN_TKqAdKAQ,21097
|
8
8
|
livekit/plugins/google/tts.py,sha256=95qXCigVQYWNbcN3pIKBpIah4b31U_MWtXv5Ji0AMc4,9229
|
9
|
-
livekit/plugins/google/version.py,sha256=
|
9
|
+
livekit/plugins/google/version.py,sha256=jklx55q_NtxoIUiYD5AFOO11S_Jij8P491Y8nkw-VZk,601
|
10
10
|
livekit/plugins/google/beta/__init__.py,sha256=AxRYc7NGG62Tv1MmcZVCDHNvlhbC86hM-_yP01Qb28k,47
|
11
11
|
livekit/plugins/google/beta/realtime/__init__.py,sha256=sGTn6JFNyA30QUXBZ_BV3l2eHpGAzR35ByXxg77vWNU,205
|
12
12
|
livekit/plugins/google/beta/realtime/api_proto.py,sha256=9EhmwgeIgKDqdSijv5Q9pgx7UhAakK02ZDwbnUsra_o,657
|
13
|
-
livekit/plugins/google/beta/realtime/realtime_api.py,sha256=
|
14
|
-
livekit/plugins/google/beta/realtime/transcriber.py,sha256=
|
15
|
-
livekit_plugins_google-0.10.
|
16
|
-
livekit_plugins_google-0.10.
|
17
|
-
livekit_plugins_google-0.10.
|
18
|
-
livekit_plugins_google-0.10.
|
13
|
+
livekit/plugins/google/beta/realtime/realtime_api.py,sha256=OwNoPmmomMtRkmYw-g2u7hIYpeIrSSNky7FlcHBVyFQ,21150
|
14
|
+
livekit/plugins/google/beta/realtime/transcriber.py,sha256=JnZ75NyiOLkpvQ5N2nDniumDKcrjiq_tlryiLbuBoDM,6658
|
15
|
+
livekit_plugins_google-0.10.2.dist-info/METADATA,sha256=dTBdAuYpGyCFVJNw0c8upUEdaFgdodWwrm1bB3a4Xp4,2058
|
16
|
+
livekit_plugins_google-0.10.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
17
|
+
livekit_plugins_google-0.10.2.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
|
18
|
+
livekit_plugins_google-0.10.2.dist-info/RECORD,,
|
{livekit_plugins_google-0.10.0.dist-info → livekit_plugins_google-0.10.2.dist-info}/WHEEL
RENAMED
File without changes
|
{livekit_plugins_google-0.10.0.dist-info → livekit_plugins_google-0.10.2.dist-info}/top_level.txt
RENAMED
File without changes
|