livekit-plugins-google 1.1.0__py3-none-any.whl → 1.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- livekit/plugins/google/beta/realtime/realtime_api.py +9 -1
- livekit/plugins/google/llm.py +7 -2
- livekit/plugins/google/stt.py +16 -2
- livekit/plugins/google/tts.py +31 -2
- livekit/plugins/google/version.py +1 -1
- {livekit_plugins_google-1.1.0.dist-info → livekit_plugins_google-1.1.2.dist-info}/METADATA +4 -4
- {livekit_plugins_google-1.1.0.dist-info → livekit_plugins_google-1.1.2.dist-info}/RECORD +8 -8
- {livekit_plugins_google-1.1.0.dist-info → livekit_plugins_google-1.1.2.dist-info}/WHEEL +0 -0
livekit/plugins/google/beta/realtime/realtime_api.py
CHANGED
@@ -68,6 +68,7 @@ class _RealtimeOptions:
     output_audio_transcription: types.AudioTranscriptionConfig | None
     image_encode_options: NotGivenOr[images.EncodeOptions]
     conn_options: APIConnectOptions
+    http_options: NotGivenOr[types.HttpOptions]
     enable_affective_dialog: NotGivenOr[bool] = NOT_GIVEN
     proactivity: NotGivenOr[bool] = NOT_GIVEN
     realtime_input_config: NotGivenOr[types.RealtimeInputConfig] = NOT_GIVEN
@@ -136,6 +137,7 @@ class RealtimeModel(llm.RealtimeModel):
         context_window_compression: NotGivenOr[types.ContextWindowCompressionConfig] = NOT_GIVEN,
         api_version: NotGivenOr[str] = NOT_GIVEN,
         conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
+        http_options: NotGivenOr[types.HttpOptions] = NOT_GIVEN,
         _gemini_tools: NotGivenOr[list[_LLMTool]] = NOT_GIVEN,
     ) -> None:
         """
@@ -259,6 +261,7 @@ class RealtimeModel(llm.RealtimeModel):
             api_version=api_version,
             gemini_tools=_gemini_tools,
             conn_options=conn_options,
+            http_options=http_options,
         )

         self._sessions = weakref.WeakSet[RealtimeSession]()
@@ -319,7 +322,9 @@ class RealtimeSession(llm.RealtimeSession):
         if not api_version and (self._opts.enable_affective_dialog or self._opts.proactivity):
             api_version = "v1alpha"

-        http_options =
+        http_options = self._opts.http_options or types.HttpOptions(
+            timeout=int(self._opts.conn_options.timeout * 1000)
+        )
         if api_version:
             http_options.api_version = api_version

@@ -902,6 +907,9 @@ class RealtimeSession(llm.RealtimeSession):
             )

             if not gen.text_ch.closed:
+                if self._opts.output_audio_transcription is None:
+                    # close the text data of transcription synchronizer
+                    gen.text_ch.send_nowait("")
                 gen.text_ch.close()
             if not gen.audio_ch.closed:
                 gen.audio_ch.close()
livekit/plugins/google/llm.py
CHANGED
@@ -60,6 +60,7 @@ class _LLMOptions:
     frequency_penalty: NotGivenOr[float]
     thinking_config: NotGivenOr[types.ThinkingConfigOrDict]
     gemini_tools: NotGivenOr[list[_LLMTool]]
+    http_options: NotGivenOr[types.HttpOptions]


 class LLM(llm.LLM):
@@ -80,6 +81,7 @@ class LLM(llm.LLM):
         tool_choice: NotGivenOr[ToolChoice] = NOT_GIVEN,
         thinking_config: NotGivenOr[types.ThinkingConfigOrDict] = NOT_GIVEN,
         gemini_tools: NotGivenOr[list[_LLMTool]] = NOT_GIVEN,
+        http_options: NotGivenOr[types.HttpOptions] = NOT_GIVEN,
     ) -> None:
         """
         Create a new instance of Google GenAI LLM.
@@ -106,6 +108,7 @@ class LLM(llm.LLM):
             tool_choice (ToolChoice, optional): Specifies whether to use tools during response generation. Defaults to "auto".
             thinking_config (ThinkingConfigOrDict, optional): The thinking configuration for response generation. Defaults to None.
             gemini_tools (list[LLMTool], optional): The Gemini-specific tools to use for the session.
+            http_options (HttpOptions, optional): The HTTP options to use for the session.
         """  # noqa: E501
         super().__init__()
         gcp_project = project if is_given(project) else os.environ.get("GOOGLE_CLOUD_PROJECT")
@@ -166,6 +169,7 @@ class LLM(llm.LLM):
             frequency_penalty=frequency_penalty,
             thinking_config=thinking_config,
             gemini_tools=gemini_tools,
+            http_options=http_options,
         )
         self._client = Client(
             api_key=gemini_api_key,
@@ -312,8 +316,9 @@ class LLMStream(llm.LLMStream):
                 if extra_data.system_messages
                 else None
             ),
-            http_options=
-
+            http_options=(
+                self._llm._opts.http_options
+                or types.HttpOptions(timeout=int(self._conn_options.timeout * 1000))
             ),
             **self._extra_kwargs,
         )
livekit/plugins/google/stt.py
CHANGED
@@ -381,11 +381,14 @@ class SpeechStream(stt.SpeechStream):
         self._reconnect_event.set()

     async def _run(self) -> None:
+        audio_pushed = False
+
         # google requires a async generator when calling streaming_recognize
         # this function basically convert the queue into a async generator
         async def input_generator(
             client: SpeechAsyncClient, should_stop: asyncio.Event
         ) -> AsyncGenerator[cloud_speech.StreamingRecognizeRequest, None]:
+            nonlocal audio_pushed
             try:
                 # first request should contain the config
                 yield cloud_speech.StreamingRecognizeRequest(
@@ -402,6 +405,8 @@ class SpeechStream(stt.SpeechStream):

                 if isinstance(frame, rtc.AudioFrame):
                     yield cloud_speech.StreamingRecognizeRequest(audio=frame.data.tobytes())
+                    if not audio_pushed:
+                        audio_pushed = True

             except Exception:
                 logger.exception("an error occurred while streaming input to google STT")
@@ -470,6 +475,7 @@ class SpeechStream(stt.SpeechStream):
         has_started = False

         while True:
+            audio_pushed = False
             try:
                 async with self._pool.connection(timeout=self._conn_options.timeout) as client:
                     self._streaming_config = cloud_speech.StreamingRecognitionConfig(
@@ -514,13 +520,21 @@ class SpeechStream(stt.SpeechStream):
                             break
                         self._reconnect_event.clear()
                     finally:
-                        await utils.aio.gracefully_cancel(process_stream_task, wait_reconnect_task)
                         should_stop.set()
+                        if not process_stream_task.done() and not wait_reconnect_task.done():
+                            # try to gracefully stop the process_stream_task
+                            try:
+                                await asyncio.wait_for(process_stream_task, timeout=1.0)
+                            except asyncio.TimeoutError:
+                                pass
+
+                        await utils.aio.gracefully_cancel(process_stream_task, wait_reconnect_task)
             except DeadlineExceeded:
                 raise APITimeoutError() from None
             except GoogleAPICallError as e:
                 if e.code == 409:
-
+                    if audio_pushed:
+                        logger.debug("stream timed out, restarting.")
                 else:
                     raise APIStatusError(
                         f"{e.message} {e.details}", status_code=e.code or -1
livekit/plugins/google/tts.py
CHANGED
@@ -46,6 +46,8 @@ class _TTSOptions:
     effects_profile_id: str
     speaking_rate: float
     tokenizer: tokenize.SentenceTokenizer
+    volume_gain_db: float
+    enable_ssml: bool


 class TTS(tts.TTS):
@@ -59,12 +61,14 @@ class TTS(tts.TTS):
         pitch: int = 0,
         effects_profile_id: str = "",
         speaking_rate: float = 1.0,
+        volume_gain_db: float = 0.0,
         location: str = "global",
         audio_encoding: texttospeech.AudioEncoding = texttospeech.AudioEncoding.OGG_OPUS,  # type: ignore
         credentials_info: NotGivenOr[dict] = NOT_GIVEN,
         credentials_file: NotGivenOr[str] = NOT_GIVEN,
         tokenizer: NotGivenOr[tokenize.SentenceTokenizer] = NOT_GIVEN,
         use_streaming: bool = True,
+        enable_ssml: bool = False,
     ) -> None:
         """
         Create a new instance of Google TTS.
@@ -82,10 +86,12 @@ class TTS(tts.TTS):
             pitch (float, optional): Speaking pitch, ranging from -20.0 to 20.0 semitones relative to the original pitch. Default is 0.
             effects_profile_id (str): Optional identifier for selecting audio effects profiles to apply to the synthesized speech.
             speaking_rate (float, optional): Speed of speech. Default is 1.0.
+            volume_gain_db (float, optional): Volume gain in decibels. Default is 0.0. In the range [-96.0, 16.0]. Strongly recommended not to exceed +10 (dB).
             credentials_info (dict, optional): Dictionary containing Google Cloud credentials. Default is None.
             credentials_file (str, optional): Path to the Google Cloud credentials JSON file. Default is None.
             tokenizer (tokenize.SentenceTokenizer, optional): Tokenizer for the TTS. Default is a basic sentence tokenizer.
             use_streaming (bool, optional): Whether to use streaming synthesis. Default is True.
+            enable_ssml (bool, optional): Whether to enable SSML support. Default is False.
         """  # noqa: E501
         super().__init__(
             capabilities=tts.TTSCapabilities(streaming=use_streaming),
@@ -93,6 +99,9 @@ class TTS(tts.TTS):
             num_channels=1,
         )

+        if enable_ssml and use_streaming:
+            raise ValueError("SSML support is not available for streaming synthesis")
+
         self._client: texttospeech.TextToSpeechAsyncClient | None = None
         self._credentials_info = credentials_info
         self._credentials_file = credentials_file
@@ -118,6 +127,8 @@ class TTS(tts.TTS):
             effects_profile_id=effects_profile_id,
             speaking_rate=speaking_rate,
             tokenizer=tokenizer,
+            volume_gain_db=volume_gain_db,
+            enable_ssml=enable_ssml,
         )
         self._streams = weakref.WeakSet[SynthesizeStream]()

@@ -128,6 +139,7 @@ class TTS(tts.TTS):
         gender: NotGivenOr[Gender | str] = NOT_GIVEN,
         voice_name: NotGivenOr[str] = NOT_GIVEN,
         speaking_rate: NotGivenOr[float] = NOT_GIVEN,
+        volume_gain_db: NotGivenOr[float] = NOT_GIVEN,
     ) -> None:
         """
         Update the TTS options.
@@ -137,6 +149,7 @@ class TTS(tts.TTS):
             gender (Gender | str, optional): Voice gender ("male", "female", "neutral").
             voice_name (str, optional): Specific voice name.
             speaking_rate (float, optional): Speed of speech.
+            volume_gain_db (float, optional): Volume gain in decibels.
         """
         params = {}
         if is_given(language):
@@ -151,6 +164,8 @@ class TTS(tts.TTS):

         if is_given(speaking_rate):
             self._opts.speaking_rate = speaking_rate
+        if is_given(volume_gain_db):
+            self._opts.volume_gain_db = volume_gain_db

     def _ensure_client(self) -> texttospeech.TextToSpeechAsyncClient:
         api_endpoint = "texttospeech.googleapis.com"
@@ -199,10 +214,21 @@ class ChunkedStream(tts.ChunkedStream):
         self._tts: TTS = tts
         self._opts = replace(tts._opts)

+    def _build_ssml(self) -> str:
+        ssml = "<speak>"
+        ssml += self._input_text
+        ssml += "</speak>"
+        return ssml
+
     async def _run(self, output_emitter: tts.AudioEmitter) -> None:
         try:
+            input = (
+                texttospeech.SynthesisInput(ssml=self._build_ssml())
+                if self._opts.enable_ssml
+                else texttospeech.SynthesisInput(text=self._input_text)
+            )
             response: SynthesizeSpeechResponse = await self._tts._ensure_client().synthesize_speech(
-                input=
+                input=input,
                 voice=self._opts.voice,
                 audio_config=texttospeech.AudioConfig(
                     audio_encoding=self._opts.encoding,
@@ -210,6 +236,7 @@ class ChunkedStream(tts.ChunkedStream):
                     pitch=self._opts.pitch,
                     effects_profile_id=self._opts.effects_profile_id,
                     speaking_rate=self._opts.speaking_rate,
+                    volume_gain_db=self._opts.volume_gain_db,
                 ),
                 timeout=self._conn_options.timeout,
             )
@@ -256,7 +283,9 @@ class SynthesizeStream(tts.SynthesizeStream):
             streaming_config = texttospeech.StreamingSynthesizeConfig(
                 voice=self._opts.voice,
                 streaming_audio_config=texttospeech.StreamingAudioConfig(
-                    audio_encoding=encoding,
+                    audio_encoding=encoding,
+                    sample_rate_hertz=self._opts.sample_rate,
+                    speaking_rate=self._opts.speaking_rate,
                 ),
             )

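On the TTS side, `volume_gain_db` is now threaded through `_TTSOptions`, `update_options()`, and the non-streaming `AudioConfig`, while `enable_ssml` wraps the input text in `<speak>...</speak>` for chunked synthesis only (the constructor raises `ValueError` if it is combined with `use_streaming=True`); streaming synthesis additionally forwards `sample_rate_hertz` and `speaking_rate`. A usage sketch with illustrative values:

```python
from livekit.plugins import google

# Illustrative: SSML is only supported for non-streaming synthesis,
# so use_streaming must be False when enable_ssml is True.
tts = google.TTS(
    use_streaming=False,
    enable_ssml=True,
    volume_gain_db=-3.0,  # valid range [-96.0, 16.0]; staying under +10 dB is recommended
)

# The gain can also be adjusted later without recreating the TTS instance.
tts.update_options(volume_gain_db=0.0)
```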
{livekit_plugins_google-1.1.0.dist-info → livekit_plugins_google-1.1.2.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: livekit-plugins-google
-Version: 1.1.0
+Version: 1.1.2
 Summary: Agent Framework plugin for services from Google Cloud
 Project-URL: Documentation, https://docs.livekit.io
 Project-URL: Website, https://livekit.io/
@@ -20,9 +20,9 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.9.0
 Requires-Dist: google-auth<3,>=2
 Requires-Dist: google-cloud-speech<3,>=2
-Requires-Dist: google-cloud-texttospeech<3,>=2.
-Requires-Dist: google-genai>=v1.
-Requires-Dist: livekit-agents>=1.1.
+Requires-Dist: google-cloud-texttospeech<3,>=2.27
+Requires-Dist: google-genai>=v1.21.1
+Requires-Dist: livekit-agents>=1.1.2
 Description-Content-Type: text/markdown

 # Google AI plugin for LiveKit Agents
{livekit_plugins_google-1.1.0.dist-info → livekit_plugins_google-1.1.2.dist-info}/RECORD
CHANGED
@@ -1,17 +1,17 @@
 livekit/plugins/google/__init__.py,sha256=XIyZ-iFnRBpaLtOJgVwojlB-a8GjdDugVFcjBpMEww8,1412
-livekit/plugins/google/llm.py,sha256=
+livekit/plugins/google/llm.py,sha256=Feb2ixNN9YoDt3aPXkQNeVx2c-wkmrf-mv4r3vggY1s,18131
 livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
 livekit/plugins/google/models.py,sha256=hOpfbN_qdQ1ZTpCN9m9dvG2eb6WgQ3KE3WRpIeeM_T0,1569
 livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-livekit/plugins/google/stt.py,sha256=
+livekit/plugins/google/stt.py,sha256=ssDMH5U1vQOLA44XMlovYWIR4UqVtZSge3YFN-zZ7Iw,24696
 livekit/plugins/google/tools.py,sha256=tD5HVDHO5JfUF029Cx3axHMJec0Gxalkl7s1FDgxLzI,259
-livekit/plugins/google/tts.py,sha256=
+livekit/plugins/google/tts.py,sha256=YTfce55MWNJyDH4k8U1O2giOcrtccTs8vrkiW9GuBR0,15541
 livekit/plugins/google/utils.py,sha256=-4z6wrjVaZPtFRowkpwaA2acBRfqtzTk4r2xrPDUdCk,8609
-livekit/plugins/google/version.py,sha256=
+livekit/plugins/google/version.py,sha256=gqaIRup9hxsq6YNsBlKPmS5PL-B8yqSRTd8wRfj8zoQ,600
 livekit/plugins/google/beta/__init__.py,sha256=5PnoG3Ux24bjzMSzmTeSVljE9EINivGcbWUEV6egGnM,216
 livekit/plugins/google/beta/realtime/__init__.py,sha256=_fW2NMN22F-hnQ4xAJ_g5lPbR7CvM_xXzSWlUQY-E-U,188
 livekit/plugins/google/beta/realtime/api_proto.py,sha256=NfE7xr2N3JOu7gVfWbAmDcEhs8vuZgMRu5vpScPJzsg,776
-livekit/plugins/google/beta/realtime/realtime_api.py,sha256=
-livekit_plugins_google-1.1.
-livekit_plugins_google-1.1.
-livekit_plugins_google-1.1.
+livekit/plugins/google/beta/realtime/realtime_api.py,sha256=tlAsTFsumqOavC9JT2SuQi_3eGYygZ3bbS-nEM7ea8Q,46293
+livekit_plugins_google-1.1.2.dist-info/METADATA,sha256=cAk_E0o73mOJ1wFsuUFzmzW4vZ2B_lbM2O3aZeHoHq4,1907
+livekit_plugins_google-1.1.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+livekit_plugins_google-1.1.2.dist-info/RECORD,,

{livekit_plugins_google-1.1.0.dist-info → livekit_plugins_google-1.1.2.dist-info}/WHEEL
File without changes