livekit-plugins-google 1.2.8__py3-none-any.whl → 1.2.11__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of livekit-plugins-google might be problematic. Click here for more details.
- livekit/plugins/google/beta/realtime/realtime_api.py +10 -3
- livekit/plugins/google/llm.py +1 -1
- livekit/plugins/google/stt.py +5 -0
- livekit/plugins/google/tts.py +27 -14
- livekit/plugins/google/version.py +1 -1
- {livekit_plugins_google-1.2.8.dist-info → livekit_plugins_google-1.2.11.dist-info}/METADATA +2 -2
- {livekit_plugins_google-1.2.8.dist-info → livekit_plugins_google-1.2.11.dist-info}/RECORD +8 -8
- {livekit_plugins_google-1.2.8.dist-info → livekit_plugins_google-1.2.11.dist-info}/WHEEL +0 -0
|
@@ -202,6 +202,7 @@ class RealtimeModel(llm.RealtimeModel):
|
|
|
202
202
|
user_transcription=input_audio_transcription is not None,
|
|
203
203
|
auto_tool_reply_generation=True,
|
|
204
204
|
audio_output=types.Modality.AUDIO in modalities,
|
|
205
|
+
manual_function_calls=False,
|
|
205
206
|
)
|
|
206
207
|
)
|
|
207
208
|
|
|
@@ -304,6 +305,10 @@ class RealtimeModel(llm.RealtimeModel):
|
|
|
304
305
|
async def aclose(self) -> None:
|
|
305
306
|
pass
|
|
306
307
|
|
|
308
|
+
@property
|
|
309
|
+
def model(self) -> str:
|
|
310
|
+
return self._opts.model
|
|
311
|
+
|
|
307
312
|
|
|
308
313
|
class RealtimeSession(llm.RealtimeSession):
|
|
309
314
|
def __init__(self, realtime_model: RealtimeModel) -> None:
|
|
@@ -775,7 +780,7 @@ class RealtimeSession(llm.RealtimeSession):
|
|
|
775
780
|
),
|
|
776
781
|
language_code=self._opts.language if is_given(self._opts.language) else None,
|
|
777
782
|
),
|
|
778
|
-
tools=tools_config,
|
|
783
|
+
tools=tools_config,
|
|
779
784
|
input_audio_transcription=self._opts.input_audio_transcription,
|
|
780
785
|
output_audio_transcription=self._opts.output_audio_transcription,
|
|
781
786
|
session_resumption=types.SessionResumptionConfig(
|
|
@@ -829,6 +834,7 @@ class RealtimeSession(llm.RealtimeSession):
|
|
|
829
834
|
message_stream=self._current_generation.message_ch,
|
|
830
835
|
function_stream=self._current_generation.function_ch,
|
|
831
836
|
user_initiated=False,
|
|
837
|
+
response_id=self._current_generation.response_id,
|
|
832
838
|
)
|
|
833
839
|
|
|
834
840
|
if self._pending_generation_fut and not self._pending_generation_fut.done():
|
|
@@ -969,7 +975,7 @@ class RealtimeSession(llm.RealtimeSession):
|
|
|
969
975
|
gen.function_ch.send_nowait(
|
|
970
976
|
llm.FunctionCall(
|
|
971
977
|
call_id=fnc_call.id or utils.shortuuid("fnc-call-"),
|
|
972
|
-
name=fnc_call.name,
|
|
978
|
+
name=fnc_call.name,
|
|
973
979
|
arguments=arguments,
|
|
974
980
|
)
|
|
975
981
|
)
|
|
@@ -1018,7 +1024,8 @@ class RealtimeSession(llm.RealtimeSession):
|
|
|
1018
1024
|
return token_details_map
|
|
1019
1025
|
|
|
1020
1026
|
metrics = RealtimeModelMetrics(
|
|
1021
|
-
label=self._realtime_model.
|
|
1027
|
+
label=self._realtime_model.label,
|
|
1028
|
+
model=self._realtime_model.model,
|
|
1022
1029
|
request_id=current_gen.response_id,
|
|
1023
1030
|
timestamp=current_gen._created_timestamp,
|
|
1024
1031
|
duration=duration,
|
livekit/plugins/google/llm.py
CHANGED
|
@@ -423,7 +423,7 @@ class LLMStream(llm.LLMStream):
|
|
|
423
423
|
tool_calls=[
|
|
424
424
|
llm.FunctionToolCall(
|
|
425
425
|
arguments=json.dumps(part.function_call.args),
|
|
426
|
-
name=part.function_call.name,
|
|
426
|
+
name=part.function_call.name,
|
|
427
427
|
call_id=part.function_call.id or utils.shortuuid("function_call_"),
|
|
428
428
|
)
|
|
429
429
|
],
|
livekit/plugins/google/stt.py
CHANGED
|
@@ -70,6 +70,7 @@ class STTOptions:
|
|
|
70
70
|
spoken_punctuation: bool
|
|
71
71
|
enable_word_time_offsets: bool
|
|
72
72
|
enable_word_confidence: bool
|
|
73
|
+
enable_voice_activity_events: bool
|
|
73
74
|
model: SpeechModels | str
|
|
74
75
|
sample_rate: int
|
|
75
76
|
min_confidence_threshold: float
|
|
@@ -103,6 +104,7 @@ class STT(stt.STT):
|
|
|
103
104
|
spoken_punctuation: bool = False,
|
|
104
105
|
enable_word_time_offsets: bool = True,
|
|
105
106
|
enable_word_confidence: bool = False,
|
|
107
|
+
enable_voice_activity_events: bool = False,
|
|
106
108
|
model: SpeechModels | str = "latest_long",
|
|
107
109
|
location: str = "global",
|
|
108
110
|
sample_rate: int = 16000,
|
|
@@ -127,6 +129,7 @@ class STT(stt.STT):
|
|
|
127
129
|
spoken_punctuation(bool): whether to use spoken punctuation (default: False)
|
|
128
130
|
enable_word_time_offsets(bool): whether to enable word time offsets (default: True)
|
|
129
131
|
enable_word_confidence(bool): whether to enable word confidence (default: False)
|
|
132
|
+
enable_voice_activity_events(bool): whether to enable voice activity events (default: False)
|
|
130
133
|
model(SpeechModels): the model to use for recognition default: "latest_long"
|
|
131
134
|
location(str): the location to use for recognition default: "global"
|
|
132
135
|
sample_rate(int): the sample rate of the audio default: 16000
|
|
@@ -168,6 +171,7 @@ class STT(stt.STT):
|
|
|
168
171
|
spoken_punctuation=spoken_punctuation,
|
|
169
172
|
enable_word_time_offsets=enable_word_time_offsets,
|
|
170
173
|
enable_word_confidence=enable_word_confidence,
|
|
174
|
+
enable_voice_activity_events=enable_voice_activity_events,
|
|
171
175
|
model=model,
|
|
172
176
|
sample_rate=sample_rate,
|
|
173
177
|
min_confidence_threshold=min_confidence_threshold,
|
|
@@ -507,6 +511,7 @@ class SpeechStream(stt.SpeechStream):
|
|
|
507
511
|
),
|
|
508
512
|
streaming_features=cloud_speech.StreamingRecognitionFeatures(
|
|
509
513
|
interim_results=self._config.interim_results,
|
|
514
|
+
enable_voice_activity_events=self._config.enable_voice_activity_events,
|
|
510
515
|
),
|
|
511
516
|
)
|
|
512
517
|
|
livekit/plugins/google/tts.py
CHANGED
|
@@ -52,6 +52,7 @@ class _TTSOptions:
|
|
|
52
52
|
volume_gain_db: float
|
|
53
53
|
custom_pronunciations: CustomPronunciations | None
|
|
54
54
|
enable_ssml: bool
|
|
55
|
+
use_markup: bool
|
|
55
56
|
|
|
56
57
|
|
|
57
58
|
class TTS(tts.TTS):
|
|
@@ -75,6 +76,7 @@ class TTS(tts.TTS):
|
|
|
75
76
|
custom_pronunciations: NotGivenOr[CustomPronunciations] = NOT_GIVEN,
|
|
76
77
|
use_streaming: bool = True,
|
|
77
78
|
enable_ssml: bool = False,
|
|
79
|
+
use_markup: bool = False,
|
|
78
80
|
) -> None:
|
|
79
81
|
"""
|
|
80
82
|
Create a new instance of Google TTS.
|
|
@@ -100,6 +102,7 @@ class TTS(tts.TTS):
|
|
|
100
102
|
custom_pronunciations (CustomPronunciations, optional): Custom pronunciations for the TTS. Default is None.
|
|
101
103
|
use_streaming (bool, optional): Whether to use streaming synthesis. Default is True.
|
|
102
104
|
enable_ssml (bool, optional): Whether to enable SSML support. Default is False.
|
|
105
|
+
use_markup (bool, optional): Whether to enable markup input for HD voices. Default is False.
|
|
103
106
|
""" # noqa: E501
|
|
104
107
|
super().__init__(
|
|
105
108
|
capabilities=tts.TTSCapabilities(streaming=use_streaming),
|
|
@@ -107,8 +110,11 @@ class TTS(tts.TTS):
|
|
|
107
110
|
num_channels=1,
|
|
108
111
|
)
|
|
109
112
|
|
|
110
|
-
if enable_ssml
|
|
111
|
-
|
|
113
|
+
if enable_ssml:
|
|
114
|
+
if use_streaming:
|
|
115
|
+
raise ValueError("SSML support is not available for streaming synthesis")
|
|
116
|
+
if use_markup:
|
|
117
|
+
raise ValueError("SSML support is not available for markup input")
|
|
112
118
|
|
|
113
119
|
self._client: texttospeech.TextToSpeechAsyncClient | None = None
|
|
114
120
|
self._credentials_info = credentials_info
|
|
@@ -145,6 +151,7 @@ class TTS(tts.TTS):
|
|
|
145
151
|
volume_gain_db=volume_gain_db,
|
|
146
152
|
custom_pronunciations=pronunciations,
|
|
147
153
|
enable_ssml=enable_ssml,
|
|
154
|
+
use_markup=use_markup,
|
|
148
155
|
)
|
|
149
156
|
self._streams = weakref.WeakSet[SynthesizeStream]()
|
|
150
157
|
|
|
@@ -238,19 +245,21 @@ class ChunkedStream(tts.ChunkedStream):
|
|
|
238
245
|
|
|
239
246
|
async def _run(self, output_emitter: tts.AudioEmitter) -> None:
|
|
240
247
|
try:
|
|
241
|
-
|
|
242
|
-
texttospeech.SynthesisInput(
|
|
243
|
-
|
|
244
|
-
custom_pronunciations=self._opts.custom_pronunciations,
|
|
248
|
+
if self._opts.use_markup:
|
|
249
|
+
tts_input = texttospeech.SynthesisInput(
|
|
250
|
+
markup=self._input_text, custom_pronunciations=self._opts.custom_pronunciations
|
|
245
251
|
)
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
custom_pronunciations=self._opts.custom_pronunciations,
|
|
252
|
+
elif self._opts.enable_ssml:
|
|
253
|
+
tts_input = texttospeech.SynthesisInput(
|
|
254
|
+
ssml=self._build_ssml(), custom_pronunciations=self._opts.custom_pronunciations
|
|
250
255
|
)
|
|
251
|
-
|
|
256
|
+
else:
|
|
257
|
+
tts_input = texttospeech.SynthesisInput(
|
|
258
|
+
text=self._input_text, custom_pronunciations=self._opts.custom_pronunciations
|
|
259
|
+
)
|
|
260
|
+
|
|
252
261
|
response: SynthesizeSpeechResponse = await self._tts._ensure_client().synthesize_speech(
|
|
253
|
-
input=
|
|
262
|
+
input=tts_input,
|
|
254
263
|
voice=self._opts.voice,
|
|
255
264
|
audio_config=texttospeech.AudioConfig(
|
|
256
265
|
audio_encoding=self._opts.encoding,
|
|
@@ -355,8 +364,12 @@ class SynthesizeStream(tts.SynthesizeStream):
|
|
|
355
364
|
|
|
356
365
|
async for input in input_stream:
|
|
357
366
|
self._mark_started()
|
|
358
|
-
yield
|
|
359
|
-
|
|
367
|
+
yield (
|
|
368
|
+
texttospeech.StreamingSynthesizeRequest(
|
|
369
|
+
input=texttospeech.StreamingSynthesisInput(markup=input.token)
|
|
370
|
+
if self._opts.use_markup
|
|
371
|
+
else texttospeech.StreamingSynthesisInput(text=input.token)
|
|
372
|
+
)
|
|
360
373
|
)
|
|
361
374
|
|
|
362
375
|
except Exception:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: livekit-plugins-google
|
|
3
|
-
Version: 1.2.
|
|
3
|
+
Version: 1.2.11
|
|
4
4
|
Summary: Agent Framework plugin for services from Google Cloud
|
|
5
5
|
Project-URL: Documentation, https://docs.livekit.io
|
|
6
6
|
Project-URL: Website, https://livekit.io/
|
|
@@ -22,7 +22,7 @@ Requires-Dist: google-auth<3,>=2
|
|
|
22
22
|
Requires-Dist: google-cloud-speech<3,>=2
|
|
23
23
|
Requires-Dist: google-cloud-texttospeech<3,>=2.27
|
|
24
24
|
Requires-Dist: google-genai>=v1.23.0
|
|
25
|
-
Requires-Dist: livekit-agents>=1.2.
|
|
25
|
+
Requires-Dist: livekit-agents>=1.2.11
|
|
26
26
|
Description-Content-Type: text/markdown
|
|
27
27
|
|
|
28
28
|
# Google AI plugin for LiveKit Agents
|
|
@@ -1,18 +1,18 @@
|
|
|
1
1
|
livekit/plugins/google/__init__.py,sha256=XIyZ-iFnRBpaLtOJgVwojlB-a8GjdDugVFcjBpMEww8,1412
|
|
2
|
-
livekit/plugins/google/llm.py,sha256=
|
|
2
|
+
livekit/plugins/google/llm.py,sha256=aeeGqhbEScbEs-GKp1T8rLocNqmvG4UBj6diekYe4FU,18809
|
|
3
3
|
livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
|
|
4
4
|
livekit/plugins/google/models.py,sha256=poOvUBvgpqmmQV5EUQsq0RgNIRAq7nH-_IZIcIfPSBI,2801
|
|
5
5
|
livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
-
livekit/plugins/google/stt.py,sha256=
|
|
6
|
+
livekit/plugins/google/stt.py,sha256=RhbDkCbrGWb8R7feS5w766fwUNJEbg7hHlba7Oq9lBI,26305
|
|
7
7
|
livekit/plugins/google/tools.py,sha256=tD5HVDHO5JfUF029Cx3axHMJec0Gxalkl7s1FDgxLzI,259
|
|
8
|
-
livekit/plugins/google/tts.py,sha256=
|
|
8
|
+
livekit/plugins/google/tts.py,sha256=LBLP3pEq1iCCgfidpRTtpeoDKYmXh8PKeJf1llAsybQ,17302
|
|
9
9
|
livekit/plugins/google/utils.py,sha256=z0iCP6-hYix3JRm2RM5JOBEJCICehUe5N4FTl-JpXLc,9269
|
|
10
|
-
livekit/plugins/google/version.py,sha256=
|
|
10
|
+
livekit/plugins/google/version.py,sha256=xsWwuH5qgJrB3wPPfmZaiEH7zObN2yGboBmyrTHj-b8,601
|
|
11
11
|
livekit/plugins/google/beta/__init__.py,sha256=RvAUdvEiRN-fe4JrgPcN0Jkw1kZR9wPerGMFVjS1Cc0,270
|
|
12
12
|
livekit/plugins/google/beta/gemini_tts.py,sha256=esWjr0Xf95tl0_AB7MXiFZ_VCORWgcWjzvLvRa3t0FQ,8515
|
|
13
13
|
livekit/plugins/google/beta/realtime/__init__.py,sha256=_fW2NMN22F-hnQ4xAJ_g5lPbR7CvM_xXzSWlUQY-E-U,188
|
|
14
14
|
livekit/plugins/google/beta/realtime/api_proto.py,sha256=nb_QkVQDEH7h0SKA9vdS3JaL12a6t2Z1ja4SdnxE6a8,814
|
|
15
|
-
livekit/plugins/google/beta/realtime/realtime_api.py,sha256=
|
|
16
|
-
livekit_plugins_google-1.2.
|
|
17
|
-
livekit_plugins_google-1.2.
|
|
18
|
-
livekit_plugins_google-1.2.
|
|
15
|
+
livekit/plugins/google/beta/realtime/realtime_api.py,sha256=RALLfKWb8c4K8ennINDLeVxKrP5JXvGa_nNGP0_ASlI,48012
|
|
16
|
+
livekit_plugins_google-1.2.11.dist-info/METADATA,sha256=ulwiM6njmKCQG_8e1imKwV6oG0IHuXRahBSB0UI0OBM,1909
|
|
17
|
+
livekit_plugins_google-1.2.11.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
18
|
+
livekit_plugins_google-1.2.11.dist-info/RECORD,,
|
|
File without changes
|