livekit-plugins-google 0.7.3__py3-none-any.whl → 0.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- livekit/plugins/google/models.py +7 -1
- livekit/plugins/google/stt.py +210 -125
- livekit/plugins/google/tts.py +24 -9
- livekit/plugins/google/version.py +1 -1
- {livekit_plugins_google-0.7.3.dist-info → livekit_plugins_google-0.8.1.dist-info}/METADATA +5 -5
- livekit_plugins_google-0.8.1.dist-info/RECORD +11 -0
- {livekit_plugins_google-0.7.3.dist-info → livekit_plugins_google-0.8.1.dist-info}/WHEEL +1 -1
- livekit_plugins_google-0.7.3.dist-info/RECORD +0 -11
- {livekit_plugins_google-0.7.3.dist-info → livekit_plugins_google-0.8.1.dist-info}/top_level.txt +0 -0
livekit/plugins/google/models.py
CHANGED
@@ -3,7 +3,13 @@ from typing import Literal
|
|
3
3
|
# Speech to Text v2
|
4
4
|
|
5
5
|
SpeechModels = Literal[
|
6
|
-
"long",
|
6
|
+
"long",
|
7
|
+
"short",
|
8
|
+
"telephony",
|
9
|
+
"medical_dictation",
|
10
|
+
"medical_conversation",
|
11
|
+
"chirp",
|
12
|
+
"chirp_2",
|
7
13
|
]
|
8
14
|
|
9
15
|
SpeechLanguages = Literal[
|
livekit/plugins/google/stt.py
CHANGED
@@ -16,19 +16,23 @@ from __future__ import annotations
|
|
16
16
|
|
17
17
|
import asyncio
|
18
18
|
import dataclasses
|
19
|
+
import weakref
|
19
20
|
from dataclasses import dataclass
|
20
|
-
from typing import
|
21
|
+
from typing import List, Union
|
21
22
|
|
22
|
-
from livekit import
|
23
|
+
from livekit import rtc
|
23
24
|
from livekit.agents import (
|
25
|
+
DEFAULT_API_CONNECT_OPTIONS,
|
24
26
|
APIConnectionError,
|
27
|
+
APIConnectOptions,
|
25
28
|
APIStatusError,
|
26
29
|
APITimeoutError,
|
27
30
|
stt,
|
28
31
|
utils,
|
29
32
|
)
|
30
33
|
|
31
|
-
from google.api_core.
|
34
|
+
from google.api_core.client_options import ClientOptions
|
35
|
+
from google.api_core.exceptions import DeadlineExceeded, GoogleAPICallError
|
32
36
|
from google.auth import default as gauth_default
|
33
37
|
from google.auth.exceptions import DefaultCredentialsError
|
34
38
|
from google.cloud.speech_v2 import SpeechAsyncClient
|
@@ -50,6 +54,7 @@ class STTOptions:
|
|
50
54
|
punctuate: bool
|
51
55
|
spoken_punctuation: bool
|
52
56
|
model: SpeechModels
|
57
|
+
sample_rate: int
|
53
58
|
keywords: List[tuple[str, float]] | None
|
54
59
|
|
55
60
|
def build_adaptation(self) -> cloud_speech.SpeechAdaptation | None:
|
@@ -81,6 +86,8 @@ class STT(stt.STT):
|
|
81
86
|
punctuate: bool = True,
|
82
87
|
spoken_punctuation: bool = True,
|
83
88
|
model: SpeechModels = "long",
|
89
|
+
location: str = "global",
|
90
|
+
sample_rate: int = 16000,
|
84
91
|
credentials_info: dict | None = None,
|
85
92
|
credentials_file: str | None = None,
|
86
93
|
keywords: List[tuple[str, float]] | None = None,
|
@@ -97,6 +104,7 @@ class STT(stt.STT):
|
|
97
104
|
)
|
98
105
|
|
99
106
|
self._client: SpeechAsyncClient | None = None
|
107
|
+
self._location = location
|
100
108
|
self._credentials_info = credentials_info
|
101
109
|
self._credentials_file = credentials_file
|
102
110
|
|
@@ -120,8 +128,10 @@ class STT(stt.STT):
|
|
120
128
|
punctuate=punctuate,
|
121
129
|
spoken_punctuation=spoken_punctuation,
|
122
130
|
model=model,
|
131
|
+
sample_rate=sample_rate,
|
123
132
|
keywords=keywords,
|
124
133
|
)
|
134
|
+
self._streams = weakref.WeakSet[SpeechStream]()
|
125
135
|
|
126
136
|
def _ensure_client(self) -> SpeechAsyncClient:
|
127
137
|
if self._credentials_info:
|
@@ -132,9 +142,16 @@ class STT(stt.STT):
|
|
132
142
|
self._client = SpeechAsyncClient.from_service_account_file(
|
133
143
|
self._credentials_file
|
134
144
|
)
|
135
|
-
|
145
|
+
elif self._location == "global":
|
136
146
|
self._client = SpeechAsyncClient()
|
137
|
-
|
147
|
+
else:
|
148
|
+
# Add support for passing a specific location that matches recognizer
|
149
|
+
# see: https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages
|
150
|
+
self._client = SpeechAsyncClient(
|
151
|
+
client_options=ClientOptions(
|
152
|
+
api_endpoint=f"{self._location}-speech.googleapis.com"
|
153
|
+
)
|
154
|
+
)
|
138
155
|
assert self._client is not None
|
139
156
|
return self._client
|
140
157
|
|
@@ -150,7 +167,7 @@ class STT(stt.STT):
|
|
150
167
|
from google.auth import default as ga_default
|
151
168
|
|
152
169
|
_, project_id = ga_default()
|
153
|
-
return f"projects/{project_id}/locations/
|
170
|
+
return f"projects/{project_id}/locations/{self._location}/recognizers/_"
|
154
171
|
|
155
172
|
def _sanitize_options(self, *, language: str | None = None) -> STTOptions:
|
156
173
|
config = dataclasses.replace(self._config)
|
@@ -173,10 +190,11 @@ class STT(stt.STT):
|
|
173
190
|
self,
|
174
191
|
buffer: utils.AudioBuffer,
|
175
192
|
*,
|
176
|
-
language: SpeechLanguages | str | None
|
193
|
+
language: SpeechLanguages | str | None,
|
194
|
+
conn_options: APIConnectOptions,
|
177
195
|
) -> stt.SpeechEvent:
|
178
196
|
config = self._sanitize_options(language=language)
|
179
|
-
frame =
|
197
|
+
frame = rtc.combine_audio_frames(buffer)
|
180
198
|
|
181
199
|
config = cloud_speech.RecognitionConfig(
|
182
200
|
explicit_decoding_config=cloud_speech.ExplicitDecodingConfig(
|
@@ -200,7 +218,8 @@ class STT(stt.STT):
|
|
200
218
|
recognizer=self._recognizer,
|
201
219
|
config=config,
|
202
220
|
content=frame.data.tobytes(),
|
203
|
-
)
|
221
|
+
),
|
222
|
+
timeout=conn_options.timeout,
|
204
223
|
)
|
205
224
|
|
206
225
|
return _recognize_response_to_speech_event(raw)
|
@@ -217,154 +236,220 @@ class STT(stt.STT):
|
|
217
236
|
raise APIConnectionError() from e
|
218
237
|
|
219
238
|
def stream(
|
220
|
-
self,
|
239
|
+
self,
|
240
|
+
*,
|
241
|
+
language: SpeechLanguages | str | None = None,
|
242
|
+
conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
|
221
243
|
) -> "SpeechStream":
|
222
244
|
config = self._sanitize_options(language=language)
|
223
|
-
|
245
|
+
stream = SpeechStream(
|
246
|
+
stt=self,
|
247
|
+
client=self._ensure_client(),
|
248
|
+
recognizer=self._recognizer,
|
249
|
+
config=config,
|
250
|
+
conn_options=conn_options,
|
251
|
+
)
|
252
|
+
self._streams.add(stream)
|
253
|
+
return stream
|
254
|
+
|
255
|
+
def update_options(
|
256
|
+
self,
|
257
|
+
*,
|
258
|
+
languages: LanguageCode | None = None,
|
259
|
+
detect_language: bool | None = None,
|
260
|
+
interim_results: bool | None = None,
|
261
|
+
punctuate: bool | None = None,
|
262
|
+
spoken_punctuation: bool | None = None,
|
263
|
+
model: SpeechModels | None = None,
|
264
|
+
location: str | None = None,
|
265
|
+
keywords: List[tuple[str, float]] | None = None,
|
266
|
+
):
|
267
|
+
if languages is not None:
|
268
|
+
if isinstance(languages, str):
|
269
|
+
languages = [languages]
|
270
|
+
self._config.languages = languages
|
271
|
+
if detect_language is not None:
|
272
|
+
self._config.detect_language = detect_language
|
273
|
+
if interim_results is not None:
|
274
|
+
self._config.interim_results = interim_results
|
275
|
+
if punctuate is not None:
|
276
|
+
self._config.punctuate = punctuate
|
277
|
+
if spoken_punctuation is not None:
|
278
|
+
self._config.spoken_punctuation = spoken_punctuation
|
279
|
+
if model is not None:
|
280
|
+
self._config.model = model
|
281
|
+
if keywords is not None:
|
282
|
+
self._config.keywords = keywords
|
283
|
+
|
284
|
+
for stream in self._streams:
|
285
|
+
stream.update_options(
|
286
|
+
languages=languages,
|
287
|
+
detect_language=detect_language,
|
288
|
+
interim_results=interim_results,
|
289
|
+
punctuate=punctuate,
|
290
|
+
spoken_punctuation=spoken_punctuation,
|
291
|
+
model=model,
|
292
|
+
location=location,
|
293
|
+
keywords=keywords,
|
294
|
+
)
|
224
295
|
|
225
296
|
|
226
297
|
class SpeechStream(stt.SpeechStream):
|
227
298
|
def __init__(
|
228
299
|
self,
|
300
|
+
*,
|
229
301
|
stt: STT,
|
302
|
+
conn_options: APIConnectOptions,
|
230
303
|
client: SpeechAsyncClient,
|
231
304
|
recognizer: str,
|
232
305
|
config: STTOptions,
|
233
|
-
sample_rate: int = 48000,
|
234
|
-
num_channels: int = 1,
|
235
|
-
max_retry: int = 32,
|
236
306
|
) -> None:
|
237
|
-
super().__init__(
|
307
|
+
super().__init__(
|
308
|
+
stt=stt, conn_options=conn_options, sample_rate=config.sample_rate
|
309
|
+
)
|
238
310
|
|
239
311
|
self._client = client
|
240
312
|
self._recognizer = recognizer
|
241
313
|
self._config = config
|
242
|
-
self.
|
243
|
-
self._num_channels = num_channels
|
244
|
-
self._max_retry = max_retry
|
245
|
-
|
246
|
-
self._streaming_config = cloud_speech.StreamingRecognitionConfig(
|
247
|
-
config=cloud_speech.RecognitionConfig(
|
248
|
-
explicit_decoding_config=cloud_speech.ExplicitDecodingConfig(
|
249
|
-
encoding=cloud_speech.ExplicitDecodingConfig.AudioEncoding.LINEAR16,
|
250
|
-
sample_rate_hertz=self._sample_rate,
|
251
|
-
audio_channel_count=self._num_channels,
|
252
|
-
),
|
253
|
-
adaptation=config.build_adaptation(),
|
254
|
-
language_codes=self._config.languages,
|
255
|
-
model=self._config.model,
|
256
|
-
features=cloud_speech.RecognitionFeatures(
|
257
|
-
enable_automatic_punctuation=self._config.punctuate,
|
258
|
-
enable_word_time_offsets=True,
|
259
|
-
),
|
260
|
-
),
|
261
|
-
streaming_features=cloud_speech.StreamingRecognitionFeatures(
|
262
|
-
enable_voice_activity_events=True,
|
263
|
-
interim_results=self._config.interim_results,
|
264
|
-
),
|
265
|
-
)
|
314
|
+
self._reconnect_event = asyncio.Event()
|
266
315
|
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
316
|
+
def update_options(
|
317
|
+
self,
|
318
|
+
*,
|
319
|
+
languages: LanguageCode | None = None,
|
320
|
+
detect_language: bool | None = None,
|
321
|
+
interim_results: bool | None = None,
|
322
|
+
punctuate: bool | None = None,
|
323
|
+
spoken_punctuation: bool | None = None,
|
324
|
+
model: SpeechModels | None = None,
|
325
|
+
location: str | None = None,
|
326
|
+
keywords: List[tuple[str, float]] | None = None,
|
327
|
+
):
|
328
|
+
if languages is not None:
|
329
|
+
if isinstance(languages, str):
|
330
|
+
languages = [languages]
|
331
|
+
self._config.languages = languages
|
332
|
+
if detect_language is not None:
|
333
|
+
self._config.detect_language = detect_language
|
334
|
+
if interim_results is not None:
|
335
|
+
self._config.interim_results = interim_results
|
336
|
+
if punctuate is not None:
|
337
|
+
self._config.punctuate = punctuate
|
338
|
+
if spoken_punctuation is not None:
|
339
|
+
self._config.spoken_punctuation = spoken_punctuation
|
340
|
+
if model is not None:
|
341
|
+
self._config.model = model
|
342
|
+
if keywords is not None:
|
343
|
+
self._config.keywords = keywords
|
344
|
+
|
345
|
+
self._reconnect_event.set()
|
346
|
+
|
347
|
+
async def _run(self) -> None:
|
348
|
+
# google requires a async generator when calling streaming_recognize
|
349
|
+
# this function basically convert the queue into a async generator
|
350
|
+
async def input_generator():
|
274
351
|
try:
|
275
|
-
#
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
352
|
+
# first request should contain the config
|
353
|
+
yield cloud_speech.StreamingRecognizeRequest(
|
354
|
+
recognizer=self._recognizer,
|
355
|
+
streaming_config=self._streaming_config,
|
356
|
+
)
|
357
|
+
|
358
|
+
async for frame in self._input_ch:
|
359
|
+
if isinstance(frame, rtc.AudioFrame):
|
280
360
|
yield cloud_speech.StreamingRecognizeRequest(
|
281
|
-
|
282
|
-
streaming_config=self._streaming_config,
|
361
|
+
audio=frame.data.tobytes()
|
283
362
|
)
|
284
363
|
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
364
|
+
except Exception:
|
365
|
+
logger.exception(
|
366
|
+
"an error occurred while streaming input to google STT"
|
367
|
+
)
|
368
|
+
|
369
|
+
async def process_stream(stream):
|
370
|
+
async for resp in stream:
|
371
|
+
if (
|
372
|
+
resp.speech_event_type
|
373
|
+
== cloud_speech.StreamingRecognizeResponse.SpeechEventType.SPEECH_ACTIVITY_BEGIN
|
374
|
+
):
|
375
|
+
self._event_ch.send_nowait(
|
376
|
+
stt.SpeechEvent(type=stt.SpeechEventType.START_OF_SPEECH)
|
377
|
+
)
|
293
378
|
|
294
|
-
|
295
|
-
|
296
|
-
|
379
|
+
if (
|
380
|
+
resp.speech_event_type
|
381
|
+
== cloud_speech.StreamingRecognizeResponse.SpeechEventType.SPEECH_EVENT_TYPE_UNSPECIFIED
|
382
|
+
):
|
383
|
+
result = resp.results[0]
|
384
|
+
speech_data = _streaming_recognize_response_to_speech_data(resp)
|
385
|
+
if speech_data is None:
|
386
|
+
continue
|
387
|
+
|
388
|
+
if not result.is_final:
|
389
|
+
self._event_ch.send_nowait(
|
390
|
+
stt.SpeechEvent(
|
391
|
+
type=stt.SpeechEventType.INTERIM_TRANSCRIPT,
|
392
|
+
alternatives=[speech_data],
|
393
|
+
)
|
394
|
+
)
|
395
|
+
else:
|
396
|
+
self._event_ch.send_nowait(
|
397
|
+
stt.SpeechEvent(
|
398
|
+
type=stt.SpeechEventType.FINAL_TRANSCRIPT,
|
399
|
+
alternatives=[speech_data],
|
400
|
+
)
|
297
401
|
)
|
298
402
|
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
)
|
303
|
-
|
304
|
-
|
305
|
-
await self._run_stream(stream)
|
306
|
-
except Aborted:
|
307
|
-
logger.error("google stt connection aborted")
|
308
|
-
break
|
309
|
-
except Exception as e:
|
310
|
-
if retry_count >= max_retry:
|
311
|
-
logger.error(
|
312
|
-
f"failed to connect to google stt after {max_retry} tries",
|
313
|
-
exc_info=e,
|
403
|
+
if (
|
404
|
+
resp.speech_event_type
|
405
|
+
== cloud_speech.StreamingRecognizeResponse.SpeechEventType.SPEECH_ACTIVITY_END
|
406
|
+
):
|
407
|
+
self._event_ch.send_nowait(
|
408
|
+
stt.SpeechEvent(type=stt.SpeechEventType.END_OF_SPEECH)
|
314
409
|
)
|
315
|
-
break
|
316
410
|
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
411
|
+
while True:
|
412
|
+
try:
|
413
|
+
self._streaming_config = cloud_speech.StreamingRecognitionConfig(
|
414
|
+
config=cloud_speech.RecognitionConfig(
|
415
|
+
explicit_decoding_config=cloud_speech.ExplicitDecodingConfig(
|
416
|
+
encoding=cloud_speech.ExplicitDecodingConfig.AudioEncoding.LINEAR16,
|
417
|
+
sample_rate_hertz=self._config.sample_rate,
|
418
|
+
audio_channel_count=1,
|
419
|
+
),
|
420
|
+
adaptation=self._config.build_adaptation(),
|
421
|
+
language_codes=self._config.languages,
|
422
|
+
model=self._config.model,
|
423
|
+
features=cloud_speech.RecognitionFeatures(
|
424
|
+
enable_automatic_punctuation=self._config.punctuate,
|
425
|
+
enable_word_time_offsets=True,
|
426
|
+
),
|
427
|
+
),
|
428
|
+
streaming_features=cloud_speech.StreamingRecognitionFeatures(
|
429
|
+
enable_voice_activity_events=True,
|
430
|
+
interim_results=self._config.interim_results,
|
431
|
+
),
|
322
432
|
)
|
323
|
-
await asyncio.sleep(retry_delay)
|
324
433
|
|
325
|
-
|
326
|
-
|
327
|
-
):
|
328
|
-
async for resp in stream:
|
329
|
-
if (
|
330
|
-
resp.speech_event_type
|
331
|
-
== cloud_speech.StreamingRecognizeResponse.SpeechEventType.SPEECH_ACTIVITY_BEGIN
|
332
|
-
):
|
333
|
-
self._event_ch.send_nowait(
|
334
|
-
stt.SpeechEvent(type=stt.SpeechEventType.START_OF_SPEECH)
|
434
|
+
stream = await self._client.streaming_recognize(
|
435
|
+
requests=input_generator(),
|
335
436
|
)
|
336
437
|
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
if speech_data is None:
|
344
|
-
continue
|
345
|
-
|
346
|
-
if not result.is_final:
|
347
|
-
self._event_ch.send_nowait(
|
348
|
-
stt.SpeechEvent(
|
349
|
-
type=stt.SpeechEventType.INTERIM_TRANSCRIPT,
|
350
|
-
alternatives=[speech_data],
|
351
|
-
)
|
438
|
+
process_stream_task = asyncio.create_task(process_stream(stream))
|
439
|
+
wait_reconnect_task = asyncio.create_task(self._reconnect_event.wait())
|
440
|
+
try:
|
441
|
+
await asyncio.wait(
|
442
|
+
[process_stream_task, wait_reconnect_task],
|
443
|
+
return_when=asyncio.FIRST_COMPLETED,
|
352
444
|
)
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
type=stt.SpeechEventType.FINAL_TRANSCRIPT,
|
357
|
-
alternatives=[speech_data],
|
358
|
-
)
|
445
|
+
finally:
|
446
|
+
await utils.aio.gracefully_cancel(
|
447
|
+
process_stream_task, wait_reconnect_task
|
359
448
|
)
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
):
|
365
|
-
self._event_ch.send_nowait(
|
366
|
-
stt.SpeechEvent(type=stt.SpeechEventType.END_OF_SPEECH)
|
367
|
-
)
|
449
|
+
finally:
|
450
|
+
if not self._reconnect_event.is_set():
|
451
|
+
break
|
452
|
+
self._reconnect_event.clear()
|
368
453
|
|
369
454
|
|
370
455
|
def _recognize_response_to_speech_event(
|
livekit/plugins/google/tts.py
CHANGED
@@ -18,7 +18,9 @@ from dataclasses import dataclass
|
|
18
18
|
|
19
19
|
from livekit import rtc
|
20
20
|
from livekit.agents import (
|
21
|
+
DEFAULT_API_CONNECT_OPTIONS,
|
21
22
|
APIConnectionError,
|
23
|
+
APIConnectOptions,
|
22
24
|
APIStatusError,
|
23
25
|
APITimeoutError,
|
24
26
|
tts,
|
@@ -134,7 +136,7 @@ class TTS(tts.TTS):
|
|
134
136
|
self._opts.audio_config.speaking_rate = speaking_rate
|
135
137
|
|
136
138
|
def _ensure_client(self) -> texttospeech.TextToSpeechAsyncClient:
|
137
|
-
if
|
139
|
+
if self._client is None:
|
138
140
|
if self._credentials_info:
|
139
141
|
self._client = (
|
140
142
|
texttospeech.TextToSpeechAsyncClient.from_service_account_info(
|
@@ -154,22 +156,35 @@ class TTS(tts.TTS):
|
|
154
156
|
assert self._client is not None
|
155
157
|
return self._client
|
156
158
|
|
157
|
-
def synthesize(
|
158
|
-
|
159
|
+
def synthesize(
|
160
|
+
self,
|
161
|
+
text: str,
|
162
|
+
*,
|
163
|
+
conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
|
164
|
+
) -> "ChunkedStream":
|
165
|
+
return ChunkedStream(
|
166
|
+
tts=self,
|
167
|
+
input_text=text,
|
168
|
+
conn_options=conn_options,
|
169
|
+
opts=self._opts,
|
170
|
+
client=self._ensure_client(),
|
171
|
+
)
|
159
172
|
|
160
173
|
|
161
174
|
class ChunkedStream(tts.ChunkedStream):
|
162
175
|
def __init__(
|
163
176
|
self,
|
177
|
+
*,
|
164
178
|
tts: TTS,
|
165
|
-
|
179
|
+
input_text: str,
|
180
|
+
conn_options: APIConnectOptions,
|
166
181
|
opts: _TTSOptions,
|
167
182
|
client: texttospeech.TextToSpeechAsyncClient,
|
168
183
|
) -> None:
|
169
|
-
super().__init__(tts,
|
184
|
+
super().__init__(tts=tts, input_text=input_text, conn_options=conn_options)
|
170
185
|
self._opts, self._client = opts, client
|
171
186
|
|
172
|
-
async def
|
187
|
+
async def _run(self) -> None:
|
173
188
|
request_id = utils.shortuuid()
|
174
189
|
|
175
190
|
try:
|
@@ -177,16 +192,16 @@ class ChunkedStream(tts.ChunkedStream):
|
|
177
192
|
input=texttospeech.SynthesisInput(text=self._input_text),
|
178
193
|
voice=self._opts.voice,
|
179
194
|
audio_config=self._opts.audio_config,
|
195
|
+
timeout=self._conn_options.timeout,
|
180
196
|
)
|
181
197
|
|
182
|
-
data = response.audio_content
|
183
198
|
if self._opts.audio_config.audio_encoding == "mp3":
|
184
199
|
decoder = utils.codecs.Mp3StreamDecoder()
|
185
200
|
bstream = utils.audio.AudioByteStream(
|
186
201
|
sample_rate=self._opts.audio_config.sample_rate_hertz,
|
187
202
|
num_channels=1,
|
188
203
|
)
|
189
|
-
for frame in decoder.decode_chunk(
|
204
|
+
for frame in decoder.decode_chunk(response.audio_content):
|
190
205
|
for frame in bstream.write(frame.data.tobytes()):
|
191
206
|
self._event_ch.send_nowait(
|
192
207
|
tts.SynthesizedAudio(request_id=request_id, frame=frame)
|
@@ -197,7 +212,7 @@ class ChunkedStream(tts.ChunkedStream):
|
|
197
212
|
tts.SynthesizedAudio(request_id=request_id, frame=frame)
|
198
213
|
)
|
199
214
|
else:
|
200
|
-
data =
|
215
|
+
data = response.audio_content[44:] # skip WAV header
|
201
216
|
self._event_ch.send_nowait(
|
202
217
|
tts.SynthesizedAudio(
|
203
218
|
request_id=request_id,
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: livekit-plugins-google
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.8.1
|
4
4
|
Summary: Agent Framework plugin for services from Google Cloud
|
5
5
|
Home-page: https://github.com/livekit/agents
|
6
6
|
License: Apache-2.0
|
@@ -19,10 +19,10 @@ Classifier: Programming Language :: Python :: 3.10
|
|
19
19
|
Classifier: Programming Language :: Python :: 3 :: Only
|
20
20
|
Requires-Python: >=3.9.0
|
21
21
|
Description-Content-Type: text/markdown
|
22
|
-
Requires-Dist: google-auth
|
23
|
-
Requires-Dist: google-cloud-speech
|
24
|
-
Requires-Dist: google-cloud-texttospeech
|
25
|
-
Requires-Dist: livekit-agents
|
22
|
+
Requires-Dist: google-auth<3,>=2
|
23
|
+
Requires-Dist: google-cloud-speech<3,>=2
|
24
|
+
Requires-Dist: google-cloud-texttospeech<3,>=2
|
25
|
+
Requires-Dist: livekit-agents>=0.11
|
26
26
|
|
27
27
|
# LiveKit Plugins Google
|
28
28
|
|
@@ -0,0 +1,11 @@
|
|
1
|
+
livekit/plugins/google/__init__.py,sha256=rqV6C5mFNDFlrA2IcGJrsebr2VxQwMzoDUjY1JhMBZM,1117
|
2
|
+
livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
|
3
|
+
livekit/plugins/google/models.py,sha256=cBXhZGY9bFaSCyL9VeSng9wsxhf3peJi3AUYBKV-8GQ,1343
|
4
|
+
livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
+
livekit/plugins/google/stt.py,sha256=tmjktdO6C2AuJWHSKl20ae3cfy_DqfN_oNYYcE552pQ,18566
|
6
|
+
livekit/plugins/google/tts.py,sha256=95qXCigVQYWNbcN3pIKBpIah4b31U_MWtXv5Ji0AMc4,9229
|
7
|
+
livekit/plugins/google/version.py,sha256=PoHw-_DNE2B5SpeoQ-r6HSfVmbDgYuGamg0dN2jhayQ,600
|
8
|
+
livekit_plugins_google-0.8.1.dist-info/METADATA,sha256=RHRMpfHxvaWjwWStByUPghWBLY5tIuC5Lm8r9C3hEhc,1643
|
9
|
+
livekit_plugins_google-0.8.1.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
10
|
+
livekit_plugins_google-0.8.1.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
|
11
|
+
livekit_plugins_google-0.8.1.dist-info/RECORD,,
|
@@ -1,11 +0,0 @@
|
|
1
|
-
livekit/plugins/google/__init__.py,sha256=rqV6C5mFNDFlrA2IcGJrsebr2VxQwMzoDUjY1JhMBZM,1117
|
2
|
-
livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
|
3
|
-
livekit/plugins/google/models.py,sha256=n8pgTJ7xyJpPCZJ_y0GzaQq6LqYknL6K6trpi07-AxM,1307
|
4
|
-
livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
-
livekit/plugins/google/stt.py,sha256=WjeqYsunW8jY-WHlnNeks7gR-TiojMRR7LYdAVdCxqY,15268
|
6
|
-
livekit/plugins/google/tts.py,sha256=hRN8ul1lDXU8LPVEfbTszgBiRYsifZXCPMwk-Pv2KeA,8793
|
7
|
-
livekit/plugins/google/version.py,sha256=yJeG0VwiekDJAk7GHcIAe43ebagJgloe-ZsqEGZnqzE,600
|
8
|
-
livekit_plugins_google-0.7.3.dist-info/METADATA,sha256=8UvORpoVunOTq0xKxHEk8M3sexKFnBnu66DkEJCnrRY,1647
|
9
|
-
livekit_plugins_google-0.7.3.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
|
10
|
-
livekit_plugins_google-0.7.3.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
|
11
|
-
livekit_plugins_google-0.7.3.dist-info/RECORD,,
|
{livekit_plugins_google-0.7.3.dist-info → livekit_plugins_google-0.8.1.dist-info}/top_level.txt
RENAMED
File without changes
|