livekit-plugins-google: livekit_plugins_google-0.3.0-py3-none-any.whl → livekit_plugins_google-0.4.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- livekit/plugins/google/__init__.py +2 -1
- livekit/plugins/google/log.py +3 -0
- livekit/plugins/google/models.py +4 -0
- livekit/plugins/google/stt.py +6 -6
- livekit/plugins/google/tts.py +146 -0
- livekit/plugins/google/version.py +1 -1
- {livekit_plugins_google-0.3.0.dist-info → livekit_plugins_google-0.4.0.dist-info}/METADATA +3 -3
- livekit_plugins_google-0.4.0.dist-info/RECORD +11 -0
- livekit_plugins_google-0.3.0.dist-info/RECORD +0 -9
- {livekit_plugins_google-0.3.0.dist-info → livekit_plugins_google-0.4.0.dist-info}/WHEEL +0 -0
- {livekit_plugins_google-0.3.0.dist-info → livekit_plugins_google-0.4.0.dist-info}/top_level.txt +0 -0
@@ -13,9 +13,10 @@
|
|
13
13
|
# limitations under the License.
|
14
14
|
|
15
15
|
from .stt import STT, SpeechStream
|
16
|
+
from .tts import TTS
|
16
17
|
from .version import __version__
|
17
18
|
|
18
|
-
__all__ = ["STT", "SpeechStream", "__version__"]
|
19
|
+
__all__ = ["STT", "TTS", "SpeechStream", "__version__"]
|
19
20
|
|
20
21
|
from livekit.agents import Plugin
|
21
22
|
|
livekit/plugins/google/models.py
CHANGED
livekit/plugins/google/stt.py
CHANGED
@@ -17,7 +17,6 @@ from __future__ import annotations
|
|
17
17
|
import asyncio
|
18
18
|
import contextlib
|
19
19
|
import dataclasses
|
20
|
-
import logging
|
21
20
|
from dataclasses import dataclass
|
22
21
|
from typing import Any, AsyncIterable, Dict, List
|
23
22
|
|
@@ -29,6 +28,7 @@ from google.auth import credentials # type: ignore
|
|
29
28
|
from google.cloud.speech_v2 import SpeechAsyncClient
|
30
29
|
from google.cloud.speech_v2.types import cloud_speech
|
31
30
|
|
31
|
+
from .log import logger
|
32
32
|
from .models import SpeechLanguages, SpeechModels
|
33
33
|
|
34
34
|
LgType = SpeechLanguages | str
|
@@ -105,7 +105,7 @@ class STT(stt.STT):
|
|
105
105
|
config.languages = [config.languages]
|
106
106
|
elif not config.detect_language:
|
107
107
|
if len(config.languages) > 1:
|
108
|
-
|
108
|
+
logger.warning(
|
109
109
|
"multiple languages provided, but language detection is disabled"
|
110
110
|
)
|
111
111
|
config.languages = [config.languages[0]]
|
@@ -208,7 +208,7 @@ class SpeechStream(stt.SpeechStream):
|
|
208
208
|
|
209
209
|
def log_exception(task: asyncio.Task) -> None:
|
210
210
|
if not task.cancelled() and task.exception():
|
211
|
-
|
211
|
+
logger.error(f"google stt task failed: {task.exception()}")
|
212
212
|
|
213
213
|
self._main_task.add_done_callback(log_exception)
|
214
214
|
|
@@ -256,7 +256,7 @@ class SpeechStream(stt.SpeechStream):
|
|
256
256
|
audio=frame.data.tobytes(),
|
257
257
|
)
|
258
258
|
except Exception as e:
|
259
|
-
|
259
|
+
logger.error(
|
260
260
|
f"an error occurred while streaming inputs: {e}"
|
261
261
|
)
|
262
262
|
|
@@ -269,7 +269,7 @@ class SpeechStream(stt.SpeechStream):
|
|
269
269
|
await self._run_stream(stream)
|
270
270
|
except Exception as e:
|
271
271
|
if retry_count >= max_retry:
|
272
|
-
|
272
|
+
logger.error(
|
273
273
|
f"failed to connect to google stt after {max_retry} tries",
|
274
274
|
exc_info=e,
|
275
275
|
)
|
@@ -277,7 +277,7 @@ class SpeechStream(stt.SpeechStream):
|
|
277
277
|
|
278
278
|
retry_delay = min(retry_count * 2, 10) # max 10s
|
279
279
|
retry_count += 1
|
280
|
-
|
280
|
+
logger.warning(
|
281
281
|
f"google stt connection failed, retrying in {retry_delay}s",
|
282
282
|
exc_info=e,
|
283
283
|
)
|
@@ -0,0 +1,146 @@
|
|
1
|
+
# Copyright 2023 LiveKit, Inc.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
from dataclasses import dataclass
|
16
|
+
from typing import AsyncIterable, Optional, Union
|
17
|
+
|
18
|
+
from livekit import rtc
|
19
|
+
from livekit.agents import codecs, tts
|
20
|
+
|
21
|
+
from google.cloud import texttospeech
|
22
|
+
from google.cloud.texttospeech_v1.types import (
|
23
|
+
SsmlVoiceGender,
|
24
|
+
SynthesizeSpeechResponse,
|
25
|
+
)
|
26
|
+
|
27
|
+
from .log import logger
|
28
|
+
from .models import AudioEncoding, Gender, SpeechLanguages
|
29
|
+
|
30
|
+
LgType = Union[SpeechLanguages, str]
|
31
|
+
GenderType = Union[Gender, str]
|
32
|
+
AudioEncodingType = Union[AudioEncoding, str]
|
33
|
+
|
34
|
+
|
35
|
+
@dataclass
|
36
|
+
class TTSOptions:
|
37
|
+
voice: texttospeech.VoiceSelectionParams
|
38
|
+
audio_config: texttospeech.AudioConfig
|
39
|
+
|
40
|
+
|
41
|
+
class TTS(tts.TTS):
|
42
|
+
def __init__(
|
43
|
+
self,
|
44
|
+
config: Optional[TTSOptions] = None,
|
45
|
+
*,
|
46
|
+
language: LgType = "en-US",
|
47
|
+
gender: GenderType = "neutral",
|
48
|
+
voice_name: str = "", # Not required
|
49
|
+
audio_encoding: AudioEncodingType = "wav",
|
50
|
+
sample_rate: int = 24000,
|
51
|
+
speaking_rate: float = 1.0,
|
52
|
+
credentials_info: Optional[dict] = None,
|
53
|
+
credentials_file: Optional[str] = None,
|
54
|
+
) -> None:
|
55
|
+
super().__init__(
|
56
|
+
streaming_supported=False, sample_rate=sample_rate, num_channels=1
|
57
|
+
)
|
58
|
+
|
59
|
+
if credentials_info:
|
60
|
+
self._client = (
|
61
|
+
texttospeech.TextToSpeechAsyncClient.from_service_account_info(
|
62
|
+
credentials_info
|
63
|
+
)
|
64
|
+
)
|
65
|
+
elif credentials_file:
|
66
|
+
self._client = (
|
67
|
+
texttospeech.TextToSpeechAsyncClient.from_service_account_file(
|
68
|
+
credentials_file
|
69
|
+
)
|
70
|
+
)
|
71
|
+
else:
|
72
|
+
self._client = texttospeech.TextToSpeechAsyncClient()
|
73
|
+
|
74
|
+
if not config:
|
75
|
+
_gender = SsmlVoiceGender.NEUTRAL
|
76
|
+
if gender == "male":
|
77
|
+
_gender = SsmlVoiceGender.MALE
|
78
|
+
elif gender == "female":
|
79
|
+
_gender = SsmlVoiceGender.FEMALE
|
80
|
+
voice = texttospeech.VoiceSelectionParams(
|
81
|
+
name=voice_name,
|
82
|
+
language_code=language,
|
83
|
+
ssml_gender=_gender,
|
84
|
+
)
|
85
|
+
# Support wav and mp3 only
|
86
|
+
if audio_encoding == "wav":
|
87
|
+
_audio_encoding = texttospeech.AudioEncoding.LINEAR16
|
88
|
+
elif audio_encoding == "mp3":
|
89
|
+
_audio_encoding = texttospeech.AudioEncoding.MP3
|
90
|
+
# elif audio_encoding == "opus":
|
91
|
+
# _audio_encoding = texttospeech.AudioEncoding.OGG_OPUS
|
92
|
+
# elif audio_encoding == "mulaw":
|
93
|
+
# _audio_encoding = texttospeech.AudioEncoding.MULAW
|
94
|
+
# elif audio_encoding == "alaw":
|
95
|
+
# _audio_encoding = texttospeech.AudioEncoding.ALAW
|
96
|
+
else:
|
97
|
+
raise NotImplementedError(
|
98
|
+
f"Audio encoding {audio_encoding} is not supported"
|
99
|
+
)
|
100
|
+
|
101
|
+
config = TTSOptions(
|
102
|
+
voice=voice,
|
103
|
+
audio_config=texttospeech.AudioConfig(
|
104
|
+
audio_encoding=_audio_encoding,
|
105
|
+
sample_rate_hertz=sample_rate,
|
106
|
+
speaking_rate=speaking_rate,
|
107
|
+
),
|
108
|
+
)
|
109
|
+
self._config = config
|
110
|
+
|
111
|
+
def synthesize(
|
112
|
+
self,
|
113
|
+
text: str,
|
114
|
+
) -> AsyncIterable[tts.SynthesizedAudio]:
|
115
|
+
async def generator():
|
116
|
+
try:
|
117
|
+
# Perform the text-to-speech request on the text input with the selected
|
118
|
+
# voice parameters and audio file type
|
119
|
+
response: SynthesizeSpeechResponse = (
|
120
|
+
await self._client.synthesize_speech(
|
121
|
+
input=texttospeech.SynthesisInput(text=text),
|
122
|
+
voice=self._config.voice,
|
123
|
+
audio_config=self._config.audio_config,
|
124
|
+
)
|
125
|
+
)
|
126
|
+
|
127
|
+
data = response.audio_content
|
128
|
+
if self._config.audio_config.audio_encoding == "mp3":
|
129
|
+
decoder = codecs.Mp3StreamDecoder()
|
130
|
+
frames = decoder.decode_chunk(data)
|
131
|
+
for frame in frames:
|
132
|
+
yield tts.SynthesizedAudio(text=text, data=frame)
|
133
|
+
else:
|
134
|
+
yield tts.SynthesizedAudio(
|
135
|
+
text=text,
|
136
|
+
data=rtc.AudioFrame(
|
137
|
+
data=data,
|
138
|
+
sample_rate=self._config.audio_config.sample_rate_hertz,
|
139
|
+
num_channels=1,
|
140
|
+
samples_per_channel=len(data) // 2, # 16-bit
|
141
|
+
),
|
142
|
+
)
|
143
|
+
except Exception as e:
|
144
|
+
logger.error(f"failed to synthesize: {e}")
|
145
|
+
|
146
|
+
return generator()
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: livekit-plugins-google
|
3
|
-
Version: 0.3.0
|
3
|
+
Version: 0.4.0
|
4
4
|
Summary: Agent Framework plugin for services from Google Cloud
|
5
5
|
Home-page: https://github.com/livekit/agents
|
6
6
|
License: Apache-2.0
|
@@ -29,8 +29,8 @@ Requires-Dist: google-cloud-speech <3,>=2
|
|
29
29
|
Requires-Dist: google-cloud-texttospeech <3,>=2
|
30
30
|
Requires-Dist: google-cloud-translate <4,>=3
|
31
31
|
Requires-Dist: googleapis-common-protos <2,>=1
|
32
|
-
Requires-Dist: livekit
|
33
|
-
Requires-Dist: livekit-agents ~=0.
|
32
|
+
Requires-Dist: livekit ~=0.11
|
33
|
+
Requires-Dist: livekit-agents ~=0.6.0
|
34
34
|
|
35
35
|
# LiveKit Plugins Google
|
36
36
|
|
@@ -0,0 +1,11 @@
|
|
1
|
+
livekit/plugins/google/__init__.py,sha256=DlQC5cosMFyQlM8_vFvJGoZiziFkd0Sa4mutnsxXyZM,959
|
2
|
+
livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
|
3
|
+
livekit/plugins/google/models.py,sha256=e-KvFKOn6eFfLucAltwdAwMEYByuHcJpIr7KfO0ClL0,1295
|
4
|
+
livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
+
livekit/plugins/google/stt.py,sha256=sPV4ByAxfeGBNvAGIuwZvheEA0k7NYjXR_UiYWjd39Y,15029
|
6
|
+
livekit/plugins/google/tts.py,sha256=ZYtotaD8hZ-n53A7qOfp728oPAWIrJYLvCPjF_Ni-xo,5299
|
7
|
+
livekit/plugins/google/version.py,sha256=yelanl1wEXtgUH0CzoNVXfi2yTc2hElSzuAhULFzANc,600
|
8
|
+
livekit_plugins_google-0.4.0.dist-info/METADATA,sha256=xm5VC02Nbzj7x_cxZ-THc4iwb76_Jr7hu7C_G_Z-mtA,1941
|
9
|
+
livekit_plugins_google-0.4.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
10
|
+
livekit_plugins_google-0.4.0.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
|
11
|
+
livekit_plugins_google-0.4.0.dist-info/RECORD,,
|
@@ -1,9 +0,0 @@
|
|
1
|
-
livekit/plugins/google/__init__.py,sha256=snPMHNLrurYbLWQOkV_o6qG1CEWsOCZ8ZfPMvmh5ejY,931
|
2
|
-
livekit/plugins/google/models.py,sha256=DgiXOvGDO8D9rfCKHJL28lbyQR8mXXB2kpku-szXLRs,1185
|
3
|
-
livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
|
-
livekit/plugins/google/stt.py,sha256=lYA8hlkxG3YSw1Q34j8hgs4us5Ij-TLBQTRwtGPN9MY,15025
|
5
|
-
livekit/plugins/google/version.py,sha256=G5iYozum4q7UpHwW43F7QfhzUfwcncPxBZ0gmUGsd5I,600
|
6
|
-
livekit_plugins_google-0.3.0.dist-info/METADATA,sha256=sPd3OZxViD0Aq1uF1qJpbsYeqLAlq8tB720JXk-_RKw,1945
|
7
|
-
livekit_plugins_google-0.3.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
8
|
-
livekit_plugins_google-0.3.0.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
|
9
|
-
livekit_plugins_google-0.3.0.dist-info/RECORD,,
|
File without changes
|
{livekit_plugins_google-0.3.0.dist-info → livekit_plugins_google-0.4.0.dist-info}/top_level.txt
RENAMED
File without changes
|