livekit-plugins-google 1.0.0.dev5__py3-none-any.whl → 1.0.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- livekit/plugins/google/beta/realtime/api_proto.py +1 -1
- livekit/plugins/google/beta/realtime/realtime_api.py +13 -7
- livekit/plugins/google/beta/realtime/transcriber.py +2 -2
- livekit/plugins/google/llm.py +18 -18
- livekit/plugins/google/stt.py +58 -58
- livekit/plugins/google/tts.py +30 -35
- livekit/plugins/google/version.py +1 -1
- {livekit_plugins_google-1.0.0.dev5.dist-info → livekit_plugins_google-1.0.0rc1.dist-info}/METADATA +2 -2
- livekit_plugins_google-1.0.0rc1.dist-info/RECORD +17 -0
- livekit_plugins_google-1.0.0.dev5.dist-info/RECORD +0 -17
- {livekit_plugins_google-1.0.0.dev5.dist-info → livekit_plugins_google-1.0.0rc1.dist-info}/WHEEL +0 -0
@@ -31,7 +31,13 @@ from livekit.agents.llm.function_context import _create_ai_function_info
|
|
31
31
|
from livekit.agents.utils import images
|
32
32
|
|
33
33
|
from ...log import logger
|
34
|
-
from .api_proto import
|
34
|
+
from .api_proto import (
|
35
|
+
ClientEvents,
|
36
|
+
LiveAPIModels,
|
37
|
+
Voice,
|
38
|
+
_build_gemini_ctx,
|
39
|
+
_build_tools,
|
40
|
+
)
|
35
41
|
from .transcriber import ModelTranscriber, TranscriberSession, TranscriptionContent
|
36
42
|
|
37
43
|
EventTypes = Literal[
|
@@ -148,7 +154,7 @@ class RealtimeModel:
|
|
148
154
|
|
149
155
|
Raises:
|
150
156
|
ValueError: If the API key is not provided and cannot be found in environment variables.
|
151
|
-
"""
|
157
|
+
""" # noqa: E501
|
152
158
|
if modalities is None:
|
153
159
|
modalities = ["AUDIO"]
|
154
160
|
super().__init__()
|
@@ -164,7 +170,7 @@ class RealtimeModel:
|
|
164
170
|
if vertexai:
|
165
171
|
if not self._project or not self._location:
|
166
172
|
raise ValueError(
|
167
|
-
"Project and location are required for VertexAI either via project and location or GOOGLE_CLOUD_PROJECT and GOOGLE_CLOUD_LOCATION environment variables"
|
173
|
+
"Project and location are required for VertexAI either via project and location or GOOGLE_CLOUD_PROJECT and GOOGLE_CLOUD_LOCATION environment variables" # noqa: E501
|
168
174
|
)
|
169
175
|
self._api_key = None # VertexAI does not require an API key
|
170
176
|
|
@@ -173,7 +179,7 @@ class RealtimeModel:
|
|
173
179
|
self._location = None
|
174
180
|
if not self._api_key:
|
175
181
|
raise ValueError(
|
176
|
-
"API key is required for Google API either via api_key or GOOGLE_API_KEY environment variable"
|
182
|
+
"API key is required for Google API either via api_key or GOOGLE_API_KEY environment variable" # noqa: E501
|
177
183
|
)
|
178
184
|
|
179
185
|
instructions_content = Content(parts=[Part(text=instructions)]) if instructions else None
|
@@ -337,7 +343,7 @@ class GeminiRealtimeSession(utils.EventEmitter[EventTypes]):
|
|
337
343
|
|
338
344
|
Notes:
|
339
345
|
- This will be sent immediately so you should use a sampling frame rate that makes sense for your application and Gemini's constraints. 1 FPS is a good starting point.
|
340
|
-
"""
|
346
|
+
""" # noqa: E501
|
341
347
|
encoded_data = images.encode(
|
342
348
|
frame,
|
343
349
|
encode_options,
|
@@ -401,7 +407,7 @@ class GeminiRealtimeSession(utils.EventEmitter[EventTypes]):
|
|
401
407
|
)
|
402
408
|
|
403
409
|
# self._chat_ctx.append(text=content.text, role="user")
|
404
|
-
# TODO: implement sync mechanism to make sure the transcribed user speech is inside the chat_ctx and always before the generated agent speech
|
410
|
+
# TODO: implement sync mechanism to make sure the transcribed user speech is inside the chat_ctx and always before the generated agent speech # noqa: E501
|
405
411
|
|
406
412
|
def _on_agent_speech_done(self, content: TranscriptionContent) -> None:
|
407
413
|
if content.response_id and content.text:
|
@@ -500,7 +506,7 @@ class GeminiRealtimeSession(utils.EventEmitter[EventTypes]):
|
|
500
506
|
logger.warning(
|
501
507
|
"function call cancelled",
|
502
508
|
extra={
|
503
|
-
"function_call_ids": response.tool_call_cancellation.function_call_ids,
|
509
|
+
"function_call_ids": response.tool_call_cancellation.function_call_ids, # noqa: E501
|
504
510
|
},
|
505
511
|
)
|
506
512
|
self.emit(
|
@@ -30,7 +30,7 @@ You are an **Audio Transcriber**. Your task is to convert audio content into acc
|
|
30
30
|
- Do not add explanations, comments, or extra information.
|
31
31
|
- Do not include timestamps, speaker labels, or annotations unless specified.
|
32
32
|
- Audio Language: {DEFAULT_LANGUAGE}
|
33
|
-
"""
|
33
|
+
""" # noqa: E501
|
34
34
|
|
35
35
|
|
36
36
|
@dataclass
|
@@ -53,7 +53,7 @@ class TranscriberSession(utils.EventEmitter[EventTypes]):
|
|
53
53
|
|
54
54
|
system_instructions = types.Content(parts=[types.Part(text=SYSTEM_INSTRUCTIONS)])
|
55
55
|
self._config = types.LiveConnectConfig(
|
56
|
-
response_modalities=[
|
56
|
+
response_modalities=[types.Modality.TEXT],
|
57
57
|
system_instruction=system_instructions,
|
58
58
|
generation_config=types.GenerationConfig(temperature=0.0),
|
59
59
|
)
|
livekit/plugins/google/llm.py
CHANGED
@@ -94,30 +94,30 @@ class LLM(llm.LLM):
|
|
94
94
|
presence_penalty (float, optional): Penalizes the model for generating previously mentioned concepts. Defaults to None.
|
95
95
|
frequency_penalty (float, optional): Penalizes the model for repeating words. Defaults to None.
|
96
96
|
tool_choice (ToolChoice or Literal["auto", "required", "none"], optional): Specifies whether to use tools during response generation. Defaults to "auto".
|
97
|
-
"""
|
97
|
+
""" # noqa: E501
|
98
98
|
super().__init__()
|
99
|
-
|
100
|
-
|
101
|
-
|
99
|
+
gcp_project = project if is_given(project) else os.environ.get("GOOGLE_CLOUD_PROJECT")
|
100
|
+
gcp_location = location if is_given(location) else os.environ.get("GOOGLE_CLOUD_LOCATION")
|
101
|
+
gemini_api_key = api_key if is_given(api_key) else os.environ.get("GOOGLE_API_KEY")
|
102
102
|
_gac = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")
|
103
103
|
if _gac is None:
|
104
104
|
logger.warning(
|
105
|
-
"`GOOGLE_APPLICATION_CREDENTIALS` environment variable is not set. please set it to the path of the service account key file. Otherwise, use any of the other Google Cloud auth methods."
|
105
|
+
"`GOOGLE_APPLICATION_CREDENTIALS` environment variable is not set. please set it to the path of the service account key file. Otherwise, use any of the other Google Cloud auth methods." # noqa: E501
|
106
106
|
)
|
107
107
|
|
108
|
-
if vertexai:
|
109
|
-
if not
|
110
|
-
_,
|
108
|
+
if is_given(vertexai) and vertexai:
|
109
|
+
if not gcp_project:
|
110
|
+
_, gcp_project = default_async(
|
111
111
|
scopes=["https://www.googleapis.com/auth/cloud-platform"]
|
112
112
|
)
|
113
|
-
|
113
|
+
gemini_api_key = None # VertexAI does not require an API key
|
114
114
|
|
115
115
|
else:
|
116
|
-
|
117
|
-
|
118
|
-
if not
|
116
|
+
gcp_project = None
|
117
|
+
gcp_location = None
|
118
|
+
if not gemini_api_key:
|
119
119
|
raise ValueError(
|
120
|
-
"API key is required for Google API either via api_key or GOOGLE_API_KEY environment variable"
|
120
|
+
"API key is required for Google API either via api_key or GOOGLE_API_KEY environment variable" # noqa: E501
|
121
121
|
)
|
122
122
|
|
123
123
|
self._opts = _LLMOptions(
|
@@ -134,10 +134,10 @@ class LLM(llm.LLM):
|
|
134
134
|
frequency_penalty=frequency_penalty,
|
135
135
|
)
|
136
136
|
self._client = genai.Client(
|
137
|
-
api_key=
|
138
|
-
vertexai=vertexai,
|
139
|
-
project=
|
140
|
-
location=
|
137
|
+
api_key=gemini_api_key,
|
138
|
+
vertexai=is_given(vertexai) and vertexai,
|
139
|
+
project=gcp_project,
|
140
|
+
location=gcp_location,
|
141
141
|
)
|
142
142
|
|
143
143
|
def chat(
|
@@ -273,7 +273,7 @@ class LLMStream(llm.LLMStream):
|
|
273
273
|
|
274
274
|
if len(response.candidates) > 1:
|
275
275
|
logger.warning(
|
276
|
-
"gemini llm: there are multiple candidates in the response, returning response from the first one."
|
276
|
+
"gemini llm: there are multiple candidates in the response, returning response from the first one." # noqa: E501
|
277
277
|
)
|
278
278
|
|
279
279
|
for part in response.candidates[0].content.parts:
|
livekit/plugins/google/stt.py
CHANGED
@@ -37,6 +37,11 @@ from livekit.agents import (
|
|
37
37
|
stt,
|
38
38
|
utils,
|
39
39
|
)
|
40
|
+
from livekit.agents.types import (
|
41
|
+
NOT_GIVEN,
|
42
|
+
NotGivenOr,
|
43
|
+
)
|
44
|
+
from livekit.agents.utils import is_given
|
40
45
|
|
41
46
|
from .log import logger
|
42
47
|
from .models import SpeechLanguages, SpeechModels
|
@@ -62,10 +67,10 @@ class STTOptions:
|
|
62
67
|
spoken_punctuation: bool
|
63
68
|
model: SpeechModels | str
|
64
69
|
sample_rate: int
|
65
|
-
keywords: list[tuple[str, float]]
|
70
|
+
keywords: NotGivenOr[list[tuple[str, float]]] = NOT_GIVEN
|
66
71
|
|
67
72
|
def build_adaptation(self) -> cloud_speech.SpeechAdaptation | None:
|
68
|
-
if self.keywords:
|
73
|
+
if is_given(self.keywords):
|
69
74
|
return cloud_speech.SpeechAdaptation(
|
70
75
|
phrase_sets=[
|
71
76
|
cloud_speech.SpeechAdaptation.AdaptationPhraseSet(
|
@@ -91,11 +96,11 @@ class STT(stt.STT):
|
|
91
96
|
punctuate: bool = True,
|
92
97
|
spoken_punctuation: bool = False,
|
93
98
|
model: SpeechModels | str = "latest_long",
|
94
|
-
location: str = "
|
99
|
+
location: str = "global",
|
95
100
|
sample_rate: int = 16000,
|
96
|
-
credentials_info: dict
|
97
|
-
credentials_file: str
|
98
|
-
keywords: list[tuple[str, float]]
|
101
|
+
credentials_info: NotGivenOr[dict] = NOT_GIVEN,
|
102
|
+
credentials_file: NotGivenOr[str] = NOT_GIVEN,
|
103
|
+
keywords: NotGivenOr[list[tuple[str, float]]] = NOT_GIVEN,
|
99
104
|
):
|
100
105
|
"""
|
101
106
|
Create a new instance of Google STT.
|
@@ -111,7 +116,7 @@ class STT(stt.STT):
|
|
111
116
|
punctuate(bool): whether to punctuate the audio (default: True)
|
112
117
|
spoken_punctuation(bool): whether to use spoken punctuation (default: False)
|
113
118
|
model(SpeechModels): the model to use for recognition default: "latest_long"
|
114
|
-
location(str): the location to use for recognition default: "
|
119
|
+
location(str): the location to use for recognition default: "global"
|
115
120
|
sample_rate(int): the sample rate of the audio default: 16000
|
116
121
|
credentials_info(dict): the credentials info to use for recognition (default: None)
|
117
122
|
credentials_file(str): the credentials file to use for recognition (default: None)
|
@@ -123,11 +128,11 @@ class STT(stt.STT):
|
|
123
128
|
self._credentials_info = credentials_info
|
124
129
|
self._credentials_file = credentials_file
|
125
130
|
|
126
|
-
if credentials_file
|
131
|
+
if not is_given(credentials_file) and not is_given(credentials_info):
|
127
132
|
try:
|
128
133
|
gauth_default()
|
129
134
|
except DefaultCredentialsError:
|
130
|
-
raise ValueError(
|
135
|
+
raise ValueError( # noqa: B904
|
131
136
|
"Application default credentials must be available "
|
132
137
|
"when using Google STT without explicitly passing "
|
133
138
|
"credentials through credentials_info or credentials_file."
|
@@ -159,20 +164,16 @@ class STT(stt.STT):
|
|
159
164
|
client: SpeechAsyncClient | None = None
|
160
165
|
if self._location != "global":
|
161
166
|
client_options = ClientOptions(api_endpoint=f"{self._location}-speech.googleapis.com")
|
162
|
-
if self._credentials_info:
|
167
|
+
if is_given(self._credentials_info):
|
163
168
|
client = SpeechAsyncClient.from_service_account_info(
|
164
|
-
self._credentials_info,
|
165
|
-
client_options=client_options,
|
169
|
+
self._credentials_info, client_options=client_options
|
166
170
|
)
|
167
|
-
elif self._credentials_file:
|
171
|
+
elif is_given(self._credentials_file):
|
168
172
|
client = SpeechAsyncClient.from_service_account_file(
|
169
|
-
self._credentials_file,
|
170
|
-
client_options=client_options,
|
173
|
+
self._credentials_file, client_options=client_options
|
171
174
|
)
|
172
175
|
else:
|
173
|
-
client = SpeechAsyncClient(
|
174
|
-
client_options=client_options,
|
175
|
-
)
|
176
|
+
client = SpeechAsyncClient(client_options=client_options)
|
176
177
|
assert client is not None
|
177
178
|
return client
|
178
179
|
|
@@ -189,10 +190,10 @@ class STT(stt.STT):
|
|
189
190
|
_, project_id = ga_default()
|
190
191
|
return f"projects/{project_id}/locations/{self._location}/recognizers/_"
|
191
192
|
|
192
|
-
def _sanitize_options(self, *, language: str
|
193
|
+
def _sanitize_options(self, *, language: NotGivenOr[str] = NOT_GIVEN) -> STTOptions:
|
193
194
|
config = dataclasses.replace(self._config)
|
194
195
|
|
195
|
-
if language:
|
196
|
+
if is_given(language):
|
196
197
|
config.languages = [language]
|
197
198
|
|
198
199
|
if not isinstance(config.languages, list):
|
@@ -208,7 +209,7 @@ class STT(stt.STT):
|
|
208
209
|
self,
|
209
210
|
buffer: utils.AudioBuffer,
|
210
211
|
*,
|
211
|
-
language: SpeechLanguages | str
|
212
|
+
language: NotGivenOr[SpeechLanguages | str] = NOT_GIVEN,
|
212
213
|
conn_options: APIConnectOptions,
|
213
214
|
) -> stt.SpeechEvent:
|
214
215
|
config = self._sanitize_options(language=language)
|
@@ -243,9 +244,9 @@ class STT(stt.STT):
|
|
243
244
|
|
244
245
|
return _recognize_response_to_speech_event(raw)
|
245
246
|
except DeadlineExceeded:
|
246
|
-
raise APITimeoutError()
|
247
|
+
raise APITimeoutError() # noqa: B904
|
247
248
|
except GoogleAPICallError as e:
|
248
|
-
raise APIStatusError(
|
249
|
+
raise APIStatusError( # noqa: B904
|
249
250
|
e.message,
|
250
251
|
status_code=e.code or -1,
|
251
252
|
)
|
@@ -255,7 +256,7 @@ class STT(stt.STT):
|
|
255
256
|
def stream(
|
256
257
|
self,
|
257
258
|
*,
|
258
|
-
language: SpeechLanguages | str
|
259
|
+
language: NotGivenOr[SpeechLanguages | str] = NOT_GIVEN,
|
259
260
|
conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
|
260
261
|
) -> SpeechStream:
|
261
262
|
config = self._sanitize_options(language=language)
|
@@ -272,34 +273,34 @@ class STT(stt.STT):
|
|
272
273
|
def update_options(
|
273
274
|
self,
|
274
275
|
*,
|
275
|
-
languages: LanguageCode
|
276
|
-
detect_language: bool
|
277
|
-
interim_results: bool
|
278
|
-
punctuate: bool
|
279
|
-
spoken_punctuation: bool
|
280
|
-
model: SpeechModels
|
281
|
-
location: str
|
282
|
-
keywords: list[tuple[str, float]]
|
276
|
+
languages: NotGivenOr[LanguageCode] = NOT_GIVEN,
|
277
|
+
detect_language: NotGivenOr[bool] = NOT_GIVEN,
|
278
|
+
interim_results: NotGivenOr[bool] = NOT_GIVEN,
|
279
|
+
punctuate: NotGivenOr[bool] = NOT_GIVEN,
|
280
|
+
spoken_punctuation: NotGivenOr[bool] = NOT_GIVEN,
|
281
|
+
model: NotGivenOr[SpeechModels] = NOT_GIVEN,
|
282
|
+
location: NotGivenOr[str] = NOT_GIVEN,
|
283
|
+
keywords: NotGivenOr[list[tuple[str, float]]] = NOT_GIVEN,
|
283
284
|
):
|
284
|
-
if languages
|
285
|
+
if is_given(languages):
|
285
286
|
if isinstance(languages, str):
|
286
287
|
languages = [languages]
|
287
288
|
self._config.languages = languages
|
288
|
-
if detect_language
|
289
|
+
if is_given(detect_language):
|
289
290
|
self._config.detect_language = detect_language
|
290
|
-
if interim_results
|
291
|
+
if is_given(interim_results):
|
291
292
|
self._config.interim_results = interim_results
|
292
|
-
if punctuate
|
293
|
+
if is_given(punctuate):
|
293
294
|
self._config.punctuate = punctuate
|
294
|
-
if spoken_punctuation
|
295
|
+
if is_given(spoken_punctuation):
|
295
296
|
self._config.spoken_punctuation = spoken_punctuation
|
296
|
-
if model
|
297
|
+
if is_given(model):
|
297
298
|
self._config.model = model
|
298
|
-
if location
|
299
|
+
if is_given(location):
|
299
300
|
self._location = location
|
300
301
|
# if location is changed, fetch a new client and recognizer as per the new location
|
301
302
|
self._pool.invalidate()
|
302
|
-
if keywords
|
303
|
+
if is_given(keywords):
|
303
304
|
self._config.keywords = keywords
|
304
305
|
|
305
306
|
for stream in self._streams:
|
@@ -339,29 +340,29 @@ class SpeechStream(stt.SpeechStream):
|
|
339
340
|
def update_options(
|
340
341
|
self,
|
341
342
|
*,
|
342
|
-
languages: LanguageCode
|
343
|
-
detect_language: bool
|
344
|
-
interim_results: bool
|
345
|
-
punctuate: bool
|
346
|
-
spoken_punctuation: bool
|
347
|
-
model: SpeechModels
|
348
|
-
keywords: list[tuple[str, float]]
|
343
|
+
languages: NotGivenOr[LanguageCode] = NOT_GIVEN,
|
344
|
+
detect_language: NotGivenOr[bool] = NOT_GIVEN,
|
345
|
+
interim_results: NotGivenOr[bool] = NOT_GIVEN,
|
346
|
+
punctuate: NotGivenOr[bool] = NOT_GIVEN,
|
347
|
+
spoken_punctuation: NotGivenOr[bool] = NOT_GIVEN,
|
348
|
+
model: NotGivenOr[SpeechModels] = NOT_GIVEN,
|
349
|
+
keywords: NotGivenOr[list[tuple[str, float]]] = NOT_GIVEN,
|
349
350
|
):
|
350
|
-
if languages
|
351
|
+
if is_given(languages):
|
351
352
|
if isinstance(languages, str):
|
352
353
|
languages = [languages]
|
353
354
|
self._config.languages = languages
|
354
|
-
if detect_language
|
355
|
+
if is_given(detect_language):
|
355
356
|
self._config.detect_language = detect_language
|
356
|
-
if interim_results
|
357
|
+
if is_given(interim_results):
|
357
358
|
self._config.interim_results = interim_results
|
358
|
-
if punctuate
|
359
|
+
if is_given(punctuate):
|
359
360
|
self._config.punctuate = punctuate
|
360
|
-
if spoken_punctuation
|
361
|
+
if is_given(spoken_punctuation):
|
361
362
|
self._config.spoken_punctuation = spoken_punctuation
|
362
|
-
if model
|
363
|
+
if is_given(model):
|
363
364
|
self._config.model = model
|
364
|
-
if keywords
|
365
|
+
if is_given(keywords):
|
365
366
|
self._config.keywords = keywords
|
366
367
|
|
367
368
|
self._reconnect_event.set()
|
@@ -404,7 +405,7 @@ class SpeechStream(stt.SpeechStream):
|
|
404
405
|
|
405
406
|
if (
|
406
407
|
resp.speech_event_type
|
407
|
-
== cloud_speech.StreamingRecognizeResponse.SpeechEventType.SPEECH_EVENT_TYPE_UNSPECIFIED
|
408
|
+
== cloud_speech.StreamingRecognizeResponse.SpeechEventType.SPEECH_EVENT_TYPE_UNSPECIFIED # noqa: E501
|
408
409
|
):
|
409
410
|
result = resp.results[0]
|
410
411
|
speech_data = _streaming_recognize_response_to_speech_data(resp)
|
@@ -466,7 +467,6 @@ class SpeechStream(stt.SpeechStream):
|
|
466
467
|
),
|
467
468
|
),
|
468
469
|
streaming_features=cloud_speech.StreamingRecognitionFeatures(
|
469
|
-
enable_voice_activity_events=True,
|
470
470
|
interim_results=self._config.interim_results,
|
471
471
|
),
|
472
472
|
)
|
@@ -495,9 +495,9 @@ class SpeechStream(stt.SpeechStream):
|
|
495
495
|
await utils.aio.gracefully_cancel(process_stream_task, wait_reconnect_task)
|
496
496
|
should_stop.set()
|
497
497
|
except DeadlineExceeded:
|
498
|
-
raise APITimeoutError()
|
498
|
+
raise APITimeoutError() # noqa: B904
|
499
499
|
except GoogleAPICallError as e:
|
500
|
-
raise APIStatusError(
|
500
|
+
raise APIStatusError( # noqa: B904
|
501
501
|
e.message,
|
502
502
|
status_code=e.code or -1,
|
503
503
|
)
|
livekit/plugins/google/tts.py
CHANGED
@@ -27,8 +27,12 @@ from livekit.agents import (
|
|
27
27
|
tts,
|
28
28
|
utils,
|
29
29
|
)
|
30
|
-
|
31
|
-
|
30
|
+
from livekit.agents.types import (
|
31
|
+
DEFAULT_API_CONNECT_OPTIONS,
|
32
|
+
NOT_GIVEN,
|
33
|
+
NotGivenOr,
|
34
|
+
)
|
35
|
+
from livekit.agents.utils import is_given
|
32
36
|
|
33
37
|
|
34
38
|
@dataclass
|
@@ -41,15 +45,13 @@ class TTS(tts.TTS):
|
|
41
45
|
def __init__(
|
42
46
|
self,
|
43
47
|
*,
|
44
|
-
|
45
|
-
gender: Gender | str = "neutral",
|
46
|
-
voice_name: str = "", # Not required
|
48
|
+
voice: NotGivenOr[texttospeech.VoiceSelectionParams] = NOT_GIVEN,
|
47
49
|
sample_rate: int = 24000,
|
48
50
|
pitch: int = 0,
|
49
51
|
effects_profile_id: str = "",
|
50
52
|
speaking_rate: float = 1.0,
|
51
|
-
credentials_info: dict
|
52
|
-
credentials_file: str
|
53
|
+
credentials_info: NotGivenOr[dict] = NOT_GIVEN,
|
54
|
+
credentials_file: NotGivenOr[str] = NOT_GIVEN,
|
53
55
|
) -> None:
|
54
56
|
"""
|
55
57
|
Create a new instance of Google TTS.
|
@@ -59,16 +61,14 @@ class TTS(tts.TTS):
|
|
59
61
|
environmental variable.
|
60
62
|
|
61
63
|
Args:
|
62
|
-
|
63
|
-
gender (Gender | str, optional): Voice gender ("male", "female", "neutral"). Default is "neutral".
|
64
|
-
voice_name (str, optional): Specific voice name. Default is an empty string.
|
64
|
+
voice (texttospeech.VoiceSelectionParams, optional): Voice selection parameters.
|
65
65
|
sample_rate (int, optional): Audio sample rate in Hz. Default is 24000.
|
66
66
|
pitch (float, optional): Speaking pitch, ranging from -20.0 to 20.0 semitones relative to the original pitch. Default is 0.
|
67
67
|
effects_profile_id (str): Optional identifier for selecting audio effects profiles to apply to the synthesized speech.
|
68
68
|
speaking_rate (float, optional): Speed of speech. Default is 1.0.
|
69
69
|
credentials_info (dict, optional): Dictionary containing Google Cloud credentials. Default is None.
|
70
70
|
credentials_file (str, optional): Path to the Google Cloud credentials JSON file. Default is None.
|
71
|
-
"""
|
71
|
+
""" # noqa: E501
|
72
72
|
|
73
73
|
super().__init__(
|
74
74
|
capabilities=tts.TTSCapabilities(
|
@@ -82,11 +82,12 @@ class TTS(tts.TTS):
|
|
82
82
|
self._credentials_info = credentials_info
|
83
83
|
self._credentials_file = credentials_file
|
84
84
|
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
85
|
+
if not is_given(voice):
|
86
|
+
voice = texttospeech.VoiceSelectionParams(
|
87
|
+
name="",
|
88
|
+
language_code="en-US",
|
89
|
+
ssml_gender=SsmlVoiceGender.NEUTRAL,
|
90
|
+
)
|
90
91
|
|
91
92
|
self._opts = _TTSOptions(
|
92
93
|
voice=voice,
|
@@ -102,26 +103,20 @@ class TTS(tts.TTS):
|
|
102
103
|
def update_options(
|
103
104
|
self,
|
104
105
|
*,
|
105
|
-
|
106
|
-
|
107
|
-
voice_name: str = "", # Not required
|
108
|
-
speaking_rate: float = 1.0,
|
106
|
+
voice: NotGivenOr[texttospeech.VoiceSelectionParams] = NOT_GIVEN,
|
107
|
+
speaking_rate: NotGivenOr[float] = NOT_GIVEN,
|
109
108
|
) -> None:
|
110
109
|
"""
|
111
110
|
Update the TTS options.
|
112
111
|
|
113
112
|
Args:
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
language_code=language,
|
122
|
-
ssml_gender=_gender_from_str(gender),
|
123
|
-
)
|
124
|
-
self._opts.audio_config.speaking_rate = speaking_rate
|
113
|
+
voice (texttospeech.VoiceSelectionParams, optional): Voice selection parameters.
|
114
|
+
speaking_rate (float, optional): Speed of speech.
|
115
|
+
""" # noqa: E501
|
116
|
+
if is_given(voice):
|
117
|
+
self._opts.voice = voice
|
118
|
+
if is_given(speaking_rate):
|
119
|
+
self._opts.audio_config.speaking_rate = speaking_rate
|
125
120
|
|
126
121
|
def _ensure_client(self) -> texttospeech.TextToSpeechAsyncClient:
|
127
122
|
if self._client is None:
|
@@ -144,7 +139,7 @@ class TTS(tts.TTS):
|
|
144
139
|
self,
|
145
140
|
text: str,
|
146
141
|
*,
|
147
|
-
conn_options: APIConnectOptions
|
142
|
+
conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
|
148
143
|
) -> ChunkedStream:
|
149
144
|
return ChunkedStream(
|
150
145
|
tts=self,
|
@@ -163,7 +158,7 @@ class ChunkedStream(tts.ChunkedStream):
|
|
163
158
|
input_text: str,
|
164
159
|
opts: _TTSOptions,
|
165
160
|
client: texttospeech.TextToSpeechAsyncClient,
|
166
|
-
conn_options: APIConnectOptions
|
161
|
+
conn_options: APIConnectOptions,
|
167
162
|
) -> None:
|
168
163
|
super().__init__(tts=tts, input_text=input_text, conn_options=conn_options)
|
169
164
|
self._opts, self._client = opts, client
|
@@ -199,9 +194,9 @@ class ChunkedStream(tts.ChunkedStream):
|
|
199
194
|
await decoder.aclose()
|
200
195
|
|
201
196
|
except DeadlineExceeded:
|
202
|
-
raise APITimeoutError()
|
197
|
+
raise APITimeoutError() # noqa: B904
|
203
198
|
except GoogleAPICallError as e:
|
204
|
-
raise APIStatusError(
|
199
|
+
raise APIStatusError( # noqa: B904
|
205
200
|
e.message,
|
206
201
|
status_code=e.code or -1,
|
207
202
|
request_id=None,
|
{livekit_plugins_google-1.0.0.dev5.dist-info → livekit_plugins_google-1.0.0rc1.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: livekit-plugins-google
|
3
|
-
Version: 1.0.
|
3
|
+
Version: 1.0.0rc1
|
4
4
|
Summary: Agent Framework plugin for services from Google Cloud
|
5
5
|
Project-URL: Documentation, https://docs.livekit.io
|
6
6
|
Project-URL: Website, https://livekit.io/
|
@@ -22,7 +22,7 @@ Requires-Dist: google-auth<3,>=2
|
|
22
22
|
Requires-Dist: google-cloud-speech<3,>=2
|
23
23
|
Requires-Dist: google-cloud-texttospeech<3,>=2
|
24
24
|
Requires-Dist: google-genai==1.5.0
|
25
|
-
Requires-Dist: livekit-agents>=1.0.0.
|
25
|
+
Requires-Dist: livekit-agents>=1.0.0.rc1
|
26
26
|
Description-Content-Type: text/markdown
|
27
27
|
|
28
28
|
# LiveKit Plugins Google
|
@@ -0,0 +1,17 @@
|
|
1
|
+
livekit/plugins/google/__init__.py,sha256=e_kSlFNmKhyyeliz7f4WOKc_Y0-y39QjO5nCWuguhss,1171
|
2
|
+
livekit/plugins/google/llm.py,sha256=oAi4EUitki2EUWIMEcnSVO4AB6EmZzi3vNiDG2mdVvU,14377
|
3
|
+
livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
|
4
|
+
livekit/plugins/google/models.py,sha256=SGjAumdDK97NNLwMFcqZdKR68f1NoGB2Rk1UP2-imG0,1457
|
5
|
+
livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
|
+
livekit/plugins/google/stt.py,sha256=fT5JtDM8ck2iMAzRvFKVeyT1oPt_R-bDkqiLa-ysikc,22539
|
7
|
+
livekit/plugins/google/tts.py,sha256=mYm9n4zDzmNEAF3bSOb4-603CJrrdv9YJhrfbp5_k5A,7455
|
8
|
+
livekit/plugins/google/utils.py,sha256=SfuQLJSXSV708VMn5_TVB93ginUCX2izqT9r2seraSQ,8040
|
9
|
+
livekit/plugins/google/version.py,sha256=pF0lh6G9GYL7Mj7EnfhjFifzlzdWx6u3RvB0Itch4UE,604
|
10
|
+
livekit/plugins/google/beta/__init__.py,sha256=AxRYc7NGG62Tv1MmcZVCDHNvlhbC86hM-_yP01Qb28k,47
|
11
|
+
livekit/plugins/google/beta/realtime/__init__.py,sha256=_fW2NMN22F-hnQ4xAJ_g5lPbR7CvM_xXzSWlUQY-E-U,188
|
12
|
+
livekit/plugins/google/beta/realtime/api_proto.py,sha256=zrAR9aZ3wFpEiIoSi_8xiP6i8k5diPSl-r298imUyf4,684
|
13
|
+
livekit/plugins/google/beta/realtime/realtime_api.py,sha256=b7shOYZ86b38zxZCJkgzLQQeWESYo3VUZU1-7rNDAmw,22588
|
14
|
+
livekit/plugins/google/beta/realtime/transcriber.py,sha256=DD7q894xc25GeeuKDar6-GwH-MxStEwhwBiX-KZ-Jo4,9559
|
15
|
+
livekit_plugins_google-1.0.0rc1.dist-info/METADATA,sha256=u1QOa73RWGaBzxoRr6XI5n8oiUv42wtIVflCOkg-Sso,3496
|
16
|
+
livekit_plugins_google-1.0.0rc1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
17
|
+
livekit_plugins_google-1.0.0rc1.dist-info/RECORD,,
|
@@ -1,17 +0,0 @@
|
|
1
|
-
livekit/plugins/google/__init__.py,sha256=e_kSlFNmKhyyeliz7f4WOKc_Y0-y39QjO5nCWuguhss,1171
|
2
|
-
livekit/plugins/google/llm.py,sha256=-GksCFbQB5r3Cg2Zousf0etTrj1N-FRwIoJolepTupQ,14259
|
3
|
-
livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
|
4
|
-
livekit/plugins/google/models.py,sha256=SGjAumdDK97NNLwMFcqZdKR68f1NoGB2Rk1UP2-imG0,1457
|
5
|
-
livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
|
-
livekit/plugins/google/stt.py,sha256=c3YRAiFYpLuw1Prb6LnQNCuHyHf8TWbqBbiBWBKNDCI,22227
|
7
|
-
livekit/plugins/google/tts.py,sha256=AOv3kbvqezUbZguK55fAKpaLhUd1FMv61NMUjwbixAA,7875
|
8
|
-
livekit/plugins/google/utils.py,sha256=SfuQLJSXSV708VMn5_TVB93ginUCX2izqT9r2seraSQ,8040
|
9
|
-
livekit/plugins/google/version.py,sha256=pXgCpV03nQI-5Kk-74NFyAdw1htj2cx6unwQHipEcfE,605
|
10
|
-
livekit/plugins/google/beta/__init__.py,sha256=AxRYc7NGG62Tv1MmcZVCDHNvlhbC86hM-_yP01Qb28k,47
|
11
|
-
livekit/plugins/google/beta/realtime/__init__.py,sha256=_fW2NMN22F-hnQ4xAJ_g5lPbR7CvM_xXzSWlUQY-E-U,188
|
12
|
-
livekit/plugins/google/beta/realtime/api_proto.py,sha256=f7YllxvIp-xefZQ-XrMjcu8SVbISUQYWlgs_1w-P9IM,685
|
13
|
-
livekit/plugins/google/beta/realtime/realtime_api.py,sha256=em3aPaxCtZCQ1zgKc8xfFfvqGtgYxjOC0zwpvkTvv60,22479
|
14
|
-
livekit/plugins/google/beta/realtime/transcriber.py,sha256=AHKIbJdX2SIj_s2QQwo9aE6xbQjSZ9YGp8Y204EuOq0,9532
|
15
|
-
livekit_plugins_google-1.0.0.dev5.dist-info/METADATA,sha256=xXzAhr3jkgHr23WhvcB31FQR30VR2RNu3bGS14zYrfk,3499
|
16
|
-
livekit_plugins_google-1.0.0.dev5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
17
|
-
livekit_plugins_google-1.0.0.dev5.dist-info/RECORD,,
|
{livekit_plugins_google-1.0.0.dev5.dist-info → livekit_plugins_google-1.0.0rc1.dist-info}/WHEEL
RENAMED
File without changes
|