livekit-plugins-slng 1.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- livekit/plugins/slng/__init__.py +51 -0
- livekit/plugins/slng/gateway_adapter.py +663 -0
- livekit/plugins/slng/log.py +17 -0
- livekit/plugins/slng/py.typed +0 -0
- livekit/plugins/slng/stt.py +792 -0
- livekit/plugins/slng/tts.py +655 -0
- livekit/plugins/slng/version.py +15 -0
- livekit_plugins_slng-1.5.7.dist-info/METADATA +64 -0
- livekit_plugins_slng-1.5.7.dist-info/RECORD +10 -0
- livekit_plugins_slng-1.5.7.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# Copyright 2025 LiveKit, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
"""SLNG plugin for LiveKit Agents
|
|
16
|
+
|
|
17
|
+
STT and TTS adapters for SLNG gateway models.
|
|
18
|
+
|
|
19
|
+
See https://docs.slng.ai/ for more information.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from .log import logger
|
|
23
|
+
from .stt import STT, SpeechStream
|
|
24
|
+
from .tts import TTS
|
|
25
|
+
from .version import __version__
|
|
26
|
+
|
|
27
|
+
__all__ = [
|
|
28
|
+
"STT",
|
|
29
|
+
"SpeechStream",
|
|
30
|
+
"TTS",
|
|
31
|
+
"__version__",
|
|
32
|
+
]
|
|
33
|
+
|
|
34
|
+
from livekit.agents import Plugin
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class SLNGPlugin(Plugin):
    """Plugin subclass carrying this package's name, version and logger."""

    def __init__(self) -> None:
        # The base class receives, in order: module name, version string,
        # package name and the package logger.
        super().__init__(
            __name__,
            __version__,
            __package__,
            logger,
        )


# Register one plugin instance as a side effect of importing the package.
Plugin.register_plugin(SLNGPlugin())
|
|
43
|
+
|
|
44
|
+
# Hide every module-level name that is not exported through __all__ from pdoc.
_module = dir()
NOT_IN_ALL = [name for name in _module if name not in __all__]

# pdoc skips any name that maps to False in this dictionary.
__pdoc__ = {}

for n in NOT_IN_ALL:
    __pdoc__[n] = False
|
|
@@ -0,0 +1,663 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Mapping
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
RIME_DEFAULT_SPEAKER_BY_LANG: dict[str, str] = {
|
|
8
|
+
"ar": "sakina",
|
|
9
|
+
"de": "lorelei",
|
|
10
|
+
"en": "astra",
|
|
11
|
+
"es": "seraphina",
|
|
12
|
+
"fr": "destin",
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
RIME_ALLOWED_SPEAKERS_BY_LANG: dict[str, set[str]] = {
|
|
16
|
+
"ar": {"batin", "layla", "qadir", "sakina"},
|
|
17
|
+
"de": {"alfhild", "baldur", "kumara", "liesel", "lorelei", "runa"},
|
|
18
|
+
"en": {
|
|
19
|
+
"ahmed_mohamed",
|
|
20
|
+
"albion",
|
|
21
|
+
"andersen_johan",
|
|
22
|
+
"anderson_emily",
|
|
23
|
+
"anderson_jake",
|
|
24
|
+
"anderson_james",
|
|
25
|
+
"anderson_kevin",
|
|
26
|
+
"andromeda",
|
|
27
|
+
"arcade",
|
|
28
|
+
"astra",
|
|
29
|
+
"atrium",
|
|
30
|
+
"bauer_felix",
|
|
31
|
+
"bennett_emily",
|
|
32
|
+
"bennett_ryan",
|
|
33
|
+
"biondi_paul",
|
|
34
|
+
"bond",
|
|
35
|
+
"brooks_jordan",
|
|
36
|
+
"brown_alex",
|
|
37
|
+
"brown_joshua",
|
|
38
|
+
"brown_madison",
|
|
39
|
+
"brown_matthew",
|
|
40
|
+
"brown_steven",
|
|
41
|
+
"bruno_katie",
|
|
42
|
+
"carter_colin",
|
|
43
|
+
"celeste",
|
|
44
|
+
"chatterjee_rini",
|
|
45
|
+
"chen_david",
|
|
46
|
+
"chen_mei",
|
|
47
|
+
"clark_tyler",
|
|
48
|
+
"cohen_emily",
|
|
49
|
+
"cohen_jared",
|
|
50
|
+
"collins_emily",
|
|
51
|
+
"cooper_logan",
|
|
52
|
+
"cupola",
|
|
53
|
+
"das_sourav",
|
|
54
|
+
"davies_james",
|
|
55
|
+
"dela_cristina",
|
|
56
|
+
"diallo_amara",
|
|
57
|
+
"dubois_emma",
|
|
58
|
+
"duncan_colin",
|
|
59
|
+
"duval_pierre",
|
|
60
|
+
"eliphas",
|
|
61
|
+
"estelle",
|
|
62
|
+
"esther",
|
|
63
|
+
"eucalyptus",
|
|
64
|
+
"evans_jason",
|
|
65
|
+
"fern",
|
|
66
|
+
"fernandez_carlos",
|
|
67
|
+
"goldberg_ryan",
|
|
68
|
+
"gomez_daniela",
|
|
69
|
+
"gomez_diego",
|
|
70
|
+
"gomez_isabel",
|
|
71
|
+
"gomez_isabella",
|
|
72
|
+
"gomez_javon",
|
|
73
|
+
"gonzalez_maya",
|
|
74
|
+
"gonzalez_michael",
|
|
75
|
+
"gonzalez_ryan",
|
|
76
|
+
"grayson_avery",
|
|
77
|
+
"hanson_ryan",
|
|
78
|
+
"harris_luke",
|
|
79
|
+
"harris_lynette",
|
|
80
|
+
"harrison_brianna",
|
|
81
|
+
"harrison_joey",
|
|
82
|
+
"harrison_mary",
|
|
83
|
+
"hassan_omar",
|
|
84
|
+
"henderson_brittney",
|
|
85
|
+
"hernandez_juanita",
|
|
86
|
+
"holliday_jewel",
|
|
87
|
+
"iyer_arun",
|
|
88
|
+
"jensen_mikkel",
|
|
89
|
+
"johnny_jackson",
|
|
90
|
+
"johnson_angela",
|
|
91
|
+
"johnson_asha",
|
|
92
|
+
"johnson_avery",
|
|
93
|
+
"johnson_brianna",
|
|
94
|
+
"johnson_cynthia",
|
|
95
|
+
"johnson_elijah",
|
|
96
|
+
"johnson_james",
|
|
97
|
+
"johnson_joshua",
|
|
98
|
+
"johnson_latisha",
|
|
99
|
+
"johnson_lisa",
|
|
100
|
+
"johnson_madison",
|
|
101
|
+
"johnson_malachi",
|
|
102
|
+
"johnson_marcel",
|
|
103
|
+
"johnson_mary",
|
|
104
|
+
"johnson_matthew",
|
|
105
|
+
"johnson_melissa",
|
|
106
|
+
"johnson_monique",
|
|
107
|
+
"johnson_nia",
|
|
108
|
+
"johnson_tasha",
|
|
109
|
+
"johnson_tia",
|
|
110
|
+
"johnson_walter",
|
|
111
|
+
"kelly_aoife",
|
|
112
|
+
"kelly_jennifer",
|
|
113
|
+
"kelly_john",
|
|
114
|
+
"kelly_maureen",
|
|
115
|
+
"khan_fatima",
|
|
116
|
+
"khan_umar",
|
|
117
|
+
"kim_ashley",
|
|
118
|
+
"kim_daniel",
|
|
119
|
+
"kim_sunny",
|
|
120
|
+
"kima",
|
|
121
|
+
"lee_sarah",
|
|
122
|
+
"levi_david",
|
|
123
|
+
"levine_emily",
|
|
124
|
+
"levine_joshua",
|
|
125
|
+
"levy_hannah",
|
|
126
|
+
"li_xiao",
|
|
127
|
+
"lintel",
|
|
128
|
+
"luna",
|
|
129
|
+
"lyra",
|
|
130
|
+
"maguire_jason",
|
|
131
|
+
"malik_ahmad",
|
|
132
|
+
"marinelli_giulia",
|
|
133
|
+
"marlu",
|
|
134
|
+
"martinez_amber",
|
|
135
|
+
"martinez_ana",
|
|
136
|
+
"martinez_dylan",
|
|
137
|
+
"martinez_jaime",
|
|
138
|
+
"martinez_leticia",
|
|
139
|
+
"martinez_rosa",
|
|
140
|
+
"martinez_ryan",
|
|
141
|
+
"masonry",
|
|
142
|
+
"mbunda_james",
|
|
143
|
+
"mccarthy_james",
|
|
144
|
+
"mccarthy_teresa",
|
|
145
|
+
"mcdowell_peter",
|
|
146
|
+
"mckinley_robert",
|
|
147
|
+
"mendoza_alonzo",
|
|
148
|
+
"mendoza_jesus",
|
|
149
|
+
"mendoza_luz",
|
|
150
|
+
"merritt_jimmy",
|
|
151
|
+
"miller_cameron",
|
|
152
|
+
"miller_judy",
|
|
153
|
+
"miller_kelsey",
|
|
154
|
+
"miller_lisa",
|
|
155
|
+
"miller_logan",
|
|
156
|
+
"miyamoto_akari",
|
|
157
|
+
"montgomery_elise",
|
|
158
|
+
"montgomery_emily",
|
|
159
|
+
"morgan_brianna",
|
|
160
|
+
"morgan_charles",
|
|
161
|
+
"morris_colin",
|
|
162
|
+
"morris_james",
|
|
163
|
+
"morris_leticia",
|
|
164
|
+
"morris_melvin",
|
|
165
|
+
"morton_daine",
|
|
166
|
+
"moss",
|
|
167
|
+
"moyo_david",
|
|
168
|
+
"murphy_colin",
|
|
169
|
+
"murphy_emily",
|
|
170
|
+
"murphy_grace",
|
|
171
|
+
"murphy_hannah",
|
|
172
|
+
"murphy_liam",
|
|
173
|
+
"murphy_nolan",
|
|
174
|
+
"neal_colin",
|
|
175
|
+
"novak_emily",
|
|
176
|
+
"nowak_joanna",
|
|
177
|
+
"nowak_michal",
|
|
178
|
+
"oculus",
|
|
179
|
+
"olsson_erik",
|
|
180
|
+
"orion",
|
|
181
|
+
"parapet",
|
|
182
|
+
"park_minseo",
|
|
183
|
+
"park_sumin",
|
|
184
|
+
"patel_amit",
|
|
185
|
+
"patel_asha",
|
|
186
|
+
"pham_daniel",
|
|
187
|
+
"pilaster",
|
|
188
|
+
"pola",
|
|
189
|
+
"ramirez_maya",
|
|
190
|
+
"ramos_raul",
|
|
191
|
+
"reddy_arjun",
|
|
192
|
+
"reddy_sunil",
|
|
193
|
+
"ricci_giulia",
|
|
194
|
+
"ricci_lorenzo",
|
|
195
|
+
"rodrigues_miguel",
|
|
196
|
+
"rodriguez_carla",
|
|
197
|
+
"rodriguez_carlos",
|
|
198
|
+
"rodriguez_eduardo",
|
|
199
|
+
"rodriguez_isabela",
|
|
200
|
+
"rodriguez_miguel",
|
|
201
|
+
"rossi_matteo",
|
|
202
|
+
"santos_angelica",
|
|
203
|
+
"schmidt_joshua",
|
|
204
|
+
"schmidt_julia",
|
|
205
|
+
"schmidt_sophie",
|
|
206
|
+
"schneider_eric",
|
|
207
|
+
"schneider_jack",
|
|
208
|
+
"sharma_amit",
|
|
209
|
+
"silva_ana",
|
|
210
|
+
"singh_anjali",
|
|
211
|
+
"sirius",
|
|
212
|
+
"smith_heather",
|
|
213
|
+
"smith_lisa",
|
|
214
|
+
"smith_michael",
|
|
215
|
+
"smith_mike",
|
|
216
|
+
"stucco",
|
|
217
|
+
"tauro",
|
|
218
|
+
"thalassa",
|
|
219
|
+
"thomas_sarah",
|
|
220
|
+
"thompson_kevin",
|
|
221
|
+
"torres_miguel",
|
|
222
|
+
"tran_david",
|
|
223
|
+
"tran_jessica",
|
|
224
|
+
"tran_tu",
|
|
225
|
+
"transom",
|
|
226
|
+
"truss",
|
|
227
|
+
"tupou_leilani",
|
|
228
|
+
"ursa",
|
|
229
|
+
"vashti",
|
|
230
|
+
"vespera",
|
|
231
|
+
"walnut",
|
|
232
|
+
"wang_mei",
|
|
233
|
+
"watson_emily",
|
|
234
|
+
"williams_anna",
|
|
235
|
+
"williams_brian",
|
|
236
|
+
"williams_darnell",
|
|
237
|
+
"williams_jennifer",
|
|
238
|
+
"williams_jordan",
|
|
239
|
+
"williams_ryan",
|
|
240
|
+
"williams_terence",
|
|
241
|
+
"williams_tiffany",
|
|
242
|
+
"wilson_emma",
|
|
243
|
+
"wong_kenny",
|
|
244
|
+
"wright_cooper",
|
|
245
|
+
"wright_jason",
|
|
246
|
+
"wright_julianne",
|
|
247
|
+
"wright_michael",
|
|
248
|
+
"zhang_mei",
|
|
249
|
+
},
|
|
250
|
+
"es": {"lark", "nova", "pola", "seraphina", "sirius", "ursa"},
|
|
251
|
+
"fr": {"destin", "morel_marianne", "solstice", "serrin_joseph"},
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
AURA_DEFAULT_VOICE_BY_VARIANT: dict[str, str] = {
|
|
255
|
+
"2": "aura-2-thalia-en",
|
|
256
|
+
"2-en": "aura-2-thalia-en",
|
|
257
|
+
"2-es": "aura-2-celeste-es",
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
SARVAM_BCP47_LANGUAGE_BY_CODE: dict[str, str] = {
|
|
261
|
+
"bn": "bn-IN",
|
|
262
|
+
"bn-in": "bn-IN",
|
|
263
|
+
"en": "en-IN",
|
|
264
|
+
"en-in": "en-IN",
|
|
265
|
+
"gu": "gu-IN",
|
|
266
|
+
"gu-in": "gu-IN",
|
|
267
|
+
"hi": "hi-IN",
|
|
268
|
+
"hi-in": "hi-IN",
|
|
269
|
+
"kn": "kn-IN",
|
|
270
|
+
"kn-in": "kn-IN",
|
|
271
|
+
"ml": "ml-IN",
|
|
272
|
+
"ml-in": "ml-IN",
|
|
273
|
+
"mr": "mr-IN",
|
|
274
|
+
"mr-in": "mr-IN",
|
|
275
|
+
"od": "od-IN",
|
|
276
|
+
"od-in": "od-IN",
|
|
277
|
+
"pa": "pa-IN",
|
|
278
|
+
"pa-in": "pa-IN",
|
|
279
|
+
"ta": "ta-IN",
|
|
280
|
+
"ta-in": "ta-IN",
|
|
281
|
+
"te": "te-IN",
|
|
282
|
+
"te-in": "te-IN",
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
def normalize_region_override(
    region_override: str | list[str] | None,
) -> str | None:
    """Canonicalise a region override into a single comma-separated string.

    Args:
        region_override: A comma-separated string, a list of values, or None.

    Returns:
        The non-blank entries, stripped and lower-cased, joined with ", ".
        None when the input is None or holds no non-blank entry.
    """
    if region_override is None:
        return None

    # A string is split on commas; anything else is taken item by item
    # (items of a list are NOT split on commas themselves).
    candidates = (
        region_override.split(",")
        if isinstance(region_override, str)
        else map(str, region_override)
    )

    normalized = []
    for candidate in candidates:
        trimmed = candidate.strip()
        if trimmed:
            normalized.append(trimmed.lower())

    return ", ".join(normalized) if normalized else None
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
@dataclass(frozen=True)
class ModelRef:
    """Immutable, parsed form of a model identifier string."""

    # Identifier as supplied, with surrounding whitespace removed.
    raw: str
    # First path segment of the identifier.
    provider: str
    # Remaining path segments joined with "/".
    model: str
    # Text after the last ":" of the identifier, or None when there is none.
    variant: str | None
    # Provider used for routing; differs from `provider` for "slng/..." ids.
    route_provider: str
    # Model used for routing; differs from `model` for "slng/..." ids.
    route_model: str
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
def parse_model_ref(model: str) -> ModelRef:
    """Parse a model identifier into a ModelRef.

    Accepted forms are '<provider>/<model>' and 'slng/<provider>/<model>',
    each optionally followed by ':<variant>'. Empty path segments (for
    example from a doubled or leading "/") are ignored.

    Args:
        model: The identifier to parse; surrounding whitespace is ignored.

    Returns:
        A ModelRef. For 'slng/...' identifiers the routing provider and model
        are taken from the segments after the 'slng' prefix; otherwise they
        equal the provider and model themselves.

    Raises:
        ValueError: If the identifier is empty, ends in an empty variant, or
            has too few path segments.
    """
    raw = (model or "").strip()
    if not raw:
        raise ValueError("model must not be empty")

    # Everything after the LAST colon is the variant.
    model_path, separator, variant = raw.rpartition(":")
    if not separator:
        model_path, variant = raw, None
    elif not variant:
        raise ValueError("model variant must not be empty")

    segments = [segment for segment in model_path.split("/") if segment]
    if len(segments) < 2:
        raise ValueError(
            f"invalid model '{raw}'; expected '<provider>/<model>' or 'slng/<provider>/<model>'"
        )

    provider, *rest = segments
    model_name = "/".join(rest)

    if provider != "slng":
        route_provider, route_model = provider, model_name
    elif len(segments) < 3:
        raise ValueError(f"invalid model '{raw}'; expected 'slng/<provider>/<model>'")
    else:
        # Drop the "slng" prefix: the next segment is the real provider.
        route_provider = rest[0]
        route_model = "/".join(rest[1:])

    if not (route_provider and route_model):
        raise ValueError(f"invalid model '{raw}'; provider and model must both be present")

    return ModelRef(
        raw=raw,
        provider=provider,
        model=model_name,
        variant=variant,
        route_provider=route_provider,
        route_model=route_model,
    )
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
def _rime_lang_from_variant(variant: str | None) -> str | None:
    """Return the language code carried by a Rime Arcana variant, if any.

    A variant is either a bare language code ("en", "es") or a versioned form
    ("3-en", "3-es"). Only codes that are keys of RIME_DEFAULT_SPEAKER_BY_LANG
    are recognised; anything else yields None.
    """
    if not variant:
        return None

    # Try the whole variant first, then whatever follows the first hyphen.
    _, hyphen, suffix = variant.partition("-")
    candidates = (variant, suffix) if hyphen else (variant,)
    for candidate in candidates:
        if candidate in RIME_DEFAULT_SPEAKER_BY_LANG:
            return candidate
    return None
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
def _is_aura_ref(ref: ModelRef) -> bool:
    """Return True when the reference routes to provider "deepgram", model "aura"."""
    return (ref.route_provider, ref.route_model) == ("deepgram", "aura")
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
def _is_arcana_ref(ref: ModelRef) -> bool:
    """Return True when the reference routes to provider "rime", model "arcana"."""
    return (ref.route_provider, ref.route_model) == ("rime", "arcana")
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
def _is_bulbul_ref(ref: ModelRef) -> bool:
    """Return True when the reference routes to provider "sarvam", model "bulbul"."""
    return (ref.route_provider, ref.route_model) == ("sarvam", "bulbul")
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
def _is_sarvam_ref(ref: ModelRef) -> bool:
    """Return True when the reference routes to provider "sarvam" (any model)."""
    route_provider = ref.route_provider
    return route_provider == "sarvam"
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
def is_deepgram_aura_model(model: str) -> bool:
    """Return True when the model identifier routes to "deepgram" / "aura".

    Raises:
        ValueError: Propagated from parse_model_ref for a malformed identifier.
    """
    ref = parse_model_ref(model)
    return _is_aura_ref(ref)
|
|
393
|
+
|
|
394
|
+
|
|
395
|
+
def is_rime_arcana_model(model: str) -> bool:
    """Return True when the model identifier routes to "rime" / "arcana".

    Raises:
        ValueError: Propagated from parse_model_ref for a malformed identifier.
    """
    ref = parse_model_ref(model)
    return _is_arcana_ref(ref)
|
|
397
|
+
|
|
398
|
+
|
|
399
|
+
def is_sarvam_bulbul_model(model: str) -> bool:
    """Return True when the model identifier routes to "sarvam" / "bulbul".

    Raises:
        ValueError: Propagated from parse_model_ref for a malformed identifier.
    """
    ref = parse_model_ref(model)
    return _is_bulbul_ref(ref)
|
|
401
|
+
|
|
402
|
+
|
|
403
|
+
def is_sarvam_model(model: str) -> bool:
    """Return True when the model identifier routes to provider "sarvam".

    Raises:
        ValueError: Propagated from parse_model_ref for a malformed identifier.
    """
    ref = parse_model_ref(model)
    return _is_sarvam_ref(ref)
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
def _normalize_language_for_ref(
    ref: ModelRef | None,
    language: str,
    *,
    model_options: Mapping[str, Any] | None = None,
) -> str:
    """Resolve the language code to use for a parsed model reference.

    Args:
        ref: Parsed model reference, or None when no model is known.
        language: Requested language code.
        model_options: Optional per-model options. A non-blank string under
            "target_language_code" takes precedence over ``language``.

    Returns:
        The stripped language code. For Sarvam references it is looked up
        case-insensitively in SARVAM_BCP47_LANGUAGE_BY_CODE; codes without an
        entry are returned unchanged. An empty string is returned when no
        language is available.
    """
    requested = model_options.get("target_language_code") if model_options else None
    # Non-string or blank overrides are ignored.
    override = requested.strip() if isinstance(requested, str) else ""

    cleaned = (override or language or "").strip()
    if cleaned and ref is not None and _is_sarvam_ref(ref):
        return SARVAM_BCP47_LANGUAGE_BY_CODE.get(cleaned.lower(), cleaned)
    return cleaned
|
|
427
|
+
|
|
428
|
+
|
|
429
|
+
def normalize_language_for_model(
    model: str | None,
    language: str,
    *,
    model_options: Mapping[str, Any] | None = None,
) -> str:
    """Resolve the language code to use for a model identifier.

    Args:
        model: Model identifier; None or an empty string means "no model".
        language: Requested language code.
        model_options: Optional per-model options, forwarded unchanged.

    Returns:
        The result of _normalize_language_for_ref for the parsed model.

    Raises:
        ValueError: Propagated from parse_model_ref for a malformed identifier.
    """
    if model:
        ref = parse_model_ref(model)
    else:
        ref = None
    return _normalize_language_for_ref(ref, language, model_options=model_options)
|
|
437
|
+
|
|
438
|
+
|
|
439
|
+
def _normalize_tts_voice_for_ref(ref: ModelRef, voice: str) -> str:
    """Return the voice to use for a parsed TTS model reference.

    For Arcana and Aura references an empty voice or the literal "default" is
    replaced by a per-language / per-variant default; any other voice is
    returned stripped. For every other model the stripped voice is returned
    as is (including "default").
    """
    cleaned = (voice or "").strip()
    wants_default = cleaned in ("", "default")

    if _is_arcana_ref(ref):
        if not wants_default:
            return cleaned
        # Variants without a recognised language fall back to English.
        lang = _rime_lang_from_variant(ref.variant) or "en"
        return RIME_DEFAULT_SPEAKER_BY_LANG[lang]

    if _is_aura_ref(ref):
        if not wants_default:
            return cleaned
        # Unknown or missing variants fall back to the "2" default voice.
        return AURA_DEFAULT_VOICE_BY_VARIANT.get(
            ref.variant, AURA_DEFAULT_VOICE_BY_VARIANT["2"]
        )

    return cleaned
|
|
458
|
+
|
|
459
|
+
|
|
460
|
+
def normalize_tts_voice(model: str, voice: str) -> str:
    """Return the voice to use for a TTS model identifier.

    Raises:
        ValueError: Propagated from parse_model_ref for a malformed identifier.
    """
    ref = parse_model_ref(model)
    return _normalize_tts_voice_for_ref(ref, voice)
|
|
462
|
+
|
|
463
|
+
|
|
464
|
+
def _validate_tts_voice_for_ref(ref: ModelRef, voice: str) -> list[str]:
    """Validate a TTS voice against a parsed model reference.

    Args:
        ref: Parsed model reference.
        voice: Voice identifier; surrounding whitespace is ignored.

    Returns:
        A list of human-readable problems; empty when the voice is acceptable.
        Aura voices must start with "aura-2-" and carry a language suffix that
        matches the variant. Arcana voices must belong to the allowed speakers
        of the variant's language when that language is known. Every other
        model only requires a non-empty voice.
    """
    cleaned = (voice or "").strip()
    model = ref.raw

    is_aura = _is_aura_ref(ref)
    is_arcana = _is_arcana_ref(ref)

    if is_aura:
        if not cleaned:
            return [
                f"tts_voice is required for {model}; expected an aura-2 voice like "
                "'aura-2-thalia-en' or 'aura-2-celeste-es'"
            ]
        if not cleaned.startswith("aura-2-"):
            return [f"tts_voice '{cleaned}' is invalid for {model}; expected an aura-2 model id"]

        problems: list[str] = []
        english = cleaned.endswith("-en")
        spanish = cleaned.endswith("-es")
        variant = ref.variant
        if variant == "2-en" and not english:
            problems.append(
                f"tts_voice '{cleaned}' is invalid for {model}; expected an English '-en' voice"
            )
        if variant == "2-es" and not spanish:
            problems.append(
                f"tts_voice '{cleaned}' is invalid for {model}; expected a Spanish '-es' voice"
            )
        # The unversioned / "2" variant accepts either language suffix.
        if variant in ("2", None) and not (english or spanish):
            problems.append(
                f"tts_voice '{cleaned}' is invalid for {model}; expected an '-en' or '-es' voice"
            )
        return problems

    if is_arcana:
        lang = _rime_lang_from_variant(ref.variant)
        if not cleaned:
            return [f"tts_voice is required for {model}; expected a valid speaker"]
        # Speakers are only checked when the variant names a known language.
        allowed = RIME_ALLOWED_SPEAKERS_BY_LANG.get(lang) if lang else None
        if allowed is not None and cleaned not in allowed:
            allowed_speakers = ", ".join(sorted(allowed))
            return [
                f"tts_voice '{cleaned}' is not valid for {model}; "
                f"allowed speakers: {allowed_speakers}"
            ]
        return []

    # Generic check for all other models: the voice must not be empty.
    if not cleaned:
        return [f"tts_voice is empty for {model}; a voice identifier should be provided"]
    return []
|
|
518
|
+
|
|
519
|
+
|
|
520
|
+
def validate_tts_voice(model: str, voice: str) -> list[str]:
    """Validate a TTS voice against a model identifier.

    Returns:
        A list of human-readable problems; empty when the voice is acceptable.

    Raises:
        ValueError: Propagated from parse_model_ref for a malformed identifier.
    """
    ref = parse_model_ref(model)
    return _validate_tts_voice_for_ref(ref, voice)
|
|
522
|
+
|
|
523
|
+
|
|
524
|
+
def _resolve_deepgram_stt_model_for_ref(ref: ModelRef) -> str | None:
    """Map a "deepgram" / "nova" reference to a concrete model name.

    Returns:
        "nova-3-medical", "nova-3" or "nova-2" depending on the (case
        insensitive) variant prefix; None for any other reference or for a
        variant that matches none of the known prefixes.
    """
    if (ref.route_provider, ref.route_model) != ("deepgram", "nova"):
        return None

    variant = (ref.variant or "").lower()
    # Most specific prefix first: "3-medical" must win over the bare "3".
    prefix_table = (
        ("3-medical", "nova-3-medical"),
        ("3", "nova-3"),
        ("2", "nova-2"),
    )
    for prefix, resolved in prefix_table:
        if variant.startswith(prefix):
            return resolved
    return None
|
|
536
|
+
|
|
537
|
+
|
|
538
|
+
def resolve_deepgram_stt_model(model: str | None) -> str | None:
    """Map a model identifier to a concrete Deepgram Nova model name.

    Returns:
        The resolved name, or None when ``model`` is None/empty or does not
        resolve to a known Nova model.

    Raises:
        ValueError: Propagated from parse_model_ref for a malformed identifier.
    """
    return _resolve_deepgram_stt_model_for_ref(parse_model_ref(model)) if model else None
|
|
542
|
+
|
|
543
|
+
|
|
544
|
+
def build_tts_init_payload(
    *,
    model: str,
    voice: str,
    language: str,
    sample_rate: int,
    encoding: str,
    speed: float,
    model_options: Mapping[str, Any] | None = None,
) -> dict[str, Any]:
    """Build the TTS "init" message for a model.

    Args:
        model: Model identifier, parsed with parse_model_ref.
        voice: Voice identifier, used as given.
        language: Requested language; normalised for the model.
        sample_rate: Audio sample rate.
        encoding: Audio encoding name.
        speed: Speaking speed.
        model_options: Optional per-model options; only the keys relevant to
            the detected model are copied into the config.

    Returns:
        A dict with "type", "model", "voice", "language" and "config" keys,
        plus model-specific additions (Aura: "model" becomes the voice;
        Arcana: "speaker" and Arcana config keys; Bulbul: Bulbul config keys).

    Raises:
        ValueError: Propagated from parse_model_ref for a malformed identifier.
    """
    ref = parse_model_ref(model)
    options = dict(model_options or {})
    normalized_language = _normalize_language_for_ref(ref, language, model_options=options)

    config: dict[str, Any] = dict(
        language=normalized_language,
        encoding=encoding,
        sample_rate=sample_rate,
        speed=speed,
    )
    payload: dict[str, Any] = dict(
        type="init",
        model=model,
        voice=voice,
        language=normalized_language,
        config=config,
    )

    def copy_present(keys: tuple[str, ...]) -> None:
        # Forward only the options the caller actually supplied.
        config.update((key, options[key]) for key in keys if key in options)

    if _is_aura_ref(ref):
        # For Aura the voice id is sent in the "model" field.
        payload["model"] = voice

    if _is_arcana_ref(ref):
        config["modelId"] = options.get("modelId", "arcana")
        config["segment"] = options.get("segment", "bySentence")
        copy_present(
            (
                "speakingStyle",
                "addBreathing",
                "addDisfluencies",
                "phonemizeBetweenBrackets",
                "translateTo",
            )
        )
        payload["speaker"] = voice

    if _is_bulbul_ref(ref):
        # The sample rate is sent as a string under this key.
        config["speech_sample_rate"] = str(sample_rate)
        config["pace"] = options.get("pace", speed)
        copy_present(
            (
                "temperature",
                "output_audio_bitrate",
                "min_buffer_size",
                "max_chunk_length",
            )
        )

    return payload
|
|
605
|
+
|
|
606
|
+
|
|
607
|
+
def build_stt_init_payload(
    *,
    model: str | None,
    language: str,
    sample_rate: int,
    encoding: str,
    vad_threshold: float,
    vad_min_silence_duration_ms: int,
    vad_speech_pad_ms: int,
    enable_diarization: bool,
    enable_partial_transcripts: bool,
    min_speakers: int | None = None,
    max_speakers: int | None = None,
    model_options: Mapping[str, Any] | None = None,
) -> dict[str, Any]:
    """Build the STT "init" message for a model.

    Args:
        model: Model identifier, or None when no model is selected. Any
            non-None value (including an empty string) is parsed with
            parse_model_ref.
        language: Requested language; normalised for the model.
        sample_rate: Audio sample rate.
        encoding: Audio encoding; "pcm_s16le" is sent as "linear16".
        vad_threshold: Voice-activity detection threshold.
        vad_min_silence_duration_ms: Minimum silence duration for VAD.
        vad_speech_pad_ms: Speech padding for VAD.
        enable_diarization: Whether diarization is requested.
        enable_partial_transcripts: Whether partial transcripts are requested.
        min_speakers: Optional lower bound on speakers; omitted when None.
        max_speakers: Optional upper bound on speakers; omitted when None.
        model_options: Optional extra config entries. They are merged last and
            override the values computed from the other arguments.

    Returns:
        A dict with "type" and "config" keys, plus "model" when the reference
        resolves to a Deepgram Nova model. "enable_partials" and
        "enable_partial_transcripts" always carry the same value.

    Raises:
        ValueError: Propagated from parse_model_ref for a malformed identifier.
    """
    ref = parse_model_ref(model) if model is not None else None

    normalized_language = _normalize_language_for_ref(
        ref,
        language,
        model_options=model_options,
    )
    config: dict[str, Any] = {
        "language": normalized_language,
        "sample_rate": sample_rate,
        "encoding": "linear16" if encoding == "pcm_s16le" else encoding,
        "vad_threshold": vad_threshold,
        "vad_min_silence_duration_ms": vad_min_silence_duration_ms,
        "vad_speech_pad_ms": vad_speech_pad_ms,
        "enable_diarization": enable_diarization,
    }

    if min_speakers is not None:
        config["min_speakers"] = min_speakers
    if max_speakers is not None:
        config["max_speakers"] = max_speakers

    overrides = dict(model_options or {})
    config.update(overrides)

    # Both spellings of the partial-transcripts flag are sent with one value.
    # The value must be read from the caller's overrides, not from `config`:
    # `config` would always hold a default for "enable_partials", which used to
    # make an override given only as "enable_partial_transcripts" be discarded.
    # When both spellings are overridden, "enable_partials" wins.
    partials_value = overrides.get(
        "enable_partials",
        overrides.get("enable_partial_transcripts", enable_partial_transcripts),
    )
    config["enable_partials"] = partials_value
    config["enable_partial_transcripts"] = partials_value

    payload: dict[str, Any] = {"type": "init", "config": config}

    if ref is not None:
        deepgram_model = _resolve_deepgram_stt_model_for_ref(ref)
        if deepgram_model:
            payload["model"] = deepgram_model

    return payload
|