dulus 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent.py +363 -0
- backend/__init__.py +63 -0
- backend/compressor.py +261 -0
- backend/context.py +329 -0
- backend/githook.py +166 -0
- backend/marketplace.py +141 -0
- backend/mempalace_bridge.py +182 -0
- backend/personas.py +297 -0
- backend/plugins.py +222 -0
- backend/server.py +411 -0
- backend/tasks.py +213 -0
- batch_api.py +307 -0
- checkpoint/__init__.py +27 -0
- checkpoint/hooks.py +90 -0
- checkpoint/store.py +314 -0
- checkpoint/types.py +80 -0
- claude_code_watcher.py +214 -0
- clipboard_utils.py +246 -0
- cloudsave.py +159 -0
- common.py +177 -0
- compaction.py +378 -0
- config.py +180 -0
- context.py +241 -0
- dulus-0.2.0.dist-info/METADATA +600 -0
- dulus-0.2.0.dist-info/RECORD +101 -0
- dulus-0.2.0.dist-info/WHEEL +5 -0
- dulus-0.2.0.dist-info/entry_points.txt +2 -0
- dulus-0.2.0.dist-info/licenses/LICENSE +674 -0
- dulus-0.2.0.dist-info/licenses/license_manager.py +187 -0
- dulus-0.2.0.dist-info/top_level.txt +36 -0
- dulus.py +8455 -0
- dulus_gui.py +331 -0
- dulus_mcp/__init__.py +43 -0
- dulus_mcp/client.py +546 -0
- dulus_mcp/config.py +133 -0
- dulus_mcp/tools.py +131 -0
- dulus_mcp/types.py +124 -0
- gui/__init__.py +18 -0
- gui/agent_bridge.py +283 -0
- gui/chat_widget.py +448 -0
- gui/main_window.py +485 -0
- gui/personas.py +230 -0
- gui/session_utils.py +189 -0
- gui/settings_dialog.py +146 -0
- gui/sidebar.py +515 -0
- gui/tasks_view.py +499 -0
- gui/themes.py +256 -0
- gui/tool_panel.py +94 -0
- input.py +1030 -0
- license_manager.py +187 -0
- memory/__init__.py +93 -0
- memory/audit.py +51 -0
- memory/consolidator.py +312 -0
- memory/context.py +270 -0
- memory/offload.py +148 -0
- memory/palace.py +127 -0
- memory/scan.py +146 -0
- memory/sessions.py +100 -0
- memory/store.py +395 -0
- memory/tools.py +408 -0
- memory/types.py +114 -0
- memory/vector_search.py +92 -0
- multi_agent/__init__.py +23 -0
- multi_agent/subagent.py +501 -0
- multi_agent/tools.py +393 -0
- offload_helper.py +183 -0
- plugin/__init__.py +22 -0
- plugin/autoadapter.py +1641 -0
- plugin/loader.py +156 -0
- plugin/recommend.py +211 -0
- plugin/store.py +387 -0
- plugin/types.py +147 -0
- providers.py +3750 -0
- skill/__init__.py +14 -0
- skill/builtin.py +100 -0
- skill/clawhub.py +270 -0
- skill/executor.py +66 -0
- skill/loader.py +199 -0
- skill/tools.py +110 -0
- skills.py +14 -0
- spinner.py +42 -0
- string_utils.py +42 -0
- subagent.py +11 -0
- task/__init__.py +12 -0
- task/store.py +199 -0
- task/tools.py +265 -0
- task/types.py +92 -0
- tmux_offloader.py +177 -0
- tmux_tools.py +410 -0
- tool_registry.py +214 -0
- tools.py +2694 -0
- ui/__init__.py +1 -0
- ui/input.py +464 -0
- ui/render.py +272 -0
- voice/__init__.py +56 -0
- voice/keyterms.py +179 -0
- voice/recorder.py +263 -0
- voice/stt.py +408 -0
- voice/tts.py +570 -0
- webchat.py +432 -0
- webchat_server.py +1761 -0
voice/tts.py
ADDED
|
@@ -0,0 +1,570 @@
|
|
|
1
|
+
"""Text-to-speech (TTS) backends.
|
|
2
|
+
|
|
3
|
+
Backend priority (Azure Speech Services is tried first when configured; then, in order):
|
|
4
|
+
1. NVIDIA Riva — cloud, Magpie-Multilingual via NVCF gRPC.
|
|
5
|
+
pip install nvidia-riva-client + NVIDIA_API_KEY
|
|
6
|
+
2. OpenAI TTS — cloud, high quality, needs OPENAI_API_KEY.
|
|
7
|
+
3. gTTS — cloud, free, needs internet.
|
|
8
|
+
pip install gTTS
|
|
9
|
+
4. pyttsx3 — local, offline, uses system voices.
|
|
10
|
+
pip install pyttsx3
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import os
|
|
16
|
+
import re
|
|
17
|
+
import struct
|
|
18
|
+
import subprocess
|
|
19
|
+
import tempfile
|
|
20
|
+
import threading
|
|
21
|
+
import time
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
from typing import Optional
|
|
24
|
+
|
|
25
|
+
# ── Interrupt flag ────────────────────────────────────────────────────────
|
|
26
|
+
# say() holds `_say_lock` for its whole duration so that speech requests run
# one after another. Without it, two concurrent say() calls would share
# `_stop_event`: the second call's .clear() would wipe out a cancel signal
# meant for the first, leaving overlapping audio that cannot be interrupted.
_say_lock = threading.Lock()
_stop_event = threading.Event()
|
|
31
|
+
|
|
32
|
+
def _watch_for_cancel() -> None:
    """Background thread body: set `_stop_event` when the user presses 'c'.

    Uses `msvcrt` for non-blocking console input, so it only does anything
    on platforms where that module can be imported; elsewhere it returns
    immediately. The loop ends as soon as `_stop_event` is set, whether by
    this function or by someone else.
    """
    try:
        import msvcrt
    except ImportError:
        # No msvcrt on this platform: keyboard cancel is unavailable.
        return

    try:
        while not _stop_event.is_set():
            if msvcrt.kbhit():
                if msvcrt.getwch().lower() == 'c':
                    _stop_event.set()
                    print("\n ⏹ TTS stopped.", flush=True)
                    return
            else:
                # Sleep between polls instead of spinning; wait() returns
                # early the moment the event is set, so shutdown stays prompt.
                _stop_event.wait(0.05)
    except Exception:
        # Best effort: a console error must never take down the watcher's
        # caller; playback simply continues without keyboard cancel.
        pass
|
|
45
|
+
|
|
46
|
+
# ── Playback Helper ───────────────────────────────────────────────────────
|
|
47
|
+
|
|
48
|
+
def _run_player_interruptible(command: list[str]) -> None:
    """Run an external player, stopping it early when `_stop_event` is set."""
    proc = subprocess.Popen(command)
    try:
        while proc.poll() is None:
            if _stop_event.is_set():
                proc.terminate()
                return
            time.sleep(0.05)
    finally:
        if proc.poll() is None:
            proc.kill()
        # Reap the child so it does not linger as a zombie process.
        proc.wait()


def _play_audio_file(file_path: str | Path) -> None:
    """Play an audio file, interruptible through `_stop_event` ('c' key).

    Players are tried in order: ``ffplay``, then ``mpv``, then Windows MCI
    (only when ``os.name == "nt"``). If none is available a message naming
    the file is printed instead.

    Args:
        file_path: Path of the audio file to play.
    """
    import shutil

    file_path = str(file_path)

    # Command-line players, in preference order, with their quiet/headless flags.
    players = (
        ("ffplay", ["-nodisp", "-autoexit", "-loglevel", "quiet"]),
        ("mpv", ["--no-video", "--really-quiet"]),
    )
    for player_name, player_args in players:
        executable = shutil.which(player_name)
        if executable:
            _run_player_interruptible([executable, *player_args, file_path])
            return

    # Windows MCI
    if os.name == "nt":
        _play_windows_mci(file_path)
        return

    print(f"  [TTS] Cannot play audio: no player found (install ffmpeg or mpv). File: {file_path}")
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _play_windows_mci(file_path: str) -> None:
    """Play via MCI, polling `_stop_event` every 50ms to allow 'c' cancel.

    The file is opened under the fixed alias ``_tts_track``. The alias is
    always closed again, even when playback raises, so a later call can
    reuse it. Any failure is reported with a printed message rather than
    raised.

    Args:
        file_path: Path of the audio file; its suffix selects the MCI device
            type (unknown suffixes fall back to ``mpegvideo``).
    """
    try:
        import ctypes
        winmm = ctypes.windll.winmm
        abs_path = str(Path(file_path).resolve())
        ext = Path(file_path).suffix.lower()
        mci_type = {".wav": "waveaudio", ".mp3": "mpegvideo",
                    ".mp4": "mpegvideo", ".avi": "avivideo"}.get(ext, "mpegvideo")

        # mciSendString returns zero on success and an error code otherwise.
        open_error = winmm.mciSendStringW(
            f'open "{abs_path}" type {mci_type} alias _tts_track', None, 0, None)
        if open_error:
            print(f"  [TTS] Windows MCI playback error: open failed (code {open_error})")
            return

        try:
            winmm.mciSendStringW('play _tts_track', None, 0, None)
            buf = ctypes.create_unicode_buffer(128)
            while True:
                if _stop_event.is_set():
                    winmm.mciSendStringW('stop _tts_track', None, 0, None)
                    break
                winmm.mciSendStringW('status _tts_track mode', buf, 128, None)
                if buf.value != 'playing':
                    break
                time.sleep(0.05)
        finally:
            winmm.mciSendStringW('close _tts_track', None, 0, None)
            time.sleep(0.1)  # let MCI fully release the file handle
    except Exception as e:
        print(f"  [TTS] Windows MCI playback error: {e}")
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
# ── pyttsx3 singleton ─────────────────────────────────────────────────────
|
|
117
|
+
# Recreating the engine on every call causes COM errors on Windows.
|
|
118
|
+
_pyttsx3_engine = None


def _get_pyttsx3_engine():
    """Return the shared pyttsx3 engine, creating it on first use.

    pyttsx3 is imported lazily here, so an ImportError propagates to the
    caller when the package is not installed.
    """
    global _pyttsx3_engine
    if _pyttsx3_engine is not None:
        return _pyttsx3_engine

    import pyttsx3
    _pyttsx3_engine = pyttsx3.init()
    return _pyttsx3_engine
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
# ── Azure Speech Services ─────────────────────────────────────────────────
|
|
129
|
+
|
|
130
|
+
# Default Azure neural voice for each short language code; consulted only
# when no voice was given explicitly, via environment, or via config.
_AZURE_LANG_VOICES: dict[str, str] = dict(
    es="es-ES-AlvaroNeural",
    en="en-US-GuyNeural",
    fr="fr-FR-HenriNeural",
    pt="pt-BR-AntonioNeural",
    de="de-DE-ConradNeural",
    it="it-IT-DiegoNeural",
    ja="ja-JP-KeitaNeural",
    zh="zh-CN-YunxiNeural",
)
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _azure_tts_available() -> bool:
    """Report whether the Azure backend has both its SDK and its credentials.

    Credentials are taken from AZURE_SPEECH_KEY / AZURE_SPEECH_REGION. When
    those are missing, the Dulus config (`azure_speech_key`,
    `azure_speech_region`) is consulted and, if complete, copied into the
    environment so later lookups succeed.
    """
    try:
        import azure.cognitiveservices.speech as _  # noqa: F401
    except ImportError:
        return False

    env = os.environ
    if env.get("AZURE_SPEECH_KEY") and env.get("AZURE_SPEECH_REGION"):
        return True

    # Env vars may be unset even though the key was configured this session
    # via /config after load_config() had already run, so ask the config too.
    try:
        from config import load_config
        settings = load_config()
        key = settings.get("azure_speech_key")
        region = settings.get("azure_speech_region")
        if not (key and region):
            return False
        env["AZURE_SPEECH_KEY"] = key
        env["AZURE_SPEECH_REGION"] = region
        return True
    except Exception:
        return False
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def _say_azure(text: str, voice: Optional[str] = None, lang: str = "es") -> bool:
    """Synthesize `text` with Azure Speech Services and play the result.

    Args:
        text: Text to speak.
        voice: Azure voice name. When empty, resolution falls back to the
            AZURE_TTS_VOICE env var, then the `azure_tts_voice` config entry,
            then the per-language default in `_AZURE_LANG_VOICES`.
        lang: Language code used only for the per-language default voice.
            Region-qualified codes ("es-MX") are matched on their primary
            subtag; unknown or empty values use the English voice.

    Returns:
        True if audio was synthesized and handed to the player, False when
        the backend is unavailable, synthesis did not complete, or any error
        occurred (errors are printed, not raised).
    """
    if not _azure_tts_available():
        return False
    tmp_path: Optional[str] = None
    try:
        import azure.cognitiveservices.speech as speechsdk

        speech_config = speechsdk.SpeechConfig(
            subscription=os.environ.get("AZURE_SPEECH_KEY", ""),
            region=os.environ.get("AZURE_SPEECH_REGION", ""),
        )

        # Resolve voice: explicit arg > env var > config > language default
        if not voice:
            voice = os.environ.get("AZURE_TTS_VOICE", "")
        if not voice:
            try:
                from config import load_config
                voice = load_config().get("azure_tts_voice", "")
            except Exception:
                # Config is optional here; fall through to the language default.
                pass
        if not voice:
            primary_lang = (lang or "en").lower().split("-")[0]
            voice = _AZURE_LANG_VOICES.get(primary_lang, _AZURE_LANG_VOICES.get("en"))

        speech_config.speech_synthesis_voice_name = voice

        # Use mkstemp + close handle immediately so Azure (and later the player)
        # can open the file without Windows sharing violation.
        fd, tmp_path = tempfile.mkstemp(suffix=".wav")
        os.close(fd)

        audio_config = speechsdk.audio.AudioOutputConfig(filename=tmp_path)
        synthesizer = speechsdk.SpeechSynthesizer(
            speech_config=speech_config, audio_config=audio_config
        )
        result = synthesizer.speak_text_async(text).get()

        if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
            _play_audio_file(tmp_path)
            return True
        if result.reason == speechsdk.ResultReason.Canceled:
            cancellation = result.cancellation_details
            print(f"  [Azure TTS] Canceled: {cancellation.reason} — {cancellation.error_details}")
        # Any outcome other than completed synthesis counts as failure.
        return False
    except Exception as e:
        print(f"  [Azure TTS] Error: {e}")
        return False
    finally:
        if tmp_path:
            # Windows MCI may keep the file locked briefly after playback ends.
            # Retry a few times before giving up.
            for _ in range(15):
                try:
                    Path(tmp_path).unlink(missing_ok=True)
                    break
                except PermissionError:
                    time.sleep(0.1)
                except OSError:
                    break
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
# ── NVIDIA Riva (Magpie-Multilingual via NVCF gRPC) ──────────────────────
|
|
230
|
+
# gRPC endpoint and function id used for Riva TTS; each can be overridden
# through the named environment variable.
RIVA_TTS_SERVER = os.getenv("DULUS_RIVA_SERVER", "grpc.nvcf.nvidia.com:443")
RIVA_TTS_FUNCTION_ID = os.getenv(
    "DULUS_RIVA_TTS_FUNCTION_ID",
    "877104f7-e885-42b9-8de8-f6e4c6303969",
)
RIVA_TTS_DEFAULT_VOICE = "Magpie-Multilingual.EN-US.Aria"
RIVA_TTS_SAMPLE_RATE = 44100

# Short BCP-47 → Riva language codes (Magpie expects xx-YY form).
_RIVA_LANG_MAP = {
    "es": "es-US",
    "en": "en-US",
    "fr": "fr-FR",
    "pt": "pt-BR",
    "de": "de-DE",
    "it": "it-IT",
    "ja": "ja-JP",
    "zh": "zh-CN",
}
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def _riva_lang_code(lang: str) -> str:
    """Map a language code to the xx-YY form passed to Riva.

    Empty input yields "en-US"; a code that already contains "-" is returned
    untouched; a short code is looked up in `_RIVA_LANG_MAP`, falling back to
    "<code>-US" when it is not listed.
    """
    if not lang:
        return "en-US"
    if "-" in lang:
        return lang
    short_code = lang.lower()
    return _RIVA_LANG_MAP.get(short_code, f"{short_code}-US")
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def _riva_voice_for(lang: str) -> str:
    """Pick the Riva voice name for `lang` from environment variables.

    Lookup order: DULUS_RIVA_TTS_VOICE_<LANG> (the upper-cased part of `lang`
    before any "-", "EN" when `lang` is empty), then DULUS_RIVA_TTS_VOICE,
    then `RIVA_TTS_DEFAULT_VOICE`.

    Example: DULUS_RIVA_TTS_VOICE_ES="Magpie-Multilingual.ES-US.Lupe".
    Voice names can be discovered with `talk.py --list-voices`.
    """
    lang_prefix = (lang or 'en').upper().split('-')[0]
    per_language = os.environ.get(f"DULUS_RIVA_TTS_VOICE_{lang_prefix}")
    if not per_language:
        return os.environ.get("DULUS_RIVA_TTS_VOICE", RIVA_TTS_DEFAULT_VOICE)
    return per_language
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def _pcm_to_wav(pcm: bytes, sample_rate: int = 44100) -> bytes:
    """Prefix raw int16 mono PCM with a 44-byte RIFF/WAVE header.

    Args:
        pcm: Raw sample bytes, written after the header unchanged.
        sample_rate: Sample rate in Hz recorded in the header.

    Returns:
        The header followed by `pcm`.
    """
    payload_len = len(pcm)
    # RIFF chunk: size field counts everything after itself (36 header bytes + data).
    riff_chunk = struct.pack("<4sI4s", b"RIFF", 36 + payload_len, b"WAVE")
    # fmt chunk: 16-byte body, format 1 (PCM), 1 channel, byte rate = rate * 2,
    # block align 2, 16 bits per sample.
    fmt_chunk = struct.pack(
        "<4sIHHIIHH",
        b"fmt ", 16, 1, 1, sample_rate, sample_rate * 2, 2, 16,
    )
    data_header = struct.pack("<4sI", b"data", payload_len)
    return riff_chunk + fmt_chunk + data_header + pcm
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
def _riva_tts_available() -> bool:
    """Return True only when NVIDIA_API_KEY is set and `riva.client` imports."""
    if os.environ.get("NVIDIA_API_KEY"):
        try:
            import riva.client  # noqa: F401
        except ImportError:
            return False
        return True
    return False
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
_RIVA_TTS_MAX_CHARS = 380  # Magpie hard limit is 400; leave headroom


def _wrap_words(clause: str, limit: int) -> list[str]:
    """Greedily pack the space-separated words of `clause` into <=limit pieces."""
    pieces: list[str] = []
    current = ""
    for word in clause.split(" "):
        if not word:
            continue
        # A single token longer than the limit (e.g. text without spaces)
        # cannot be wrapped, so it is cut into limit-sized slices.
        while len(word) > limit:
            if current:
                pieces.append(current)
                current = ""
            pieces.append(word[:limit])
            word = word[limit:]
        if not word:
            continue
        if current and len(current) + 1 + len(word) > limit:
            pieces.append(current)
            current = word
        else:
            current = f"{current} {word}".strip()
    if current:
        pieces.append(current)
    return pieces


def _split_for_riva(text: str, limit: int = _RIVA_TTS_MAX_CHARS) -> list[str]:
    """Split text into <=limit-char chunks at sentence/clause/word boundaries.

    Sentences that fit are emitted one per chunk. A sentence that is too long
    is split at commas/semicolons/colons and the clauses are re-packed up to
    `limit`; a clause that is still too long is wrapped on spaces, and a
    single token longer than `limit` is hard-sliced. No chunk is ever empty
    or longer than `limit`.

    Args:
        text: Text to split; surrounding whitespace is ignored.
        limit: Maximum characters per chunk (must be >= 1).

    Returns:
        The chunks in reading order; an empty list for blank input.

    Raises:
        ValueError: If `limit` is smaller than 1.
    """
    if limit < 1:
        raise ValueError("limit must be >= 1")
    text = text.strip()
    if not text:
        return []

    chunks: list[str] = []
    # First pass: sentence-ish split keeping the punctuation.
    for sentence in re.split(r"(?<=[\.\!\?\u3002\uFF01\uFF1F\n])\s+", text):
        sentence = sentence.strip()
        if not sentence:
            continue
        if len(sentence) <= limit:
            chunks.append(sentence)
            continue

        # Sentence too long — split on commas / semicolons / colons.
        pending = ""
        for clause in re.split(r"(?<=[,;:\u3001\uFF0C])\s+", sentence):
            clause = clause.strip()
            if not clause:
                continue
            if len(clause) > limit:
                # Last resort: hard wrap on word boundaries.
                if pending:
                    chunks.append(pending)
                pieces = _wrap_words(clause, limit)
                chunks.extend(pieces[:-1])
                # The trailing piece stays pending so it can absorb the next clause.
                pending = pieces[-1] if pieces else ""
                continue
            if pending and len(pending) + 1 + len(clause) > limit:
                chunks.append(pending)
                pending = clause
            else:
                pending = f"{pending} {clause}".strip()
        if pending:
            chunks.append(pending)
    return chunks
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
def _say_nvidia_riva(text: str, lang: str = "es") -> bool:
    """Synthesize `text` with NVIDIA Riva and play the result.

    The text is chunked with `_split_for_riva`, each chunk is synthesized as
    linear PCM, and the concatenated audio is wrapped in a WAV file for
    playback.

    Args:
        text: Text to speak.
        lang: Language code used to choose the voice and Riva language code.

    Returns:
        True if audio was produced and handed to the player; False when the
        backend is unavailable, there is nothing to speak, the user cancelled
        during synthesis, or an error occurred (errors are printed).
    """
    if not _riva_tts_available():
        return False
    tmp_path: Optional[str] = None
    try:
        import riva.client

        api_key = os.environ["NVIDIA_API_KEY"]
        auth = riva.client.Auth(
            None, True, RIVA_TTS_SERVER,
            [("function-id", RIVA_TTS_FUNCTION_ID),
             ("authorization", f"Bearer {api_key}")],
        )
        tts = riva.client.SpeechSynthesisService(auth)

        # Magpie caps inputs at ~400 chars per request — chunk by sentence.
        segments = _split_for_riva(text)
        if not segments:
            return False

        request_args = dict(
            voice_name=_riva_voice_for(lang),
            language_code=_riva_lang_code(lang),
            encoding=riva.client.AudioEncoding.LINEAR_PCM,
            sample_rate_hz=RIVA_TTS_SAMPLE_RATE,
        )

        pcm = bytearray()
        for seg in segments:
            # Stop issuing requests once the user has cancelled.
            if _stop_event.is_set():
                return False
            # Collect per segment so that a fallback after a partial stream
            # replaces, rather than duplicates, the audio already received.
            seg_audio = bytearray()
            try:
                for reply in tts.synthesize_online(seg, **request_args):
                    if getattr(reply, "audio", None):
                        seg_audio.extend(reply.audio)
            except AttributeError:
                seg_audio = bytearray(tts.synthesize(seg, **request_args).audio)
            pcm.extend(seg_audio)
        if not pcm:
            return False

        # Record the path before writing so a failed write is still cleaned up.
        fd, tmp_path = tempfile.mkstemp(suffix=".wav")
        with os.fdopen(fd, "wb") as wav_file:
            wav_file.write(_pcm_to_wav(bytes(pcm), sample_rate=RIVA_TTS_SAMPLE_RATE))
        _play_audio_file(tmp_path)
        return True
    except Exception as e:
        print(f"  [Riva TTS] {e}")
        return False
    finally:
        if tmp_path:
            # The player may keep the file locked briefly on Windows; retry
            # instead of letting PermissionError escape from cleanup.
            for _ in range(15):
                try:
                    Path(tmp_path).unlink(missing_ok=True)
                    break
                except PermissionError:
                    time.sleep(0.1)
                except OSError:
                    break
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
# ── OpenAI TTS ────────────────────────────────────────────────────────────
|
|
388
|
+
|
|
389
|
+
def _say_openai(text: str, voice: str = "alloy", speed: float = 1.0) -> bool:
    """Synthesize `text` with the OpenAI `tts-1` model and play the MP3.

    Args:
        text: Text to speak.
        voice: OpenAI voice name.
        speed: Playback speed passed to the API.

    Returns:
        True if audio was produced and handed to the player; False when
        OPENAI_API_KEY is not set or any error occurred (errors are printed).
    """
    if not os.environ.get("OPENAI_API_KEY"):
        return False
    tmp_path: Optional[str] = None
    try:
        from openai import OpenAI
        client = OpenAI(timeout=15.0)
        response = client.audio.speech.create(
            model="tts-1",
            voice=voice,
            input=text,
            speed=speed
        )
        # Create and close the temp file first, recording its path before the
        # download, so it is removed in `finally` even if the download fails.
        fd, tmp_path = tempfile.mkstemp(suffix=".mp3")
        os.close(fd)
        response.stream_to_file(tmp_path)
        _play_audio_file(tmp_path)
        return True
    except Exception as e:
        print(f"  [OpenAI TTS] Error: {e}")
        return False
    finally:
        if tmp_path:
            # The player may keep the file locked briefly on Windows; retry
            # instead of letting PermissionError escape from cleanup.
            for _ in range(15):
                try:
                    Path(tmp_path).unlink(missing_ok=True)
                    break
                except PermissionError:
                    time.sleep(0.1)
                except OSError:
                    break
|
|
413
|
+
|
|
414
|
+
|
|
415
|
+
# ── gTTS ──────────────────────────────────────────────────────────────────
|
|
416
|
+
|
|
417
|
+
def _say_gtts(text: str, lang: str = "en") -> bool:
    """Synthesize `text` with gTTS and play the resulting MP3.

    Args:
        text: Text to speak.
        lang: Language code passed to gTTS.

    Returns:
        True if audio was produced and handed to the player; False when gTTS
        is not installed (silently) or any other error occurred (printed).
    """
    tmp_path: Optional[str] = None
    try:
        from gtts import gTTS
        tts = gTTS(text=text, lang=lang, timeout=15)
        # Create and close the temp file first, recording its path before the
        # download, so it is removed in `finally` even if save() fails.
        fd, tmp_path = tempfile.mkstemp(suffix=".mp3")
        os.close(fd)
        tts.save(tmp_path)
        _play_audio_file(tmp_path)
        return True
    except ImportError:
        return False
    except Exception as e:
        print(f"  [gTTS] Error: {e}")
        return False
    finally:
        if tmp_path:
            # The player may keep the file locked briefly on Windows; retry
            # instead of letting PermissionError escape from cleanup.
            for _ in range(15):
                try:
                    Path(tmp_path).unlink(missing_ok=True)
                    break
                except PermissionError:
                    time.sleep(0.1)
                except OSError:
                    break
|
|
435
|
+
|
|
436
|
+
|
|
437
|
+
# ── pyttsx3 ───────────────────────────────────────────────────────────────
|
|
438
|
+
|
|
439
|
+
def _say_pyttsx3(text: str, rate: int = 175) -> bool:
    """Speak `text` through the shared pyttsx3 engine.

    Sets the speaking rate, switches to the first voice whose name contains
    "zira" when there is one, then blocks until speech finishes.

    Returns:
        True on success; False when pyttsx3 cannot be imported (silently) or
        on any other error, in which case the error is printed and the cached
        engine is dropped so the next call builds a fresh one.
    """
    global _pyttsx3_engine
    try:
        tts_engine = _get_pyttsx3_engine()
        tts_engine.setProperty("rate", rate)
        # Prefer Zira (female) over David
        for candidate in tts_engine.getProperty("voices"):
            if "zira" in candidate.name.lower():
                tts_engine.setProperty("voice", candidate.id)
                break
        tts_engine.say(text)
        tts_engine.runAndWait()
    except ImportError:
        return False
    except Exception as e:
        print(f"  [pyttsx3] Error: {e}")
        _pyttsx3_engine = None
        return False
    return True
|
|
458
|
+
|
|
459
|
+
|
|
460
|
+
# ── Text Cleaner ──────────────────────────────────────────────────────────
|
|
461
|
+
|
|
462
|
+
def _clean_for_tts(text: str) -> str:
    """Strip markdown, HTML, emojis, and code blocks before speaking.

    A fixed sequence of regex substitutions is applied in order, then the
    result is stripped of leading/trailing whitespace.
    """
    # (pattern, replacement, flags) — order matters, later rules see the
    # output of earlier ones.
    substitutions = (
        # <details>…</details> blocks are dropped together with their content.
        (r'<details>.*?</details>', '', re.DOTALL),
        # Any remaining HTML tag (content between tags is kept).
        (r'<[^>]+>', '', 0),
        # Fenced code blocks.
        (r'```[\s\S]*?```', '', 0),
        # Inline code spans.
        (r'`[^`]+`', '', 0),
        # XML-style tag pairs like <WebSearch>…</WebSearch>.
        # NOTE(review): the generic tag rule above has already removed every
        # <…> tag by this point, so this rule appears to never match — confirm
        # whether it was meant to run earlier.
        (r'<\w+>.*?</\w+>', '', re.DOTALL),
        # Markdown bold/italic markers (inner text is kept).
        (r'\*{1,3}([^*]+)\*{1,3}', r'\1', 0),
        # Markdown header prefixes at line start.
        (r'^#{1,6}\s+', '', re.MULTILINE),
        # Emoji and other symbols in the listed code-point ranges.
        (r'[\U00010000-\U0010ffff\U00002600-\U000027BF\U0001F300-\U0001FAFF]', '', 0),
        # Blank-line runs become a single space; runs of spaces/tabs collapse.
        (r'\n{2,}', ' ', 0),
        (r'[ \t]+', ' ', 0),
    )
    for pattern, replacement, flags in substitutions:
        text = re.sub(pattern, replacement, text, flags=flags)
    return text.strip()
|
|
484
|
+
|
|
485
|
+
|
|
486
|
+
# ── Public Entry Point ────────────────────────────────────────────────────
|
|
487
|
+
|
|
488
|
+
def say(text: str, voice: Optional[str] = None, speed: float = 1.0, lang: str = "es", provider: Optional[str] = None) -> None:
    """Speak text using the best available TTS backend. Press 'c' to stop.

    The text is first cleaned with `_clean_for_tts`; nothing happens if the
    result is empty. Backends are tried in this order until one succeeds:
    Azure, NVIDIA Riva, OpenAI, gTTS, pyttsx3. If none succeeds the text is
    printed instead. Calls are serialized through `_say_lock`.

    Args:
        text: Text to speak (markdown/HTML is stripped first).
        voice: Voice name, passed to Azure and OpenAI (OpenAI defaults to
            "alloy" when not given).
        speed: Speech speed, used by the OpenAI backend only.
        lang: Language code passed to the Azure, Riva and gTTS backends.
        provider: Explicit backend to use. "auto" or None tries in priority order.
            Supported: "azure", "riva", "openai", "gtts", "pyttsx3".
            Matching is case-insensitive.
    """
    text = _clean_for_tts(text)
    if not text.strip():
        return

    wanted = "auto" if provider is None else provider.strip().lower()

    # Backends in priority order; each callable returns True on success.
    backends = (
        ("azure", lambda: _say_azure(text, voice=voice, lang=lang)),
        ("riva", lambda: _say_nvidia_riva(text, lang=lang)),
        ("openai", lambda: _say_openai(text, voice=(voice or "alloy"), speed=speed)),
        ("gtts", lambda: _say_gtts(text, lang=lang)),
        ("pyttsx3", lambda: _say_pyttsx3(text)),
    )

    with _say_lock:
        # Only show an ellipsis when the preview is actually truncated.
        preview = text if len(text) <= 50 else f"{text[:50]}..."
        print(f"  📢 Speaking: '{preview}'  [c = stop]")

        _stop_event.clear()
        watcher = threading.Thread(target=_watch_for_cancel, daemon=True)
        watcher.start()

        try:
            for name, speak in backends:
                if wanted in ("auto", name) and speak():
                    return
                # The user cancelled: do not fall through to another backend.
                if _stop_event.is_set():
                    return

            # Final fallback
            print(f"\n📢 {text}")
        finally:
            _stop_event.set()  # stop watcher thread if playback ended naturally
            # Wait for the watcher to exit so the next say() cannot clear the
            # event before this watcher has seen it and end up with two watchers.
            watcher.join(timeout=0.5)
|
|
545
|
+
|
|
546
|
+
|
|
547
|
+
def check_tts_availability() -> tuple[bool, str | None]:
    """Report whether any TTS backend can be used.

    Backends are checked in the same priority order that `say` uses.

    Returns:
        ``(True, backend_description)`` for the first usable backend, or
        ``(False, reason)`` with an installation hint when none is usable.
    """
    from importlib.util import find_spec

    def _installed(module_name: str) -> bool:
        # find_spec locates the package without importing it.
        try:
            return find_spec(module_name) is not None
        except (ImportError, ValueError):
            return False

    if _azure_tts_available():
        return True, "Azure Speech Services (cloud)"

    if _riva_tts_available():
        return True, "NVIDIA Riva Magpie-Multilingual (cloud)"

    # The OpenAI backend needs the `openai` package as well as the API key.
    if os.environ.get("OPENAI_API_KEY") and _installed("openai"):
        return True, "OpenAI TTS (cloud)"

    if _installed("gtts"):
        return True, "gTTS (cloud)"

    if _installed("pyttsx3"):
        return True, "pyttsx3 (local)"

    return False, "No TTS backend installed. Try 'pip install azure-cognitiveservices-speech', 'pip install nvidia-riva-client', 'pip install gTTS', or 'pip install pyttsx3'."
|