dulus 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. agent.py +363 -0
  2. backend/__init__.py +63 -0
  3. backend/compressor.py +261 -0
  4. backend/context.py +329 -0
  5. backend/githook.py +166 -0
  6. backend/marketplace.py +141 -0
  7. backend/mempalace_bridge.py +182 -0
  8. backend/personas.py +297 -0
  9. backend/plugins.py +222 -0
  10. backend/server.py +411 -0
  11. backend/tasks.py +213 -0
  12. batch_api.py +307 -0
  13. checkpoint/__init__.py +27 -0
  14. checkpoint/hooks.py +90 -0
  15. checkpoint/store.py +314 -0
  16. checkpoint/types.py +80 -0
  17. claude_code_watcher.py +214 -0
  18. clipboard_utils.py +246 -0
  19. cloudsave.py +159 -0
  20. common.py +177 -0
  21. compaction.py +378 -0
  22. config.py +180 -0
  23. context.py +241 -0
  24. dulus-0.2.0.dist-info/METADATA +600 -0
  25. dulus-0.2.0.dist-info/RECORD +101 -0
  26. dulus-0.2.0.dist-info/WHEEL +5 -0
  27. dulus-0.2.0.dist-info/entry_points.txt +2 -0
  28. dulus-0.2.0.dist-info/licenses/LICENSE +674 -0
  29. dulus-0.2.0.dist-info/licenses/license_manager.py +187 -0
  30. dulus-0.2.0.dist-info/top_level.txt +36 -0
  31. dulus.py +8455 -0
  32. dulus_gui.py +331 -0
  33. dulus_mcp/__init__.py +43 -0
  34. dulus_mcp/client.py +546 -0
  35. dulus_mcp/config.py +133 -0
  36. dulus_mcp/tools.py +131 -0
  37. dulus_mcp/types.py +124 -0
  38. gui/__init__.py +18 -0
  39. gui/agent_bridge.py +283 -0
  40. gui/chat_widget.py +448 -0
  41. gui/main_window.py +485 -0
  42. gui/personas.py +230 -0
  43. gui/session_utils.py +189 -0
  44. gui/settings_dialog.py +146 -0
  45. gui/sidebar.py +515 -0
  46. gui/tasks_view.py +499 -0
  47. gui/themes.py +256 -0
  48. gui/tool_panel.py +94 -0
  49. input.py +1030 -0
  50. license_manager.py +187 -0
  51. memory/__init__.py +93 -0
  52. memory/audit.py +51 -0
  53. memory/consolidator.py +312 -0
  54. memory/context.py +270 -0
  55. memory/offload.py +148 -0
  56. memory/palace.py +127 -0
  57. memory/scan.py +146 -0
  58. memory/sessions.py +100 -0
  59. memory/store.py +395 -0
  60. memory/tools.py +408 -0
  61. memory/types.py +114 -0
  62. memory/vector_search.py +92 -0
  63. multi_agent/__init__.py +23 -0
  64. multi_agent/subagent.py +501 -0
  65. multi_agent/tools.py +393 -0
  66. offload_helper.py +183 -0
  67. plugin/__init__.py +22 -0
  68. plugin/autoadapter.py +1641 -0
  69. plugin/loader.py +156 -0
  70. plugin/recommend.py +211 -0
  71. plugin/store.py +387 -0
  72. plugin/types.py +147 -0
  73. providers.py +3750 -0
  74. skill/__init__.py +14 -0
  75. skill/builtin.py +100 -0
  76. skill/clawhub.py +270 -0
  77. skill/executor.py +66 -0
  78. skill/loader.py +199 -0
  79. skill/tools.py +110 -0
  80. skills.py +14 -0
  81. spinner.py +42 -0
  82. string_utils.py +42 -0
  83. subagent.py +11 -0
  84. task/__init__.py +12 -0
  85. task/store.py +199 -0
  86. task/tools.py +265 -0
  87. task/types.py +92 -0
  88. tmux_offloader.py +177 -0
  89. tmux_tools.py +410 -0
  90. tool_registry.py +214 -0
  91. tools.py +2694 -0
  92. ui/__init__.py +1 -0
  93. ui/input.py +464 -0
  94. ui/render.py +272 -0
  95. voice/__init__.py +56 -0
  96. voice/keyterms.py +179 -0
  97. voice/recorder.py +263 -0
  98. voice/stt.py +408 -0
  99. voice/tts.py +570 -0
  100. webchat.py +432 -0
  101. webchat_server.py +1761 -0
voice/tts.py ADDED
@@ -0,0 +1,570 @@
1
+ """Text-to-speech (TTS) backends.
2
+
3
+ Backend priority (tried in order; Azure Speech Services is tried before these when its SDK and credentials are configured):
4
+ 1. NVIDIA Riva — cloud, Magpie-Multilingual via NVCF gRPC.
5
+ pip install nvidia-riva-client + NVIDIA_API_KEY
6
+ 2. OpenAI TTS — cloud, high quality, needs OPENAI_API_KEY.
7
+ 3. gTTS — cloud, free, needs internet.
8
+ pip install gTTS
9
+ 4. pyttsx3 — local, offline, uses system voices.
10
+ pip install pyttsx3
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import os
16
+ import re
17
+ import struct
18
+ import subprocess
19
+ import tempfile
20
+ import threading
21
+ import time
22
+ from pathlib import Path
23
+ from typing import Optional
24
+
25
+ # ── Interrupt flag ────────────────────────────────────────────────────────
26
+ # `_say_lock` serializes calls to say(): two concurrent say()s would share
27
+ # `_stop_event` and the second .clear() would erase the first's cancel signal,
28
+ # leaving overlapping audio with no way to interrupt. Lock keeps audio sequential.
29
+ _stop_event = threading.Event()
30
+ _say_lock = threading.Lock()
31
+
32
+ def _watch_for_cancel() -> None:
33
+ """Background thread: set _stop_event if user presses 'c'."""
34
+ try:
35
+ import msvcrt
36
+ while not _stop_event.is_set():
37
+ if msvcrt.kbhit():
38
+ ch = msvcrt.getwch()
39
+ if ch.lower() == 'c':
40
+ _stop_event.set()
41
+ print("\n ⏹ TTS stopped.", flush=True)
42
+ return
43
+ except Exception:
44
+ pass
45
+
46
+ # ── Playback Helper ───────────────────────────────────────────────────────
47
+
48
def _run_player_interruptibly(cmd: list[str]) -> None:
    """Run an external player command, stopping it once ``_stop_event`` is set."""
    proc = subprocess.Popen(cmd)
    try:
        while proc.poll() is None:
            # wait() doubles as the 50 ms poll interval and the cancel check.
            if _stop_event.wait(0.05):
                break
    finally:
        if proc.poll() is None:
            # Ask politely first, then force; always reap the child so no
            # defunct process is left behind.
            proc.terminate()
            try:
                proc.wait(timeout=0.5)
            except subprocess.TimeoutExpired:
                proc.kill()
                proc.wait()


def _play_audio_file(file_path: str | Path) -> None:
    """Play an audio file, interruptible with 'c' key.

    Players are tried in order: ``ffplay``, ``mpv``, then Windows MCI when
    ``os.name == "nt"``. If none is available a hint is printed instead.

    Args:
        file_path: Path of the audio file to play.
    """
    import shutil

    file_path = str(file_path)

    players = (
        ("ffplay", ["-nodisp", "-autoexit", "-loglevel", "quiet"]),
        ("mpv", ["--no-video", "--really-quiet"]),
    )
    for name, args in players:
        executable = shutil.which(name)
        if executable:
            _run_player_interruptibly([executable, *args, file_path])
            return

    # Windows MCI
    if os.name == "nt":
        _play_windows_mci(file_path)
        return

    print(f" [TTS] Cannot play audio: no player found (install ffmpeg or mpv). File: {file_path}")
88
+
89
+
90
def _play_windows_mci(file_path: str) -> None:
    """Play via MCI, polling _stop_event every 50ms to allow 'c' cancel.

    The file is opened under the fixed alias ``_tts_track``. The alias is
    always closed again, even when playback raises, so a later call can
    reuse it. Any error is reported on stdout rather than raised.

    Args:
        file_path: Path of the audio file; the MCI device type is chosen from
            its extension (defaulting to ``mpegvideo``).
    """
    try:
        import ctypes
        winmm = ctypes.windll.winmm
        abs_path = str(Path(file_path).resolve())
        ext = Path(file_path).suffix.lower()
        mci_type = {".wav": "waveaudio", ".mp3": "mpegvideo",
                    ".mp4": "mpegvideo", ".avi": "avivideo"}.get(ext, "mpegvideo")
        # mciSendString returns zero on success and an error code otherwise.
        open_err = winmm.mciSendStringW(
            f'open "{abs_path}" type {mci_type} alias _tts_track', None, 0, None)
        if open_err:
            print(f" [TTS] Windows MCI playback error: open failed (MCI error {open_err})")
            return
        try:
            winmm.mciSendStringW('play _tts_track', None, 0, None)
            buf = ctypes.create_unicode_buffer(128)
            while True:
                if _stop_event.is_set():
                    winmm.mciSendStringW('stop _tts_track', None, 0, None)
                    break
                winmm.mciSendStringW('status _tts_track mode', buf, 128, None)
                if buf.value != 'playing':
                    break
                time.sleep(0.05)
        finally:
            # Release the alias no matter how playback ended.
            winmm.mciSendStringW('close _tts_track', None, 0, None)
            time.sleep(0.1)  # let MCI fully release the file handle
    except Exception as e:
        print(f" [TTS] Windows MCI playback error: {e}")
114
+
115
+
116
# ── pyttsx3 singleton ─────────────────────────────────────────────────────
# Recreating the engine on every call causes COM errors on Windows.
_pyttsx3_engine = None


def _get_pyttsx3_engine():
    """Return the shared pyttsx3 engine, creating it on first use."""
    global _pyttsx3_engine
    if _pyttsx3_engine is not None:
        return _pyttsx3_engine
    import pyttsx3
    _pyttsx3_engine = pyttsx3.init()
    return _pyttsx3_engine
126
+
127
+
128
+ # ── Azure Speech Services ─────────────────────────────────────────────────
129
+
130
# Default Azure voice name per two-letter language code. Consulted only when
# no voice is given explicitly, via AZURE_TTS_VOICE, or in the config.
_AZURE_LANG_VOICES: dict[str, str] = {
    "es": "es-ES-AlvaroNeural",
    "en": "en-US-GuyNeural",
    "fr": "fr-FR-HenriNeural",
    "pt": "pt-BR-AntonioNeural",
    "de": "de-DE-ConradNeural",
    "it": "it-IT-DiegoNeural",
    "ja": "ja-JP-KeitaNeural",
    "zh": "zh-CN-YunxiNeural",
}
140
+
141
+
142
+ def _azure_tts_available() -> bool:
143
+ try:
144
+ import azure.cognitiveservices.speech as _ # noqa: F401
145
+ except ImportError:
146
+ return False
147
+
148
+ if os.environ.get("AZURE_SPEECH_KEY") and os.environ.get("AZURE_SPEECH_REGION"):
149
+ return True
150
+
151
+ # Fallback: read from Dulus config if env vars not set (e.g. key was
152
+ # configured this session via /config but load_config() already ran).
153
+ try:
154
+ from config import load_config
155
+ cfg = load_config()
156
+ key = cfg.get("azure_speech_key")
157
+ region = cfg.get("azure_speech_region")
158
+ if key and region:
159
+ os.environ["AZURE_SPEECH_KEY"] = key
160
+ os.environ["AZURE_SPEECH_REGION"] = region
161
+ return True
162
+ except Exception:
163
+ pass
164
+
165
+ return False
166
+
167
+
168
def _say_azure(text: str, voice: Optional[str] = None, lang: str = "es") -> bool:
    """Synthesize *text* with Azure Speech into a temp WAV file and play it.

    Args:
        text: Text to speak.
        voice: Azure voice name. When empty, falls back to AZURE_TTS_VOICE,
            then the ``azure_tts_voice`` config entry, then the default voice
            for the primary language of *lang*.
        lang: Language code; only its primary subtag ("es" in "es-MX") is
            used to pick the default voice.

    Returns:
        True when audio was synthesized and handed to the player, False when
        Azure is unavailable, synthesis was cancelled, or any error occurred.
    """
    if not _azure_tts_available():
        return False
    tmp_path: Optional[str] = None
    try:
        import azure.cognitiveservices.speech as speechsdk

        key = os.environ.get("AZURE_SPEECH_KEY", "")
        region = os.environ.get("AZURE_SPEECH_REGION", "")

        speech_config = speechsdk.SpeechConfig(subscription=key, region=region)

        # Resolve voice: explicit arg > env var > config > language default
        if not voice:
            voice = os.environ.get("AZURE_TTS_VOICE", "")
        if not voice:
            try:
                from config import load_config
                voice = load_config().get("azure_tts_voice", "")
            except Exception:
                pass
        if not voice:
            # Match on the primary subtag so "es-MX" / "pt_BR" still get a
            # voice in the right language instead of the English default.
            primary = (lang or "en").replace("_", "-").split("-")[0].lower()
            voice = _AZURE_LANG_VOICES.get(primary, _AZURE_LANG_VOICES.get("en"))

        speech_config.speech_synthesis_voice_name = voice

        # Use mkstemp + close handle immediately so Azure (and later the player)
        # can open the file without Windows sharing violation.
        fd, tmp_path = tempfile.mkstemp(suffix=".wav")
        os.close(fd)

        audio_config = speechsdk.audio.AudioOutputConfig(filename=tmp_path)
        synthesizer = speechsdk.SpeechSynthesizer(
            speech_config=speech_config, audio_config=audio_config
        )
        result = synthesizer.speak_text_async(text).get()

        if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
            _play_audio_file(tmp_path)
            return True
        if result.reason == speechsdk.ResultReason.Canceled:
            cancellation = result.cancellation_details
            print(f" [Azure TTS] Canceled: {cancellation.reason} — {cancellation.error_details}")
            return False
        # Any other outcome is a failure too; never fall through with None.
        print(f" [Azure TTS] Error: unexpected result {result.reason}")
        return False
    except Exception as e:
        print(f" [Azure TTS] Error: {e}")
        return False
    finally:
        if tmp_path:
            # Windows MCI may keep the file locked briefly after playback ends.
            # Retry a few times before giving up.
            for _ in range(15):
                try:
                    Path(tmp_path).unlink(missing_ok=True)
                    break
                except PermissionError:
                    time.sleep(0.1)
                except Exception:
                    break
227
+
228
+
229
# ── NVIDIA Riva (Magpie-Multilingual via NVCF gRPC) ──────────────────────
# Server address handed to the Riva client; override with DULUS_RIVA_SERVER.
RIVA_TTS_SERVER = os.environ.get("DULUS_RIVA_SERVER", "grpc.nvcf.nvidia.com:443")
# Sent as the "function-id" request metadata; override with
# DULUS_RIVA_TTS_FUNCTION_ID.
RIVA_TTS_FUNCTION_ID = os.environ.get("DULUS_RIVA_TTS_FUNCTION_ID",
                                      "877104f7-e885-42b9-8de8-f6e4c6303969")
# Voice used when no DULUS_RIVA_TTS_VOICE* environment variable is set.
RIVA_TTS_DEFAULT_VOICE = "Magpie-Multilingual.EN-US.Aria"
# Sample rate (Hz) requested from Riva and written into the WAV header.
RIVA_TTS_SAMPLE_RATE = 44100
235
+
236
+ # Short BCP-47 → Riva language codes (Magpie expects xx-YY form).
237
+ _RIVA_LANG_MAP = {
238
+ "es": "es-US", "en": "en-US", "fr": "fr-FR", "pt": "pt-BR",
239
+ "de": "de-DE", "it": "it-IT", "ja": "ja-JP", "zh": "zh-CN",
240
+ }
241
+
242
+
243
+ def _riva_lang_code(lang: str) -> str:
244
+ if not lang:
245
+ return "en-US"
246
+ return lang if "-" in lang else _RIVA_LANG_MAP.get(lang.lower(), f"{lang.lower()}-US")
247
+
248
+
249
+ def _riva_voice_for(lang: str) -> str:
250
+ """Resolve voice via env var (per-language first, then global, then default).
251
+
252
+ Set DULUS_RIVA_TTS_VOICE_ES="Magpie-Multilingual.ES-US.Lupe" etc. to map
253
+ voices per language. Run `talk.py --list-voices` once to discover names.
254
+ """
255
+ specific = os.environ.get(f"DULUS_RIVA_TTS_VOICE_{(lang or 'en').upper().split('-')[0]}")
256
+ if specific:
257
+ return specific
258
+ return os.environ.get("DULUS_RIVA_TTS_VOICE", RIVA_TTS_DEFAULT_VOICE)
259
+
260
+
261
+ def _pcm_to_wav(pcm: bytes, sample_rate: int = 44100) -> bytes:
262
+ """Wrap raw int16 mono PCM in a minimal WAV container."""
263
+ data_size = len(pcm)
264
+ return struct.pack(
265
+ "<4sI4s4sIHHIIHH4sI",
266
+ b"RIFF", 36 + data_size, b"WAVE",
267
+ b"fmt ", 16, 1, 1, sample_rate,
268
+ sample_rate * 2, 2, 16,
269
+ b"data", data_size,
270
+ ) + pcm
271
+
272
+
273
+ def _riva_tts_available() -> bool:
274
+ if not os.environ.get("NVIDIA_API_KEY"):
275
+ return False
276
+ try:
277
+ import riva.client # noqa: F401
278
+ return True
279
+ except ImportError:
280
+ return False
281
+
282
+
283
+ _RIVA_TTS_MAX_CHARS = 380 # Magpie hard limit is 400; leave headroom
284
+
285
+
286
+ def _split_for_riva(text: str, limit: int = _RIVA_TTS_MAX_CHARS) -> list[str]:
287
+ """Split text into <=limit-char chunks at sentence/clause/word boundaries."""
288
+ import re as _re
289
+ text = text.strip()
290
+ if not text:
291
+ return []
292
+ # First pass: sentence-ish split keeping the punctuation.
293
+ parts = _re.split(r"(?<=[\.\!\?\u3002\uFF01\uFF1F\n])\s+", text)
294
+ out: list[str] = []
295
+ for p in parts:
296
+ p = p.strip()
297
+ if not p:
298
+ continue
299
+ if len(p) <= limit:
300
+ out.append(p)
301
+ continue
302
+ # Sentence too long — split on commas / semicolons / colons.
303
+ sub = _re.split(r"(?<=[,;:\u3001\uFF0C])\s+", p)
304
+ buf = ""
305
+ for s in sub:
306
+ s = s.strip()
307
+ if not s:
308
+ continue
309
+ if len(s) > limit:
310
+ # Last resort: hard wrap on word boundaries.
311
+ if buf:
312
+ out.append(buf)
313
+ buf = ""
314
+ words = s.split(" ")
315
+ w = ""
316
+ for word in words:
317
+ if len(w) + len(word) + 1 > limit:
318
+ if w:
319
+ out.append(w)
320
+ w = word
321
+ else:
322
+ w = (w + " " + word).strip()
323
+ if w:
324
+ buf = w
325
+ continue
326
+ if len(buf) + len(s) + 1 > limit:
327
+ out.append(buf)
328
+ buf = s
329
+ else:
330
+ buf = (buf + " " + s).strip()
331
+ if buf:
332
+ out.append(buf)
333
+ return out
334
+
335
+
336
def _say_nvidia_riva(text: str, lang: str = "es") -> bool:
    """Synthesize *text* with NVIDIA Riva and play the result.

    The text is split into chunks, each chunk is synthesized as raw PCM, and
    the concatenated audio is written to a temp WAV file for playback.

    Args:
        text: Text to speak.
        lang: Language code used to select the voice and Riva language code.

    Returns:
        True when audio was produced and handed to the player; False when
        Riva is unavailable, playback was cancelled during synthesis, nothing
        was synthesized, or any error occurred.
    """
    if not _riva_tts_available():
        return False
    tmp_path = None
    try:
        import riva.client
        api_key = os.environ["NVIDIA_API_KEY"]
        auth = riva.client.Auth(
            None, True, RIVA_TTS_SERVER,
            [("function-id", RIVA_TTS_FUNCTION_ID),
             ("authorization", f"Bearer {api_key}")],
        )
        tts = riva.client.SpeechSynthesisService(auth)
        # Magpie caps inputs at ~400 chars per request — chunk by sentence.
        segments = _split_for_riva(text)
        if not segments:
            return False
        request = dict(
            voice_name=_riva_voice_for(lang),
            language_code=_riva_lang_code(lang),
            encoding=riva.client.AudioEncoding.LINEAR_PCM,
            sample_rate_hz=RIVA_TTS_SAMPLE_RATE,
        )
        chunks = bytearray()
        for seg in segments:
            if _stop_event.is_set():
                # Cancelled while synthesizing: skip the remaining requests.
                return False
            # Collect per segment so a fallback to synthesize() replaces,
            # rather than duplicates, partially streamed audio.
            seg_audio = bytearray()
            try:
                for r in tts.synthesize_online(seg, **request):
                    if getattr(r, "audio", None):
                        seg_audio.extend(r.audio)
            except AttributeError:
                seg_audio = bytearray(tts.synthesize(seg, **request).audio)
            chunks.extend(seg_audio)
        if not chunks:
            return False
        # Record the path before writing so a failed write is still cleaned up.
        fd, tmp_path = tempfile.mkstemp(suffix=".wav")
        with os.fdopen(fd, "wb") as f:
            f.write(_pcm_to_wav(bytes(chunks), sample_rate=RIVA_TTS_SAMPLE_RATE))
        _play_audio_file(tmp_path)
        return True
    except Exception as e:
        print(f" [Riva TTS] {e}")
        return False
    finally:
        if tmp_path:
            # The player may keep the file locked briefly on Windows; retry
            # instead of letting PermissionError escape from `finally`.
            for _ in range(15):
                try:
                    Path(tmp_path).unlink(missing_ok=True)
                    break
                except PermissionError:
                    time.sleep(0.1)
                except Exception:
                    break
385
+
386
+
387
+ # ── OpenAI TTS ────────────────────────────────────────────────────────────
388
+
389
+ def _say_openai(text: str, voice: str = "alloy", speed: float = 1.0) -> bool:
390
+ if not os.environ.get("OPENAI_API_KEY"):
391
+ return False
392
+ tmp_path = None
393
+ try:
394
+ from openai import OpenAI
395
+ client = OpenAI(timeout=15.0)
396
+ response = client.audio.speech.create(
397
+ model="tts-1",
398
+ voice=voice,
399
+ input=text,
400
+ speed=speed
401
+ )
402
+ with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
403
+ response.stream_to_file(f.name)
404
+ tmp_path = f.name
405
+ _play_audio_file(tmp_path)
406
+ return True
407
+ except Exception as e:
408
+ print(f" [OpenAI TTS] Error: {e}")
409
+ return False
410
+ finally:
411
+ if tmp_path:
412
+ Path(tmp_path).unlink(missing_ok=True)
413
+
414
+
415
+ # ── gTTS ──────────────────────────────────────────────────────────────────
416
+
417
+ def _say_gtts(text: str, lang: str = "en") -> bool:
418
+ tmp_path = None
419
+ try:
420
+ from gtts import gTTS
421
+ tts = gTTS(text=text, lang=lang, timeout=15)
422
+ with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
423
+ tts.save(f.name)
424
+ tmp_path = f.name
425
+ _play_audio_file(tmp_path)
426
+ return True
427
+ except ImportError:
428
+ return False
429
+ except Exception as e:
430
+ print(f" [gTTS] Error: {e}")
431
+ return False
432
+ finally:
433
+ if tmp_path:
434
+ Path(tmp_path).unlink(missing_ok=True)
435
+
436
+
437
+ # ── pyttsx3 ───────────────────────────────────────────────────────────────
438
+
439
def _say_pyttsx3(text: str, rate: int = 175) -> bool:
    """Speak *text* through the shared pyttsx3 engine.

    Returns True after the engine finished speaking, False when pyttsx3 is
    not installed or the engine failed. After a failure the cached engine is
    dropped so the next call builds a fresh one.
    """
    global _pyttsx3_engine
    try:
        engine = _get_pyttsx3_engine()
        engine.setProperty("rate", rate)
        # Prefer Zira (female) over David
        for candidate in engine.getProperty("voices"):
            if "zira" in candidate.name.lower():
                engine.setProperty("voice", candidate.id)
                break
        engine.say(text)
        engine.runAndWait()
    except ImportError:
        return False
    except Exception as e:
        print(f" [pyttsx3] Error: {e}")
        _pyttsx3_engine = None
        return False
    return True
458
+
459
+
460
+ # ── Text Cleaner ──────────────────────────────────────────────────────────
461
+
462
+ def _clean_for_tts(text: str) -> str:
463
+ """Strip markdown, HTML, emojis, and code blocks before speaking."""
464
+ # Remove <details>/<summary> blocks entirely
465
+ text = re.sub(r'<details>.*?</details>', '', text, flags=re.DOTALL)
466
+ # Remove remaining HTML tags
467
+ text = re.sub(r'<[^>]+>', '', text)
468
+ # Remove code fences (``` blocks)
469
+ text = re.sub(r'```[\s\S]*?```', '', text)
470
+ # Remove inline code
471
+ text = re.sub(r'`[^`]+`', '', text)
472
+ # Remove XML-style tags like <WebSearch>
473
+ text = re.sub(r'<\w+>.*?</\w+>', '', text, flags=re.DOTALL)
474
+ # Remove markdown bold/italic
475
+ text = re.sub(r'\*{1,3}([^*]+)\*{1,3}', r'\1', text)
476
+ # Remove markdown headers
477
+ text = re.sub(r'^#{1,6}\s+', '', text, flags=re.MULTILINE)
478
+ # Remove emojis
479
+ text = re.sub(r'[\U00010000-\U0010ffff\U00002600-\U000027BF\U0001F300-\U0001FAFF]', '', text)
480
+ # Collapse whitespace
481
+ text = re.sub(r'\n{2,}', ' ', text)
482
+ text = re.sub(r'[ \t]+', ' ', text)
483
+ return text.strip()
484
+
485
+
486
+ # ── Public Entry Point ────────────────────────────────────────────────────
487
+
488
def say(text: str, voice: Optional[str] = None, speed: float = 1.0, lang: str = "es", provider: Optional[str] = None) -> None:
    """Speak text using the best available TTS backend. Press 'c' to stop.

    Backends are tried in this order until one succeeds: Azure, NVIDIA Riva,
    OpenAI, gTTS, pyttsx3. If none succeeds (and playback was not cancelled)
    the text is printed instead. Calls are serialized by ``_say_lock``.

    Args:
        text: Text to speak; markup is stripped first and blank text is a no-op.
        voice: Voice name for backends that accept one (Azure, OpenAI).
        speed: Speech speed, used by the OpenAI backend.
        lang: Language code forwarded to Azure, Riva and gTTS.
        provider: Explicit backend to use. "auto" or None tries in priority order.
            Supported: "azure", "riva", "openai", "gtts", "pyttsx3".
            Matching ignores case and surrounding whitespace.
    """
    text = _clean_for_tts(text)
    if not text.strip():
        return

    # Normalize once so "Auto" and "AZURE" behave like "auto" and "azure".
    requested = (provider or "auto").strip().lower() or "auto"

    backends = (
        ("azure", lambda: _say_azure(text, voice=voice, lang=lang)),
        ("riva", lambda: _say_nvidia_riva(text, lang=lang)),
        ("openai", lambda: _say_openai(text, voice=(voice or "alloy"), speed=speed)),
        ("gtts", lambda: _say_gtts(text, lang=lang)),
        ("pyttsx3", lambda: _say_pyttsx3(text)),
    )

    with _say_lock:
        # Only show an ellipsis when the preview is actually truncated.
        preview = text[:50] + ("..." if len(text) > 50 else "")
        print(f" 📢 Speaking: '{preview}' [c = stop]")

        _stop_event.clear()
        watcher = threading.Thread(target=_watch_for_cancel, daemon=True)
        watcher.start()

        try:
            for name, speak in backends:
                if requested in ("auto", name) and speak():
                    return
                # A cancel during one backend must not start the next one,
                # nor fall through to the printed fallback.
                if _stop_event.is_set():
                    return

            # Final fallback
            print(f"\n📢 {text}")
        finally:
            _stop_event.set()  # stop watcher thread if playback ended naturally
545
+
546
+
547
def check_tts_availability() -> tuple[bool, str | None]:
    """Return ``(available, detail)`` for the first usable TTS backend.

    When a backend is usable, *detail* is its human-readable label; otherwise
    it is an installation hint. Backends are checked in this order: Azure,
    NVIDIA Riva, OpenAI, gTTS, pyttsx3.
    """
    if _azure_tts_available():
        return True, "Azure Speech Services (cloud)"
    if _riva_tts_available():
        return True, "NVIDIA Riva Magpie-Multilingual (cloud)"
    if os.environ.get("OPENAI_API_KEY"):
        return True, "OpenAI TTS (cloud)"

    try:
        import gtts  # noqa: F401
    except ImportError:
        pass
    else:
        return True, "gTTS (cloud)"

    try:
        import pyttsx3  # noqa: F401
    except ImportError:
        pass
    else:
        return True, "pyttsx3 (local)"

    return False, "No TTS backend installed. Try 'pip install azure-cognitiveservices-speech', 'pip install nvidia-riva-client', 'pip install gTTS', or 'pip install pyttsx3'."