abstractvoice 0.5.2__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. abstractvoice/__init__.py +2 -5
  2. abstractvoice/__main__.py +82 -3
  3. abstractvoice/adapters/__init__.py +12 -0
  4. abstractvoice/adapters/base.py +207 -0
  5. abstractvoice/adapters/stt_faster_whisper.py +401 -0
  6. abstractvoice/adapters/tts_piper.py +480 -0
  7. abstractvoice/aec/__init__.py +10 -0
  8. abstractvoice/aec/webrtc_apm.py +56 -0
  9. abstractvoice/artifacts.py +173 -0
  10. abstractvoice/audio/__init__.py +7 -0
  11. abstractvoice/audio/recorder.py +46 -0
  12. abstractvoice/audio/resample.py +25 -0
  13. abstractvoice/cloning/__init__.py +7 -0
  14. abstractvoice/cloning/engine_chroma.py +738 -0
  15. abstractvoice/cloning/engine_f5.py +546 -0
  16. abstractvoice/cloning/manager.py +349 -0
  17. abstractvoice/cloning/store.py +362 -0
  18. abstractvoice/compute/__init__.py +6 -0
  19. abstractvoice/compute/device.py +73 -0
  20. abstractvoice/config/__init__.py +2 -0
  21. abstractvoice/config/voice_catalog.py +19 -0
  22. abstractvoice/dependency_check.py +0 -1
  23. abstractvoice/examples/cli_repl.py +2403 -243
  24. abstractvoice/examples/voice_cli.py +64 -63
  25. abstractvoice/integrations/__init__.py +2 -0
  26. abstractvoice/integrations/abstractcore.py +116 -0
  27. abstractvoice/integrations/abstractcore_plugin.py +253 -0
  28. abstractvoice/prefetch.py +82 -0
  29. abstractvoice/recognition.py +424 -42
  30. abstractvoice/stop_phrase.py +103 -0
  31. abstractvoice/tts/__init__.py +3 -3
  32. abstractvoice/tts/adapter_tts_engine.py +210 -0
  33. abstractvoice/tts/tts_engine.py +257 -1208
  34. abstractvoice/vm/__init__.py +2 -0
  35. abstractvoice/vm/common.py +21 -0
  36. abstractvoice/vm/core.py +139 -0
  37. abstractvoice/vm/manager.py +108 -0
  38. abstractvoice/vm/stt_mixin.py +158 -0
  39. abstractvoice/vm/tts_mixin.py +550 -0
  40. abstractvoice/voice_manager.py +6 -1061
  41. abstractvoice-0.6.1.dist-info/METADATA +213 -0
  42. abstractvoice-0.6.1.dist-info/RECORD +52 -0
  43. {abstractvoice-0.5.2.dist-info → abstractvoice-0.6.1.dist-info}/WHEEL +1 -1
  44. abstractvoice-0.6.1.dist-info/entry_points.txt +6 -0
  45. abstractvoice/instant_setup.py +0 -83
  46. abstractvoice/simple_model_manager.py +0 -539
  47. abstractvoice-0.5.2.dist-info/METADATA +0 -1458
  48. abstractvoice-0.5.2.dist-info/RECORD +0 -23
  49. abstractvoice-0.5.2.dist-info/entry_points.txt +0 -2
  50. {abstractvoice-0.5.2.dist-info → abstractvoice-0.6.1.dist-info}/licenses/LICENSE +0 -0
  51. {abstractvoice-0.5.2.dist-info → abstractvoice-0.6.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,550 @@
1
+ """TTS + voice/language methods for VoiceManager.
2
+
3
+ This module intentionally focuses on orchestration and keeps heavy engine details
4
+ behind adapters.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import threading
10
+ import time
11
+
12
+
13
class TtsMixin:
    """TTS playback, voice-cloning and language helpers for VoiceManager.

    The mixin keeps no state of its own. It reads and writes attributes on
    the host object, among them ``tts_engine``, ``tts_adapter``, ``speed``,
    ``language``, ``LANGUAGES``, ``debug_mode``, ``voice_recognizer``,
    ``_tts_engine_name`` and ``_wire_tts_callbacks``. Optional attributes
    (``_last_tts_metrics_lock``, ``_cloned_cancel_event``,
    ``_cloned_synthesis_active``, ``cloned_tts_streaming``, ``_on_tts_end``,
    ``allow_downloads``, ``whisper_model``, ``cloning_engine``) are always
    looked up with ``getattr`` and a default, so a host may omit them.
    """

    # ------------------------------------------------------------------
    # Metrics of the most recent utterance
    # ------------------------------------------------------------------
    def _set_last_tts_metrics(self, metrics: dict | None) -> None:
        """Store ``metrics`` as the latest TTS metrics (``None`` clears them).

        The write happens under ``_last_tts_metrics_lock`` when the host
        provides one; if the lock is missing or unusable the value is still
        written without it (best-effort).
        """
        lock = getattr(self, "_last_tts_metrics_lock", None)
        if lock is not None:
            try:
                with lock:
                    self._last_tts_metrics = metrics
                return
            except Exception:
                # Unusable lock: fall through to the unguarded write.
                pass
        self._last_tts_metrics = metrics

    def pop_last_tts_metrics(self) -> dict | None:
        """Return the latest TTS metrics and reset them to ``None``.

        Uses ``_last_tts_metrics_lock`` when available, with the same
        best-effort fallback as ``_set_last_tts_metrics``.
        """

        def _take():
            taken = getattr(self, "_last_tts_metrics", None)
            self._last_tts_metrics = None
            return taken

        lock = getattr(self, "_last_tts_metrics_lock", None)
        if lock is not None:
            try:
                with lock:
                    return _take()
            except Exception:
                pass
        return _take()

    # ------------------------------------------------------------------
    # Voice cloning (optional extra)
    # ------------------------------------------------------------------
    def _get_voice_cloner(self):
        """Return the lazily created ``VoiceCloner``, building it on first use.

        Raises:
            RuntimeError: if the optional cloning package cannot be imported.
        """
        if getattr(self, "_voice_cloner", None) is None:
            try:
                from ..cloning import VoiceCloner
            except Exception as e:
                raise RuntimeError(
                    "Voice cloning is an optional feature.\n"
                    "Install with: pip install \"abstractvoice[cloning]\"\n"
                    f"Original error: {e}"
                ) from e

            # Use a slightly larger STT model for one-time reference-text auto-fallback.
            self._voice_cloner = VoiceCloner(
                debug=bool(getattr(self, "debug_mode", False)),
                whisper_model=getattr(self, "whisper_model", "tiny"),
                reference_text_whisper_model="small",
                allow_downloads=bool(getattr(self, "allow_downloads", True)),
                default_engine=str(getattr(self, "cloning_engine", "f5_tts") or "f5_tts"),
            )
        return self._voice_cloner

    def clone_voice(
        self,
        reference_audio_path: str,
        name: str | None = None,
        *,
        reference_text: str | None = None,
        engine: str | None = None,
    ) -> str:
        """Create a cloned voice from a reference recording.

        All arguments are forwarded unchanged to ``VoiceCloner.clone_voice``;
        its return value (annotated ``str``, presumably the new voice id) is
        returned as-is.
        """
        return self._get_voice_cloner().clone_voice(
            reference_audio_path,
            name=name,
            reference_text=reference_text,
            engine=engine,
        )

    def list_cloned_voices(self):
        """Return whatever ``VoiceCloner.list_cloned_voices()`` reports."""
        return self._get_voice_cloner().list_cloned_voices()

    def get_cloned_voice(self, voice_id: str):
        """Return the cloner's record for ``voice_id``."""
        return self._get_voice_cloner().get_cloned_voice(voice_id)

    def set_cloned_voice_reference_text(self, voice_id: str, reference_text: str) -> bool:
        """Update a cloned voice's reference transcript (quality fix).

        A bad reference transcript commonly causes repeated/incorrect words in output.
        Returns ``True`` once the cloner call completes; errors propagate.
        """
        self._get_voice_cloner().set_reference_text(voice_id, reference_text)
        return True

    def export_voice(self, voice_id: str, path: str) -> str:
        """Export ``voice_id`` to ``path`` via the cloner and return its result."""
        return self._get_voice_cloner().export_voice(voice_id, path)

    def import_voice(self, path: str) -> str:
        """Import a voice from ``path`` via the cloner and return its result."""
        return self._get_voice_cloner().import_voice(path)

    def set_cloned_tts_quality(self, preset: str) -> bool:
        """Set cloned TTS quality preset: fast|balanced|high."""
        self._get_voice_cloner().set_quality_preset(preset)
        return True

    def get_cloning_runtime_info(self):
        """Return the cloner's runtime information."""
        return self._get_voice_cloner().get_runtime_info()

    def rename_cloned_voice(self, voice_id: str, new_name: str) -> bool:
        """Rename a cloned voice; returns ``True`` unless the cloner raises."""
        self._get_voice_cloner().rename_cloned_voice(voice_id, new_name)
        return True

    def delete_cloned_voice(self, voice_id: str) -> bool:
        """Delete a cloned voice; returns ``True`` unless the cloner raises."""
        self._get_voice_cloner().delete_cloned_voice(voice_id)
        return True

    def unload_cloning_engines(self, *, keep_engine: str | None = None) -> int:
        """Best-effort free memory held by loaded cloning engines.

        This is critical for large backends (e.g. Chroma). It does NOT delete any
        cloned voices; it only releases in-memory model weights.

        Args:
            keep_engine: when given, every engine except this one is unloaded.

        Returns:
            The count reported by the cloner, or ``0`` when cloning is
            unavailable or unloading fails.
        """
        try:
            cloner = self._get_voice_cloner()
        except Exception:
            return 0
        try:
            if keep_engine:
                return int(cloner.unload_engines_except(str(keep_engine)))
            return int(cloner.unload_all_engines())
        except Exception:
            return 0

    def unload_piper_voice(self) -> bool:
        """Best-effort release of Piper voice weights/session (keeps audio output ready).

        This helps reduce memory pressure when switching to large cloning backends.
        Returns ``True`` when something was released, ``False`` otherwise
        (no adapter, nothing to drop, or an error).
        """
        try:
            adapter = getattr(self, "tts_adapter", None)
            if adapter is None:
                return False
            if hasattr(adapter, "unload"):
                adapter.unload()
                return True
            # Back-compat: drop voice object if present.
            if hasattr(adapter, "_voice"):
                setattr(adapter, "_voice", None)
                return True
        except Exception:
            return False
        return False

    # ------------------------------------------------------------------
    # Speaking
    # ------------------------------------------------------------------
    def speak(self, text, speed=1.0, callback=None, voice: str | None = None):
        """Speak ``text`` with the default engine or with a cloned voice.

        Args:
            text: text to synthesize.
            speed: playback speed. The value ``1.0`` doubles as "not given":
                it is replaced by the manager-wide ``self.speed``.
            callback: forwarded to the engine / playback session.
            voice: cloned ``voice_id``. When truthy, synthesis runs on a
                daemon thread and this method returns ``True`` immediately.

        Returns:
            The engine's ``speak`` result, or ``True`` for cloned voices.

        Raises:
            RuntimeError: if no TTS engine is available (or, for cloned
                voices, if the cloning extra is not installed).
        """
        sp = speed if speed != 1.0 else self.speed
        if not self.tts_engine:
            raise RuntimeError("No TTS engine available")

        if voice:
            return self._speak_cloned(text, sp, callback, voice)

        ok = self.tts_engine.speak(text, sp, callback)
        # Mirror adapter metrics into the manager for a single "last TTS metrics"
        # source of truth (used by the verbose REPL).
        try:
            m = getattr(self.tts_engine, "last_tts_metrics", None)
            if isinstance(m, dict) and m:
                self._set_last_tts_metrics(dict(m))
        except Exception:
            pass
        return ok

    def _emit_cloned_audio(self, mono, sample_rate: int) -> bool:
        """Hand one mono buffer to whichever audio sink the engine exposes.

        Prefers ``engine.enqueue_audio`` and falls back to
        ``engine.audio_player.play_audio``. Each is first called with
        ``sample_rate=`` and retried without it on ``TypeError`` (older
        signatures). Returns ``False`` when the engine offers no sink.
        """
        engine = self.tts_engine
        if hasattr(engine, "enqueue_audio"):
            try:
                engine.enqueue_audio(mono, sample_rate=sample_rate)
            except TypeError:
                engine.enqueue_audio(mono)
            return True
        player = getattr(engine, "audio_player", None)
        if player:
            try:
                player.play_audio(mono, sample_rate=sample_rate)
            except TypeError:
                player.play_audio(mono)
            return True
        return False

    def _speak_cloned(self, text, sp, callback, voice) -> bool:
        """Synthesize ``text`` with a cloned voice on a background thread.

        - streams chunks to the player for better perceived latency, unless
          ``cloned_tts_streaming`` is falsy, in which case the full audio is
          generated first (smoother playback, slower first byte)
        - supports cancellation on stop_speaking() / new input (best-effort)

        Metrics (or the error text) are published through
        ``_set_last_tts_metrics``; the worker never raises into the caller.
        """
        import numpy as np

        from ..audio.resample import linear_resample_mono

        # Clear prior metrics for this new utterance.
        self._set_last_tts_metrics(None)

        # Stop any current speech and reset cancel token.
        try:
            self.stop_speaking()
        except Exception:
            pass

        # IMPORTANT: cancellation must be per-utterance.
        # If we reuse/clear the same Event, an old synthesis thread could resume
        # after a new request starts (race), causing "old audio" to continue.
        try:
            previous = getattr(self, "_cloned_cancel_event", None)
            if previous is not None:
                previous.set()
        except Exception:
            pass
        cancel = threading.Event()
        self._cloned_cancel_event = cancel

        cloner = self._get_voice_cloner()
        # Prefer playing cloned audio at its native rate (F5 is typically 24kHz).
        target_sr = 24000
        clone_engine_name = ""
        try:
            info = cloner.get_cloned_voice(str(voice)) or {}
            clone_engine_name = str(info.get("engine") or "").strip().lower()
        except Exception:
            clone_engine_name = ""

        def _base_metrics() -> dict:
            # Leading keys shared by every metrics record of this utterance.
            return {
                "engine": "clone",
                "clone_engine": clone_engine_name or None,
                "voice_id": str(voice),
            }

        def _synthesize_full() -> None:
            """Generate the whole utterance, then enqueue it in one piece."""
            import io
            import soundfile as sf

            t0 = time.monotonic()
            wav_bytes = cloner.speak_to_bytes(str(text), voice_id=voice, format="wav", speed=sp)
            t1 = time.monotonic()
            if cancel.is_set():
                return
            audio, sr = sf.read(io.BytesIO(wav_bytes), dtype="float32", always_2d=True)
            # Down-mix all channels to mono by averaging.
            mono = np.mean(audio, axis=1).astype(np.float32).reshape(-1)
            sr = int(sr)

            try:
                audio_samples = int(len(mono))
            except Exception:
                audio_samples = 0
            audio_s = (float(audio_samples) / float(sr)) if sr and audio_samples else 0.0
            synth_s = float(t1 - t0)
            self._set_last_tts_metrics(
                {
                    **_base_metrics(),
                    "streaming": False,
                    "synth_s": synth_s,
                    "audio_s": float(audio_s),
                    "rtf": (synth_s / float(audio_s)) if audio_s else None,
                    "sample_rate": int(sr) if sr else None,
                    "audio_samples": int(audio_samples),
                    "ts": time.time(),
                }
            )

            if hasattr(self.tts_engine, "begin_playback"):
                self.tts_engine.begin_playback(callback=callback, sample_rate=sr)
            # NOTE(review): nesting reconstructed from a flattened diff; the
            # cancel check is taken to run regardless of begin_playback.
            if cancel.is_set():
                return
            self._emit_cloned_audio(mono, sr)

        def _synthesize_streaming() -> None:
            """Enqueue chunks as they arrive, resampled to ``target_sr``."""
            # Streaming path: fewer, larger batches reduce audible cuts and overhead.
            t0 = time.monotonic()
            first_chunk_t = None
            total_samples = 0
            chunks = 0
            chunks_iter = cloner.speak_to_audio_chunks(
                str(text),
                voice_id=voice,
                speed=sp,
                max_chars=240,
            )

            # Begin a playback session once (so TTS lifecycle hooks are correct).
            if hasattr(self.tts_engine, "begin_playback"):
                self.tts_engine.begin_playback(callback=callback, sample_rate=target_sr)

            for chunk, sr in chunks_iter:
                if cancel.is_set():
                    break
                if first_chunk_t is None:
                    first_chunk_t = time.monotonic()
                mono = np.asarray(chunk, dtype=np.float32).reshape(-1)
                if int(sr) != target_sr:
                    mono = linear_resample_mono(mono, int(sr), target_sr)
                try:
                    total_samples += int(len(mono))
                    chunks += 1
                except Exception:
                    pass

                if not self._emit_cloned_audio(mono, target_sr):
                    # No audio sink on the engine: nothing more can be played.
                    break

            t1 = time.monotonic()
            audio_s = (float(total_samples) / float(target_sr)) if total_samples else 0.0
            synth_s = float(t1 - t0)
            ttfb_s = float(first_chunk_t - t0) if first_chunk_t is not None else None
            self._set_last_tts_metrics(
                {
                    **_base_metrics(),
                    "streaming": True,
                    "cancelled": bool(cancel.is_set()),
                    "synth_s": synth_s,
                    "ttfb_s": ttfb_s,
                    "audio_s": float(audio_s),
                    "rtf": (synth_s / float(audio_s)) if audio_s else None,
                    "sample_rate": int(target_sr),
                    "audio_samples": int(total_samples),
                    "chunks": int(chunks),
                    "ts": time.time(),
                }
            )

        def _worker():
            try:
                synth_active = getattr(self, "_cloned_synthesis_active", None)
                if synth_active is not None:
                    try:
                        synth_active.set()
                    except Exception:
                        pass

                # Option: generate full audio first (smooth playback) vs streaming (faster TTFB).
                if not bool(getattr(self, "cloned_tts_streaming", True)):
                    _synthesize_full()
                else:
                    _synthesize_streaming()
            except Exception as e:
                # Best-effort: never crash caller thread.
                try:
                    self._set_last_tts_metrics(
                        {
                            **_base_metrics(),
                            "error": str(e),
                            "ts": time.time(),
                        }
                    )
                except Exception:
                    pass
                if bool(getattr(self, "debug_mode", False)):
                    print(f"⚠️ Cloned TTS failed: {e}")
            finally:
                try:
                    synth_active = getattr(self, "_cloned_synthesis_active", None)
                    # A worker that was superseded by a newer utterance must
                    # not clear the shared flag: the newer worker is still
                    # synthesizing and owns it now.
                    superseded = getattr(self, "_cloned_cancel_event", None) is not cancel
                    if synth_active is not None and not superseded:
                        synth_active.clear()
                except Exception:
                    pass

        threading.Thread(target=_worker, daemon=True).start()
        return True

    # Network/headless-friendly methods
    def speak_to_bytes(self, text: str, format: str = "wav", voice: str | None = None) -> bytes:
        """Synthesize to bytes.

        - If `voice` is None: use Piper (default).
        - If `voice` is provided: treat as a cloned voice_id (requires `abstractvoice[cloning]`).

        Raises:
            NotImplementedError: if no cloned voice is requested and the
                Piper adapter is missing or unavailable.
        """
        if voice:
            cloner = self._get_voice_cloner()
            return cloner.speak_to_bytes(text, voice_id=voice, format=format, speed=self.speed)

        if self.tts_adapter and self.tts_adapter.is_available():
            return self.tts_adapter.synthesize_to_bytes(text, format=format)
        raise NotImplementedError("speak_to_bytes() requires Piper TTS (default engine).")

    def speak_to_file(
        self, text: str, output_path: str, format: str | None = None, voice: str | None = None
    ) -> str:
        """Synthesize ``text`` into ``output_path`` and return the written path.

        For a cloned ``voice`` the bytes come from ``speak_to_bytes`` (format
        defaults to ``"wav"``) and missing parent directories are created.
        Otherwise the Piper adapter writes the file itself.

        Raises:
            NotImplementedError: if no cloned voice is requested and the
                Piper adapter is missing or unavailable.
        """
        if voice:
            data = self.speak_to_bytes(text, format=(format or "wav"), voice=voice)
            from pathlib import Path

            out = Path(output_path)
            out.parent.mkdir(parents=True, exist_ok=True)
            out.write_bytes(data)
            return str(out)

        if self.tts_adapter and self.tts_adapter.is_available():
            return self.tts_adapter.synthesize_to_file(text, output_path, format=format)
        raise NotImplementedError("speak_to_file() requires Piper TTS (default engine).")

    def stop_speaking(self):
        """Stop playback and cancel any cloned synthesis in flight.

        Returns ``False`` when there is no engine, otherwise the truthiness
        of the engine's ``stop`` result. ``_on_tts_end`` (if the host defines
        it) is always invoked afterwards, even when ``stop`` raises.
        """
        if not self.tts_engine:
            return False
        # Best-effort cancel ongoing cloned synthesis.
        try:
            cancel = getattr(self, "_cloned_cancel_event", None)
            if cancel is not None:
                cancel.set()
        except Exception:
            pass
        ok = False
        try:
            # Keep the output stream open when possible; repeatedly reopening
            # PortAudio streams can be flaky on some macOS AUHAL setups.
            try:
                ok = bool(self.tts_engine.stop(close_stream=False))
            except TypeError:
                # Engine without the close_stream keyword.
                ok = bool(self.tts_engine.stop())
        finally:
            # CRITICAL: stopping playback abruptly may not trigger the normal
            # playback-end callbacks (PortAudio stream is just closed).
            # If we don't restore recognizer state here, transcriptions can stay
            # paused or listening can remain paused, which breaks STOP/PTT.
            try:
                on_end = getattr(self, "_on_tts_end", None)
                if callable(on_end):
                    on_end()
            except Exception:
                pass
        return ok

    def pause_speaking(self):
        """Pause playback; ``False`` when there is no engine."""
        if not self.tts_engine:
            return False
        return self.tts_engine.pause()

    def resume_speaking(self):
        """Resume paused playback; ``False`` when there is no engine."""
        if not self.tts_engine:
            return False
        return self.tts_engine.resume()

    def is_paused(self):
        """Report the engine's paused state; ``False`` when there is no engine."""
        if not self.tts_engine:
            return False
        return self.tts_engine.is_paused()

    def is_speaking(self):
        """Report whether the engine is active; ``False`` when there is no engine."""
        if self.tts_engine:
            return self.tts_engine.is_active()
        return False

    def set_speed(self, speed):
        """Set the manager-wide speed if it lies within ``[0.5, 2.0]``.

        Returns ``True`` when the value was accepted, ``False`` when it is
        out of range or cannot be compared to numbers at all (e.g. ``None``
        or a string), in which case the current speed is left untouched.
        """
        try:
            in_range = bool(0.5 <= speed <= 2.0)
        except (TypeError, ValueError):
            # Non-numeric input is simply "not a valid speed".
            return False
        if in_range:
            self.speed = speed
            return True
        return False

    def get_speed(self):
        """Return the manager-wide speed."""
        return self.speed

    # ------------------------------------------------------------------
    # Piper adapter, languages and voices
    # ------------------------------------------------------------------
    def _try_init_piper(self, language: str):
        """Build a Piper adapter for ``language``; ``None`` if Piper is unusable.

        Any exception is swallowed (and printed in debug mode).
        """
        try:
            from ..adapters.tts_piper import PiperTTSAdapter

            adapter = PiperTTSAdapter(
                language=language,
                allow_downloads=bool(getattr(self, "allow_downloads", True)),
                auto_load=True,
            )
            # Return the adapter even if a voice is not yet loaded. This keeps audio
            # playback available for cloning backends while remaining offline-first.
            return adapter if bool(getattr(adapter, "_piper_available", False)) else None
        except Exception as e:
            # getattr keeps this handler from raising on a host that has not
            # set debug_mode (yet).
            if getattr(self, "debug_mode", False):
                print(f"⚠️ Piper TTS not available: {e}")
            return None

    def get_supported_languages(self):
        """Return the language codes declared in ``self.LANGUAGES``."""
        return list(self.LANGUAGES.keys())

    def list_available_models(self, language: str | None = None) -> dict:
        """List available TTS voices/models (Piper-only core).

        Returns a dict shaped for CLI display:
        { "en": { "amy": { ... } }, "fr": { ... } }

        Falls back to a temporary, download-free Piper adapter when the
        manager has none, and to ``{}`` when even that fails.
        """
        if self.tts_adapter and hasattr(self.tts_adapter, "list_available_models"):
            return self.tts_adapter.list_available_models(language=language)

        # Best-effort: instantiate a temporary Piper adapter to enumerate models.
        try:
            from ..adapters.tts_piper import PiperTTSAdapter

            return PiperTTSAdapter(
                language=(language or "en"),
                allow_downloads=False,
                auto_load=False,
            ).list_available_models(language=language)
        except Exception:
            return {}

    # Backward-compatible alias used by some CLI code.
    def list_voices(self, language: str | None = None) -> dict:
        """Alias of ``list_available_models``."""
        return self.list_available_models(language=language)

    def get_language(self):
        """Return the current language code."""
        return self.language

    def get_language_name(self, language_code=None):
        """Return the display name for a language code.

        Defaults to the current language; unknown codes are returned as-is.
        """
        lang = language_code or self.language
        return self.LANGUAGES.get(lang, {}).get("name", lang)

    def set_language(self, language):
        """Switch the Piper voice to ``language`` (case-insensitive code).

        Stops current speech and the recognizer, then re-targets (or creates)
        the Piper adapter and, if needed, the adapter-backed engine. On
        success the speed is reset to ``1.0``.

        Returns:
            ``True`` if the language is (now) active, ``False`` if it is
            unsupported, not a string, or the switch failed.
        """
        if isinstance(language, str):
            language = language.lower()
        if not isinstance(language, str) or language not in self.LANGUAGES:
            if self.debug_mode:
                available = ", ".join(self.LANGUAGES.keys())
                print(f"⚠️ Unsupported language '{language}'. Available: {available}")
            return False

        if language == self.language:
            if self.debug_mode:
                print(f"✓ Already using {self.LANGUAGES[language]['name']} voice")
            return True

        self.stop_speaking()
        if self.voice_recognizer:
            self.voice_recognizer.stop()

        # Piper-only core: switch Piper model for the requested language.
        try:
            if self.tts_adapter is None:
                self.tts_adapter = self._try_init_piper(language)
            else:
                self.tts_adapter.set_language(language)

            # NOTE(review): nesting reconstructed from a flattened diff; the
            # success bookkeeping is taken to require an available adapter,
            # otherwise the trailing `return False` would be unreachable.
            if self.tts_adapter and self.tts_adapter.is_available():
                if self._tts_engine_name != "piper" or self.tts_engine is None:
                    from ..tts.adapter_tts_engine import AdapterTTSEngine

                    self.tts_engine = AdapterTTSEngine(self.tts_adapter, debug_mode=self.debug_mode)
                    self._tts_engine_name = "piper"
                    self._wire_tts_callbacks()

                self.language = language
                self.speed = 1.0
                return True
        except Exception as e:
            if self.debug_mode:
                print(f"⚠️ Piper language switch failed: {e}")

        return False

    def set_voice(self, language, voice_id):
        """Select a voice; currently this only switches the language.

        Returns ``True`` when the language switch succeeded, ``False`` on any
        failure (including a non-string ``language``).
        """
        # Piper voice selection is adapter-specific. For now, treat `voice_id` as
        # best-effort metadata and ensure language switching is robust.
        try:
            if isinstance(language, str):
                language = language.lower()
            if not self.set_language(language):
                return False
            if self.debug_mode:
                print(f"🎭 Piper voice selection (best-effort): {language}.{voice_id}")
            return True
        except Exception:
            return False