openvoiceui 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. package/.env.example +104 -0
  2. package/Dockerfile +30 -0
  3. package/LICENSE +21 -0
  4. package/README.md +638 -0
  5. package/SETUP.md +360 -0
  6. package/app.py +232 -0
  7. package/auto-approve-devices.js +111 -0
  8. package/cli/index.js +372 -0
  9. package/config/__init__.py +4 -0
  10. package/config/default.yaml +43 -0
  11. package/config/flags.yaml +67 -0
  12. package/config/loader.py +203 -0
  13. package/config/providers.yaml +71 -0
  14. package/config/speech_normalization.yaml +182 -0
  15. package/config/theme.json +4 -0
  16. package/data/greetings.json +25 -0
  17. package/default-pages/ai-image-creator.html +915 -0
  18. package/default-pages/bulk-image-uploader.html +492 -0
  19. package/default-pages/desktop.html +2865 -0
  20. package/default-pages/file-explorer.html +854 -0
  21. package/default-pages/interactive-map.html +655 -0
  22. package/default-pages/style-guide.html +1005 -0
  23. package/default-pages/website-setup.html +1623 -0
  24. package/deploy/openclaw/Dockerfile +46 -0
  25. package/deploy/openvoiceui.service +30 -0
  26. package/deploy/setup-nginx.sh +50 -0
  27. package/deploy/setup-sudo.sh +306 -0
  28. package/deploy/skill-runner/Dockerfile +19 -0
  29. package/deploy/skill-runner/requirements.txt +14 -0
  30. package/deploy/skill-runner/server.py +269 -0
  31. package/deploy/supertonic/Dockerfile +22 -0
  32. package/deploy/supertonic/server.py +79 -0
  33. package/docker-compose.pinokio.yml +11 -0
  34. package/docker-compose.yml +59 -0
  35. package/greetings.json +25 -0
  36. package/index.html +65 -0
  37. package/inject-device-identity.js +142 -0
  38. package/package.json +82 -0
  39. package/profiles/default.json +114 -0
  40. package/profiles/manager.py +354 -0
  41. package/profiles/schema.json +337 -0
  42. package/prompts/voice-system-prompt.md +149 -0
  43. package/providers/__init__.py +39 -0
  44. package/providers/base.py +63 -0
  45. package/providers/llm/__init__.py +12 -0
  46. package/providers/llm/base.py +71 -0
  47. package/providers/llm/clawdbot_provider.py +112 -0
  48. package/providers/llm/zai_provider.py +115 -0
  49. package/providers/registry.py +320 -0
  50. package/providers/stt/__init__.py +12 -0
  51. package/providers/stt/base.py +58 -0
  52. package/providers/stt/webspeech_provider.py +49 -0
  53. package/providers/stt/whisper_provider.py +100 -0
  54. package/providers/tts/__init__.py +20 -0
  55. package/providers/tts/base.py +91 -0
  56. package/providers/tts/groq_provider.py +74 -0
  57. package/providers/tts/supertonic_provider.py +72 -0
  58. package/requirements.txt +38 -0
  59. package/routes/__init__.py +10 -0
  60. package/routes/admin.py +515 -0
  61. package/routes/canvas.py +1315 -0
  62. package/routes/chat.py +51 -0
  63. package/routes/conversation.py +2158 -0
  64. package/routes/elevenlabs_hybrid.py +306 -0
  65. package/routes/greetings.py +98 -0
  66. package/routes/icons.py +279 -0
  67. package/routes/image_gen.py +364 -0
  68. package/routes/instructions.py +190 -0
  69. package/routes/music.py +838 -0
  70. package/routes/onboarding.py +43 -0
  71. package/routes/pi.py +62 -0
  72. package/routes/profiles.py +215 -0
  73. package/routes/report_issue.py +68 -0
  74. package/routes/static_files.py +533 -0
  75. package/routes/suno.py +664 -0
  76. package/routes/theme.py +81 -0
  77. package/routes/transcripts.py +199 -0
  78. package/routes/vision.py +348 -0
  79. package/routes/workspace.py +288 -0
  80. package/server.py +1510 -0
  81. package/services/__init__.py +1 -0
  82. package/services/auth.py +143 -0
  83. package/services/canvas_versioning.py +239 -0
  84. package/services/db_pool.py +107 -0
  85. package/services/gateway.py +16 -0
  86. package/services/gateway_manager.py +333 -0
  87. package/services/gateways/__init__.py +12 -0
  88. package/services/gateways/base.py +110 -0
  89. package/services/gateways/compat.py +264 -0
  90. package/services/gateways/openclaw.py +1134 -0
  91. package/services/health.py +100 -0
  92. package/services/memory_client.py +455 -0
  93. package/services/paths.py +26 -0
  94. package/services/speech_normalizer.py +285 -0
  95. package/services/tts.py +270 -0
  96. package/setup-config.js +262 -0
  97. package/sounds/air_horn.mp3 +0 -0
  98. package/sounds/bruh.mp3 +0 -0
  99. package/sounds/crowd_cheer.mp3 +0 -0
  100. package/sounds/gunshot.mp3 +0 -0
  101. package/sounds/impact.mp3 +0 -0
  102. package/sounds/lets_go.mp3 +0 -0
  103. package/sounds/record_stop.mp3 +0 -0
  104. package/sounds/rewind.mp3 +0 -0
  105. package/sounds/sad_trombone.mp3 +0 -0
  106. package/sounds/scratch_long.mp3 +0 -0
  107. package/sounds/yeah.mp3 +0 -0
  108. package/src/adapters/ClawdBotAdapter.js +264 -0
  109. package/src/adapters/_template.js +133 -0
  110. package/src/adapters/elevenlabs-classic.js +841 -0
  111. package/src/adapters/elevenlabs-hybrid.js +812 -0
  112. package/src/adapters/hume-evi.js +676 -0
  113. package/src/admin.html +1339 -0
  114. package/src/app.js +8802 -0
  115. package/src/core/Config.js +173 -0
  116. package/src/core/EmotionEngine.js +307 -0
  117. package/src/core/EventBridge.js +180 -0
  118. package/src/core/EventBus.js +117 -0
  119. package/src/core/VoiceSession.js +607 -0
  120. package/src/face/BaseFace.js +259 -0
  121. package/src/face/EyeFace.js +208 -0
  122. package/src/face/HaloSmokeFace.js +509 -0
  123. package/src/face/manifest.json +27 -0
  124. package/src/face/previews/eyes.svg +16 -0
  125. package/src/face/previews/orb.svg +29 -0
  126. package/src/features/MusicPlayer.js +620 -0
  127. package/src/features/Soundboard.js +128 -0
  128. package/src/providers/DeepgramSTT.js +472 -0
  129. package/src/providers/DeepgramStreamingSTT.js +766 -0
  130. package/src/providers/GroqSTT.js +559 -0
  131. package/src/providers/TTSPlayer.js +323 -0
  132. package/src/providers/WebSpeechSTT.js +479 -0
  133. package/src/providers/tts/BaseTTSProvider.js +81 -0
  134. package/src/providers/tts/HumeProvider.js +77 -0
  135. package/src/providers/tts/SupertonicProvider.js +174 -0
  136. package/src/providers/tts/index.js +140 -0
  137. package/src/shell/adapter-registry.js +154 -0
  138. package/src/shell/caller-bridge.js +35 -0
  139. package/src/shell/camera-bridge.js +28 -0
  140. package/src/shell/canvas-bridge.js +32 -0
  141. package/src/shell/commercial-bridge.js +44 -0
  142. package/src/shell/face-bridge.js +44 -0
  143. package/src/shell/music-bridge.js +60 -0
  144. package/src/shell/orchestrator.js +233 -0
  145. package/src/shell/profile-discovery.js +303 -0
  146. package/src/shell/sounds-bridge.js +28 -0
  147. package/src/shell/transcript-bridge.js +61 -0
  148. package/src/shell/waveform-bridge.js +33 -0
  149. package/src/styles/base.css +2862 -0
  150. package/src/styles/face.css +417 -0
  151. package/src/styles/pi-overrides.css +89 -0
  152. package/src/styles/theme-dark.css +67 -0
  153. package/src/test-tts.html +175 -0
  154. package/src/ui/AppShell.js +544 -0
  155. package/src/ui/ProfileSwitcher.js +228 -0
  156. package/src/ui/SessionControl.js +240 -0
  157. package/src/ui/face/FacePicker.js +195 -0
  158. package/src/ui/face/FaceRenderer.js +309 -0
  159. package/src/ui/settings/PlaylistEditor.js +366 -0
  160. package/src/ui/settings/SettingsPanel.css +684 -0
  161. package/src/ui/settings/SettingsPanel.js +419 -0
  162. package/src/ui/settings/TTSVoicePreview.js +210 -0
  163. package/src/ui/themes/ThemeManager.js +213 -0
  164. package/src/ui/visualizers/BaseVisualizer.js +29 -0
  165. package/src/ui/visualizers/PartyFXVisualizer.css +291 -0
  166. package/src/ui/visualizers/PartyFXVisualizer.js +637 -0
  167. package/static/emulators/jsdos/js-dos.css +1 -0
  168. package/static/emulators/jsdos/js-dos.js +22 -0
  169. package/static/favicon.svg +55 -0
  170. package/static/icons/apple-touch-icon.png +0 -0
  171. package/static/icons/favicon-32.png +0 -0
  172. package/static/icons/icon-192.png +0 -0
  173. package/static/icons/icon-512.png +0 -0
  174. package/static/install.html +449 -0
  175. package/static/manifest.json +26 -0
  176. package/static/sw.js +21 -0
  177. package/tts_providers/__init__.py +136 -0
  178. package/tts_providers/base_provider.py +319 -0
  179. package/tts_providers/groq_provider.py +155 -0
  180. package/tts_providers/hume_provider.py +226 -0
  181. package/tts_providers/providers_config.json +119 -0
  182. package/tts_providers/qwen3_provider.py +371 -0
  183. package/tts_providers/resemble_provider.py +315 -0
  184. package/tts_providers/supertonic_provider.py +557 -0
  185. package/tts_providers/supertonic_tts.py +399 -0
@@ -0,0 +1,557 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Supertonic TTS Provider for OpenVoiceUI.
4
+
5
+ This provider wraps the existing supertonic_tts.py module, implementing
6
+ the TTSProvider interface for seamless integration with the TTS provider system.
7
+
8
+ Supertonic is a local ONNX-based Text-to-Speech engine that supports multiple
9
+ voice styles (M1-M5 for male, F1-F5 for female voices) and multiple languages.
10
+
11
+ Author: OpenVoiceUI
12
+ Date: 2026-02-11
13
+ """
14
+
15
+ import logging
16
+ import os
17
+ from typing import Dict, List, Any, Optional
18
+
19
+ from .base_provider import TTSProvider
20
+
21
+ # Configure logging
22
+ logger = logging.getLogger(__name__)
23
+
24
# ── API mode (preferred) ───────────────────────────────────────────────────────
# When SUPERTONIC_API_URL is set, all synthesis calls go to the shared
# supertonic-tts microservice (loaded once, serves all users).
# Falls back to local ONNX loading if the env var is not set.
_API_URL = os.environ.get("SUPERTONIC_API_URL", "").rstrip("/")

# ── Local mode (fallback) ──────────────────────────────────────────────────────
import sys
from pathlib import Path

# Keep the project root importable for a top-level ``supertonic_tts`` module.
_project_root = Path(__file__).parent.parent
if str(_project_root) not in sys.path:
    sys.path.insert(0, str(_project_root))

# Always bound, so reading it can never raise NameError when the import works.
_import_error = None

try:
    # Preferred: the helper module shipped next to this file in the package.
    from .supertonic_tts import SupertonicTTS
except ImportError as _relative_error:
    try:
        # Fallback: a top-level module reachable through sys.path.
        from supertonic_tts import SupertonicTTS
    except ImportError as _absolute_error:
        # Local mode is unavailable; the provider reports this via its status.
        SupertonicTTS = None
        _import_error = f"{_relative_error}; {_absolute_error}"
43
+
44
+
45
+ class SupertonicProvider(TTSProvider):
46
+ """
47
+ TTS Provider for Supertonic ONNX-based Text-to-Speech engine.
48
+
49
+ This provider offers high-quality local TTS with multiple voice styles.
50
+ It runs entirely offline after initial model loading, making it ideal
51
+ for applications that need low latency and privacy.
52
+
53
+ Key Behavior:
54
+ This provider keeps one SupertonicTTS instance per voice: it is created
55
+ lazily the first time generate_speech() is called with that voice and
56
+ is then reused from an in-memory cache (see clear_cache()). This gives
57
+ proper voice switching without shared mutable state, at the cost of
58
+ slightly higher latency on first use of each voice.
59
+
60
+ Voice Styles:
61
+ - M1-M5: Male voices with different characteristics
62
+ - F1-F5: Female voices with different characteristics
63
+
64
+ Languages:
65
+ - en (English)
66
+ - ko (Korean)
67
+ - es (Spanish)
68
+ - pt (Portuguese)
69
+ - fr (French)
70
+
71
+ Example:
72
+ >>> from tts_providers import SupertonicProvider
73
+ >>> provider = SupertonicProvider()
74
+ >>> audio = provider.generate_speech("Hello world", voice='M1')
75
+ >>> with open('output.wav', 'wb') as f:
76
+ ... f.write(audio)
77
+
78
+ Configuration:
79
+ >>> provider = SupertonicProvider(
80
+ ... onnx_dir="/path/to/onnx",
81
+ ... voice_styles_dir="/path/to/voice_styles",
82
+ ... default_voice="M1"
83
+ ... )
84
+ """
85
+
86
+ # Default paths — override via SUPERTONIC_MODEL_PATH env var
87
+ DEFAULT_ONNX_DIR = os.getenv('SUPERTONIC_MODEL_PATH', '/opt/supertonic/assets/onnx')
88
+ DEFAULT_VOICE_STYLES_DIR = None # derived from onnx_dir if not set
89
+
90
+ # Available voice styles (expanded from base implementation)
91
+ AVAILABLE_VOICES = [
92
+ 'M1', 'M2', 'M3', 'M4', 'M5', # Male voices
93
+ 'F1', 'F2', 'F3', 'F4', 'F5' # Female voices
94
+ ]
95
+
96
+ # Supported languages
97
+ SUPPORTED_LANGUAGES = ['en', 'ko', 'es', 'pt', 'fr']
98
+
99
+ # Provider metadata
100
+ PROVIDER_NAME = "Supertonic"
101
+ PROVIDER_VERSION = "1.0.0"
102
+ PROVIDER_DESCRIPTION = "Local ONNX-based TTS with multiple voice styles"
103
+
104
def __init__(
    self,
    onnx_dir: Optional[str] = None,
    voice_styles_dir: Optional[str] = None,
    default_voice: str = 'F3',
    use_gpu: bool = False
):
    """
    Initialize the Supertonic TTS Provider.

    Two modes are tried in order:

    1. API mode - when SUPERTONIC_API_URL is set and its ``/health``
       endpoint answers with a success status, synthesis is delegated
       to that service and nothing is loaded locally.
    2. Local mode - the ONNX models are loaded in-process through
       SupertonicTTS.

    Initialization does not raise for configuration problems. Failures
    are recorded in ``self._status`` ('error') and ``self._init_error``
    so callers can inspect them through is_available() / get_info().

    Args:
        onnx_dir: Path to ONNX models directory. If None, uses DEFAULT_ONNX_DIR.
        voice_styles_dir: Path to voice styles JSON files directory.
            If None, uses DEFAULT_VOICE_STYLES_DIR when set, otherwise
            a ``voice_styles`` directory next to ``onnx_dir``.
        default_voice: Default voice to use (M1-M5, F1-F5). Default is 'F3'.
        use_gpu: Whether to use GPU for inference. Default is False (CPU only).

    Example:
        >>> provider = SupertonicProvider(
        ...     onnx_dir="/custom/path/to/onnx",
        ...     default_voice="F1"
        ... )
    """
    super().__init__()

    self._status = 'inactive'
    self._init_error = None
    self._tts_cache: Dict[str, SupertonicTTS] = {}
    self.default_voice = default_voice
    self.use_gpu = use_gpu
    self._use_api = False
    self.onnx_dir = onnx_dir or self.DEFAULT_ONNX_DIR

    # ── API mode ──────────────────────────────────────────────────────────
    # Preferred: call the shared supertonic-tts microservice.
    # Models are loaded once system-wide; no per-process ONNX loading.
    if _API_URL:
        try:
            import requests
            resp = requests.get(f"{_API_URL}/health", timeout=3)
        except Exception as e:
            logger.warning(f"SupertonicProvider: API at {_API_URL} unreachable ({e}), trying local")
        else:
            if resp.ok:
                self._use_api = True
                self._api_url = _API_URL
                self.voice_styles_dir = ""
                self._status = 'active'
                logger.info(f"SupertonicProvider: API mode → {_API_URL}")
                return
            # A reachable but unhealthy service used to fall through silently.
            logger.warning(
                f"SupertonicProvider: API at {_API_URL} unhealthy "
                f"(HTTP {resp.status_code}), trying local"
            )

    # ── Local mode (fallback) ─────────────────────────────────────────────
    # One derivation for every local branch, based on the onnx_dir in use.
    self.voice_styles_dir = (
        voice_styles_dir
        or self.DEFAULT_VOICE_STYLES_DIR
        or os.path.join(os.path.dirname(self.onnx_dir), 'voice_styles')
    )

    if SupertonicTTS is None:
        self._status = 'error'
        # NOTE(review): SUPERTONIC_HELPER_PATH is not read anywhere in this
        # file - confirm it is honoured elsewhere before relying on it.
        self._init_error = "supertonic_tts module not found. Set SUPERTONIC_API_URL or SUPERTONIC_HELPER_PATH."
        logger.warning(f"SupertonicProvider: {self._init_error}")
        return

    if not os.path.exists(self.onnx_dir):
        self._status = 'error'
        self._init_error = f"ONNX directory not found: {self.onnx_dir}. Set SUPERTONIC_MODEL_PATH in .env."
        logger.warning(f"SupertonicProvider: {self._init_error}")
        return

    if not os.path.exists(self.voice_styles_dir):
        self._status = 'error'
        self._init_error = f"Voice styles directory not found: {self.voice_styles_dir}"
        logger.warning(f"SupertonicProvider: {self._init_error}")
        return

    try:
        # Loading the default voice up front doubles as a smoke test.
        self._create_tts_instance(self.default_voice)
        self._status = 'active'
        logger.info(f"SupertonicProvider: local mode, voice '{default_voice}'")
    except Exception as e:
        self._status = 'error'
        self._init_error = str(e)
        logger.error(f"SupertonicProvider initialization failed: {e}")
195
+
196
def _get_voice_style_path(self, voice: str) -> str:
    """
    Resolve the JSON style file that belongs to a voice.

    Args:
        voice: Voice identifier (e.g., 'M1', 'F2').

    Returns:
        Path of ``<voice>.json`` inside ``self.voice_styles_dir``.

    Raises:
        ValueError: If voice is not listed in AVAILABLE_VOICES.
        FileNotFoundError: If the style file is missing on disk.
    """
    is_known = voice in self.AVAILABLE_VOICES
    if not is_known:
        raise ValueError(
            f"Invalid voice: {voice}. Available: {self.AVAILABLE_VOICES}"
        )

    style_file = os.path.join(self.voice_styles_dir, f"{voice}.json")
    if os.path.exists(style_file):
        return style_file

    raise FileNotFoundError(
        f"Voice style file not found: {style_file}"
    )
223
+
224
def _create_tts_instance(self, voice: str) -> "SupertonicTTS":
    """
    Get or create a TTS instance for the specified voice.

    Instances are cached in ``self._tts_cache`` by voice name, so the
    ONNX models are loaded at most once per voice until clear_cache()
    is called.

    Args:
        voice: Voice identifier.

    Returns:
        SupertonicTTS instance for the specified voice.

    Raises:
        ValueError: If voice is not in AVAILABLE_VOICES (raised by
            _get_voice_style_path()).
        FileNotFoundError: If the voice style file is missing (raised by
            _get_voice_style_path()).
        RuntimeError: If the supertonic_tts module is unavailable or the
            instance cannot be constructed.
    """
    # Check cache first
    if voice in self._tts_cache:
        logger.debug(f"Reusing cached TTS instance for voice '{voice}'")
        return self._tts_cache[voice]

    voice_style_path = self._get_voice_style_path(voice)

    # Without this guard the failure surfaces as the opaque
    # "'NoneType' object is not callable".
    if SupertonicTTS is None:
        raise RuntimeError(
            "TTS instance creation failed: supertonic_tts module is not available"
        )

    try:
        tts_instance = SupertonicTTS(
            onnx_dir=self.onnx_dir,
            voice_style_path=voice_style_path,
            voice_style_name=voice,
            use_gpu=self.use_gpu
        )
    except Exception as e:
        logger.error(f"Failed to create TTS instance for voice '{voice}': {e}")
        # Chain the cause so the original traceback is preserved.
        raise RuntimeError(f"TTS instance creation failed: {e}") from e

    # Cache the instance for reuse
    self._tts_cache[voice] = tts_instance
    logger.debug(f"Created and cached new TTS instance for voice '{voice}'")
    return tts_instance
261
+
262
def generate_speech(
    self,
    text: str,
    voice: Optional[str] = None,
    lang: str = 'en',
    speed: float = 1.0,
    total_step: int = 15,
    **options
) -> bytes:
    """
    Generate speech from text using Supertonic TTS.

    In API mode the request is POSTed to ``<api_url>/tts`` and the
    response body is returned as-is. Otherwise an in-process
    SupertonicTTS instance from _create_tts_instance() does the
    synthesis.

    Args:
        text: The text to synthesize. Checked by self.validate_text().
        voice: Voice identifier (M1-M5, F1-F5). If None, uses default_voice.
        lang: Language code ('en', 'ko', 'es', 'pt', 'fr'). Default is 'en'.
        speed: Speech speed multiplier. Higher = faster.
            Must satisfy 0 < speed <= 3. Default is 1.0.
        total_step: Number of denoising steps. More = better quality but slower.
            Must be between 1 and 50. Default is 15.
        **options: Additional options (currently not used, reserved for future).

    Returns:
        bytes: Audio data produced by the backend (presumably WAV - the
        API response body is not inspected here).

    Raises:
        ValueError: If voice, lang, speed or total_step is invalid
            (text validation is delegated to self.validate_text()).
        RuntimeError: If speech generation fails; the underlying
            exception is available as ``__cause__``.

    Example:
        >>> audio = provider.generate_speech(
        ...     text="Hello world!",
        ...     voice="M1",
        ...     lang="en",
        ...     speed=1.1,
        ...     total_step=6
        ... )
    """
    # Use default voice if not specified
    if voice is None:
        voice = self.default_voice

    # Validate inputs
    self.validate_text(text)

    if voice not in self.AVAILABLE_VOICES:
        raise ValueError(
            f"Invalid voice: {voice}. Available: {self.AVAILABLE_VOICES}"
        )

    if lang not in self.SUPPORTED_LANGUAGES:
        raise ValueError(
            f"Unsupported language: {lang}. Supported: {self.SUPPORTED_LANGUAGES}"
        )

    if speed <= 0 or speed > 3:
        raise ValueError(f"Invalid speed: {speed}. Must be between 0 and 3")

    if total_step < 1 or total_step > 50:
        raise ValueError(f"Invalid total_step: {total_step}. Must be between 1 and 50")

    logger.info(
        f"Generating speech: '{text[:50]}...' "
        f"(voice={voice}, lang={lang}, speed={speed}, steps={total_step})"
    )

    try:
        # ── API mode: call shared supertonic-tts service ──────────────────
        if getattr(self, '_use_api', False):
            import requests
            resp = requests.post(
                f"{self._api_url}/tts",
                json={"text": text, "voice": voice, "speed": speed,
                      "steps": total_step, "lang": lang},
                timeout=60,
            )
            if not resp.ok:
                raise RuntimeError(f"Supertonic API error {resp.status_code}: {resp.text[:200]}")
            audio_bytes = resp.content
            logger.info(f"API: {len(audio_bytes)} bytes for voice '{voice}'")
            return audio_bytes

        # ── Local mode: load ONNX in-process ─────────────────────────────
        tts = self._create_tts_instance(voice)
        audio_bytes = tts.generate_speech(
            text=text, lang=lang, speed=speed, total_step=total_step
        )
        logger.info(f"Local: {len(audio_bytes)} bytes for voice '{voice}'")
        return audio_bytes

    except Exception as e:
        logger.error(f"Speech generation failed: {e}")
        # Chain the cause so the backend's traceback survives the re-wrap.
        raise RuntimeError(f"Failed to generate speech: {e}") from e
355
+
356
def list_voices(self) -> List[str]:
    """
    Return the voice identifiers accepted by generate_speech().

    A new list is built on every call, so callers may modify the result
    without touching AVAILABLE_VOICES.

    Returns:
        List of voice identifiers.

    Example:
        >>> provider.list_voices()
        ['M1', 'M2', 'M3', 'M4', 'M5', 'F1', 'F2', 'F3', 'F4', 'F5']
    """
    voices = list(self.AVAILABLE_VOICES)
    return voices
368
+
369
def get_info(self) -> Dict[str, Any]:
    """
    Get provider metadata and status matching providers_config.json format.

    Both directory attributes are read defensively, so the method still
    returns a complete dict if they were never assigned.

    Returns:
        Dict containing complete metadata including:
        - 'name': Provider name
        - 'provider_id': Unique provider identifier
        - 'status': 'active', 'inactive', or 'error'
        - 'description': Human-readable description
        - 'quality': Audio quality rating
        - 'latency': Expected latency category
        - 'cost_per_minute': Cost per minute of audio
        - 'voices': List of all available voice identifiers
        - 'features': List of provider features
        - 'requires_api_key': Whether API key is required
        - 'languages': List of supported language codes
        - 'max_characters': Max text length per request
        - 'notes': Additional notes about the provider
        - 'documentation_url': Link to documentation
        - 'default_voice': Default voice identifier
        - 'capabilities': Dict of feature flags
        - 'onnx_dir': ONNX models directory path
        - 'voice_styles_dir': Voice styles directory path
        - 'error': Error message if status is 'error', else None
        - 'requires_microphone': Always False
        - 'requires_websocket': Always False
        - 'mode': Always 'tts-only'

    Example:
        >>> info = provider.get_info()
        >>> print(f"{info['name']}: {info['status']}")
        Supertonic TTS: active
        >>> info['languages']
        ['en', 'ko', 'es', 'pt', 'fr']
    """
    # Read both directories the same defensive way; previously only the
    # 'notes' text was protected against a missing attribute.
    onnx_dir = getattr(self, 'onnx_dir', self.DEFAULT_ONNX_DIR)
    voice_styles_dir = getattr(self, 'voice_styles_dir', '')
    return {
        'name': 'Supertonic TTS',
        'provider_id': 'supertonic',
        'status': self._status,
        'description': 'Local ONNX-based TTS engine with multiple voice styles',
        'quality': 'high',
        'latency': 'very-fast',
        'cost_per_minute': 0.0,
        'voices': self.AVAILABLE_VOICES.copy(),
        'features': [
            'multi-language',
            'local-processing',
            'open-source',
            'no-api-key-required',
            'onnx-based',
            'voice-style-switching',
            'offline-capable',
        ],
        'requires_api_key': False,
        'languages': self.SUPPORTED_LANGUAGES.copy(),
        'max_characters': 10000,
        'notes': (
            'Free, fast, local inference. Requires local ONNX models. '
            f'Set SUPERTONIC_MODEL_PATH in .env. Current path: {onnx_dir}'
        ),
        # NOTE(review): confirm this URL points at the upstream Supertonic
        # project before surfacing it to users.
        'documentation_url': 'https://github.com/playht/supertonic',
        'default_voice': self.default_voice,
        'capabilities': {
            'streaming': False,
            'ssml': False,
            'custom_voices': True,
            'offline': True,
            'gpu_support': True,
        },
        'onnx_dir': onnx_dir,
        'voice_styles_dir': voice_styles_dir,
        'error': self._init_error if self._status == 'error' else None,
        'requires_microphone': False,
        'requires_websocket': False,
        'mode': 'tts-only',
    }
444
+
445
def is_available(self) -> bool:
    """
    Report whether the provider finished initialization successfully.

    Returns:
        True when the internal status is 'active', False for any other
        status.
    """
    ready = self._status == 'active'
    return ready
453
+
454
def get_default_voice(self) -> str:
    """
    Return the voice used when generate_speech() receives no voice.

    Returns:
        The current default voice identifier (e.g., 'F3').
    """
    current_default = self.default_voice
    return current_default
462
+
463
def set_default_voice(self, voice: str) -> None:
    """
    Change the default voice.

    In local mode the voice is loaded (and cached) first, so a broken
    voice is rejected before it becomes the default. In API mode no
    local load is attempted, because no local models are used there.

    Args:
        voice: New default voice identifier (must be in AVAILABLE_VOICES).

    Raises:
        ValueError: If voice is not available.
        RuntimeError: If local voice initialization fails; the
            underlying exception is available as ``__cause__``.
    """
    if voice not in self.AVAILABLE_VOICES:
        raise ValueError(
            f"Invalid voice: {voice}. Available: {self.AVAILABLE_VOICES}"
        )

    # A local load in API mode would always fail (no style directory),
    # which previously made the default voice impossible to change.
    if not getattr(self, '_use_api', False):
        try:
            # Test initialization with the new default voice
            self._create_tts_instance(voice)
        except Exception as e:
            raise RuntimeError(f"Failed to set default voice: {e}") from e

    self.default_voice = voice
    logger.info(f"Default voice changed to '{voice}'")
486
+
487
def get_supported_languages(self) -> List[str]:
    """
    Return the language codes accepted by generate_speech().

    A new list is built on every call, so callers may modify the result
    without touching SUPPORTED_LANGUAGES.

    Returns:
        List of supported language codes.
    """
    languages = list(self.SUPPORTED_LANGUAGES)
    return languages
495
+
496
def clear_cache(self) -> None:
    """
    Drop every cached TTS instance.

    The cache dict is emptied in place; the next _create_tts_instance()
    call for any voice builds a fresh instance.
    """
    cached_instances = self._tts_cache
    cached_instances.clear()
    logger.debug("TTS instance cache cleared")
505
+
506
def preload_voice(self, voice: str) -> None:
    """
    Preload a TTS instance for a specific voice.

    In local mode this creates and caches the instance so it is ready
    for immediate use. In API mode it only validates the voice name,
    because nothing is loaded in this process.

    Args:
        voice: Voice identifier to preload.

    Raises:
        ValueError: If voice is not available.
        RuntimeError: If preloading fails; the underlying exception is
            available as ``__cause__``.
    """
    if voice not in self.AVAILABLE_VOICES:
        raise ValueError(
            f"Invalid voice: {voice}. Available: {self.AVAILABLE_VOICES}"
        )

    # A local load in API mode would always fail (no style directory).
    if getattr(self, '_use_api', False):
        logger.debug(f"API mode: nothing to preload locally for voice '{voice}'")
        return

    try:
        # Create and cache the TTS instance
        self._create_tts_instance(voice)
    except Exception as e:
        raise RuntimeError(f"Failed to preload voice '{voice}': {e}") from e

    logger.info(f"Voice '{voice}' preloaded and cached")
530
+
531
def preload_all_voices(self) -> Dict[str, bool]:
    """
    Try to preload every voice in AVAILABLE_VOICES.

    Each voice goes through preload_voice(). A failure is logged and
    recorded as False, and the remaining voices are still attempted.

    Returns:
        Dict mapping voice identifiers to success status.

    Example:
        >>> results = provider.preload_all_voices()
        >>> print(results)
        {'M1': True, 'M2': True, 'F1': True, 'F2': False}
    """
    def _try_preload(name: str) -> bool:
        # Turn any preload failure into a logged False.
        try:
            self.preload_voice(name)
        except Exception as e:
            logger.error(f"Failed to preload voice '{name}': {e}")
            return False
        return True

    return {name: _try_preload(name) for name in self.AVAILABLE_VOICES}
555
+
556
+
557
+ __all__ = ['SupertonicProvider']