openvoiceui 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. package/.env.example +104 -0
  2. package/Dockerfile +30 -0
  3. package/LICENSE +21 -0
  4. package/README.md +638 -0
  5. package/SETUP.md +360 -0
  6. package/app.py +232 -0
  7. package/auto-approve-devices.js +111 -0
  8. package/cli/index.js +372 -0
  9. package/config/__init__.py +4 -0
  10. package/config/default.yaml +43 -0
  11. package/config/flags.yaml +67 -0
  12. package/config/loader.py +203 -0
  13. package/config/providers.yaml +71 -0
  14. package/config/speech_normalization.yaml +182 -0
  15. package/config/theme.json +4 -0
  16. package/data/greetings.json +25 -0
  17. package/default-pages/ai-image-creator.html +915 -0
  18. package/default-pages/bulk-image-uploader.html +492 -0
  19. package/default-pages/desktop.html +2865 -0
  20. package/default-pages/file-explorer.html +854 -0
  21. package/default-pages/interactive-map.html +655 -0
  22. package/default-pages/style-guide.html +1005 -0
  23. package/default-pages/website-setup.html +1623 -0
  24. package/deploy/openclaw/Dockerfile +46 -0
  25. package/deploy/openvoiceui.service +30 -0
  26. package/deploy/setup-nginx.sh +50 -0
  27. package/deploy/setup-sudo.sh +306 -0
  28. package/deploy/skill-runner/Dockerfile +19 -0
  29. package/deploy/skill-runner/requirements.txt +14 -0
  30. package/deploy/skill-runner/server.py +269 -0
  31. package/deploy/supertonic/Dockerfile +22 -0
  32. package/deploy/supertonic/server.py +79 -0
  33. package/docker-compose.pinokio.yml +11 -0
  34. package/docker-compose.yml +59 -0
  35. package/greetings.json +25 -0
  36. package/index.html +65 -0
  37. package/inject-device-identity.js +142 -0
  38. package/package.json +82 -0
  39. package/profiles/default.json +114 -0
  40. package/profiles/manager.py +354 -0
  41. package/profiles/schema.json +337 -0
  42. package/prompts/voice-system-prompt.md +149 -0
  43. package/providers/__init__.py +39 -0
  44. package/providers/base.py +63 -0
  45. package/providers/llm/__init__.py +12 -0
  46. package/providers/llm/base.py +71 -0
  47. package/providers/llm/clawdbot_provider.py +112 -0
  48. package/providers/llm/zai_provider.py +115 -0
  49. package/providers/registry.py +320 -0
  50. package/providers/stt/__init__.py +12 -0
  51. package/providers/stt/base.py +58 -0
  52. package/providers/stt/webspeech_provider.py +49 -0
  53. package/providers/stt/whisper_provider.py +100 -0
  54. package/providers/tts/__init__.py +20 -0
  55. package/providers/tts/base.py +91 -0
  56. package/providers/tts/groq_provider.py +74 -0
  57. package/providers/tts/supertonic_provider.py +72 -0
  58. package/requirements.txt +38 -0
  59. package/routes/__init__.py +10 -0
  60. package/routes/admin.py +515 -0
  61. package/routes/canvas.py +1315 -0
  62. package/routes/chat.py +51 -0
  63. package/routes/conversation.py +2158 -0
  64. package/routes/elevenlabs_hybrid.py +306 -0
  65. package/routes/greetings.py +98 -0
  66. package/routes/icons.py +279 -0
  67. package/routes/image_gen.py +364 -0
  68. package/routes/instructions.py +190 -0
  69. package/routes/music.py +838 -0
  70. package/routes/onboarding.py +43 -0
  71. package/routes/pi.py +62 -0
  72. package/routes/profiles.py +215 -0
  73. package/routes/report_issue.py +68 -0
  74. package/routes/static_files.py +533 -0
  75. package/routes/suno.py +664 -0
  76. package/routes/theme.py +81 -0
  77. package/routes/transcripts.py +199 -0
  78. package/routes/vision.py +348 -0
  79. package/routes/workspace.py +288 -0
  80. package/server.py +1510 -0
  81. package/services/__init__.py +1 -0
  82. package/services/auth.py +143 -0
  83. package/services/canvas_versioning.py +239 -0
  84. package/services/db_pool.py +107 -0
  85. package/services/gateway.py +16 -0
  86. package/services/gateway_manager.py +333 -0
  87. package/services/gateways/__init__.py +12 -0
  88. package/services/gateways/base.py +110 -0
  89. package/services/gateways/compat.py +264 -0
  90. package/services/gateways/openclaw.py +1134 -0
  91. package/services/health.py +100 -0
  92. package/services/memory_client.py +455 -0
  93. package/services/paths.py +26 -0
  94. package/services/speech_normalizer.py +285 -0
  95. package/services/tts.py +270 -0
  96. package/setup-config.js +262 -0
  97. package/sounds/air_horn.mp3 +0 -0
  98. package/sounds/bruh.mp3 +0 -0
  99. package/sounds/crowd_cheer.mp3 +0 -0
  100. package/sounds/gunshot.mp3 +0 -0
  101. package/sounds/impact.mp3 +0 -0
  102. package/sounds/lets_go.mp3 +0 -0
  103. package/sounds/record_stop.mp3 +0 -0
  104. package/sounds/rewind.mp3 +0 -0
  105. package/sounds/sad_trombone.mp3 +0 -0
  106. package/sounds/scratch_long.mp3 +0 -0
  107. package/sounds/yeah.mp3 +0 -0
  108. package/src/adapters/ClawdBotAdapter.js +264 -0
  109. package/src/adapters/_template.js +133 -0
  110. package/src/adapters/elevenlabs-classic.js +841 -0
  111. package/src/adapters/elevenlabs-hybrid.js +812 -0
  112. package/src/adapters/hume-evi.js +676 -0
  113. package/src/admin.html +1339 -0
  114. package/src/app.js +8802 -0
  115. package/src/core/Config.js +173 -0
  116. package/src/core/EmotionEngine.js +307 -0
  117. package/src/core/EventBridge.js +180 -0
  118. package/src/core/EventBus.js +117 -0
  119. package/src/core/VoiceSession.js +607 -0
  120. package/src/face/BaseFace.js +259 -0
  121. package/src/face/EyeFace.js +208 -0
  122. package/src/face/HaloSmokeFace.js +509 -0
  123. package/src/face/manifest.json +27 -0
  124. package/src/face/previews/eyes.svg +16 -0
  125. package/src/face/previews/orb.svg +29 -0
  126. package/src/features/MusicPlayer.js +620 -0
  127. package/src/features/Soundboard.js +128 -0
  128. package/src/providers/DeepgramSTT.js +472 -0
  129. package/src/providers/DeepgramStreamingSTT.js +766 -0
  130. package/src/providers/GroqSTT.js +559 -0
  131. package/src/providers/TTSPlayer.js +323 -0
  132. package/src/providers/WebSpeechSTT.js +479 -0
  133. package/src/providers/tts/BaseTTSProvider.js +81 -0
  134. package/src/providers/tts/HumeProvider.js +77 -0
  135. package/src/providers/tts/SupertonicProvider.js +174 -0
  136. package/src/providers/tts/index.js +140 -0
  137. package/src/shell/adapter-registry.js +154 -0
  138. package/src/shell/caller-bridge.js +35 -0
  139. package/src/shell/camera-bridge.js +28 -0
  140. package/src/shell/canvas-bridge.js +32 -0
  141. package/src/shell/commercial-bridge.js +44 -0
  142. package/src/shell/face-bridge.js +44 -0
  143. package/src/shell/music-bridge.js +60 -0
  144. package/src/shell/orchestrator.js +233 -0
  145. package/src/shell/profile-discovery.js +303 -0
  146. package/src/shell/sounds-bridge.js +28 -0
  147. package/src/shell/transcript-bridge.js +61 -0
  148. package/src/shell/waveform-bridge.js +33 -0
  149. package/src/styles/base.css +2862 -0
  150. package/src/styles/face.css +417 -0
  151. package/src/styles/pi-overrides.css +89 -0
  152. package/src/styles/theme-dark.css +67 -0
  153. package/src/test-tts.html +175 -0
  154. package/src/ui/AppShell.js +544 -0
  155. package/src/ui/ProfileSwitcher.js +228 -0
  156. package/src/ui/SessionControl.js +240 -0
  157. package/src/ui/face/FacePicker.js +195 -0
  158. package/src/ui/face/FaceRenderer.js +309 -0
  159. package/src/ui/settings/PlaylistEditor.js +366 -0
  160. package/src/ui/settings/SettingsPanel.css +684 -0
  161. package/src/ui/settings/SettingsPanel.js +419 -0
  162. package/src/ui/settings/TTSVoicePreview.js +210 -0
  163. package/src/ui/themes/ThemeManager.js +213 -0
  164. package/src/ui/visualizers/BaseVisualizer.js +29 -0
  165. package/src/ui/visualizers/PartyFXVisualizer.css +291 -0
  166. package/src/ui/visualizers/PartyFXVisualizer.js +637 -0
  167. package/static/emulators/jsdos/js-dos.css +1 -0
  168. package/static/emulators/jsdos/js-dos.js +22 -0
  169. package/static/favicon.svg +55 -0
  170. package/static/icons/apple-touch-icon.png +0 -0
  171. package/static/icons/favicon-32.png +0 -0
  172. package/static/icons/icon-192.png +0 -0
  173. package/static/icons/icon-512.png +0 -0
  174. package/static/install.html +449 -0
  175. package/static/manifest.json +26 -0
  176. package/static/sw.js +21 -0
  177. package/tts_providers/__init__.py +136 -0
  178. package/tts_providers/base_provider.py +319 -0
  179. package/tts_providers/groq_provider.py +155 -0
  180. package/tts_providers/hume_provider.py +226 -0
  181. package/tts_providers/providers_config.json +119 -0
  182. package/tts_providers/qwen3_provider.py +371 -0
  183. package/tts_providers/resemble_provider.py +315 -0
  184. package/tts_providers/supertonic_provider.py +557 -0
  185. package/tts_providers/supertonic_tts.py +399 -0
@@ -0,0 +1,399 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Supertonic TTS wrapper for OpenVoiceUI.
4
+
5
+ This module provides a clean interface to the Supertonic Text-to-Speech engine,
6
+ wrapping the helper.py functionality for use in Flask applications.
7
+
8
+ Author: OpenVoiceUI
9
+ Date: 2026-02-11
10
+ """
11
+
12
+ import os
13
+ import sys
14
+ import logging
15
+ from io import BytesIO
16
+ from typing import Optional
17
+
18
+ import numpy as np
19
+ import soundfile as sf
20
+
21
# Make Supertonic's helper.py importable: its directory (overridable through the
# SUPERTONIC_HELPER_PATH environment variable) is placed first on sys.path.
SUPERTONIC_HELPER_PATH = os.environ.get(
    "SUPERTONIC_HELPER_PATH",
    os.path.expanduser("~/supertonic/py"),
)
if SUPERTONIC_HELPER_PATH not in sys.path:
    sys.path.insert(0, SUPERTONIC_HELPER_PATH)

try:
    from helper import (
        load_text_to_speech,
        load_voice_style,
        Style,
    )
except ImportError as e:
    # Report where helper.py was expected, then let the import failure propagate.
    for message in (
        f"Failed to import Supertonic helper: {e}",
        f"Make sure {SUPERTONIC_HELPER_PATH}/helper.py exists",
    ):
        logging.error(message)
    raise


# Root logging configuration plus the logger used throughout this module.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
44
+
45
+
46
class SupertonicTTS:
    """
    Wrapper class for the Supertonic Text-to-Speech engine.

    Loads the Supertonic ONNX models and a voice style through the external
    ``helper`` module and exposes ``generate_speech`` to turn text into WAV
    bytes. Long input is synthesized chunk by chunk and the resulting audio is
    concatenated.

    Example:
        >>> tts = SupertonicTTS(
        ...     onnx_dir="~/supertonic/assets/onnx",
        ...     voice_style_path="~/supertonic/assets/voice_styles/M1.json"
        ... )
        >>> audio_bytes = tts.generate_speech("Hello world, this is a test")
        >>> # audio_bytes contains WAV format audio data
    """

    # Default paths (override with the SUPERTONIC_ONNX_DIR and
    # SUPERTONIC_VOICE_STYLES_DIR environment variables)
    DEFAULT_ONNX_DIR = os.environ.get("SUPERTONIC_ONNX_DIR", os.path.expanduser("~/supertonic/assets/onnx"))
    DEFAULT_VOICE_STYLES_DIR = os.environ.get("SUPERTONIC_VOICE_STYLES_DIR", os.path.expanduser("~/supertonic/assets/voice_styles"))

    # Available voice styles
    AVAILABLE_VOICE_STYLES = {
        'M1': 'M1.json',  # Male voice 1
        'M2': 'M2.json',  # Male voice 2
        'F1': 'F1.json',  # Female voice 1
        'F2': 'F2.json',  # Female voice 2
    }

    # Language codes accepted by generate_speech
    _SUPPORTED_LANGS = ('en', 'ko', 'es', 'pt', 'fr')

    # Maximum characters per synthesized chunk. The original author chose this
    # to stay under the ONNX token limit (~1000 tokens) -- limit not verified here.
    _MAX_CHUNK_LENGTH = 500

    def __init__(
        self,
        onnx_dir: Optional[str] = None,
        voice_style_path: Optional[str] = None,
        voice_style_name: str = 'M1',
        use_gpu: bool = False
    ):
        """
        Initialize the Supertonic TTS engine.

        Args:
            onnx_dir: Path to the ONNX models directory. If None, uses
                DEFAULT_ONNX_DIR. A leading ``~`` is expanded.
            voice_style_path: Full path to the voice style JSON file. If None,
                the path is built from voice_style_name. A leading ``~`` is
                expanded.
            voice_style_name: Name of the voice style (M1, M2, F1, F2). Used to
                build the path only if voice_style_path is None.
            use_gpu: Passed to the helper's model loader. Default is False.

        Raises:
            FileNotFoundError: If onnx_dir or the voice style file doesn't exist.
            ValueError: If voice_style_path is None and voice_style_name is unknown.
            RuntimeError: If model loading fails.
        """
        # expanduser so "~/..." paths (as in the class example) resolve
        self.onnx_dir = os.path.expanduser(onnx_dir or self.DEFAULT_ONNX_DIR)
        self.voice_style_name = voice_style_name

        # Validate onnx directory
        if not os.path.exists(self.onnx_dir):
            raise FileNotFoundError(
                f"ONNX models directory not found: {self.onnx_dir}"
            )
        logger.info(f"Using ONNX models from: {self.onnx_dir}")

        # Resolve the voice style path
        if voice_style_path:
            self.voice_style_path = os.path.expanduser(voice_style_path)
        else:
            if voice_style_name not in self.AVAILABLE_VOICE_STYLES:
                raise ValueError(
                    f"Invalid voice_style_name: {voice_style_name}. "
                    f"Available: {list(self.AVAILABLE_VOICE_STYLES.keys())}"
                )
            voice_style_file = self.AVAILABLE_VOICE_STYLES[voice_style_name]
            self.voice_style_path = os.path.join(
                self.DEFAULT_VOICE_STYLES_DIR, voice_style_file
            )

        # Validate voice style file
        if not os.path.exists(self.voice_style_path):
            raise FileNotFoundError(
                f"Voice style file not found: {self.voice_style_path}"
            )
        logger.info(f"Using voice style: {self.voice_style_path}")

        # Load models and style; any loader failure is surfaced as RuntimeError
        try:
            logger.info("Loading Supertonic TTS models...")
            self.text_to_speech = load_text_to_speech(self.onnx_dir, use_gpu=use_gpu)
            self.style = load_voice_style([self.voice_style_path], verbose=True)
            self.sample_rate = self.text_to_speech.sample_rate
            logger.info(f"TTS models loaded successfully (sample rate: {self.sample_rate}Hz)")
        except Exception as e:
            logger.error(f"Failed to load TTS models: {e}")
            raise RuntimeError(f"TTS model loading failed: {e}") from e

    @staticmethod
    def _split_text_into_chunks(text: str, max_length: int) -> list:
        """
        Split text into chunks of at most max_length characters.

        Text is broken at sentence boundaries ('. ', '! ', '? ', newline) and
        sentences are greedily packed, joined by single spaces. A sentence that
        is itself longer than max_length is broken at whitespace, and a single
        word longer than max_length is cut into max_length-sized slices, so no
        returned chunk exceeds the limit.

        Args:
            text: The text to split.
            max_length: Maximum chunk length in characters (must be >= 1).

        Returns:
            List of chunk strings; ``[text]`` unchanged when it already fits.

        Raises:
            ValueError: If max_length is less than 1.
        """
        if max_length < 1:
            raise ValueError(f"Invalid max_length: {max_length}. Must be at least 1")
        if len(text) <= max_length:
            return [text]

        # Split on each sentence ending in turn, keeping the punctuation mark
        # (but not the trailing space/newline) attached to its sentence.
        sentences = [text]
        for ending in ('. ', '! ', '? ', '\n'):
            marker = ending.strip()
            pieces = []
            for sentence in sentences:
                *heads, tail = sentence.split(ending)
                pieces.extend(head + marker for head in heads)
                if tail.strip():
                    pieces.append(tail)
            sentences = pieces or sentences

        # Reduce everything to units that individually fit in max_length.
        units = []
        for sentence in sentences:
            sentence = sentence.strip()
            if not sentence:
                continue
            if len(sentence) <= max_length:
                units.append(sentence)
                continue
            # Over-long sentence: fall back to words, hard-slicing any word
            # that is still too long.
            for word in sentence.split():
                units.extend(
                    word[start:start + max_length]
                    for start in range(0, len(word), max_length)
                )

        # Greedily pack units into chunks, joined by a single space.
        chunks = []
        current_chunk = ""
        for unit in units:
            if not current_chunk:
                current_chunk = unit
            elif len(current_chunk) + 1 + len(unit) <= max_length:
                current_chunk += " " + unit
            else:
                chunks.append(current_chunk)
                current_chunk = unit
        if current_chunk:
            chunks.append(current_chunk)

        return chunks

    def generate_speech(
        self,
        text: str,
        lang: str = 'en',
        speed: float = 1.0,
        total_step: int = 15
    ) -> bytes:
        """
        Generate speech from text.

        Args:
            text: The text to synthesize into speech.
            lang: Language code ('en', 'ko', 'es', 'pt', 'fr'). Default is 'en'.
            speed: Speech speed multiplier. Higher values = faster speech.
                Must be greater than 0 and at most 3. Default is 1.0.
            total_step: Number of denoising steps for generation. More steps =
                better quality but slower. Must be between 1 and 50.
                Default is 15.

        Returns:
            bytes: Raw WAV audio data (can be written directly to file or sent
                via HTTP with Content-Type: audio/wav).

        Raises:
            ValueError: If text is empty, lang is not supported, or speed /
                total_step are out of range.
            RuntimeError: If speech generation fails.

        Example:
            >>> audio = tts.generate_speech("Hello world", lang='en', speed=1.05)
            >>> with open('output.wav', 'wb') as f:
            ...     f.write(audio)
        """
        # Validate inputs
        if not text or not text.strip():
            raise ValueError("Text cannot be empty")

        if lang not in self._SUPPORTED_LANGS:
            supported_langs = list(self._SUPPORTED_LANGS)
            raise ValueError(
                f"Unsupported language: {lang}. Supported: {supported_langs}"
            )

        if speed <= 0 or speed > 3:
            raise ValueError(f"Invalid speed: {speed}. Must be between 0 and 3")

        if total_step < 1 or total_step > 50:
            raise ValueError(f"Invalid total_step: {total_step}. Must be between 1 and 50")

        logger.info(f"Generating speech: '{text[:50]}...' (lang={lang}, speed={speed}, steps={total_step})")

        try:
            chunks = self._split_text_into_chunks(text, self._MAX_CHUNK_LENGTH)
            logger.info(f"Text split into {len(chunks)} chunk(s)")

            all_audio_chunks = []

            for i, chunk in enumerate(chunks):
                logger.info(f"Processing chunk {i+1}/{len(chunks)}: '{chunk[:30]}...'")

                # Generate speech using the Supertonic TextToSpeech instance
                wav, duration = self.text_to_speech(
                    text=chunk,
                    lang=lang,
                    style=self.style,
                    total_step=total_step,
                    speed=speed
                )

                # First batch item, trimmed to the reported duration.
                # NOTE(review): assumes wav is (batch, samples) and duration is
                # seconds per batch item -- inferred from this indexing; confirm
                # against helper.py.
                audio_data = wav[0, :int(self.sample_rate * duration[0].item())]
                all_audio_chunks.append(audio_data)

            # Concatenate all audio chunks
            if len(all_audio_chunks) == 1:
                final_audio = all_audio_chunks[0]
            else:
                final_audio = np.concatenate(all_audio_chunks)

            # Encode as WAV into an in-memory buffer to get raw bytes
            buffer = BytesIO()
            sf.write(buffer, final_audio, self.sample_rate, format='WAV')
            audio_bytes = buffer.getvalue()

            total_duration = len(final_audio) / self.sample_rate
            logger.info(f"Generated {len(audio_bytes)} bytes of audio ({total_duration:.2f}s)")
            return audio_bytes

        except Exception as e:
            logger.error(f"Speech generation failed: {e}")
            raise RuntimeError(f"Failed to generate speech: {e}") from e

    def set_voice_style(self, voice_style_name: str) -> None:
        """
        Change the voice style to one of the built-in styles.

        The call is a no-op when the requested name is already current and the
        loaded style file is that name's built-in file. If the instance was
        created from a custom voice_style_path, requesting the current name
        loads the built-in file instead of silently keeping the custom one.

        Args:
            voice_style_name: Name of the new voice style (M1, M2, F1, F2).

        Raises:
            ValueError: If voice_style_name is not available.
            FileNotFoundError: If the voice style file doesn't exist.
            RuntimeError: If loading the new style fails.
        """
        voice_style_file = self.AVAILABLE_VOICE_STYLES.get(voice_style_name)
        new_voice_style_path = (
            os.path.join(self.DEFAULT_VOICE_STYLES_DIR, voice_style_file)
            if voice_style_file is not None
            else None
        )

        # Same name: skip the reload unless a different (custom) file is loaded.
        # An unknown-but-current name stays a no-op, as before.
        if voice_style_name == self.voice_style_name and new_voice_style_path in (
            None,
            self.voice_style_path,
        ):
            logger.info(f"Already using voice style: {voice_style_name}")
            return

        if voice_style_file is None:
            raise ValueError(
                f"Invalid voice_style_name: {voice_style_name}. "
                f"Available: {list(self.AVAILABLE_VOICE_STYLES.keys())}"
            )

        if not os.path.exists(new_voice_style_path):
            raise FileNotFoundError(
                f"Voice style file not found: {new_voice_style_path}"
            )

        try:
            self.style = load_voice_style([new_voice_style_path], verbose=True)
            self.voice_style_name = voice_style_name
            self.voice_style_path = new_voice_style_path
            logger.info(f"Voice style changed to: {voice_style_name}")
        except Exception as e:
            logger.error(f"Failed to load voice style: {e}")
            raise RuntimeError(f"Failed to load voice style: {e}") from e
321
+
322
+
323
# Process-wide SupertonicTTS instance shared by the Flask app; None until
# initialize_tts() has stored one.
_tts_instance: Optional[SupertonicTTS] = None


def get_tts_instance() -> Optional[SupertonicTTS]:
    """
    Return the shared module-level TTS instance.

    Intended for Flask apps that build the engine once at startup and reuse
    it across requests.

    Returns:
        The global SupertonicTTS instance, or None if not initialized.
    """
    # Read-only access to the module global, so no `global` statement is needed.
    return _tts_instance
339
+
340
+
341
def initialize_tts(
    onnx_dir: Optional[str] = None,
    voice_style_name: str = 'M1',
    use_gpu: bool = False
) -> Optional[SupertonicTTS]:
    """
    Create the shared SupertonicTTS instance and store it in the module global.

    Any exception raised during construction is logged and swallowed; in that
    case the global is reset to None.

    Args:
        onnx_dir: Path to ONNX models directory.
        voice_style_name: Default voice style to use.
        use_gpu: Whether to use GPU for inference.

    Returns:
        The initialized SupertonicTTS instance, or None if initialization fails.
    """
    global _tts_instance
    engine_options = {
        "onnx_dir": onnx_dir,
        "voice_style_name": voice_style_name,
        "use_gpu": use_gpu,
    }
    try:
        _tts_instance = SupertonicTTS(**engine_options)
        logger.info("Global TTS instance initialized")
    except Exception as e:
        logger.error(f"Failed to initialize TTS: {e}")
        _tts_instance = None
    # Holds the new instance on success and None after a failure.
    return _tts_instance
370
+
371
+
372
if __name__ == "__main__":
    # Simple smoke test when run directly: synthesize one sentence, write it to
    # the system temp directory, and exit non-zero if anything fails.
    import tempfile
    import traceback

    print("Supertonic TTS Wrapper - Direct Test")
    print("=" * 50)

    try:
        # Initialize TTS
        tts = SupertonicTTS(
            onnx_dir=os.environ.get("SUPERTONIC_ONNX_DIR", os.path.expanduser("~/supertonic/assets/onnx")),
            voice_style_name="M1"
        )

        # Generate speech
        test_text = "Hello world, this is a test of the Supertonic TTS system."
        audio = tts.generate_speech(test_text, lang='en', speed=1.05)

        # Save to the platform temp directory instead of a hard-coded /tmp
        output_path = os.path.join(tempfile.gettempdir(), "supertonic_test_output.wav")
        with open(output_path, 'wb') as f:
            f.write(audio)

        print(f"Success! Audio saved to: {output_path}")
        print(f"Generated {len(audio)} bytes of audio data")

    except Exception as e:
        print(f"Error: {e}")
        traceback.print_exc()
        # Signal the failure to the calling shell rather than exiting with 0
        sys.exit(1)