openvoiceui 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. package/.env.example +104 -0
  2. package/Dockerfile +30 -0
  3. package/LICENSE +21 -0
  4. package/README.md +638 -0
  5. package/SETUP.md +360 -0
  6. package/app.py +232 -0
  7. package/auto-approve-devices.js +111 -0
  8. package/cli/index.js +372 -0
  9. package/config/__init__.py +4 -0
  10. package/config/default.yaml +43 -0
  11. package/config/flags.yaml +67 -0
  12. package/config/loader.py +203 -0
  13. package/config/providers.yaml +71 -0
  14. package/config/speech_normalization.yaml +182 -0
  15. package/config/theme.json +4 -0
  16. package/data/greetings.json +25 -0
  17. package/default-pages/ai-image-creator.html +915 -0
  18. package/default-pages/bulk-image-uploader.html +492 -0
  19. package/default-pages/desktop.html +2865 -0
  20. package/default-pages/file-explorer.html +854 -0
  21. package/default-pages/interactive-map.html +655 -0
  22. package/default-pages/style-guide.html +1005 -0
  23. package/default-pages/website-setup.html +1623 -0
  24. package/deploy/openclaw/Dockerfile +46 -0
  25. package/deploy/openvoiceui.service +30 -0
  26. package/deploy/setup-nginx.sh +50 -0
  27. package/deploy/setup-sudo.sh +306 -0
  28. package/deploy/skill-runner/Dockerfile +19 -0
  29. package/deploy/skill-runner/requirements.txt +14 -0
  30. package/deploy/skill-runner/server.py +269 -0
  31. package/deploy/supertonic/Dockerfile +22 -0
  32. package/deploy/supertonic/server.py +79 -0
  33. package/docker-compose.pinokio.yml +11 -0
  34. package/docker-compose.yml +59 -0
  35. package/greetings.json +25 -0
  36. package/index.html +65 -0
  37. package/inject-device-identity.js +142 -0
  38. package/package.json +82 -0
  39. package/profiles/default.json +114 -0
  40. package/profiles/manager.py +354 -0
  41. package/profiles/schema.json +337 -0
  42. package/prompts/voice-system-prompt.md +149 -0
  43. package/providers/__init__.py +39 -0
  44. package/providers/base.py +63 -0
  45. package/providers/llm/__init__.py +12 -0
  46. package/providers/llm/base.py +71 -0
  47. package/providers/llm/clawdbot_provider.py +112 -0
  48. package/providers/llm/zai_provider.py +115 -0
  49. package/providers/registry.py +320 -0
  50. package/providers/stt/__init__.py +12 -0
  51. package/providers/stt/base.py +58 -0
  52. package/providers/stt/webspeech_provider.py +49 -0
  53. package/providers/stt/whisper_provider.py +100 -0
  54. package/providers/tts/__init__.py +20 -0
  55. package/providers/tts/base.py +91 -0
  56. package/providers/tts/groq_provider.py +74 -0
  57. package/providers/tts/supertonic_provider.py +72 -0
  58. package/requirements.txt +38 -0
  59. package/routes/__init__.py +10 -0
  60. package/routes/admin.py +515 -0
  61. package/routes/canvas.py +1315 -0
  62. package/routes/chat.py +51 -0
  63. package/routes/conversation.py +2158 -0
  64. package/routes/elevenlabs_hybrid.py +306 -0
  65. package/routes/greetings.py +98 -0
  66. package/routes/icons.py +279 -0
  67. package/routes/image_gen.py +364 -0
  68. package/routes/instructions.py +190 -0
  69. package/routes/music.py +838 -0
  70. package/routes/onboarding.py +43 -0
  71. package/routes/pi.py +62 -0
  72. package/routes/profiles.py +215 -0
  73. package/routes/report_issue.py +68 -0
  74. package/routes/static_files.py +533 -0
  75. package/routes/suno.py +664 -0
  76. package/routes/theme.py +81 -0
  77. package/routes/transcripts.py +199 -0
  78. package/routes/vision.py +348 -0
  79. package/routes/workspace.py +288 -0
  80. package/server.py +1510 -0
  81. package/services/__init__.py +1 -0
  82. package/services/auth.py +143 -0
  83. package/services/canvas_versioning.py +239 -0
  84. package/services/db_pool.py +107 -0
  85. package/services/gateway.py +16 -0
  86. package/services/gateway_manager.py +333 -0
  87. package/services/gateways/__init__.py +12 -0
  88. package/services/gateways/base.py +110 -0
  89. package/services/gateways/compat.py +264 -0
  90. package/services/gateways/openclaw.py +1134 -0
  91. package/services/health.py +100 -0
  92. package/services/memory_client.py +455 -0
  93. package/services/paths.py +26 -0
  94. package/services/speech_normalizer.py +285 -0
  95. package/services/tts.py +270 -0
  96. package/setup-config.js +262 -0
  97. package/sounds/air_horn.mp3 +0 -0
  98. package/sounds/bruh.mp3 +0 -0
  99. package/sounds/crowd_cheer.mp3 +0 -0
  100. package/sounds/gunshot.mp3 +0 -0
  101. package/sounds/impact.mp3 +0 -0
  102. package/sounds/lets_go.mp3 +0 -0
  103. package/sounds/record_stop.mp3 +0 -0
  104. package/sounds/rewind.mp3 +0 -0
  105. package/sounds/sad_trombone.mp3 +0 -0
  106. package/sounds/scratch_long.mp3 +0 -0
  107. package/sounds/yeah.mp3 +0 -0
  108. package/src/adapters/ClawdBotAdapter.js +264 -0
  109. package/src/adapters/_template.js +133 -0
  110. package/src/adapters/elevenlabs-classic.js +841 -0
  111. package/src/adapters/elevenlabs-hybrid.js +812 -0
  112. package/src/adapters/hume-evi.js +676 -0
  113. package/src/admin.html +1339 -0
  114. package/src/app.js +8802 -0
  115. package/src/core/Config.js +173 -0
  116. package/src/core/EmotionEngine.js +307 -0
  117. package/src/core/EventBridge.js +180 -0
  118. package/src/core/EventBus.js +117 -0
  119. package/src/core/VoiceSession.js +607 -0
  120. package/src/face/BaseFace.js +259 -0
  121. package/src/face/EyeFace.js +208 -0
  122. package/src/face/HaloSmokeFace.js +509 -0
  123. package/src/face/manifest.json +27 -0
  124. package/src/face/previews/eyes.svg +16 -0
  125. package/src/face/previews/orb.svg +29 -0
  126. package/src/features/MusicPlayer.js +620 -0
  127. package/src/features/Soundboard.js +128 -0
  128. package/src/providers/DeepgramSTT.js +472 -0
  129. package/src/providers/DeepgramStreamingSTT.js +766 -0
  130. package/src/providers/GroqSTT.js +559 -0
  131. package/src/providers/TTSPlayer.js +323 -0
  132. package/src/providers/WebSpeechSTT.js +479 -0
  133. package/src/providers/tts/BaseTTSProvider.js +81 -0
  134. package/src/providers/tts/HumeProvider.js +77 -0
  135. package/src/providers/tts/SupertonicProvider.js +174 -0
  136. package/src/providers/tts/index.js +140 -0
  137. package/src/shell/adapter-registry.js +154 -0
  138. package/src/shell/caller-bridge.js +35 -0
  139. package/src/shell/camera-bridge.js +28 -0
  140. package/src/shell/canvas-bridge.js +32 -0
  141. package/src/shell/commercial-bridge.js +44 -0
  142. package/src/shell/face-bridge.js +44 -0
  143. package/src/shell/music-bridge.js +60 -0
  144. package/src/shell/orchestrator.js +233 -0
  145. package/src/shell/profile-discovery.js +303 -0
  146. package/src/shell/sounds-bridge.js +28 -0
  147. package/src/shell/transcript-bridge.js +61 -0
  148. package/src/shell/waveform-bridge.js +33 -0
  149. package/src/styles/base.css +2862 -0
  150. package/src/styles/face.css +417 -0
  151. package/src/styles/pi-overrides.css +89 -0
  152. package/src/styles/theme-dark.css +67 -0
  153. package/src/test-tts.html +175 -0
  154. package/src/ui/AppShell.js +544 -0
  155. package/src/ui/ProfileSwitcher.js +228 -0
  156. package/src/ui/SessionControl.js +240 -0
  157. package/src/ui/face/FacePicker.js +195 -0
  158. package/src/ui/face/FaceRenderer.js +309 -0
  159. package/src/ui/settings/PlaylistEditor.js +366 -0
  160. package/src/ui/settings/SettingsPanel.css +684 -0
  161. package/src/ui/settings/SettingsPanel.js +419 -0
  162. package/src/ui/settings/TTSVoicePreview.js +210 -0
  163. package/src/ui/themes/ThemeManager.js +213 -0
  164. package/src/ui/visualizers/BaseVisualizer.js +29 -0
  165. package/src/ui/visualizers/PartyFXVisualizer.css +291 -0
  166. package/src/ui/visualizers/PartyFXVisualizer.js +637 -0
  167. package/static/emulators/jsdos/js-dos.css +1 -0
  168. package/static/emulators/jsdos/js-dos.js +22 -0
  169. package/static/favicon.svg +55 -0
  170. package/static/icons/apple-touch-icon.png +0 -0
  171. package/static/icons/favicon-32.png +0 -0
  172. package/static/icons/icon-192.png +0 -0
  173. package/static/icons/icon-512.png +0 -0
  174. package/static/install.html +449 -0
  175. package/static/manifest.json +26 -0
  176. package/static/sw.js +21 -0
  177. package/tts_providers/__init__.py +136 -0
  178. package/tts_providers/base_provider.py +319 -0
  179. package/tts_providers/groq_provider.py +155 -0
  180. package/tts_providers/hume_provider.py +226 -0
  181. package/tts_providers/providers_config.json +119 -0
  182. package/tts_providers/qwen3_provider.py +371 -0
  183. package/tts_providers/resemble_provider.py +315 -0
  184. package/tts_providers/supertonic_provider.py +557 -0
  185. package/tts_providers/supertonic_tts.py +399 -0
@@ -0,0 +1,136 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ TTS Providers Package.
4
+
5
+ This package provides a unified interface for multiple Text-to-Speech backends.
6
+ All providers inherit from TTSProvider base class and implement the same API.
7
+
8
+ Available Providers:
9
+ - HumeProvider: Hume EVI WebSocket TTS (INACTIVE - placeholder only)
10
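+ - SupertonicProvider: Local ONNX-based TTS (active, recommended)
+ - GroqProvider: Groq Orpheus cloud TTS (requires GROQ_API_KEY)
+ - Qwen3Provider, ResembleProvider: additional registered backends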
11
+
12
+ Usage:
13
+ >>> from tts_providers import get_provider, list_providers
14
+ >>> # Get default provider (Supertonic)
15
+ >>> provider = get_provider()
16
+ >>> audio = provider.generate_speech("Hello world", voice='M1')
17
+ >>>
18
+ >>> # List all providers
19
+ >>> providers = list_providers()
20
+
21
+ Author: OpenVoiceUI
22
+ Date: 2026-02-11
23
+ """
24
+
25
+ import json
26
+ import os
27
+ from typing import Optional, Dict, Any, List
28
+
29
+ from .base_provider import TTSProvider
30
+ from .hume_provider import HumeProvider
31
+ from .supertonic_provider import SupertonicProvider
32
+ from .groq_provider import GroqProvider
33
+ from .qwen3_provider import Qwen3Provider
34
+ from .resemble_provider import ResembleProvider
35
+
36
+ # Provider registry
37
# Keys are the provider ids accepted by get_provider(); values are the
# provider classes, which are instantiated on demand rather than here.
_PROVIDERS = dict(
    hume=HumeProvider,
    supertonic=SupertonicProvider,
    groq=GroqProvider,
    qwen3=Qwen3Provider,
    resemble=ResembleProvider,
)
44
+
45
+ def _load_config() -> Dict[str, Any]:
46
+ """Load providers configuration from JSON file."""
47
+ config_path = os.path.join(os.path.dirname(__file__), 'providers_config.json')
48
+ try:
49
+ with open(config_path, 'r') as f:
50
+ return json.load(f)
51
+ except FileNotFoundError:
52
+ return {'providers': {}, 'default_provider': 'supertonic'}
53
+
54
def get_provider(provider_id: Optional[str] = None) -> "TTSProvider":
    """
    Get a TTS provider instance.

    Args:
        provider_id: Key of the provider in the registry (``_PROVIDERS``),
            e.g. 'hume', 'supertonic', 'groq', 'qwen3' or 'resemble'.
            If None, the ``default_provider`` entry of the providers config
            is used, falling back to 'supertonic' when that entry is
            missing or empty.

    Returns:
        A newly constructed TTSProvider instance (one per call).

    Raises:
        ValueError: If provider_id is not a registered provider.

    Example:
        >>> provider = get_provider('supertonic')
        >>> audio = provider.generate_speech("Hello", voice='M1')
    """
    if provider_id is None:
        # Only read the config file when the caller did not pick a provider;
        # `or` also covers a null/empty default_provider in the config.
        provider_id = _load_config().get('default_provider') or 'supertonic'

    if provider_id not in _PROVIDERS:
        available = ', '.join(_PROVIDERS)
        raise ValueError(f"Unknown provider '{provider_id}'. Available: {available}")

    return _PROVIDERS[provider_id]()
80
+
81
def list_providers(include_inactive: bool = True) -> List[Dict[str, Any]]:
    """
    List all registered TTS providers with metadata.

    Each registered provider class is instantiated and its ``get_info()``
    result is combined with the matching entry (if any) of the providers
    config. For every metadata key the precedence is: value from the config
    entry, then the value the provider reported itself, then a neutral
    default. Every returned entry carries a ``provider_id`` key.

    Providers whose construction or ``get_info()`` call raises are skipped
    and a warning is logged.

    Args:
        include_inactive: If True, include providers whose status is not
            'active'. Default True.

    Returns:
        List of provider metadata dictionaries, in registry order.

    Example:
        >>> for p in list_providers():
        ...     print(f"{p['name']}: ${p['cost_per_minute']}/min")
    """
    # Local import: this module's import block does not bring in logging.
    import logging

    log = logging.getLogger(__name__)

    configured = _load_config().get('providers') or {}
    if not isinstance(configured, dict):
        configured = {}

    providers: List[Dict[str, Any]] = []
    for provider_id, provider_class in _PROVIDERS.items():
        try:
            # Copy so the merge below never mutates a dict owned by the provider.
            info = dict(provider_class().get_info())
        except Exception as exc:
            log.warning("Failed to load provider %s: %s", provider_id, exc)
            continue

        config_data = configured.get(provider_id)
        if not isinstance(config_data, dict):
            config_data = {}

        # Built per provider so list defaults are never shared between entries.
        defaults: Dict[str, Any] = {
            'cost_per_minute': 0.0,
            'quality': 'unknown',
            'latency': 'unknown',
            'features': [],
            'requires_api_key': False,
            'languages': [],
            'notes': '',
        }
        for key, default in defaults.items():
            if key in config_data:
                info[key] = config_data[key]
            else:
                # Keep what the provider reported; default only if absent.
                info.setdefault(key, default)
        info['provider_id'] = provider_id

        # Filter inactive if requested
        if not include_inactive and info.get('status') != 'active':
            continue

        providers.append(info)

    return providers
126
+
127
+ __all__ = [
128
+ 'TTSProvider',
129
+ 'HumeProvider',
130
+ 'SupertonicProvider',
131
+ 'GroqProvider',
132
+ 'Qwen3Provider',
133
+ 'ResembleProvider',
134
+ 'get_provider',
135
+ 'list_providers',
136
+ ]
@@ -0,0 +1,319 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Base TTS Provider Abstract Class for OpenVoiceUI.
4
+
5
+ This module defines the abstract interface that all TTS providers must implement.
6
+ It provides a consistent API for generating speech, listing available voices,
7
+ and retrieving provider information.
8
+
9
+ Author: OpenVoiceUI
10
+ Date: 2026-02-11
11
+ """
12
+
13
+ from abc import ABC, abstractmethod
14
+ from typing import Any, Dict, List, Optional
15
+ from dataclasses import dataclass
16
+
17
+
18
@dataclass
class TTSVoice:
    """
    A single voice offered by a TTS provider.

    Attributes:
        id: Unique identifier of the voice (e.g., 'M1', 'your-hume-voice-id')
        name: Display name (e.g., 'Male Voice 1', 'Custom Voice')
        language: Language code (e.g., 'en-US', 'en', 'es'); defaults to 'en'
        gender: 'male', 'female', 'neutral', or None when unspecified
        description: Optional free-text description of the voice
    """
    id: str
    name: str
    language: str = 'en'
    gender: Optional[str] = None
    description: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the voice as a plain dictionary keyed by attribute name."""
        keys = ('id', 'name', 'language', 'gender', 'description')
        return {key: getattr(self, key) for key in keys}
45
+
46
+
47
@dataclass
class TTSProviderInfo:
    """
    Descriptive metadata for a TTS provider.

    Attributes:
        name: Provider name (e.g., 'supertonic', 'hume')
        display_name: Human-readable name (e.g., 'Supertonic TTS', 'Hume EVI')
        version: Provider version string
        cost_per_minute: Cost in USD per minute of generated audio
        quality: Quality rating ('low', 'medium', 'high', 'premium')
        latency: Expected latency ('instant', 'fast', 'medium', 'slow')
        features: Feature tags (e.g., ['emotion-aware', 'multi-language'])
        requires_api_key: Whether an API key is needed
        is_online: Whether internet connectivity is needed
        status: Current status ('active', 'inactive', 'error'); defaults to 'active'
    """
    name: str
    display_name: str
    version: str
    cost_per_minute: float
    quality: str
    latency: str
    features: List[str]
    requires_api_key: bool
    is_online: bool
    status: str = 'active'

    def to_dict(self) -> Dict[str, Any]:
        """Return the provider info as a plain dictionary keyed by attribute name."""
        keys = (
            'name',
            'display_name',
            'version',
            'cost_per_minute',
            'quality',
            'latency',
            'features',
            'requires_api_key',
            'is_online',
            'status',
        )
        return {key: getattr(self, key) for key in keys}
89
+
90
+
91
class TTSProvider(ABC):
    """
    Abstract interface shared by every Text-to-Speech backend.

    Concrete providers subclass this and supply the three abstract methods;
    the remaining helpers are implemented here on top of them.

    Required Methods:
        - generate_speech(text, **kwargs): Convert text to audio bytes
        - list_voices(): Return list of available voice names
        - get_info(): Return provider metadata (name, status, capabilities)

    Example:
        >>> class MyTTS(TTSProvider):
        ...     def generate_speech(self, text, **kwargs):
        ...         # Implementation here
        ...         return audio_bytes
        ...     def list_voices(self):
        ...         return ['voice1', 'voice2']
        ...     def get_info(self):
        ...         return {'name': 'MyTTS', 'status': 'active'}
    """

    @abstractmethod
    def generate_speech(self, text: str, **kwargs) -> bytes:
        """
        Synthesize speech audio for ``text``.

        Args:
            text: The text to synthesize.
            **kwargs: Provider-specific parameters (voice, speed, lang, etc.)

        Returns:
            bytes: Raw audio data (usually WAV format), suitable for writing
                to a file or sending over HTTP with Content-Type: audio/wav.

        Raises:
            ValueError: If text is empty or parameters are invalid.
            RuntimeError: If speech generation fails.

        Example:
            >>> audio = provider.generate_speech("Hello world", voice='M1')
            >>> with open('output.wav', 'wb') as f:
            ...     f.write(audio)
        """

    @abstractmethod
    def list_voices(self) -> List[str]:
        """
        Return the voice identifiers this provider offers.

        Returns:
            List[str]: Voice identifiers (e.g., ['M1', 'M2', 'F1']), each
                intended to be usable as the 'voice' parameter of
                generate_speech().

        Example:
            >>> provider.list_voices()
            ['M1', 'M2', 'F1', 'F2']
        """

    @abstractmethod
    def get_info(self) -> Dict[str, Any]:
        """
        Return metadata describing this provider.

        Returns:
            Dict with at minimum:
                - 'name': str - Provider display name
                - 'status': str - 'active', 'inactive', or 'error'
                - 'description': str - Brief description of the provider
                - 'capabilities': dict - Optional feature flags
                    - 'streaming': bool - Supports streaming audio
                    - 'ssml': bool - Supports SSML markup
                    - 'custom_voices': bool - Supports custom voice cloning
                    - 'languages': List[str] - Supported language codes

        Example:
            >>> provider.get_info()
            {
                'name': 'Supertonic',
                'status': 'active',
                'description': 'Local ONNX-based TTS engine',
                'capabilities': {
                    'streaming': False,
                    'ssml': False,
                    'custom_voices': True,
                    'languages': ['en', 'ko', 'es', 'pt', 'fr']
                }
            }
        """

    def is_available(self) -> bool:
        """
        Report whether the provider can currently be used.

        The base implementation reads 'status' from get_info() (treating a
        missing status as 'inactive') and compares it with 'active'.
        Subclasses may override this with their own check.

        Returns:
            bool: True when the reported status is 'active', else False.

        Example:
            >>> if provider.is_available():
            ...     audio = provider.generate_speech("Hello")
        """
        status = self.get_info().get('status', 'inactive')
        return status == 'active'

    def validate_text(self, text: str) -> None:
        """
        Ensure ``text`` can be passed to speech generation.

        Args:
            text: Text to validate.

        Raises:
            ValueError: If text is None, is not a string, or is empty /
                whitespace-only.

        Example:
            >>> provider.validate_text("Hello world")  # OK
            >>> provider.validate_text("")  # Raises ValueError
        """
        if text is None:
            raise ValueError("Text cannot be None")

        if not isinstance(text, str):
            kind = type(text).__name__
            raise ValueError(f"Text must be a string, got {kind}")

        stripped = text.strip()
        if not stripped:
            raise ValueError("Text cannot be empty or contain only whitespace")

    def validate_voice(self, voice: str) -> bool:
        """
        Tell whether ``voice`` is one of the identifiers from list_voices().

        Args:
            voice: Voice identifier to look up.

        Returns:
            bool: True if the voice is listed, False otherwise.

        Example:
            >>> provider.validate_voice('M1')
            True
            >>> provider.validate_voice('invalid')
            False
        """
        known_voices = self.list_voices()
        return voice in known_voices

    def get_default_voice(self) -> Optional[str]:
        """
        Return the provider's default voice.

        Returns:
            The first entry of list_voices(), or None when that list is empty.

        Example:
            >>> provider.get_default_voice()
            'M1'
        """
        voices = self.list_voices()
        if not voices:
            return None
        return voices[0]

    def __repr__(self) -> str:
        """Return ``ClassName(name='...', status='...')`` built from get_info()."""
        info = self.get_info()
        name = info.get('name', 'Unknown')
        status = info.get('status', 'unknown')
        return f"{self.__class__.__name__}(name='{name}', status='{status}')"
259
+
260
+
261
class TTSProviderError(Exception):
    """
    Root of the TTS provider exception hierarchy.

    Raised for errors specific to a provider implementation. The provider's
    name is kept on the instance and prefixed to the message.
    """

    def __init__(self, provider_name: str, message: str):
        """
        Build the exception.

        Args:
            provider_name: Name of the provider that raised the error.
            message: Description of what went wrong.
        """
        self.provider_name = provider_name
        formatted = f"[{provider_name}] {message}"
        super().__init__(formatted)


class TTSGenerationError(TTSProviderError):
    """
    Speech generation failed.

    Possible causes include invalid input, network issues, or problems
    with the TTS service.
    """


class TTSConfigurationError(TTSProviderError):
    """
    The provider is misconfigured.

    Possible causes include missing API keys, invalid paths, or other
    configuration issues.
    """


class TTSVoiceNotFoundError(TTSProviderError):
    """
    A requested voice is not available.

    Raised when the given voice ID does not exist in the provider's
    voice catalog.
    """
309
+
310
+
311
+ __all__ = [
312
+ 'TTSProvider',
313
+ 'TTSVoice',
314
+ 'TTSProviderInfo',
315
+ 'TTSProviderError',
316
+ 'TTSGenerationError',
317
+ 'TTSConfigurationError',
318
+ 'TTSVoiceNotFoundError',
319
+ ]
@@ -0,0 +1,155 @@
1
+ """
2
+ Groq Orpheus TTS Provider — canopylabs/orpheus-v1-english via Groq LPU.
3
+
4
+ ~130-200ms TTFB, natural human-like prosody, WAV output.
5
+ API key: GROQ_API_KEY env var
6
+ """
7
+
8
+ import os
9
+ import time
10
+ import logging
11
+
12
+ from .base_provider import TTSProvider
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+ MODEL = "canopylabs/orpheus-v1-english"
17
+ # If a single TTS call exceeds this, abort and let fallback handle it.
18
+ # Normal Groq latency is 130-700ms. Rate-limited calls take 12-27s.
19
+ GROQ_TTS_TIMEOUT_SECONDS = 5.0
20
+
21
+ AVAILABLE_VOICES = [
22
+ "autumn", # Female (default)
23
+ "diana", # Female
24
+ "hannah", # Female
25
+ "austin", # Male
26
+ "daniel", # Male
27
+ "troy", # Male
28
+ ]
29
+
30
+
31
class GroqProvider(TTSProvider):
    """
    TTS Provider using Groq Orpheus (canopylabs/orpheus-v1-english).

    Voices: autumn, diana, hannah, austin, daniel, troy
    Output: WAV audio bytes (requests are made with response_format="wav")

    The API key is read from the GROQ_API_KEY environment variable when the
    provider is constructed. Without a key the provider reports status
    'error' and generate_speech() raises RuntimeError.
    """

    def __init__(self):
        super().__init__()
        # Strip so a key carrying stray whitespace/newline from an env file
        # is neither sent as-is nor mistaken for "set" when it is blank.
        self.api_key = os.getenv('GROQ_API_KEY', '').strip()
        self._status = 'active' if self.api_key else 'error'
        self._init_error = None if self.api_key else 'GROQ_API_KEY not set'
        self._client = None  # created lazily by _get_client()

    def _get_client(self):
        """Return the cached Groq client, creating it on first use.

        The ``groq`` package is imported lazily so this module can be
        imported without it installed.

        Raises:
            RuntimeError: If the ``groq`` package is not installed.
        """
        if self._client is None:
            try:
                from groq import Groq
            except ImportError as exc:
                raise RuntimeError(
                    "groq package not installed — run: pip install groq"
                ) from exc
            self._client = Groq(
                api_key=self.api_key,
                timeout=GROQ_TTS_TIMEOUT_SECONDS,
            )
        return self._client

    @staticmethod
    def _parse_api_error(exc: Exception) -> tuple:
        """Extract ``(code, message)`` from the text of a Groq API error.

        Looks for ``'code': '...'`` and ``'message': '...'`` fragments in
        ``str(exc)``. Falls back to ``'unknown'`` and the full error text
        when a fragment is not present.
        """
        import re

        err_str = str(exc)
        code_match = re.search(r"'code':\s*'([^']+)'", err_str)
        msg_match = re.search(r"'message':\s*'([^']+)'", err_str)
        err_code = code_match.group(1) if code_match else 'unknown'
        err_msg = msg_match.group(1) if msg_match else err_str
        return err_code, err_msg

    def generate_speech(self, text: str, voice: str = 'autumn', **kwargs) -> bytes:
        """
        Generate speech via Groq Orpheus.

        Args:
            text: Text to synthesize.
            voice: One of AVAILABLE_VOICES. Default: 'autumn'. An unknown
                voice is replaced by 'autumn' and a warning is logged.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            WAV audio bytes.

        Raises:
            RuntimeError: If GROQ_API_KEY is not set, or if the request
                fails (message formatted as ``[groq:<code>] <message>``,
                with the original exception chained as the cause).
            ValueError: If text is None, not a string, or blank.
        """
        if not self.api_key:
            raise RuntimeError("GROQ_API_KEY not set")

        self.validate_text(text)

        if voice not in AVAILABLE_VOICES:
            logger.warning("[Groq] Unknown voice '%s', using autumn", voice)
            voice = 'autumn'

        # perf_counter is monotonic, so the elapsed time cannot go negative
        # or jump when the system clock is adjusted.
        started = time.perf_counter()
        logger.info("[Groq] Requesting TTS: '%s' voice=%s", text[:60], voice)

        try:
            client = self._get_client()
            resp = client.audio.speech.create(
                model=MODEL,
                voice=voice,
                input=text,
                response_format="wav",
            )
            # The response may expose the payload as .content or via .read().
            audio_bytes = resp.content if hasattr(resp, 'content') else resp.read()
        except Exception as e:
            # Surface the structured error code so callers can tell failure
            # kinds apart from the message prefix.
            err_code, err_msg = self._parse_api_error(e)
            raise RuntimeError(f"[groq:{err_code}] {err_msg}") from e

        elapsed = int((time.perf_counter() - started) * 1000)
        logger.info("[Groq] Generated %d bytes in %dms", len(audio_bytes), elapsed)
        return audio_bytes

    def health_check(self) -> dict:
        """Probe the Groq API by listing models.

        Uses the shared client so the probe is bounded by
        GROQ_TTS_TIMEOUT_SECONDS like regular requests.

        Returns:
            Dict with 'ok' (bool), 'latency_ms' (int) and 'detail' (str).
            Never raises; failures are reported through 'ok' and 'detail'.
        """
        if not self.api_key:
            return {"ok": False, "latency_ms": 0, "detail": "GROQ_API_KEY not set"}
        started = time.perf_counter()
        try:
            self._get_client().models.list()
        except Exception as e:
            latency_ms = int((time.perf_counter() - started) * 1000)
            return {"ok": False, "latency_ms": latency_ms, "detail": str(e)}
        latency_ms = int((time.perf_counter() - started) * 1000)
        return {"ok": True, "latency_ms": latency_ms, "detail": "Groq reachable — Orpheus ready"}

    def list_voices(self) -> list:
        """Return a fresh copy of AVAILABLE_VOICES."""
        return list(AVAILABLE_VOICES)

    def get_default_voice(self) -> str:
        """Return the default voice, 'autumn'."""
        return 'autumn'

    def is_available(self) -> bool:
        """Return True when an API key was found at construction time."""
        return bool(self.api_key)

    def get_info(self) -> dict:
        """Return provider metadata, including status and any init error."""
        return {
            'name': 'Groq Orpheus',
            'provider_id': 'groq',
            'status': self._status,
            'description': 'Orpheus TTS via Groq LPU — fast, natural, human-like prosody',
            'quality': 'high',
            'latency': 'very-fast',
            'cost_per_minute': 0.05,
            'voices': list(AVAILABLE_VOICES),
            # NOTE(review): 'mp3-output' disagrees with audio_format 'wav'
            # below; left unchanged in case consumers match on the tag.
            'features': ['fast', 'natural', 'empathetic', 'mp3-output', 'cloud'],
            'requires_api_key': True,
            'languages': ['en'],
            'max_characters': 5000,
            'notes': 'Orpheus v1 English on Groq LPU. ~130-200ms latency. GROQ_API_KEY required.',
            'default_voice': 'autumn',
            'audio_format': 'wav',
            'sample_rate': 24000,
            'error': self._init_error,
        }