openvoiceui 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +104 -0
- package/Dockerfile +30 -0
- package/LICENSE +21 -0
- package/README.md +638 -0
- package/SETUP.md +360 -0
- package/app.py +232 -0
- package/auto-approve-devices.js +111 -0
- package/cli/index.js +372 -0
- package/config/__init__.py +4 -0
- package/config/default.yaml +43 -0
- package/config/flags.yaml +67 -0
- package/config/loader.py +203 -0
- package/config/providers.yaml +71 -0
- package/config/speech_normalization.yaml +182 -0
- package/config/theme.json +4 -0
- package/data/greetings.json +25 -0
- package/default-pages/ai-image-creator.html +915 -0
- package/default-pages/bulk-image-uploader.html +492 -0
- package/default-pages/desktop.html +2865 -0
- package/default-pages/file-explorer.html +854 -0
- package/default-pages/interactive-map.html +655 -0
- package/default-pages/style-guide.html +1005 -0
- package/default-pages/website-setup.html +1623 -0
- package/deploy/openclaw/Dockerfile +46 -0
- package/deploy/openvoiceui.service +30 -0
- package/deploy/setup-nginx.sh +50 -0
- package/deploy/setup-sudo.sh +306 -0
- package/deploy/skill-runner/Dockerfile +19 -0
- package/deploy/skill-runner/requirements.txt +14 -0
- package/deploy/skill-runner/server.py +269 -0
- package/deploy/supertonic/Dockerfile +22 -0
- package/deploy/supertonic/server.py +79 -0
- package/docker-compose.pinokio.yml +11 -0
- package/docker-compose.yml +59 -0
- package/greetings.json +25 -0
- package/index.html +65 -0
- package/inject-device-identity.js +142 -0
- package/package.json +82 -0
- package/profiles/default.json +114 -0
- package/profiles/manager.py +354 -0
- package/profiles/schema.json +337 -0
- package/prompts/voice-system-prompt.md +149 -0
- package/providers/__init__.py +39 -0
- package/providers/base.py +63 -0
- package/providers/llm/__init__.py +12 -0
- package/providers/llm/base.py +71 -0
- package/providers/llm/clawdbot_provider.py +112 -0
- package/providers/llm/zai_provider.py +115 -0
- package/providers/registry.py +320 -0
- package/providers/stt/__init__.py +12 -0
- package/providers/stt/base.py +58 -0
- package/providers/stt/webspeech_provider.py +49 -0
- package/providers/stt/whisper_provider.py +100 -0
- package/providers/tts/__init__.py +20 -0
- package/providers/tts/base.py +91 -0
- package/providers/tts/groq_provider.py +74 -0
- package/providers/tts/supertonic_provider.py +72 -0
- package/requirements.txt +38 -0
- package/routes/__init__.py +10 -0
- package/routes/admin.py +515 -0
- package/routes/canvas.py +1315 -0
- package/routes/chat.py +51 -0
- package/routes/conversation.py +2158 -0
- package/routes/elevenlabs_hybrid.py +306 -0
- package/routes/greetings.py +98 -0
- package/routes/icons.py +279 -0
- package/routes/image_gen.py +364 -0
- package/routes/instructions.py +190 -0
- package/routes/music.py +838 -0
- package/routes/onboarding.py +43 -0
- package/routes/pi.py +62 -0
- package/routes/profiles.py +215 -0
- package/routes/report_issue.py +68 -0
- package/routes/static_files.py +533 -0
- package/routes/suno.py +664 -0
- package/routes/theme.py +81 -0
- package/routes/transcripts.py +199 -0
- package/routes/vision.py +348 -0
- package/routes/workspace.py +288 -0
- package/server.py +1510 -0
- package/services/__init__.py +1 -0
- package/services/auth.py +143 -0
- package/services/canvas_versioning.py +239 -0
- package/services/db_pool.py +107 -0
- package/services/gateway.py +16 -0
- package/services/gateway_manager.py +333 -0
- package/services/gateways/__init__.py +12 -0
- package/services/gateways/base.py +110 -0
- package/services/gateways/compat.py +264 -0
- package/services/gateways/openclaw.py +1134 -0
- package/services/health.py +100 -0
- package/services/memory_client.py +455 -0
- package/services/paths.py +26 -0
- package/services/speech_normalizer.py +285 -0
- package/services/tts.py +270 -0
- package/setup-config.js +262 -0
- package/sounds/air_horn.mp3 +0 -0
- package/sounds/bruh.mp3 +0 -0
- package/sounds/crowd_cheer.mp3 +0 -0
- package/sounds/gunshot.mp3 +0 -0
- package/sounds/impact.mp3 +0 -0
- package/sounds/lets_go.mp3 +0 -0
- package/sounds/record_stop.mp3 +0 -0
- package/sounds/rewind.mp3 +0 -0
- package/sounds/sad_trombone.mp3 +0 -0
- package/sounds/scratch_long.mp3 +0 -0
- package/sounds/yeah.mp3 +0 -0
- package/src/adapters/ClawdBotAdapter.js +264 -0
- package/src/adapters/_template.js +133 -0
- package/src/adapters/elevenlabs-classic.js +841 -0
- package/src/adapters/elevenlabs-hybrid.js +812 -0
- package/src/adapters/hume-evi.js +676 -0
- package/src/admin.html +1339 -0
- package/src/app.js +8802 -0
- package/src/core/Config.js +173 -0
- package/src/core/EmotionEngine.js +307 -0
- package/src/core/EventBridge.js +180 -0
- package/src/core/EventBus.js +117 -0
- package/src/core/VoiceSession.js +607 -0
- package/src/face/BaseFace.js +259 -0
- package/src/face/EyeFace.js +208 -0
- package/src/face/HaloSmokeFace.js +509 -0
- package/src/face/manifest.json +27 -0
- package/src/face/previews/eyes.svg +16 -0
- package/src/face/previews/orb.svg +29 -0
- package/src/features/MusicPlayer.js +620 -0
- package/src/features/Soundboard.js +128 -0
- package/src/providers/DeepgramSTT.js +472 -0
- package/src/providers/DeepgramStreamingSTT.js +766 -0
- package/src/providers/GroqSTT.js +559 -0
- package/src/providers/TTSPlayer.js +323 -0
- package/src/providers/WebSpeechSTT.js +479 -0
- package/src/providers/tts/BaseTTSProvider.js +81 -0
- package/src/providers/tts/HumeProvider.js +77 -0
- package/src/providers/tts/SupertonicProvider.js +174 -0
- package/src/providers/tts/index.js +140 -0
- package/src/shell/adapter-registry.js +154 -0
- package/src/shell/caller-bridge.js +35 -0
- package/src/shell/camera-bridge.js +28 -0
- package/src/shell/canvas-bridge.js +32 -0
- package/src/shell/commercial-bridge.js +44 -0
- package/src/shell/face-bridge.js +44 -0
- package/src/shell/music-bridge.js +60 -0
- package/src/shell/orchestrator.js +233 -0
- package/src/shell/profile-discovery.js +303 -0
- package/src/shell/sounds-bridge.js +28 -0
- package/src/shell/transcript-bridge.js +61 -0
- package/src/shell/waveform-bridge.js +33 -0
- package/src/styles/base.css +2862 -0
- package/src/styles/face.css +417 -0
- package/src/styles/pi-overrides.css +89 -0
- package/src/styles/theme-dark.css +67 -0
- package/src/test-tts.html +175 -0
- package/src/ui/AppShell.js +544 -0
- package/src/ui/ProfileSwitcher.js +228 -0
- package/src/ui/SessionControl.js +240 -0
- package/src/ui/face/FacePicker.js +195 -0
- package/src/ui/face/FaceRenderer.js +309 -0
- package/src/ui/settings/PlaylistEditor.js +366 -0
- package/src/ui/settings/SettingsPanel.css +684 -0
- package/src/ui/settings/SettingsPanel.js +419 -0
- package/src/ui/settings/TTSVoicePreview.js +210 -0
- package/src/ui/themes/ThemeManager.js +213 -0
- package/src/ui/visualizers/BaseVisualizer.js +29 -0
- package/src/ui/visualizers/PartyFXVisualizer.css +291 -0
- package/src/ui/visualizers/PartyFXVisualizer.js +637 -0
- package/static/emulators/jsdos/js-dos.css +1 -0
- package/static/emulators/jsdos/js-dos.js +22 -0
- package/static/favicon.svg +55 -0
- package/static/icons/apple-touch-icon.png +0 -0
- package/static/icons/favicon-32.png +0 -0
- package/static/icons/icon-192.png +0 -0
- package/static/icons/icon-512.png +0 -0
- package/static/install.html +449 -0
- package/static/manifest.json +26 -0
- package/static/sw.js +21 -0
- package/tts_providers/__init__.py +136 -0
- package/tts_providers/base_provider.py +319 -0
- package/tts_providers/groq_provider.py +155 -0
- package/tts_providers/hume_provider.py +226 -0
- package/tts_providers/providers_config.json +119 -0
- package/tts_providers/qwen3_provider.py +371 -0
- package/tts_providers/resemble_provider.py +315 -0
- package/tts_providers/supertonic_provider.py +557 -0
- package/tts_providers/supertonic_tts.py +399 -0
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Hume EVI TTS Provider (Placeholder/Stub).
|
|
4
|
+
|
|
5
|
+
This module is a STUB/PLACEHOLDER for future Hume EVI Text-to-Speech integration.
|
|
6
|
+
It is currently INACTIVE due to API costs - use Supertonic instead.
|
|
7
|
+
|
|
8
|
+
When implemented, this provider would use Hume's EVI WebSocket API to generate
|
|
9
|
+
speech using a custom cloned voice (bdcf156c-6678-4720-9f91-46bf8063bd7f).
|
|
10
|
+
|
|
11
|
+
IMPORTANT: This is NOT IMPLEMENTED. All methods will raise NotImplementedError
|
|
12
|
+
or return placeholder data. Use SupertonicTTS for actual TTS functionality.
|
|
13
|
+
|
|
14
|
+
Author: OpenVoiceUI
|
|
15
|
+
Date: 2026-02-11
|
|
16
|
+
|
|
17
|
+
Status: INACTIVE - No API funds available
|
|
18
|
+
|
|
19
|
+
Future Implementation Reference:
|
|
20
|
+
https://dev.hume.ai/docs/speech-to-speech-evi/overview
|
|
21
|
+
https://dev.hume.ai/docs/speech-to-speech-evi/streaming-with-websockets
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
import logging
|
|
25
|
+
import os
|
|
26
|
+
from typing import List, Dict, Any, Optional
|
|
27
|
+
|
|
28
|
+
from .base_provider import TTSProvider
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# Configure logging
|
|
32
|
+
# Module-level logger named after this module's import path. Only the logger
# object is created here; this module attaches no handlers and sets no level.
logger = logging.getLogger(__name__)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class HumeProvider(TTSProvider):
    """
    Hume EVI TTS Provider - PLACEHOLDER (INACTIVE).

    This is a stub class for future Hume EVI TTS integration.
    Currently inactive due to API cost constraints.

    What the stub does today:
        - ``generate_speech`` always raises ``NotImplementedError``.
        - ``is_available`` always returns ``False``.
        - ``list_voices`` returns a fixed placeholder entry.
        - ``get_info`` returns descriptive metadata; its ``status`` field is
          derived from the ``HUME_API_KEY`` / ``HUME_SECRET_KEY`` environment
          variables and is independent of ``is_available``.

    When implemented, this would:
        1. Connect to Hume EVI WebSocket API
        2. Use a custom voice (ID: bdcf156c-6678-4720-9f91-46bf8063bd7f)
        3. Stream text to the WebSocket
        4. Receive audio chunks in real-time
        5. Return concatenated audio bytes

    Example (future implementation):
        >>> provider = HumeProvider(api_key="xxx", voice_id="bdcf156c-...")
        >>> audio = provider.generate_speech("Hello, how can I help you today?")
        >>> # Returns WAV audio bytes using the custom cloned voice

    Current behavior:
        >>> provider = HumeProvider()
        >>> provider.generate_speech("test")
        NotImplementedError: Hume TTS is currently inactive (no funds). Use Supertonic instead.
    """

    # Hume API configuration
    HUME_API_BASE = "https://api.hume.ai"
    HUME_WS_PATH = "/v0/evi/chat"

    # Default custom voice ID (cloned voice)
    DEFAULT_VOICE_ID = "bdcf156c-6678-4720-9f91-46bf8063bd7f"

    # Config ID (optional, for preset configurations)
    DEFAULT_CONFIG_ID = "3c824978-efa3-40df-bac2-023127b30e31"

    def __init__(self, api_key: Optional[str] = None, voice_id: Optional[str] = None):
        """
        Initialize the Hume TTS provider.

        Args:
            api_key: Hume API key. Stored on the instance but not used by
                any method of this stub.
            voice_id: Custom voice ID. A falsy value (``None`` or ``""``)
                falls back to ``DEFAULT_VOICE_ID``.

        Note:
            This is a placeholder. Parameters are accepted for API
            compatibility with the other providers.
        """
        # NOTE(review): the base-class initializer is not invoked here; confirm
        # that TTSProvider requires no initialization of its own.
        self.api_key = api_key
        self.voice_id = voice_id or self.DEFAULT_VOICE_ID
        logger.debug(
            "HumeProvider initialized (inactive stub). "
            "Use SupertonicTTS for actual TTS functionality."
        )

    def _websocket_url(self) -> str:
        """Return the EVI chat WebSocket URL built from the class constants."""
        base = self.HUME_API_BASE
        # Rewrite only the leading scheme. An unanchored replace would also
        # rewrite any later "https" substring in the base URL.
        if base.startswith('https'):
            base = 'wss' + base[len('https'):]
        return f"{base}{self.HUME_WS_PATH}"

    def generate_speech(self, text: str, **kwargs) -> bytes:
        """
        Generate speech using Hume EVI WebSocket API.

        **STUB METHOD - NOT IMPLEMENTED**

        When implemented, this would:
        1. Establish WebSocket connection to Hume EVI
        2. Send text input via chat messages
        3. Receive audio_output chunks
        4. Concatenate and return as WAV bytes

        Args:
            text: Text to synthesize. Only its first 50 characters are used,
                for the log entry written before raising.
            **kwargs: Additional parameters (currently ignored):
                - speed: Speech speed multiplier
                - temperature: Generation randomness
                - language: Language code

        Returns:
            bytes: WAV audio data (when implemented)

        Raises:
            NotImplementedError: Always raised - this is a stub

        Future Implementation Flow:
            NOTE(review): sketch only. Confirm the message schema and the
            authentication query parameter against the Hume documentation
            before implementing.

            ```python
            async with websockets.connect(
                f"{self._websocket_url()}"
                f"?access_token={self.api_key}"
            ) as ws:
                # Send text input
                await ws.send(json.dumps({
                    "text": text,
                    "voice": {"voice_id": self.voice_id}
                }))

                # Collect audio chunks
                audio_chunks = []
                async for msg in ws:
                    data = json.loads(msg)
                    if 'audio_output' in data:
                        audio_chunks.append(base64.b64decode(data['audio_output']))
                    if data.get('message_end'):
                        break

                return b''.join(audio_chunks)
            ```
        """
        error_msg = (
            "Hume TTS is currently inactive (no funds). Use Supertonic instead.\n\n"
            "When funded, this will use Hume EVI WebSocket API with custom voice: "
            f"{self.voice_id}\n"
            "See: https://dev.hume.ai/docs/speech-to-speech-evi/streaming-with-websockets"
        )
        # Coerce before slicing so a None or non-string argument still ends in
        # the documented NotImplementedError instead of a TypeError raised by
        # the logging line.
        preview = str(text)[:50]
        logger.error(
            "HumeProvider.generate_speech() called but not implemented: %s",
            preview,
        )
        raise NotImplementedError(error_msg)

    def list_voices(self) -> List[str]:
        """
        Return available Hume voices.

        **STUB METHOD** - Returns placeholder data.

        When implemented, this would query Hume's API to list:
        - Custom cloned voices
        - Built-in Hume voices
        - Voice preview IDs

        Returns:
            List[str]: A single fixed placeholder entry. It is not the
            instance's ``voice_id`` and not a real Hume voice ID.
        """
        return ['your-hume-voice-id']

    def get_info(self) -> Dict[str, Any]:
        """
        Return provider metadata.

        Returns:
            Dict with provider information including:
            - name: Provider display name
            - status: 'active' when both HUME_API_KEY and HUME_SECRET_KEY are
              set to non-blank values in the environment, else 'inactive'
            - cost_per_minute: Cost in USD
            - quality: Audio quality rating
            - latency: Expected latency
            - description: Brief provider description
            - capabilities: Feature flags
                - emotion_aware: Supports emotional expression
                - real_time: Supports real-time streaming
            - voice_id: Voice configured on this instance
            - config_id: Default EVI config ID
            - notes: Pricing and activation notes
            - api_endpoints: WebSocket, config and voices URLs
            - requires_microphone / requires_websocket / mode: Usage flags
        """
        api_key = os.environ.get('HUME_API_KEY', '').strip()
        secret_key = os.environ.get('HUME_SECRET_KEY', '').strip()
        status = 'active' if (api_key and secret_key) else 'inactive'
        # Report the voice this instance was configured with. Fall back to the
        # class default for instances created without running __init__.
        voice_id = getattr(self, 'voice_id', self.DEFAULT_VOICE_ID)
        return {
            'name': 'Hume EVI (subscription)',
            'status': status,
            'cost_per_minute': 0.06,
            'quality': 'high',
            'latency': 'medium',
            'description': 'Hume Expressive Voice Interface — full real-time voice agent (STT + TTS + emotion)',
            'capabilities': {
                'emotion_aware': True,
                'real_time': True
            },
            'voice_id': voice_id,
            'config_id': self.DEFAULT_CONFIG_ID,
            'notes': [
                'Subscription required. Plans: Starter $3/mo (40 min), Creator $14/mo (200 min), Pro $70/mo (1,200 min).',
                'Overage: $0.06/min (~$3.60/hr). Effective cost on lower plans can be $5-10+/hr.',
                'Set HUME_API_KEY and HUME_SECRET_KEY in .env to activate.',
                'Docs: https://platform.hume.ai/pricing'
            ],
            'api_endpoints': {
                'websocket': self._websocket_url(),
                'config': f"{self.HUME_API_BASE}/v0/evi/configs",
                'voices': f"{self.HUME_API_BASE}/v0/evi/voices"
            },
            'requires_microphone': True,
            'requires_websocket': True,
            'mode': 'full-voice',
        }

    def is_available(self) -> bool:
        """
        Check if Hume provider is available.

        Returns:
            bool: Always False - this provider is inactive. Environment
            credentials are not consulted here.
        """
        return False

    def __repr__(self) -> str:
        """String representation showing inactive status."""
        return f"HumeProvider(voice_id='{self.voice_id}', status='INACTIVE - Use Supertonic instead')"
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
# Public API of this module: a star-import exposes only HumeProvider.
__all__ = ['HumeProvider']
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
{
|
|
2
|
+
"providers": {
|
|
3
|
+
"hume": {
|
|
4
|
+
"name": "Hume EVI (subscription)",
|
|
5
|
+
"provider_id": "hume",
|
|
6
|
+
"cost_per_minute": 0.06,
|
|
7
|
+
"quality": "high",
|
|
8
|
+
"latency": "medium",
|
|
9
|
+
"voices": ["your-hume-voice-id"],
|
|
10
|
+
"features": [
|
|
11
|
+
"emotion-aware",
|
|
12
|
+
"real-time",
|
|
13
|
+
"websocket-streaming",
|
|
14
|
+
"voice-cloning"
|
|
15
|
+
],
|
|
16
|
+
"requires_api_key": true,
|
|
17
|
+
"status": "active",
|
|
18
|
+
"description": "Hume Expressive Voice Interface — full real-time voice agent (STT + TTS + emotion)",
|
|
19
|
+
"documentation_url": "https://platform.hume.ai/pricing",
|
|
20
|
+
"languages": ["en"],
|
|
21
|
+
"max_characters": 5000,
|
|
22
|
+
"notes": "Subscription required. Plans: Starter $3/mo (40 min), Creator $14/mo (200 min), Pro $70/mo (1,200 min). Overage: $0.06/min (~$3.60/hr). Effective cost on lower plans can be $5-10+/hr depending on usage. Includes STT+TTS+emotion — not just TTS.",
|
|
23
|
+
"requires_microphone": true,
|
|
24
|
+
"requires_websocket": true,
|
|
25
|
+
"mode": "full-voice"
|
|
26
|
+
},
|
|
27
|
+
"supertonic": {
|
|
28
|
+
"name": "Supertonic TTS",
|
|
29
|
+
"provider_id": "supertonic",
|
|
30
|
+
"cost_per_minute": 0.0,
|
|
31
|
+
"quality": "high",
|
|
32
|
+
"latency": "very-fast",
|
|
33
|
+
"voices": ["M1", "M2", "M3", "M4", "M5", "F1", "F2", "F3", "F4", "F5"],
|
|
34
|
+
"features": [
|
|
35
|
+
"multi-language",
|
|
36
|
+
"local-processing",
|
|
37
|
+
"open-source",
|
|
38
|
+
"no-api-key-required",
|
|
39
|
+
"onnx-based"
|
|
40
|
+
],
|
|
41
|
+
"requires_api_key": false,
|
|
42
|
+
"status": "active",
|
|
43
|
+
"description": "Local ONNX-based TTS engine with multiple voice styles",
|
|
44
|
+
"documentation_url": "https://github.com/playht/supertonic",
|
|
45
|
+
"languages": ["en", "ko", "es", "pt", "fr"],
|
|
46
|
+
"max_characters": 10000,
|
|
47
|
+
"notes": "Free, fast, local inference. Default provider.",
|
|
48
|
+
"requires_microphone": false,
|
|
49
|
+
"requires_websocket": false,
|
|
50
|
+
"mode": "tts-only"
|
|
51
|
+
},
|
|
52
|
+
"qwen3": {
|
|
53
|
+
"name": "Qwen3-TTS (fal.ai)",
|
|
54
|
+
"provider_id": "qwen3",
|
|
55
|
+
"cost_per_minute": 0.003,
|
|
56
|
+
"quality": "very-high",
|
|
57
|
+
"latency": "fast",
|
|
58
|
+
"voices": ["Vivian", "Serena", "Dylan", "Eric", "Ryan", "Aiden", "Uncle_Fu", "Ono_Anna", "Sohee"],
|
|
59
|
+
"features": ["multilingual", "expressive", "voice-cloning", "emotion-control", "cloud", "mp3-output"],
|
|
60
|
+
"requires_api_key": true,
|
|
61
|
+
"status": "active",
|
|
62
|
+
"description": "Qwen3-TTS via fal.ai — voice cloning, emotion control, multilingual, 10 languages",
|
|
63
|
+
"languages": ["en", "zh", "es", "fr", "de", "it", "ja", "ko", "pt", "ru"],
|
|
64
|
+
"max_characters": 5000,
|
|
65
|
+
"notes": "Voice cloning: POST /api/tts/clone with audio sample. Use cloned voice_id in /api/tts/generate. FAL_KEY required.",
|
|
66
|
+
"clone_endpoint": "/api/tts/clone",
|
|
67
|
+
"clone_cost_per_minute": 0.0008,
|
|
68
|
+
"requires_microphone": false,
|
|
69
|
+
"requires_websocket": false,
|
|
70
|
+
"mode": "tts-only",
|
|
71
|
+
"audio_format": "mp3",
|
|
72
|
+
"sample_rate": 24000
|
|
73
|
+
},
|
|
74
|
+
"resemble": {
|
|
75
|
+
"name": "Resemble AI (Chatterbox)",
|
|
76
|
+
"provider_id": "resemble",
|
|
77
|
+
"cost_per_minute": 0.10,
|
|
78
|
+
"quality": "very-high",
|
|
79
|
+
"latency": "very-fast",
|
|
80
|
+
"voices": [],
|
|
81
|
+
"features": ["streaming", "voice-cloning", "emotion-control", "ssml", "multilingual", "cloud", "wav-output", "paralinguistic-tags"],
|
|
82
|
+
"requires_api_key": true,
|
|
83
|
+
"status": "active",
|
|
84
|
+
"description": "Resemble AI Chatterbox — streaming TTS, voice cloning, emotion/exaggeration control, SSML, 90+ languages",
|
|
85
|
+
"documentation_url": "https://docs.resemble.ai",
|
|
86
|
+
"languages": ["en", "es", "fr", "de", "it", "pt", "ja", "ko", "zh", "ar", "ru", "hi", "nl", "pl", "sv", "da", "fi", "el", "cs", "hu", "ro", "tr", "uk", "vi", "th", "id"],
|
|
87
|
+
"max_characters": 2000,
|
|
88
|
+
"notes": "Models: chatterbox-turbo (fastest, paralinguistic tags), chatterbox (emotion/exaggeration), chatterbox-multilingual (23 langs). Voice cloning via Resemble dashboard. RESEMBLE_API_KEY + RESEMBLE_VOICE_UUID required.",
|
|
89
|
+
"requires_microphone": false,
|
|
90
|
+
"requires_websocket": false,
|
|
91
|
+
"mode": "tts-only",
|
|
92
|
+
"audio_format": "wav",
|
|
93
|
+
"sample_rate": 24000
|
|
94
|
+
},
|
|
95
|
+
"groq": {
|
|
96
|
+
"name": "Groq Orpheus",
|
|
97
|
+
"provider_id": "groq",
|
|
98
|
+
"cost_per_minute": 0.05,
|
|
99
|
+
"quality": "high",
|
|
100
|
+
"latency": "very-fast",
|
|
101
|
+
"voices": ["autumn", "diana", "hannah", "austin", "daniel", "troy"],
|
|
102
|
+
"features": ["streaming", "natural", "empathetic", "cloud", "mp3-output"],
|
|
103
|
+
"requires_api_key": true,
|
|
104
|
+
"status": "active",
|
|
105
|
+
"description": "Orpheus TTS (canopylabs/orpheus-v1-english) via Groq LPU — ~130-200ms, very natural",
|
|
106
|
+
"languages": ["en"],
|
|
107
|
+
"max_characters": 5000,
|
|
108
|
+
"notes": "~130-200ms latency on Groq LPU. Natural human-like prosody. GROQ_API_KEY required.",
|
|
109
|
+
"requires_microphone": false,
|
|
110
|
+
"requires_websocket": false,
|
|
111
|
+
"mode": "tts-only",
|
|
112
|
+
"audio_format": "mp3",
|
|
113
|
+
"sample_rate": 24000
|
|
114
|
+
}
|
|
115
|
+
},
|
|
116
|
+
"default_provider": "supertonic",
|
|
117
|
+
"version": "1.2.0",
|
|
118
|
+
"last_updated": "2026-02-21"
|
|
119
|
+
}
|