openvoiceui 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +104 -0
- package/Dockerfile +30 -0
- package/LICENSE +21 -0
- package/README.md +638 -0
- package/SETUP.md +360 -0
- package/app.py +232 -0
- package/auto-approve-devices.js +111 -0
- package/cli/index.js +372 -0
- package/config/__init__.py +4 -0
- package/config/default.yaml +43 -0
- package/config/flags.yaml +67 -0
- package/config/loader.py +203 -0
- package/config/providers.yaml +71 -0
- package/config/speech_normalization.yaml +182 -0
- package/config/theme.json +4 -0
- package/data/greetings.json +25 -0
- package/default-pages/ai-image-creator.html +915 -0
- package/default-pages/bulk-image-uploader.html +492 -0
- package/default-pages/desktop.html +2865 -0
- package/default-pages/file-explorer.html +854 -0
- package/default-pages/interactive-map.html +655 -0
- package/default-pages/style-guide.html +1005 -0
- package/default-pages/website-setup.html +1623 -0
- package/deploy/openclaw/Dockerfile +46 -0
- package/deploy/openvoiceui.service +30 -0
- package/deploy/setup-nginx.sh +50 -0
- package/deploy/setup-sudo.sh +306 -0
- package/deploy/skill-runner/Dockerfile +19 -0
- package/deploy/skill-runner/requirements.txt +14 -0
- package/deploy/skill-runner/server.py +269 -0
- package/deploy/supertonic/Dockerfile +22 -0
- package/deploy/supertonic/server.py +79 -0
- package/docker-compose.pinokio.yml +11 -0
- package/docker-compose.yml +59 -0
- package/greetings.json +25 -0
- package/index.html +65 -0
- package/inject-device-identity.js +142 -0
- package/package.json +82 -0
- package/profiles/default.json +114 -0
- package/profiles/manager.py +354 -0
- package/profiles/schema.json +337 -0
- package/prompts/voice-system-prompt.md +149 -0
- package/providers/__init__.py +39 -0
- package/providers/base.py +63 -0
- package/providers/llm/__init__.py +12 -0
- package/providers/llm/base.py +71 -0
- package/providers/llm/clawdbot_provider.py +112 -0
- package/providers/llm/zai_provider.py +115 -0
- package/providers/registry.py +320 -0
- package/providers/stt/__init__.py +12 -0
- package/providers/stt/base.py +58 -0
- package/providers/stt/webspeech_provider.py +49 -0
- package/providers/stt/whisper_provider.py +100 -0
- package/providers/tts/__init__.py +20 -0
- package/providers/tts/base.py +91 -0
- package/providers/tts/groq_provider.py +74 -0
- package/providers/tts/supertonic_provider.py +72 -0
- package/requirements.txt +38 -0
- package/routes/__init__.py +10 -0
- package/routes/admin.py +515 -0
- package/routes/canvas.py +1315 -0
- package/routes/chat.py +51 -0
- package/routes/conversation.py +2158 -0
- package/routes/elevenlabs_hybrid.py +306 -0
- package/routes/greetings.py +98 -0
- package/routes/icons.py +279 -0
- package/routes/image_gen.py +364 -0
- package/routes/instructions.py +190 -0
- package/routes/music.py +838 -0
- package/routes/onboarding.py +43 -0
- package/routes/pi.py +62 -0
- package/routes/profiles.py +215 -0
- package/routes/report_issue.py +68 -0
- package/routes/static_files.py +533 -0
- package/routes/suno.py +664 -0
- package/routes/theme.py +81 -0
- package/routes/transcripts.py +199 -0
- package/routes/vision.py +348 -0
- package/routes/workspace.py +288 -0
- package/server.py +1510 -0
- package/services/__init__.py +1 -0
- package/services/auth.py +143 -0
- package/services/canvas_versioning.py +239 -0
- package/services/db_pool.py +107 -0
- package/services/gateway.py +16 -0
- package/services/gateway_manager.py +333 -0
- package/services/gateways/__init__.py +12 -0
- package/services/gateways/base.py +110 -0
- package/services/gateways/compat.py +264 -0
- package/services/gateways/openclaw.py +1134 -0
- package/services/health.py +100 -0
- package/services/memory_client.py +455 -0
- package/services/paths.py +26 -0
- package/services/speech_normalizer.py +285 -0
- package/services/tts.py +270 -0
- package/setup-config.js +262 -0
- package/sounds/air_horn.mp3 +0 -0
- package/sounds/bruh.mp3 +0 -0
- package/sounds/crowd_cheer.mp3 +0 -0
- package/sounds/gunshot.mp3 +0 -0
- package/sounds/impact.mp3 +0 -0
- package/sounds/lets_go.mp3 +0 -0
- package/sounds/record_stop.mp3 +0 -0
- package/sounds/rewind.mp3 +0 -0
- package/sounds/sad_trombone.mp3 +0 -0
- package/sounds/scratch_long.mp3 +0 -0
- package/sounds/yeah.mp3 +0 -0
- package/src/adapters/ClawdBotAdapter.js +264 -0
- package/src/adapters/_template.js +133 -0
- package/src/adapters/elevenlabs-classic.js +841 -0
- package/src/adapters/elevenlabs-hybrid.js +812 -0
- package/src/adapters/hume-evi.js +676 -0
- package/src/admin.html +1339 -0
- package/src/app.js +8802 -0
- package/src/core/Config.js +173 -0
- package/src/core/EmotionEngine.js +307 -0
- package/src/core/EventBridge.js +180 -0
- package/src/core/EventBus.js +117 -0
- package/src/core/VoiceSession.js +607 -0
- package/src/face/BaseFace.js +259 -0
- package/src/face/EyeFace.js +208 -0
- package/src/face/HaloSmokeFace.js +509 -0
- package/src/face/manifest.json +27 -0
- package/src/face/previews/eyes.svg +16 -0
- package/src/face/previews/orb.svg +29 -0
- package/src/features/MusicPlayer.js +620 -0
- package/src/features/Soundboard.js +128 -0
- package/src/providers/DeepgramSTT.js +472 -0
- package/src/providers/DeepgramStreamingSTT.js +766 -0
- package/src/providers/GroqSTT.js +559 -0
- package/src/providers/TTSPlayer.js +323 -0
- package/src/providers/WebSpeechSTT.js +479 -0
- package/src/providers/tts/BaseTTSProvider.js +81 -0
- package/src/providers/tts/HumeProvider.js +77 -0
- package/src/providers/tts/SupertonicProvider.js +174 -0
- package/src/providers/tts/index.js +140 -0
- package/src/shell/adapter-registry.js +154 -0
- package/src/shell/caller-bridge.js +35 -0
- package/src/shell/camera-bridge.js +28 -0
- package/src/shell/canvas-bridge.js +32 -0
- package/src/shell/commercial-bridge.js +44 -0
- package/src/shell/face-bridge.js +44 -0
- package/src/shell/music-bridge.js +60 -0
- package/src/shell/orchestrator.js +233 -0
- package/src/shell/profile-discovery.js +303 -0
- package/src/shell/sounds-bridge.js +28 -0
- package/src/shell/transcript-bridge.js +61 -0
- package/src/shell/waveform-bridge.js +33 -0
- package/src/styles/base.css +2862 -0
- package/src/styles/face.css +417 -0
- package/src/styles/pi-overrides.css +89 -0
- package/src/styles/theme-dark.css +67 -0
- package/src/test-tts.html +175 -0
- package/src/ui/AppShell.js +544 -0
- package/src/ui/ProfileSwitcher.js +228 -0
- package/src/ui/SessionControl.js +240 -0
- package/src/ui/face/FacePicker.js +195 -0
- package/src/ui/face/FaceRenderer.js +309 -0
- package/src/ui/settings/PlaylistEditor.js +366 -0
- package/src/ui/settings/SettingsPanel.css +684 -0
- package/src/ui/settings/SettingsPanel.js +419 -0
- package/src/ui/settings/TTSVoicePreview.js +210 -0
- package/src/ui/themes/ThemeManager.js +213 -0
- package/src/ui/visualizers/BaseVisualizer.js +29 -0
- package/src/ui/visualizers/PartyFXVisualizer.css +291 -0
- package/src/ui/visualizers/PartyFXVisualizer.js +637 -0
- package/static/emulators/jsdos/js-dos.css +1 -0
- package/static/emulators/jsdos/js-dos.js +22 -0
- package/static/favicon.svg +55 -0
- package/static/icons/apple-touch-icon.png +0 -0
- package/static/icons/favicon-32.png +0 -0
- package/static/icons/icon-192.png +0 -0
- package/static/icons/icon-512.png +0 -0
- package/static/install.html +449 -0
- package/static/manifest.json +26 -0
- package/static/sw.js +21 -0
- package/tts_providers/__init__.py +136 -0
- package/tts_providers/base_provider.py +319 -0
- package/tts_providers/groq_provider.py +155 -0
- package/tts_providers/hume_provider.py +226 -0
- package/tts_providers/providers_config.json +119 -0
- package/tts_providers/qwen3_provider.py +371 -0
- package/tts_providers/resemble_provider.py +315 -0
- package/tts_providers/supertonic_provider.py +557 -0
- package/tts_providers/supertonic_tts.py +399 -0
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
TTS Providers Package.
|
|
4
|
+
|
|
5
|
+
This package provides a unified interface for multiple Text-to-Speech backends.
|
|
6
|
+
All providers inherit from TTSProvider base class and implement the same API.
|
|
7
|
+
|
|
8
|
+
Available Providers:
|
|
9
|
+
- HumeProvider: Hume EVI WebSocket TTS (INACTIVE - placeholder only)
|
|
10
|
+
- SupertonicProvider: Local ONNX-based TTS (active, recommended)
|
|
11
|
+
|
|
12
|
+
Usage:
|
|
13
|
+
>>> from tts_providers import get_provider, list_providers
|
|
14
|
+
>>> # Get default provider (Supertonic)
|
|
15
|
+
>>> provider = get_provider()
|
|
16
|
+
>>> audio = provider.generate_speech("Hello world", voice='M1')
|
|
17
|
+
>>>
|
|
18
|
+
>>> # List all providers
|
|
19
|
+
>>> providers = list_providers()
|
|
20
|
+
|
|
21
|
+
Author: OpenVoiceUI
|
|
22
|
+
Date: 2026-02-11
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
import json
|
|
26
|
+
import os
|
|
27
|
+
from typing import Optional, Dict, Any, List
|
|
28
|
+
|
|
29
|
+
from .base_provider import TTSProvider
|
|
30
|
+
from .hume_provider import HumeProvider
|
|
31
|
+
from .supertonic_provider import SupertonicProvider
|
|
32
|
+
from .groq_provider import GroqProvider
|
|
33
|
+
from .qwen3_provider import Qwen3Provider
|
|
34
|
+
from .resemble_provider import ResembleProvider
|
|
35
|
+
|
|
36
|
+
# Provider registry
|
|
37
|
+
# Maps each public provider id (the string accepted by get_provider) to the
# class implementing it. Both get_provider() and list_providers() call the
# class with no arguments to build an instance.
_PROVIDERS = {
    'hume': HumeProvider,
    'supertonic': SupertonicProvider,
    'groq': GroqProvider,
    'qwen3': Qwen3Provider,
    'resemble': ResembleProvider,
}
|
|
44
|
+
|
|
45
|
+
def _load_config() -> Dict[str, Any]:
    """Load the providers configuration from ``providers_config.json``.

    The file is looked up in the same directory as this module.

    Returns:
        The parsed configuration mapping. If the file is missing, is not
        valid JSON/UTF-8, or does not contain a JSON object at the top
        level, a built-in default is returned instead:
        ``{'providers': {}, 'default_provider': 'supertonic'}``.
    """
    default_config: Dict[str, Any] = {'providers': {}, 'default_provider': 'supertonic'}
    config_path = os.path.join(os.path.dirname(__file__), 'providers_config.json')

    try:
        # Explicit encoding: JSON is UTF-8, the platform default may not be.
        with open(config_path, 'r', encoding='utf-8') as f:
            config = json.load(f)
    except FileNotFoundError:
        return default_config
    except (json.JSONDecodeError, UnicodeDecodeError) as exc:
        # A broken config file must not take down every provider lookup.
        print(f"Warning: Ignoring malformed TTS providers config {config_path}: {exc}")
        return default_config

    if not isinstance(config, dict):
        # Callers use config.get(...); anything but a JSON object is unusable.
        print(f"Warning: Ignoring TTS providers config {config_path}: top-level value is not an object")
        return default_config

    return config
|
|
53
|
+
|
|
54
|
+
def get_provider(provider_id: Optional[str] = None) -> TTSProvider:
    """
    Get a TTS provider instance.

    Args:
        provider_id: Key of a provider registered in ``_PROVIDERS``
            ('hume', 'supertonic', 'groq', 'qwen3' or 'resemble').
            If None, the ``default_provider`` entry of the providers
            config is used, falling back to 'supertonic' when that entry
            is absent or empty.

    Returns:
        A newly constructed TTSProvider instance (the provider class is
        called with no arguments on every call).

    Raises:
        ValueError: If provider_id is unknown

    Example:
        >>> provider = get_provider('supertonic')
        >>> audio = provider.generate_speech("Hello", voice='M1')
    """
    if provider_id is None:
        # The config file is only needed to resolve the default, so the disk
        # read is skipped when the caller names a provider explicitly.
        provider_id = _load_config().get('default_provider') or 'supertonic'

    provider_class = _PROVIDERS.get(provider_id)
    if provider_class is None:
        available = ', '.join(_PROVIDERS)
        raise ValueError(f"Unknown provider '{provider_id}'. Available: {available}")

    return provider_class()
|
|
80
|
+
|
|
81
|
+
def list_providers(include_inactive: bool = True) -> List[Dict[str, Any]]:
    """
    List all TTS providers with metadata.

    Each registered provider class is instantiated and its ``get_info()``
    result is used as the base entry. When the providers config has a
    section for that provider id, the config values for cost, quality,
    latency, features, API-key requirement, languages and notes override
    the provider's own values (with neutral defaults for missing keys).

    Args:
        include_inactive: If True, include inactive providers. Default True.
            When False, only entries whose 'status' is 'active' are returned.

    Returns:
        List of provider metadata dictionaries. Every entry carries a
        'provider_id' key. Providers that raise while being constructed or
        described are skipped after printing a warning.

    Example:
        >>> for p in list_providers():
        ...     print(f"{p['name']}: ${p['cost_per_minute']}/min")
    """
    config = _load_config()
    # Tolerate a missing or null "providers" section in the config file.
    provider_configs = config.get('providers') or {}
    providers = []

    for provider_id, provider_class in _PROVIDERS.items():
        try:
            instance = provider_class()
            # Copy so the merge below never mutates a dict the provider
            # might keep and hand out again.
            info = dict(instance.get_info())

            # Guarantee the key even for providers without a config section;
            # a value supplied by the provider itself is kept in that case.
            info.setdefault('provider_id', provider_id)

            # Merge with config metadata
            if provider_id in provider_configs:
                config_data = provider_configs[provider_id]
                info.update({
                    'provider_id': provider_id,
                    'cost_per_minute': config_data.get('cost_per_minute', 0.0),
                    'quality': config_data.get('quality', 'unknown'),
                    'latency': config_data.get('latency', 'unknown'),
                    'features': config_data.get('features', []),
                    'requires_api_key': config_data.get('requires_api_key', False),
                    'languages': config_data.get('languages', []),
                    'notes': config_data.get('notes', ''),
                })

            # Filter inactive if requested
            if not include_inactive and info.get('status') != 'active':
                continue

            providers.append(info)
        except Exception as e:
            # Best effort by design: one broken provider must not hide the rest.
            print(f"Warning: Failed to load provider {provider_id}: {e}")

    return providers
|
|
126
|
+
|
|
127
|
+
# Public API of the package: the base class, the concrete provider classes,
# and the two factory/listing helpers defined in this module.
__all__ = [
    'TTSProvider',
    'HumeProvider',
    'SupertonicProvider',
    'GroqProvider',
    'Qwen3Provider',
    'ResembleProvider',
    'get_provider',
    'list_providers',
]
|
|
@@ -0,0 +1,319 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Base TTS Provider Abstract Class for OpenVoiceUI.
|
|
4
|
+
|
|
5
|
+
This module defines the abstract interface that all TTS providers must implement.
|
|
6
|
+
It provides a consistent API for generating speech, listing available voices,
|
|
7
|
+
and retrieving provider information.
|
|
8
|
+
|
|
9
|
+
Author: OpenVoiceUI
|
|
10
|
+
Date: 2026-02-11
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from abc import ABC, abstractmethod
|
|
14
|
+
from typing import Any, Dict, List, Optional
|
|
15
|
+
from dataclasses import dataclass
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class TTSVoice:
    """
    Description of one voice offered by a TTS provider.

    Attributes:
        id: Unique identifier for the voice (e.g., 'M1', 'your-hume-voice-id')
        name: Human-readable name (e.g., 'Male Voice 1', 'Custom Voice')
        language: Language code (e.g., 'en-US', 'en', 'es'); defaults to 'en'
        gender: Gender of the voice ('male', 'female', 'neutral', or None)
        description: Optional free-text description of the voice
    """
    id: str
    name: str
    language: str = 'en'
    gender: Optional[str] = None
    description: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the voice as a plain dict keyed by attribute name."""
        exported = ('id', 'name', 'language', 'gender', 'description')
        return {key: getattr(self, key) for key in exported}
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@dataclass
class TTSProviderInfo:
    """
    Descriptive metadata for a TTS provider.

    Attributes:
        name: Provider name (e.g., 'supertonic', 'hume')
        display_name: Human-readable name (e.g., 'Supertonic TTS', 'Hume EVI')
        version: Provider version string
        cost_per_minute: Cost in USD per minute of generated audio
        quality: Quality rating ('low', 'medium', 'high', 'premium')
        latency: Expected latency ('instant', 'fast', 'medium', 'slow')
        features: List of feature strings (e.g., ['emotion-aware', 'multi-language'])
        requires_api_key: Whether the provider requires an API key
        is_online: Whether the provider requires internet connectivity
        status: Current status ('active', 'inactive', 'error'); defaults to 'active'
    """
    name: str
    display_name: str
    version: str
    cost_per_minute: float
    quality: str
    latency: str
    features: List[str]
    requires_api_key: bool
    is_online: bool
    status: str = 'active'

    def to_dict(self) -> Dict[str, Any]:
        """Return the provider info as a plain dict keyed by attribute name."""
        exported = (
            'name',
            'display_name',
            'version',
            'cost_per_minute',
            'quality',
            'latency',
            'features',
            'requires_api_key',
            'is_online',
            'status',
        )
        return {key: getattr(self, key) for key in exported}
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class TTSProvider(ABC):
    """
    Common contract shared by every Text-to-Speech backend.

    Concrete providers subclass this class and supply three methods:
        - generate_speech(text, **kwargs): synthesize text into audio bytes
        - list_voices(): enumerate the voice identifiers that may be used
        - get_info(): describe the provider (name, status, capabilities)

    The remaining methods are convenience helpers built on those three and
    may be overridden when a provider needs different behaviour.

    Example:
        >>> class MyTTS(TTSProvider):
        ...     def generate_speech(self, text, **kwargs):
        ...         # Implementation here
        ...         return audio_bytes
        ...     def list_voices(self):
        ...         return ['voice1', 'voice2']
        ...     def get_info(self):
        ...         return {'name': 'MyTTS', 'status': 'active'}
    """

    @abstractmethod
    def generate_speech(self, text: str, **kwargs) -> bytes:
        """
        Synthesize ``text`` into audio.

        Args:
            text: The text to synthesize into speech.
            **kwargs: Provider-specific parameters (voice, speed, lang, etc.)

        Returns:
            bytes: Raw audio data (usually WAV format) that can be written
                   to a file or sent via HTTP with Content-Type: audio/wav.

        Raises:
            ValueError: If text is empty or parameters are invalid.
            RuntimeError: If speech generation fails.

        Example:
            >>> audio = provider.generate_speech("Hello world", voice='M1')
            >>> with open('output.wav', 'wb') as f:
            ...     f.write(audio)
        """

    @abstractmethod
    def list_voices(self) -> List[str]:
        """
        Enumerate the voices this provider can speak with.

        Returns:
            List[str]: Voice identifiers (e.g., ['M1', 'M2', 'F1']), each
                       usable as the 'voice' parameter of generate_speech().

        Example:
            >>> provider.list_voices()
            ['M1', 'M2', 'F1', 'F2']
        """

    @abstractmethod
    def get_info(self) -> Dict[str, Any]:
        """
        Describe this TTS provider.

        Returns:
            Dict with at minimum:
                - 'name': str - Provider display name
                - 'status': str - 'active', 'inactive', or 'error'
                - 'description': str - Brief description of the provider
                - 'capabilities': dict - Optional feature flags
                    - 'streaming': bool - Supports streaming audio
                    - 'ssml': bool - Supports SSML markup
                    - 'custom_voices': bool - Supports custom voice cloning
                    - 'languages': List[str] - Supported language codes

        Example:
            >>> provider.get_info()
            {
                'name': 'Supertonic',
                'status': 'active',
                'description': 'Local ONNX-based TTS engine',
                'capabilities': {
                    'streaming': False,
                    'ssml': False,
                    'custom_voices': True,
                    'languages': ['en', 'ko', 'es', 'pt', 'fr']
                }
            }
        """

    def is_available(self) -> bool:
        """
        Report whether the provider can be used right now.

        The default answer is derived from get_info(): the provider counts
        as available only when its 'status' entry equals 'active' (a missing
        entry is treated as 'inactive'). Subclasses may override this with a
        more thorough check.

        Returns:
            bool: True if the provider is active and ready, False otherwise.

        Example:
            >>> if provider.is_available():
            ...     audio = provider.generate_speech("Hello")
        """
        status = self.get_info().get('status', 'inactive')
        return status == 'active'

    def validate_text(self, text: str) -> None:
        """
        Ensure ``text`` can be handed to speech generation.

        Args:
            text: Text to validate.

        Raises:
            ValueError: If text is None, is not a string, or is empty /
                        whitespace only.

        Example:
            >>> provider.validate_text("Hello world")  # OK
            >>> provider.validate_text("")  # Raises ValueError
        """
        if text is None:
            raise ValueError("Text cannot be None")

        if not isinstance(text, str):
            type_name = type(text).__name__
            raise ValueError(f"Text must be a string, got {type_name}")

        stripped = text.strip()
        if not stripped:
            raise ValueError("Text cannot be empty or contain only whitespace")

    def validate_voice(self, voice: str) -> bool:
        """
        Tell whether ``voice`` is one of the provider's voices.

        Args:
            voice: Voice identifier to validate.

        Returns:
            bool: True if the voice appears in list_voices(), False otherwise.

        Example:
            >>> provider.validate_voice('M1')
            True
            >>> provider.validate_voice('invalid')
            False
        """
        known_voices = self.list_voices()
        return voice in known_voices

    def get_default_voice(self) -> Optional[str]:
        """
        Pick the voice to use when the caller does not specify one.

        Returns:
            The first voice in list_voices(), or None if no voices available.

        Example:
            >>> provider.get_default_voice()
            'M1'
        """
        available = self.list_voices()
        if not available:
            return None
        return available[0]

    def __repr__(self) -> str:
        """Return ``ClassName(name='...', status='...')`` built from get_info()."""
        meta = self.get_info()
        cls_name = self.__class__.__name__
        name = meta.get('name', 'Unknown')
        status = meta.get('status', 'unknown')
        return f"{cls_name}(name='{name}', status='{status}')"
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
class TTSProviderError(Exception):
    """
    Base exception class for TTS provider errors.

    This exception is raised when a TTS provider encounters an error
    that is specific to the provider implementation.

    The string form of the exception is ``"[<provider_name>] <message>"``.

    Attributes:
        provider_name: Name of the provider that raised the error.
        message: The raw error message, without the provider prefix.
    """

    def __init__(self, provider_name: str, message: str):
        """
        Initialize the exception.

        Args:
            provider_name: Name of the provider that raised the error.
            message: Error message describing what went wrong.
        """
        self.provider_name = provider_name
        self.message = message
        super().__init__(f"[{provider_name}] {message}")

    def __reduce__(self):
        """Support pickling and copying.

        The default exception reduction replays ``cls(*self.args)``, but
        ``args`` holds only the single formatted string, so the two-argument
        constructor would fail with TypeError on unpickle (for example when
        an error crosses a process boundary). Replay the original
        constructor arguments instead and carry over any extra attributes.
        """
        return (self.__class__, (self.provider_name, self.message), self.__dict__)
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
class TTSGenerationError(TTSProviderError):
    """
    Speech generation failed.

    Typical causes are invalid input, network issues, or problems
    with the TTS service itself.
    """
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
class TTSConfigurationError(TTSProviderError):
    """
    The provider is misconfigured.

    Typical causes are missing API keys, invalid paths,
    or other configuration issues.
    """
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
class TTSVoiceNotFoundError(TTSProviderError):
    """
    A requested voice is not available.

    Raised when the given voice ID does not exist in the
    provider's voice catalog.
    """
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
# Names exported by ``from base_provider import *``: the abstract provider
# base class, the two metadata dataclasses, and the exception hierarchy.
__all__ = [
    'TTSProvider',
    'TTSVoice',
    'TTSProviderInfo',
    'TTSProviderError',
    'TTSGenerationError',
    'TTSConfigurationError',
    'TTSVoiceNotFoundError',
]
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Groq Orpheus TTS Provider — canopylabs/orpheus-v1-english via Groq LPU.
|
|
3
|
+
|
|
4
|
+
~130-200ms TTFB, natural human-like prosody, WAV output.
|
|
5
|
+
API key: GROQ_API_KEY env var
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import os
|
|
9
|
+
import time
|
|
10
|
+
import logging
|
|
11
|
+
|
|
12
|
+
from .base_provider import TTSProvider
|
|
13
|
+
|
|
14
|
+
# Module-level logger; all messages from this provider are prefixed "[Groq]".
logger = logging.getLogger(__name__)

# Model identifier sent with every speech request.
MODEL = "canopylabs/orpheus-v1-english"
# If a single TTS call exceeds this, abort and let fallback handle it.
# Normal Groq latency is 130-700ms. Rate-limited calls take 12-27s.
GROQ_TTS_TIMEOUT_SECONDS = 5.0

# Voice ids accepted by generate_speech(); any other value falls back to
# 'autumn' there.
AVAILABLE_VOICES = [
    "autumn",   # Female (default)
    "diana",    # Female
    "hannah",   # Female
    "austin",   # Male
    "daniel",   # Male
    "troy",     # Male
]
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class GroqProvider(TTSProvider):
    """
    TTS Provider using Groq Orpheus (canopylabs/orpheus-v1-english).

    Voices: autumn, diana, hannah, austin, daniel, troy
    Output: WAV audio bytes
    Latency: ~130-200ms (Groq LPU)
    Cost: ~$0.05/1K chars

    The Groq client is created lazily on first use, so constructing the
    provider does not require the ``groq`` package to be installed.
    """

    def __init__(self):
        """Read GROQ_API_KEY from the environment and record the initial status."""
        super().__init__()
        self.api_key = os.getenv('GROQ_API_KEY', '')
        # Without a key the provider reports status 'error' via get_info().
        self._status = 'active' if self.api_key else 'error'
        self._init_error = None if self.api_key else 'GROQ_API_KEY not set'
        self._client = None

    def _get_client(self):
        """Return the cached Groq client, creating it on the first call.

        The client is configured with GROQ_TTS_TIMEOUT_SECONDS so that slow
        (e.g. rate-limited) calls abort instead of blocking the caller.

        Raises:
            RuntimeError: If the ``groq`` package is not installed.
        """
        if self._client is None:
            try:
                from groq import Groq
            except ImportError as exc:
                raise RuntimeError("groq package not installed — run: pip install groq") from exc
            self._client = Groq(
                api_key=self.api_key,
                timeout=GROQ_TTS_TIMEOUT_SECONDS,
            )
        return self._client

    @staticmethod
    def _parse_api_error(exc: Exception) -> tuple:
        """Extract ``(code, message)`` from the text of a Groq API error.

        Looks for ``'code': '...'`` and ``'message': '...'`` fragments in
        ``str(exc)``. When a fragment is absent, the code defaults to
        'unknown' and the message to the full error text.
        """
        import re

        err_str = str(exc)
        code_match = re.search(r"'code':\s*'([^']+)'", err_str)
        msg_match = re.search(r"'message':\s*'([^']+)'", err_str)
        err_code = code_match.group(1) if code_match else 'unknown'
        err_msg = msg_match.group(1) if msg_match else err_str
        return err_code, err_msg

    def generate_speech(self, text: str, voice: str = 'autumn', **kwargs) -> bytes:
        """
        Generate speech via Groq Orpheus.

        Args:
            text: Text to synthesize.
            voice: One of AVAILABLE_VOICES. Default: 'autumn'. An unknown
                voice is replaced by 'autumn' after logging a warning.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            WAV audio bytes (the request asks for ``response_format="wav"``).

        Raises:
            ValueError: If text is None, not a string, or blank.
            RuntimeError: If GROQ_API_KEY is not set, or if the request
                fails; in the latter case the message has the form
                ``[groq:<code>] <message>`` and the original exception is
                attached as ``__cause__``.
        """
        if not self.api_key:
            raise RuntimeError("GROQ_API_KEY not set")

        self.validate_text(text)

        if voice not in AVAILABLE_VOICES:
            logger.warning("[Groq] Unknown voice '%s', using autumn", voice)
            voice = 'autumn'

        # perf_counter is monotonic, so the measured latency cannot be skewed
        # by wall-clock adjustments.
        started = time.perf_counter()
        logger.info("[Groq] Requesting TTS: '%s' voice=%s", text[:60], voice)

        try:
            client = self._get_client()
            resp = client.audio.speech.create(
                model=MODEL,
                voice=voice,
                input=text,
                response_format="wav",
            )
            # Use .content when the response object has it, else stream-read.
            audio_bytes = resp.content if hasattr(resp, 'content') else resp.read()
        except Exception as e:
            # Surface the structured API error code so callers can tell apart
            # e.g. rate limiting from other failures.
            err_code, err_msg = self._parse_api_error(e)
            raise RuntimeError(f"[groq:{err_code}] {err_msg}") from e

        elapsed = int((time.perf_counter() - started) * 1000)
        logger.info("[Groq] Generated %s bytes in %sms", len(audio_bytes), elapsed)
        return audio_bytes

    def health_check(self) -> dict:
        """Check that the Groq API is reachable with the configured key.

        Returns:
            dict with 'ok' (bool), 'latency_ms' (int) and 'detail' (str).
            Never raises: any failure is reported with ``ok=False`` and the
            error text in 'detail'.
        """
        if not self.api_key:
            return {"ok": False, "latency_ms": 0, "detail": "GROQ_API_KEY not set"}
        started = time.perf_counter()
        try:
            # Reuse the shared client so the health check is bounded by the
            # same timeout as speech requests instead of the SDK default.
            client = self._get_client()
            client.models.list()
            latency_ms = int((time.perf_counter() - started) * 1000)
            return {"ok": True, "latency_ms": latency_ms, "detail": "Groq reachable — Orpheus ready"}
        except Exception as e:
            latency_ms = int((time.perf_counter() - started) * 1000)
            return {"ok": False, "latency_ms": latency_ms, "detail": str(e)}

    def list_voices(self) -> list:
        """Return a copy of AVAILABLE_VOICES (callers may mutate it freely)."""
        return AVAILABLE_VOICES.copy()

    def get_default_voice(self) -> str:
        """Return the default voice id, 'autumn'."""
        return 'autumn'

    def is_available(self) -> bool:
        """Return True when an API key is configured (no network call is made)."""
        return bool(self.api_key)

    def get_info(self) -> dict:
        """Return provider metadata, including the current status and init error."""
        return {
            'name': 'Groq Orpheus',
            'provider_id': 'groq',
            'status': self._status,
            'description': 'Orpheus TTS via Groq LPU — fast, natural, human-like prosody',
            'quality': 'high',
            'latency': 'very-fast',
            'cost_per_minute': 0.05,
            'voices': AVAILABLE_VOICES.copy(),
            # NOTE(review): 'mp3-output' is advertised although generate_speech
            # requests WAV and 'audio_format' below is 'wav' — confirm whether
            # any caller keys on this feature string before changing it.
            'features': ['fast', 'natural', 'empathetic', 'mp3-output', 'cloud'],
            'requires_api_key': True,
            'languages': ['en'],
            'max_characters': 5000,
            'notes': 'Orpheus v1 English on Groq LPU. ~130-200ms latency. GROQ_API_KEY required.',
            'default_voice': 'autumn',
            'audio_format': 'wav',
            'sample_rate': 24000,
            'error': self._init_error,
        }
|