openvoiceui 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +104 -0
- package/Dockerfile +30 -0
- package/LICENSE +21 -0
- package/README.md +638 -0
- package/SETUP.md +360 -0
- package/app.py +232 -0
- package/auto-approve-devices.js +111 -0
- package/cli/index.js +372 -0
- package/config/__init__.py +4 -0
- package/config/default.yaml +43 -0
- package/config/flags.yaml +67 -0
- package/config/loader.py +203 -0
- package/config/providers.yaml +71 -0
- package/config/speech_normalization.yaml +182 -0
- package/config/theme.json +4 -0
- package/data/greetings.json +25 -0
- package/default-pages/ai-image-creator.html +915 -0
- package/default-pages/bulk-image-uploader.html +492 -0
- package/default-pages/desktop.html +2865 -0
- package/default-pages/file-explorer.html +854 -0
- package/default-pages/interactive-map.html +655 -0
- package/default-pages/style-guide.html +1005 -0
- package/default-pages/website-setup.html +1623 -0
- package/deploy/openclaw/Dockerfile +46 -0
- package/deploy/openvoiceui.service +30 -0
- package/deploy/setup-nginx.sh +50 -0
- package/deploy/setup-sudo.sh +306 -0
- package/deploy/skill-runner/Dockerfile +19 -0
- package/deploy/skill-runner/requirements.txt +14 -0
- package/deploy/skill-runner/server.py +269 -0
- package/deploy/supertonic/Dockerfile +22 -0
- package/deploy/supertonic/server.py +79 -0
- package/docker-compose.pinokio.yml +11 -0
- package/docker-compose.yml +59 -0
- package/greetings.json +25 -0
- package/index.html +65 -0
- package/inject-device-identity.js +142 -0
- package/package.json +82 -0
- package/profiles/default.json +114 -0
- package/profiles/manager.py +354 -0
- package/profiles/schema.json +337 -0
- package/prompts/voice-system-prompt.md +149 -0
- package/providers/__init__.py +39 -0
- package/providers/base.py +63 -0
- package/providers/llm/__init__.py +12 -0
- package/providers/llm/base.py +71 -0
- package/providers/llm/clawdbot_provider.py +112 -0
- package/providers/llm/zai_provider.py +115 -0
- package/providers/registry.py +320 -0
- package/providers/stt/__init__.py +12 -0
- package/providers/stt/base.py +58 -0
- package/providers/stt/webspeech_provider.py +49 -0
- package/providers/stt/whisper_provider.py +100 -0
- package/providers/tts/__init__.py +20 -0
- package/providers/tts/base.py +91 -0
- package/providers/tts/groq_provider.py +74 -0
- package/providers/tts/supertonic_provider.py +72 -0
- package/requirements.txt +38 -0
- package/routes/__init__.py +10 -0
- package/routes/admin.py +515 -0
- package/routes/canvas.py +1315 -0
- package/routes/chat.py +51 -0
- package/routes/conversation.py +2158 -0
- package/routes/elevenlabs_hybrid.py +306 -0
- package/routes/greetings.py +98 -0
- package/routes/icons.py +279 -0
- package/routes/image_gen.py +364 -0
- package/routes/instructions.py +190 -0
- package/routes/music.py +838 -0
- package/routes/onboarding.py +43 -0
- package/routes/pi.py +62 -0
- package/routes/profiles.py +215 -0
- package/routes/report_issue.py +68 -0
- package/routes/static_files.py +533 -0
- package/routes/suno.py +664 -0
- package/routes/theme.py +81 -0
- package/routes/transcripts.py +199 -0
- package/routes/vision.py +348 -0
- package/routes/workspace.py +288 -0
- package/server.py +1510 -0
- package/services/__init__.py +1 -0
- package/services/auth.py +143 -0
- package/services/canvas_versioning.py +239 -0
- package/services/db_pool.py +107 -0
- package/services/gateway.py +16 -0
- package/services/gateway_manager.py +333 -0
- package/services/gateways/__init__.py +12 -0
- package/services/gateways/base.py +110 -0
- package/services/gateways/compat.py +264 -0
- package/services/gateways/openclaw.py +1134 -0
- package/services/health.py +100 -0
- package/services/memory_client.py +455 -0
- package/services/paths.py +26 -0
- package/services/speech_normalizer.py +285 -0
- package/services/tts.py +270 -0
- package/setup-config.js +262 -0
- package/sounds/air_horn.mp3 +0 -0
- package/sounds/bruh.mp3 +0 -0
- package/sounds/crowd_cheer.mp3 +0 -0
- package/sounds/gunshot.mp3 +0 -0
- package/sounds/impact.mp3 +0 -0
- package/sounds/lets_go.mp3 +0 -0
- package/sounds/record_stop.mp3 +0 -0
- package/sounds/rewind.mp3 +0 -0
- package/sounds/sad_trombone.mp3 +0 -0
- package/sounds/scratch_long.mp3 +0 -0
- package/sounds/yeah.mp3 +0 -0
- package/src/adapters/ClawdBotAdapter.js +264 -0
- package/src/adapters/_template.js +133 -0
- package/src/adapters/elevenlabs-classic.js +841 -0
- package/src/adapters/elevenlabs-hybrid.js +812 -0
- package/src/adapters/hume-evi.js +676 -0
- package/src/admin.html +1339 -0
- package/src/app.js +8802 -0
- package/src/core/Config.js +173 -0
- package/src/core/EmotionEngine.js +307 -0
- package/src/core/EventBridge.js +180 -0
- package/src/core/EventBus.js +117 -0
- package/src/core/VoiceSession.js +607 -0
- package/src/face/BaseFace.js +259 -0
- package/src/face/EyeFace.js +208 -0
- package/src/face/HaloSmokeFace.js +509 -0
- package/src/face/manifest.json +27 -0
- package/src/face/previews/eyes.svg +16 -0
- package/src/face/previews/orb.svg +29 -0
- package/src/features/MusicPlayer.js +620 -0
- package/src/features/Soundboard.js +128 -0
- package/src/providers/DeepgramSTT.js +472 -0
- package/src/providers/DeepgramStreamingSTT.js +766 -0
- package/src/providers/GroqSTT.js +559 -0
- package/src/providers/TTSPlayer.js +323 -0
- package/src/providers/WebSpeechSTT.js +479 -0
- package/src/providers/tts/BaseTTSProvider.js +81 -0
- package/src/providers/tts/HumeProvider.js +77 -0
- package/src/providers/tts/SupertonicProvider.js +174 -0
- package/src/providers/tts/index.js +140 -0
- package/src/shell/adapter-registry.js +154 -0
- package/src/shell/caller-bridge.js +35 -0
- package/src/shell/camera-bridge.js +28 -0
- package/src/shell/canvas-bridge.js +32 -0
- package/src/shell/commercial-bridge.js +44 -0
- package/src/shell/face-bridge.js +44 -0
- package/src/shell/music-bridge.js +60 -0
- package/src/shell/orchestrator.js +233 -0
- package/src/shell/profile-discovery.js +303 -0
- package/src/shell/sounds-bridge.js +28 -0
- package/src/shell/transcript-bridge.js +61 -0
- package/src/shell/waveform-bridge.js +33 -0
- package/src/styles/base.css +2862 -0
- package/src/styles/face.css +417 -0
- package/src/styles/pi-overrides.css +89 -0
- package/src/styles/theme-dark.css +67 -0
- package/src/test-tts.html +175 -0
- package/src/ui/AppShell.js +544 -0
- package/src/ui/ProfileSwitcher.js +228 -0
- package/src/ui/SessionControl.js +240 -0
- package/src/ui/face/FacePicker.js +195 -0
- package/src/ui/face/FaceRenderer.js +309 -0
- package/src/ui/settings/PlaylistEditor.js +366 -0
- package/src/ui/settings/SettingsPanel.css +684 -0
- package/src/ui/settings/SettingsPanel.js +419 -0
- package/src/ui/settings/TTSVoicePreview.js +210 -0
- package/src/ui/themes/ThemeManager.js +213 -0
- package/src/ui/visualizers/BaseVisualizer.js +29 -0
- package/src/ui/visualizers/PartyFXVisualizer.css +291 -0
- package/src/ui/visualizers/PartyFXVisualizer.js +637 -0
- package/static/emulators/jsdos/js-dos.css +1 -0
- package/static/emulators/jsdos/js-dos.js +22 -0
- package/static/favicon.svg +55 -0
- package/static/icons/apple-touch-icon.png +0 -0
- package/static/icons/favicon-32.png +0 -0
- package/static/icons/icon-192.png +0 -0
- package/static/icons/icon-512.png +0 -0
- package/static/install.html +449 -0
- package/static/manifest.json +26 -0
- package/static/sw.js +21 -0
- package/tts_providers/__init__.py +136 -0
- package/tts_providers/base_provider.py +319 -0
- package/tts_providers/groq_provider.py +155 -0
- package/tts_providers/hume_provider.py +226 -0
- package/tts_providers/providers_config.json +119 -0
- package/tts_providers/qwen3_provider.py +371 -0
- package/tts_providers/resemble_provider.py +315 -0
- package/tts_providers/supertonic_provider.py +557 -0
- package/tts_providers/supertonic_tts.py +399 -0
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Local Whisper STT provider (faster-whisper).
|
|
3
|
+
|
|
4
|
+
Ref: future-dev-plans/02-PROVIDER-SYSTEMS.md (WhisperProvider section)
|
|
5
|
+
Server-side transcription via local faster-whisper model.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
import os
|
|
12
|
+
import tempfile
|
|
13
|
+
import time
|
|
14
|
+
from typing import Any, Dict, List, Optional
|
|
15
|
+
|
|
16
|
+
from providers.stt.base import STTError, STTProvider, TranscriptionResult
|
|
17
|
+
from providers.registry import ProviderType, registry
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class WhisperProvider(STTProvider):
    """Local Whisper model for server-side transcription via faster-whisper.

    Config keys read from ``self._config``:
        model:  faster-whisper model size/name (default ``"base"``).
        device: device string handed to ``WhisperModel`` (default ``"cpu"``).
        name:   display name reported by :meth:`get_info`.
    """

    def __init__(self, config: Optional[Dict[str, Any]] = None) -> None:
        super().__init__(config)
        self.model_size = self._config.get("model", "base")
        self.device = self._config.get("device", "cpu")
        # The model is created lazily on first use; see _load_model().
        self._model = None

    def _load_model(self):
        """Return the cached ``WhisperModel``, creating it on first call.

        Raises:
            STTError: if faster-whisper cannot be imported, or if constructing
                the model fails for any other reason.
        """
        if self._model is not None:
            return self._model

        # Import and construction are separated so that an ImportError raised
        # while *building* the model is not misreported as "not installed".
        try:
            from faster_whisper import WhisperModel  # type: ignore
        except ImportError as exc:
            raise STTError(
                "whisper",
                "faster-whisper not installed: pip install faster-whisper",
            ) from exc

        try:
            self._model = WhisperModel(self.model_size, device=self.device)
        except Exception as exc:
            raise STTError("whisper", f"Failed to load model: {exc}") from exc

        logger.info("Whisper model loaded: %s on %s", self.model_size, self.device)
        return self._model

    def transcribe(
        self,
        audio_data: bytes,
        language: Optional[str] = None,
        **kwargs,
    ) -> TranscriptionResult:
        """Transcribe ``audio_data`` and return a ``TranscriptionResult``.

        The bytes are written to a temporary ``.wav`` file which is handed to
        the model and always removed afterwards.

        Args:
            audio_data: raw audio file contents.
            language: language code passed to the model; ``"en"`` is used when
                it is ``None`` or empty.
            **kwargs: accepted for interface compatibility; not used here.

        Raises:
            STTError: if the model cannot be loaded or transcription fails.
        """
        start = time.time()
        model = self._load_model()  # STTError propagates unchanged

        # delete=False because the model opens the file by path after we
        # close it; cleanup is done explicitly in the finally below.
        tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
        temp_path = tmp.name
        try:
            # Writing happens inside the try/finally so a failed write does
            # not leave the temp file behind.
            with tmp:
                tmp.write(audio_data)

            try:
                segments, info = model.transcribe(temp_path, language=language or "en")
                # NOTE: per the faster-whisper docs the segments are produced
                # lazily, so they must be consumed before the file is removed.
                text = " ".join(seg.text.strip() for seg in segments)
            except Exception as exc:
                raise STTError("whisper", f"Transcription failed: {exc}") from exc
        finally:
            try:
                os.unlink(temp_path)
            except OSError:
                # Best-effort cleanup: a leftover temp file must not mask the
                # transcription result or the original error.
                pass

        return TranscriptionResult(
            text=text,
            # Fixed placeholder value; no per-result confidence is computed.
            confidence=0.9,
            language=info.language,
            duration_ms=(time.time() - start) * 1000,
            provider="whisper",
        )

    def is_available(self) -> bool:
        """Return True when the ``faster_whisper`` package can be imported."""
        try:
            from faster_whisper import WhisperModel  # type: ignore  # noqa: F401
            return True
        except ImportError:
            return False

    def get_info(self) -> Dict[str, Any]:
        """Return name, status, model, device and availability for this provider."""
        available = self.is_available()  # probe once, report twice
        return {
            "name": self._config.get("name", "Whisper Local"),
            "status": "active" if available else "inactive",
            "model": self.model_size,
            "device": self.device,
            "available": available,
        }
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
# Auto-register when this module is imported
|
|
100
|
+
registry.register(ProviderType.STT, "whisper", WhisperProvider)
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""TTS provider package — adapter layer.
|
|
2
|
+
|
|
3
|
+
Architecture note:
|
|
4
|
+
- tts_providers/ ← canonical implementation (add new providers here)
|
|
5
|
+
- providers/tts/ ← adapter wrappers used by the registry system and tests
|
|
6
|
+
These delegate to tts_providers/ for actual TTS work.
|
|
7
|
+
|
|
8
|
+
To add a new TTS provider:
|
|
9
|
+
1. Create tts_providers/myprovider_provider.py (inheriting TTSProvider from tts_providers/base_provider.py)
|
|
10
|
+
2. Register it in tts_providers/__init__.py _PROVIDERS dict
|
|
11
|
+
3. Add metadata to tts_providers/providers_config.json
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from providers.tts.base import TTSProvider, TTSVoice, TTSError, TTSVoiceNotFoundError
|
|
15
|
+
|
|
16
|
+
# Import concrete providers so their registry.register() calls fire
|
|
17
|
+
from providers.tts import supertonic_provider # noqa: F401
|
|
18
|
+
from providers.tts import groq_provider # noqa: F401
|
|
19
|
+
|
|
20
|
+
__all__ = ["TTSProvider", "TTSVoice", "TTSError", "TTSVoiceNotFoundError"]
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
"""
|
|
2
|
+
TTS provider abstract base class.
|
|
3
|
+
|
|
4
|
+
Extends the existing tts_providers/base_provider.py pattern to conform to the
|
|
5
|
+
unified BaseProvider interface required by ADR-003.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from abc import abstractmethod
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from typing import Any, Dict, List, Optional
|
|
11
|
+
|
|
12
|
+
from providers.base import BaseProvider, ProviderError
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class TTSVoice:
    """Descriptor for a single voice offered by a TTS provider."""

    id: str
    name: str
    language: str = "en"
    gender: Optional[str] = None
    description: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the voice as a plain dict keyed by field name."""
        field_names = ("id", "name", "language", "gender", "description")
        return {field_name: getattr(self, field_name) for field_name in field_names}
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class TTSProvider(BaseProvider):
    """Abstract base class for TTS providers (Supertonic, Groq, ElevenLabs, etc.).

    Subclasses must implement :meth:`generate_speech` and :meth:`list_voices`;
    the remaining methods are defaults built on top of those two.
    """

    @abstractmethod
    def generate_speech(self, text: str, **kwargs) -> bytes:
        """Convert text to audio bytes (WAV or MP3)."""

    @abstractmethod
    def list_voices(self) -> List[str]:
        """Return list of available voice IDs."""

    def list_voices_detailed(self) -> List[TTSVoice]:
        """Return TTSVoice objects; override for richer metadata.

        The default uses each voice ID as both the ``id`` and the ``name``.
        """
        detailed: List[TTSVoice] = []
        for voice_id in self.list_voices():
            detailed.append(TTSVoice(id=voice_id, name=voice_id))
        return detailed

    def get_default_voice(self) -> Optional[str]:
        """Return the first listed voice ID, or None when no voices are listed."""
        available = self.list_voices()
        if not available:
            return None
        return available[0]

    def validate_text(self, text: str) -> None:
        """Raise ValueError unless ``text`` is a non-blank ``str``."""
        if text is None:
            raise ValueError("Text cannot be None")
        if not isinstance(text, str):
            raise ValueError(f"Text must be str, got {type(text).__name__}")
        stripped = text.strip()
        if not stripped:
            raise ValueError("Text cannot be empty or whitespace-only")

    def validate_voice(self, voice: str) -> bool:
        """Return True when ``voice`` is one of the IDs from list_voices()."""
        known_voices = self.list_voices()
        return voice in known_voices

    def is_available(self) -> bool:
        """Return True when get_info() reports a status of "active"."""
        status = self.get_info().get("status", "inactive")
        return status == "active"

    def get_info(self) -> Dict[str, Any]:
        """Return basic provider info; the default always reports "active"."""
        display_name = self._config.get("name", self.__class__.__name__)
        return {
            "name": display_name,
            "status": "active",
            "available": True,
        }
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class TTSError(ProviderError):
    """Error raised by a TTS provider; a TTS-specific ProviderError."""
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class TTSVoiceNotFoundError(TTSError):
    """Raised when the requested voice does not exist in this provider."""
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
__all__ = [
|
|
87
|
+
"TTSProvider",
|
|
88
|
+
"TTSVoice",
|
|
89
|
+
"TTSError",
|
|
90
|
+
"TTSVoiceNotFoundError",
|
|
91
|
+
]
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Groq Orpheus TTS provider.
|
|
3
|
+
|
|
4
|
+
Ref: future-dev-plans/02-PROVIDER-SYSTEMS.md (GroqProvider section)
|
|
5
|
+
Fallback TTS when Supertonic is unavailable.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
import os
|
|
12
|
+
from typing import Any, Dict, List
|
|
13
|
+
|
|
14
|
+
from providers.tts.base import TTSError, TTSProvider
|
|
15
|
+
from providers.registry import ProviderType, registry
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class GroqTTSProvider(TTSProvider):
    """Groq Orpheus cloud TTS provider.

    Config keys read from ``self._config``:
        api_key: API key; ignored when empty or an unexpanded ``${...}``
            placeholder, in which case ``GROQ_API_KEY`` is read from the
            environment instead.
        model:   model name (default ``"canopylabs/orpheus-v1-english"``).
        voice:   default voice ID (default ``"autumn"``).
        name:    display name reported by :meth:`get_info`.
    """

    def __init__(self, config: Dict[str, Any] = None) -> None:
        super().__init__(config)
        self.api_key = self._resolve_api_key()
        self.model = self._config.get("model", "canopylabs/orpheus-v1-english")
        self.default_voice = self._config.get("voice", "autumn")

    def _resolve_api_key(self) -> str:
        """Return the configured API key, falling back to ``GROQ_API_KEY``."""
        key = self._config.get("api_key", "")
        # A value still starting with "${" is treated as an unsubstituted
        # template placeholder rather than a real key.
        if key and not key.startswith("${"):
            return key
        return os.getenv("GROQ_API_KEY", "")

    def generate_speech(self, text: str, **kwargs) -> bytes:
        """Synthesize ``text`` with Groq and return the MP3 bytes.

        Args:
            text: text to speak; validated with ``validate_text``.
            **kwargs: ``voice`` selects the voice ID. A missing, ``None`` or
                empty value falls back to the configured default voice.

        Raises:
            ValueError: if ``text`` fails validation.
            TTSError: if no API key is set, the ``groq`` package is missing,
                or the API call fails.
        """
        self.validate_text(text)
        if not self.api_key:
            raise TTSError("groq", "GROQ_API_KEY not set")

        try:
            import groq as groq_lib  # type: ignore
        except ImportError as exc:
            raise TTSError("groq", "groq library not installed: pip install groq") from exc

        # `or` (not a .get default) so an explicit voice=None / "" from the
        # caller still resolves to the default instead of reaching the API.
        voice = kwargs.get("voice") or self.default_voice
        try:
            client = groq_lib.Groq(api_key=self.api_key)
            response = client.audio.speech.create(
                model=self.model,
                voice=voice,
                input=text,
                response_format="mp3",
            )
            return response.read()
        except Exception as exc:
            raise TTSError("groq", f"Generation failed: {exc}") from exc

    def list_voices(self) -> List[str]:
        """Return the hard-coded list of voice IDs offered by this provider."""
        return ["autumn", "diana", "hannah", "austin", "daniel", "troy"]

    def is_available(self) -> bool:
        """Return True when an API key was resolved (no network check is made)."""
        return bool(self.api_key)

    def get_info(self) -> Dict[str, Any]:
        """Return name, status, model and availability for this provider."""
        available = self.is_available()
        return {
            "name": self._config.get("name", "Groq Orpheus"),
            "status": "active" if available else "inactive",
            "model": self.model,
            "available": available,
        }
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
# Auto-register when this module is imported
|
|
74
|
+
registry.register(ProviderType.TTS, "groq", GroqTTSProvider)
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Supertonic ONNX TTS provider wrapper.
|
|
3
|
+
|
|
4
|
+
Wraps the existing tts_providers/supertonic_provider.py to conform to the
|
|
5
|
+
unified providers.tts.base.TTSProvider interface (ADR-003).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
from typing import Any, Dict, List
|
|
12
|
+
|
|
13
|
+
from providers.tts.base import TTSError, TTSProvider
|
|
14
|
+
from providers.registry import ProviderType, registry
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class SupertonicProvider(TTSProvider):
    """Supertonic local ONNX TTS provider.

    Thin adapter that delegates the actual work to
    ``tts_providers.supertonic_provider.SupertonicProvider``.

    Config keys read from ``self._config``:
        onnx_dir:      ONNX asset directory; defaults to the
            ``SUPERTONIC_ONNX_DIR`` environment variable, then
            ``~/supertonic/assets/onnx``.
        default_voice: voice used when the caller gives none (default ``"M1"``).
        name:          display name reported by :meth:`get_info`.
    """

    def __init__(self, config: Dict[str, Any] = None) -> None:
        super().__init__(config)
        import os

        fallback_dir = os.environ.get(
            "SUPERTONIC_ONNX_DIR",
            os.path.expanduser("~/supertonic/assets/onnx"),
        )
        self.onnx_dir = self._config.get("onnx_dir", fallback_dir)
        self.default_voice = self._config.get("default_voice", "M1")
        self._delegate = None

    def _get_delegate(self):
        """Lazy-load the underlying provider to avoid import-time crashes.

        Raises:
            TTSError: if importing or constructing the implementation fails.
        """
        if self._delegate is None:
            try:
                from tts_providers.supertonic_provider import SupertonicProvider as _Impl  # type: ignore
                self._delegate = _Impl()
            except Exception as exc:
                raise TTSError("supertonic", f"Failed to load Supertonic: {exc}") from exc
        return self._delegate

    def generate_speech(self, text: str, **kwargs) -> bytes:
        """Synthesize ``text`` through the delegate and return its audio bytes.

        Args:
            text: text to speak; validated with ``validate_text``.
            **kwargs: ``voice`` selects the voice (falls back to the default
                voice when missing, ``None`` or empty); everything else is
                forwarded to the delegate unchanged.

        Raises:
            ValueError: if ``text`` fails validation.
            TTSError: if the delegate cannot be loaded or generation fails.
        """
        self.validate_text(text)
        # Remove "voice" from the forwarded options: passing it both as an
        # explicit keyword and inside **kwargs raises
        # "TypeError: got multiple values for keyword argument 'voice'".
        options = dict(kwargs)
        voice = options.pop("voice", None) or self.default_voice
        try:
            return self._get_delegate().generate_speech(text, voice=voice, **options)
        except Exception as exc:
            raise TTSError("supertonic", f"Generation failed: {exc}") from exc

    def list_voices(self) -> List[str]:
        """Return the delegate's voice IDs, or just the default voice on failure."""
        try:
            return self._get_delegate().list_voices()
        except Exception:
            # Deliberate best-effort fallback; logged so the failure is not
            # completely invisible.
            logger.debug("Supertonic list_voices failed; using default voice", exc_info=True)
            return [self.default_voice]

    def is_available(self) -> bool:
        """Return the implementation's availability, or False on any error.

        A fresh implementation instance is built for each check; the cached
        delegate is not touched.
        """
        try:
            from tts_providers.supertonic_provider import SupertonicProvider as _Impl  # type: ignore
            instance = _Impl()
            return instance.is_available()
        except Exception:
            return False

    def get_info(self) -> Dict[str, Any]:
        """Return name, status, ONNX directory, default voice and availability."""
        # Probe once: each is_available() call constructs the implementation.
        available = self.is_available()
        return {
            "name": self._config.get("name", "Supertonic ONNX"),
            "status": "active" if available else "inactive",
            "onnx_dir": self.onnx_dir,
            "default_voice": self.default_voice,
            "available": available,
        }
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
# Auto-register when this module is imported
|
|
72
|
+
registry.register(ProviderType.TTS, "supertonic", SupertonicProvider)
|
package/requirements.txt
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# Python 3.10+ required
|
|
2
|
+
# Core framework
|
|
3
|
+
Flask==3.1.3
|
|
4
|
+
flask-cors==6.0.2
|
|
5
|
+
flask-limiter==4.1.1
|
|
6
|
+
flask-sock==0.7.0
|
|
7
|
+
|
|
8
|
+
# WebSocket & HTTP
|
|
9
|
+
websockets==16.0
|
|
10
|
+
requests==2.32.5
|
|
11
|
+
|
|
12
|
+
# Environment & config
|
|
13
|
+
python-dotenv==1.2.1
|
|
14
|
+
|
|
15
|
+
# System monitoring
|
|
16
|
+
psutil==7.2.2
|
|
17
|
+
|
|
18
|
+
# Speech-to-text (Faster Whisper — optional, for local STT)
|
|
19
|
+
faster-whisper==1.2.1
|
|
20
|
+
|
|
21
|
+
# TTS providers
|
|
22
|
+
groq==1.0.0 # Groq Orpheus TTS (required if using groq provider)
|
|
23
|
+
|
|
24
|
+
# Auth (Clerk JWT — optional, only if CANVAS_REQUIRE_AUTH=true)
|
|
25
|
+
PyJWT==2.11.0
|
|
26
|
+
cryptography==46.0.5
|
|
27
|
+
|
|
28
|
+
# Face recognition (local, free — runs on-server, no API calls)
|
|
29
|
+
deepface>=0.0.93 # DeepFace biometric face recognition
|
|
30
|
+
tf-keras>=2.19.0 # Required by deepface (Keras backend)
|
|
31
|
+
|
|
32
|
+
# AI providers (optional)
|
|
33
|
+
google-generativeai==0.8.6 # Only if using Gemini
|
|
34
|
+
|
|
35
|
+
# Supertonic local TTS (optional — install manually if using local ONNX TTS)
|
|
36
|
+
# onnxruntime>=1.23.1
|
|
37
|
+
# soundfile>=0.12.1
|
|
38
|
+
# librosa>=0.10.0
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
# routes package — Flask Blueprints for ai-eyes2 (Phase 2 refactor)
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
# Base directory for all persistent data (uploads, music, faces, etc.).
|
|
7
|
+
# Default: app root (backward-compatible with non-Docker installs).
|
|
8
|
+
# Docker sets DATA_DIR=/app/data via docker-compose environment.
|
|
9
|
+
# Application root: the parent of the directory that contains this file.
APP_ROOT = Path(__file__).parent.parent
# Persistent-data base directory: the DATA_DIR environment variable when set,
# otherwise the application root.
DATA_DIR = Path(os.environ.get('DATA_DIR', str(APP_ROOT)))
|