openvoiceui 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. package/.env.example +104 -0
  2. package/Dockerfile +30 -0
  3. package/LICENSE +21 -0
  4. package/README.md +638 -0
  5. package/SETUP.md +360 -0
  6. package/app.py +232 -0
  7. package/auto-approve-devices.js +111 -0
  8. package/cli/index.js +372 -0
  9. package/config/__init__.py +4 -0
  10. package/config/default.yaml +43 -0
  11. package/config/flags.yaml +67 -0
  12. package/config/loader.py +203 -0
  13. package/config/providers.yaml +71 -0
  14. package/config/speech_normalization.yaml +182 -0
  15. package/config/theme.json +4 -0
  16. package/data/greetings.json +25 -0
  17. package/default-pages/ai-image-creator.html +915 -0
  18. package/default-pages/bulk-image-uploader.html +492 -0
  19. package/default-pages/desktop.html +2865 -0
  20. package/default-pages/file-explorer.html +854 -0
  21. package/default-pages/interactive-map.html +655 -0
  22. package/default-pages/style-guide.html +1005 -0
  23. package/default-pages/website-setup.html +1623 -0
  24. package/deploy/openclaw/Dockerfile +46 -0
  25. package/deploy/openvoiceui.service +30 -0
  26. package/deploy/setup-nginx.sh +50 -0
  27. package/deploy/setup-sudo.sh +306 -0
  28. package/deploy/skill-runner/Dockerfile +19 -0
  29. package/deploy/skill-runner/requirements.txt +14 -0
  30. package/deploy/skill-runner/server.py +269 -0
  31. package/deploy/supertonic/Dockerfile +22 -0
  32. package/deploy/supertonic/server.py +79 -0
  33. package/docker-compose.pinokio.yml +11 -0
  34. package/docker-compose.yml +59 -0
  35. package/greetings.json +25 -0
  36. package/index.html +65 -0
  37. package/inject-device-identity.js +142 -0
  38. package/package.json +82 -0
  39. package/profiles/default.json +114 -0
  40. package/profiles/manager.py +354 -0
  41. package/profiles/schema.json +337 -0
  42. package/prompts/voice-system-prompt.md +149 -0
  43. package/providers/__init__.py +39 -0
  44. package/providers/base.py +63 -0
  45. package/providers/llm/__init__.py +12 -0
  46. package/providers/llm/base.py +71 -0
  47. package/providers/llm/clawdbot_provider.py +112 -0
  48. package/providers/llm/zai_provider.py +115 -0
  49. package/providers/registry.py +320 -0
  50. package/providers/stt/__init__.py +12 -0
  51. package/providers/stt/base.py +58 -0
  52. package/providers/stt/webspeech_provider.py +49 -0
  53. package/providers/stt/whisper_provider.py +100 -0
  54. package/providers/tts/__init__.py +20 -0
  55. package/providers/tts/base.py +91 -0
  56. package/providers/tts/groq_provider.py +74 -0
  57. package/providers/tts/supertonic_provider.py +72 -0
  58. package/requirements.txt +38 -0
  59. package/routes/__init__.py +10 -0
  60. package/routes/admin.py +515 -0
  61. package/routes/canvas.py +1315 -0
  62. package/routes/chat.py +51 -0
  63. package/routes/conversation.py +2158 -0
  64. package/routes/elevenlabs_hybrid.py +306 -0
  65. package/routes/greetings.py +98 -0
  66. package/routes/icons.py +279 -0
  67. package/routes/image_gen.py +364 -0
  68. package/routes/instructions.py +190 -0
  69. package/routes/music.py +838 -0
  70. package/routes/onboarding.py +43 -0
  71. package/routes/pi.py +62 -0
  72. package/routes/profiles.py +215 -0
  73. package/routes/report_issue.py +68 -0
  74. package/routes/static_files.py +533 -0
  75. package/routes/suno.py +664 -0
  76. package/routes/theme.py +81 -0
  77. package/routes/transcripts.py +199 -0
  78. package/routes/vision.py +348 -0
  79. package/routes/workspace.py +288 -0
  80. package/server.py +1510 -0
  81. package/services/__init__.py +1 -0
  82. package/services/auth.py +143 -0
  83. package/services/canvas_versioning.py +239 -0
  84. package/services/db_pool.py +107 -0
  85. package/services/gateway.py +16 -0
  86. package/services/gateway_manager.py +333 -0
  87. package/services/gateways/__init__.py +12 -0
  88. package/services/gateways/base.py +110 -0
  89. package/services/gateways/compat.py +264 -0
  90. package/services/gateways/openclaw.py +1134 -0
  91. package/services/health.py +100 -0
  92. package/services/memory_client.py +455 -0
  93. package/services/paths.py +26 -0
  94. package/services/speech_normalizer.py +285 -0
  95. package/services/tts.py +270 -0
  96. package/setup-config.js +262 -0
  97. package/sounds/air_horn.mp3 +0 -0
  98. package/sounds/bruh.mp3 +0 -0
  99. package/sounds/crowd_cheer.mp3 +0 -0
  100. package/sounds/gunshot.mp3 +0 -0
  101. package/sounds/impact.mp3 +0 -0
  102. package/sounds/lets_go.mp3 +0 -0
  103. package/sounds/record_stop.mp3 +0 -0
  104. package/sounds/rewind.mp3 +0 -0
  105. package/sounds/sad_trombone.mp3 +0 -0
  106. package/sounds/scratch_long.mp3 +0 -0
  107. package/sounds/yeah.mp3 +0 -0
  108. package/src/adapters/ClawdBotAdapter.js +264 -0
  109. package/src/adapters/_template.js +133 -0
  110. package/src/adapters/elevenlabs-classic.js +841 -0
  111. package/src/adapters/elevenlabs-hybrid.js +812 -0
  112. package/src/adapters/hume-evi.js +676 -0
  113. package/src/admin.html +1339 -0
  114. package/src/app.js +8802 -0
  115. package/src/core/Config.js +173 -0
  116. package/src/core/EmotionEngine.js +307 -0
  117. package/src/core/EventBridge.js +180 -0
  118. package/src/core/EventBus.js +117 -0
  119. package/src/core/VoiceSession.js +607 -0
  120. package/src/face/BaseFace.js +259 -0
  121. package/src/face/EyeFace.js +208 -0
  122. package/src/face/HaloSmokeFace.js +509 -0
  123. package/src/face/manifest.json +27 -0
  124. package/src/face/previews/eyes.svg +16 -0
  125. package/src/face/previews/orb.svg +29 -0
  126. package/src/features/MusicPlayer.js +620 -0
  127. package/src/features/Soundboard.js +128 -0
  128. package/src/providers/DeepgramSTT.js +472 -0
  129. package/src/providers/DeepgramStreamingSTT.js +766 -0
  130. package/src/providers/GroqSTT.js +559 -0
  131. package/src/providers/TTSPlayer.js +323 -0
  132. package/src/providers/WebSpeechSTT.js +479 -0
  133. package/src/providers/tts/BaseTTSProvider.js +81 -0
  134. package/src/providers/tts/HumeProvider.js +77 -0
  135. package/src/providers/tts/SupertonicProvider.js +174 -0
  136. package/src/providers/tts/index.js +140 -0
  137. package/src/shell/adapter-registry.js +154 -0
  138. package/src/shell/caller-bridge.js +35 -0
  139. package/src/shell/camera-bridge.js +28 -0
  140. package/src/shell/canvas-bridge.js +32 -0
  141. package/src/shell/commercial-bridge.js +44 -0
  142. package/src/shell/face-bridge.js +44 -0
  143. package/src/shell/music-bridge.js +60 -0
  144. package/src/shell/orchestrator.js +233 -0
  145. package/src/shell/profile-discovery.js +303 -0
  146. package/src/shell/sounds-bridge.js +28 -0
  147. package/src/shell/transcript-bridge.js +61 -0
  148. package/src/shell/waveform-bridge.js +33 -0
  149. package/src/styles/base.css +2862 -0
  150. package/src/styles/face.css +417 -0
  151. package/src/styles/pi-overrides.css +89 -0
  152. package/src/styles/theme-dark.css +67 -0
  153. package/src/test-tts.html +175 -0
  154. package/src/ui/AppShell.js +544 -0
  155. package/src/ui/ProfileSwitcher.js +228 -0
  156. package/src/ui/SessionControl.js +240 -0
  157. package/src/ui/face/FacePicker.js +195 -0
  158. package/src/ui/face/FaceRenderer.js +309 -0
  159. package/src/ui/settings/PlaylistEditor.js +366 -0
  160. package/src/ui/settings/SettingsPanel.css +684 -0
  161. package/src/ui/settings/SettingsPanel.js +419 -0
  162. package/src/ui/settings/TTSVoicePreview.js +210 -0
  163. package/src/ui/themes/ThemeManager.js +213 -0
  164. package/src/ui/visualizers/BaseVisualizer.js +29 -0
  165. package/src/ui/visualizers/PartyFXVisualizer.css +291 -0
  166. package/src/ui/visualizers/PartyFXVisualizer.js +637 -0
  167. package/static/emulators/jsdos/js-dos.css +1 -0
  168. package/static/emulators/jsdos/js-dos.js +22 -0
  169. package/static/favicon.svg +55 -0
  170. package/static/icons/apple-touch-icon.png +0 -0
  171. package/static/icons/favicon-32.png +0 -0
  172. package/static/icons/icon-192.png +0 -0
  173. package/static/icons/icon-512.png +0 -0
  174. package/static/install.html +449 -0
  175. package/static/manifest.json +26 -0
  176. package/static/sw.js +21 -0
  177. package/tts_providers/__init__.py +136 -0
  178. package/tts_providers/base_provider.py +319 -0
  179. package/tts_providers/groq_provider.py +155 -0
  180. package/tts_providers/hume_provider.py +226 -0
  181. package/tts_providers/providers_config.json +119 -0
  182. package/tts_providers/qwen3_provider.py +371 -0
  183. package/tts_providers/resemble_provider.py +315 -0
  184. package/tts_providers/supertonic_provider.py +557 -0
  185. package/tts_providers/supertonic_tts.py +399 -0
@@ -0,0 +1,112 @@
1
+ """
2
+ Clawdbot Gateway WebSocket LLM provider.
3
+
4
+ Ref: future-dev-plans/02-PROVIDER-SYSTEMS.md (ClawdbotProvider section)
5
+ Routes messages through the Clawdbot Gateway WebSocket for full agent context.
6
+ """
7
+
8
+ import json
9
+ import os
10
+ import time
11
+ from typing import Any, Dict, Iterator, List, Optional
12
+
13
+ from providers.llm.base import LLMError, LLMProvider, LLMResponse
14
+ from providers.registry import ProviderType, registry
15
+
16
+
17
class ClawdbotProvider(LLMProvider):
    """Clawdbot Gateway WebSocket provider.

    Every generate() call opens a fresh WebSocket to the gateway, performs
    the challenge handshake, sends the most recent user message and collects
    reply frames until a terminal frame arrives.
    """

    # Frame types that mark the end of a reply.
    _TERMINAL_FRAMES = ("chat.done", "chat.final")

    def __init__(self, config: Optional[Dict[str, Any]] = None) -> None:
        """Read gateway settings from config, falling back to environment variables.

        Args:
            config: Optional provider config. Recognised keys: ``gateway_url``,
                ``auth_token``, ``default_agent`` (and ``name``, used by get_info()).
        """
        super().__init__(config)
        self.gateway_url = (
            self._config.get("gateway_url")
            or os.getenv("CLAWDBOT_GATEWAY_URL", "ws://127.0.0.1:18791")
        )
        self.auth_token = (
            self._config.get("auth_token")
            or os.getenv("CLAWDBOT_AUTH_TOKEN", "")
        )
        self.default_agent = self._config.get("default_agent", "main")
        self.default_model = "glm-4-7-flash"  # Gateway uses Z.AI/GLM

    def generate(
        self,
        messages: List[Dict[str, str]],
        system_prompt: Optional[str] = None,
        model: Optional[str] = None,
        **kwargs,
    ) -> LLMResponse:
        """Send the latest user message through the gateway and return the reply.

        Args:
            messages: Chat history; only the most recent ``role == "user"``
                entry is sent (an empty string when there is none).
            system_prompt: Accepted for interface compatibility; not sent to
                the gateway.
            model: Accepted for interface compatibility; the response always
                reports ``self.default_model``.
            **kwargs: ``agent`` selects the gateway agent (defaults to
                ``self.default_agent``).

        Returns:
            LLMResponse carrying the collected reply text and the latency.

        Raises:
            LLMError: if websocket-client is not installed, or on any failure
                while talking to the gateway.
        """
        try:
            import websocket  # type: ignore
        except ImportError as exc:
            raise LLMError("clawdbot", "websocket-client library not installed") from exc

        user_message = self._last_user_message(messages)
        agent = kwargs.get("agent", self.default_agent)

        start = time.time()
        try:
            ws = websocket.create_connection(self.gateway_url, timeout=10)
            try:
                self._handshake(ws)
                ws.send(json.dumps({"type": "chat.send", "content": user_message, "agent": agent}))
                content = self._collect_reply(ws)
            finally:
                # Always release the socket, even when the exchange fails midway.
                ws.close()
        except Exception as exc:
            raise LLMError("clawdbot", f"Gateway error: {exc}") from exc

        latency_ms = (time.time() - start) * 1000
        return LLMResponse(
            content=content,
            model=self.default_model,
            provider="clawdbot",
            usage={},
            latency_ms=latency_ms,
        )

    @staticmethod
    def _last_user_message(messages: List[Dict[str, str]]) -> str:
        """Return the content of the most recent user message, or "" if none exists."""
        for msg in reversed(messages):
            if msg.get("role") == "user":
                return msg.get("content", "")
        return ""

    def _handshake(self, ws: Any) -> None:
        """Run the challenge/response handshake on an open gateway socket."""
        ws.send(json.dumps({"type": "connect.challenge", "token": self.auth_token}))
        challenge = json.loads(ws.recv())
        ws.send(json.dumps({"type": "connect", "response": challenge.get("challenge", "")}))
        ws.recv()  # hello frame

    def _collect_reply(self, ws: Any) -> str:
        """Read frames until a terminal frame and return the assembled reply text."""
        chunks: List[str] = []
        while True:
            frame = json.loads(ws.recv())
            frame_type = frame.get("type")
            if frame_type == "chat.response":
                # A frame may carry "content": null; treat it as an empty chunk.
                chunks.append(frame.get("content") or "")
            elif frame_type in self._TERMINAL_FRAMES:
                # Fall back to the terminal frame's own content when nothing
                # was streamed before it.
                return "".join(chunks) or (frame.get("content") or "")

    def generate_stream(
        self,
        messages: List[Dict[str, str]],
        system_prompt: Optional[str] = None,
        model: Optional[str] = None,
        **kwargs,
    ) -> Iterator[str]:
        """Yield the complete reply as a single chunk.

        Full streaming would require a persistent connection (PG-T2), so this
        delegates to generate().
        """
        response = self.generate(messages, system_prompt, model, **kwargs)
        yield response.content

    def is_available(self) -> bool:
        """Return True when an auth token is configured."""
        return bool(self.auth_token)

    def get_info(self) -> Dict[str, Any]:
        """Return the base provider info extended with the name and gateway URL."""
        info = super().get_info()
        info["name"] = self._config.get("name", "Clawdbot Gateway")
        info["gateway_url"] = self.gateway_url
        return info


# Auto-register when this module is imported
registry.register(ProviderType.LLM, "clawdbot", ClawdbotProvider)
@@ -0,0 +1,115 @@
1
+ """
2
+ Z.AI GLM-4 LLM provider.
3
+
4
+ Ref: future-dev-plans/02-PROVIDER-SYSTEMS.md (ZAIProvider section)
5
+ IMPORTANT: This is the primary LLM backend for clawdbot (ADR — NEVER switch clawdbot to Anthropic).
6
+ """
7
+
8
+ import os
9
+ import time
10
+ from typing import Any, Dict, Iterator, List, Optional
11
+
12
+ from providers.llm.base import LLMError, LLMProvider, LLMResponse
13
+ from providers.registry import ProviderType, registry
14
+
15
+
16
class ZAIProvider(LLMProvider):
    """Z.AI GLM-4 provider via REST API."""

    # NOTE(review): this host does not match the vendor named in the docstring;
    # confirm it is the intended endpoint, since the API key is sent to it.
    API_URL = "https://api.zukijourney.com/v1/chat/completions"

    def __init__(self, config: Optional[Dict[str, Any]] = None) -> None:
        """Resolve the API key and default model from config / environment.

        Args:
            config: Optional provider config. Recognised keys: ``api_key``,
                ``default_model`` (and ``name``, used by get_info()).
        """
        super().__init__(config)
        self.api_key = self._resolve_api_key()
        self.default_model = self._config.get("default_model", "glm-4-7-flash")

    def _resolve_api_key(self) -> str:
        """Return the configured API key, or ``ZAI_API_KEY`` from the environment.

        A config value that still starts with ``${`` is an unresolved
        placeholder and is ignored in favour of the environment variable.
        """
        key = self._config.get("api_key", "")
        if key and not key.startswith("${"):
            return key
        return os.getenv("ZAI_API_KEY", "")

    def generate(
        self,
        messages: List[Dict[str, str]],
        system_prompt: Optional[str] = None,
        model: Optional[str] = None,
        **kwargs,
    ) -> LLMResponse:
        """Request a chat completion and return it as an LLMResponse.

        Args:
            messages: Chat history in role/content form.
            system_prompt: Optional system message prepended to ``messages``.
            model: Model name; defaults to ``self.default_model``.
            **kwargs: Optional overrides — ``max_tokens`` (512), ``timeout``
                (30), ``temperature`` (0.7), ``frequency_penalty`` (0.5),
                ``presence_penalty`` (0.3).

        Raises:
            LLMError: if ``requests`` is not installed, the HTTP request
                fails, or the response body does not have the expected shape.
        """
        try:
            import requests  # type: ignore
        except ImportError as exc:
            raise LLMError("zai", "requests library not installed") from exc

        model = model or self.default_model
        max_tokens = kwargs.get("max_tokens", 512)
        timeout = kwargs.get("timeout", 30)

        # Temperature and penalty settings for natural conversation
        temperature = kwargs.get("temperature", 0.7)
        frequency_penalty = kwargs.get("frequency_penalty", 0.5)
        presence_penalty = kwargs.get("presence_penalty", 0.3)

        full_messages: List[Dict[str, str]] = []
        if system_prompt:
            full_messages.append({"role": "system", "content": system_prompt})
        full_messages.extend(messages)

        start = time.time()
        try:
            resp = requests.post(
                self.API_URL,
                headers={
                    "Authorization": f"Bearer {self.api_key}",
                    "Content-Type": "application/json",
                },
                json={
                    "model": model,
                    "messages": full_messages,
                    "max_tokens": max_tokens,
                    "temperature": temperature,
                    "frequency_penalty": frequency_penalty,
                    "presence_penalty": presence_penalty,
                },
                timeout=timeout,
            )
            resp.raise_for_status()
        except Exception as exc:
            raise LLMError("zai", f"API request failed: {exc}") from exc

        # Parse inside a guarded block so a malformed body surfaces as
        # LLMError rather than a raw ValueError/KeyError/IndexError.
        try:
            data = resp.json()
            choice = data["choices"][0]
            content = choice["message"]["content"]
            finish_reason = choice.get("finish_reason", "stop")
            usage = data.get("usage", {})
        except (ValueError, KeyError, IndexError, TypeError, AttributeError) as exc:
            raise LLMError("zai", f"Unexpected API response: {exc!r}") from exc

        latency_ms = (time.time() - start) * 1000

        return LLMResponse(
            content=content,
            model=model,
            provider="zai",
            usage=usage,
            latency_ms=latency_ms,
            finish_reason=finish_reason,
            raw_response=data,
        )

    def generate_stream(
        self,
        messages: List[Dict[str, str]],
        system_prompt: Optional[str] = None,
        model: Optional[str] = None,
        **kwargs,
    ) -> Iterator[str]:
        """Yield the complete reply as a single chunk.

        Z.AI REST API does not support streaming — fall back to non-streaming.
        """
        response = self.generate(messages, system_prompt, model, **kwargs)
        yield response.content

    def is_available(self) -> bool:
        """Return True when an API key is configured."""
        return bool(self.api_key)

    def get_info(self) -> Dict[str, Any]:
        """Return the base provider info with the display name filled in."""
        info = super().get_info()
        info["name"] = self._config.get("name", "Z.AI GLM")
        return info


# Auto-register when this module is imported
registry.register(ProviderType.LLM, "zai", ZAIProvider)
@@ -0,0 +1,320 @@
1
+ """
2
+ Provider registry with singleton pattern and auto-discovery.
3
+
4
+ P5-T2: Provider registry + auto-discovery
5
+ ADR-003: Abstract base class + registry pattern
6
+ Ref: future-dev-plans/02-PROVIDER-SYSTEMS.md (PluginRegistry section)
7
+
8
+ Usage:
9
+ from providers.registry import registry, ProviderType
10
+
11
+ # Register a provider
12
+ registry.register(ProviderType.TTS, 'supertonic', SupertonicProvider)
13
+
14
+ # Get a provider instance
15
+ tts = registry.get_provider(ProviderType.TTS) # default
16
+ tts = registry.get_provider(ProviderType.TTS, 'groq') # specific
17
+
18
+ # List available providers
19
+ providers = registry.list_providers(ProviderType.TTS)
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ import importlib
25
+ import logging
26
+ import os
27
+ from enum import Enum
28
+ from pathlib import Path
29
+ from typing import Any, Dict, List, Optional, Type
30
+
31
+ logger = logging.getLogger(__name__)
32
+
33
+
34
class ProviderType(Enum):
    """Kinds of provider the registry tracks; each value is the type's YAML section name."""

    LLM = "llm"  # language-model backends
    TTS = "tts"  # text-to-speech backends
    STT = "stt"  # speech-to-text backends
38
+
39
+
40
class ProviderRegistry:
    """Singleton registry for all provider types (LLM, TTS, STT).

    Providers are registered with a unique string ID per type.
    get_provider() returns an instantiated provider with config merged from
    the providers YAML (if loaded) and any explicit config passed at
    register() time.
    """

    _instance: Optional["ProviderRegistry"] = None

    def __init__(self) -> None:
        # provider_type -> {provider_id: provider_class}
        self._providers: Dict[ProviderType, Dict[str, Type]] = {
            provider_type: {} for provider_type in ProviderType
        }
        # Static config passed at register() time, keyed by (type, id) so the
        # same ID registered under two provider types cannot share config.
        self._static_configs: Dict[tuple, Dict] = {}
        # YAML config; stays None until autodiscover() loads a file.
        self._yaml_config: Optional[Dict] = None

    # ------------------------------------------------------------------
    # Singleton
    # ------------------------------------------------------------------

    @classmethod
    def get_instance(cls) -> "ProviderRegistry":
        """Return the shared registry, creating it on first use."""
        if cls._instance is None:
            cls._instance = cls()
        return cls._instance

    # ------------------------------------------------------------------
    # Registration
    # ------------------------------------------------------------------

    def register(
        self,
        provider_type: ProviderType,
        provider_id: str,
        provider_class: Type,
        config: Optional[Dict] = None,
    ) -> None:
        """Register a provider implementation.

        Args:
            provider_type: LLM, TTS, or STT.
            provider_id: Unique string key (e.g. 'supertonic', 'zai').
            provider_class: Class (not instance) implementing the base type.
            config: Optional static config dict merged with YAML config.
        """
        self._providers[provider_type][provider_id] = provider_class
        if config:
            # Store a copy so later mutation of the caller's dict has no effect.
            self._static_configs[(provider_type, provider_id)] = dict(config)
        logger.debug("Registered %s provider: %s", provider_type.value, provider_id)

    # ------------------------------------------------------------------
    # Retrieval
    # ------------------------------------------------------------------

    def get_provider(
        self,
        provider_type: ProviderType,
        provider_id: Optional[str] = None,
    ) -> Any:
        """Return an instantiated provider.

        If provider_id is None, the default is read from providers YAML
        (<type>.default_provider) or falls back to the first registered
        provider for that type.

        Config is merged: static config (register-time) is the base,
        YAML config overrides it.

        Raises:
            ValueError: if the provider_id is not registered, or no provider
                of that type is registered at all.
        """
        if provider_id is None:
            provider_id = self._get_default_id(provider_type)

        if provider_id not in self._providers[provider_type]:
            available = list(self._providers[provider_type].keys())
            raise ValueError(
                f"Unknown {provider_type.value} provider: '{provider_id}'. "
                f"Available: {available}"
            )

        provider_class = self._providers[provider_type][provider_id]
        merged_config = self._build_config(provider_type, provider_id)

        return provider_class(merged_config)

    def list_providers(
        self,
        provider_type: ProviderType,
        include_unavailable: bool = False,
    ) -> List[Dict]:
        """Return a sorted list of provider metadata dicts.

        Each dict has keys: id, name, available, priority, info.
        Sorted ascending by priority (lower number = higher priority).
        Providers that fail to instantiate or probe are reported as
        unavailable rather than raising.
        """
        results = []
        for pid, provider_class in self._providers[provider_type].items():
            config = self._build_config(provider_type, pid)
            try:
                instance = provider_class(config)
                available = instance.is_available()
                info = instance.get_info()
            except Exception as exc:
                logger.warning("Error probing provider %s: %s", pid, exc)
                available = False
                info = {"name": pid, "status": "error"}

            if include_unavailable or available:
                results.append(
                    {
                        "id": pid,
                        "name": info.get("name", pid),
                        "available": available,
                        "priority": config.get("priority", 100),
                        "info": info,
                    }
                )

        return sorted(results, key=lambda p: p["priority"])

    # ------------------------------------------------------------------
    # Auto-discovery
    # ------------------------------------------------------------------

    def autodiscover(self, providers_yaml_path: Optional[str] = None) -> None:
        """Load providers.yaml and import/register configured providers.

        This is the auto-discovery mechanism: the YAML file declares which
        provider modules to load, and this method imports them so their
        register() calls fire automatically.

        If providers_yaml_path is None, defaults to config/providers.yaml
        relative to the project root (detected from this file's location).
        A missing PyYAML install or a missing file is logged and skipped.
        """
        if providers_yaml_path is None:
            providers_yaml_path = self._default_yaml_path()

        try:
            import yaml  # type: ignore
        except ImportError:
            logger.warning("PyYAML not installed — skipping autodiscover")
            return

        path = Path(providers_yaml_path)
        if not path.exists():
            logger.debug("providers.yaml not found at %s — skipping autodiscover", path)
            return

        with open(path, encoding="utf-8") as f:
            loaded = yaml.safe_load(f) or {}

        if not isinstance(loaded, dict):
            # A list or scalar at the top level cannot hold per-type sections.
            logger.warning("Ignoring providers config at %s: top level is not a mapping", path)
            loaded = {}
        self._yaml_config = loaded

        logger.info("Loaded providers config from %s", path)

        # Import each provider sub-package so their register() calls fire.
        # The 'modules' key in each provider type section lists module paths.
        for provider_type in ProviderType:
            # "modules:" with no value loads as None, hence the "or []".
            modules = self._yaml_section(provider_type).get("modules") or []
            for module_path in modules:
                try:
                    importlib.import_module(module_path)
                    logger.debug("Auto-imported provider module: %s", module_path)
                except ImportError as exc:
                    logger.warning("Could not import provider module %s: %s", module_path, exc)

    # ------------------------------------------------------------------
    # Introspection helpers
    # ------------------------------------------------------------------

    def registered_ids(self, provider_type: ProviderType) -> List[str]:
        """Return list of registered provider IDs for a type."""
        return list(self._providers[provider_type].keys())

    def is_registered(self, provider_type: ProviderType, provider_id: str) -> bool:
        """Return True if provider_id is registered under provider_type."""
        return provider_id in self._providers[provider_type]

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _yaml_section(self, provider_type: ProviderType) -> Dict:
        """Return the YAML section for a type, or {} when absent or not a mapping."""
        if not self._yaml_config:
            return {}
        section = self._yaml_config.get(provider_type.value)
        # An empty key such as "llm:" loads as None rather than a dict.
        return section if isinstance(section, dict) else {}

    def _get_default_id(self, provider_type: ProviderType) -> str:
        """Determine the default provider ID for a type.

        Raises:
            ValueError: if no provider of that type is registered.
        """
        # 1. Try YAML config
        default = self._yaml_section(provider_type).get("default_provider")
        if default and default in self._providers[provider_type]:
            return default

        # 2. Fall back to first registered
        registered = list(self._providers[provider_type].keys())
        if registered:
            return registered[0]

        raise ValueError(
            f"No {provider_type.value} providers registered. "
            "Call registry.register() or registry.autodiscover() first."
        )

    def _build_config(self, provider_type: ProviderType, provider_id: str) -> Dict:
        """Merge static + YAML config for a provider ID and resolve ${ENV_VAR} placeholders."""
        # Start with a copy of the static config (registered at register() time)
        config = dict(self._static_configs.get((provider_type, provider_id), {}))

        # Layer YAML config on top, skipping empty or malformed entries.
        providers_cfg = self._yaml_section(provider_type).get("providers")
        if isinstance(providers_cfg, dict):
            yaml_provider_cfg = providers_cfg.get(provider_id)
            if isinstance(yaml_provider_cfg, dict):
                config.update(yaml_provider_cfg)

        # Resolve ${ENV_VAR} placeholders
        return _resolve_env_vars(config)

    def _default_yaml_path(self) -> str:
        """Resolve default config/providers.yaml path from project root."""
        # This file lives at providers/registry.py; project root is one level up.
        project_root = Path(__file__).parent.parent
        return str(project_root / "config" / "providers.yaml")
266
+
267
+
268
+ # ---------------------------------------------------------------------------
269
+ # Env-var placeholder resolution
270
+ # ---------------------------------------------------------------------------
271
+
272
def _resolve_env_vars(config: Dict) -> Dict:
    """Return a copy of *config* with ${ENV_VAR} placeholders expanded.

    Strings are searched for ``${NAME}`` (upper-case letters, digits and
    underscores); each match is replaced by the environment variable's value,
    or left untouched when the variable is not set. Dicts and lists are
    walked recursively; dict keys and all other value types pass through
    unchanged.
    """
    import re

    pattern = re.compile(r"\$\{([A-Z_][A-Z0-9_]*)\}")

    def _lookup(match):
        # Keep the literal placeholder when the variable is not defined.
        return os.environ.get(match.group(1), match.group(0))

    def _walk(node: Any) -> Any:
        if isinstance(node, dict):
            return {key: _walk(child) for key, child in node.items()}
        if isinstance(node, list):
            return [_walk(child) for child in node]
        if isinstance(node, str):
            return pattern.sub(_lookup, node)
        return node

    return _walk(config)
289
+
290
+
291
+ # ---------------------------------------------------------------------------
292
+ # Module-level singleton + convenience aliases
293
+ # ---------------------------------------------------------------------------
294
+
295
# Shared registry instance; provider modules call registry.register() on it at import time.
registry = ProviderRegistry.get_instance()
296
+
297
+
298
def get_llm_provider(provider_id: Optional[str] = None) -> Any:
    """Return an LLM provider instance from the shared registry.

    With no provider_id the registry picks its default LLM provider.
    """
    provider = registry.get_provider(ProviderType.LLM, provider_id)
    return provider
301
+
302
+
303
def get_tts_provider(provider_id: Optional[str] = None) -> Any:
    """Return a TTS provider instance from the shared registry.

    With no provider_id the registry picks its default TTS provider.
    """
    provider = registry.get_provider(ProviderType.TTS, provider_id)
    return provider
306
+
307
+
308
def get_stt_provider(provider_id: Optional[str] = None) -> Any:
    """Return an STT provider instance from the shared registry.

    With no provider_id the registry picks its default STT provider.
    """
    provider = registry.get_provider(ProviderType.STT, provider_id)
    return provider
311
+
312
+
313
+ __all__ = [
314
+ "ProviderType",
315
+ "ProviderRegistry",
316
+ "registry",
317
+ "get_llm_provider",
318
+ "get_tts_provider",
319
+ "get_stt_provider",
320
+ ]
@@ -0,0 +1,12 @@
1
+ """STT provider package.
2
+
3
+ Importing this package registers all STT providers with the registry.
4
+ """
5
+
6
+ from providers.stt.base import STTProvider, TranscriptionResult, STTError
7
+
8
+ # Import concrete providers so their registry.register() calls fire
9
+ from providers.stt import webspeech_provider # noqa: F401
10
+ from providers.stt import whisper_provider # noqa: F401
11
+
12
+ __all__ = ["STTProvider", "TranscriptionResult", "STTError"]
@@ -0,0 +1,58 @@
1
+ """
2
+ STT provider abstract base class.
3
+
4
+ Based on: future-dev-plans/02-PROVIDER-SYSTEMS.md (stt_providers/base.py section)
5
+ ADR-003: Abstract base class + registry pattern.
6
+ """
7
+
8
+ from abc import abstractmethod
9
+ from dataclasses import dataclass, field
10
+ from typing import Any, Dict, List, Optional
11
+
12
+ from providers.base import BaseProvider, ProviderError
13
+
14
+
15
@dataclass
class TranscriptionResult:
    """Result of one transcription call; only ``text`` is required."""

    text: str
    confidence: float = 0.0
    language: str = "en"
    duration_ms: float = 0.0
    provider: str = ""
    # Optional per-segment details; None when not supplied.
    segments: Optional[List[Dict]] = None
23
+
24
+
25
class STTProvider(BaseProvider):
    """Abstract base class for STT providers (WebSpeech, Whisper, Deepgram, etc.)."""

    @abstractmethod
    def transcribe(
        self,
        audio_data: bytes,
        language: Optional[str] = None,
        **kwargs,
    ) -> TranscriptionResult:
        """Transcribe audio bytes to text."""
        pass

    def list_languages(self) -> List[str]:
        """Return the configured language codes, defaulting to ["en-US"]."""
        return self._config.get("languages", ["en-US"])

    def is_available(self) -> bool:
        """Return True when get_info() reports status "active"."""
        return self.get_info().get("status", "inactive") == "active"

    def get_info(self) -> Dict[str, Any]:
        """Return provider metadata: name, languages, available, status."""
        # The default is_available() is defined in terms of get_info(), so
        # calling it from here would recurse without end when a subclass
        # overrides neither method. This method always reports status
        # "active", which the default is_available() maps to True.
        if type(self).is_available is STTProvider.is_available:
            available = True
        else:
            available = self.is_available()
        return {
            "name": self._config.get("name", self.__class__.__name__),
            "languages": self.list_languages(),
            "available": available,
            "status": "active",
        }
51
+
52
+
53
class STTError(ProviderError):
    """Provider error raised by speech-to-text providers."""
56
+
57
+
58
+ __all__ = ["STTProvider", "TranscriptionResult", "STTError"]
@@ -0,0 +1,49 @@
1
+ """
2
+ Web Speech API STT provider (browser-side stub).
3
+
4
+ Ref: future-dev-plans/02-PROVIDER-SYSTEMS.md (WebSpeechProvider section)
5
+ The actual recognition runs in the browser; this is the server-side registry entry
6
+ so profiles can reference 'webspeech' as their STT provider.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from typing import Any, Dict, List, Optional
12
+
13
+ from providers.stt.base import STTError, STTProvider, TranscriptionResult
14
+ from providers.registry import ProviderType, registry
15
+
16
+
17
class WebSpeechProvider(STTProvider):
    """Registry entry for the browser Web Speech API; no server-side transcription."""

    # Language codes reported by list_languages().
    _LANGUAGES = ("en-US", "en-GB", "es-ES", "fr-FR", "de-DE", "ja-JP", "zh-CN")

    def transcribe(
        self,
        audio_data: bytes,
        language: Optional[str] = None,
        **kwargs,
    ) -> TranscriptionResult:
        """Always raise STTError: recognition runs in the browser, not here."""
        message = "WebSpeech API runs in the browser. Server-side transcription is not supported."
        raise STTError("webspeech", message)

    def is_available(self) -> bool:
        """Report the provider as available; it is a browser-side component."""
        return True

    def list_languages(self) -> List[str]:
        """Return a fresh list of the language codes this entry advertises."""
        return list(self._LANGUAGES)

    def get_info(self) -> Dict[str, Any]:
        """Return static metadata describing this browser-side provider."""
        display_name = self._config.get("name", "Web Speech API")
        info: Dict[str, Any] = {
            "name": display_name,
            "type": "browser",
            "status": "active",
        }
        info["languages"] = self.list_languages()
        info["available"] = True
        return info


# Auto-register when this module is imported
registry.register(ProviderType.STT, "webspeech", WebSpeechProvider)