openspeechapi 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118) hide show
  1. openspeech/__init__.py +75 -0
  2. openspeech/__main__.py +5 -0
  3. openspeech/cli.py +413 -0
  4. openspeech/client/__init__.py +4 -0
  5. openspeech/client/client.py +145 -0
  6. openspeech/config.py +212 -0
  7. openspeech/core/__init__.py +0 -0
  8. openspeech/core/base.py +75 -0
  9. openspeech/core/enums.py +39 -0
  10. openspeech/core/models.py +61 -0
  11. openspeech/core/registry.py +37 -0
  12. openspeech/core/settings.py +8 -0
  13. openspeech/demo.py +675 -0
  14. openspeech/dispatch/__init__.py +0 -0
  15. openspeech/dispatch/context.py +34 -0
  16. openspeech/dispatch/dispatcher.py +661 -0
  17. openspeech/dispatch/executors/__init__.py +0 -0
  18. openspeech/dispatch/executors/base.py +34 -0
  19. openspeech/dispatch/executors/in_process.py +66 -0
  20. openspeech/dispatch/executors/remote.py +64 -0
  21. openspeech/dispatch/executors/subprocess_exec.py +446 -0
  22. openspeech/dispatch/fanout.py +95 -0
  23. openspeech/dispatch/filters.py +73 -0
  24. openspeech/dispatch/lifecycle.py +178 -0
  25. openspeech/dispatch/watcher.py +82 -0
  26. openspeech/engine_catalog.py +236 -0
  27. openspeech/engine_registry.yaml +347 -0
  28. openspeech/exceptions.py +51 -0
  29. openspeech/factory.py +325 -0
  30. openspeech/local_engines/__init__.py +12 -0
  31. openspeech/local_engines/aim_resolver.py +91 -0
  32. openspeech/local_engines/backends/__init__.py +1 -0
  33. openspeech/local_engines/backends/docker_backend.py +490 -0
  34. openspeech/local_engines/backends/native_backend.py +902 -0
  35. openspeech/local_engines/base.py +30 -0
  36. openspeech/local_engines/engines/__init__.py +1 -0
  37. openspeech/local_engines/engines/faster_whisper.py +36 -0
  38. openspeech/local_engines/engines/fish_speech.py +33 -0
  39. openspeech/local_engines/engines/sherpa_onnx.py +56 -0
  40. openspeech/local_engines/engines/whisper.py +41 -0
  41. openspeech/local_engines/engines/whisperlivekit.py +60 -0
  42. openspeech/local_engines/manager.py +208 -0
  43. openspeech/local_engines/models.py +50 -0
  44. openspeech/local_engines/progress.py +69 -0
  45. openspeech/local_engines/registry.py +19 -0
  46. openspeech/local_engines/task_store.py +52 -0
  47. openspeech/local_engines/tasks.py +71 -0
  48. openspeech/logging_config.py +607 -0
  49. openspeech/observe/__init__.py +0 -0
  50. openspeech/observe/base.py +79 -0
  51. openspeech/observe/debug.py +44 -0
  52. openspeech/observe/latency.py +19 -0
  53. openspeech/observe/metrics.py +47 -0
  54. openspeech/observe/tracing.py +44 -0
  55. openspeech/observe/usage.py +27 -0
  56. openspeech/providers/__init__.py +0 -0
  57. openspeech/providers/_template.py +101 -0
  58. openspeech/providers/stt/__init__.py +0 -0
  59. openspeech/providers/stt/alibaba.py +86 -0
  60. openspeech/providers/stt/assemblyai.py +135 -0
  61. openspeech/providers/stt/azure_speech.py +99 -0
  62. openspeech/providers/stt/baidu.py +135 -0
  63. openspeech/providers/stt/deepgram.py +311 -0
  64. openspeech/providers/stt/elevenlabs.py +385 -0
  65. openspeech/providers/stt/faster_whisper.py +211 -0
  66. openspeech/providers/stt/google_cloud.py +106 -0
  67. openspeech/providers/stt/iflytek.py +427 -0
  68. openspeech/providers/stt/macos_speech.py +226 -0
  69. openspeech/providers/stt/openai.py +84 -0
  70. openspeech/providers/stt/sherpa_onnx.py +353 -0
  71. openspeech/providers/stt/tencent.py +212 -0
  72. openspeech/providers/stt/volcengine.py +107 -0
  73. openspeech/providers/stt/whisper.py +153 -0
  74. openspeech/providers/stt/whisperlivekit.py +530 -0
  75. openspeech/providers/stt/windows_speech.py +249 -0
  76. openspeech/providers/tts/__init__.py +0 -0
  77. openspeech/providers/tts/alibaba.py +95 -0
  78. openspeech/providers/tts/azure_speech.py +123 -0
  79. openspeech/providers/tts/baidu.py +143 -0
  80. openspeech/providers/tts/coqui.py +64 -0
  81. openspeech/providers/tts/cosyvoice.py +90 -0
  82. openspeech/providers/tts/deepgram.py +174 -0
  83. openspeech/providers/tts/elevenlabs.py +311 -0
  84. openspeech/providers/tts/fish_speech.py +158 -0
  85. openspeech/providers/tts/google_cloud.py +107 -0
  86. openspeech/providers/tts/iflytek.py +209 -0
  87. openspeech/providers/tts/macos_say.py +251 -0
  88. openspeech/providers/tts/minimax.py +122 -0
  89. openspeech/providers/tts/openai.py +104 -0
  90. openspeech/providers/tts/piper.py +104 -0
  91. openspeech/providers/tts/tencent.py +189 -0
  92. openspeech/providers/tts/volcengine.py +117 -0
  93. openspeech/providers/tts/windows_sapi.py +234 -0
  94. openspeech/server/__init__.py +1 -0
  95. openspeech/server/app.py +72 -0
  96. openspeech/server/auth.py +42 -0
  97. openspeech/server/middleware.py +75 -0
  98. openspeech/server/routes/__init__.py +1 -0
  99. openspeech/server/routes/management.py +848 -0
  100. openspeech/server/routes/stt.py +121 -0
  101. openspeech/server/routes/tts.py +159 -0
  102. openspeech/server/routes/webui.py +29 -0
  103. openspeech/server/webui/app.js +2649 -0
  104. openspeech/server/webui/index.html +216 -0
  105. openspeech/server/webui/styles.css +617 -0
  106. openspeech/server/ws/__init__.py +1 -0
  107. openspeech/server/ws/stt_stream.py +263 -0
  108. openspeech/server/ws/tts_stream.py +207 -0
  109. openspeech/telemetry/__init__.py +21 -0
  110. openspeech/telemetry/perf.py +307 -0
  111. openspeech/utils/__init__.py +5 -0
  112. openspeech/utils/audio_converter.py +406 -0
  113. openspeech/utils/audio_playback.py +156 -0
  114. openspeech/vendor_registry.yaml +74 -0
  115. openspeechapi-0.1.0.dist-info/METADATA +101 -0
  116. openspeechapi-0.1.0.dist-info/RECORD +118 -0
  117. openspeechapi-0.1.0.dist-info/WHEEL +4 -0
  118. openspeechapi-0.1.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,189 @@
1
+ """Tencent Cloud TTS provider adapter (TC3-HMAC-SHA256 signed)."""
2
+ from __future__ import annotations
3
+
4
+ import base64
5
+ import hashlib
6
+ import hmac
7
+ import json
8
+ from openspeech.logging_config import logger
9
+ import time
10
+ import uuid
11
+ from collections.abc import AsyncIterator
12
+ from dataclasses import dataclass
13
+ from datetime import datetime, timezone
14
+ from typing import Any
15
+
16
+ import httpx
17
+
18
+ from openspeech.core.base import TTSProvider
19
+
20
+ from openspeech.core.enums import Capability, ExecMode, ProviderType
21
+ from openspeech.core.models import AudioChunk, AudioData, TTSOptions
22
+ from openspeech.core.settings import BaseSettings
23
+
24
@dataclass
class TencentTTSSettings(BaseSettings):
    """Configuration for the Tencent Cloud TTS adapter."""

    # Credential pair used when signing requests; both default to empty.
    secret_id: str = ""
    secret_key: str = ""

    # Numeric voice identifier sent to the API as ``VoiceType``.
    voice_type: int = 1001

    # Region sent in the ``X-TC-Region`` request header.
    region: str = "ap-guangzhou"
30
+
31
class TencentTTS(TTSProvider):
    """Batch text-to-speech adapter for the Tencent Cloud ``TextToVoice`` action.

    Every request is signed with TC3-HMAC-SHA256 using the ``secret_id`` /
    ``secret_key`` pair from the settings. Streaming synthesis is not
    implemented.
    """

    name = "tencent-tts"
    provider_type = ProviderType.TTS
    execution_mode = ExecMode.IN_PROCESS
    settings_cls = TencentTTSSettings
    capabilities = {Capability.BATCH, Capability.MULTILINGUAL}
    field_options = {
        "voice_type": [
            1001, 1002, 1003, 1004, 1005, 1007, 1008, 1009, 1010,
            1017, 1018, 101001, 101002,
        ],
        "region": ["ap-guangzhou", "ap-shanghai", "ap-beijing"],
    }

    _SERVICE = "tts"
    _HOST = "tts.tencentcloudapi.com"

    def __init__(self, settings: TencentTTSSettings | None = None) -> None:
        """Store the settings; the HTTP client is created lazily by ``start()``."""
        self.settings = settings or TencentTTSSettings()
        self._client: httpx.AsyncClient | None = None
        self._owns_client: bool = True

    def set_http_client(self, client) -> None:
        """Use an externally managed HTTP client instead of creating one.

        The provider never closes a client injected this way.
        """
        self._client = client
        self._owns_client = False

    async def start(self) -> None:
        """Create a private HTTP client unless one was injected beforehand."""
        if self._client is None:
            self._client = httpx.AsyncClient(timeout=60.0)
            self._owns_client = True
        logger.info("{} provider started", self.name)

    async def stop(self) -> None:
        """Close and drop the HTTP client when this provider created it.

        An injected client is left in place; its owner is responsible for
        closing it.
        """
        if self._client is not None and self._owns_client:
            await self._client.aclose()
            self._client = None
        logger.info("{} provider stopped", self.name)

    async def health_check(self) -> bool:
        """Return ``True`` when both credentials are configured (no network call)."""
        return bool(self.settings.secret_id) and bool(self.settings.secret_key)

    # ---- TC3-HMAC-SHA256 signing ------------------------------------------------

    @staticmethod
    def _hmac_sha256(key: bytes, message: str) -> bytes:
        """Return the raw HMAC-SHA256 digest of ``message`` under ``key``."""
        return hmac.new(key, message.encode("utf-8"), hashlib.sha256).digest()

    def _sign_request(
        self, action: str, version: str, payload_json: str
    ) -> dict[str, str]:
        """Build Tencent Cloud API v3 signed headers.

        Args:
            action: API action name, sent as ``X-TC-Action``.
            version: API version string, sent as ``X-TC-Version``.
            payload_json: The exact JSON body that will be posted; its
                SHA-256 hash is part of the signature, so the caller must
                send this string unchanged.

        Returns:
            The complete header dict for the request, including
            ``Authorization``.
        """
        timestamp = int(time.time())
        # The credential scope uses the UTC calendar date of the timestamp.
        date = datetime.fromtimestamp(timestamp, tz=timezone.utc).strftime(
            "%Y-%m-%d"
        )

        # 1. Canonical request
        content_type = "application/json; charset=utf-8"
        signed_headers = "content-type;host;x-tc-action"
        canonical_headers = (
            f"content-type:{content_type}\n"
            f"host:{self._HOST}\n"
            f"x-tc-action:{action.lower()}\n"
        )
        hashed_payload = hashlib.sha256(payload_json.encode("utf-8")).hexdigest()
        # Method, URI ("/"), empty query string, headers, signed header
        # names and the payload hash, newline-separated.
        canonical_request = "\n".join(
            ["POST", "/", "", canonical_headers, signed_headers, hashed_payload]
        )

        # 2. String to sign
        algorithm = "TC3-HMAC-SHA256"
        credential_scope = f"{date}/{self._SERVICE}/tc3_request"
        hashed_canonical = hashlib.sha256(
            canonical_request.encode("utf-8")
        ).hexdigest()
        string_to_sign = (
            f"{algorithm}\n{timestamp}\n{credential_scope}\n{hashed_canonical}"
        )

        # 3. Signing key: chained HMACs over date, service and the fixed
        #    "tc3_request" terminator.
        secret_date = self._hmac_sha256(
            ("TC3" + self.settings.secret_key).encode("utf-8"), date
        )
        secret_service = self._hmac_sha256(secret_date, self._SERVICE)
        secret_signing = self._hmac_sha256(secret_service, "tc3_request")

        # 4. Signature
        signature = self._hmac_sha256(secret_signing, string_to_sign).hex()

        authorization = (
            f"{algorithm} Credential={self.settings.secret_id}/{credential_scope}, "
            f"SignedHeaders={signed_headers}, Signature={signature}"
        )

        return {
            "Authorization": authorization,
            "Content-Type": content_type,
            "Host": self._HOST,
            "X-TC-Action": action,
            "X-TC-Version": version,
            "X-TC-Timestamp": str(timestamp),
            "X-TC-Region": self.settings.region,
        }

    # ---- API call ----------------------------------------------------------------

    async def synthesize(
        self, text: str, opts: TTSOptions | None = None
    ) -> AudioData:
        """Synthesize ``text`` in one request and return the decoded audio.

        The request always asks for the ``wav`` codec, so the returned
        ``AudioData`` is labelled as WAV. ``opts`` is accepted for interface
        compatibility; none of its fields are applied to the request yet.

        Raises:
            RuntimeError: If the provider was not started, the API reports
                an error, or the response carries no audio.
            httpx.HTTPStatusError: On a non-2xx HTTP status.
        """
        if self._client is None:
            raise RuntimeError("Provider not started — call start() first")
        # Function-scope import keeps this block self-contained.
        from openspeech.core.enums import AudioFormat

        logger.info("{}: request received, text={} chars", self.name, len(text))
        started = time.perf_counter()

        payload = {
            "Text": text,
            "SessionId": uuid.uuid4().hex,
            "VoiceType": self.settings.voice_type,
            "Codec": "wav",
            "Volume": 0,
            "Speed": 0,
        }
        # Serialize once: the signature covers exactly these bytes.
        payload_json = json.dumps(payload)
        headers = self._sign_request("TextToVoice", "2019-08-23", payload_json)

        resp = await self._client.post(
            f"https://{self._HOST}",
            content=payload_json,
            headers=headers,
        )
        resp.raise_for_status()

        response = resp.json().get("Response", {})
        if "Error" in response:
            err = response["Error"]
            raise RuntimeError(
                f"Tencent TTS error [{err.get('Code')}]: {err.get('Message')}"
            )

        audio_b64 = response.get("Audio", "")
        if not audio_b64:
            # Neither an error nor audio: fail loudly rather than hand back
            # an empty clip that looks like a success.
            raise RuntimeError("Tencent TTS returned no audio data")

        audio = AudioData(
            data=base64.b64decode(audio_b64),
            sample_rate=16000,
            channels=1,
            # Label what was actually requested from the API (Codec "wav"),
            # not what the caller asked for.
            format=AudioFormat.WAV,
        )
        logger.info(
            "{}: completed in {:.0f}ms, output={} bytes",
            self.name,
            (time.perf_counter() - started) * 1000,
            len(audio.data),
        )
        return audio

    async def synthesize_stream(
        self, text: str, opts: TTSOptions | None = None
    ) -> AsyncIterator[AudioChunk]:
        """Not supported; raises ``NotImplementedError`` on first iteration."""
        raise NotImplementedError("Tencent TTS streaming not implemented")
        yield  # noqa: unreachable — makes this an async generator
@@ -0,0 +1,117 @@
1
+ """Volcengine (ByteDance) TTS provider adapter."""
2
+ from __future__ import annotations
3
+
4
+ import base64
5
+ from openspeech.logging_config import logger
6
+ import time
7
+ from collections.abc import AsyncIterator
8
+ from dataclasses import dataclass
9
+ from typing import Any
10
+
11
+ import httpx
12
+
13
+ from openspeech.core.base import TTSProvider
14
+
15
+ from openspeech.core.enums import Capability, ExecMode, ProviderType
16
+ from openspeech.core.models import AudioChunk, AudioData, TTSOptions
17
+ from openspeech.core.settings import BaseSettings
18
+
19
@dataclass
class VolcengineTTSSettings(BaseSettings):
    """Configuration for the Volcengine (ByteDance) TTS adapter."""

    # Token placed in the ``Authorization`` header.
    access_token: str = ""

    # Application id sent as ``app.appid``.
    app_id: str = ""

    # Cluster name sent as ``app.cluster``.
    cluster: str = "volcano_tts"

    # Voice identifier sent as ``audio.voice_type``.
    voice_type: str = "BV001_streaming"
25
+
26
class VolcengineTTS(TTSProvider):
    """Batch text-to-speech adapter for the Volcengine (ByteDance) HTTP API.

    Streaming synthesis is not implemented.
    """

    name = "volcengine-tts"
    provider_type = ProviderType.TTS
    execution_mode = ExecMode.IN_PROCESS
    settings_cls = VolcengineTTSSettings
    capabilities = {Capability.BATCH, Capability.MULTILINGUAL}
    field_options = {
        "voice_type": [
            "BV001_streaming",
            "BV002_streaming",
            "BV700_streaming",
            "BV406_streaming",
            "BV407_streaming",
        ],
    }

    _ENDPOINT = "https://openspeech.bytedance.com/api/v1/tts"
    # 3000 is the success code described in the Volcengine TTS HTTP API
    # reference; 0 is still accepted because earlier versions of this
    # adapter treated it as success.
    _SUCCESS_CODES = frozenset({0, 3000})

    def __init__(self, settings: VolcengineTTSSettings | None = None) -> None:
        """Store the settings; the HTTP client is created lazily by ``start()``."""
        self.settings = settings or VolcengineTTSSettings()
        self._client: httpx.AsyncClient | None = None
        self._owns_client: bool = True

    def set_http_client(self, client) -> None:
        """Use an externally managed HTTP client instead of creating one.

        The provider never closes a client injected this way.
        """
        self._client = client
        self._owns_client = False

    async def start(self) -> None:
        """Create a private HTTP client unless one was injected beforehand."""
        if self._client is None:
            self._client = httpx.AsyncClient(timeout=60.0)
            self._owns_client = True
        logger.info("{} provider started", self.name)

    async def stop(self) -> None:
        """Close and drop the HTTP client when this provider created it.

        An injected client is left in place; its owner is responsible for
        closing it.
        """
        if self._client is not None and self._owns_client:
            await self._client.aclose()
            self._client = None
        logger.info("{} provider stopped", self.name)

    async def health_check(self) -> bool:
        """Return ``True`` when token and app id are configured (no network call)."""
        return bool(self.settings.access_token) and bool(self.settings.app_id)

    async def synthesize(
        self, text: str, opts: TTSOptions | None = None
    ) -> AudioData:
        """Synthesize ``text`` in one request and return the decoded audio.

        ``opts.speed`` is forwarded as ``audio.speed_ratio``. The request
        always asks for ``wav`` encoding, so the returned ``AudioData`` is
        labelled as WAV.

        Raises:
            RuntimeError: If the provider was not started, the API reports
                a failure code, or the response carries no audio.
            httpx.HTTPStatusError: On a non-2xx HTTP status.
        """
        if self._client is None:
            raise RuntimeError("Provider not started — call start() first")
        # Function-scope imports keep this block self-contained.
        import uuid

        from openspeech.core.enums import AudioFormat

        logger.info("{}: request received, text={} chars", self.name, len(text))
        started = time.perf_counter()
        opts = opts or TTSOptions()

        payload = {
            "app": {
                "appid": self.settings.app_id,
                # The API reference lists app.token as a required field;
                # real authentication happens through the header below.
                "token": self.settings.access_token,
                "cluster": self.settings.cluster,
            },
            "user": {"uid": "openspeech"},
            "audio": {
                "voice_type": self.settings.voice_type,
                "encoding": "wav",
                "speed_ratio": opts.speed,
            },
            "request": {
                # A unique id per call is required by the API reference.
                "reqid": uuid.uuid4().hex,
                "text": text,
                "operation": "query",
            },
        }
        headers = {
            # The semicolon after "Bearer" is this API's own header format.
            "Authorization": f"Bearer;{self.settings.access_token}",
            "Content-Type": "application/json",
        }

        resp = await self._client.post(
            self._ENDPOINT,
            json=payload,
            headers=headers,
        )
        resp.raise_for_status()
        body = resp.json()

        code = body.get("code")
        # A response without a code is passed through, as before.
        if code is not None and code not in self._SUCCESS_CODES:
            raise RuntimeError(
                f"Volcengine TTS error: {body.get('message', 'unknown error')}"
            )

        audio_b64 = body.get("data", "")
        if not audio_b64:
            # Success code but no payload: fail loudly rather than hand
            # back an empty clip.
            raise RuntimeError("Volcengine TTS returned no audio data")

        audio = AudioData(
            data=base64.b64decode(audio_b64),
            sample_rate=24000,
            channels=1,
            # Label what was actually requested from the API ("wav"), not
            # what the caller asked for.
            format=AudioFormat.WAV,
        )
        logger.info(
            "{}: completed in {:.0f}ms, output={} bytes",
            self.name,
            (time.perf_counter() - started) * 1000,
            len(audio.data),
        )
        return audio

    async def synthesize_stream(
        self, text: str, opts: TTSOptions | None = None
    ) -> AsyncIterator[AudioChunk]:
        """Not supported; raises ``NotImplementedError`` on first iteration."""
        raise NotImplementedError("Volcengine TTS streaming not implemented")
        yield  # noqa: unreachable — makes this an async generator
@@ -0,0 +1,234 @@
1
+ """Windows native TTS provider using SAPI5 via ``pyttsx3``."""
2
+ from __future__ import annotations
3
+
4
+ import asyncio
5
+ import io
6
+ from openspeech.logging_config import logger
7
+ import os
8
+ import sys
9
+ import tempfile
10
+ import time
11
+ import wave
12
+ from collections.abc import AsyncIterator
13
+ from dataclasses import dataclass
14
+ from pathlib import Path
15
+
16
+ from openspeech.core.base import TTSProvider
17
+
18
+ from openspeech.core.enums import AudioFormat, Capability, ExecMode, ProviderType
19
+ from openspeech.core.models import AudioChunk, AudioData, TTSOptions
20
+ from openspeech.core.settings import BaseSettings
21
+
22
+ # Language prefix → human-readable group name (reuse same map as macos_say)
23
+ _LANG_GROUP_MAP: dict[str, str] = {
24
+ "zh": "中文",
25
+ "en": "English",
26
+ "ja": "日本語",
27
+ "ko": "한국어",
28
+ "fr": "Français",
29
+ "de": "Deutsch",
30
+ "es": "Español",
31
+ "it": "Italiano",
32
+ "pt": "Português",
33
+ "ru": "Русский",
34
+ "ar": "العربية",
35
+ }
36
+
37
@dataclass
class WindowsSapiSettings(BaseSettings):
    """Configuration for the Windows SAPI5 TTS provider."""

    # Empty = system default; otherwise a substring of the voice name.
    default_voice: str = ""

    # pyttsx3 rate (words per minute).
    default_rate: int = 200
41
+
42
class WindowsSapiTTS(TTSProvider):
    """Windows native TTS via SAPI5 (pyttsx3).

    All pyttsx3 work runs in worker threads through ``asyncio.to_thread``.
    NOTE(review): confirm that the SAPI5/COM driver is safe to create from
    arbitrary worker threads on the target pyttsx3 version.
    """

    name = "windows-tts"
    provider_type = ProviderType.TTS
    execution_mode = ExecMode.IN_PROCESS
    settings_cls = WindowsSapiSettings
    capabilities = {Capability.BATCH, Capability.MULTILINGUAL}

    def __init__(self, settings: WindowsSapiSettings | None = None) -> None:
        """Store the settings; the provider is unusable until ``start()``."""
        self.settings = settings or WindowsSapiSettings()
        self._available: bool = False
        self._voices_cache: list[dict] | None = None

    # -- lifecycle ------------------------------------------------------------

    async def start(self) -> None:
        """Verify the platform, the pyttsx3 import and engine creation.

        Raises:
            RuntimeError: When not running on Windows or pyttsx3 is missing.
        """
        if sys.platform != "win32":
            raise RuntimeError(
                "Windows TTS (SAPI5) is only available on Windows"
            )
        try:
            import pyttsx3  # noqa: F401
        except ImportError as exc:
            # Chain the cause so the underlying import failure stays visible.
            raise RuntimeError(
                "pyttsx3 is required for Windows TTS. "
                "Install with: pip install pyttsx3"
            ) from exc
        # Quick validation: ensure engine can be created
        await asyncio.to_thread(self._validate_engine)
        self._available = True
        logger.info("{} provider started", self.name)

    async def stop(self) -> None:
        """Mark the provider unavailable and forget the cached voice list."""
        self._available = False
        self._voices_cache = None
        logger.info("{} provider stopped", self.name)

    async def health_check(self) -> bool:
        """Return ``True`` when started, or when pyttsx3 is importable on Windows."""
        if self._available:
            return True
        if sys.platform != "win32":
            return False
        try:
            import pyttsx3  # noqa: F401
        except ImportError:
            return False
        return True

    # -- synthesis ------------------------------------------------------------

    async def synthesize(
        self, text: str, opts: TTSOptions | None = None
    ) -> AudioData:
        """Synthesize ``text`` to WAV and return it with parsed metadata.

        The voice comes from ``opts.voice`` or the configured default; the
        rate is the configured default scaled by ``opts.speed``.

        Raises:
            RuntimeError: If the provider was not started, or the engine
                produced no readable WAV data.
        """
        if not self._available:
            raise RuntimeError("Provider not started — call start() first")
        logger.info("{}: request received, text={} chars", self.name, len(text))
        started = time.perf_counter()

        opts = opts or TTSOptions()
        voice_hint = opts.voice or self.settings.default_voice
        rate = int(self.settings.default_rate * opts.speed)

        wav_bytes = await asyncio.to_thread(
            self._synthesize_sync, text, voice_hint, rate,
        )

        # Parse WAV to extract real sample_rate/channels/duration
        try:
            with wave.open(io.BytesIO(wav_bytes), "rb") as wf:
                sample_rate = wf.getframerate()
                channels = wf.getnchannels()
                n_frames = wf.getnframes()
        except (wave.Error, EOFError) as exc:
            # An empty or truncated file would otherwise surface as a bare
            # EOFError/wave.Error with no hint about where it came from.
            raise RuntimeError(
                f"{self.name}: engine produced no readable WAV data "
                f"({len(wav_bytes)} bytes)"
            ) from exc
        duration_ms = int(n_frames / sample_rate * 1000) if sample_rate else 0

        audio = AudioData(
            data=wav_bytes,
            sample_rate=sample_rate,
            channels=channels,
            format=AudioFormat.WAV,
            duration_ms=duration_ms,
        )
        logger.info(
            "{}: completed in {:.0f}ms, output={} bytes",
            self.name,
            (time.perf_counter() - started) * 1000,
            len(audio.data),
        )
        return audio

    async def synthesize_stream(
        self, text: str, opts: TTSOptions | None = None
    ) -> AsyncIterator[AudioChunk]:
        """Batch-then-chunk fallback: synthesize full audio, then yield chunks.

        Yields 4096-byte slices of the complete WAV, followed by an empty
        chunk flagged ``is_final``.
        """
        logger.info("{}: stream request, text={} chars", self.name, len(text))
        started = time.perf_counter()
        result = await self.synthesize(text, opts)
        chunk_size = 4096
        sequence = 0
        for offset in range(0, len(result.data), chunk_size):
            chunk_data = result.data[offset : offset + chunk_size]
            logger.debug("{}: chunk #{}, {} bytes", self.name, sequence, len(chunk_data))
            yield AudioChunk(data=chunk_data, sequence=sequence)
            sequence += 1
        yield AudioChunk(data=b"", sequence=sequence, is_final=True)
        # ``sequence`` now equals the number of data chunks emitted.
        logger.info(
            "{}: stream complete, {} chunks in {:.0f}ms",
            self.name,
            sequence,
            (time.perf_counter() - started) * 1000,
        )

    # -- voices ---------------------------------------------------------------

    async def list_voices(self) -> list[dict]:
        """Return the installed voices, cached after the first lookup."""
        if self._voices_cache is None:
            self._voices_cache = await asyncio.to_thread(self._list_voices_sync)
        return self._voices_cache

    # -- internal helpers (sync, run in thread) --------------------------------

    @staticmethod
    def _validate_engine() -> None:
        """Create and immediately dispose a pyttsx3 engine to verify SAPI5."""
        import pyttsx3

        engine = pyttsx3.init("sapi5")
        engine.stop()

    @staticmethod
    def _synthesize_sync(text: str, voice_hint: str, rate: int) -> bytes:
        """Synchronous TTS: create engine, synthesize to WAV, return bytes.

        ``voice_hint`` is matched case-insensitively as a substring of each
        voice's name or id; the first match wins. With no match the engine's
        current voice is used and a warning is logged.
        """
        import pyttsx3

        engine = pyttsx3.init("sapi5")
        try:
            engine.setProperty("rate", rate)

            # Resolve and set voice
            if voice_hint:
                hint_lower = voice_hint.lower()
                for voice in engine.getProperty("voices"):
                    if hint_lower in voice.name.lower() or hint_lower in voice.id.lower():
                        engine.setProperty("voice", voice.id)
                        break
                else:
                    logger.warning(
                        "windows-tts: no voice matches {!r}; using engine default",
                        voice_hint,
                    )

            # Synthesize to temp WAV file. The descriptor is closed right
            # away so the engine can write to the path itself.
            fd, tmp_path = tempfile.mkstemp(suffix=".wav")
            os.close(fd)
            try:
                engine.save_to_file(text, tmp_path)
                engine.runAndWait()
                return Path(tmp_path).read_bytes()
            finally:
                # Best-effort cleanup: a leftover temp file must not mask
                # the synthesis result or error.
                try:
                    Path(tmp_path).unlink(missing_ok=True)
                except OSError:
                    pass
        finally:
            engine.stop()

    @staticmethod
    def _list_voices_sync() -> list[dict]:
        """List SAPI5 voices synchronously.

        Each entry has ``name``, ``language``, ``description``, ``group``
        and ``id`` keys.
        """
        import pyttsx3

        engine = pyttsx3.init("sapi5")
        try:
            listing: list[dict] = []
            for voice in engine.getProperty("voices"):
                # SAPI voice language is in voice.languages (list of bytes)
                # or can be parsed from voice.id
                lang_str = ""
                if voice.languages:
                    # pyttsx3 gives languages as list; first entry is primary
                    raw = voice.languages[0]
                    if isinstance(raw, bytes):
                        lang_str = raw.decode("utf-8", errors="replace").strip("\x00")
                    elif isinstance(raw, str):
                        lang_str = raw

                # Derive group from language: "zh-CN" / "zh_CN" -> "zh".
                lang_prefix = lang_str.split("-")[0].split("_")[0].lower() if lang_str else ""
                if lang_prefix:
                    group = _LANG_GROUP_MAP.get(lang_prefix, lang_prefix.upper())
                else:
                    group = "Other"

                listing.append({
                    "name": voice.name,
                    "language": lang_str,
                    "description": getattr(voice, "description", "") or "",
                    "group": group,
                    "id": voice.id,
                })
            return listing
        finally:
            engine.stop()
@@ -0,0 +1 @@
1
+ """OpenSpeech FastAPI server."""
@@ -0,0 +1,72 @@
1
+ """FastAPI application wrapping ServiceDispatcher."""
2
+ from __future__ import annotations
3
+ from contextlib import asynccontextmanager
4
+ from pathlib import Path
5
+ from typing import Any
6
+
7
+ from fastapi import FastAPI, Request
8
+
9
+ from openspeech.config import load_config
10
+ from openspeech.core.registry import ProviderRegistry
11
+ from openspeech.dispatch.dispatcher import ServiceDispatcher
12
+ from openspeech.dispatch.watcher import ConfigWatcher
13
+ from openspeech.local_engines import EngineManager
14
+ from openspeech.logging_config import ensure_configured
15
+ from openspeech.server.middleware import RequestContextMiddleware
16
+ from openspeech.server.routes import stt, tts, management, webui
17
+ from openspeech.server.ws import stt_stream, tts_stream
18
+
19
+
20
def create_app(config_path: Path, registry: ProviderRegistry) -> FastAPI:
    """Create FastAPI app with dispatcher lifecycle.

    Args:
        config_path: Configuration file used to build the dispatcher, to
            read server settings, and watched for reloads while running.
        registry: Provider registry passed to the dispatcher on creation
            and on every reload.

    Returns:
        The configured application with routes, middleware and
        ``app.state`` populated.
    """
    ensure_configured()
    dispatcher = ServiceDispatcher.from_config(config_path, registry)
    config = load_config(config_path)

    @asynccontextmanager
    async def lifespan(app: FastAPI):
        await dispatcher.start()
        watcher = None
        try:
            async def _on_reload() -> dict:
                return await dispatcher.reload_config(config_path, registry)

            watcher = ConfigWatcher(config_path, on_reload=_on_reload)
            watcher.start()

            yield
        finally:
            # Always tear down, even if startup after dispatcher.start()
            # failed or the application exited with an error.
            try:
                if watcher is not None:
                    await watcher.stop()
            finally:
                await dispatcher.stop()

    app = FastAPI(title="OpenSpeech API", version="0.1.0", lifespan=lifespan)
    app.state.dispatcher = dispatcher
    app.state.config_path = config_path
    app.state.registry = registry
    app.state.server_config = config.server
    app.state.engine_manager = EngineManager()

    # Middleware order: Starlette makes the middleware added LAST the
    # outermost one. Authentication is therefore registered first and the
    # request-context middleware second, so that the latter wraps the
    # former and auth rejections still get logged with a request_id.
    if config.server.auth_enabled and config.server.api_keys:
        from openspeech.server.auth import AuthMiddleware
        app.add_middleware(AuthMiddleware, api_keys=config.server.api_keys)
    elif config.server.auth_enabled:
        # Function-scope import keeps this block self-contained.
        from openspeech.logging_config import logger
        logger.warning(
            "auth_enabled is set but no api_keys are configured; "
            "the server is running WITHOUT authentication"
        )

    # Request-scoped logging context (request_id + route timing).
    app.add_middleware(RequestContextMiddleware)

    # Register routes
    app.include_router(stt.router, prefix="/v1/stt", tags=["STT"])
    app.include_router(tts.router, prefix="/v1/tts", tags=["TTS"])
    app.include_router(management.router, prefix="/v1", tags=["Management"])
    app.include_router(webui.router, tags=["WebUI"])

    # Register WebSocket endpoints
    app.include_router(stt_stream.router, prefix="/v1/stt", tags=["STT Streaming"])
    app.include_router(tts_stream.router, prefix="/v1/tts", tags=["TTS Streaming"])

    return app
68
+
69
+
70
def get_dispatcher(request: Request) -> ServiceDispatcher:
    """Return the dispatcher stored on the application state of ``request``."""
    app_state = request.app.state
    return app_state.dispatcher
@@ -0,0 +1,42 @@
1
+ """API Key authentication middleware."""
2
+ from __future__ import annotations
3
+ import json
4
+ from fastapi import Request
5
+ from starlette.middleware.base import BaseHTTPMiddleware
6
+ from starlette.responses import JSONResponse
7
+
8
# Requests to these exact paths bypass authentication.
EXEMPT_PATHS = {
    "/v1/health",
    "/docs",
    "/openapi.json",
    "/redoc",
}
# Requests whose path starts with any of these prefixes bypass authentication.
EXEMPT_PREFIXES = ("/ui",)
11
+
12
+
13
def _unauthorized(detail: str) -> JSONResponse:
    """Build a 401 JSON response whose body is ``{"detail": detail}``."""
    body = {"detail": detail}
    return JSONResponse(status_code=401, content=body)
15
+
16
+
17
class AuthMiddleware(BaseHTTPMiddleware):
    """Reject HTTP requests that do not carry a configured API key.

    The key is expected as ``Authorization: Bearer <key>``. Paths listed in
    ``EXEMPT_PATHS`` or starting with one of ``EXEMPT_PREFIXES`` are let
    through without a check.
    """

    def __init__(self, app, api_keys: list[str]) -> None:
        """Wrap ``app`` and remember the accepted keys.

        Args:
            app: The ASGI application to wrap.
            api_keys: Accepted API keys. Blank entries are ignored so that
                an empty bearer token can never authenticate.
        """
        super().__init__(app)
        self._api_keys = {str(key) for key in api_keys if key}
        # Pre-encoded copies for constant-time comparison.
        self._api_key_bytes = tuple(key.encode("utf-8") for key in self._api_keys)

    def _is_known_key(self, token: str) -> bool:
        """Return ``True`` when ``token`` equals one of the configured keys.

        Every configured key is compared with ``hmac.compare_digest`` and
        the loop never exits early, so response time does not reveal how
        much of a key was guessed correctly.
        """
        # Function-scope import keeps this block self-contained.
        import hmac

        if not token:
            return False
        candidate = token.encode("utf-8")
        matched = False
        for known in self._api_key_bytes:
            matched |= hmac.compare_digest(candidate, known)
        return matched

    async def dispatch(self, request: Request, call_next):
        """Authenticate the request, then forward it or answer with 401."""
        # Skip auth for exempt paths
        path = request.url.path
        if path in EXEMPT_PATHS or path.startswith(EXEMPT_PREFIXES):
            return await call_next(request)

        # Skip auth for WebSocket (handled separately via query param).
        # NOTE(review): Starlette's BaseHTTPMiddleware is understood to
        # forward non-HTTP scopes before dispatch() is reached, so this
        # guard is likely defensive only; confirm the WebSocket endpoints
        # enforce their own authentication.
        if request.scope.get("type") == "websocket":
            return await call_next(request)

        # Check Bearer token. The scheme name is matched case-insensitively.
        auth_header = request.headers.get("Authorization", "")
        scheme, separator, token = auth_header.partition(" ")
        if not separator or scheme.lower() != "bearer":
            return _unauthorized("Missing or invalid Authorization header")

        if not self._is_known_key(token.strip()):
            return _unauthorized("Invalid API key")

        return await call_next(request)