openspeechapi 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118) hide show
  1. openspeech/__init__.py +75 -0
  2. openspeech/__main__.py +5 -0
  3. openspeech/cli.py +413 -0
  4. openspeech/client/__init__.py +4 -0
  5. openspeech/client/client.py +145 -0
  6. openspeech/config.py +212 -0
  7. openspeech/core/__init__.py +0 -0
  8. openspeech/core/base.py +75 -0
  9. openspeech/core/enums.py +39 -0
  10. openspeech/core/models.py +61 -0
  11. openspeech/core/registry.py +37 -0
  12. openspeech/core/settings.py +8 -0
  13. openspeech/demo.py +675 -0
  14. openspeech/dispatch/__init__.py +0 -0
  15. openspeech/dispatch/context.py +34 -0
  16. openspeech/dispatch/dispatcher.py +661 -0
  17. openspeech/dispatch/executors/__init__.py +0 -0
  18. openspeech/dispatch/executors/base.py +34 -0
  19. openspeech/dispatch/executors/in_process.py +66 -0
  20. openspeech/dispatch/executors/remote.py +64 -0
  21. openspeech/dispatch/executors/subprocess_exec.py +446 -0
  22. openspeech/dispatch/fanout.py +95 -0
  23. openspeech/dispatch/filters.py +73 -0
  24. openspeech/dispatch/lifecycle.py +178 -0
  25. openspeech/dispatch/watcher.py +82 -0
  26. openspeech/engine_catalog.py +236 -0
  27. openspeech/engine_registry.yaml +347 -0
  28. openspeech/exceptions.py +51 -0
  29. openspeech/factory.py +325 -0
  30. openspeech/local_engines/__init__.py +12 -0
  31. openspeech/local_engines/aim_resolver.py +91 -0
  32. openspeech/local_engines/backends/__init__.py +1 -0
  33. openspeech/local_engines/backends/docker_backend.py +490 -0
  34. openspeech/local_engines/backends/native_backend.py +902 -0
  35. openspeech/local_engines/base.py +30 -0
  36. openspeech/local_engines/engines/__init__.py +1 -0
  37. openspeech/local_engines/engines/faster_whisper.py +36 -0
  38. openspeech/local_engines/engines/fish_speech.py +33 -0
  39. openspeech/local_engines/engines/sherpa_onnx.py +56 -0
  40. openspeech/local_engines/engines/whisper.py +41 -0
  41. openspeech/local_engines/engines/whisperlivekit.py +60 -0
  42. openspeech/local_engines/manager.py +208 -0
  43. openspeech/local_engines/models.py +50 -0
  44. openspeech/local_engines/progress.py +69 -0
  45. openspeech/local_engines/registry.py +19 -0
  46. openspeech/local_engines/task_store.py +52 -0
  47. openspeech/local_engines/tasks.py +71 -0
  48. openspeech/logging_config.py +607 -0
  49. openspeech/observe/__init__.py +0 -0
  50. openspeech/observe/base.py +79 -0
  51. openspeech/observe/debug.py +44 -0
  52. openspeech/observe/latency.py +19 -0
  53. openspeech/observe/metrics.py +47 -0
  54. openspeech/observe/tracing.py +44 -0
  55. openspeech/observe/usage.py +27 -0
  56. openspeech/providers/__init__.py +0 -0
  57. openspeech/providers/_template.py +101 -0
  58. openspeech/providers/stt/__init__.py +0 -0
  59. openspeech/providers/stt/alibaba.py +86 -0
  60. openspeech/providers/stt/assemblyai.py +135 -0
  61. openspeech/providers/stt/azure_speech.py +99 -0
  62. openspeech/providers/stt/baidu.py +135 -0
  63. openspeech/providers/stt/deepgram.py +311 -0
  64. openspeech/providers/stt/elevenlabs.py +385 -0
  65. openspeech/providers/stt/faster_whisper.py +211 -0
  66. openspeech/providers/stt/google_cloud.py +106 -0
  67. openspeech/providers/stt/iflytek.py +427 -0
  68. openspeech/providers/stt/macos_speech.py +226 -0
  69. openspeech/providers/stt/openai.py +84 -0
  70. openspeech/providers/stt/sherpa_onnx.py +353 -0
  71. openspeech/providers/stt/tencent.py +212 -0
  72. openspeech/providers/stt/volcengine.py +107 -0
  73. openspeech/providers/stt/whisper.py +153 -0
  74. openspeech/providers/stt/whisperlivekit.py +530 -0
  75. openspeech/providers/stt/windows_speech.py +249 -0
  76. openspeech/providers/tts/__init__.py +0 -0
  77. openspeech/providers/tts/alibaba.py +95 -0
  78. openspeech/providers/tts/azure_speech.py +123 -0
  79. openspeech/providers/tts/baidu.py +143 -0
  80. openspeech/providers/tts/coqui.py +64 -0
  81. openspeech/providers/tts/cosyvoice.py +90 -0
  82. openspeech/providers/tts/deepgram.py +174 -0
  83. openspeech/providers/tts/elevenlabs.py +311 -0
  84. openspeech/providers/tts/fish_speech.py +158 -0
  85. openspeech/providers/tts/google_cloud.py +107 -0
  86. openspeech/providers/tts/iflytek.py +209 -0
  87. openspeech/providers/tts/macos_say.py +251 -0
  88. openspeech/providers/tts/minimax.py +122 -0
  89. openspeech/providers/tts/openai.py +104 -0
  90. openspeech/providers/tts/piper.py +104 -0
  91. openspeech/providers/tts/tencent.py +189 -0
  92. openspeech/providers/tts/volcengine.py +117 -0
  93. openspeech/providers/tts/windows_sapi.py +234 -0
  94. openspeech/server/__init__.py +1 -0
  95. openspeech/server/app.py +72 -0
  96. openspeech/server/auth.py +42 -0
  97. openspeech/server/middleware.py +75 -0
  98. openspeech/server/routes/__init__.py +1 -0
  99. openspeech/server/routes/management.py +848 -0
  100. openspeech/server/routes/stt.py +121 -0
  101. openspeech/server/routes/tts.py +159 -0
  102. openspeech/server/routes/webui.py +29 -0
  103. openspeech/server/webui/app.js +2649 -0
  104. openspeech/server/webui/index.html +216 -0
  105. openspeech/server/webui/styles.css +617 -0
  106. openspeech/server/ws/__init__.py +1 -0
  107. openspeech/server/ws/stt_stream.py +263 -0
  108. openspeech/server/ws/tts_stream.py +207 -0
  109. openspeech/telemetry/__init__.py +21 -0
  110. openspeech/telemetry/perf.py +307 -0
  111. openspeech/utils/__init__.py +5 -0
  112. openspeech/utils/audio_converter.py +406 -0
  113. openspeech/utils/audio_playback.py +156 -0
  114. openspeech/vendor_registry.yaml +74 -0
  115. openspeechapi-0.1.0.dist-info/METADATA +101 -0
  116. openspeechapi-0.1.0.dist-info/RECORD +118 -0
  117. openspeechapi-0.1.0.dist-info/WHEEL +4 -0
  118. openspeechapi-0.1.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,158 @@
1
+ """Fish-Speech TTS provider adapter (HTTP API, in-process)."""
2
+ from __future__ import annotations
3
+
4
+ from collections.abc import AsyncIterator
5
+ from dataclasses import dataclass
6
+ import io
7
+ from openspeech.logging_config import logger
8
+ import time
9
+ from typing import Any
10
+ from urllib.parse import urljoin
11
+ import wave
12
+
13
+ from openspeech.core.base import TTSProvider
14
+
15
+ from openspeech.core.enums import AudioFormat, Capability, ExecMode, ProviderType
16
+ from openspeech.core.models import AudioChunk, AudioData, TTSOptions
17
+ from openspeech.core.settings import BaseSettings
18
+
19
@dataclass
class FishSpeechTTSSettings(BaseSettings):
    """Configuration for the Fish-Speech TTS provider.

    Connection fields control how the provider reaches the server; the
    remaining fields are forwarded with every synthesis request.
    """

    # --- Connection -------------------------------------------------------
    # Base URL of the Fish-Speech HTTP server.
    api_url: str = "http://localhost:8080"
    # Optional voice-cloning references; each is sent only when set.
    reference_audio: str | None = None
    reference_id: str | None = None
    # Timeout handed to the HTTP client the provider creates for itself.
    timeout_s: float = 60.0
    # Extra attempts after the first failed request (negative counts as 0).
    retries: int = 0
    # Path probed by the provider's health check, relative to ``api_url``.
    health_path: str = "/health"

    # --- Request parameters (sent in the JSON payload) ---------------------
    format: str = "wav"
    latency: str = "balanced"
    chunk_length: int = 200
    max_new_tokens: int = 1024
    top_p: float = 0.8
    repetition_penalty: float = 1.1
    temperature: float = 0.8
    use_memory_cache: str = "on"
    normalize: bool = True
36
+
37
class FishSpeechTTS(TTSProvider):
    """Text-to-speech adapter that talks to a Fish-Speech server over HTTP.

    Synthesis requests are POSTed to ``<api_url>/v1/tts`` and the response
    body is returned as :class:`AudioData`.  An HTTP client can be injected
    with :meth:`set_http_client`; otherwise :meth:`start` creates a private
    ``httpx.AsyncClient`` that :meth:`stop` closes again.
    """

    name = "fish-speech"
    provider_type = ProviderType.TTS
    execution_mode = ExecMode.IN_PROCESS
    settings_cls = FishSpeechTTSSettings
    # NOTE(review): STREAMING is advertised although synthesize_stream()
    # currently raises NotImplementedError — confirm how callers use this set.
    capabilities = {Capability.STREAMING, Capability.VOICE_CLONE}
    field_options = {
        "format": ["wav", "mp3", "flac", "opus"],
        "latency": ["normal", "balanced"],
    }

    # Audio parameters reported when the response carries no readable WAV
    # header (for example when a compressed format was requested).
    _FALLBACK_SAMPLE_RATE = 44100
    _FALLBACK_CHANNELS = 1

    def __init__(self, settings: FishSpeechTTSSettings | None = None) -> None:
        """Store *settings* (defaults when omitted); no I/O happens here."""
        self.settings = settings or FishSpeechTTSSettings()
        self._client: Any = None
        self._owns_client: bool = True

    def set_http_client(self, client) -> None:
        """Use an externally managed HTTP client; it is never closed by us."""
        self._client = client
        self._owns_client = False

    async def start(self) -> None:
        """Create a private HTTP client unless one was injected.

        Raises:
            ImportError: if ``httpx`` is not installed.
        """
        if self._client is None:
            try:
                import httpx
            except ImportError as exc:
                raise ImportError(
                    "Install httpx: pip install openspeech[fish-speech]"
                ) from exc
            # Ignore process-level proxy env vars by default to avoid
            # accidental SOCKS deps.
            self._client = httpx.AsyncClient(
                timeout=self.settings.timeout_s, trust_env=False
            )
            self._owns_client = True
        logger.info("{} provider started", self.name)

    async def stop(self) -> None:
        """Close the HTTP client if this provider created it, then drop it."""
        if self._client is not None and self._owns_client:
            await self._client.aclose()
        self._client = None
        logger.info("{} provider stopped", self.name)

    def _endpoint(self, path: str) -> str:
        """Join *path* onto ``api_url`` without doubling or losing slashes."""
        base = self.settings.api_url.rstrip("/") + "/"
        return urljoin(base, path.lstrip("/"))

    async def health_check(self) -> bool:
        """Return True when the health endpoint answers with a status < 500.

        Any exception (connection failure, timeout, bad URL) counts as
        unhealthy; a provider that has not been started is unhealthy too.
        """
        if self._client is None:
            return False
        try:
            resp = await self._client.get(self._endpoint(self.settings.health_path))
            return resp.status_code < 500
        except Exception:
            return False

    async def synthesize(
        self, text: str, opts: TTSOptions | None = None
    ) -> AudioData:
        """Synthesize *text* in a single request and return the audio.

        Args:
            text: Text to speak.
            opts: Per-request options; ``voice`` and a non-default ``speed``
                are forwarded to the server when set.

        Returns:
            The response body wrapped in :class:`AudioData`.  Sample rate,
            channel count and duration come from the WAV header when the
            body is a readable WAV file; otherwise fallback values are used
            and the format follows ``settings.format``.

        Raises:
            RuntimeError: if the provider was not started, or every request
                attempt failed before a response was received.
            Exception: whatever ``response.raise_for_status()`` raises for
                an HTTP error status.
        """
        if self._client is None:
            raise RuntimeError("Provider not started — call start() first")
        logger.info("{}: request received, text={} chars", self.name, len(text))
        _t0 = time.perf_counter()
        opts = opts or TTSOptions()
        cfg = self.settings

        payload: dict[str, Any] = {
            "text": text,
            "format": cfg.format,
            "latency": cfg.latency,
            "chunk_length": cfg.chunk_length,
            "max_new_tokens": cfg.max_new_tokens,
            "top_p": cfg.top_p,
            "repetition_penalty": cfg.repetition_penalty,
            "temperature": cfg.temperature,
            "use_memory_cache": cfg.use_memory_cache,
            "normalize": cfg.normalize,
        }
        if opts.voice:
            payload["voice"] = opts.voice
        if opts.speed and opts.speed != 1.0:
            payload["speed"] = opts.speed
        if cfg.reference_audio:
            payload["reference_audio"] = cfg.reference_audio
        if cfg.reference_id:
            payload["reference_id"] = cfg.reference_id

        # Same URL normalization as health_check, so a trailing slash in
        # api_url does not produce "//v1/tts".
        url = self._endpoint("/v1/tts")
        attempts = max(0, cfg.retries) + 1
        last_exc: Exception | None = None
        response = None
        for attempt in range(1, attempts + 1):
            try:
                response = await self._client.post(url, json=payload)
                break
            except Exception as exc:  # noqa: BLE001
                last_exc = exc
                logger.warning(
                    "{}: attempt {}/{} failed: {}",
                    self.name, attempt, attempts, exc,
                )
        if response is None:
            raise RuntimeError(f"Fish-Speech request failed: {last_exc}") from last_exc
        response.raise_for_status()

        audio = response.content
        wav = self._wav_params(audio)
        if wav is not None and wav[0] > 0:
            # Trust the header over hard-coded values.
            sample_rate, channels, frames = wav
            duration_ms = int((frames / sample_rate) * 1000)
            audio_format = AudioFormat.WAV
        else:
            sample_rate = self._FALLBACK_SAMPLE_RATE
            channels = self._FALLBACK_CHANNELS
            duration_ms = None
            audio_format = self._configured_format()

        result = AudioData(
            data=audio,
            sample_rate=sample_rate,
            channels=channels,
            format=audio_format,
            duration_ms=duration_ms,
        )
        logger.info(
            "{}: completed in {:.0f}ms, output={} bytes",
            self.name, (time.perf_counter() - _t0) * 1000, len(result.data),
        )
        return result

    async def synthesize_stream(
        self, text: str, opts: TTSOptions | None = None
    ) -> AsyncIterator[AudioChunk]:
        """Streaming synthesis is not implemented; iterating raises.

        Raises:
            NotImplementedError: on the first iteration.
        """
        raise NotImplementedError(
            "FishSpeechTTS.synthesize_stream() is not yet implemented"
        )
        yield  # pragma: no cover

    def _configured_format(self):
        """Map ``settings.format`` to an ``AudioFormat`` member by name.

        Falls back to ``AudioFormat.WAV`` (the previous hard-coded value)
        when no member with the upper-cased name exists.
        """
        # assumes AudioFormat is an Enum exposing __members__ — TODO confirm
        members = getattr(AudioFormat, "__members__", {})
        return members.get(str(self.settings.format).upper(), AudioFormat.WAV)

    @staticmethod
    def _wav_params(data: bytes) -> tuple[int, int, int] | None:
        """Return ``(sample_rate, channels, frames)`` from a WAV header.

        Returns None when *data* cannot be parsed as WAV.  Parsing is
        best-effort, so every exception is treated as "not a WAV file".
        """
        try:
            with wave.open(io.BytesIO(data), "rb") as wf:
                return wf.getframerate(), wf.getnchannels(), wf.getnframes()
        except Exception:
            return None

    @staticmethod
    def _wav_duration_ms(data: bytes) -> int | None:
        """Return the duration of WAV *data* in milliseconds, or None."""
        params = FishSpeechTTS._wav_params(data)
        if params is None:
            return None
        sample_rate, _channels, frames = params
        if sample_rate <= 0:
            return None
        return int((frames / sample_rate) * 1000)
@@ -0,0 +1,107 @@
1
+ """Google Cloud TTS provider adapter (batch, httpx)."""
2
+ from __future__ import annotations
3
+
4
+ import base64
5
+ from openspeech.logging_config import logger
6
+ import time
7
+ from collections.abc import AsyncIterator
8
+ from dataclasses import dataclass
9
+ from typing import Any
10
+
11
+ import httpx
12
+
13
+ from openspeech.core.base import TTSProvider
14
+
15
+ from openspeech.core.enums import AudioFormat, Capability, ExecMode, ProviderType
16
+ from openspeech.core.models import AudioChunk, AudioData, TTSOptions
17
+ from openspeech.core.settings import BaseSettings
18
+
19
@dataclass
class GoogleCloudTTSSettings(BaseSettings):
    """Configuration for the Google Cloud Text-to-Speech provider."""

    # API key sent with every synthesis request; the provider reports
    # itself unhealthy while this is empty.
    api_key: str = ""
    # Language code placed in the request's voice selection.
    language: str = "en-US"
    # Voice used when the caller does not pick one per request.
    voice_name: str = "en-US-Standard-A"
    # Speaking rate used when the caller leaves speed at its default.
    speaking_rate: float = 1.0
25
+
26
class GoogleCloudTTS(TTSProvider):
    """Batch text-to-speech adapter for the Google Cloud TTS REST API.

    Each call to :meth:`synthesize` issues one ``text:synthesize`` request
    asking for LINEAR16 audio and returns the decoded bytes.
    """

    name = "google-tts"
    provider_type = ProviderType.TTS
    execution_mode = ExecMode.REMOTE
    settings_cls = GoogleCloudTTSSettings
    capabilities = {Capability.BATCH, Capability.MULTILINGUAL}
    field_options = {
        "language": [
            "en-US", "zh-CN", "ja-JP", "ko-KR",
            "es-ES", "fr-FR", "de-DE", "pt-BR",
        ],
        "voice_name": [
            "en-US-Standard-A", "en-US-Standard-B", "en-US-Standard-C",
            "en-US-Standard-D", "en-US-Wavenet-A", "en-US-Wavenet-B",
            "zh-CN-Standard-A", "zh-CN-Standard-B", "zh-CN-Standard-C",
            "zh-CN-Standard-D", "zh-CN-Wavenet-A",
            "ja-JP-Standard-A", "ja-JP-Standard-B",
        ],
    }

    _ENDPOINT = "https://texttospeech.googleapis.com/v1/text:synthesize"
    # Requested explicitly so the rate reported in AudioData is the rate
    # the service was asked to produce.
    _SAMPLE_RATE_HZ = 24000

    def __init__(self, settings: GoogleCloudTTSSettings | None = None) -> None:
        """Store *settings* (defaults when omitted); no I/O happens here."""
        self.settings = settings or GoogleCloudTTSSettings()
        self._client: httpx.AsyncClient | None = None
        self._owns_client: bool = True

    def set_http_client(self, client) -> None:
        """Use an externally managed HTTP client; it is never closed by us."""
        self._client = client
        self._owns_client = False

    async def start(self) -> None:
        """Create a private HTTP client unless one was injected."""
        if self._client is None:
            self._client = httpx.AsyncClient(timeout=60.0)
            self._owns_client = True
        logger.info("{} provider started", self.name)

    async def stop(self) -> None:
        """Close the HTTP client if this provider created it, then drop it."""
        if self._client is not None and self._owns_client:
            await self._client.aclose()
        self._client = None
        logger.info("{} provider stopped", self.name)

    async def health_check(self) -> bool:
        """Return True when an API key is configured (no network call)."""
        return bool(self.settings.api_key)

    @staticmethod
    def _language_for_voice(voice_name: str, default: str) -> str:
        """Derive a language code from a ``<lang>-<REGION>-...`` voice name.

        Returns *default* when the name does not have that shape.
        """
        parts = voice_name.split("-")
        if len(parts) >= 3 and parts[0] and parts[1]:
            return f"{parts[0]}-{parts[1]}"
        return default

    async def synthesize(
        self, text: str, opts: TTSOptions | None = None
    ) -> AudioData:
        """Synthesize *text* with one request and return LINEAR16 audio.

        Args:
            text: Text to speak.
            opts: Per-request options.  ``voice`` overrides the configured
                voice name; a ``speed`` that is set and not 1.0 overrides
                the configured speaking rate.

        Returns:
            The decoded ``audioContent`` wrapped in :class:`AudioData`.

        Raises:
            RuntimeError: if the provider was not started, the API answers
                with a non-200 status, or the response has no audio.
        """
        if self._client is None:
            raise RuntimeError("Provider not started — call start() first")
        logger.info("{}: request received, text={} chars", self.name, len(text))
        _t0 = time.perf_counter()
        opts = opts or TTSOptions()

        if opts.voice:
            voice_name = opts.voice
            # A per-request voice may belong to another language than the
            # configured one; keep languageCode consistent with the voice.
            language = self._language_for_voice(voice_name, self.settings.language)
        else:
            voice_name = self.settings.voice_name
            language = self.settings.language

        # A missing (None) or default speed falls back to the configured
        # rate instead of being forwarded as null.
        if opts.speed and opts.speed != 1.0:
            speaking_rate = opts.speed
        else:
            speaking_rate = self.settings.speaking_rate

        body = {
            "input": {"text": text},
            "voice": {
                "languageCode": language,
                "name": voice_name,
            },
            "audioConfig": {
                "audioEncoding": "LINEAR16",
                "speakingRate": speaking_rate,
                "sampleRateHertz": self._SAMPLE_RATE_HZ,
            },
        }

        # Send the key as a header rather than in the query string so it
        # does not end up in request logs or exception messages.
        headers: dict[str, str] = {}
        if self.settings.api_key:
            headers["X-Goog-Api-Key"] = self.settings.api_key

        response = await self._client.post(self._ENDPOINT, headers=headers, json=body)
        if response.status_code != 200:
            raise RuntimeError(
                f"Google Cloud TTS API error {response.status_code}: {response.text}"
            )

        audio_b64 = response.json().get("audioContent")
        if not audio_b64:
            raise RuntimeError("Google Cloud TTS API returned no audioContent")

        result = AudioData(
            data=base64.b64decode(audio_b64),
            sample_rate=self._SAMPLE_RATE_HZ,
            channels=1,
            format=AudioFormat.WAV,
        )
        logger.info(
            "{}: completed in {:.0f}ms, output={} bytes",
            self.name, (time.perf_counter() - _t0) * 1000, len(result.data),
        )
        return result

    async def synthesize_stream(
        self, text: str, opts: TTSOptions | None = None
    ) -> AsyncIterator[AudioChunk]:
        """Streaming is unsupported by this batch provider; iterating raises.

        Raises:
            NotImplementedError: on the first iteration.
        """
        raise NotImplementedError(
            "Google Cloud TTS batch provider does not support streaming output"
        )
        yield  # pragma: no cover
@@ -0,0 +1,209 @@
1
+ """iFlytek (讯飞) TTS provider adapter — WebSocket-based."""
2
+ from __future__ import annotations
3
+
4
+ import base64
5
+ import hashlib
6
+ import hmac
7
+ import json
8
+ from openspeech.logging_config import logger
9
+ import urllib.parse
10
+ from collections.abc import AsyncIterator
11
+ from dataclasses import dataclass
12
+ from datetime import datetime, timezone
13
+ from email.utils import formatdate
14
+ from typing import Any
15
+
16
+ import httpx
17
+ import websockets
18
+
19
+ from openspeech.core.base import TTSProvider
20
+ from openspeech.core.enums import Capability, ExecMode, ProviderType
21
+ from openspeech.core.models import AudioChunk, AudioData, TTSOptions
22
+ from openspeech.core.settings import BaseSettings
23
+
24
@dataclass
class IflytekTTSSettings(BaseSettings):
    """Configuration for the iFlytek TTS provider."""

    # Credentials: app_id goes into each request message; api_key and
    # api_secret are used to sign the WebSocket URL.
    app_id: str = ""
    api_key: str = ""
    api_secret: str = ""
    # Default voice identifier, used when the caller does not pick one.
    voice: str = "xiaoyan"
    # Default speed value sent to the service when the caller leaves
    # speed at its default.
    speed: int = 50
31
+
32
class IflytekTTS(TTSProvider):
    """iFlytek TTS adapter that streams MP3 audio over a signed WebSocket.

    :meth:`synthesize_stream` opens one WebSocket per request and yields
    audio chunks as they arrive; :meth:`synthesize` collects those chunks
    into a single :class:`AudioData`.
    """

    name = "iflytek-tts"
    provider_type = ProviderType.TTS
    execution_mode = ExecMode.IN_PROCESS
    settings_cls = IflytekTTSSettings
    capabilities = {Capability.BATCH, Capability.STREAMING, Capability.MULTILINGUAL}

    # iFlytek voice catalog: (vcn, display name, language, group)
    _VOICES = [
        # Chinese
        ("xiaoyan", "小燕 — 中文女声", "zh_cn", "中文"),
        ("aisjiuxu", "许久 — 中文男声", "zh_cn", "中文"),
        ("aisxping", "小萍 — 中文女声", "zh_cn", "中文"),
        ("aisjinger", "小婧 — 中文女声", "zh_cn", "中文"),
        ("aisbabyxu", "许小宝 — 童声", "zh_cn", "中文"),
        ("x4_lingxiaolu_em", "凌小路 — 情感男声", "zh_cn", "中文"),
        ("x4_lingfeizhe_em", "凌飞哲 — 情感男声", "zh_cn", "中文"),
        ("xiaoyu", "小宇 — 中文男声", "zh_cn", "中文"),
        ("xiaoqi", "小琪 — 中文女声", "zh_cn", "中文"),
        ("xiaofeng", "小峰 — 中文男声", "zh_cn", "中文"),
        ("xiaomei", "小梅 — 粤语女声", "zh_cn", "粤语"),
        ("xiaolin", "小林 — 台湾女声", "zh_cn", "中文"),
        ("xiaorong", "小蓉 — 四川女声", "zh_cn", "方言"),
        ("xiaoqian", "小芊 — 东北女声", "zh_cn", "方言"),
        # English
        ("catherine", "Catherine — English Female", "en_us", "English"),
        ("john", "John — English Male", "en_us", "English"),
        ("laura", "Laura — English Female", "en_us", "English"),
        # Japanese
        ("yuka", "Yuka — Japanese Female", "ja_jp", "日本語"),
        # Korean
        ("xiaoqiukor", "小秋 — Korean Female", "ko_kr", "한국어"),
    ]

    # Selectable voices, derived from the catalog so the two cannot drift.
    field_options = {"voice": [entry[0] for entry in _VOICES]}

    _WS_HOST = "tts-api.xfyun.cn"
    _WS_PATH = "/v2/tts"

    # Bounds applied to a speed value derived from TTSOptions.speed.
    _SPEED_MIN = 0
    _SPEED_MAX = 100

    def __init__(self, settings: IflytekTTSSettings | None = None) -> None:
        """Store *settings* (defaults when omitted); no I/O happens here."""
        self.settings = settings or IflytekTTSSettings()
        self._client: httpx.AsyncClient | None = None
        self._owns_client: bool = True

    def set_http_client(self, client) -> None:
        """Use an externally managed HTTP client; it is never closed by us."""
        self._client = client
        self._owns_client = False

    async def start(self) -> None:
        """Create a private HTTP client unless one was injected."""
        if self._client is None:
            self._client = httpx.AsyncClient(timeout=60.0)
            self._owns_client = True
        logger.info("{} provider started", self.name)

    async def stop(self) -> None:
        """Close the HTTP client if this provider created it, then drop it."""
        if self._client is not None and self._owns_client:
            await self._client.aclose()
        self._client = None
        logger.info("{} provider stopped", self.name)

    async def health_check(self) -> bool:
        """Return True when all three credentials are configured."""
        cfg = self.settings
        return bool(cfg.app_id) and bool(cfg.api_key) and bool(cfg.api_secret)

    def _build_auth_url(self) -> str:
        """Build HMAC-SHA256 signed WebSocket URL."""
        now = datetime.now(tz=timezone.utc)
        date = formatdate(timeval=now.timestamp(), localtime=False, usegmt=True)

        # The signed text is the host, the date and the request line,
        # joined by newlines, exactly as announced in `headers=` below.
        signature_origin = (
            f"host: {self._WS_HOST}\n"
            f"date: {date}\n"
            f"GET {self._WS_PATH} HTTP/1.1"
        )
        signature_sha = hmac.new(
            self.settings.api_secret.encode("utf-8"),
            signature_origin.encode("utf-8"),
            hashlib.sha256,
        ).digest()
        signature = base64.b64encode(signature_sha).decode("utf-8")

        authorization_origin = (
            f'api_key="{self.settings.api_key}", '
            f'algorithm="hmac-sha256", '
            f'headers="host date request-line", '
            f'signature="{signature}"'
        )
        authorization = base64.b64encode(
            authorization_origin.encode("utf-8")
        ).decode("utf-8")

        params = urllib.parse.urlencode(
            {"authorization": authorization, "date": date, "host": self._WS_HOST}
        )
        return f"wss://{self._WS_HOST}{self._WS_PATH}?{params}"

    async def synthesize(
        self, text: str, opts: TTSOptions | None = None
    ) -> AudioData:
        """Batch synthesize by collecting all stream chunks.

        Raises:
            RuntimeError: propagated from :meth:`synthesize_stream`.
        """
        parts: list[bytes] = []
        async for chunk in self.synthesize_stream(text, opts):
            parts.append(chunk.data)
        audio_bytes = b"".join(parts)
        logger.info("iFlytek TTS: {} chunks, {} MP3 bytes total", len(parts), len(audio_bytes))
        # NOTE(review): format is passed as a plain string here — confirm
        # AudioData accepts that.
        return AudioData(
            data=audio_bytes,
            sample_rate=16000,
            channels=1,
            format="mp3",
        )

    def _build_ws_message(self, text: str, voice: str | None = None, speed: float | None = None) -> dict:
        """Build the WebSocket request message.

        Args:
            text: Text to speak; sent base64-encoded as UTF-8.
            voice: Voice identifier; the configured voice when omitted.
            speed: Relative speed where 1.0 is normal.  A set, non-1.0 value
                is scaled by 50 and clamped to 0–100; otherwise the
                configured ``settings.speed`` is sent unchanged.
        """
        vcn = voice or self.settings.voice
        if speed and speed != 1.0:
            # Clamp so large multipliers (e.g. 2.5 -> 125) stay in range.
            spd = max(self._SPEED_MIN, min(self._SPEED_MAX, int(speed * 50)))
        else:
            spd = self.settings.speed
        text_b64 = base64.b64encode(text.encode("utf-8")).decode("utf-8")
        return {
            "common": {"app_id": self.settings.app_id},
            "business": {
                "aue": "lame",
                "sfl": 1,
                "vcn": vcn,
                "speed": spd,
                "tte": "UTF8",
            },
            "data": {
                "status": 2,
                "text": text_b64,
            },
        }

    async def list_voices(self) -> list[dict]:
        """Return available iFlytek voices."""
        return [
            {"name": vcn, "description": desc, "language": lang, "group": group}
            for vcn, desc, lang, group in self._VOICES
        ]

    async def synthesize_stream(
        self, text: str, opts: TTSOptions | None = None
    ) -> AsyncIterator[AudioChunk]:
        """Stream MP3 audio chunks as they arrive from iFlytek WebSocket.

        Yields:
            One :class:`AudioChunk` per response frame that carries audio,
            numbered from 0; the chunk from the frame with status 2 is
            marked final.

        Raises:
            RuntimeError: if the provider was not started, or a response
                frame carries a non-zero (or missing) ``code``.
        """
        if self._client is None:
            raise RuntimeError("Provider not started — call start() first")
        opts = opts or TTSOptions()

        voice = opts.voice or self.settings.voice
        speed = opts.speed if opts.speed != 1.0 else None
        logger.info("iFlytek stream: voice={}, speed={}, text={} chars", voice, speed, len(text))

        url = self._build_auth_url()
        msg = self._build_ws_message(text, voice=voice, speed=speed)
        seq = 0

        async with websockets.connect(url) as ws:
            await ws.send(json.dumps(msg))

            async for message in ws:
                resp = json.loads(message)
                code = resp.get("code", -1)
                if code != 0:
                    raise RuntimeError(
                        f"iFlytek TTS error [{code}]: {resp.get('message', 'unknown')}"
                    )

                # Tolerate an explicit `"data": null` as well as a missing key.
                data = resp.get("data") or {}
                audio_b64 = data.get("audio", "")
                is_final = data.get("status") == 2

                if audio_b64:
                    chunk_bytes = base64.b64decode(audio_b64)
                    yield AudioChunk(data=chunk_bytes, sequence=seq, is_final=is_final)
                    seq += 1

                if is_final:
                    break