openspeechapi 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118) hide show
  1. openspeech/__init__.py +75 -0
  2. openspeech/__main__.py +5 -0
  3. openspeech/cli.py +413 -0
  4. openspeech/client/__init__.py +4 -0
  5. openspeech/client/client.py +145 -0
  6. openspeech/config.py +212 -0
  7. openspeech/core/__init__.py +0 -0
  8. openspeech/core/base.py +75 -0
  9. openspeech/core/enums.py +39 -0
  10. openspeech/core/models.py +61 -0
  11. openspeech/core/registry.py +37 -0
  12. openspeech/core/settings.py +8 -0
  13. openspeech/demo.py +675 -0
  14. openspeech/dispatch/__init__.py +0 -0
  15. openspeech/dispatch/context.py +34 -0
  16. openspeech/dispatch/dispatcher.py +661 -0
  17. openspeech/dispatch/executors/__init__.py +0 -0
  18. openspeech/dispatch/executors/base.py +34 -0
  19. openspeech/dispatch/executors/in_process.py +66 -0
  20. openspeech/dispatch/executors/remote.py +64 -0
  21. openspeech/dispatch/executors/subprocess_exec.py +446 -0
  22. openspeech/dispatch/fanout.py +95 -0
  23. openspeech/dispatch/filters.py +73 -0
  24. openspeech/dispatch/lifecycle.py +178 -0
  25. openspeech/dispatch/watcher.py +82 -0
  26. openspeech/engine_catalog.py +236 -0
  27. openspeech/engine_registry.yaml +347 -0
  28. openspeech/exceptions.py +51 -0
  29. openspeech/factory.py +325 -0
  30. openspeech/local_engines/__init__.py +12 -0
  31. openspeech/local_engines/aim_resolver.py +91 -0
  32. openspeech/local_engines/backends/__init__.py +1 -0
  33. openspeech/local_engines/backends/docker_backend.py +490 -0
  34. openspeech/local_engines/backends/native_backend.py +902 -0
  35. openspeech/local_engines/base.py +30 -0
  36. openspeech/local_engines/engines/__init__.py +1 -0
  37. openspeech/local_engines/engines/faster_whisper.py +36 -0
  38. openspeech/local_engines/engines/fish_speech.py +33 -0
  39. openspeech/local_engines/engines/sherpa_onnx.py +56 -0
  40. openspeech/local_engines/engines/whisper.py +41 -0
  41. openspeech/local_engines/engines/whisperlivekit.py +60 -0
  42. openspeech/local_engines/manager.py +208 -0
  43. openspeech/local_engines/models.py +50 -0
  44. openspeech/local_engines/progress.py +69 -0
  45. openspeech/local_engines/registry.py +19 -0
  46. openspeech/local_engines/task_store.py +52 -0
  47. openspeech/local_engines/tasks.py +71 -0
  48. openspeech/logging_config.py +607 -0
  49. openspeech/observe/__init__.py +0 -0
  50. openspeech/observe/base.py +79 -0
  51. openspeech/observe/debug.py +44 -0
  52. openspeech/observe/latency.py +19 -0
  53. openspeech/observe/metrics.py +47 -0
  54. openspeech/observe/tracing.py +44 -0
  55. openspeech/observe/usage.py +27 -0
  56. openspeech/providers/__init__.py +0 -0
  57. openspeech/providers/_template.py +101 -0
  58. openspeech/providers/stt/__init__.py +0 -0
  59. openspeech/providers/stt/alibaba.py +86 -0
  60. openspeech/providers/stt/assemblyai.py +135 -0
  61. openspeech/providers/stt/azure_speech.py +99 -0
  62. openspeech/providers/stt/baidu.py +135 -0
  63. openspeech/providers/stt/deepgram.py +311 -0
  64. openspeech/providers/stt/elevenlabs.py +385 -0
  65. openspeech/providers/stt/faster_whisper.py +211 -0
  66. openspeech/providers/stt/google_cloud.py +106 -0
  67. openspeech/providers/stt/iflytek.py +427 -0
  68. openspeech/providers/stt/macos_speech.py +226 -0
  69. openspeech/providers/stt/openai.py +84 -0
  70. openspeech/providers/stt/sherpa_onnx.py +353 -0
  71. openspeech/providers/stt/tencent.py +212 -0
  72. openspeech/providers/stt/volcengine.py +107 -0
  73. openspeech/providers/stt/whisper.py +153 -0
  74. openspeech/providers/stt/whisperlivekit.py +530 -0
  75. openspeech/providers/stt/windows_speech.py +249 -0
  76. openspeech/providers/tts/__init__.py +0 -0
  77. openspeech/providers/tts/alibaba.py +95 -0
  78. openspeech/providers/tts/azure_speech.py +123 -0
  79. openspeech/providers/tts/baidu.py +143 -0
  80. openspeech/providers/tts/coqui.py +64 -0
  81. openspeech/providers/tts/cosyvoice.py +90 -0
  82. openspeech/providers/tts/deepgram.py +174 -0
  83. openspeech/providers/tts/elevenlabs.py +311 -0
  84. openspeech/providers/tts/fish_speech.py +158 -0
  85. openspeech/providers/tts/google_cloud.py +107 -0
  86. openspeech/providers/tts/iflytek.py +209 -0
  87. openspeech/providers/tts/macos_say.py +251 -0
  88. openspeech/providers/tts/minimax.py +122 -0
  89. openspeech/providers/tts/openai.py +104 -0
  90. openspeech/providers/tts/piper.py +104 -0
  91. openspeech/providers/tts/tencent.py +189 -0
  92. openspeech/providers/tts/volcengine.py +117 -0
  93. openspeech/providers/tts/windows_sapi.py +234 -0
  94. openspeech/server/__init__.py +1 -0
  95. openspeech/server/app.py +72 -0
  96. openspeech/server/auth.py +42 -0
  97. openspeech/server/middleware.py +75 -0
  98. openspeech/server/routes/__init__.py +1 -0
  99. openspeech/server/routes/management.py +848 -0
  100. openspeech/server/routes/stt.py +121 -0
  101. openspeech/server/routes/tts.py +159 -0
  102. openspeech/server/routes/webui.py +29 -0
  103. openspeech/server/webui/app.js +2649 -0
  104. openspeech/server/webui/index.html +216 -0
  105. openspeech/server/webui/styles.css +617 -0
  106. openspeech/server/ws/__init__.py +1 -0
  107. openspeech/server/ws/stt_stream.py +263 -0
  108. openspeech/server/ws/tts_stream.py +207 -0
  109. openspeech/telemetry/__init__.py +21 -0
  110. openspeech/telemetry/perf.py +307 -0
  111. openspeech/utils/__init__.py +5 -0
  112. openspeech/utils/audio_converter.py +406 -0
  113. openspeech/utils/audio_playback.py +156 -0
  114. openspeech/vendor_registry.yaml +74 -0
  115. openspeechapi-0.1.0.dist-info/METADATA +101 -0
  116. openspeechapi-0.1.0.dist-info/RECORD +118 -0
  117. openspeechapi-0.1.0.dist-info/WHEEL +4 -0
  118. openspeechapi-0.1.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,251 @@
1
+ """macOS native TTS provider using the ``say`` command."""
2
+ from __future__ import annotations
3
+
4
+ import asyncio
5
+ import io
6
+ from openspeech.logging_config import logger
7
+ import shutil
8
+ import struct
9
+ import tempfile
10
+ import time
11
+ import wave
12
+ from collections.abc import AsyncIterator
13
+ from dataclasses import dataclass
14
+ from pathlib import Path
15
+
16
+ from openspeech.core.base import TTSProvider
17
+
18
+ from openspeech.core.enums import AudioFormat, Capability, ExecMode, ProviderType
19
+ from openspeech.core.models import AudioChunk, AudioData, TTSOptions
20
+ from openspeech.core.settings import BaseSettings
21
+ from openspeech.utils.audio_converter import AudioConverter
22
+
23
# Language prefix → human-readable group name
# Maps the lowercase ISO-639-1 prefix of a voice's locale (e.g. "en" from
# "en_US") to a display label used when grouping voices in list_voices().
# Prefixes absent from this table fall back to the uppercased prefix itself
# (see MacOSSayTTS._parse_voices_output).
_LANG_GROUP_MAP: dict[str, str] = {
    "zh": "中文",
    "en": "English",
    "ja": "日本語",
    "ko": "한국어",
    "fr": "Français",
    "de": "Deutsch",
    "es": "Español",
    "it": "Italiano",
    "pt": "Português",
    "ru": "Русский",
    "ar": "العربية",
    "nl": "Nederlands",
    "sv": "Svenska",
    "da": "Dansk",
    "fi": "Suomi",
    "nb": "Norsk",
    "pl": "Polski",
    "tr": "Türkçe",
    "th": "ไทย",
    "hi": "हिन्दी",
    "he": "עברית",
    "ro": "Română",
    "hu": "Magyar",
    "cs": "Čeština",
    "el": "Ελληνικά",
    "id": "Bahasa Indonesia",
    "vi": "Tiếng Việt",
}
53
+
54
@dataclass
class MacOSSaySettings(BaseSettings):
    """Settings for the macOS ``say`` TTS provider."""

    # Voice name passed via ``say -v`` when the request supplies no voice.
    default_voice: str = "Samantha"
    # Speaking rate in words per minute (``say -r``); multiplied by the
    # request's speed factor in synthesize().
    default_rate: int = 200
58
+
59
class MacOSSayTTS(TTSProvider):
    """macOS native TTS via the ``say`` command-line tool.

    Synthesis shells out to ``say -o <tmp>.aiff`` and converts the AIFF
    output to WAV with :class:`AudioConverter`.  ``synthesize_stream`` is a
    batch-then-chunk fallback, not true incremental synthesis.
    """

    name = "macos-say"
    provider_type = ProviderType.TTS
    execution_mode = ExecMode.IN_PROCESS
    settings_cls = MacOSSaySettings
    capabilities = {Capability.BATCH, Capability.MULTILINGUAL}

    def __init__(self, settings: MacOSSaySettings | None = None) -> None:
        self.settings = settings or MacOSSaySettings()
        self._available: bool = False
        # Cached result of ``say -v ?``, populated lazily by list_voices()
        # and cleared on stop().
        self._voices_cache: list[dict] | None = None

    # -- lifecycle ------------------------------------------------------------

    async def start(self) -> None:
        """Verify the ``say`` binary exists and mark the provider available.

        Raises:
            RuntimeError: if ``say`` is not on PATH (i.e. not running on macOS).
        """
        if shutil.which("say") is None:
            raise RuntimeError(
                "macOS 'say' command not found — this provider requires macOS"
            )
        self._available = True
        logger.info("{} provider started", self.name)

    async def stop(self) -> None:
        """Mark the provider unavailable and drop the voices cache."""
        self._available = False
        self._voices_cache = None
        logger.info("{} provider stopped", self.name)

    async def health_check(self) -> bool:
        """Return True when started, or when ``say`` is available pre-start."""
        if self._available:
            return True
        # Pre-start check: verify say command exists on macOS
        return shutil.which("say") is not None

    # -- synthesis ------------------------------------------------------------

    async def synthesize(
        self, text: str, opts: TTSOptions | None = None
    ) -> AudioData:
        """Synthesize *text* and return WAV audio.

        Args:
            text: The text to speak.
            opts: Optional TTS options; ``voice`` and ``speed`` are honored.

        Returns:
            AudioData in WAV format with ``duration_ms`` computed from the
            WAV header.

        Raises:
            RuntimeError: if the provider is not started or ``say`` fails.
        """
        if not self._available:
            raise RuntimeError("Provider not started — call start() first")
        logger.info("{}: request received, text={} chars", self.name, len(text))
        _t0 = time.perf_counter()

        opts = opts or TTSOptions()
        voice = opts.voice or self.settings.default_voice
        # ``say -r`` expects words per minute; scale the configured default
        # by the requested speed multiplier.
        rate = int(self.settings.default_rate * opts.speed)

        # Local import: ``os`` is not imported at module level in this file.
        import os

        tmp_path: str | None = None
        try:
            # Create a temp file for the AIFF output; close the fd right
            # away since only the path is handed to ``say``.
            fd, tmp_path = tempfile.mkstemp(suffix=".aiff")
            os.close(fd)

            await self._run_say(voice, rate, tmp_path, text)

            aiff_data = Path(tmp_path).read_bytes()

            # Convert AIFF → WAV via AudioConverter
            aiff_audio = AudioData(
                data=aiff_data,
                sample_rate=22050,  # placeholder; to_wav parses AIFF header
                channels=1,
                format=AudioFormat.AIFF,
            )
            wav_audio = AudioConverter.to_wav(aiff_audio)

            # Parse WAV to compute duration
            with wave.open(io.BytesIO(wav_audio.data), "rb") as wf:
                n_frames = wf.getnframes()
                frame_rate = wf.getframerate()
                duration_ms = int(n_frames / frame_rate * 1000) if frame_rate else 0

            result = AudioData(
                data=wav_audio.data,
                sample_rate=wav_audio.sample_rate,
                channels=wav_audio.channels,
                format=AudioFormat.WAV,
                duration_ms=duration_ms,
            )
            logger.info("{}: completed in {:.0f}ms, output={} bytes", self.name, (time.perf_counter() - _t0) * 1000, len(result.data))
            return result
        finally:
            # Best-effort removal of the temporary AIFF file.
            if tmp_path is not None:
                try:
                    Path(tmp_path).unlink(missing_ok=True)
                except OSError:
                    pass

    async def synthesize_stream(
        self, text: str, opts: TTSOptions | None = None
    ) -> AsyncIterator[AudioChunk]:
        """Batch-then-chunk fallback: synthesize full audio, then yield chunks."""
        logger.info("{}: stream request, text={} chars", self.name, len(text))
        _t0 = time.perf_counter()
        result = await self.synthesize(text, opts)
        chunk_size = 4096
        # A single counter serves as both the chunk sequence number and the
        # final chunk count (the original kept two identical counters).
        sequence = 0
        for i in range(0, len(result.data), chunk_size):
            chunk_data = result.data[i : i + chunk_size]
            logger.debug("{}: chunk #{}, {} bytes", self.name, sequence, len(chunk_data))
            yield AudioChunk(data=chunk_data, sequence=sequence)
            sequence += 1
        # Empty terminal chunk signals end-of-stream to consumers.
        yield AudioChunk(data=b"", sequence=sequence, is_final=True)
        logger.info("{}: stream complete, {} chunks in {:.0f}ms", self.name, sequence, (time.perf_counter() - _t0) * 1000)

    # -- voices ---------------------------------------------------------------

    async def list_voices(self) -> list[dict]:
        """Return installed voices, parsing ``say -v ?`` once and caching."""
        if self._voices_cache is not None:
            return self._voices_cache

        output = await self._run_command(["say", "-v", "?"])
        self._voices_cache = self._parse_voices_output(output)
        return self._voices_cache

    # -- internal helpers -----------------------------------------------------

    async def _run_say(
        self, voice: str, rate: int, output_path: str, text: str
    ) -> None:
        """Execute ``say`` to produce an AIFF file.

        ``--`` terminates option parsing so *text* beginning with a dash is
        never misread as a flag.

        Raises:
            RuntimeError: if ``say`` exits nonzero (stderr included).
        """
        cmd = ["say", "-v", voice, "-r", str(rate), "-o", output_path, "--", text]
        proc = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        _, stderr = await proc.communicate()
        if proc.returncode != 0:
            raise RuntimeError(
                f"say command failed (exit {proc.returncode}): "
                f"{stderr.decode(errors='replace')}"
            )

    async def _run_command(self, cmd: list[str]) -> str:
        """Run *cmd* and return its stdout as text.

        Raises:
            RuntimeError: if the command exits nonzero.  (Previously a
            failure was silently swallowed and an empty string returned,
            which made list_voices() report "no voices" on error.)
        """
        proc = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        stdout, stderr = await proc.communicate()
        if proc.returncode != 0:
            raise RuntimeError(
                f"command {cmd[0]!r} failed (exit {proc.returncode}): "
                f"{stderr.decode(errors='replace')}"
            )
        return stdout.decode(errors="replace")

    @staticmethod
    def _parse_voices_output(output: str) -> list[dict]:
        """Parse the output of ``say -v ?`` into a list of voice dicts.

        Expected line format::

            Samantha            en_US    # Hello, my name is Samantha.

        Returns:
            Dicts with keys ``name``, ``language``, ``description`` and
            ``group`` (display label derived from the language prefix).
        """
        voices: list[dict] = []
        for line in output.splitlines():
            line = line.strip()
            if not line:
                continue

            # Split on '#' to separate the description
            description = ""
            if "#" in line:
                main_part, description = line.split("#", 1)
                description = description.strip()
            else:
                main_part = line

            # The name and language are separated by whitespace.
            # Name can contain spaces, but language is always the last
            # whitespace-separated token before the '#'.
            parts = main_part.rsplit(None, 1)
            if len(parts) < 2:
                continue

            name = parts[0].strip()
            language = parts[1].strip()

            # Derive group from language prefix
            lang_prefix = language.split("_")[0].lower()
            group = _LANG_GROUP_MAP.get(lang_prefix, lang_prefix.upper())

            voices.append({
                "name": name,
                "language": language,
                "description": description,
                "group": group,
            })

        return voices
@@ -0,0 +1,122 @@
1
+ """MiniMax TTS provider adapter (REST API, in-process)."""
2
+ from __future__ import annotations
3
+
4
+ import base64
5
+ from openspeech.logging_config import logger
6
+ import time
7
+ from collections.abc import AsyncIterator
8
+ from dataclasses import dataclass
9
+ from typing import Any
10
+
11
+ from openspeech.core.base import TTSProvider
12
+
13
+ from openspeech.core.enums import AudioFormat, Capability, ExecMode, ProviderType
14
+ from openspeech.core.models import AudioChunk, AudioData, TTSOptions
15
+ from openspeech.core.settings import BaseSettings
16
+
17
@dataclass
class MinimaxTTSSettings(BaseSettings):
    """Settings for the MiniMax TTS provider."""

    # Bearer token sent in the Authorization header.
    api_key: str = ""
    # MiniMax GroupId appended to the t2a_v2 endpoint URL.
    group_id: str = ""
    # Model identifier sent in the request payload.
    model: str = "speech-01"
    # Default voice when the request supplies none.
    voice_id: str = "male-qn-qingse"
    # Fallback speech speed, used when the request's speed is the 1.0 default.
    speed: float = 1.0
    # Volume sent in voice_setting.
    vol: float = 1.0
    # Pitch shift sent in voice_setting.
    pitch: int = 0
26
+
27
class MinimaxTTS(TTSProvider):
    """MiniMax TTS provider using the REST ``t2a_v2`` endpoint (in-process).

    Batch synthesis only; ``synthesize_stream`` is a declared-but-unimplemented
    async generator.
    """

    name = "minimax"
    provider_type = ProviderType.TTS
    execution_mode = ExecMode.IN_PROCESS
    settings_cls = MinimaxTTSSettings
    capabilities = {Capability.STREAMING}
    field_options = {
        "model": ["speech-01", "speech-01-turbo", "speech-02"],
        "voice_id": [
            "male-qn-qingse", "male-qn-jingying", "male-qn-badao", "male-qn-daxuesheng",
            "female-shaonv", "female-yujie", "female-chengshu", "female-tianmei",
            "presenter_male", "presenter_female",
        ],
    }

    def __init__(self, settings: MinimaxTTSSettings | None = None) -> None:
        self.settings = settings or MinimaxTTSSettings()
        # HTTP client; may be injected via set_http_client() for pooling.
        self._client: Any = None
        # True when this instance created the client and must close it in stop().
        self._owns_client: bool = True

    def set_http_client(self, client) -> None:
        """Inject a shared HTTP client; the provider will not close it."""
        self._client = client
        self._owns_client = False

    async def start(self) -> None:
        """Create an httpx client unless one was injected.

        Raises:
            ImportError: if httpx is not installed.
        """
        if self._client is None:
            try:
                import httpx
            except ImportError as e:
                # Chain the original error so the real import failure stays
                # visible in the traceback (was raised without `from e`).
                raise ImportError(
                    "Install httpx: pip install openspeech[minimax]"
                ) from e
            self._client = httpx.AsyncClient(timeout=30.0)
            self._owns_client = True
        logger.info("{} provider started", self.name)

    async def stop(self) -> None:
        """Close the HTTP client if this provider owns it."""
        if self._client is not None and self._owns_client:
            await self._client.aclose()
        self._client = None
        logger.info("{} provider stopped", self.name)

    async def health_check(self) -> bool:
        """Healthy when an API key is configured."""
        return bool(self.settings.api_key)

    async def synthesize(
        self, text: str, opts: TTSOptions | None = None
    ) -> AudioData:
        """Synthesize *text* via the MiniMax ``t2a_v2`` REST API.

        Returns:
            AudioData holding MP3 audio at 32 kHz mono, as requested in
            ``audio_setting``.

        Raises:
            RuntimeError: if the provider is not started or the response
                contains no audio payload.
        """
        if self._client is None:
            raise RuntimeError("Provider not started — call start() first")
        logger.info("{}: request received, text={} chars", self.name, len(text))
        _t0 = time.perf_counter()
        opts = opts or TTSOptions()
        url = f"https://api.minimax.chat/v1/t2a_v2?GroupId={self.settings.group_id}"
        payload = {
            "model": self.settings.model,
            "text": text,
            "stream": False,
            "voice_setting": {
                "voice_id": opts.voice or self.settings.voice_id,
                # Per-request speed wins only when it differs from the neutral
                # 1.0 default; otherwise the configured default speed applies.
                "speed": opts.speed if opts.speed != 1.0 else self.settings.speed,
                "vol": self.settings.vol,
                "pitch": self.settings.pitch,
            },
            "audio_setting": {
                "sample_rate": 32000,
                "bitrate": 128000,
                "format": "mp3",
            },
        }
        headers = {
            "Authorization": f"Bearer {self.settings.api_key}",
            "Content-Type": "application/json",
        }
        response = await self._client.post(url, json=payload, headers=headers)
        response.raise_for_status()
        data = response.json()
        if "data" in data and "audio" in data["data"]:
            # NOTE(review): MiniMax's t2a_v2 documentation describes the
            # audio field as hex-encoded by default, but this decodes
            # base64 — confirm against a live response before relying on
            # the output bytes.
            audio_bytes = base64.b64decode(data["data"]["audio"])
            result = AudioData(
                data=audio_bytes,
                sample_rate=32000,
                channels=1,
                format=AudioFormat.MP3,
            )
            logger.info("{}: completed in {:.0f}ms, output={} bytes", self.name, (time.perf_counter() - _t0) * 1000, len(result.data))
            return result
        raise RuntimeError(f"Minimax API error: {data}")

    async def synthesize_stream(
        self, text: str, opts: TTSOptions | None = None
    ) -> AsyncIterator[AudioChunk]:
        """Streaming synthesis placeholder — not implemented yet."""
        raise NotImplementedError(
            "MinimaxTTS.synthesize_stream() — streaming not yet implemented"
        )
        # Unreachable yield keeps this an async generator, so callers get
        # the NotImplementedError on first iteration per the interface.
        yield  # pragma: no cover
@@ -0,0 +1,104 @@
1
+ """OpenAI TTS provider adapter (speech API)."""
2
+ from __future__ import annotations
3
+
4
+ from openspeech.logging_config import logger
5
+ import time
6
+ from collections.abc import AsyncIterator
7
+ from dataclasses import dataclass
8
+ from typing import Any
9
+
10
+ from openspeech.core.base import TTSProvider
11
+
12
+ from openspeech.core.enums import Capability, ExecMode, ProviderType
13
+ from openspeech.core.models import AudioChunk, AudioData, TTSOptions
14
+ from openspeech.core.settings import BaseSettings
15
+
16
@dataclass
class OpenAITTSSettings(BaseSettings):
    """Settings for the OpenAI speech (TTS) provider."""

    # API key passed to AsyncOpenAI.
    api_key: str = ""
    # Optional custom endpoint; only sent when non-empty.
    base_url: str = ""
    # Default model when the request supplies none.
    model: str = "tts-1"
    # Default voice when the request supplies none.
    voice: str = "alloy"
    # Audio format requested from the API (response_format parameter).
    response_format: str = "pcm"
23
+
24
class OpenAITTS(TTSProvider):
    """OpenAI TTS provider adapter (speech API), batch and streaming."""

    name = "openai-tts"
    provider_type = ProviderType.TTS
    execution_mode = ExecMode.IN_PROCESS
    settings_cls = OpenAITTSSettings
    capabilities = {Capability.STREAMING, Capability.MULTILINGUAL}
    field_options = {"model": ["tts-1", "tts-1-hd", "gpt-4o-mini-tts"], "voice": ["alloy", "ash", "ballad", "coral", "echo", "fable", "nova", "onyx", "sage", "shimmer", "verse"]}

    def __init__(self, settings: OpenAITTSSettings | None = None) -> None:
        self.settings = settings or OpenAITTSSettings()
        self._client: Any = None

    async def start(self) -> None:
        """Create the AsyncOpenAI client.

        Raises:
            ImportError: if the openai package is not installed.
        """
        # Keep the try body to the import alone: previously client
        # construction was inside it, so an ImportError raised there was
        # masked by the misleading install message; also chain the cause.
        try:
            from openai import AsyncOpenAI
        except ImportError as e:
            raise ImportError("Install openai: pip install openspeech[openai]") from e
        kwargs: dict[str, Any] = {"api_key": self.settings.api_key}
        if self.settings.base_url:
            kwargs["base_url"] = self.settings.base_url
        self._client = AsyncOpenAI(**kwargs)
        logger.info("{} provider started", self.name)

    async def stop(self) -> None:
        """Release the client reference."""
        self._client = None
        logger.info("{} provider stopped", self.name)

    async def health_check(self) -> bool:
        """Healthy when an API key is configured."""
        return bool(self.settings.api_key)

    async def synthesize(
        self, text: str, opts: TTSOptions | None = None
    ) -> AudioData:
        """Synthesize *text* in one request.

        Args:
            text: Text to synthesize.
            opts: Optional options; ``voice`` and (if present) ``model``
                override the configured defaults.

        Raises:
            RuntimeError: if the provider has not been started.
        """
        if self._client is None:
            raise RuntimeError("Provider not started — call start() first")
        logger.info("{}: request received, text={} chars", self.name, len(text))
        _t0 = time.perf_counter()
        opts = opts or TTSOptions()
        voice = opts.voice or self.settings.voice
        # getattr: TTSOptions may not define a model field.
        model = getattr(opts, "model", None) or self.settings.model
        response = await self._client.audio.speech.create(
            model=model,
            voice=voice,
            input=text,
            response_format=self.settings.response_format,
        )
        audio_bytes = response.content
        result = AudioData(
            data=audio_bytes,
            sample_rate=24000,
            channels=1,
            # NOTE(review): labeled with opts.output_format, but the bytes
            # are in settings.response_format ("pcm" by default) — confirm
            # these agree or the metadata can mislead consumers.
            format=opts.output_format,
        )
        logger.info("{}: completed in {:.0f}ms, output={} bytes", self.name, (time.perf_counter() - _t0) * 1000, len(result.data))
        return result

    async def synthesize_stream(
        self, text: str, opts: TTSOptions | None = None
    ) -> AsyncIterator[AudioChunk]:
        """Stream synthesized audio as 4 KiB chunks, ending with a final marker.

        Raises:
            RuntimeError: if the provider has not been started.
        """
        if self._client is None:
            raise RuntimeError("Provider not started — call start() first")
        logger.info("{}: stream request, text={} chars", self.name, len(text))
        _t0 = time.perf_counter()
        opts = opts or TTSOptions()
        voice = opts.voice or self.settings.voice
        model = getattr(opts, "model", None) or self.settings.model
        chunk_count = 0
        async with self._client.audio.speech.with_streaming_response.create(
            model=model,
            voice=voice,
            input=text,
            response_format=self.settings.response_format,
        ) as response:
            sequence = 0
            async for chunk in response.iter_bytes(chunk_size=4096):
                logger.debug("{}: chunk #{}, {} bytes", self.name, sequence, len(chunk))
                yield AudioChunk(data=chunk, sequence=sequence)
                sequence += 1
                chunk_count += 1
            # Empty terminal chunk signals end-of-stream to consumers.
            yield AudioChunk(data=b"", sequence=sequence, is_final=True)
        logger.info("{}: stream complete, {} chunks in {:.0f}ms", self.name, chunk_count, (time.perf_counter() - _t0) * 1000)
@@ -0,0 +1,104 @@
1
+ """Piper TTS provider adapter (batch, in-process)."""
2
+ from __future__ import annotations
3
+
4
+ from openspeech.logging_config import logger
5
+ import time
6
+ from collections.abc import AsyncIterator
7
+ from dataclasses import dataclass
8
+ from typing import Any
9
+
10
+ from openspeech.core.base import TTSProvider
11
+
12
+ from openspeech.core.enums import AudioFormat, Capability, ExecMode, ProviderType
13
+ from openspeech.core.models import AudioChunk, AudioData, TTSOptions
14
+ from openspeech.core.settings import BaseSettings
15
+
16
@dataclass
class PiperTTSSettings(BaseSettings):
    """Settings for the Piper TTS provider."""

    # Filesystem path to the Piper ONNX voice model.
    model_path: str = ""
    # Optional path to the model's JSON config; empty string means "derive".
    config_path: str = ""
    # Run inference on CUDA when available.
    use_cuda: bool = False
    # Piper synthesis knobs passed to synthesize_stream_raw().
    noise_scale: float = 0.667
    length_scale: float = 1.0
    noise_w: float = 0.8
24
+
25
class PiperTTS(TTSProvider):
    """Piper neural TTS provider adapter (batch only, in-process)."""

    name = "piper"
    provider_type = ProviderType.TTS
    execution_mode = ExecMode.IN_PROCESS
    settings_cls = PiperTTSSettings
    capabilities = {Capability.BATCH}

    def __init__(self, settings: PiperTTSSettings | None = None) -> None:
        self.settings = settings or PiperTTSSettings()
        # _client mirrors _voice; kept for interface parity with the other
        # providers (health_check tests _client).
        self._client: Any = None
        self._voice: Any = None

    async def start(self) -> None:
        """Load the Piper voice model from the configured paths.

        Raises:
            ImportError: if piper-tts is not installed.
        """
        try:
            import piper
        except ImportError as e:
            # Chain the cause so the real import failure stays visible.
            raise ImportError(
                "Install piper-tts: pip install openspeech[piper]"
            ) from e
        self._voice = piper.PiperVoice.load(
            self.settings.model_path,
            config_path=self.settings.config_path or None,
            use_cuda=self.settings.use_cuda,
        )
        self._client = self._voice
        logger.info("{} provider started", self.name)

    async def stop(self) -> None:
        """Release the loaded voice model."""
        self._client = None
        self._voice = None
        logger.info("{} provider stopped", self.name)

    async def health_check(self) -> bool:
        """Healthy once a voice model has been loaded."""
        return self._client is not None

    async def synthesize(
        self, text: str, opts: TTSOptions | None = None
    ) -> AudioData:
        """Synthesize *text* to WAV using the loaded Piper voice.

        NOTE(review): *opts* is currently ignored — voice/speed overrides
        are not applied; confirm whether opts.speed should map onto
        length_scale.

        Raises:
            RuntimeError: if the provider has not been started.
        """
        if self._client is None:
            raise RuntimeError("Provider not started — call start() first")
        logger.info("{}: request received, text={} chars", self.name, len(text))
        _t0 = time.perf_counter()
        import io
        import wave

        # Collect raw PCM chunks and join once at the end — the original
        # `bytes +=` accumulation is quadratic in the worst case.
        pcm_chunks: list[bytes] = []
        for audio_bytes in self._voice.synthesize_stream_raw(
            text,
            length_scale=self.settings.length_scale,
            noise_scale=self.settings.noise_scale,
            noise_w=self.settings.noise_w,
        ):
            pcm_chunks.append(audio_bytes)
        audio_data = b"".join(pcm_chunks)

        # Default sample rate, overridden by the voice config when exposed.
        sample_rate = 22050
        if hasattr(self._voice, "config") and hasattr(self._voice.config, "sample_rate"):
            sample_rate = self._voice.config.sample_rate

        # Wrap the raw 16-bit mono PCM in a WAV container.
        buf = io.BytesIO()
        with wave.open(buf, "wb") as wf:
            wf.setnchannels(1)
            wf.setsampwidth(2)
            wf.setframerate(sample_rate)
            wf.writeframes(audio_data)
        result = AudioData(
            data=buf.getvalue(),
            sample_rate=sample_rate,
            channels=1,
            format=AudioFormat.WAV,
        )
        logger.info("{}: completed in {:.0f}ms, output={} bytes", self.name, (time.perf_counter() - _t0) * 1000, len(result.data))
        return result

    async def synthesize_stream(
        self, text: str, opts: TTSOptions | None = None
    ) -> AsyncIterator[AudioChunk]:
        """Streaming synthesis is not supported by this provider."""
        raise NotImplementedError(
            "PiperTTS does not support streaming synthesis"
        )
        # Unreachable yield keeps this an async generator per the interface.
        yield  # pragma: no cover