openspeechapi 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118) hide show
  1. openspeech/__init__.py +75 -0
  2. openspeech/__main__.py +5 -0
  3. openspeech/cli.py +413 -0
  4. openspeech/client/__init__.py +4 -0
  5. openspeech/client/client.py +145 -0
  6. openspeech/config.py +212 -0
  7. openspeech/core/__init__.py +0 -0
  8. openspeech/core/base.py +75 -0
  9. openspeech/core/enums.py +39 -0
  10. openspeech/core/models.py +61 -0
  11. openspeech/core/registry.py +37 -0
  12. openspeech/core/settings.py +8 -0
  13. openspeech/demo.py +675 -0
  14. openspeech/dispatch/__init__.py +0 -0
  15. openspeech/dispatch/context.py +34 -0
  16. openspeech/dispatch/dispatcher.py +661 -0
  17. openspeech/dispatch/executors/__init__.py +0 -0
  18. openspeech/dispatch/executors/base.py +34 -0
  19. openspeech/dispatch/executors/in_process.py +66 -0
  20. openspeech/dispatch/executors/remote.py +64 -0
  21. openspeech/dispatch/executors/subprocess_exec.py +446 -0
  22. openspeech/dispatch/fanout.py +95 -0
  23. openspeech/dispatch/filters.py +73 -0
  24. openspeech/dispatch/lifecycle.py +178 -0
  25. openspeech/dispatch/watcher.py +82 -0
  26. openspeech/engine_catalog.py +236 -0
  27. openspeech/engine_registry.yaml +347 -0
  28. openspeech/exceptions.py +51 -0
  29. openspeech/factory.py +325 -0
  30. openspeech/local_engines/__init__.py +12 -0
  31. openspeech/local_engines/aim_resolver.py +91 -0
  32. openspeech/local_engines/backends/__init__.py +1 -0
  33. openspeech/local_engines/backends/docker_backend.py +490 -0
  34. openspeech/local_engines/backends/native_backend.py +902 -0
  35. openspeech/local_engines/base.py +30 -0
  36. openspeech/local_engines/engines/__init__.py +1 -0
  37. openspeech/local_engines/engines/faster_whisper.py +36 -0
  38. openspeech/local_engines/engines/fish_speech.py +33 -0
  39. openspeech/local_engines/engines/sherpa_onnx.py +56 -0
  40. openspeech/local_engines/engines/whisper.py +41 -0
  41. openspeech/local_engines/engines/whisperlivekit.py +60 -0
  42. openspeech/local_engines/manager.py +208 -0
  43. openspeech/local_engines/models.py +50 -0
  44. openspeech/local_engines/progress.py +69 -0
  45. openspeech/local_engines/registry.py +19 -0
  46. openspeech/local_engines/task_store.py +52 -0
  47. openspeech/local_engines/tasks.py +71 -0
  48. openspeech/logging_config.py +607 -0
  49. openspeech/observe/__init__.py +0 -0
  50. openspeech/observe/base.py +79 -0
  51. openspeech/observe/debug.py +44 -0
  52. openspeech/observe/latency.py +19 -0
  53. openspeech/observe/metrics.py +47 -0
  54. openspeech/observe/tracing.py +44 -0
  55. openspeech/observe/usage.py +27 -0
  56. openspeech/providers/__init__.py +0 -0
  57. openspeech/providers/_template.py +101 -0
  58. openspeech/providers/stt/__init__.py +0 -0
  59. openspeech/providers/stt/alibaba.py +86 -0
  60. openspeech/providers/stt/assemblyai.py +135 -0
  61. openspeech/providers/stt/azure_speech.py +99 -0
  62. openspeech/providers/stt/baidu.py +135 -0
  63. openspeech/providers/stt/deepgram.py +311 -0
  64. openspeech/providers/stt/elevenlabs.py +385 -0
  65. openspeech/providers/stt/faster_whisper.py +211 -0
  66. openspeech/providers/stt/google_cloud.py +106 -0
  67. openspeech/providers/stt/iflytek.py +427 -0
  68. openspeech/providers/stt/macos_speech.py +226 -0
  69. openspeech/providers/stt/openai.py +84 -0
  70. openspeech/providers/stt/sherpa_onnx.py +353 -0
  71. openspeech/providers/stt/tencent.py +212 -0
  72. openspeech/providers/stt/volcengine.py +107 -0
  73. openspeech/providers/stt/whisper.py +153 -0
  74. openspeech/providers/stt/whisperlivekit.py +530 -0
  75. openspeech/providers/stt/windows_speech.py +249 -0
  76. openspeech/providers/tts/__init__.py +0 -0
  77. openspeech/providers/tts/alibaba.py +95 -0
  78. openspeech/providers/tts/azure_speech.py +123 -0
  79. openspeech/providers/tts/baidu.py +143 -0
  80. openspeech/providers/tts/coqui.py +64 -0
  81. openspeech/providers/tts/cosyvoice.py +90 -0
  82. openspeech/providers/tts/deepgram.py +174 -0
  83. openspeech/providers/tts/elevenlabs.py +311 -0
  84. openspeech/providers/tts/fish_speech.py +158 -0
  85. openspeech/providers/tts/google_cloud.py +107 -0
  86. openspeech/providers/tts/iflytek.py +209 -0
  87. openspeech/providers/tts/macos_say.py +251 -0
  88. openspeech/providers/tts/minimax.py +122 -0
  89. openspeech/providers/tts/openai.py +104 -0
  90. openspeech/providers/tts/piper.py +104 -0
  91. openspeech/providers/tts/tencent.py +189 -0
  92. openspeech/providers/tts/volcengine.py +117 -0
  93. openspeech/providers/tts/windows_sapi.py +234 -0
  94. openspeech/server/__init__.py +1 -0
  95. openspeech/server/app.py +72 -0
  96. openspeech/server/auth.py +42 -0
  97. openspeech/server/middleware.py +75 -0
  98. openspeech/server/routes/__init__.py +1 -0
  99. openspeech/server/routes/management.py +848 -0
  100. openspeech/server/routes/stt.py +121 -0
  101. openspeech/server/routes/tts.py +159 -0
  102. openspeech/server/routes/webui.py +29 -0
  103. openspeech/server/webui/app.js +2649 -0
  104. openspeech/server/webui/index.html +216 -0
  105. openspeech/server/webui/styles.css +617 -0
  106. openspeech/server/ws/__init__.py +1 -0
  107. openspeech/server/ws/stt_stream.py +263 -0
  108. openspeech/server/ws/tts_stream.py +207 -0
  109. openspeech/telemetry/__init__.py +21 -0
  110. openspeech/telemetry/perf.py +307 -0
  111. openspeech/utils/__init__.py +5 -0
  112. openspeech/utils/audio_converter.py +406 -0
  113. openspeech/utils/audio_playback.py +156 -0
  114. openspeech/vendor_registry.yaml +74 -0
  115. openspeechapi-0.1.0.dist-info/METADATA +101 -0
  116. openspeechapi-0.1.0.dist-info/RECORD +118 -0
  117. openspeechapi-0.1.0.dist-info/WHEEL +4 -0
  118. openspeechapi-0.1.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,347 @@
1
+ # Engine Registry — lists all available engines with basic metadata.
2
+ # Provider code supplies default_settings and field_options at runtime.
3
+ # In the future, this file may be updated from a remote URL.
4
+ version: 1
5
+
6
+ engines:
7
+ # ---- Cloud STT (note: the deepgram-tts entry in this section is a cloud TTS engine) ----
8
+ - name: openai-stt
9
+ vendor: openai
10
+ provider: openai-stt
11
+ type: stt
12
+ category: cloud
13
+ display_name: "OpenAI STT"
14
+ description: "OpenAI Whisper API (also compatible with Groq, SiliconFlow)"
15
+ default_alias: openai_stt
16
+ default_exec_mode: remote
17
+ pip_extras: [openai]
18
+
19
+ - name: deepgram-stt
20
+ vendor: deepgram
21
+ provider: deepgram
22
+ type: stt
23
+ category: cloud
24
+ display_name: "Deepgram STT"
25
+ description: "Deepgram Nova STT — fast, accurate, word-level timestamps"
26
+ default_alias: deepgram_stt
27
+ default_exec_mode: remote
28
+ pip_extras: [deepgram]
29
+
30
+ - name: elevenlabs-stt
31
+ vendor: elevenlabs
32
+ provider: elevenlabs-stt
33
+ type: stt
34
+ category: cloud
35
+ display_name: "ElevenLabs STT"
36
+ description: "ElevenLabs Scribe Speech-to-Text"
37
+ default_alias: elevenlabs_stt
38
+ default_exec_mode: remote
39
+ pip_extras: [elevenlabs-stt]
40
+
41
+ - name: deepgram-tts
42
+ vendor: deepgram
43
+ provider: deepgram-tts
44
+ type: tts
45
+ category: cloud
46
+ display_name: "Deepgram TTS"
47
+ description: "Deepgram Aura TTS — fast, natural text-to-speech"
48
+ default_alias: deepgram_tts
49
+ default_exec_mode: remote
50
+
51
+ - name: google-stt
52
+ vendor: google
53
+ provider: google-stt
54
+ type: stt
55
+ category: cloud
56
+ display_name: "Google STT"
57
+ description: "Google Cloud Speech-to-Text v1"
58
+ default_alias: google_stt
59
+ default_exec_mode: remote
60
+
61
+ - name: azure-stt
62
+ vendor: azure
63
+ provider: azure-stt
64
+ type: stt
65
+ category: cloud
66
+ display_name: "Azure STT"
67
+ description: "Azure Cognitive Services Speech-to-Text"
68
+ default_alias: azure_stt
69
+ default_exec_mode: remote
70
+
71
+ - name: assemblyai-stt
72
+ vendor: assemblyai
73
+ provider: assemblyai-stt
74
+ type: stt
75
+ category: cloud
76
+ display_name: "AssemblyAI STT"
77
+ description: "AssemblyAI STT — async upload + poll"
78
+ default_alias: assemblyai_stt
79
+ default_exec_mode: remote
80
+
81
+ - name: volcengine-stt
82
+ vendor: volcengine
83
+ provider: volcengine-stt
84
+ type: stt
85
+ category: cloud
86
+ display_name: "Volcengine STT"
87
+ description: "Volcengine (火山引擎) Speech-to-Text"
88
+ default_alias: volcengine_stt
89
+ default_exec_mode: remote
90
+
91
+ - name: alibaba-stt
92
+ vendor: alibaba
93
+ provider: alibaba-stt
94
+ type: stt
95
+ category: cloud
96
+ display_name: "Alibaba STT"
97
+ description: "Alibaba Cloud (阿里云百炼) STT — OpenAI-compatible"
98
+ default_alias: alibaba_stt
99
+ default_exec_mode: remote
100
+
101
+ - name: tencent-stt
102
+ vendor: tencent
103
+ provider: tencent-stt
104
+ type: stt
105
+ category: cloud
106
+ display_name: "Tencent STT"
107
+ description: "Tencent Cloud (腾讯云) Speech-to-Text"
108
+ default_alias: tencent_stt
109
+ default_exec_mode: remote
110
+
111
+ - name: baidu-stt
112
+ vendor: baidu
113
+ provider: baidu-stt
114
+ type: stt
115
+ category: cloud
116
+ display_name: "Baidu STT"
117
+ description: "Baidu (百度) Speech-to-Text"
118
+ default_alias: baidu_stt
119
+ default_exec_mode: remote
120
+
121
+ - name: iflytek-stt
122
+ vendor: iflytek
123
+ provider: iflytek-stt
124
+ type: stt
125
+ category: cloud
126
+ display_name: "iFlytek STT"
127
+ description: "iFlytek (科大讯飞) Speech-to-Text"
128
+ default_alias: iflytek_stt
129
+ default_exec_mode: remote
130
+ pip_extras: [iflytek-stt]
131
+
132
+ # ---- Cloud TTS ----
133
+ - name: openai-tts
134
+ vendor: openai
135
+ provider: openai-tts
136
+ type: tts
137
+ category: cloud
138
+ display_name: "OpenAI TTS"
139
+ description: "OpenAI TTS API (also compatible with Groq, SiliconFlow, Minimax)"
140
+ default_alias: openai_tts
141
+ default_exec_mode: remote
142
+ pip_extras: [openai]
143
+
144
+ - name: elevenlabs-tts
145
+ vendor: elevenlabs
146
+ provider: elevenlabs
147
+ type: tts
148
+ category: cloud
149
+ display_name: "ElevenLabs TTS"
150
+ description: "ElevenLabs TTS — high quality, voice cloning"
151
+ default_alias: elevenlabs
152
+ default_exec_mode: remote
153
+ pip_extras: [elevenlabs-tts]
154
+
155
+ - name: minimax-tts
156
+ vendor: minimax
157
+ provider: minimax
158
+ type: tts
159
+ category: cloud
160
+ display_name: "MiniMax TTS"
161
+ description: "MiniMax TTS — 300+ voices, OpenAI-compatible"
162
+ default_alias: minimax
163
+ default_exec_mode: remote
164
+
165
+ - name: google-tts
166
+ vendor: google
167
+ provider: google-tts
168
+ type: tts
169
+ category: cloud
170
+ display_name: "Google TTS"
171
+ description: "Google Cloud Text-to-Speech"
172
+ default_alias: google_tts
173
+ default_exec_mode: remote
174
+
175
+ - name: azure-tts
176
+ vendor: azure
177
+ provider: azure-tts
178
+ type: tts
179
+ category: cloud
180
+ display_name: "Azure TTS"
181
+ description: "Azure Cognitive Services Text-to-Speech"
182
+ default_alias: azure_tts
183
+ default_exec_mode: remote
184
+
185
+ - name: volcengine-tts
186
+ vendor: volcengine
187
+ provider: volcengine-tts
188
+ type: tts
189
+ category: cloud
190
+ display_name: "Volcengine TTS"
191
+ description: "Volcengine (火山引擎) Text-to-Speech"
192
+ default_alias: volcengine_tts
193
+ default_exec_mode: remote
194
+
195
+ - name: alibaba-tts
196
+ vendor: alibaba
197
+ provider: alibaba-tts
198
+ type: tts
199
+ category: cloud
200
+ display_name: "Alibaba TTS"
201
+ description: "Alibaba Cloud (阿里云百炼) TTS — OpenAI-compatible"
202
+ default_alias: alibaba_tts
203
+ default_exec_mode: remote
204
+
205
+ - name: tencent-tts
206
+ vendor: tencent
207
+ provider: tencent-tts
208
+ type: tts
209
+ category: cloud
210
+ display_name: "Tencent TTS"
211
+ description: "Tencent Cloud (腾讯云) Text-to-Speech"
212
+ default_alias: tencent_tts
213
+ default_exec_mode: remote
214
+
215
+ - name: baidu-tts
216
+ vendor: baidu
217
+ provider: baidu-tts
218
+ type: tts
219
+ category: cloud
220
+ display_name: "Baidu TTS"
221
+ description: "Baidu (百度) Text-to-Speech"
222
+ default_alias: baidu_tts
223
+ default_exec_mode: remote
224
+
225
+ - name: iflytek-tts
226
+ vendor: iflytek
227
+ provider: iflytek-tts
228
+ type: tts
229
+ category: cloud
230
+ display_name: "iFlytek TTS"
231
+ description: "iFlytek (科大讯飞) Text-to-Speech"
232
+ default_alias: iflytek_tts
233
+ default_exec_mode: remote
234
+ pip_extras: [iflytek-tts]
235
+
236
+ # ---- Local STT ----
237
+ - name: faster-whisper
238
+ provider: faster-whisper
239
+ type: stt
240
+ category: local
241
+ display_name: "Faster Whisper"
242
+ description: "Faster Whisper — CTranslate2-based, fast local STT"
243
+ default_alias: stt_fw
244
+ default_exec_mode: subprocess
245
+ pip_extras: [faster-whisper]
246
+
247
+ - name: whisper
248
+ provider: whisper
249
+ type: stt
250
+ category: local
251
+ display_name: "Whisper"
252
+ description: "OpenAI Whisper — original local STT model"
253
+ default_alias: stt_whisper
254
+ default_exec_mode: subprocess
255
+ pip_extras: [whisper]
256
+
257
+ - name: whisperlivekit
258
+ provider: whisperlivekit-stt
259
+ type: stt
260
+ category: local
261
+ display_name: "WhisperLiveKit"
262
+ description: "WhisperLiveKit — streaming STT with MLX backend"
263
+ default_alias: stt_wlk
264
+ default_exec_mode: local
265
+ pip_extras: [whisperlivekit]
266
+
267
+ - name: sherpa-onnx
268
+ provider: sherpa-onnx-stt
269
+ type: stt
270
+ category: local
271
+ display_name: "Sherpa ONNX"
272
+ description: "Sherpa-ONNX — lightweight streaming STT"
273
+ default_alias: stt_sherpa
274
+ default_exec_mode: local
275
+
276
+ # ---- Local TTS ----
277
+ - name: fish-speech
278
+ provider: fish-speech
279
+ type: tts
280
+ category: local
281
+ display_name: "Fish Speech"
282
+ description: "Fish Speech — high quality local TTS"
283
+ default_alias: fish_speech
284
+ default_exec_mode: local
285
+ pip_extras: [fish-speech]
286
+
287
+ # ---- macOS Native ----
288
+ - name: macos-stt
289
+ provider: macos-stt
290
+ type: stt
291
+ category: native
292
+ display_name: "macOS STT"
293
+ description: "macOS SFSpeechRecognizer — built-in speech recognition"
294
+ default_alias: macos_stt
295
+ default_exec_mode: in_process
296
+ platforms: [darwin]
297
+
298
+ - name: macos-tts
299
+ provider: macos-say
300
+ type: tts
301
+ category: native
302
+ display_name: "macOS TTS"
303
+ description: "macOS say command — built-in text-to-speech"
304
+ default_alias: macos_tts
305
+ default_exec_mode: in_process
306
+ platforms: [darwin]
307
+
308
+ # ---- Windows Native ----
309
+ - name: windows-stt
310
+ provider: windows-stt
311
+ type: stt
312
+ category: native
313
+ display_name: "Windows STT"
314
+ description: "Windows System.Speech — built-in speech recognition"
315
+ default_alias: windows_stt
316
+ default_exec_mode: in_process
317
+ platforms: [win32]
318
+
319
+ - name: windows-tts
320
+ provider: windows-tts
321
+ type: tts
322
+ category: native
323
+ display_name: "Windows TTS"
324
+ description: "Windows SAPI5 — built-in text-to-speech"
325
+ default_alias: windows_tts
326
+ default_exec_mode: in_process
327
+ platforms: [win32]
328
+ pip_extras: [windows-tts]
329
+
330
+ # ---- Cross-platform Native (auto-detect) ----
331
+ - name: native-stt
332
+ provider: native-stt
333
+ type: stt
334
+ category: native
335
+ display_name: "Native STT"
336
+ description: "Platform native STT (macOS SFSpeech / Windows System.Speech)"
337
+ default_alias: native_stt
338
+ default_exec_mode: in_process
339
+
340
+ - name: native-tts
341
+ provider: native-tts
342
+ type: tts
343
+ category: native
344
+ display_name: "Native TTS"
345
+ description: "Platform native TTS (macOS say / Windows SAPI5)"
346
+ default_alias: native_tts
347
+ default_exec_mode: in_process
@@ -0,0 +1,51 @@
1
+ """OpenSpeech exception hierarchy."""
2
+
3
+
4
class OpenSpeechError(Exception):
    """Root of the OpenSpeech exception hierarchy.

    Every other exception defined in this module derives from this class,
    so catching it handles any of them.
    """
6
+
7
+
8
class ProviderError(OpenSpeechError):
    """Error from a provider invocation.

    The rendered message has the form
    ``[provider_name] original_type: message``.

    Attributes:
        provider_name: Provider name, as passed to the constructor.
        original_type: Name of the underlying error type, as passed to the
            constructor.
        message: The raw, unformatted message passed to the constructor.
    """

    def __init__(self, provider_name: str, original_type: str, message: str) -> None:
        self.provider_name = provider_name
        self.original_type = original_type
        # Keep the raw message: ``args`` only holds the formatted string,
        # which is not enough to call ``__init__`` again when the exception
        # is copied or unpickled.
        self.message = message
        super().__init__(f"[{provider_name}] {original_type}: {message}")

    def __reduce__(self) -> tuple:
        """Support ``pickle``/``copy`` by replaying the constructor arguments.

        The inherited implementation re-invokes the class with ``self.args``
        (the single formatted string), which raises ``TypeError`` for this
        three-argument constructor.
        """
        init_args = (self.provider_name, self.original_type, self.message)
        return (type(self), init_args, self.__dict__)
15
+
16
+
17
class ProviderCrashedError(OpenSpeechError):
    """Provider subprocess crashed during invocation.

    Attributes:
        provider_name: Provider name, as passed to the constructor.
    """

    def __init__(self, provider_name: str) -> None:
        self.provider_name = provider_name
        super().__init__(f"Provider '{provider_name}' crashed during invocation")

    def __reduce__(self) -> tuple:
        """Support ``pickle``/``copy`` by replaying the constructor argument.

        The inherited implementation re-invokes the class with ``self.args``,
        i.e. the already formatted message, so a round-tripped exception
        would carry a message wrapped in the template twice.
        """
        return (type(self), (self.provider_name,), self.__dict__)
23
+
24
+
25
class ProviderUnavailableError(OpenSpeechError):
    """Provider exhausted restart attempts and is unavailable.

    Attributes:
        provider_name: Provider name, as passed to the constructor.
    """

    def __init__(self, provider_name: str) -> None:
        self.provider_name = provider_name
        super().__init__(f"Provider '{provider_name}' is unavailable (max restarts exhausted)")

    def __reduce__(self) -> tuple:
        """Support ``pickle``/``copy`` by replaying the constructor argument.

        The inherited implementation re-invokes the class with ``self.args``,
        i.e. the already formatted message, so a round-tripped exception
        would carry a message wrapped in the template twice.
        """
        return (type(self), (self.provider_name,), self.__dict__)
31
+
32
+
33
class ConfigError(OpenSpeechError):
    """Raised when configuration cannot be loaded or fails validation."""
35
+
36
+
37
class FanOutAllFailedError(OpenSpeechError):
    """All providers in a fanout invocation failed.

    Attributes:
        errors: The mapping passed to the constructor. Its keys are joined
            into the message; per the annotation each value is the
            exception associated with that key (presumably keyed by
            provider name -- confirm against the caller).
    """

    def __init__(self, errors: dict[str, Exception]) -> None:
        self.errors = errors
        # Iterating a dict yields its keys, so no explicit ``.keys()`` call.
        names = ", ".join(errors)
        super().__init__(f"All fanout providers failed: {names}")

    def __reduce__(self) -> tuple:
        """Support ``pickle``/``copy`` by replaying the constructor argument.

        The inherited implementation re-invokes the class with ``self.args``
        (the formatted message string), which fails in ``__init__`` because a
        string has no ``keys()``.
        """
        return (type(self), (self.errors,), self.__dict__)
44
+
45
+
46
class ProviderNotFoundError(OpenSpeechError):
    """Provider not found in registry or dispatcher.

    Attributes:
        name: The provider name that was looked up, as passed to the
            constructor.
    """

    def __init__(self, name: str) -> None:
        self.name = name
        super().__init__(f"Provider '{name}' not found")

    def __reduce__(self) -> tuple:
        """Support ``pickle``/``copy`` by replaying the constructor argument.

        The inherited implementation re-invokes the class with ``self.args``,
        i.e. the already formatted message, so a round-tripped exception
        would carry a message wrapped in the template twice.
        """
        return (type(self), (self.name,), self.__dict__)