openspeechapi 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (247) hide show
  1. openspeechapi-0.1.0/.dockerignore +16 -0
  2. openspeechapi-0.1.0/.env.example +12 -0
  3. openspeechapi-0.1.0/.github/workflows/ci.yml +31 -0
  4. openspeechapi-0.1.0/.gitignore +29 -0
  5. openspeechapi-0.1.0/.tmp/audio/en.aiff +0 -0
  6. openspeechapi-0.1.0/.tmp/audio/en_16k.wav +0 -0
  7. openspeechapi-0.1.0/.tmp/audio/en_16k_pad6.wav +0 -0
  8. openspeechapi-0.1.0/.tmp/audio/en_long.aiff +0 -0
  9. openspeechapi-0.1.0/.tmp/audio/en_long_16k.wav +0 -0
  10. openspeechapi-0.1.0/.tmp/audio/en_mid.aiff +0 -0
  11. openspeechapi-0.1.0/.tmp/audio/en_mid_16k.wav +0 -0
  12. openspeechapi-0.1.0/.tmp/audio/zh.aiff +0 -0
  13. openspeechapi-0.1.0/.tmp/audio/zh_16k.wav +0 -0
  14. openspeechapi-0.1.0/.tmp/openspeech-8600.log +5 -0
  15. openspeechapi-0.1.0/.tmp/openspeech-serve.log +0 -0
  16. openspeechapi-0.1.0/.tmp/webui-server.log +5 -0
  17. openspeechapi-0.1.0/.tmp/webui-server.pid +1 -0
  18. openspeechapi-0.1.0/.tmp/wlk12101.log +15 -0
  19. openspeechapi-0.1.0/.tmp/wlk12101.pid +1 -0
  20. openspeechapi-0.1.0/.tmp/wlk12102.log +14 -0
  21. openspeechapi-0.1.0/.tmp/wlk12102.pid +1 -0
  22. openspeechapi-0.1.0/AGENTS.md +36 -0
  23. openspeechapi-0.1.0/CLAUDE.md +73 -0
  24. openspeechapi-0.1.0/Dockerfile +20 -0
  25. openspeechapi-0.1.0/PKG-INFO +101 -0
  26. openspeechapi-0.1.0/README.md +640 -0
  27. openspeechapi-0.1.0/aibox-script/aibox-1.0.0-SNAPSHOT-stdout.log +33495 -0
  28. openspeechapi-0.1.0/aibox-script/aibox.2026-04-02.log +96410 -0
  29. openspeechapi-0.1.0/aibox-script/com.user.restart-jar.plist +29 -0
  30. openspeechapi-0.1.0/aibox-script/restart-jar.sh +45 -0
  31. openspeechapi-0.1.0/aibox-script.tar.gz +0 -0
  32. openspeechapi-0.1.0/docker-compose.yml +30 -0
  33. openspeechapi-0.1.0/docs/architecture/local-engine-manager.md +55 -0
  34. openspeechapi-0.1.0/docs/architecture/logging-spec.md +225 -0
  35. openspeechapi-0.1.0/docs/architecture/stt-engineering-optimization-guide.md +778 -0
  36. openspeechapi-0.1.0/docs/architecture/stt-streaming-spec.md +302 -0
  37. openspeechapi-0.1.0/docs/architecture/webui-phase-a.md +38 -0
  38. openspeechapi-0.1.0/docs/engines/fish-speech-docker.md +40 -0
  39. openspeechapi-0.1.0/docs/engines/fish-speech-native.md +35 -0
  40. openspeechapi-0.1.0/docs/engines/stt-native-models.md +93 -0
  41. openspeechapi-0.1.0/docs/superpowers/plans/2026-04-01-phase1-implementation.md +4171 -0
  42. openspeechapi-0.1.0/docs/superpowers/plans/2026-04-11-macos-native-tts-stt.md +1982 -0
  43. openspeechapi-0.1.0/docs/superpowers/specs/2026-04-01-openspeech-api-design.md +515 -0
  44. openspeechapi-0.1.0/docs/superpowers/specs/2026-04-03-hot-lazy-loading.md +155 -0
  45. openspeechapi-0.1.0/docs/superpowers/specs/2026-04-03-phase2-protocol-layer.md +355 -0
  46. openspeechapi-0.1.0/docs/superpowers/specs/2026-04-03-phase3-production.md +96 -0
  47. openspeechapi-0.1.0/docs/superpowers/specs/2026-04-11-macos-native-tts-stt-design.md +546 -0
  48. openspeechapi-0.1.0/docs/superpowers/specs/2026-04-12-cloud-providers-webui-design.md +150 -0
  49. openspeechapi-0.1.0/docs/superpowers/specs/2026-04-15-streaming-tts-stt-fixes-display-names.md +218 -0
  50. openspeechapi-0.1.0/docs/superpowers/specs/2026-04-16-provider-management-engines-rename.md +273 -0
  51. openspeechapi-0.1.0/examples/client_stt.py +33 -0
  52. openspeechapi-0.1.0/examples/client_tts.py +28 -0
  53. openspeechapi-0.1.0/examples/stt_simple.py +48 -0
  54. openspeechapi-0.1.0/examples/tts_simple.py +39 -0
  55. openspeechapi-0.1.0/openspeech/__init__.py +75 -0
  56. openspeechapi-0.1.0/openspeech/__main__.py +5 -0
  57. openspeechapi-0.1.0/openspeech/cli.py +413 -0
  58. openspeechapi-0.1.0/openspeech/client/__init__.py +4 -0
  59. openspeechapi-0.1.0/openspeech/client/client.py +145 -0
  60. openspeechapi-0.1.0/openspeech/config.py +212 -0
  61. openspeechapi-0.1.0/openspeech/core/__init__.py +0 -0
  62. openspeechapi-0.1.0/openspeech/core/base.py +75 -0
  63. openspeechapi-0.1.0/openspeech/core/enums.py +39 -0
  64. openspeechapi-0.1.0/openspeech/core/models.py +61 -0
  65. openspeechapi-0.1.0/openspeech/core/registry.py +37 -0
  66. openspeechapi-0.1.0/openspeech/core/settings.py +8 -0
  67. openspeechapi-0.1.0/openspeech/demo.py +675 -0
  68. openspeechapi-0.1.0/openspeech/dispatch/__init__.py +0 -0
  69. openspeechapi-0.1.0/openspeech/dispatch/context.py +34 -0
  70. openspeechapi-0.1.0/openspeech/dispatch/dispatcher.py +661 -0
  71. openspeechapi-0.1.0/openspeech/dispatch/executors/__init__.py +0 -0
  72. openspeechapi-0.1.0/openspeech/dispatch/executors/base.py +34 -0
  73. openspeechapi-0.1.0/openspeech/dispatch/executors/in_process.py +66 -0
  74. openspeechapi-0.1.0/openspeech/dispatch/executors/remote.py +64 -0
  75. openspeechapi-0.1.0/openspeech/dispatch/executors/subprocess_exec.py +446 -0
  76. openspeechapi-0.1.0/openspeech/dispatch/fanout.py +95 -0
  77. openspeechapi-0.1.0/openspeech/dispatch/filters.py +73 -0
  78. openspeechapi-0.1.0/openspeech/dispatch/lifecycle.py +178 -0
  79. openspeechapi-0.1.0/openspeech/dispatch/watcher.py +82 -0
  80. openspeechapi-0.1.0/openspeech/engine_catalog.py +236 -0
  81. openspeechapi-0.1.0/openspeech/engine_registry.yaml +347 -0
  82. openspeechapi-0.1.0/openspeech/exceptions.py +51 -0
  83. openspeechapi-0.1.0/openspeech/factory.py +325 -0
  84. openspeechapi-0.1.0/openspeech/local_engines/__init__.py +12 -0
  85. openspeechapi-0.1.0/openspeech/local_engines/aim_resolver.py +91 -0
  86. openspeechapi-0.1.0/openspeech/local_engines/backends/__init__.py +1 -0
  87. openspeechapi-0.1.0/openspeech/local_engines/backends/docker_backend.py +490 -0
  88. openspeechapi-0.1.0/openspeech/local_engines/backends/native_backend.py +902 -0
  89. openspeechapi-0.1.0/openspeech/local_engines/base.py +30 -0
  90. openspeechapi-0.1.0/openspeech/local_engines/engines/__init__.py +1 -0
  91. openspeechapi-0.1.0/openspeech/local_engines/engines/faster_whisper.py +36 -0
  92. openspeechapi-0.1.0/openspeech/local_engines/engines/fish_speech.py +33 -0
  93. openspeechapi-0.1.0/openspeech/local_engines/engines/sherpa_onnx.py +56 -0
  94. openspeechapi-0.1.0/openspeech/local_engines/engines/whisper.py +41 -0
  95. openspeechapi-0.1.0/openspeech/local_engines/engines/whisperlivekit.py +60 -0
  96. openspeechapi-0.1.0/openspeech/local_engines/manager.py +208 -0
  97. openspeechapi-0.1.0/openspeech/local_engines/models.py +50 -0
  98. openspeechapi-0.1.0/openspeech/local_engines/progress.py +69 -0
  99. openspeechapi-0.1.0/openspeech/local_engines/registry.py +19 -0
  100. openspeechapi-0.1.0/openspeech/local_engines/task_store.py +52 -0
  101. openspeechapi-0.1.0/openspeech/local_engines/tasks.py +71 -0
  102. openspeechapi-0.1.0/openspeech/logging_config.py +607 -0
  103. openspeechapi-0.1.0/openspeech/observe/__init__.py +0 -0
  104. openspeechapi-0.1.0/openspeech/observe/base.py +79 -0
  105. openspeechapi-0.1.0/openspeech/observe/debug.py +44 -0
  106. openspeechapi-0.1.0/openspeech/observe/latency.py +19 -0
  107. openspeechapi-0.1.0/openspeech/observe/metrics.py +47 -0
  108. openspeechapi-0.1.0/openspeech/observe/tracing.py +44 -0
  109. openspeechapi-0.1.0/openspeech/observe/usage.py +27 -0
  110. openspeechapi-0.1.0/openspeech/providers/__init__.py +0 -0
  111. openspeechapi-0.1.0/openspeech/providers/_template.py +101 -0
  112. openspeechapi-0.1.0/openspeech/providers/stt/__init__.py +0 -0
  113. openspeechapi-0.1.0/openspeech/providers/stt/alibaba.py +86 -0
  114. openspeechapi-0.1.0/openspeech/providers/stt/assemblyai.py +135 -0
  115. openspeechapi-0.1.0/openspeech/providers/stt/azure_speech.py +99 -0
  116. openspeechapi-0.1.0/openspeech/providers/stt/baidu.py +135 -0
  117. openspeechapi-0.1.0/openspeech/providers/stt/deepgram.py +311 -0
  118. openspeechapi-0.1.0/openspeech/providers/stt/elevenlabs.py +385 -0
  119. openspeechapi-0.1.0/openspeech/providers/stt/faster_whisper.py +211 -0
  120. openspeechapi-0.1.0/openspeech/providers/stt/google_cloud.py +106 -0
  121. openspeechapi-0.1.0/openspeech/providers/stt/iflytek.py +427 -0
  122. openspeechapi-0.1.0/openspeech/providers/stt/macos_speech.py +226 -0
  123. openspeechapi-0.1.0/openspeech/providers/stt/openai.py +84 -0
  124. openspeechapi-0.1.0/openspeech/providers/stt/sherpa_onnx.py +353 -0
  125. openspeechapi-0.1.0/openspeech/providers/stt/tencent.py +212 -0
  126. openspeechapi-0.1.0/openspeech/providers/stt/volcengine.py +107 -0
  127. openspeechapi-0.1.0/openspeech/providers/stt/whisper.py +153 -0
  128. openspeechapi-0.1.0/openspeech/providers/stt/whisperlivekit.py +530 -0
  129. openspeechapi-0.1.0/openspeech/providers/stt/windows_speech.py +249 -0
  130. openspeechapi-0.1.0/openspeech/providers/tts/__init__.py +0 -0
  131. openspeechapi-0.1.0/openspeech/providers/tts/alibaba.py +95 -0
  132. openspeechapi-0.1.0/openspeech/providers/tts/azure_speech.py +123 -0
  133. openspeechapi-0.1.0/openspeech/providers/tts/baidu.py +143 -0
  134. openspeechapi-0.1.0/openspeech/providers/tts/coqui.py +64 -0
  135. openspeechapi-0.1.0/openspeech/providers/tts/cosyvoice.py +90 -0
  136. openspeechapi-0.1.0/openspeech/providers/tts/deepgram.py +174 -0
  137. openspeechapi-0.1.0/openspeech/providers/tts/elevenlabs.py +311 -0
  138. openspeechapi-0.1.0/openspeech/providers/tts/fish_speech.py +158 -0
  139. openspeechapi-0.1.0/openspeech/providers/tts/google_cloud.py +107 -0
  140. openspeechapi-0.1.0/openspeech/providers/tts/iflytek.py +209 -0
  141. openspeechapi-0.1.0/openspeech/providers/tts/macos_say.py +251 -0
  142. openspeechapi-0.1.0/openspeech/providers/tts/minimax.py +122 -0
  143. openspeechapi-0.1.0/openspeech/providers/tts/openai.py +104 -0
  144. openspeechapi-0.1.0/openspeech/providers/tts/piper.py +104 -0
  145. openspeechapi-0.1.0/openspeech/providers/tts/tencent.py +189 -0
  146. openspeechapi-0.1.0/openspeech/providers/tts/volcengine.py +117 -0
  147. openspeechapi-0.1.0/openspeech/providers/tts/windows_sapi.py +234 -0
  148. openspeechapi-0.1.0/openspeech/server/__init__.py +1 -0
  149. openspeechapi-0.1.0/openspeech/server/app.py +72 -0
  150. openspeechapi-0.1.0/openspeech/server/auth.py +42 -0
  151. openspeechapi-0.1.0/openspeech/server/middleware.py +75 -0
  152. openspeechapi-0.1.0/openspeech/server/routes/__init__.py +1 -0
  153. openspeechapi-0.1.0/openspeech/server/routes/management.py +848 -0
  154. openspeechapi-0.1.0/openspeech/server/routes/stt.py +121 -0
  155. openspeechapi-0.1.0/openspeech/server/routes/tts.py +159 -0
  156. openspeechapi-0.1.0/openspeech/server/routes/webui.py +29 -0
  157. openspeechapi-0.1.0/openspeech/server/webui/app.js +2649 -0
  158. openspeechapi-0.1.0/openspeech/server/webui/index.html +216 -0
  159. openspeechapi-0.1.0/openspeech/server/webui/styles.css +617 -0
  160. openspeechapi-0.1.0/openspeech/server/ws/__init__.py +1 -0
  161. openspeechapi-0.1.0/openspeech/server/ws/stt_stream.py +263 -0
  162. openspeechapi-0.1.0/openspeech/server/ws/tts_stream.py +207 -0
  163. openspeechapi-0.1.0/openspeech/telemetry/__init__.py +21 -0
  164. openspeechapi-0.1.0/openspeech/telemetry/perf.py +307 -0
  165. openspeechapi-0.1.0/openspeech/utils/__init__.py +5 -0
  166. openspeechapi-0.1.0/openspeech/utils/audio_converter.py +406 -0
  167. openspeechapi-0.1.0/openspeech/utils/audio_playback.py +156 -0
  168. openspeechapi-0.1.0/openspeech/vendor_registry.yaml +74 -0
  169. openspeechapi-0.1.0/output/output.wav +0 -0
  170. openspeechapi-0.1.0/output.wav +0 -0
  171. openspeechapi-0.1.0/providers.example.yaml +232 -0
  172. openspeechapi-0.1.0/pyproject.toml +116 -0
  173. openspeechapi-0.1.0/scripts/engines/cloud/install.sh +48 -0
  174. openspeechapi-0.1.0/scripts/engines/faster-whisper/native/install.sh +72 -0
  175. openspeechapi-0.1.0/scripts/engines/fish-speech/native/install.sh +54 -0
  176. openspeechapi-0.1.0/scripts/engines/macos-stt/install.sh +80 -0
  177. openspeechapi-0.1.0/scripts/engines/macos-stt/macos_stt.swift +255 -0
  178. openspeechapi-0.1.0/scripts/engines/macos-stt/request_auth.swift +39 -0
  179. openspeechapi-0.1.0/scripts/engines/sherpa-onnx/native/install.sh +133 -0
  180. openspeechapi-0.1.0/scripts/engines/sherpa-onnx/native/run_streaming_server.py +113 -0
  181. openspeechapi-0.1.0/scripts/engines/whisper/native/install.sh +58 -0
  182. openspeechapi-0.1.0/scripts/engines/whisperlivekit/native/install.sh +86 -0
  183. openspeechapi-0.1.0/tests/__init__.py +0 -0
  184. openspeechapi-0.1.0/tests/conftest.py +77 -0
  185. openspeechapi-0.1.0/tests/e2e/__init__.py +0 -0
  186. openspeechapi-0.1.0/tests/e2e/conftest.py +40 -0
  187. openspeechapi-0.1.0/tests/e2e/test_fanout_e2e.py +105 -0
  188. openspeechapi-0.1.0/tests/e2e/test_faster_whisper_e2e.py +78 -0
  189. openspeechapi-0.1.0/tests/e2e/test_openai_e2e.py +104 -0
  190. openspeechapi-0.1.0/tests/e2e/test_webui_e2e.py +696 -0
  191. openspeechapi-0.1.0/tests/fixtures/hello.wav +0 -0
  192. openspeechapi-0.1.0/tests/integration/__init__.py +0 -0
  193. openspeechapi-0.1.0/tests/integration/test_fanout_integration.py +53 -0
  194. openspeechapi-0.1.0/tests/integration/test_in_process_integration.py +71 -0
  195. openspeechapi-0.1.0/tests/integration/test_server_client.py +110 -0
  196. openspeechapi-0.1.0/tests/unit/__init__.py +0 -0
  197. openspeechapi-0.1.0/tests/unit/test_aim_resolver.py +77 -0
  198. openspeechapi-0.1.0/tests/unit/test_audio_converter.py +296 -0
  199. openspeechapi-0.1.0/tests/unit/test_audio_playback.py +41 -0
  200. openspeechapi-0.1.0/tests/unit/test_base.py +77 -0
  201. openspeechapi-0.1.0/tests/unit/test_cli.py +101 -0
  202. openspeechapi-0.1.0/tests/unit/test_cli_engine.py +137 -0
  203. openspeechapi-0.1.0/tests/unit/test_client.py +94 -0
  204. openspeechapi-0.1.0/tests/unit/test_config.py +81 -0
  205. openspeechapi-0.1.0/tests/unit/test_context.py +56 -0
  206. openspeechapi-0.1.0/tests/unit/test_debug_observer.py +52 -0
  207. openspeechapi-0.1.0/tests/unit/test_dispatcher.py +271 -0
  208. openspeechapi-0.1.0/tests/unit/test_docker_backend_progress.py +28 -0
  209. openspeechapi-0.1.0/tests/unit/test_engine_registry.py +13 -0
  210. openspeechapi-0.1.0/tests/unit/test_enums.py +39 -0
  211. openspeechapi-0.1.0/tests/unit/test_executor_base.py +50 -0
  212. openspeechapi-0.1.0/tests/unit/test_fanout.py +166 -0
  213. openspeechapi-0.1.0/tests/unit/test_filters.py +171 -0
  214. openspeechapi-0.1.0/tests/unit/test_hot_reload.py +211 -0
  215. openspeechapi-0.1.0/tests/unit/test_in_process.py +104 -0
  216. openspeechapi-0.1.0/tests/unit/test_latency_observer.py +63 -0
  217. openspeechapi-0.1.0/tests/unit/test_lifecycle.py +115 -0
  218. openspeechapi-0.1.0/tests/unit/test_local_engine_task_store.py +31 -0
  219. openspeechapi-0.1.0/tests/unit/test_local_engines_manager.py +102 -0
  220. openspeechapi-0.1.0/tests/unit/test_logging.py +374 -0
  221. openspeechapi-0.1.0/tests/unit/test_metrics_observer.py +85 -0
  222. openspeechapi-0.1.0/tests/unit/test_models.py +93 -0
  223. openspeechapi-0.1.0/tests/unit/test_native_backend.py +269 -0
  224. openspeechapi-0.1.0/tests/unit/test_observer_base.py +257 -0
  225. openspeechapi-0.1.0/tests/unit/test_plugin_mechanism.py +120 -0
  226. openspeechapi-0.1.0/tests/unit/test_providers/__init__.py +0 -0
  227. openspeechapi-0.1.0/tests/unit/test_providers/test_cloud_providers.py +342 -0
  228. openspeechapi-0.1.0/tests/unit/test_providers/test_elevenlabs_stt.py +209 -0
  229. openspeechapi-0.1.0/tests/unit/test_providers/test_macos_say.py +324 -0
  230. openspeechapi-0.1.0/tests/unit/test_providers/test_macos_speech.py +315 -0
  231. openspeechapi-0.1.0/tests/unit/test_providers/test_openai_base_url.py +47 -0
  232. openspeechapi-0.1.0/tests/unit/test_providers/test_openai_stt.py +163 -0
  233. openspeechapi-0.1.0/tests/unit/test_providers/test_openai_tts.py +194 -0
  234. openspeechapi-0.1.0/tests/unit/test_providers/test_sherpa_onnx_stt.py +84 -0
  235. openspeechapi-0.1.0/tests/unit/test_providers/test_stt_stubs.py +448 -0
  236. openspeechapi-0.1.0/tests/unit/test_providers/test_tts_stubs.py +826 -0
  237. openspeechapi-0.1.0/tests/unit/test_providers/test_whisperlivekit_stt.py +187 -0
  238. openspeechapi-0.1.0/tests/unit/test_registry.py +55 -0
  239. openspeechapi-0.1.0/tests/unit/test_remote.py +81 -0
  240. openspeechapi-0.1.0/tests/unit/test_server/__init__.py +0 -0
  241. openspeechapi-0.1.0/tests/unit/test_server/test_auth.py +76 -0
  242. openspeechapi-0.1.0/tests/unit/test_server/test_config_api.py +159 -0
  243. openspeechapi-0.1.0/tests/unit/test_server/test_routes.py +399 -0
  244. openspeechapi-0.1.0/tests/unit/test_server/test_websocket.py +159 -0
  245. openspeechapi-0.1.0/tests/unit/test_subprocess.py +138 -0
  246. openspeechapi-0.1.0/tests/unit/test_usage_observer.py +87 -0
  247. openspeechapi-0.1.0/tests/unit/test_watcher.py +179 -0
@@ -0,0 +1,16 @@
1
+ .venv/
2
+ .git/
3
+ .env
4
+ __pycache__/
5
+ *.pyc
6
+ .pytest_cache/
7
+ .coverage
8
+ dist/
9
+ *.egg-info/
10
+ tests/
11
+ docs/
12
+ examples/
13
+ scripts/
14
+ output.wav
15
+ output/
16
+ .DS_Store
@@ -0,0 +1,12 @@
1
+ # OpenSpeech API Keys
2
+ # 复制此文件为 .env 并填入你的 Key: cp .env.example .env
3
+
4
+ # ── 云端 Provider Keys ──────────────────────────
5
+ OPENAI_API_KEY=sk-... # OpenAI STT (Whisper) + TTS
6
+ DEEPGRAM_API_KEY= # Deepgram STT (实时流式)
7
+ ELEVENLABS_API_KEY= # ElevenLabs TTS
8
+ MINIMAX_API_KEY= # Minimax TTS
9
+ MINIMAX_GROUP_ID= # Minimax Group ID
10
+
11
+ # ── 服务端认证 ──────────────────────────────────
12
+ OPENSPEECH_API_KEY= # HTTP 服务 Bearer token (可选)
@@ -0,0 +1,31 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+
13
+ steps:
14
+ - uses: actions/checkout@v4
15
+
16
+ - name: Set up Python 3.11
17
+ uses: actions/setup-python@v5
18
+ with:
19
+ python-version: "3.11"
20
+
21
+ - name: Install dependencies
22
+ run: pip install -e ".[dev]"
23
+
24
+ - name: Lint with ruff
25
+ run: ruff check openspeech/ tests/
26
+
27
+ - name: Run unit and integration tests
28
+ run: pytest tests/unit tests/integration -v --tb=short
29
+
30
+ - name: Check coverage
31
+ run: pytest tests/unit tests/integration --cov=openspeech --cov-report=term-missing --cov-fail-under=70
@@ -0,0 +1,29 @@
1
+ .superpowers/
2
+ .venv/
3
+ .env
4
+
5
+ # Local runtime config (copy providers.example.yaml → providers.yaml on first run)
6
+ providers.yaml
7
+ __pycache__/
8
+ *.egg-info/
9
+ .pytest_cache/
10
+ .coverage
11
+ dist/
12
+ build/
13
+ *.pyc
14
+ *.pyo
15
+
16
+ # macOS STT compiled artifacts
17
+ scripts/engines/macos-stt/macos-stt-helper
18
+ scripts/engines/macos-stt/macos-stt-request-auth
19
+ scripts/engines/macos-stt/MacOSSTTHelper.app/
20
+ scripts/engines/macos-stt/Info.plist
21
+
22
+ # Runtime logs (structured JSONL + server stdio captures)
23
+ logs/
24
+ server_err.log
25
+ server_stdout.log
26
+ server_stderr.log
27
+
28
+ # Claude Code project-local config
29
+ .claude/
Binary file
Binary file
@@ -0,0 +1,5 @@
1
+ INFO: Started server process [22552]
2
+ INFO: Waiting for application startup.
3
+ 2026-04-10 11:28:53.753 | INFO | openspeech.dispatch.watcher:start:35 - Config watcher started: .tmp/providers.webui.stt.yaml
4
+ INFO: Application startup complete.
5
+ INFO: Uvicorn running on http://127.0.0.1:8600 (Press CTRL+C to quit)
File without changes
@@ -0,0 +1,5 @@
1
+ INFO: Started server process [96229]
2
+ INFO: Waiting for application startup.
3
+ 2026-04-08 16:36:24.899 | INFO | openspeech.dispatch.watcher:start:35 - Config watcher started: .tmp/providers.webui.yaml
4
+ INFO: Application startup complete.
5
+ INFO: Uvicorn running on http://127.0.0.1:8600 (Press CTRL+C to quit)
@@ -0,0 +1 @@
1
+ 96229
@@ -0,0 +1,15 @@
1
+
2
+ WhisperLiveKit
3
+ Backend: mlx-whisper | Model: base | Language: auto
4
+ Accelerator: MPS (Apple Silicon), MLX
5
+
6
+ Web UI: http://127.0.0.1:12101/
7
+ WebSocket: ws://127.0.0.1:12101/asr
8
+ Deepgram: ws://127.0.0.1:12101/v1/listen
9
+ REST API: http://127.0.0.1:12101/v1/audio/transcriptions
10
+ Models: http://127.0.0.1:12101/v1/models
11
+ Health: http://127.0.0.1:12101/health
12
+
13
+ INFO: Started server process [93444]
14
+ INFO: Waiting for application startup.
15
+
@@ -0,0 +1 @@
1
+ 93444
@@ -0,0 +1,14 @@
1
+
2
+ WhisperLiveKit
3
+ Backend: mlx-whisper | Model: base | Language: auto
4
+ Accelerator: MPS (Apple Silicon), MLX
5
+
6
+ Web UI: http://127.0.0.1:12102/
7
+ WebSocket: ws://127.0.0.1:12102/asr
8
+ Deepgram: ws://127.0.0.1:12102/v1/listen
9
+ REST API: http://127.0.0.1:12102/v1/audio/transcriptions
10
+ Models: http://127.0.0.1:12102/v1/models
11
+ Health: http://127.0.0.1:12102/health
12
+
13
+ INFO: Started server process [23522]
14
+ INFO: Waiting for application startup.
@@ -0,0 +1 @@
1
+ 23522
@@ -0,0 +1,36 @@
1
+ # 语音大模型统一接口项目规范
2
+
3
+ ## Agent 调度规则
4
+
5
+ **所有 Agent 工具调用必须加上 `mode: "bypassPermissions"`**,示例:
6
+
7
+ ```json
8
+ {
9
+ "subagent_type": "general-purpose",
10
+ "mode": "bypassPermissions",
11
+ "prompt": "..."
12
+ }
13
+ ```
14
+
15
+ ### Agent角色分工
16
+ 1. 可选的Agent角色参考 [~/.codex/AGENTS.md](~/.codex/AGENTS.md)文件的描述。
17
+ 2. 默认启动项目经理角色,项目经理仅负责项目当前进度的同步管理和任务调度分派,其他工作根据已设定的agent,按照需要选择合适的角色启动并进行工作分配。
18
+
19
+ ## 功能实现规范
20
+
21
+ 1. 每次对话提到问题bug或者功能点变化时,先进行问题分析和功能实现方案设计,并和用户确认后再执行实际开发修复工作。
22
+ 2. 功能修复或者功能点更新完成后,同步检查是否需要更新对应的文档。
23
+
24
+ ## 测试规范
25
+
26
+ ### 核心要求
27
+
28
+ 1. **UI E2E 测试(强制)**:所有界面功能必须具备从"前端页面操作"到"前端页面最终结果"的完整 E2E 测试用例
29
+ 2. **GIVEN → WHEN → THEN → AND**:每个用例必须覆盖前置条件、页面操作、后端状态变更、前端结果验证
30
+ 3. **新增/修改界面功能时必须同步新增/更新对应的 UI E2E 用例**
31
+
32
+ ## 文档&同步规则
33
+
34
+ ### 文档规范
35
+ **默认将项目过程和设计文档都存放于[docs/](docs/)目录下**
36
+
@@ -0,0 +1,73 @@
1
+ # 语音大模型统一接口项目规范
2
+
3
+ ## 系统架构和整体规范
4
+ 1. 当前系统为macOS,对模型和引擎的选择,优先匹配macOS和MLX框架
5
+ 2. 随时要确保前端页面和后台服务之间的状态是保持同步一致的,避免服务已经异常,但前台没有任何状态表现
6
+
7
+ ## Agent 调度规则
8
+
9
+ **所有 Agent 工具调用必须加上 `mode: "bypassPermissions"`**,示例:
10
+
11
+ ```json
12
+ {
13
+ "subagent_type": "general-purpose",
14
+ "mode": "bypassPermissions",
15
+ "prompt": "..."
16
+ }
17
+ ```
18
+
19
+ ### Agent角色分工
20
+ 1. 可选的Agent角色参考 [~/.claude/AGENTS.md](~/.claude/AGENTS.md)文件的描述。
21
+ 2. 默认启动项目经理角色,项目经理仅负责项目当前进度的同步管理和任务调度分派,其他工作根据已设定的agent,按照需要选择合适的角色启动并进行工作分配。
22
+
23
+ ## 功能实现规范
24
+
25
+ 1. 每次对话提到问题bug或者功能点变化时,先进行问题分析和功能实现方案设计,并和用户确认后再执行实际开发修复工作。
26
+ 2. 功能修复或者功能点更新完成后,同步检查是否需要更新对应的文档。
27
+
28
+ ## Provider 开发规范
29
+
30
+ ### STT 流式识别规范
31
+ 所有声明了 `Capability.STREAMING` 的 STT Provider 必须遵循 [STT 流式识别开发规范](docs/architecture/stt-streaming-spec.md),核心要求:
32
+
33
+ 1. **`is_partial` 标记**:中间结果 `is_partial=True`,最终结果(VAD/用户停止)`is_partial=False`,服务端据此区分 `partial`/`final` 消息类型
34
+ 2. **全文快照 yield**:每次 yield 的 `Transcription.text` 必须是完整文本快照(非增量片段),前端直接替换显示
35
+ 3. **sender/receiver 并发模型**:使用 `_sender_stop` Event 协调、sender `ConnectionClosed` 容错、`send_task.cancel()` 防挂起、Queue sentinel `None` 保证退出
36
+ 4. **前端自动停止**:收到 `final` 后自动停止录音释放麦克风,无需用户手动点击 Stop
37
+ 5. **性能日志(里程碑计时)**:WS 连接耗时、首帧发送、首次响应(含协议元数据)、最终结果(含响应计数+文本预览)、流式完成总耗时
38
+ 6. **批量模式帧间 pacing**:预录音频通过 WS 发送时需添加帧间延迟(~10ms),防止服务端读超时
39
+ 7. **新增流式 STT Provider 时**,必须对照文档末尾的 Checklist 逐项确认
40
+
41
+ ### field_options(下拉可选项)
42
+ 每个 Provider 必须在类属性中定义 `field_options`,为所有枚举型参数提供完整的可选值列表。该属性被 Config 页面和 Lab 页面用于生成下拉选择框。
43
+
44
+ **规则:**
45
+ 1. **所有具有固定可选值的 settings 字段**(如 model、voice、language、device、format 等)必须在 `field_options` 中列出
46
+ 2. **布尔值、数值、自由文本字段**(如 speed、temperature、api_url)不需要列入
47
+ 3. **Vendor 共享凭据字段**(如 api_key、api_secret)不应出现在 engine 的 `default_settings` 中,由 vendor 层注入
48
+ 4. 新增或修改 Provider 时,必须同步更新 `field_options`,确保 UI 下拉选项与 API 文档一致
49
+ 5. 定期检查各 Provider 的 `field_options` 是否与上游 API 保持同步
50
+
51
+ **示例:**
52
+ ```python
53
+ class MyTTSProvider(TTSProvider):
54
+ field_options = {
55
+ "model": ["model-v1", "model-v2"],
56
+ "voice": ["alice", "bob", "charlie"],
57
+ "language": ["en-US", "zh-CN", "ja"],
58
+ }
59
+ ```
60
+
61
+ ## 测试规范
62
+
63
+ ### 核心要求
64
+
65
+ 1. **UI E2E 测试(强制)**:所有界面功能必须具备从"前端页面操作"到"前端页面最终结果"的完整 E2E 测试用例
66
+ 2. **GIVEN → WHEN → THEN → AND**:每个用例必须覆盖前置条件、页面操作、后端状态变更、前端结果验证
67
+ 3. **新增/修改界面功能时必须同步新增/更新对应的 UI E2E 用例**
68
+
69
+ ## 文档&同步规则
70
+
71
+ ### 文档规范
72
+ **默认将项目过程和设计文档都存放于[docs/](docs/)目录下**
73
+
@@ -0,0 +1,20 @@
1
+ FROM python:3.11-slim
2
+
3
+ WORKDIR /app
4
+
5
+ # Install system dependencies for audio processing
6
+ RUN apt-get update && apt-get install -y --no-install-recommends \
7
+ ffmpeg \
8
+ && rm -rf /var/lib/apt/lists/*
9
+
10
+ # Copy project files
11
+ COPY pyproject.toml .
12
+ COPY openspeech/ openspeech/
13
+ COPY providers.example.yaml providers.yaml
14
+
15
+ # Install with all provider deps + server
16
+ RUN pip install --no-cache-dir -e ".[all,server]"
17
+
18
+ EXPOSE 8600
19
+
20
+ CMD ["openspeech", "serve", "--config", "providers.yaml", "--host", "0.0.0.0", "--port", "8600"]
@@ -0,0 +1,101 @@
1
+ Metadata-Version: 2.4
2
+ Name: openspeechapi
3
+ Version: 0.1.0
4
+ Summary: Unified speech interface for STT/TTS providers
5
+ Requires-Python: >=3.11
6
+ Requires-Dist: httpx>=0.27
7
+ Requires-Dist: loguru>=0.7
8
+ Requires-Dist: msgpack>=1.0
9
+ Requires-Dist: pydantic>=2.0
10
+ Requires-Dist: pyyaml>=6.0
11
+ Provides-Extra: alibaba
12
+ Provides-Extra: alibaba-stt
13
+ Provides-Extra: alibaba-tts
14
+ Provides-Extra: all
15
+ Requires-Dist: elevenlabs; extra == 'all'
16
+ Requires-Dist: faster-whisper; extra == 'all'
17
+ Requires-Dist: openai; extra == 'all'
18
+ Requires-Dist: openai-whisper; extra == 'all'
19
+ Requires-Dist: piper-tts; extra == 'all'
20
+ Requires-Dist: pyttsx3; (sys_platform == 'win32') and extra == 'all'
21
+ Requires-Dist: torchaudio; extra == 'all'
22
+ Requires-Dist: tts; extra == 'all'
23
+ Requires-Dist: websockets; extra == 'all'
24
+ Provides-Extra: assemblyai-stt
25
+ Provides-Extra: audio
26
+ Requires-Dist: numpy; extra == 'audio'
27
+ Requires-Dist: sounddevice; extra == 'audio'
28
+ Provides-Extra: azure
29
+ Provides-Extra: azure-stt
30
+ Provides-Extra: azure-tts
31
+ Provides-Extra: baidu
32
+ Provides-Extra: baidu-stt
33
+ Provides-Extra: baidu-tts
34
+ Provides-Extra: cloud
35
+ Requires-Dist: websockets; extra == 'cloud'
36
+ Provides-Extra: coqui-tts
37
+ Requires-Dist: tts; extra == 'coqui-tts'
38
+ Provides-Extra: cosyvoice-tts
39
+ Requires-Dist: torchaudio; extra == 'cosyvoice-tts'
40
+ Provides-Extra: deepgram
41
+ Requires-Dist: websockets; extra == 'deepgram'
42
+ Provides-Extra: deepgram-stt
43
+ Requires-Dist: websockets; extra == 'deepgram-stt'
44
+ Provides-Extra: deepgram-tts
45
+ Provides-Extra: dev
46
+ Requires-Dist: pytest-asyncio>=0.24; extra == 'dev'
47
+ Requires-Dist: pytest-cov; extra == 'dev'
48
+ Requires-Dist: pytest-dotenv; extra == 'dev'
49
+ Requires-Dist: pytest>=8.0; extra == 'dev'
50
+ Requires-Dist: ruff; extra == 'dev'
51
+ Provides-Extra: elevenlabs
52
+ Requires-Dist: elevenlabs; extra == 'elevenlabs'
53
+ Requires-Dist: websockets; extra == 'elevenlabs'
54
+ Provides-Extra: elevenlabs-stt
55
+ Requires-Dist: websockets; extra == 'elevenlabs-stt'
56
+ Provides-Extra: elevenlabs-tts
57
+ Requires-Dist: elevenlabs; extra == 'elevenlabs-tts'
58
+ Provides-Extra: faster-whisper-stt
59
+ Requires-Dist: faster-whisper; extra == 'faster-whisper-stt'
60
+ Provides-Extra: fish-speech-tts
61
+ Provides-Extra: google
62
+ Provides-Extra: google-stt
63
+ Provides-Extra: google-tts
64
+ Provides-Extra: iflytek
65
+ Requires-Dist: websockets; extra == 'iflytek'
66
+ Provides-Extra: iflytek-stt
67
+ Requires-Dist: websockets; extra == 'iflytek-stt'
68
+ Provides-Extra: iflytek-tts
69
+ Requires-Dist: websockets; extra == 'iflytek-tts'
70
+ Provides-Extra: macos-native
71
+ Provides-Extra: minimax-tts
72
+ Provides-Extra: openai
73
+ Requires-Dist: openai; extra == 'openai'
74
+ Provides-Extra: openai-stt
75
+ Requires-Dist: openai; extra == 'openai-stt'
76
+ Provides-Extra: openai-tts
77
+ Requires-Dist: openai; extra == 'openai-tts'
78
+ Provides-Extra: piper-tts
79
+ Requires-Dist: piper-tts; extra == 'piper-tts'
80
+ Provides-Extra: server
81
+ Requires-Dist: fastapi; extra == 'server'
82
+ Requires-Dist: python-multipart; extra == 'server'
83
+ Requires-Dist: uvicorn; extra == 'server'
84
+ Requires-Dist: websockets; extra == 'server'
85
+ Provides-Extra: sherpa-onnx-stt
86
+ Requires-Dist: websockets; extra == 'sherpa-onnx-stt'
87
+ Provides-Extra: tencent
88
+ Provides-Extra: tencent-stt
89
+ Provides-Extra: tencent-tts
90
+ Provides-Extra: tracing
91
+ Requires-Dist: opentelemetry-api; extra == 'tracing'
92
+ Requires-Dist: opentelemetry-sdk; extra == 'tracing'
93
+ Provides-Extra: volcengine
94
+ Provides-Extra: volcengine-stt
95
+ Provides-Extra: volcengine-tts
96
+ Provides-Extra: whisper-stt
97
+ Requires-Dist: openai-whisper; extra == 'whisper-stt'
98
+ Provides-Extra: whisperlivekit-stt
99
+ Requires-Dist: websockets; extra == 'whisperlivekit-stt'
100
+ Provides-Extra: windows-native
101
+ Requires-Dist: pyttsx3; (sys_platform == 'win32') and extra == 'windows-native'