openspeechapi 0.2.9__tar.gz → 0.2.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (164) hide show
  1. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/.gitignore +1 -0
  2. openspeechapi-0.2.9/README.md → openspeechapi-0.2.11/PKG-INFO +417 -14
  3. openspeechapi-0.2.9/PKG-INFO → openspeechapi-0.2.11/README.md +248 -119
  4. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/__init__.py +1 -1
  5. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/cli.py +17 -9
  6. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/config.py +120 -0
  7. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/core/base.py +96 -1
  8. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/core/enums.py +4 -1
  9. openspeechapi-0.2.11/openspeechapi/core/model_hub.py +488 -0
  10. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/core/models.py +3 -0
  11. openspeechapi-0.2.11/openspeechapi/core/settings.py +13 -0
  12. openspeechapi-0.2.11/openspeechapi/dispatch/aim_provision.py +91 -0
  13. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/dispatch/dispatcher.py +42 -1
  14. openspeechapi-0.2.11/openspeechapi/dispatch/executors/subprocess_exec.py +907 -0
  15. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/dispatch/lifecycle.py +17 -0
  16. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/engine_catalog.py +10 -0
  17. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/engine_registry.yaml +186 -0
  18. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/exceptions.py +17 -0
  19. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/factory.py +85 -0
  20. openspeechapi-0.2.11/openspeechapi/local_engines/aim_resolver.py +191 -0
  21. openspeechapi-0.2.11/openspeechapi/local_engines/isolated_venv.py +164 -0
  22. openspeechapi-0.2.11/openspeechapi/providers/stt/_local_audio.py +43 -0
  23. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/providers/stt/alibaba.py +87 -86
  24. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/providers/stt/assemblyai.py +2 -1
  25. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/providers/stt/baidu.py +136 -135
  26. openspeechapi-0.2.11/openspeechapi/providers/stt/canary_qwen_stt.py +135 -0
  27. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/providers/stt/deepgram.py +22 -21
  28. openspeechapi-0.2.11/openspeechapi/providers/stt/dolphin_stt.py +242 -0
  29. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/providers/stt/elevenlabs.py +3 -1
  30. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/providers/stt/faster_whisper.py +212 -211
  31. openspeechapi-0.2.11/openspeechapi/providers/stt/fireredasr_stt.py +174 -0
  32. openspeechapi-0.2.11/openspeechapi/providers/stt/funasr_stt.py +197 -0
  33. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/providers/stt/gemma4.py +145 -35
  34. openspeechapi-0.2.11/openspeechapi/providers/stt/kimi_audio_stt.py +149 -0
  35. openspeechapi-0.2.11/openspeechapi/providers/stt/mlx_whisper_stt.py +191 -0
  36. openspeechapi-0.2.11/openspeechapi/providers/stt/mms_languages.json +5627 -0
  37. openspeechapi-0.2.11/openspeechapi/providers/stt/mms_stt.py +224 -0
  38. openspeechapi-0.2.11/openspeechapi/providers/stt/moonshine_stt.py +128 -0
  39. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/providers/stt/openai.py +94 -84
  40. openspeechapi-0.2.11/openspeechapi/providers/stt/paraformer.py +277 -0
  41. openspeechapi-0.2.11/openspeechapi/providers/stt/parakeet_mlx_stt.py +138 -0
  42. openspeechapi-0.2.11/openspeechapi/providers/stt/phi4_multimodal_stt.py +203 -0
  43. openspeechapi-0.2.11/openspeechapi/providers/stt/qwen3_asr.py +186 -0
  44. openspeechapi-0.2.11/openspeechapi/providers/stt/qwen3_omni_stt.py +169 -0
  45. openspeechapi-0.2.11/openspeechapi/providers/stt/sensevoice.py +284 -0
  46. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/providers/stt/tencent.py +213 -212
  47. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/providers/stt/volcengine.py +108 -107
  48. openspeechapi-0.2.11/openspeechapi/providers/stt/vosk_stt.py +200 -0
  49. openspeechapi-0.2.11/openspeechapi/providers/stt/voxtral_stt.py +190 -0
  50. openspeechapi-0.2.11/openspeechapi/providers/stt/wenet_stt.py +182 -0
  51. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/providers/stt/whisper.py +154 -153
  52. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/providers/tts/cosyvoice.py +27 -5
  53. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/providers/tts/piper.py +41 -2
  54. openspeechapi-0.2.11/openspeechapi/server/app.py +154 -0
  55. openspeechapi-0.2.11/openspeechapi/server/extras_installer.py +220 -0
  56. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/server/routes/management.py +292 -25
  57. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/server/routes/stt.py +6 -1
  58. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/server/webui/app.js +559 -93
  59. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/server/webui/index.html +28 -1
  60. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/server/webui/styles.css +27 -0
  61. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/server/ws/stt_stream.py +29 -0
  62. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/utils/audio_converter.py +51 -1
  63. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/providers.example.yaml +4 -0
  64. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/pyproject.toml +58 -3
  65. openspeechapi-0.2.11/scripts/aim_adopt.py +146 -0
  66. openspeechapi-0.2.11/scripts/aim_consumers.py +159 -0
  67. openspeechapi-0.2.11/scripts/gen_mms_languages.py +86 -0
  68. openspeechapi-0.2.11/scripts/preload_stt_model.py +109 -0
  69. openspeechapi-0.2.11/scripts/release.sh +111 -0
  70. openspeechapi-0.2.9/openspeechapi/core/settings.py +0 -8
  71. openspeechapi-0.2.9/openspeechapi/dispatch/executors/subprocess_exec.py +0 -461
  72. openspeechapi-0.2.9/openspeechapi/local_engines/aim_resolver.py +0 -91
  73. openspeechapi-0.2.9/openspeechapi/server/app.py +0 -71
  74. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/__main__.py +0 -0
  75. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/client/__init__.py +0 -0
  76. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/client/client.py +0 -0
  77. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/core/__init__.py +0 -0
  78. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/core/registry.py +0 -0
  79. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/demo.py +0 -0
  80. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/dispatch/__init__.py +0 -0
  81. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/dispatch/context.py +0 -0
  82. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/dispatch/executors/__init__.py +0 -0
  83. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/dispatch/executors/base.py +0 -0
  84. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/dispatch/executors/in_process.py +0 -0
  85. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/dispatch/executors/remote.py +0 -0
  86. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/dispatch/fanout.py +0 -0
  87. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/dispatch/filters.py +0 -0
  88. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/dispatch/watcher.py +0 -0
  89. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/local_engines/__init__.py +0 -0
  90. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/local_engines/backends/__init__.py +0 -0
  91. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/local_engines/backends/docker_backend.py +0 -0
  92. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/local_engines/backends/native_backend.py +0 -0
  93. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/local_engines/base.py +0 -0
  94. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/local_engines/engines/__init__.py +0 -0
  95. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/local_engines/engines/faster_whisper.py +0 -0
  96. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/local_engines/engines/fish_speech.py +0 -0
  97. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/local_engines/engines/sherpa_onnx.py +0 -0
  98. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/local_engines/engines/whisper.py +0 -0
  99. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/local_engines/engines/whisperlivekit.py +0 -0
  100. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/local_engines/manager.py +0 -0
  101. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/local_engines/models.py +0 -0
  102. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/local_engines/progress.py +0 -0
  103. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/local_engines/registry.py +0 -0
  104. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/local_engines/task_store.py +0 -0
  105. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/local_engines/tasks.py +0 -0
  106. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/logging_config.py +0 -0
  107. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/observe/__init__.py +0 -0
  108. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/observe/base.py +0 -0
  109. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/observe/debug.py +0 -0
  110. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/observe/latency.py +0 -0
  111. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/observe/metrics.py +0 -0
  112. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/observe/tracing.py +0 -0
  113. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/observe/usage.py +0 -0
  114. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/providers/__init__.py +0 -0
  115. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/providers/_template.py +0 -0
  116. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/providers/stt/__init__.py +0 -0
  117. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/providers/stt/azure_speech.py +0 -0
  118. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/providers/stt/google_cloud.py +0 -0
  119. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/providers/stt/iflytek.py +0 -0
  120. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/providers/stt/macos_speech.py +0 -0
  121. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/providers/stt/sherpa_onnx.py +0 -0
  122. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/providers/stt/whisperlivekit.py +0 -0
  123. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/providers/stt/windows_speech.py +0 -0
  124. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/providers/tts/__init__.py +0 -0
  125. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/providers/tts/alibaba.py +0 -0
  126. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/providers/tts/azure_speech.py +0 -0
  127. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/providers/tts/baidu.py +0 -0
  128. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/providers/tts/coqui.py +0 -0
  129. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/providers/tts/deepgram.py +0 -0
  130. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/providers/tts/elevenlabs.py +0 -0
  131. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/providers/tts/fish_speech.py +0 -0
  132. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/providers/tts/google_cloud.py +0 -0
  133. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/providers/tts/iflytek.py +0 -0
  134. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/providers/tts/macos_say.py +0 -0
  135. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/providers/tts/minimax.py +0 -0
  136. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/providers/tts/openai.py +0 -0
  137. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/providers/tts/tencent.py +0 -0
  138. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/providers/tts/volcengine.py +0 -0
  139. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/providers/tts/windows_sapi.py +0 -0
  140. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/server/__init__.py +0 -0
  141. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/server/auth.py +0 -0
  142. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/server/middleware.py +0 -0
  143. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/server/native_installer.py +0 -0
  144. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/server/routes/__init__.py +0 -0
  145. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/server/routes/tts.py +0 -0
  146. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/server/routes/webui.py +0 -0
  147. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/server/ws/__init__.py +0 -0
  148. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/server/ws/tts_stream.py +0 -0
  149. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/telemetry/__init__.py +0 -0
  150. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/telemetry/perf.py +0 -0
  151. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/utils/__init__.py +0 -0
  152. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/utils/audio_playback.py +0 -0
  153. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/openspeechapi/vendor_registry.yaml +0 -0
  154. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/scripts/engines/cloud/install.sh +0 -0
  155. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/scripts/engines/faster-whisper/native/install.sh +0 -0
  156. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/scripts/engines/fish-speech/native/install.sh +0 -0
  157. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/scripts/engines/macos-stt/_bundle.sh +0 -0
  158. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/scripts/engines/macos-stt/install.sh +0 -0
  159. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/scripts/engines/macos-stt/macos_stt.swift +0 -0
  160. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/scripts/engines/macos-stt/request_auth.swift +0 -0
  161. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/scripts/engines/sherpa-onnx/native/install.sh +0 -0
  162. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/scripts/engines/sherpa-onnx/native/run_streaming_server.py +0 -0
  163. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/scripts/engines/whisper/native/install.sh +0 -0
  164. {openspeechapi-0.2.9 → openspeechapi-0.2.11}/scripts/engines/whisperlivekit/native/install.sh +0 -0
@@ -12,6 +12,7 @@ dist/
12
12
  build/
13
13
  *.pyc
14
14
  *.pyo
15
+ .DS_Store
15
16
 
16
17
  # macOS STT compiled artifacts
17
18
  scripts/engines/macos-stt/macos-stt-helper
@@ -1,3 +1,172 @@
1
+ Metadata-Version: 2.4
2
+ Name: openspeechapi
3
+ Version: 0.2.11
4
+ Summary: Unified speech interface for STT/TTS providers
5
+ Requires-Python: >=3.11
6
+ Requires-Dist: httpx>=0.27
7
+ Requires-Dist: loguru>=0.7
8
+ Requires-Dist: msgpack>=1.0
9
+ Requires-Dist: pydantic>=2.0
10
+ Requires-Dist: pyyaml>=6.0
11
+ Provides-Extra: alibaba
12
+ Provides-Extra: alibaba-stt
13
+ Provides-Extra: alibaba-tts
14
+ Provides-Extra: all
15
+ Requires-Dist: elevenlabs; extra == 'all'
16
+ Requires-Dist: faster-whisper; extra == 'all'
17
+ Requires-Dist: openai; extra == 'all'
18
+ Requires-Dist: openai-whisper; extra == 'all'
19
+ Requires-Dist: piper-tts; extra == 'all'
20
+ Requires-Dist: pyttsx3; (sys_platform == 'win32') and extra == 'all'
21
+ Requires-Dist: torchaudio; extra == 'all'
22
+ Requires-Dist: tts; extra == 'all'
23
+ Requires-Dist: websockets; extra == 'all'
24
+ Provides-Extra: assemblyai-stt
25
+ Provides-Extra: audio
26
+ Requires-Dist: numpy; extra == 'audio'
27
+ Requires-Dist: sounddevice; extra == 'audio'
28
+ Provides-Extra: azure
29
+ Provides-Extra: azure-stt
30
+ Provides-Extra: azure-tts
31
+ Provides-Extra: baidu
32
+ Provides-Extra: baidu-stt
33
+ Provides-Extra: baidu-tts
34
+ Provides-Extra: canary-qwen-stt
35
+ Provides-Extra: cloud
36
+ Requires-Dist: websockets; extra == 'cloud'
37
+ Provides-Extra: coqui-tts
38
+ Requires-Dist: tts; extra == 'coqui-tts'
39
+ Provides-Extra: cosyvoice-tts
40
+ Requires-Dist: torchaudio; extra == 'cosyvoice-tts'
41
+ Provides-Extra: deepgram
42
+ Requires-Dist: websockets; extra == 'deepgram'
43
+ Provides-Extra: deepgram-stt
44
+ Requires-Dist: websockets; extra == 'deepgram-stt'
45
+ Provides-Extra: deepgram-tts
46
+ Provides-Extra: dev
47
+ Requires-Dist: numpy; extra == 'dev'
48
+ Requires-Dist: pytest-asyncio>=0.24; extra == 'dev'
49
+ Requires-Dist: pytest-cov; extra == 'dev'
50
+ Requires-Dist: pytest-dotenv; extra == 'dev'
51
+ Requires-Dist: pytest>=8.0; extra == 'dev'
52
+ Requires-Dist: ruff==0.15.*; extra == 'dev'
53
+ Provides-Extra: dolphin-stt
54
+ Requires-Dist: dataoceanai-dolphin; extra == 'dolphin-stt'
55
+ Requires-Dist: torchcodec; extra == 'dolphin-stt'
56
+ Provides-Extra: elevenlabs
57
+ Requires-Dist: elevenlabs; extra == 'elevenlabs'
58
+ Requires-Dist: websockets; extra == 'elevenlabs'
59
+ Provides-Extra: elevenlabs-stt
60
+ Requires-Dist: websockets; extra == 'elevenlabs-stt'
61
+ Provides-Extra: elevenlabs-tts
62
+ Requires-Dist: elevenlabs; extra == 'elevenlabs-tts'
63
+ Provides-Extra: faster-whisper-stt
64
+ Requires-Dist: faster-whisper; extra == 'faster-whisper-stt'
65
+ Provides-Extra: fireredasr-stt
66
+ Requires-Dist: fireredasr; extra == 'fireredasr-stt'
67
+ Provides-Extra: fish-speech-tts
68
+ Provides-Extra: funasr-stt
69
+ Requires-Dist: funasr>=1.1.0; extra == 'funasr-stt'
70
+ Requires-Dist: torch; extra == 'funasr-stt'
71
+ Requires-Dist: torchaudio; extra == 'funasr-stt'
72
+ Provides-Extra: gemma4-stt
73
+ Requires-Dist: accelerate; (sys_platform != 'darwin') and extra == 'gemma4-stt'
74
+ Requires-Dist: librosa; (sys_platform != 'darwin') and extra == 'gemma4-stt'
75
+ Requires-Dist: mlx-vlm<0.6.2,>=0.6.1; (sys_platform == 'darwin') and extra == 'gemma4-stt'
76
+ Requires-Dist: torch; (sys_platform != 'darwin') and extra == 'gemma4-stt'
77
+ Requires-Dist: transformers; (sys_platform != 'darwin') and extra == 'gemma4-stt'
78
+ Provides-Extra: google
79
+ Provides-Extra: google-stt
80
+ Provides-Extra: google-tts
81
+ Provides-Extra: iflytek
82
+ Requires-Dist: websockets; extra == 'iflytek'
83
+ Provides-Extra: iflytek-stt
84
+ Requires-Dist: websockets; extra == 'iflytek-stt'
85
+ Provides-Extra: iflytek-tts
86
+ Requires-Dist: websockets; extra == 'iflytek-tts'
87
+ Provides-Extra: kimi-audio-stt
88
+ Requires-Dist: torch; extra == 'kimi-audio-stt'
89
+ Provides-Extra: macos-native
90
+ Provides-Extra: minimax-tts
91
+ Provides-Extra: mlx-whisper-stt
92
+ Requires-Dist: mlx-whisper; (sys_platform == 'darwin') and extra == 'mlx-whisper-stt'
93
+ Provides-Extra: mms-stt
94
+ Requires-Dist: soundfile; extra == 'mms-stt'
95
+ Requires-Dist: torch; extra == 'mms-stt'
96
+ Requires-Dist: transformers; extra == 'mms-stt'
97
+ Provides-Extra: moonshine-stt
98
+ Requires-Dist: soundfile; extra == 'moonshine-stt'
99
+ Requires-Dist: torch; extra == 'moonshine-stt'
100
+ Requires-Dist: transformers; extra == 'moonshine-stt'
101
+ Provides-Extra: openai
102
+ Requires-Dist: openai; extra == 'openai'
103
+ Provides-Extra: openai-stt
104
+ Requires-Dist: openai; extra == 'openai-stt'
105
+ Provides-Extra: openai-tts
106
+ Requires-Dist: openai; extra == 'openai-tts'
107
+ Provides-Extra: paraformer-stt
108
+ Requires-Dist: funasr>=1.1.0; extra == 'paraformer-stt'
109
+ Requires-Dist: torch; extra == 'paraformer-stt'
110
+ Requires-Dist: torchaudio; extra == 'paraformer-stt'
111
+ Provides-Extra: parakeet-stt
112
+ Requires-Dist: parakeet-mlx; (sys_platform == 'darwin') and extra == 'parakeet-stt'
113
+ Provides-Extra: phi4-multimodal-stt
114
+ Requires-Dist: accelerate; extra == 'phi4-multimodal-stt'
115
+ Requires-Dist: backoff; extra == 'phi4-multimodal-stt'
116
+ Requires-Dist: peft; extra == 'phi4-multimodal-stt'
117
+ Requires-Dist: pillow; extra == 'phi4-multimodal-stt'
118
+ Requires-Dist: scipy; extra == 'phi4-multimodal-stt'
119
+ Requires-Dist: soundfile; extra == 'phi4-multimodal-stt'
120
+ Requires-Dist: torch; extra == 'phi4-multimodal-stt'
121
+ Requires-Dist: torchvision; extra == 'phi4-multimodal-stt'
122
+ Requires-Dist: transformers; extra == 'phi4-multimodal-stt'
123
+ Provides-Extra: piper-tts
124
+ Requires-Dist: piper-tts; extra == 'piper-tts'
125
+ Provides-Extra: qwen3-asr-stt
126
+ Requires-Dist: modelscope; extra == 'qwen3-asr-stt'
127
+ Requires-Dist: qwen-asr; extra == 'qwen3-asr-stt'
128
+ Provides-Extra: qwen3-omni-stt
129
+ Requires-Dist: accelerate; extra == 'qwen3-omni-stt'
130
+ Requires-Dist: qwen-omni-utils; extra == 'qwen3-omni-stt'
131
+ Requires-Dist: torch; extra == 'qwen3-omni-stt'
132
+ Requires-Dist: transformers; extra == 'qwen3-omni-stt'
133
+ Provides-Extra: sensevoice-stt
134
+ Requires-Dist: funasr>=1.1.0; extra == 'sensevoice-stt'
135
+ Requires-Dist: torch; extra == 'sensevoice-stt'
136
+ Requires-Dist: torchaudio; extra == 'sensevoice-stt'
137
+ Provides-Extra: server
138
+ Requires-Dist: fastapi; extra == 'server'
139
+ Requires-Dist: python-multipart; extra == 'server'
140
+ Requires-Dist: uvicorn; extra == 'server'
141
+ Requires-Dist: websockets; extra == 'server'
142
+ Provides-Extra: sherpa-onnx-stt
143
+ Requires-Dist: websockets; extra == 'sherpa-onnx-stt'
144
+ Provides-Extra: tencent
145
+ Provides-Extra: tencent-stt
146
+ Provides-Extra: tencent-tts
147
+ Provides-Extra: tracing
148
+ Requires-Dist: opentelemetry-api; extra == 'tracing'
149
+ Requires-Dist: opentelemetry-sdk; extra == 'tracing'
150
+ Provides-Extra: volcengine
151
+ Provides-Extra: volcengine-stt
152
+ Provides-Extra: volcengine-tts
153
+ Provides-Extra: vosk-stt
154
+ Requires-Dist: huggingface-hub; extra == 'vosk-stt'
155
+ Requires-Dist: vosk; extra == 'vosk-stt'
156
+ Provides-Extra: voxtral-stt
157
+ Requires-Dist: accelerate; extra == 'voxtral-stt'
158
+ Requires-Dist: mistral-common[audio]>=1.8.1; extra == 'voxtral-stt'
159
+ Requires-Dist: torch; extra == 'voxtral-stt'
160
+ Requires-Dist: transformers>=4.54.0; extra == 'voxtral-stt'
161
+ Provides-Extra: wenet-stt
162
+ Provides-Extra: whisper-stt
163
+ Requires-Dist: openai-whisper; extra == 'whisper-stt'
164
+ Provides-Extra: whisperlivekit-stt
165
+ Requires-Dist: websockets; extra == 'whisperlivekit-stt'
166
+ Provides-Extra: windows-native
167
+ Requires-Dist: pyttsx3; (sys_platform == 'win32') and extra == 'windows-native'
168
+ Description-Content-Type: text/markdown
169
+
1
170
  # OpenSpeechAPI
2
171
 
3
172
  > Unified speech interface for STT/TTS providers — one API, multiple backends.
@@ -8,22 +177,24 @@ OpenSpeechAPI 提供统一的语音接口,通过字符串指定 provider 即
8
177
 
9
178
  ### 安装
10
179
 
180
+ **方式一 · 通过 PyPI 安装(直接使用)**
11
181
  ```bash
12
- # 安装全部 provider
13
- pip install -e ".[all]"
14
-
15
- # 或按需安装
16
- pip install -e ".[openai]" # OpenAI Whisper STT + TTS
17
- pip install -e ".[faster-whisper]" # 本地 faster-whisper STT
18
- pip install -e ".[openai,faster-whisper]" # 指定多个
19
-
20
- # 仅核心包(不含任何 provider)
21
- pip install -e .
182
+ pip install "openspeechapi[server]" # HTTP 服务 / WebUI 必须带 [server](fastapi/uvicorn)
183
+ pip install "openspeechapi[server,openai]" # 服务 + 指定 provider
184
+ pip install "openspeechapi[server,all]" # 服务 + 全部 provider
185
+ pip install openspeechapi # 仅核心库(库模式;不含服务,也起不了 server)
186
+ ```
22
187
 
23
- # 开发环境
24
- pip install -e ".[dev]"
188
+ **方式二 · 源码安装(开发,可编辑)**
189
+ ```bash
190
+ git clone https://github.com/wingsfly/OpenSpeechAPI.git
191
+ cd OpenSpeechAPI
192
+ uv venv && uv pip install -e ".[server,dev]" # 或 pip install -e ".[server,dev]";按需换 .[all] 等
25
193
  ```
26
194
 
195
+ > ⚠️ 纯 `pip install openspeechapi`(核心库)**不含 fastapi/uvicorn**,无法 `serve`;起服务请带 `[server]`。
196
+ > 两种方式启动服务的差异见下方 [启动服务](#启动服务)。
197
+
27
198
  ### 30 秒上手 — TTS
28
199
 
29
200
  ```python
@@ -205,7 +376,24 @@ python -m openspeechapi.demo tts -t "Hello world" --play \
205
376
  | `whisperlivekit-stt` | STT | WhisperLiveKit 本地服务(Deepgram 兼容 WS,支持 MLX 后端) | local | `pip install -e ".[whisperlivekit-stt]"` |
206
377
  | `elevenlabs-stt` | STT | ElevenLabs Scribe API(云端,支持实时流式 WS + 批量) | remote | `pip install -e ".[elevenlabs-stt]"` |
207
378
  | `deepgram` | STT | Deepgram API(云端,支持实时流式) | remote | `pip install -e ".[deepgram]"` |
208
- | `gemma4` | STT | Google Gemma 4 多模态 ASR(macOS/MLX 本地,E4B 默认/12B 可选,>30s 自动分段,支持转写/翻译/理解) | subprocess | `pip install -e ".[gemma4-stt]"` |
379
+ | `gemma4` | STT | Google Gemma 4 多模态 ASR(macOS/MLX 本地,E2B/E4B,>30s 自动分段;任务:转写 / 翻译(任意目标语言) / 理解 / 问答 / 语种识别) | subprocess | `pip install -e ".[gemma4-stt]"` |
380
+ | `sensevoice` | STT | SenseVoice-Small 本地多语种 ASR(FunASR,zh/粤/en/ja/ko,比 Whisper 快 ~15-50×) | subprocess | `pip install -e ".[sensevoice-stt]"` |
381
+ | `qwen3-asr` | STT | Qwen3-ASR 本地多语种 ASR(2026 开源 SOTA,中/方言/英,0.6B/1.7B) | subprocess | `pip install -e ".[qwen3-asr-stt]"` |
382
+ | `mlx-whisper` | STT | Whisper on Apple MLX(本地,large-v3 / turbo,中/英多语种,仅 Apple Silicon) | subprocess | `pip install -e ".[mlx-whisper-stt]"` |
383
+ | `paraformer` | STT | Paraformer 本地 ASR(FunASR,普通话 SOTA 级,VAD+标点,zh/en) | subprocess | `pip install -e ".[paraformer-stt]"` |
384
+ | `funasr` | STT | FunASR 总入口(任选模型库 + VAD/标点/说话人分离) | subprocess | `pip install -e ".[funasr-stt]"` |
385
+ | `fireredasr` | STT | 小红书 FireRedASR(普通话 SOTA+方言+英文,歌词识别,AED/LLM) | subprocess | `pip install -e ".[fireredasr-stt]"` |
386
+ | `dolphin` | STT | DataoceanAI Dolphin(40 东方语种 + 22 中文方言,small/base) | subprocess | `pip install -e ".[dolphin-stt]"` |
387
+ | `wenet` | STT | WeNet U2++ Conformer(生产级,zh/en 预置;流式后续) | subprocess | WebUI Engines 安装,或 `pip install 'wenet @ git+https://github.com/wenet-e2e/wenet.git'` |
388
+ | `canary-qwen` | STT | NVIDIA Canary-Qwen-2.5B(Open ASR 英文第1,SALM;仅英文,需 NeMo+GPU) | subprocess | WebUI Engines 安装,或 `pip install 'nemo_toolkit[asr] @ git+https://github.com/NVIDIA/NeMo.git'` |
389
+ | `parakeet` | STT | NVIDIA Parakeet-TDT on MLX(最快,v2 英文/v3 欧语;中文弱,仅 Apple Silicon) | subprocess | `pip install -e ".[parakeet-stt]"` |
390
+ | `qwen3-omni` | STT | Qwen3-Omni-30B 全模态 LLM(ASR+理解,zh/en+;需大 GPU ~60GB) | subprocess | `pip install -e ".[qwen3-omni-stt]"` |
391
+ | `voxtral` | STT | Mistral Voxtral(Mini-3B/Small-24B,转写+理解,多语种;建议 GPU) | subprocess | `pip install -e ".[voxtral-stt]"` |
392
+ | `phi4-multimodal` | STT | 微软 Phi-4-multimodal(多模态 LLM,ASR+理解,zh/en+;建议 GPU) | subprocess | `pip install -e ".[phi4-multimodal-stt]"` |
393
+ | `kimi-audio` | STT | 月之暗面 Kimi-Audio-7B(音频基础模型,ASR+理解,zh/en;需 Linux+CUDA/flash-attn) | subprocess | WebUI Engines 安装,或 `pip install 'kimi-audio @ git+https://github.com/MoonshotAI/Kimi-Audio.git'` |
394
+ | `moonshine` | STT | Useful Sensors Moonshine(边缘/实时英文 ASR,tiny/base,轻量) | subprocess | `pip install -e ".[moonshine-stt]"` |
395
+ | `vosk` | STT | Vosk(Kaldi 离线,20+ 语言含 zh/en,轻量低资源) | subprocess | `pip install -e ".[vosk-stt]"` |
396
+ | `mms` | STT | Meta MMS(Wav2Vec2-CTC,1000+ 语言含 zh/en,按 ISO 639-3 选语言) | subprocess | `pip install -e ".[mms-stt]"` |
209
397
  | `openai-tts` | TTS | OpenAI Speech API(云端,支持流式) | remote | `pip install -e ".[openai]"` |
210
398
  | `elevenlabs` | TTS | ElevenLabs 高质量语音(云端,支持 HTTP/WS 流式) | remote | `pip install -e ".[elevenlabs-tts]"` |
211
399
  | `minimax` | TTS | Minimax 语音合成(云端) | remote | `pip install -e ".[minimax-tts]"` |
@@ -229,6 +417,8 @@ print(list_providers())
229
417
  # 'whisperlivekit-stt']
230
418
  ```
231
419
 
420
+ > **音频输入格式**:STT 上传支持 WAV/PCM/MP3/FLAC/OGG/WebM 等。引擎不能直接处理的格式会由服务端自动转为 16k 单声道 WAV(压缩格式需 `ffmpeg`);缺 ffmpeg 且格式不被支持时返回 400,Web UI 会在上传/录音前拦截提示。详见 [docs/architecture/audio-format-negotiation.md](docs/architecture/audio-format-negotiation.md)。
421
+
232
422
  ## Provider 参数
233
423
 
234
424
  ### `openai-stt`
@@ -263,6 +453,204 @@ create_provider("faster-whisper",
263
453
  )
264
454
  ```
265
455
 
456
+ ### `gemma4`
457
+
458
+ ```python
459
+ create_provider("gemma4",
460
+ model="mlx-community/gemma-4-E4B-it-8bit", # E2B/E4B(8bit 翻译更准;勿用 12B)
461
+ task="transcribe", # transcribe|translate|understand|qa|detect_language
462
+ target_language="English", # task=translate 的目标语言(任意语言)
463
+ include_transcript=False, # task=translate:同时输出源转写 + 译文
464
+ )
465
+ ```
466
+
467
+ macOS / Apple Silicon 本地多模态 ASR(mlx-vlm)。5 个任务及全部字段可在 Web UI 的 Lab「Advanced Options」按请求覆盖。详见 [docs/architecture/gemma4-stt-provider.md](docs/architecture/gemma4-stt-provider.md)。
468
+
469
+ ### `sensevoice`
470
+
471
+ ```python
472
+ create_provider("sensevoice",
473
+ model="FunAudioLLM/SenseVoiceSmall",
474
+ language="auto", # auto|zh|en|yue|ja|ko|nospeech
475
+ device="cpu", # cpu|mps|cuda
476
+ use_itn=True, # 标点/数字规整
477
+ )
478
+ ```
479
+
480
+ FunASR 本地多语种 ASR(zh/粤/en/ja/ko),非自回归、极快;全部字段可在 Lab「Advanced Options」按请求覆盖。详见 [docs/architecture/sensevoice-stt-provider.md](docs/architecture/sensevoice-stt-provider.md)。
481
+
482
+ ### `qwen3-asr`
483
+
484
+ ```python
485
+ create_provider("qwen3-asr",
486
+ model="Qwen/Qwen3-ASR-0.6B", # 或 Qwen/Qwen3-ASR-1.7B
487
+ language="auto", # auto|Chinese|English|Cantonese|Japanese|Korean
488
+ device="cpu", # cpu|mps|cuda
489
+ )
490
+ ```
491
+
492
+ 阿里 Qwen3-ASR(2026 开源 ASR SOTA,中/方言/英)本地推理(qwen-asr 包)。需另装 `torch`。详见 [docs/architecture/qwen3-asr-stt-provider.md](docs/architecture/qwen3-asr-stt-provider.md)。
493
+
494
+ ### `mlx-whisper`
495
+
496
+ ```python
497
+ create_provider("mlx-whisper",
498
+ model="mlx-community/whisper-large-v3-turbo", # 或 whisper-large-v3-mlx
499
+ language="auto", # auto|en|zh|yue|ja|ko|...
500
+ )
501
+ ```
502
+
503
+ Apple Silicon 原生 Whisper(MLX),large-v3 / turbo,中英文多语种。仅 macOS/Apple Silicon。详见 [docs/architecture/mlx-whisper-stt-provider.md](docs/architecture/mlx-whisper-stt-provider.md)。
504
+
505
+ ### `paraformer`
506
+
507
+ ```python
508
+ create_provider("paraformer",
509
+ model="funasr/paraformer-zh", # 或 funasr/paraformer-en
510
+ vad=True, punc=True, # VAD 切分 + 标点恢复
511
+ )
512
+ ```
513
+
514
+ 阿里 Paraformer(FunASR),普通话 SOTA 级非自回归 ASR,带 VAD + 标点。详见 [docs/architecture/paraformer-stt-provider.md](docs/architecture/paraformer-stt-provider.md)。
515
+
516
+ ### `funasr`
517
+
518
+ ```python
519
+ create_provider("funasr",
520
+ model="funasr/paraformer-zh", # 模型库任意条目
521
+ spk=True, # CAM++ 说话人分离 → [spk0]/[spk1] 标注
522
+ )
523
+ ```
524
+
525
+ FunASR 通用总入口:任选模型库模型 + VAD/标点/**说话人分离**。详见 [docs/architecture/funasr-stt-provider.md](docs/architecture/funasr-stt-provider.md)。
526
+
527
+ ### `fireredasr`
528
+
529
+ ```python
530
+ create_provider("fireredasr",
531
+ model_type="aed", # aed(≤60s) | llm(≤30s);权重自动下载
532
+ )
533
+ ```
534
+
535
+ 小红书 FireRedASR,普通话公开基准 SOTA + 方言 + 英文,歌词识别强。详见 [docs/architecture/fireredasr-stt-provider.md](docs/architecture/fireredasr-stt-provider.md)。
536
+
537
+ ### `dolphin`
538
+
539
+ ```python
540
+ create_provider("dolphin",
541
+ model_name="small", # small | base
542
+ lang_sym="zh", region_sym="CN", # 留空则自动检测
543
+ )
544
+ ```
545
+
546
+ DataoceanAI Dolphin,40 种东方语言 + 22 种中文方言。详见 [docs/architecture/dolphin-stt-provider.md](docs/architecture/dolphin-stt-provider.md)。
547
+
548
+ ### `wenet`
549
+
550
+ ```python
551
+ create_provider("wenet",
552
+ model="chinese", # chinese | english
553
+ )
554
+ ```
555
+
556
+ WeNet 生产级 U2++ Conformer(zh/en 预置)。从 git 安装(不在 PyPI)。详见 [docs/architecture/wenet-stt-provider.md](docs/architecture/wenet-stt-provider.md)。
557
+
558
+ ### `canary-qwen`
559
+
560
+ ```python
561
+ create_provider("canary-qwen",
562
+ model="nvidia/canary-qwen-2.5b",
563
+ device="cuda", dtype="bfloat16", # 仅英文;强烈建议 GPU
564
+ )
565
+ ```
566
+
567
+ NVIDIA Canary-Qwen-2.5B(Open ASR 英文第 1,SALM)。**仅英文**;NeMo 重型安装 + 建议 GPU。详见 [docs/architecture/canary-qwen-stt-provider.md](docs/architecture/canary-qwen-stt-provider.md)。
568
+
569
+ ### `parakeet`
570
+
571
+ ```python
572
+ create_provider("parakeet",
573
+ model="mlx-community/parakeet-tdt-0.6b-v2", # v2 仅英文;v3 另支持欧洲语言
574
+ )
575
+ ```
576
+
577
+ NVIDIA Parakeet-TDT on Apple MLX,榜上最快。英文/欧语为主,**中文弱**;仅 Apple Silicon。详见 [docs/architecture/parakeet-stt-provider.md](docs/architecture/parakeet-stt-provider.md)。
578
+
579
+ ### `qwen3-omni`
580
+
581
+ ```python
582
+ create_provider("qwen3-omni",
583
+ model="Qwen/Qwen3-Omni-30B-A3B-Instruct",
584
+ prompt="Transcribe the audio into text.", # 改成问题即可做音频问答
585
+ )
586
+ ```
587
+
588
+ 阿里 Qwen3-Omni-30B-A3B 全模态 LLM(ASR + 音频理解,zh/en+)。**需大显存 GPU(~60GB),笔记本装不下**。详见 [docs/architecture/qwen3-omni-stt-provider.md](docs/architecture/qwen3-omni-stt-provider.md)。
589
+
590
+ ### `voxtral`
591
+
592
+ ```python
593
+ create_provider("voxtral",
594
+ model="mistralai/Voxtral-Mini-3B-2507", # 或 Voxtral-Small-24B-2507
595
+ language="en",
596
+ )
597
+ ```
598
+
599
+ Mistral Voxtral(转写 + 音频理解,多语种)。3B/24B,建议 GPU。详见 [docs/architecture/voxtral-stt-provider.md](docs/architecture/voxtral-stt-provider.md)。
600
+
601
+ ### `phi4-multimodal`
602
+
603
+ ```python
604
+ create_provider("phi4-multimodal",
605
+ model="microsoft/Phi-4-multimodal-instruct",
606
+ prompt="Transcribe the audio clip into text.",
607
+ )
608
+ ```
609
+
610
+ 微软 Phi-4-multimodal,紧凑多模态 LLM(ASR + 音频理解,zh/en+)。建议 GPU。详见 [docs/architecture/phi4-multimodal-stt-provider.md](docs/architecture/phi4-multimodal-stt-provider.md)。
611
+
612
+ ### `kimi-audio`
613
+
614
+ ```python
615
+ create_provider("kimi-audio",
616
+ model="moonshotai/Kimi-Audio-7B-Instruct",
617
+ prompt="Please transcribe the audio into text.",
618
+ )
619
+ ```
620
+
621
+ 月之暗面 Kimi-Audio-7B 音频基础模型(ASR + 音频理解,zh/en)。git 安装 + 建议 GPU。详见 [docs/architecture/kimi-audio-stt-provider.md](docs/architecture/kimi-audio-stt-provider.md)。
622
+
623
+ ### `moonshine`
624
+
625
+ ```python
626
+ create_provider("moonshine",
627
+ model="UsefulSensors/moonshine-base", # base | tiny
628
+ )
629
+ ```
630
+
631
+ Useful Sensors Moonshine,边缘/实时英文 ASR,轻量快速。详见 [docs/architecture/moonshine-stt-provider.md](docs/architecture/moonshine-stt-provider.md)。
632
+
633
+ ### `vosk`
634
+
635
+ ```python
636
+ create_provider("vosk",
637
+ model="vosk-model-small-en-us-0.15", # 中文: vosk-model-small-cn-0.22
638
+ )
639
+ ```
640
+
641
+ Vosk(Kaldi 离线),20+ 语言,轻量低资源,模型自动下载。详见 [docs/architecture/vosk-stt-provider.md](docs/architecture/vosk-stt-provider.md)。
642
+
643
+ ### `mms`
644
+
645
+ ```python
646
+ create_provider("mms",
647
+ model="facebook/mms-1b-all",
648
+ language="eng", # ISO 639-3: eng / cmn / yue / jpn ...
649
+ )
650
+ ```
651
+
652
+ Meta MMS(Wav2Vec2-CTC),1000+ 语言含中英,按 **ISO 639-3** 码切换语言适配器;CTC 输出小写无标点。详见 [docs/architecture/mms-stt-provider.md](docs/architecture/mms-stt-provider.md)。
653
+
266
654
  ### `openai-tts`
267
655
 
268
656
  ```python
@@ -407,10 +795,25 @@ bash scripts/engines/macos-stt/install.sh
407
795
 
408
796
  ### 启动服务
409
797
 
798
+ **pip 安装后**(已带 `[server]`)—— 配置自动解析/生成,开箱即起:
410
799
  ```bash
411
- openspeechapi serve --config providers.yaml --port 8600
800
+ openspeechapi serve # 自动解析配置;没有则生成默认(macOS 默认 macos_tts)
801
+ openspeechapi serve --port 8600 # 指定端口
412
802
  ```
413
803
 
804
+ **源码目录运行**:
805
+ ```bash
806
+ python -m openspeechapi.cli serve # 或 openspeechapi serve;在仓库目录优先用 ./providers.yaml
807
+ ```
808
+
809
+ 启动后打开 WebUI:**http://127.0.0.1:8600/ui/**
810
+
811
+ **配置解析顺序**(`--config` 可放在 `serve` **前或后**,例如 `openspeechapi --config x serve` 或 `openspeechapi serve --config x`):
812
+ 1. 显式 `--config <path>`
813
+ 2. 当前目录 `./providers.yaml`(源码目录运行时优先)
814
+ 3. `~/.config/openspeechapi/providers.yaml`(遵循 `XDG_CONFIG_HOME`)
815
+ 4. 都没有 → 在 `~/.config/openspeechapi/providers.yaml` **自动生成**一份可用默认配置
816
+
414
817
  ### Python Client(与 Library 模式接口一致)
415
818
 
416
819
  ```python