openspeechapi 0.2.9__tar.gz → 0.2.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (164) hide show
  1. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/.gitignore +1 -0
  2. openspeechapi-0.2.9/README.md → openspeechapi-0.2.10/PKG-INFO +413 -14
  3. openspeechapi-0.2.9/PKG-INFO → openspeechapi-0.2.10/README.md +248 -119
  4. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/__init__.py +1 -1
  5. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/cli.py +17 -9
  6. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/config.py +105 -0
  7. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/core/base.py +91 -1
  8. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/core/enums.py +4 -1
  9. openspeechapi-0.2.10/openspeechapi/core/model_hub.py +257 -0
  10. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/core/models.py +3 -0
  11. openspeechapi-0.2.10/openspeechapi/core/settings.py +13 -0
  12. openspeechapi-0.2.10/openspeechapi/dispatch/aim_provision.py +91 -0
  13. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/dispatch/dispatcher.py +39 -1
  14. openspeechapi-0.2.10/openspeechapi/dispatch/executors/subprocess_exec.py +907 -0
  15. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/dispatch/lifecycle.py +17 -0
  16. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/engine_catalog.py +10 -0
  17. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/engine_registry.yaml +186 -0
  18. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/exceptions.py +17 -0
  19. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/factory.py +85 -0
  20. openspeechapi-0.2.10/openspeechapi/local_engines/aim_resolver.py +179 -0
  21. openspeechapi-0.2.10/openspeechapi/local_engines/isolated_venv.py +164 -0
  22. openspeechapi-0.2.10/openspeechapi/providers/stt/_local_audio.py +43 -0
  23. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/stt/assemblyai.py +2 -1
  24. openspeechapi-0.2.10/openspeechapi/providers/stt/canary_qwen_stt.py +135 -0
  25. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/stt/deepgram.py +22 -21
  26. openspeechapi-0.2.10/openspeechapi/providers/stt/dolphin_stt.py +242 -0
  27. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/stt/elevenlabs.py +2 -1
  28. openspeechapi-0.2.10/openspeechapi/providers/stt/fireredasr_stt.py +173 -0
  29. openspeechapi-0.2.10/openspeechapi/providers/stt/funasr_stt.py +192 -0
  30. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/stt/gemma4.py +48 -17
  31. openspeechapi-0.2.10/openspeechapi/providers/stt/kimi_audio_stt.py +152 -0
  32. openspeechapi-0.2.10/openspeechapi/providers/stt/mlx_whisper_stt.py +192 -0
  33. openspeechapi-0.2.10/openspeechapi/providers/stt/mms_languages.json +5627 -0
  34. openspeechapi-0.2.10/openspeechapi/providers/stt/mms_stt.py +224 -0
  35. openspeechapi-0.2.10/openspeechapi/providers/stt/moonshine_stt.py +128 -0
  36. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/stt/openai.py +11 -2
  37. openspeechapi-0.2.10/openspeechapi/providers/stt/paraformer.py +276 -0
  38. openspeechapi-0.2.10/openspeechapi/providers/stt/parakeet_mlx_stt.py +138 -0
  39. openspeechapi-0.2.10/openspeechapi/providers/stt/phi4_multimodal_stt.py +202 -0
  40. openspeechapi-0.2.10/openspeechapi/providers/stt/qwen3_asr.py +185 -0
  41. openspeechapi-0.2.10/openspeechapi/providers/stt/qwen3_omni_stt.py +168 -0
  42. openspeechapi-0.2.10/openspeechapi/providers/stt/sensevoice.py +283 -0
  43. openspeechapi-0.2.10/openspeechapi/providers/stt/vosk_stt.py +199 -0
  44. openspeechapi-0.2.10/openspeechapi/providers/stt/voxtral_stt.py +191 -0
  45. openspeechapi-0.2.10/openspeechapi/providers/stt/wenet_stt.py +181 -0
  46. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/tts/cosyvoice.py +27 -5
  47. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/tts/piper.py +41 -2
  48. openspeechapi-0.2.10/openspeechapi/server/app.py +122 -0
  49. openspeechapi-0.2.10/openspeechapi/server/extras_installer.py +200 -0
  50. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/server/routes/management.py +288 -25
  51. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/server/routes/stt.py +6 -1
  52. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/server/webui/app.js +557 -93
  53. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/server/webui/index.html +28 -1
  54. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/server/webui/styles.css +27 -0
  55. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/server/ws/stt_stream.py +29 -0
  56. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/utils/audio_converter.py +51 -1
  57. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/providers.example.yaml +4 -0
  58. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/pyproject.toml +50 -3
  59. openspeechapi-0.2.10/scripts/aim_adopt.py +146 -0
  60. openspeechapi-0.2.10/scripts/aim_consumers.py +159 -0
  61. openspeechapi-0.2.10/scripts/gen_mms_languages.py +86 -0
  62. openspeechapi-0.2.10/scripts/preload_stt_model.py +109 -0
  63. openspeechapi-0.2.10/scripts/release.sh +111 -0
  64. openspeechapi-0.2.9/openspeechapi/core/settings.py +0 -8
  65. openspeechapi-0.2.9/openspeechapi/dispatch/executors/subprocess_exec.py +0 -461
  66. openspeechapi-0.2.9/openspeechapi/local_engines/aim_resolver.py +0 -91
  67. openspeechapi-0.2.9/openspeechapi/server/app.py +0 -71
  68. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/__main__.py +0 -0
  69. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/client/__init__.py +0 -0
  70. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/client/client.py +0 -0
  71. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/core/__init__.py +0 -0
  72. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/core/registry.py +0 -0
  73. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/demo.py +0 -0
  74. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/dispatch/__init__.py +0 -0
  75. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/dispatch/context.py +0 -0
  76. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/dispatch/executors/__init__.py +0 -0
  77. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/dispatch/executors/base.py +0 -0
  78. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/dispatch/executors/in_process.py +0 -0
  79. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/dispatch/executors/remote.py +0 -0
  80. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/dispatch/fanout.py +0 -0
  81. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/dispatch/filters.py +0 -0
  82. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/dispatch/watcher.py +0 -0
  83. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/local_engines/__init__.py +0 -0
  84. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/local_engines/backends/__init__.py +0 -0
  85. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/local_engines/backends/docker_backend.py +0 -0
  86. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/local_engines/backends/native_backend.py +0 -0
  87. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/local_engines/base.py +0 -0
  88. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/local_engines/engines/__init__.py +0 -0
  89. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/local_engines/engines/faster_whisper.py +0 -0
  90. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/local_engines/engines/fish_speech.py +0 -0
  91. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/local_engines/engines/sherpa_onnx.py +0 -0
  92. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/local_engines/engines/whisper.py +0 -0
  93. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/local_engines/engines/whisperlivekit.py +0 -0
  94. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/local_engines/manager.py +0 -0
  95. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/local_engines/models.py +0 -0
  96. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/local_engines/progress.py +0 -0
  97. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/local_engines/registry.py +0 -0
  98. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/local_engines/task_store.py +0 -0
  99. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/local_engines/tasks.py +0 -0
  100. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/logging_config.py +0 -0
  101. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/observe/__init__.py +0 -0
  102. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/observe/base.py +0 -0
  103. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/observe/debug.py +0 -0
  104. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/observe/latency.py +0 -0
  105. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/observe/metrics.py +0 -0
  106. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/observe/tracing.py +0 -0
  107. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/observe/usage.py +0 -0
  108. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/__init__.py +0 -0
  109. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/_template.py +0 -0
  110. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/stt/__init__.py +0 -0
  111. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/stt/alibaba.py +0 -0
  112. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/stt/azure_speech.py +0 -0
  113. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/stt/baidu.py +0 -0
  114. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/stt/faster_whisper.py +0 -0
  115. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/stt/google_cloud.py +0 -0
  116. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/stt/iflytek.py +0 -0
  117. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/stt/macos_speech.py +0 -0
  118. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/stt/sherpa_onnx.py +0 -0
  119. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/stt/tencent.py +0 -0
  120. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/stt/volcengine.py +0 -0
  121. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/stt/whisper.py +0 -0
  122. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/stt/whisperlivekit.py +0 -0
  123. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/stt/windows_speech.py +0 -0
  124. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/tts/__init__.py +0 -0
  125. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/tts/alibaba.py +0 -0
  126. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/tts/azure_speech.py +0 -0
  127. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/tts/baidu.py +0 -0
  128. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/tts/coqui.py +0 -0
  129. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/tts/deepgram.py +0 -0
  130. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/tts/elevenlabs.py +0 -0
  131. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/tts/fish_speech.py +0 -0
  132. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/tts/google_cloud.py +0 -0
  133. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/tts/iflytek.py +0 -0
  134. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/tts/macos_say.py +0 -0
  135. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/tts/minimax.py +0 -0
  136. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/tts/openai.py +0 -0
  137. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/tts/tencent.py +0 -0
  138. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/tts/volcengine.py +0 -0
  139. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/tts/windows_sapi.py +0 -0
  140. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/server/__init__.py +0 -0
  141. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/server/auth.py +0 -0
  142. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/server/middleware.py +0 -0
  143. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/server/native_installer.py +0 -0
  144. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/server/routes/__init__.py +0 -0
  145. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/server/routes/tts.py +0 -0
  146. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/server/routes/webui.py +0 -0
  147. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/server/ws/__init__.py +0 -0
  148. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/server/ws/tts_stream.py +0 -0
  149. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/telemetry/__init__.py +0 -0
  150. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/telemetry/perf.py +0 -0
  151. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/utils/__init__.py +0 -0
  152. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/utils/audio_playback.py +0 -0
  153. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/vendor_registry.yaml +0 -0
  154. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/scripts/engines/cloud/install.sh +0 -0
  155. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/scripts/engines/faster-whisper/native/install.sh +0 -0
  156. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/scripts/engines/fish-speech/native/install.sh +0 -0
  157. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/scripts/engines/macos-stt/_bundle.sh +0 -0
  158. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/scripts/engines/macos-stt/install.sh +0 -0
  159. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/scripts/engines/macos-stt/macos_stt.swift +0 -0
  160. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/scripts/engines/macos-stt/request_auth.swift +0 -0
  161. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/scripts/engines/sherpa-onnx/native/install.sh +0 -0
  162. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/scripts/engines/sherpa-onnx/native/run_streaming_server.py +0 -0
  163. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/scripts/engines/whisper/native/install.sh +0 -0
  164. {openspeechapi-0.2.9 → openspeechapi-0.2.10}/scripts/engines/whisperlivekit/native/install.sh +0 -0
@@ -12,6 +12,7 @@ dist/
12
12
  build/
13
13
  *.pyc
14
14
  *.pyo
15
+ .DS_Store
15
16
 
16
17
  # macOS STT compiled artifacts
17
18
  scripts/engines/macos-stt/macos-stt-helper
@@ -1,3 +1,168 @@
1
+ Metadata-Version: 2.4
2
+ Name: openspeechapi
3
+ Version: 0.2.10
4
+ Summary: Unified speech interface for STT/TTS providers
5
+ Requires-Python: >=3.11
6
+ Requires-Dist: httpx>=0.27
7
+ Requires-Dist: loguru>=0.7
8
+ Requires-Dist: msgpack>=1.0
9
+ Requires-Dist: pydantic>=2.0
10
+ Requires-Dist: pyyaml>=6.0
11
+ Provides-Extra: alibaba
12
+ Provides-Extra: alibaba-stt
13
+ Provides-Extra: alibaba-tts
14
+ Provides-Extra: all
15
+ Requires-Dist: elevenlabs; extra == 'all'
16
+ Requires-Dist: faster-whisper; extra == 'all'
17
+ Requires-Dist: openai; extra == 'all'
18
+ Requires-Dist: openai-whisper; extra == 'all'
19
+ Requires-Dist: piper-tts; extra == 'all'
20
+ Requires-Dist: pyttsx3; (sys_platform == 'win32') and extra == 'all'
21
+ Requires-Dist: torchaudio; extra == 'all'
22
+ Requires-Dist: tts; extra == 'all'
23
+ Requires-Dist: websockets; extra == 'all'
24
+ Provides-Extra: assemblyai-stt
25
+ Provides-Extra: audio
26
+ Requires-Dist: numpy; extra == 'audio'
27
+ Requires-Dist: sounddevice; extra == 'audio'
28
+ Provides-Extra: azure
29
+ Provides-Extra: azure-stt
30
+ Provides-Extra: azure-tts
31
+ Provides-Extra: baidu
32
+ Provides-Extra: baidu-stt
33
+ Provides-Extra: baidu-tts
34
+ Provides-Extra: canary-qwen-stt
35
+ Provides-Extra: cloud
36
+ Requires-Dist: websockets; extra == 'cloud'
37
+ Provides-Extra: coqui-tts
38
+ Requires-Dist: tts; extra == 'coqui-tts'
39
+ Provides-Extra: cosyvoice-tts
40
+ Requires-Dist: torchaudio; extra == 'cosyvoice-tts'
41
+ Provides-Extra: deepgram
42
+ Requires-Dist: websockets; extra == 'deepgram'
43
+ Provides-Extra: deepgram-stt
44
+ Requires-Dist: websockets; extra == 'deepgram-stt'
45
+ Provides-Extra: deepgram-tts
46
+ Provides-Extra: dev
47
+ Requires-Dist: numpy; extra == 'dev'
48
+ Requires-Dist: pytest-asyncio>=0.24; extra == 'dev'
49
+ Requires-Dist: pytest-cov; extra == 'dev'
50
+ Requires-Dist: pytest-dotenv; extra == 'dev'
51
+ Requires-Dist: pytest>=8.0; extra == 'dev'
52
+ Requires-Dist: ruff==0.15.*; extra == 'dev'
53
+ Provides-Extra: dolphin-stt
54
+ Requires-Dist: dataoceanai-dolphin; extra == 'dolphin-stt'
55
+ Requires-Dist: torchcodec; extra == 'dolphin-stt'
56
+ Provides-Extra: elevenlabs
57
+ Requires-Dist: elevenlabs; extra == 'elevenlabs'
58
+ Requires-Dist: websockets; extra == 'elevenlabs'
59
+ Provides-Extra: elevenlabs-stt
60
+ Requires-Dist: websockets; extra == 'elevenlabs-stt'
61
+ Provides-Extra: elevenlabs-tts
62
+ Requires-Dist: elevenlabs; extra == 'elevenlabs-tts'
63
+ Provides-Extra: faster-whisper-stt
64
+ Requires-Dist: faster-whisper; extra == 'faster-whisper-stt'
65
+ Provides-Extra: fireredasr-stt
66
+ Requires-Dist: fireredasr; extra == 'fireredasr-stt'
67
+ Provides-Extra: fish-speech-tts
68
+ Provides-Extra: funasr-stt
69
+ Requires-Dist: funasr>=1.1.0; extra == 'funasr-stt'
70
+ Requires-Dist: torch; extra == 'funasr-stt'
71
+ Requires-Dist: torchaudio; extra == 'funasr-stt'
72
+ Provides-Extra: gemma4-stt
73
+ Requires-Dist: mlx-vlm<0.6.2,>=0.6.1; (sys_platform == 'darwin') and extra == 'gemma4-stt'
74
+ Provides-Extra: google
75
+ Provides-Extra: google-stt
76
+ Provides-Extra: google-tts
77
+ Provides-Extra: iflytek
78
+ Requires-Dist: websockets; extra == 'iflytek'
79
+ Provides-Extra: iflytek-stt
80
+ Requires-Dist: websockets; extra == 'iflytek-stt'
81
+ Provides-Extra: iflytek-tts
82
+ Requires-Dist: websockets; extra == 'iflytek-tts'
83
+ Provides-Extra: kimi-audio-stt
84
+ Requires-Dist: torch; extra == 'kimi-audio-stt'
85
+ Provides-Extra: macos-native
86
+ Provides-Extra: minimax-tts
87
+ Provides-Extra: mlx-whisper-stt
88
+ Requires-Dist: mlx-whisper; (sys_platform == 'darwin') and extra == 'mlx-whisper-stt'
89
+ Provides-Extra: mms-stt
90
+ Requires-Dist: soundfile; extra == 'mms-stt'
91
+ Requires-Dist: torch; extra == 'mms-stt'
92
+ Requires-Dist: transformers; extra == 'mms-stt'
93
+ Provides-Extra: moonshine-stt
94
+ Requires-Dist: soundfile; extra == 'moonshine-stt'
95
+ Requires-Dist: torch; extra == 'moonshine-stt'
96
+ Requires-Dist: transformers; extra == 'moonshine-stt'
97
+ Provides-Extra: openai
98
+ Requires-Dist: openai; extra == 'openai'
99
+ Provides-Extra: openai-stt
100
+ Requires-Dist: openai; extra == 'openai-stt'
101
+ Provides-Extra: openai-tts
102
+ Requires-Dist: openai; extra == 'openai-tts'
103
+ Provides-Extra: paraformer-stt
104
+ Requires-Dist: funasr>=1.1.0; extra == 'paraformer-stt'
105
+ Requires-Dist: torch; extra == 'paraformer-stt'
106
+ Requires-Dist: torchaudio; extra == 'paraformer-stt'
107
+ Provides-Extra: parakeet-stt
108
+ Requires-Dist: parakeet-mlx; (sys_platform == 'darwin') and extra == 'parakeet-stt'
109
+ Provides-Extra: phi4-multimodal-stt
110
+ Requires-Dist: accelerate; extra == 'phi4-multimodal-stt'
111
+ Requires-Dist: backoff; extra == 'phi4-multimodal-stt'
112
+ Requires-Dist: peft; extra == 'phi4-multimodal-stt'
113
+ Requires-Dist: pillow; extra == 'phi4-multimodal-stt'
114
+ Requires-Dist: scipy; extra == 'phi4-multimodal-stt'
115
+ Requires-Dist: soundfile; extra == 'phi4-multimodal-stt'
116
+ Requires-Dist: torch; extra == 'phi4-multimodal-stt'
117
+ Requires-Dist: torchvision; extra == 'phi4-multimodal-stt'
118
+ Requires-Dist: transformers; extra == 'phi4-multimodal-stt'
119
+ Provides-Extra: piper-tts
120
+ Requires-Dist: piper-tts; extra == 'piper-tts'
121
+ Provides-Extra: qwen3-asr-stt
122
+ Requires-Dist: modelscope; extra == 'qwen3-asr-stt'
123
+ Requires-Dist: qwen-asr; extra == 'qwen3-asr-stt'
124
+ Provides-Extra: qwen3-omni-stt
125
+ Requires-Dist: accelerate; extra == 'qwen3-omni-stt'
126
+ Requires-Dist: qwen-omni-utils; extra == 'qwen3-omni-stt'
127
+ Requires-Dist: torch; extra == 'qwen3-omni-stt'
128
+ Requires-Dist: transformers; extra == 'qwen3-omni-stt'
129
+ Provides-Extra: sensevoice-stt
130
+ Requires-Dist: funasr>=1.1.0; extra == 'sensevoice-stt'
131
+ Requires-Dist: torch; extra == 'sensevoice-stt'
132
+ Requires-Dist: torchaudio; extra == 'sensevoice-stt'
133
+ Provides-Extra: server
134
+ Requires-Dist: fastapi; extra == 'server'
135
+ Requires-Dist: python-multipart; extra == 'server'
136
+ Requires-Dist: uvicorn; extra == 'server'
137
+ Requires-Dist: websockets; extra == 'server'
138
+ Provides-Extra: sherpa-onnx-stt
139
+ Requires-Dist: websockets; extra == 'sherpa-onnx-stt'
140
+ Provides-Extra: tencent
141
+ Provides-Extra: tencent-stt
142
+ Provides-Extra: tencent-tts
143
+ Provides-Extra: tracing
144
+ Requires-Dist: opentelemetry-api; extra == 'tracing'
145
+ Requires-Dist: opentelemetry-sdk; extra == 'tracing'
146
+ Provides-Extra: volcengine
147
+ Provides-Extra: volcengine-stt
148
+ Provides-Extra: volcengine-tts
149
+ Provides-Extra: vosk-stt
150
+ Requires-Dist: huggingface-hub; extra == 'vosk-stt'
151
+ Requires-Dist: vosk; extra == 'vosk-stt'
152
+ Provides-Extra: voxtral-stt
153
+ Requires-Dist: accelerate; extra == 'voxtral-stt'
154
+ Requires-Dist: mistral-common[audio]>=1.8.1; extra == 'voxtral-stt'
155
+ Requires-Dist: torch; extra == 'voxtral-stt'
156
+ Requires-Dist: transformers>=4.54.0; extra == 'voxtral-stt'
157
+ Provides-Extra: wenet-stt
158
+ Provides-Extra: whisper-stt
159
+ Requires-Dist: openai-whisper; extra == 'whisper-stt'
160
+ Provides-Extra: whisperlivekit-stt
161
+ Requires-Dist: websockets; extra == 'whisperlivekit-stt'
162
+ Provides-Extra: windows-native
163
+ Requires-Dist: pyttsx3; (sys_platform == 'win32') and extra == 'windows-native'
164
+ Description-Content-Type: text/markdown
165
+
1
166
  # OpenSpeechAPI
2
167
 
3
168
  > Unified speech interface for STT/TTS providers — one API, multiple backends.
@@ -8,22 +173,24 @@ OpenSpeechAPI 提供统一的语音接口,通过字符串指定 provider 即
8
173
 
9
174
  ### 安装
10
175
 
176
+ **方式一 · 通过 PyPI 安装(直接使用)**
11
177
  ```bash
12
- # 安装全部 provider
13
- pip install -e ".[all]"
14
-
15
- # 或按需安装
16
- pip install -e ".[openai]" # OpenAI Whisper STT + TTS
17
- pip install -e ".[faster-whisper]" # 本地 faster-whisper STT
18
- pip install -e ".[openai,faster-whisper]" # 指定多个
19
-
20
- # 仅核心包(不含任何 provider)
21
- pip install -e .
178
+ pip install "openspeechapi[server]" # HTTP 服务 / WebUI 必须带 [server](fastapi/uvicorn)
179
+ pip install "openspeechapi[server,openai]" # 服务 + 指定 provider
180
+ pip install "openspeechapi[server,all]" # 服务 + 全部 provider
181
+ pip install openspeechapi # 仅核心库(库模式;不含服务,也起不了 server)
182
+ ```
22
183
 
23
- # 开发环境
24
- pip install -e ".[dev]"
184
+ **方式二 · 源码安装(开发,可编辑)**
185
+ ```bash
186
+ git clone https://github.com/wingsfly/OpenSpeechAPI.git
187
+ cd OpenSpeechAPI
188
+ uv venv && uv pip install -e ".[server,dev]" # 或 pip install -e ".[server,dev]";按需换 .[all] 等
25
189
  ```
26
190
 
191
+ > ⚠️ 纯 `pip install openspeechapi`(核心库)**不含 fastapi/uvicorn**,无法 `serve`;起服务请带 `[server]`。
192
+ > 两种方式启动服务的差异见下方 [启动服务](#启动服务)。
193
+
27
194
  ### 30 秒上手 — TTS
28
195
 
29
196
  ```python
@@ -205,7 +372,24 @@ python -m openspeechapi.demo tts -t "Hello world" --play \
205
372
  | `whisperlivekit-stt` | STT | WhisperLiveKit 本地服务(Deepgram 兼容 WS,支持 MLX 后端) | local | `pip install -e ".[whisperlivekit-stt]"` |
206
373
  | `elevenlabs-stt` | STT | ElevenLabs Scribe API(云端,支持实时流式 WS + 批量) | remote | `pip install -e ".[elevenlabs-stt]"` |
207
374
  | `deepgram` | STT | Deepgram API(云端,支持实时流式) | remote | `pip install -e ".[deepgram]"` |
208
- | `gemma4` | STT | Google Gemma 4 多模态 ASR(macOS/MLX 本地,E4B 默认/12B 可选,>30s 自动分段,支持转写/翻译/理解) | subprocess | `pip install -e ".[gemma4-stt]"` |
375
+ | `gemma4` | STT | Google Gemma 4 多模态 ASR(macOS/MLX 本地,E2B/E4B,>30s 自动分段;任务:转写 / 翻译(任意目标语言) / 理解 / 问答 / 语种识别) | subprocess | `pip install -e ".[gemma4-stt]"` |
376
+ | `sensevoice` | STT | SenseVoice-Small 本地多语种 ASR(FunASR,zh/粤/en/ja/ko,比 Whisper 快 ~15-50×) | subprocess | `pip install -e ".[sensevoice-stt]"` |
377
+ | `qwen3-asr` | STT | Qwen3-ASR 本地多语种 ASR(2026 开源 SOTA,中/方言/英,0.6B/1.7B) | subprocess | `pip install -e ".[qwen3-asr-stt]"` |
378
+ | `mlx-whisper` | STT | Whisper on Apple MLX(本地,large-v3 / turbo,中/英多语种,仅 Apple Silicon) | subprocess | `pip install -e ".[mlx-whisper-stt]"` |
379
+ | `paraformer` | STT | Paraformer 本地 ASR(FunASR,普通话 SOTA 级,VAD+标点,zh/en) | subprocess | `pip install -e ".[paraformer-stt]"` |
380
+ | `funasr` | STT | FunASR 总入口(任选模型库 + VAD/标点/说话人分离) | subprocess | `pip install -e ".[funasr-stt]"` |
381
+ | `fireredasr` | STT | 小红书 FireRedASR(普通话 SOTA+方言+英文,歌词识别,AED/LLM) | subprocess | `pip install -e ".[fireredasr-stt]"` |
382
+ | `dolphin` | STT | DataoceanAI Dolphin(40 东方语种 + 22 中文方言,small/base) | subprocess | `pip install -e ".[dolphin-stt]"` |
383
+ | `wenet` | STT | WeNet U2++ Conformer(生产级,zh/en 预置;流式后续) | subprocess | WebUI Engines 安装,或 `pip install 'wenet @ git+https://github.com/wenet-e2e/wenet.git'` |
384
+ | `canary-qwen` | STT | NVIDIA Canary-Qwen-2.5B(Open ASR 英文第1,SALM;仅英文,需 NeMo+GPU) | subprocess | WebUI Engines 安装,或 `pip install 'nemo_toolkit[asr] @ git+https://github.com/NVIDIA/NeMo.git'` |
385
+ | `parakeet` | STT | NVIDIA Parakeet-TDT on MLX(最快,v2 英文/v3 欧语;中文弱,仅 Apple Silicon) | subprocess | `pip install -e ".[parakeet-stt]"` |
386
+ | `qwen3-omni` | STT | Qwen3-Omni-30B 全模态 LLM(ASR+理解,zh/en+;需大 GPU ~60GB) | subprocess | `pip install -e ".[qwen3-omni-stt]"` |
387
+ | `voxtral` | STT | Mistral Voxtral(Mini-3B/Small-24B,转写+理解,多语种;建议 GPU) | subprocess | `pip install -e ".[voxtral-stt]"` |
388
+ | `phi4-multimodal` | STT | 微软 Phi-4-multimodal(多模态 LLM,ASR+理解,zh/en+;建议 GPU) | subprocess | `pip install -e ".[phi4-multimodal-stt]"` |
389
+ | `kimi-audio` | STT | 月之暗面 Kimi-Audio-7B(音频基础模型,ASR+理解,zh/en;需 Linux+CUDA/flash-attn) | subprocess | WebUI Engines 安装,或 `pip install 'kimi-audio @ git+https://github.com/MoonshotAI/Kimi-Audio.git'` |
390
+ | `moonshine` | STT | Useful Sensors Moonshine(边缘/实时英文 ASR,tiny/base,轻量) | subprocess | `pip install -e ".[moonshine-stt]"` |
391
+ | `vosk` | STT | Vosk(Kaldi 离线,20+ 语言含 zh/en,轻量低资源) | subprocess | `pip install -e ".[vosk-stt]"` |
392
+ | `mms` | STT | Meta MMS(Wav2Vec2-CTC,1000+ 语言含 zh/en,按 ISO 639-3 选语言) | subprocess | `pip install -e ".[mms-stt]"` |
209
393
  | `openai-tts` | TTS | OpenAI Speech API(云端,支持流式) | remote | `pip install -e ".[openai]"` |
210
394
  | `elevenlabs` | TTS | ElevenLabs 高质量语音(云端,支持 HTTP/WS 流式) | remote | `pip install -e ".[elevenlabs-tts]"` |
211
395
  | `minimax` | TTS | Minimax 语音合成(云端) | remote | `pip install -e ".[minimax-tts]"` |
@@ -229,6 +413,8 @@ print(list_providers())
229
413
  # 'whisperlivekit-stt']
230
414
  ```
231
415
 
416
+ > **音频输入格式**:STT 上传支持 WAV/PCM/MP3/FLAC/OGG/WebM 等。引擎不能直接处理的格式会由服务端自动转为 16k 单声道 WAV(压缩格式需 `ffmpeg`);缺 ffmpeg 且格式不被支持时返回 400,Web UI 会在上传/录音前拦截提示。详见 [docs/architecture/audio-format-negotiation.md](docs/architecture/audio-format-negotiation.md)。
417
+
232
418
  ## Provider 参数
233
419
 
234
420
  ### `openai-stt`
@@ -263,6 +449,204 @@ create_provider("faster-whisper",
263
449
  )
264
450
  ```
265
451
 
452
+ ### `gemma4`
453
+
454
+ ```python
455
+ create_provider("gemma4",
456
+ model="mlx-community/gemma-4-E4B-it-8bit", # E2B/E4B(8bit 翻译更准;勿用 12B)
457
+ task="transcribe", # transcribe|translate|understand|qa|detect_language
458
+ target_language="English", # task=translate 的目标语言(任意语言)
459
+ include_transcript=False, # task=translate:同时输出源转写 + 译文
460
+ )
461
+ ```
462
+
463
+ macOS / Apple Silicon 本地多模态 ASR(mlx-vlm)。5 个任务及全部字段可在 Web UI 的 Lab「Advanced Options」按请求覆盖。详见 [docs/architecture/gemma4-stt-provider.md](docs/architecture/gemma4-stt-provider.md)。
464
+
465
+ ### `sensevoice`
466
+
467
+ ```python
468
+ create_provider("sensevoice",
469
+ model="FunAudioLLM/SenseVoiceSmall",
470
+ language="auto", # auto|zh|en|yue|ja|ko|nospeech
471
+ device="cpu", # cpu|mps|cuda
472
+ use_itn=True, # 标点/数字规整
473
+ )
474
+ ```
475
+
476
+ FunASR 本地多语种 ASR(zh/粤/en/ja/ko),非自回归、极快;全部字段可在 Lab「Advanced Options」按请求覆盖。详见 [docs/architecture/sensevoice-stt-provider.md](docs/architecture/sensevoice-stt-provider.md)。
477
+
478
+ ### `qwen3-asr`
479
+
480
+ ```python
481
+ create_provider("qwen3-asr",
482
+ model="Qwen/Qwen3-ASR-0.6B", # 或 Qwen/Qwen3-ASR-1.7B
483
+ language="auto", # auto|Chinese|English|Cantonese|Japanese|Korean
484
+ device="cpu", # cpu|mps|cuda
485
+ )
486
+ ```
487
+
488
+ 阿里 Qwen3-ASR(2026 开源 ASR SOTA,中/方言/英)本地推理(qwen-asr 包)。需另装 `torch`。详见 [docs/architecture/qwen3-asr-stt-provider.md](docs/architecture/qwen3-asr-stt-provider.md)。
489
+
490
+ ### `mlx-whisper`
491
+
492
+ ```python
493
+ create_provider("mlx-whisper",
494
+ model="mlx-community/whisper-large-v3-turbo", # 或 whisper-large-v3-mlx
495
+ language="auto", # auto|en|zh|yue|ja|ko|...
496
+ )
497
+ ```
498
+
499
+ Apple Silicon 原生 Whisper(MLX),large-v3 / turbo,中英文多语种。仅 macOS/Apple Silicon。详见 [docs/architecture/mlx-whisper-stt-provider.md](docs/architecture/mlx-whisper-stt-provider.md)。
500
+
501
+ ### `paraformer`
502
+
503
+ ```python
504
+ create_provider("paraformer",
505
+ model="funasr/paraformer-zh", # 或 funasr/paraformer-en
506
+ vad=True, punc=True, # VAD 切分 + 标点恢复
507
+ )
508
+ ```
509
+
510
+ 阿里 Paraformer(FunASR),普通话 SOTA 级非自回归 ASR,带 VAD + 标点。详见 [docs/architecture/paraformer-stt-provider.md](docs/architecture/paraformer-stt-provider.md)。
511
+
512
+ ### `funasr`
513
+
514
+ ```python
515
+ create_provider("funasr",
516
+ model="funasr/paraformer-zh", # 模型库任意条目
517
+ spk=True, # CAM++ 说话人分离 → [spk0]/[spk1] 标注
518
+ )
519
+ ```
520
+
521
+ FunASR 通用总入口:任选模型库模型 + VAD/标点/**说话人分离**。详见 [docs/architecture/funasr-stt-provider.md](docs/architecture/funasr-stt-provider.md)。
522
+
523
+ ### `fireredasr`
524
+
525
+ ```python
526
+ create_provider("fireredasr",
527
+ model_type="aed", # aed(≤60s) | llm(≤30s);权重自动下载
528
+ )
529
+ ```
530
+
531
+ 小红书 FireRedASR,普通话公开基准 SOTA + 方言 + 英文,歌词识别强。详见 [docs/architecture/fireredasr-stt-provider.md](docs/architecture/fireredasr-stt-provider.md)。
532
+
533
+ ### `dolphin`
534
+
535
+ ```python
536
+ create_provider("dolphin",
537
+ model_name="small", # small | base
538
+ lang_sym="zh", region_sym="CN", # 留空则自动检测
539
+ )
540
+ ```
541
+
542
+ DataoceanAI Dolphin,40 种东方语言 + 22 种中文方言。详见 [docs/architecture/dolphin-stt-provider.md](docs/architecture/dolphin-stt-provider.md)。
543
+
544
+ ### `wenet`
545
+
546
+ ```python
547
+ create_provider("wenet",
548
+ model="chinese", # chinese | english
549
+ )
550
+ ```
551
+
552
+ WeNet 生产级 U2++ Conformer(zh/en 预置)。从 git 安装(不在 PyPI)。详见 [docs/architecture/wenet-stt-provider.md](docs/architecture/wenet-stt-provider.md)。
553
+
554
+ ### `canary-qwen`
555
+
556
+ ```python
557
+ create_provider("canary-qwen",
558
+ model="nvidia/canary-qwen-2.5b",
559
+ device="cuda", dtype="bfloat16", # 仅英文;强烈建议 GPU
560
+ )
561
+ ```
562
+
563
+ NVIDIA Canary-Qwen-2.5B(Open ASR 英文第 1,SALM)。**仅英文**;NeMo 重型安装 + 建议 GPU。详见 [docs/architecture/canary-qwen-stt-provider.md](docs/architecture/canary-qwen-stt-provider.md)。
564
+
565
+ ### `parakeet`
566
+
567
+ ```python
568
+ create_provider("parakeet",
569
+ model="mlx-community/parakeet-tdt-0.6b-v2", # v2 英文;v3 + 欧洲语言
570
+ )
571
+ ```
572
+
573
+ NVIDIA Parakeet-TDT on Apple MLX,榜上最快。英文/欧语为主,**中文弱**;仅 Apple Silicon。详见 [docs/architecture/parakeet-stt-provider.md](docs/architecture/parakeet-stt-provider.md)。
574
+
575
+ ### `qwen3-omni`
576
+
577
+ ```python
578
+ create_provider("qwen3-omni",
579
+ model="Qwen/Qwen3-Omni-30B-A3B-Instruct",
580
+ prompt="Transcribe the audio into text.", # 改成问题即可做音频问答
581
+ )
582
+ ```
583
+
584
+ 阿里 Qwen3-Omni-30B-A3B 全模态 LLM(ASR + 音频理解,zh/en+)。**需大显存 GPU(~60GB),笔记本装不下**。详见 [docs/architecture/qwen3-omni-stt-provider.md](docs/architecture/qwen3-omni-stt-provider.md)。
585
+
586
+ ### `voxtral`
587
+
588
+ ```python
589
+ create_provider("voxtral",
590
+ model="mistralai/Voxtral-Mini-3B-2507", # 或 Voxtral-Small-24B-2507
591
+ language="en",
592
+ )
593
+ ```
594
+
595
+ Mistral Voxtral(转写 + 音频理解,多语种)。3B/24B,建议 GPU。详见 [docs/architecture/voxtral-stt-provider.md](docs/architecture/voxtral-stt-provider.md)。
596
+
597
+ ### `phi4-multimodal`
598
+
599
+ ```python
600
+ create_provider("phi4-multimodal",
601
+ model="microsoft/Phi-4-multimodal-instruct",
602
+ prompt="Transcribe the audio clip into text.",
603
+ )
604
+ ```
605
+
606
+ 微软 Phi-4-multimodal,紧凑多模态 LLM(ASR + 音频理解,zh/en+)。建议 GPU。详见 [docs/architecture/phi4-multimodal-stt-provider.md](docs/architecture/phi4-multimodal-stt-provider.md)。
607
+
608
+ ### `kimi-audio`
609
+
610
+ ```python
611
+ create_provider("kimi-audio",
612
+ model="moonshotai/Kimi-Audio-7B-Instruct",
613
+ prompt="Please transcribe the audio into text.",
614
+ )
615
+ ```
616
+
617
+ 月之暗面 Kimi-Audio-7B 音频基础模型(ASR + 音频理解,zh/en)。git 安装 + 建议 GPU。详见 [docs/architecture/kimi-audio-stt-provider.md](docs/architecture/kimi-audio-stt-provider.md)。
618
+
619
+ ### `moonshine`
620
+
621
+ ```python
622
+ create_provider("moonshine",
623
+ model="UsefulSensors/moonshine-base", # base | tiny
624
+ )
625
+ ```
626
+
627
+ Useful Sensors Moonshine,边缘/实时英文 ASR,轻量快速。详见 [docs/architecture/moonshine-stt-provider.md](docs/architecture/moonshine-stt-provider.md)。
628
+
629
+ ### `vosk`
630
+
631
+ ```python
632
+ create_provider("vosk",
633
+ model="vosk-model-small-en-us-0.15", # 中文: vosk-model-small-cn-0.22
634
+ )
635
+ ```
636
+
637
+ Vosk(Kaldi 离线),20+ 语言,轻量低资源,模型自动下载。详见 [docs/architecture/vosk-stt-provider.md](docs/architecture/vosk-stt-provider.md)。
638
+
639
+ ### `mms`
640
+
641
+ ```python
642
+ create_provider("mms",
643
+ model="facebook/mms-1b-all",
644
+ language="eng", # ISO 639-3: eng / cmn / yue / jpn ...
645
+ )
646
+ ```
647
+
648
+ Meta MMS(Wav2Vec2-CTC),1000+ 语言含中英,按 **ISO 639-3** 码切换语言适配器;CTC 输出小写无标点。详见 [docs/architecture/mms-stt-provider.md](docs/architecture/mms-stt-provider.md)。
649
+
266
650
  ### `openai-tts`
267
651
 
268
652
  ```python
@@ -407,10 +791,25 @@ bash scripts/engines/macos-stt/install.sh
407
791
 
408
792
  ### 启动服务
409
793
 
794
+ **pip 安装后**(已带 `[server]`)—— 配置自动解析/生成,开箱即起:
410
795
  ```bash
411
- openspeechapi serve --config providers.yaml --port 8600
796
+ openspeechapi serve # 自动解析配置;没有则生成默认(macOS 默认 macos_tts)
797
+ openspeechapi serve --port 8600 # 指定端口
412
798
  ```
413
799
 
800
+ **源码目录运行**:
801
+ ```bash
802
+ python -m openspeechapi.cli serve # 或 openspeechapi serve;在仓库目录优先用 ./providers.yaml
803
+ ```
804
+
805
+ 启动后打开 WebUI:**http://127.0.0.1:8600/ui/**
806
+
807
+ **配置解析顺序**(`--config` 可放在 `serve` **前或后**,例如 `openspeechapi --config x serve` 或 `openspeechapi serve --config x`):
808
+ 1. 显式 `--config <path>`
809
+ 2. 当前目录 `./providers.yaml`(源码目录运行时优先)
810
+ 3. `~/.config/openspeechapi/providers.yaml`(遵循 `XDG_CONFIG_HOME`)
811
+ 4. 都没有 → 在 `~/.config/openspeechapi/providers.yaml` **自动生成**一份可用默认配置
812
+
414
813
  ### Python Client(与 Library 模式接口一致)
415
814
 
416
815
  ```python