openspeechapi 0.2.9__tar.gz → 0.2.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/.gitignore +1 -0
- openspeechapi-0.2.9/README.md → openspeechapi-0.2.10/PKG-INFO +413 -14
- openspeechapi-0.2.9/PKG-INFO → openspeechapi-0.2.10/README.md +248 -119
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/__init__.py +1 -1
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/cli.py +17 -9
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/config.py +105 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/core/base.py +91 -1
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/core/enums.py +4 -1
- openspeechapi-0.2.10/openspeechapi/core/model_hub.py +257 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/core/models.py +3 -0
- openspeechapi-0.2.10/openspeechapi/core/settings.py +13 -0
- openspeechapi-0.2.10/openspeechapi/dispatch/aim_provision.py +91 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/dispatch/dispatcher.py +39 -1
- openspeechapi-0.2.10/openspeechapi/dispatch/executors/subprocess_exec.py +907 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/dispatch/lifecycle.py +17 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/engine_catalog.py +10 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/engine_registry.yaml +186 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/exceptions.py +17 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/factory.py +85 -0
- openspeechapi-0.2.10/openspeechapi/local_engines/aim_resolver.py +179 -0
- openspeechapi-0.2.10/openspeechapi/local_engines/isolated_venv.py +164 -0
- openspeechapi-0.2.10/openspeechapi/providers/stt/_local_audio.py +43 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/stt/assemblyai.py +2 -1
- openspeechapi-0.2.10/openspeechapi/providers/stt/canary_qwen_stt.py +135 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/stt/deepgram.py +22 -21
- openspeechapi-0.2.10/openspeechapi/providers/stt/dolphin_stt.py +242 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/stt/elevenlabs.py +2 -1
- openspeechapi-0.2.10/openspeechapi/providers/stt/fireredasr_stt.py +173 -0
- openspeechapi-0.2.10/openspeechapi/providers/stt/funasr_stt.py +192 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/stt/gemma4.py +48 -17
- openspeechapi-0.2.10/openspeechapi/providers/stt/kimi_audio_stt.py +152 -0
- openspeechapi-0.2.10/openspeechapi/providers/stt/mlx_whisper_stt.py +192 -0
- openspeechapi-0.2.10/openspeechapi/providers/stt/mms_languages.json +5627 -0
- openspeechapi-0.2.10/openspeechapi/providers/stt/mms_stt.py +224 -0
- openspeechapi-0.2.10/openspeechapi/providers/stt/moonshine_stt.py +128 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/stt/openai.py +11 -2
- openspeechapi-0.2.10/openspeechapi/providers/stt/paraformer.py +276 -0
- openspeechapi-0.2.10/openspeechapi/providers/stt/parakeet_mlx_stt.py +138 -0
- openspeechapi-0.2.10/openspeechapi/providers/stt/phi4_multimodal_stt.py +202 -0
- openspeechapi-0.2.10/openspeechapi/providers/stt/qwen3_asr.py +185 -0
- openspeechapi-0.2.10/openspeechapi/providers/stt/qwen3_omni_stt.py +168 -0
- openspeechapi-0.2.10/openspeechapi/providers/stt/sensevoice.py +283 -0
- openspeechapi-0.2.10/openspeechapi/providers/stt/vosk_stt.py +199 -0
- openspeechapi-0.2.10/openspeechapi/providers/stt/voxtral_stt.py +191 -0
- openspeechapi-0.2.10/openspeechapi/providers/stt/wenet_stt.py +181 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/tts/cosyvoice.py +27 -5
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/tts/piper.py +41 -2
- openspeechapi-0.2.10/openspeechapi/server/app.py +122 -0
- openspeechapi-0.2.10/openspeechapi/server/extras_installer.py +200 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/server/routes/management.py +288 -25
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/server/routes/stt.py +6 -1
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/server/webui/app.js +557 -93
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/server/webui/index.html +28 -1
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/server/webui/styles.css +27 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/server/ws/stt_stream.py +29 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/utils/audio_converter.py +51 -1
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/providers.example.yaml +4 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/pyproject.toml +50 -3
- openspeechapi-0.2.10/scripts/aim_adopt.py +146 -0
- openspeechapi-0.2.10/scripts/aim_consumers.py +159 -0
- openspeechapi-0.2.10/scripts/gen_mms_languages.py +86 -0
- openspeechapi-0.2.10/scripts/preload_stt_model.py +109 -0
- openspeechapi-0.2.10/scripts/release.sh +111 -0
- openspeechapi-0.2.9/openspeechapi/core/settings.py +0 -8
- openspeechapi-0.2.9/openspeechapi/dispatch/executors/subprocess_exec.py +0 -461
- openspeechapi-0.2.9/openspeechapi/local_engines/aim_resolver.py +0 -91
- openspeechapi-0.2.9/openspeechapi/server/app.py +0 -71
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/__main__.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/client/__init__.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/client/client.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/core/__init__.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/core/registry.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/demo.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/dispatch/__init__.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/dispatch/context.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/dispatch/executors/__init__.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/dispatch/executors/base.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/dispatch/executors/in_process.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/dispatch/executors/remote.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/dispatch/fanout.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/dispatch/filters.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/dispatch/watcher.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/local_engines/__init__.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/local_engines/backends/__init__.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/local_engines/backends/docker_backend.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/local_engines/backends/native_backend.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/local_engines/base.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/local_engines/engines/__init__.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/local_engines/engines/faster_whisper.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/local_engines/engines/fish_speech.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/local_engines/engines/sherpa_onnx.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/local_engines/engines/whisper.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/local_engines/engines/whisperlivekit.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/local_engines/manager.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/local_engines/models.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/local_engines/progress.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/local_engines/registry.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/local_engines/task_store.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/local_engines/tasks.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/logging_config.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/observe/__init__.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/observe/base.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/observe/debug.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/observe/latency.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/observe/metrics.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/observe/tracing.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/observe/usage.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/__init__.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/_template.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/stt/__init__.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/stt/alibaba.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/stt/azure_speech.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/stt/baidu.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/stt/faster_whisper.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/stt/google_cloud.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/stt/iflytek.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/stt/macos_speech.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/stt/sherpa_onnx.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/stt/tencent.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/stt/volcengine.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/stt/whisper.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/stt/whisperlivekit.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/stt/windows_speech.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/tts/__init__.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/tts/alibaba.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/tts/azure_speech.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/tts/baidu.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/tts/coqui.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/tts/deepgram.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/tts/elevenlabs.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/tts/fish_speech.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/tts/google_cloud.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/tts/iflytek.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/tts/macos_say.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/tts/minimax.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/tts/openai.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/tts/tencent.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/tts/volcengine.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/providers/tts/windows_sapi.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/server/__init__.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/server/auth.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/server/middleware.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/server/native_installer.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/server/routes/__init__.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/server/routes/tts.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/server/routes/webui.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/server/ws/__init__.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/server/ws/tts_stream.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/telemetry/__init__.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/telemetry/perf.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/utils/__init__.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/utils/audio_playback.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/openspeechapi/vendor_registry.yaml +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/scripts/engines/cloud/install.sh +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/scripts/engines/faster-whisper/native/install.sh +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/scripts/engines/fish-speech/native/install.sh +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/scripts/engines/macos-stt/_bundle.sh +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/scripts/engines/macos-stt/install.sh +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/scripts/engines/macos-stt/macos_stt.swift +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/scripts/engines/macos-stt/request_auth.swift +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/scripts/engines/sherpa-onnx/native/install.sh +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/scripts/engines/sherpa-onnx/native/run_streaming_server.py +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/scripts/engines/whisper/native/install.sh +0 -0
- {openspeechapi-0.2.9 → openspeechapi-0.2.10}/scripts/engines/whisperlivekit/native/install.sh +0 -0
|
@@ -1,3 +1,168 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: openspeechapi
|
|
3
|
+
Version: 0.2.10
|
|
4
|
+
Summary: Unified speech interface for STT/TTS providers
|
|
5
|
+
Requires-Python: >=3.11
|
|
6
|
+
Requires-Dist: httpx>=0.27
|
|
7
|
+
Requires-Dist: loguru>=0.7
|
|
8
|
+
Requires-Dist: msgpack>=1.0
|
|
9
|
+
Requires-Dist: pydantic>=2.0
|
|
10
|
+
Requires-Dist: pyyaml>=6.0
|
|
11
|
+
Provides-Extra: alibaba
|
|
12
|
+
Provides-Extra: alibaba-stt
|
|
13
|
+
Provides-Extra: alibaba-tts
|
|
14
|
+
Provides-Extra: all
|
|
15
|
+
Requires-Dist: elevenlabs; extra == 'all'
|
|
16
|
+
Requires-Dist: faster-whisper; extra == 'all'
|
|
17
|
+
Requires-Dist: openai; extra == 'all'
|
|
18
|
+
Requires-Dist: openai-whisper; extra == 'all'
|
|
19
|
+
Requires-Dist: piper-tts; extra == 'all'
|
|
20
|
+
Requires-Dist: pyttsx3; (sys_platform == 'win32') and extra == 'all'
|
|
21
|
+
Requires-Dist: torchaudio; extra == 'all'
|
|
22
|
+
Requires-Dist: tts; extra == 'all'
|
|
23
|
+
Requires-Dist: websockets; extra == 'all'
|
|
24
|
+
Provides-Extra: assemblyai-stt
|
|
25
|
+
Provides-Extra: audio
|
|
26
|
+
Requires-Dist: numpy; extra == 'audio'
|
|
27
|
+
Requires-Dist: sounddevice; extra == 'audio'
|
|
28
|
+
Provides-Extra: azure
|
|
29
|
+
Provides-Extra: azure-stt
|
|
30
|
+
Provides-Extra: azure-tts
|
|
31
|
+
Provides-Extra: baidu
|
|
32
|
+
Provides-Extra: baidu-stt
|
|
33
|
+
Provides-Extra: baidu-tts
|
|
34
|
+
Provides-Extra: canary-qwen-stt
|
|
35
|
+
Provides-Extra: cloud
|
|
36
|
+
Requires-Dist: websockets; extra == 'cloud'
|
|
37
|
+
Provides-Extra: coqui-tts
|
|
38
|
+
Requires-Dist: tts; extra == 'coqui-tts'
|
|
39
|
+
Provides-Extra: cosyvoice-tts
|
|
40
|
+
Requires-Dist: torchaudio; extra == 'cosyvoice-tts'
|
|
41
|
+
Provides-Extra: deepgram
|
|
42
|
+
Requires-Dist: websockets; extra == 'deepgram'
|
|
43
|
+
Provides-Extra: deepgram-stt
|
|
44
|
+
Requires-Dist: websockets; extra == 'deepgram-stt'
|
|
45
|
+
Provides-Extra: deepgram-tts
|
|
46
|
+
Provides-Extra: dev
|
|
47
|
+
Requires-Dist: numpy; extra == 'dev'
|
|
48
|
+
Requires-Dist: pytest-asyncio>=0.24; extra == 'dev'
|
|
49
|
+
Requires-Dist: pytest-cov; extra == 'dev'
|
|
50
|
+
Requires-Dist: pytest-dotenv; extra == 'dev'
|
|
51
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
52
|
+
Requires-Dist: ruff==0.15.*; extra == 'dev'
|
|
53
|
+
Provides-Extra: dolphin-stt
|
|
54
|
+
Requires-Dist: dataoceanai-dolphin; extra == 'dolphin-stt'
|
|
55
|
+
Requires-Dist: torchcodec; extra == 'dolphin-stt'
|
|
56
|
+
Provides-Extra: elevenlabs
|
|
57
|
+
Requires-Dist: elevenlabs; extra == 'elevenlabs'
|
|
58
|
+
Requires-Dist: websockets; extra == 'elevenlabs'
|
|
59
|
+
Provides-Extra: elevenlabs-stt
|
|
60
|
+
Requires-Dist: websockets; extra == 'elevenlabs-stt'
|
|
61
|
+
Provides-Extra: elevenlabs-tts
|
|
62
|
+
Requires-Dist: elevenlabs; extra == 'elevenlabs-tts'
|
|
63
|
+
Provides-Extra: faster-whisper-stt
|
|
64
|
+
Requires-Dist: faster-whisper; extra == 'faster-whisper-stt'
|
|
65
|
+
Provides-Extra: fireredasr-stt
|
|
66
|
+
Requires-Dist: fireredasr; extra == 'fireredasr-stt'
|
|
67
|
+
Provides-Extra: fish-speech-tts
|
|
68
|
+
Provides-Extra: funasr-stt
|
|
69
|
+
Requires-Dist: funasr>=1.1.0; extra == 'funasr-stt'
|
|
70
|
+
Requires-Dist: torch; extra == 'funasr-stt'
|
|
71
|
+
Requires-Dist: torchaudio; extra == 'funasr-stt'
|
|
72
|
+
Provides-Extra: gemma4-stt
|
|
73
|
+
Requires-Dist: mlx-vlm<0.6.2,>=0.6.1; (sys_platform == 'darwin') and extra == 'gemma4-stt'
|
|
74
|
+
Provides-Extra: google
|
|
75
|
+
Provides-Extra: google-stt
|
|
76
|
+
Provides-Extra: google-tts
|
|
77
|
+
Provides-Extra: iflytek
|
|
78
|
+
Requires-Dist: websockets; extra == 'iflytek'
|
|
79
|
+
Provides-Extra: iflytek-stt
|
|
80
|
+
Requires-Dist: websockets; extra == 'iflytek-stt'
|
|
81
|
+
Provides-Extra: iflytek-tts
|
|
82
|
+
Requires-Dist: websockets; extra == 'iflytek-tts'
|
|
83
|
+
Provides-Extra: kimi-audio-stt
|
|
84
|
+
Requires-Dist: torch; extra == 'kimi-audio-stt'
|
|
85
|
+
Provides-Extra: macos-native
|
|
86
|
+
Provides-Extra: minimax-tts
|
|
87
|
+
Provides-Extra: mlx-whisper-stt
|
|
88
|
+
Requires-Dist: mlx-whisper; (sys_platform == 'darwin') and extra == 'mlx-whisper-stt'
|
|
89
|
+
Provides-Extra: mms-stt
|
|
90
|
+
Requires-Dist: soundfile; extra == 'mms-stt'
|
|
91
|
+
Requires-Dist: torch; extra == 'mms-stt'
|
|
92
|
+
Requires-Dist: transformers; extra == 'mms-stt'
|
|
93
|
+
Provides-Extra: moonshine-stt
|
|
94
|
+
Requires-Dist: soundfile; extra == 'moonshine-stt'
|
|
95
|
+
Requires-Dist: torch; extra == 'moonshine-stt'
|
|
96
|
+
Requires-Dist: transformers; extra == 'moonshine-stt'
|
|
97
|
+
Provides-Extra: openai
|
|
98
|
+
Requires-Dist: openai; extra == 'openai'
|
|
99
|
+
Provides-Extra: openai-stt
|
|
100
|
+
Requires-Dist: openai; extra == 'openai-stt'
|
|
101
|
+
Provides-Extra: openai-tts
|
|
102
|
+
Requires-Dist: openai; extra == 'openai-tts'
|
|
103
|
+
Provides-Extra: paraformer-stt
|
|
104
|
+
Requires-Dist: funasr>=1.1.0; extra == 'paraformer-stt'
|
|
105
|
+
Requires-Dist: torch; extra == 'paraformer-stt'
|
|
106
|
+
Requires-Dist: torchaudio; extra == 'paraformer-stt'
|
|
107
|
+
Provides-Extra: parakeet-stt
|
|
108
|
+
Requires-Dist: parakeet-mlx; (sys_platform == 'darwin') and extra == 'parakeet-stt'
|
|
109
|
+
Provides-Extra: phi4-multimodal-stt
|
|
110
|
+
Requires-Dist: accelerate; extra == 'phi4-multimodal-stt'
|
|
111
|
+
Requires-Dist: backoff; extra == 'phi4-multimodal-stt'
|
|
112
|
+
Requires-Dist: peft; extra == 'phi4-multimodal-stt'
|
|
113
|
+
Requires-Dist: pillow; extra == 'phi4-multimodal-stt'
|
|
114
|
+
Requires-Dist: scipy; extra == 'phi4-multimodal-stt'
|
|
115
|
+
Requires-Dist: soundfile; extra == 'phi4-multimodal-stt'
|
|
116
|
+
Requires-Dist: torch; extra == 'phi4-multimodal-stt'
|
|
117
|
+
Requires-Dist: torchvision; extra == 'phi4-multimodal-stt'
|
|
118
|
+
Requires-Dist: transformers; extra == 'phi4-multimodal-stt'
|
|
119
|
+
Provides-Extra: piper-tts
|
|
120
|
+
Requires-Dist: piper-tts; extra == 'piper-tts'
|
|
121
|
+
Provides-Extra: qwen3-asr-stt
|
|
122
|
+
Requires-Dist: modelscope; extra == 'qwen3-asr-stt'
|
|
123
|
+
Requires-Dist: qwen-asr; extra == 'qwen3-asr-stt'
|
|
124
|
+
Provides-Extra: qwen3-omni-stt
|
|
125
|
+
Requires-Dist: accelerate; extra == 'qwen3-omni-stt'
|
|
126
|
+
Requires-Dist: qwen-omni-utils; extra == 'qwen3-omni-stt'
|
|
127
|
+
Requires-Dist: torch; extra == 'qwen3-omni-stt'
|
|
128
|
+
Requires-Dist: transformers; extra == 'qwen3-omni-stt'
|
|
129
|
+
Provides-Extra: sensevoice-stt
|
|
130
|
+
Requires-Dist: funasr>=1.1.0; extra == 'sensevoice-stt'
|
|
131
|
+
Requires-Dist: torch; extra == 'sensevoice-stt'
|
|
132
|
+
Requires-Dist: torchaudio; extra == 'sensevoice-stt'
|
|
133
|
+
Provides-Extra: server
|
|
134
|
+
Requires-Dist: fastapi; extra == 'server'
|
|
135
|
+
Requires-Dist: python-multipart; extra == 'server'
|
|
136
|
+
Requires-Dist: uvicorn; extra == 'server'
|
|
137
|
+
Requires-Dist: websockets; extra == 'server'
|
|
138
|
+
Provides-Extra: sherpa-onnx-stt
|
|
139
|
+
Requires-Dist: websockets; extra == 'sherpa-onnx-stt'
|
|
140
|
+
Provides-Extra: tencent
|
|
141
|
+
Provides-Extra: tencent-stt
|
|
142
|
+
Provides-Extra: tencent-tts
|
|
143
|
+
Provides-Extra: tracing
|
|
144
|
+
Requires-Dist: opentelemetry-api; extra == 'tracing'
|
|
145
|
+
Requires-Dist: opentelemetry-sdk; extra == 'tracing'
|
|
146
|
+
Provides-Extra: volcengine
|
|
147
|
+
Provides-Extra: volcengine-stt
|
|
148
|
+
Provides-Extra: volcengine-tts
|
|
149
|
+
Provides-Extra: vosk-stt
|
|
150
|
+
Requires-Dist: huggingface-hub; extra == 'vosk-stt'
|
|
151
|
+
Requires-Dist: vosk; extra == 'vosk-stt'
|
|
152
|
+
Provides-Extra: voxtral-stt
|
|
153
|
+
Requires-Dist: accelerate; extra == 'voxtral-stt'
|
|
154
|
+
Requires-Dist: mistral-common[audio]>=1.8.1; extra == 'voxtral-stt'
|
|
155
|
+
Requires-Dist: torch; extra == 'voxtral-stt'
|
|
156
|
+
Requires-Dist: transformers>=4.54.0; extra == 'voxtral-stt'
|
|
157
|
+
Provides-Extra: wenet-stt
|
|
158
|
+
Provides-Extra: whisper-stt
|
|
159
|
+
Requires-Dist: openai-whisper; extra == 'whisper-stt'
|
|
160
|
+
Provides-Extra: whisperlivekit-stt
|
|
161
|
+
Requires-Dist: websockets; extra == 'whisperlivekit-stt'
|
|
162
|
+
Provides-Extra: windows-native
|
|
163
|
+
Requires-Dist: pyttsx3; (sys_platform == 'win32') and extra == 'windows-native'
|
|
164
|
+
Description-Content-Type: text/markdown
|
|
165
|
+
|
|
1
166
|
# OpenSpeechAPI
|
|
2
167
|
|
|
3
168
|
> Unified speech interface for STT/TTS providers — one API, multiple backends.
|
|
@@ -8,22 +173,24 @@ OpenSpeechAPI 提供统一的语音接口,通过字符串指定 provider 即
|
|
|
8
173
|
|
|
9
174
|
### 安装
|
|
10
175
|
|
|
176
|
+
**方式一 · 通过 PyPI 安装(直接使用)**
|
|
11
177
|
```bash
|
|
12
|
-
#
|
|
13
|
-
pip install
|
|
14
|
-
|
|
15
|
-
#
|
|
16
|
-
|
|
17
|
-
pip install -e ".[faster-whisper]" # 本地 faster-whisper STT
|
|
18
|
-
pip install -e ".[openai,faster-whisper]" # 指定多个
|
|
19
|
-
|
|
20
|
-
# 仅核心包(不含任何 provider)
|
|
21
|
-
pip install -e .
|
|
178
|
+
pip install "openspeechapi[server]" # 起 HTTP 服务 / WebUI 必须带 [server](fastapi/uvicorn)
|
|
179
|
+
pip install "openspeechapi[server,openai]" # 服务 + 指定 provider
|
|
180
|
+
pip install "openspeechapi[server,all]" # 服务 + 全部 provider
|
|
181
|
+
pip install openspeechapi # 仅核心库(库模式;不含服务,也起不了 server)
|
|
182
|
+
```
|
|
22
183
|
|
|
23
|
-
|
|
24
|
-
|
|
184
|
+
**方式二 · 源码安装(开发,可编辑)**
|
|
185
|
+
```bash
|
|
186
|
+
git clone https://github.com/wingsfly/OpenSpeechAPI.git
|
|
187
|
+
cd OpenSpeechAPI
|
|
188
|
+
uv venv && uv pip install -e ".[server,dev]" # 或 pip install -e ".[server,dev]";按需换 .[all] 等
|
|
25
189
|
```
|
|
26
190
|
|
|
191
|
+
> ⚠️ 纯 `pip install openspeechapi`(核心库)**不含 fastapi/uvicorn**,无法 `serve`;起服务请带 `[server]`。
|
|
192
|
+
> 两种方式启动服务的差异见下方 [启动服务](#启动服务)。
|
|
193
|
+
|
|
27
194
|
### 30 秒上手 — TTS
|
|
28
195
|
|
|
29
196
|
```python
|
|
@@ -205,7 +372,24 @@ python -m openspeechapi.demo tts -t "Hello world" --play \
|
|
|
205
372
|
| `whisperlivekit-stt` | STT | WhisperLiveKit 本地服务(Deepgram 兼容 WS,支持 MLX 后端) | local | `pip install -e ".[whisperlivekit-stt]"` |
|
|
206
373
|
| `elevenlabs-stt` | STT | ElevenLabs Scribe API(云端,支持实时流式 WS + 批量) | remote | `pip install -e ".[elevenlabs-stt]"` |
|
|
207
374
|
| `deepgram` | STT | Deepgram API(云端,支持实时流式) | remote | `pip install -e ".[deepgram]"` |
|
|
208
|
-
| `gemma4` | STT | Google Gemma 4 多模态 ASR(macOS/MLX 本地,E4B
|
|
375
|
+
| `gemma4` | STT | Google Gemma 4 多模态 ASR(macOS/MLX 本地,E2B/E4B,>30s 自动分段;任务:转写 / 翻译(任意目标语言) / 理解 / 问答 / 语种识别) | subprocess | `pip install -e ".[gemma4-stt]"` |
|
|
376
|
+
| `sensevoice` | STT | SenseVoice-Small 本地多语种 ASR(FunASR,zh/粤/en/ja/ko,比 Whisper 快 ~15-50×) | subprocess | `pip install -e ".[sensevoice-stt]"` |
|
|
377
|
+
| `qwen3-asr` | STT | Qwen3-ASR 本地多语种 ASR(2026 开源 SOTA,中/方言/英,0.6B/1.7B) | subprocess | `pip install -e ".[qwen3-asr-stt]"` |
|
|
378
|
+
| `mlx-whisper` | STT | Whisper on Apple MLX(本地,large-v3 / turbo,中/英多语种,仅 Apple Silicon) | subprocess | `pip install -e ".[mlx-whisper-stt]"` |
|
|
379
|
+
| `paraformer` | STT | Paraformer 本地 ASR(FunASR,普通话 SOTA 级,VAD+标点,zh/en) | subprocess | `pip install -e ".[paraformer-stt]"` |
|
|
380
|
+
| `funasr` | STT | FunASR 总入口(任选模型库 + VAD/标点/说话人分离) | subprocess | `pip install -e ".[funasr-stt]"` |
|
|
381
|
+
| `fireredasr` | STT | 小红书 FireRedASR(普通话 SOTA+方言+英文,歌词识别,AED/LLM) | subprocess | `pip install -e ".[fireredasr-stt]"` |
|
|
382
|
+
| `dolphin` | STT | DataoceanAI Dolphin(40 东方语种 + 22 中文方言,small/base) | subprocess | `pip install -e ".[dolphin-stt]"` |
|
|
383
|
+
| `wenet` | STT | WeNet U2++ Conformer(生产级,zh/en 预置;流式支持后续提供) | subprocess | WebUI Engines 安装,或 `pip install 'wenet @ git+https://github.com/wenet-e2e/wenet.git'` |
|
|
384
|
+
| `canary-qwen` | STT | NVIDIA Canary-Qwen-2.5B(Open ASR 英文第1,SALM;仅英文,需 NeMo+GPU) | subprocess | WebUI Engines 安装,或 `pip install 'nemo_toolkit[asr] @ git+https://github.com/NVIDIA/NeMo.git'` |
|
|
385
|
+
| `parakeet` | STT | NVIDIA Parakeet-TDT on MLX(最快,v2 英文/v3 欧语;中文弱,仅 Apple Silicon) | subprocess | `pip install -e ".[parakeet-stt]"` |
|
|
386
|
+
| `qwen3-omni` | STT | Qwen3-Omni-30B 全模态 LLM(ASR+理解,zh/en+;需大 GPU ~60GB) | subprocess | `pip install -e ".[qwen3-omni-stt]"` |
|
|
387
|
+
| `voxtral` | STT | Mistral Voxtral(Mini-3B/Small-24B,转写+理解,多语种;建议 GPU) | subprocess | `pip install -e ".[voxtral-stt]"` |
|
|
388
|
+
| `phi4-multimodal` | STT | 微软 Phi-4-multimodal(多模态 LLM,ASR+理解,zh/en+;建议 GPU) | subprocess | `pip install -e ".[phi4-multimodal-stt]"` |
|
|
389
|
+
| `kimi-audio` | STT | 月之暗面 Kimi-Audio-7B(音频基础模型,ASR+理解,zh/en;需 Linux+CUDA/flash-attn) | subprocess | WebUI Engines 安装,或 `pip install 'kimi-audio @ git+https://github.com/MoonshotAI/Kimi-Audio.git'` |
|
|
390
|
+
| `moonshine` | STT | Useful Sensors Moonshine(边缘/实时英文 ASR,tiny/base,轻量) | subprocess | `pip install -e ".[moonshine-stt]"` |
|
|
391
|
+
| `vosk` | STT | Vosk(Kaldi 离线,20+ 语言含 zh/en,轻量低资源) | subprocess | `pip install -e ".[vosk-stt]"` |
|
|
392
|
+
| `mms` | STT | Meta MMS(Wav2Vec2-CTC,1000+ 语言含 zh/en,按 ISO 639-3 选语言) | subprocess | `pip install -e ".[mms-stt]"` |
|
|
209
393
|
| `openai-tts` | TTS | OpenAI Speech API(云端,支持流式) | remote | `pip install -e ".[openai]"` |
|
|
210
394
|
| `elevenlabs` | TTS | ElevenLabs 高质量语音(云端,支持 HTTP/WS 流式) | remote | `pip install -e ".[elevenlabs-tts]"` |
|
|
211
395
|
| `minimax` | TTS | Minimax 语音合成(云端) | remote | `pip install -e ".[minimax-tts]"` |
|
|
@@ -229,6 +413,8 @@ print(list_providers())
|
|
|
229
413
|
# 'whisperlivekit-stt']
|
|
230
414
|
```
|
|
231
415
|
|
|
416
|
+
> **音频输入格式**:STT 上传支持 WAV/PCM/MP3/FLAC/OGG/WebM 等。引擎不能直接处理的格式会由服务端自动转为 16k 单声道 WAV(压缩格式需 `ffmpeg`);缺 ffmpeg 且格式不被支持时返回 400,Web UI 会在上传/录音前拦截提示。详见 [docs/architecture/audio-format-negotiation.md](docs/architecture/audio-format-negotiation.md)。
|
|
417
|
+
|
|
232
418
|
## Provider 参数
|
|
233
419
|
|
|
234
420
|
### `openai-stt`
|
|
@@ -263,6 +449,204 @@ create_provider("faster-whisper",
|
|
|
263
449
|
)
|
|
264
450
|
```
|
|
265
451
|
|
|
452
|
+
### `gemma4`
|
|
453
|
+
|
|
454
|
+
```python
|
|
455
|
+
create_provider("gemma4",
|
|
456
|
+
model="mlx-community/gemma-4-E4B-it-8bit", # E2B/E4B(8bit 翻译更准;勿用 12B)
|
|
457
|
+
task="transcribe", # transcribe|translate|understand|qa|detect_language
|
|
458
|
+
target_language="English", # task=translate 的目标语言(任意语言)
|
|
459
|
+
include_transcript=False, # task=translate:同时输出源转写 + 译文
|
|
460
|
+
)
|
|
461
|
+
```
|
|
462
|
+
|
|
463
|
+
macOS / Apple Silicon 本地多模态 ASR(mlx-vlm)。5 个任务及全部字段可在 Web UI 的 Lab「Advanced Options」按请求覆盖。详见 [docs/architecture/gemma4-stt-provider.md](docs/architecture/gemma4-stt-provider.md)。
|
|
464
|
+
|
|
465
|
+
### `sensevoice`
|
|
466
|
+
|
|
467
|
+
```python
|
|
468
|
+
create_provider("sensevoice",
|
|
469
|
+
model="FunAudioLLM/SenseVoiceSmall",
|
|
470
|
+
language="auto", # auto|zh|en|yue|ja|ko|nospeech
|
|
471
|
+
device="cpu", # cpu|mps|cuda
|
|
472
|
+
use_itn=True, # 标点/数字规整
|
|
473
|
+
)
|
|
474
|
+
```
|
|
475
|
+
|
|
476
|
+
FunASR 本地多语种 ASR(zh/粤/en/ja/ko),非自回归、极快;全部字段可在 Lab「Advanced Options」按请求覆盖。详见 [docs/architecture/sensevoice-stt-provider.md](docs/architecture/sensevoice-stt-provider.md)。
|
|
477
|
+
|
|
478
|
+
### `qwen3-asr`
|
|
479
|
+
|
|
480
|
+
```python
|
|
481
|
+
create_provider("qwen3-asr",
|
|
482
|
+
model="Qwen/Qwen3-ASR-0.6B", # 或 Qwen/Qwen3-ASR-1.7B
|
|
483
|
+
language="auto", # auto|Chinese|English|Cantonese|Japanese|Korean
|
|
484
|
+
device="cpu", # cpu|mps|cuda
|
|
485
|
+
)
|
|
486
|
+
```
|
|
487
|
+
|
|
488
|
+
阿里 Qwen3-ASR(2026 开源 ASR SOTA,中/方言/英)本地推理(qwen-asr 包)。需另装 `torch`。详见 [docs/architecture/qwen3-asr-stt-provider.md](docs/architecture/qwen3-asr-stt-provider.md)。
|
|
489
|
+
|
|
490
|
+
### `mlx-whisper`
|
|
491
|
+
|
|
492
|
+
```python
|
|
493
|
+
create_provider("mlx-whisper",
|
|
494
|
+
model="mlx-community/whisper-large-v3-turbo", # 或 whisper-large-v3-mlx
|
|
495
|
+
language="auto", # auto|en|zh|yue|ja|ko|...
|
|
496
|
+
)
|
|
497
|
+
```
|
|
498
|
+
|
|
499
|
+
Apple Silicon 原生 Whisper(MLX),large-v3 / turbo,中英文多语种。仅 macOS/Apple Silicon。详见 [docs/architecture/mlx-whisper-stt-provider.md](docs/architecture/mlx-whisper-stt-provider.md)。
|
|
500
|
+
|
|
501
|
+
### `paraformer`
|
|
502
|
+
|
|
503
|
+
```python
|
|
504
|
+
create_provider("paraformer",
|
|
505
|
+
model="funasr/paraformer-zh", # 或 funasr/paraformer-en
|
|
506
|
+
vad=True, punc=True, # VAD 切分 + 标点恢复
|
|
507
|
+
)
|
|
508
|
+
```
|
|
509
|
+
|
|
510
|
+
阿里 Paraformer(FunASR),普通话 SOTA 级非自回归 ASR,带 VAD + 标点。详见 [docs/architecture/paraformer-stt-provider.md](docs/architecture/paraformer-stt-provider.md)。
|
|
511
|
+
|
|
512
|
+
### `funasr`
|
|
513
|
+
|
|
514
|
+
```python
|
|
515
|
+
create_provider("funasr",
|
|
516
|
+
model="funasr/paraformer-zh", # 模型库任意条目
|
|
517
|
+
spk=True, # CAM++ 说话人分离 → [spk0]/[spk1] 标注
|
|
518
|
+
)
|
|
519
|
+
```
|
|
520
|
+
|
|
521
|
+
FunASR 通用总入口:任选模型库模型 + VAD/标点/**说话人分离**。详见 [docs/architecture/funasr-stt-provider.md](docs/architecture/funasr-stt-provider.md)。
|
|
522
|
+
|
|
523
|
+
### `fireredasr`
|
|
524
|
+
|
|
525
|
+
```python
|
|
526
|
+
create_provider("fireredasr",
|
|
527
|
+
model_type="aed", # aed(≤60s) | llm(≤30s);权重自动下载
|
|
528
|
+
)
|
|
529
|
+
```
|
|
530
|
+
|
|
531
|
+
小红书 FireRedASR,普通话公开基准 SOTA + 方言 + 英文,歌词识别强。详见 [docs/architecture/fireredasr-stt-provider.md](docs/architecture/fireredasr-stt-provider.md)。
|
|
532
|
+
|
|
533
|
+
### `dolphin`
|
|
534
|
+
|
|
535
|
+
```python
|
|
536
|
+
create_provider("dolphin",
|
|
537
|
+
model_name="small", # small | base
|
|
538
|
+
lang_sym="zh", region_sym="CN", # 留空则自动检测
|
|
539
|
+
)
|
|
540
|
+
```
|
|
541
|
+
|
|
542
|
+
DataoceanAI Dolphin,40 种东方语言 + 22 种中文方言。详见 [docs/architecture/dolphin-stt-provider.md](docs/architecture/dolphin-stt-provider.md)。
|
|
543
|
+
|
|
544
|
+
### `wenet`
|
|
545
|
+
|
|
546
|
+
```python
|
|
547
|
+
create_provider("wenet",
|
|
548
|
+
model="chinese", # chinese | english
|
|
549
|
+
)
|
|
550
|
+
```
|
|
551
|
+
|
|
552
|
+
WeNet 生产级 U2++ Conformer(zh/en 预置)。从 git 安装(不在 PyPI)。详见 [docs/architecture/wenet-stt-provider.md](docs/architecture/wenet-stt-provider.md)。
|
|
553
|
+
|
|
554
|
+
### `canary-qwen`
|
|
555
|
+
|
|
556
|
+
```python
|
|
557
|
+
create_provider("canary-qwen",
|
|
558
|
+
model="nvidia/canary-qwen-2.5b",
|
|
559
|
+
device="cuda", dtype="bfloat16", # 仅英文;强烈建议 GPU
|
|
560
|
+
)
|
|
561
|
+
```
|
|
562
|
+
|
|
563
|
+
NVIDIA Canary-Qwen-2.5B(Open ASR 英文第 1,SALM)。**仅英文**;NeMo 重型安装 + 建议 GPU。详见 [docs/architecture/canary-qwen-stt-provider.md](docs/architecture/canary-qwen-stt-provider.md)。
|
|
564
|
+
|
|
565
|
+
### `parakeet`
|
|
566
|
+
|
|
567
|
+
```python
|
|
568
|
+
create_provider("parakeet",
|
|
569
|
+
model="mlx-community/parakeet-tdt-0.6b-v2",  # v2:仅英文;v3:另支持欧洲语言
|
|
570
|
+
)
|
|
571
|
+
```
|
|
572
|
+
|
|
573
|
+
NVIDIA Parakeet-TDT on Apple MLX,榜上最快。英文/欧语为主,**中文弱**;仅 Apple Silicon。详见 [docs/architecture/parakeet-stt-provider.md](docs/architecture/parakeet-stt-provider.md)。
|
|
574
|
+
|
|
575
|
+
### `qwen3-omni`
|
|
576
|
+
|
|
577
|
+
```python
|
|
578
|
+
create_provider("qwen3-omni",
|
|
579
|
+
model="Qwen/Qwen3-Omni-30B-A3B-Instruct",
|
|
580
|
+
prompt="Transcribe the audio into text.", # 改成问题即可做音频问答
|
|
581
|
+
)
|
|
582
|
+
```
|
|
583
|
+
|
|
584
|
+
阿里 Qwen3-Omni-30B-A3B 全模态 LLM(ASR + 音频理解,zh/en+)。**需大显存 GPU(~60GB),笔记本装不下**。详见 [docs/architecture/qwen3-omni-stt-provider.md](docs/architecture/qwen3-omni-stt-provider.md)。
|
|
585
|
+
|
|
586
|
+
### `voxtral`
|
|
587
|
+
|
|
588
|
+
```python
|
|
589
|
+
create_provider("voxtral",
|
|
590
|
+
model="mistralai/Voxtral-Mini-3B-2507", # 或 Voxtral-Small-24B-2507
|
|
591
|
+
language="en",
|
|
592
|
+
)
|
|
593
|
+
```
|
|
594
|
+
|
|
595
|
+
Mistral Voxtral(转写 + 音频理解,多语种)。3B/24B,建议 GPU。详见 [docs/architecture/voxtral-stt-provider.md](docs/architecture/voxtral-stt-provider.md)。
|
|
596
|
+
|
|
597
|
+
### `phi4-multimodal`
|
|
598
|
+
|
|
599
|
+
```python
|
|
600
|
+
create_provider("phi4-multimodal",
|
|
601
|
+
model="microsoft/Phi-4-multimodal-instruct",
|
|
602
|
+
prompt="Transcribe the audio clip into text.",
|
|
603
|
+
)
|
|
604
|
+
```
|
|
605
|
+
|
|
606
|
+
微软 Phi-4-multimodal,紧凑多模态 LLM(ASR + 音频理解,zh/en+)。建议 GPU。详见 [docs/architecture/phi4-multimodal-stt-provider.md](docs/architecture/phi4-multimodal-stt-provider.md)。
|
|
607
|
+
|
|
608
|
+
### `kimi-audio`
|
|
609
|
+
|
|
610
|
+
```python
|
|
611
|
+
create_provider("kimi-audio",
|
|
612
|
+
model="moonshotai/Kimi-Audio-7B-Instruct",
|
|
613
|
+
prompt="Please transcribe the audio into text.",
|
|
614
|
+
)
|
|
615
|
+
```
|
|
616
|
+
|
|
617
|
+
月之暗面 Kimi-Audio-7B 音频基础模型(ASR + 音频理解,zh/en)。需从 git 安装,并需要 Linux + CUDA(flash-attn)环境。详见 [docs/architecture/kimi-audio-stt-provider.md](docs/architecture/kimi-audio-stt-provider.md)。
|
|
618
|
+
|
|
619
|
+
### `moonshine`
|
|
620
|
+
|
|
621
|
+
```python
|
|
622
|
+
create_provider("moonshine",
|
|
623
|
+
model="UsefulSensors/moonshine-base", # base | tiny
|
|
624
|
+
)
|
|
625
|
+
```
|
|
626
|
+
|
|
627
|
+
Useful Sensors Moonshine,边缘/实时英文 ASR,轻量快速。详见 [docs/architecture/moonshine-stt-provider.md](docs/architecture/moonshine-stt-provider.md)。
|
|
628
|
+
|
|
629
|
+
### `vosk`
|
|
630
|
+
|
|
631
|
+
```python
|
|
632
|
+
create_provider("vosk",
|
|
633
|
+
model="vosk-model-small-en-us-0.15", # 中文: vosk-model-small-cn-0.22
|
|
634
|
+
)
|
|
635
|
+
```
|
|
636
|
+
|
|
637
|
+
Vosk(Kaldi 离线),20+ 语言,轻量低资源,模型自动下载。详见 [docs/architecture/vosk-stt-provider.md](docs/architecture/vosk-stt-provider.md)。
|
|
638
|
+
|
|
639
|
+
### `mms`
|
|
640
|
+
|
|
641
|
+
```python
|
|
642
|
+
create_provider("mms",
|
|
643
|
+
model="facebook/mms-1b-all",
|
|
644
|
+
language="eng", # ISO 639-3: eng / cmn / yue / jpn ...
|
|
645
|
+
)
|
|
646
|
+
```
|
|
647
|
+
|
|
648
|
+
Meta MMS(Wav2Vec2-CTC),1000+ 语言含中英,按 **ISO 639-3** 码切换语言适配器;CTC 输出小写无标点。详见 [docs/architecture/mms-stt-provider.md](docs/architecture/mms-stt-provider.md)。
|
|
649
|
+
|
|
266
650
|
### `openai-tts`
|
|
267
651
|
|
|
268
652
|
```python
|
|
@@ -407,10 +791,25 @@ bash scripts/engines/macos-stt/install.sh
|
|
|
407
791
|
|
|
408
792
|
### 启动服务
|
|
409
793
|
|
|
794
|
+
**pip 安装后**(已带 `[server]`)—— 配置自动解析/生成,开箱即起:
|
|
410
795
|
```bash
|
|
411
|
-
openspeechapi serve
|
|
796
|
+
openspeechapi serve # 自动解析配置;没有则生成默认(macOS 默认 macos_tts)
|
|
797
|
+
openspeechapi serve --port 8600 # 指定端口
|
|
412
798
|
```
|
|
413
799
|
|
|
800
|
+
**源码目录运行**:
|
|
801
|
+
```bash
|
|
802
|
+
python -m openspeechapi.cli serve # 或 openspeechapi serve;在仓库目录优先用 ./providers.yaml
|
|
803
|
+
```
|
|
804
|
+
|
|
805
|
+
启动后打开 WebUI:**http://127.0.0.1:8600/ui/**
|
|
806
|
+
|
|
807
|
+
**配置解析顺序**(`--config` 可放在 `serve` **前或后**,例如 `openspeechapi --config x serve` 或 `openspeechapi serve --config x`):
|
|
808
|
+
1. 显式 `--config <path>`
|
|
809
|
+
2. 当前目录 `./providers.yaml`(源码目录运行时优先)
|
|
810
|
+
3. `~/.config/openspeechapi/providers.yaml`(遵循 `XDG_CONFIG_HOME`)
|
|
811
|
+
4. 都没有 → 在 `~/.config/openspeechapi/providers.yaml` **自动生成**一份可用默认配置
|
|
812
|
+
|
|
414
813
|
### Python Client(与 Library 模式接口一致)
|
|
415
814
|
|
|
416
815
|
```python
|