openspeechapi 0.2.8__tar.gz → 0.2.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openspeechapi-0.2.9/PKG-INFO +763 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/README.md +50 -32
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/__init__.py +5 -5
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/client/client.py +1 -1
- openspeechapi-0.2.9/openspeechapi/core/base.py +142 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/core/models.py +5 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/demo.py +0 -1
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/dispatch/dispatcher.py +12 -5
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/dispatch/executors/subprocess_exec.py +43 -28
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/engine_catalog.py +67 -3
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/engine_registry.yaml +48 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/factory.py +9 -1
- openspeechapi-0.2.9/openspeechapi/providers/stt/gemma4.py +283 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/providers/stt/iflytek.py +0 -1
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/providers/stt/macos_speech.py +5 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/providers/stt/whisperlivekit.py +1 -1
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/providers/tts/alibaba.py +1 -2
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/providers/tts/azure_speech.py +0 -1
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/providers/tts/baidu.py +1 -2
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/providers/tts/coqui.py +0 -1
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/providers/tts/cosyvoice.py +1 -1
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/providers/tts/deepgram.py +0 -1
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/providers/tts/google_cloud.py +0 -1
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/providers/tts/macos_say.py +0 -1
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/providers/tts/tencent.py +1 -2
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/providers/tts/volcengine.py +1 -2
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/server/app.py +0 -1
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/server/auth.py +0 -1
- openspeechapi-0.2.9/openspeechapi/server/native_installer.py +211 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/server/routes/management.py +128 -11
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/server/routes/stt.py +15 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/server/routes/webui.py +8 -2
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/server/webui/app.js +193 -9
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/server/webui/index.html +5 -1
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/server/webui/styles.css +6 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/server/ws/stt_stream.py +13 -1
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/utils/audio_converter.py +0 -2
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/providers.example.yaml +14 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/pyproject.toml +14 -2
- openspeechapi-0.2.9/scripts/engines/macos-stt/_bundle.sh +51 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/scripts/engines/macos-stt/install.sh +6 -35
- openspeechapi-0.2.8/.dockerignore +0 -16
- openspeechapi-0.2.8/.env.example +0 -12
- openspeechapi-0.2.8/.github/workflows/ci.yml +0 -31
- openspeechapi-0.2.8/.tmp/audio/en.aiff +0 -0
- openspeechapi-0.2.8/.tmp/audio/en_16k.wav +0 -0
- openspeechapi-0.2.8/.tmp/audio/en_16k_pad6.wav +0 -0
- openspeechapi-0.2.8/.tmp/audio/en_long.aiff +0 -0
- openspeechapi-0.2.8/.tmp/audio/en_long_16k.wav +0 -0
- openspeechapi-0.2.8/.tmp/audio/en_mid.aiff +0 -0
- openspeechapi-0.2.8/.tmp/audio/en_mid_16k.wav +0 -0
- openspeechapi-0.2.8/.tmp/audio/zh.aiff +0 -0
- openspeechapi-0.2.8/.tmp/audio/zh_16k.wav +0 -0
- openspeechapi-0.2.8/.tmp/openspeech-8600.log +0 -5
- openspeechapi-0.2.8/.tmp/openspeech-serve.log +0 -0
- openspeechapi-0.2.8/.tmp/webui-server.log +0 -5
- openspeechapi-0.2.8/.tmp/webui-server.pid +0 -1
- openspeechapi-0.2.8/.tmp/wlk12101.log +0 -15
- openspeechapi-0.2.8/.tmp/wlk12101.pid +0 -1
- openspeechapi-0.2.8/.tmp/wlk12102.log +0 -14
- openspeechapi-0.2.8/.tmp/wlk12102.pid +0 -1
- openspeechapi-0.2.8/AGENTS.md +0 -36
- openspeechapi-0.2.8/CLAUDE.md +0 -73
- openspeechapi-0.2.8/Dockerfile +0 -20
- openspeechapi-0.2.8/PKG-INFO +0 -101
- openspeechapi-0.2.8/aibox-script/aibox-1.0.0-SNAPSHOT-stdout.log +0 -33495
- openspeechapi-0.2.8/aibox-script/aibox.2026-04-02.log +0 -96410
- openspeechapi-0.2.8/aibox-script/com.user.restart-jar.plist +0 -29
- openspeechapi-0.2.8/aibox-script/restart-jar.sh +0 -45
- openspeechapi-0.2.8/aibox-script.tar.gz +0 -0
- openspeechapi-0.2.8/docker-compose.yml +0 -30
- openspeechapi-0.2.8/docs/architecture/local-engine-manager.md +0 -55
- openspeechapi-0.2.8/docs/architecture/logging-spec.md +0 -225
- openspeechapi-0.2.8/docs/architecture/stt-engineering-optimization-guide.md +0 -778
- openspeechapi-0.2.8/docs/architecture/stt-streaming-spec.md +0 -302
- openspeechapi-0.2.8/docs/architecture/webui-phase-a.md +0 -38
- openspeechapi-0.2.8/docs/engines/fish-speech-docker.md +0 -40
- openspeechapi-0.2.8/docs/engines/fish-speech-native.md +0 -35
- openspeechapi-0.2.8/docs/engines/stt-native-models.md +0 -93
- openspeechapi-0.2.8/docs/superpowers/plans/2026-04-01-phase1-implementation.md +0 -4171
- openspeechapi-0.2.8/docs/superpowers/plans/2026-04-11-macos-native-tts-stt.md +0 -1982
- openspeechapi-0.2.8/docs/superpowers/specs/2026-04-01-openspeech-api-design.md +0 -515
- openspeechapi-0.2.8/docs/superpowers/specs/2026-04-03-hot-lazy-loading.md +0 -155
- openspeechapi-0.2.8/docs/superpowers/specs/2026-04-03-phase2-protocol-layer.md +0 -355
- openspeechapi-0.2.8/docs/superpowers/specs/2026-04-03-phase3-production.md +0 -96
- openspeechapi-0.2.8/docs/superpowers/specs/2026-04-11-macos-native-tts-stt-design.md +0 -546
- openspeechapi-0.2.8/docs/superpowers/specs/2026-04-12-cloud-providers-webui-design.md +0 -150
- openspeechapi-0.2.8/docs/superpowers/specs/2026-04-15-streaming-tts-stt-fixes-display-names.md +0 -218
- openspeechapi-0.2.8/docs/superpowers/specs/2026-04-16-provider-management-engines-rename.md +0 -273
- openspeechapi-0.2.8/examples/client_stt.py +0 -33
- openspeechapi-0.2.8/examples/client_tts.py +0 -28
- openspeechapi-0.2.8/examples/stt_simple.py +0 -48
- openspeechapi-0.2.8/examples/tts_simple.py +0 -39
- openspeechapi-0.2.8/openspeechapi/core/base.py +0 -75
- openspeechapi-0.2.8/output/output.wav +0 -0
- openspeechapi-0.2.8/output.wav +0 -0
- openspeechapi-0.2.8/tests/__init__.py +0 -0
- openspeechapi-0.2.8/tests/conftest.py +0 -77
- openspeechapi-0.2.8/tests/e2e/__init__.py +0 -0
- openspeechapi-0.2.8/tests/e2e/conftest.py +0 -40
- openspeechapi-0.2.8/tests/e2e/test_fanout_e2e.py +0 -105
- openspeechapi-0.2.8/tests/e2e/test_faster_whisper_e2e.py +0 -78
- openspeechapi-0.2.8/tests/e2e/test_openai_e2e.py +0 -104
- openspeechapi-0.2.8/tests/e2e/test_webui_e2e.py +0 -696
- openspeechapi-0.2.8/tests/fixtures/hello.wav +0 -0
- openspeechapi-0.2.8/tests/integration/__init__.py +0 -0
- openspeechapi-0.2.8/tests/integration/test_fanout_integration.py +0 -53
- openspeechapi-0.2.8/tests/integration/test_in_process_integration.py +0 -71
- openspeechapi-0.2.8/tests/integration/test_server_client.py +0 -110
- openspeechapi-0.2.8/tests/unit/__init__.py +0 -0
- openspeechapi-0.2.8/tests/unit/test_aim_resolver.py +0 -77
- openspeechapi-0.2.8/tests/unit/test_audio_converter.py +0 -296
- openspeechapi-0.2.8/tests/unit/test_audio_playback.py +0 -41
- openspeechapi-0.2.8/tests/unit/test_base.py +0 -77
- openspeechapi-0.2.8/tests/unit/test_cli.py +0 -101
- openspeechapi-0.2.8/tests/unit/test_cli_engine.py +0 -137
- openspeechapi-0.2.8/tests/unit/test_client.py +0 -94
- openspeechapi-0.2.8/tests/unit/test_config.py +0 -81
- openspeechapi-0.2.8/tests/unit/test_context.py +0 -56
- openspeechapi-0.2.8/tests/unit/test_debug_observer.py +0 -52
- openspeechapi-0.2.8/tests/unit/test_dispatcher.py +0 -271
- openspeechapi-0.2.8/tests/unit/test_docker_backend_progress.py +0 -28
- openspeechapi-0.2.8/tests/unit/test_engine_registry.py +0 -13
- openspeechapi-0.2.8/tests/unit/test_enums.py +0 -39
- openspeechapi-0.2.8/tests/unit/test_executor_base.py +0 -50
- openspeechapi-0.2.8/tests/unit/test_fanout.py +0 -166
- openspeechapi-0.2.8/tests/unit/test_filters.py +0 -171
- openspeechapi-0.2.8/tests/unit/test_hot_reload.py +0 -211
- openspeechapi-0.2.8/tests/unit/test_in_process.py +0 -104
- openspeechapi-0.2.8/tests/unit/test_latency_observer.py +0 -63
- openspeechapi-0.2.8/tests/unit/test_lifecycle.py +0 -115
- openspeechapi-0.2.8/tests/unit/test_local_engine_task_store.py +0 -31
- openspeechapi-0.2.8/tests/unit/test_local_engines_manager.py +0 -102
- openspeechapi-0.2.8/tests/unit/test_logging.py +0 -406
- openspeechapi-0.2.8/tests/unit/test_metrics_observer.py +0 -85
- openspeechapi-0.2.8/tests/unit/test_models.py +0 -93
- openspeechapi-0.2.8/tests/unit/test_native_backend.py +0 -269
- openspeechapi-0.2.8/tests/unit/test_observer_base.py +0 -257
- openspeechapi-0.2.8/tests/unit/test_plugin_mechanism.py +0 -120
- openspeechapi-0.2.8/tests/unit/test_providers/__init__.py +0 -0
- openspeechapi-0.2.8/tests/unit/test_providers/test_cloud_providers.py +0 -342
- openspeechapi-0.2.8/tests/unit/test_providers/test_elevenlabs_stt.py +0 -209
- openspeechapi-0.2.8/tests/unit/test_providers/test_macos_say.py +0 -324
- openspeechapi-0.2.8/tests/unit/test_providers/test_macos_speech.py +0 -315
- openspeechapi-0.2.8/tests/unit/test_providers/test_openai_base_url.py +0 -47
- openspeechapi-0.2.8/tests/unit/test_providers/test_openai_stt.py +0 -163
- openspeechapi-0.2.8/tests/unit/test_providers/test_openai_tts.py +0 -194
- openspeechapi-0.2.8/tests/unit/test_providers/test_sherpa_onnx_stt.py +0 -84
- openspeechapi-0.2.8/tests/unit/test_providers/test_stt_stubs.py +0 -448
- openspeechapi-0.2.8/tests/unit/test_providers/test_tts_stubs.py +0 -826
- openspeechapi-0.2.8/tests/unit/test_providers/test_whisperlivekit_stt.py +0 -187
- openspeechapi-0.2.8/tests/unit/test_registry.py +0 -55
- openspeechapi-0.2.8/tests/unit/test_remote.py +0 -81
- openspeechapi-0.2.8/tests/unit/test_server/__init__.py +0 -0
- openspeechapi-0.2.8/tests/unit/test_server/test_auth.py +0 -76
- openspeechapi-0.2.8/tests/unit/test_server/test_config_api.py +0 -159
- openspeechapi-0.2.8/tests/unit/test_server/test_routes.py +0 -399
- openspeechapi-0.2.8/tests/unit/test_server/test_websocket.py +0 -159
- openspeechapi-0.2.8/tests/unit/test_subprocess.py +0 -138
- openspeechapi-0.2.8/tests/unit/test_usage_observer.py +0 -87
- openspeechapi-0.2.8/tests/unit/test_watcher.py +0 -179
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/.gitignore +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/__main__.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/cli.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/client/__init__.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/config.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/core/__init__.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/core/enums.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/core/registry.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/core/settings.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/dispatch/__init__.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/dispatch/context.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/dispatch/executors/__init__.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/dispatch/executors/base.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/dispatch/executors/in_process.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/dispatch/executors/remote.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/dispatch/fanout.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/dispatch/filters.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/dispatch/lifecycle.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/dispatch/watcher.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/exceptions.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/local_engines/__init__.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/local_engines/aim_resolver.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/local_engines/backends/__init__.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/local_engines/backends/docker_backend.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/local_engines/backends/native_backend.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/local_engines/base.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/local_engines/engines/__init__.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/local_engines/engines/faster_whisper.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/local_engines/engines/fish_speech.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/local_engines/engines/sherpa_onnx.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/local_engines/engines/whisper.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/local_engines/engines/whisperlivekit.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/local_engines/manager.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/local_engines/models.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/local_engines/progress.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/local_engines/registry.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/local_engines/task_store.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/local_engines/tasks.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/logging_config.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/observe/__init__.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/observe/base.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/observe/debug.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/observe/latency.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/observe/metrics.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/observe/tracing.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/observe/usage.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/providers/__init__.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/providers/_template.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/providers/stt/__init__.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/providers/stt/alibaba.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/providers/stt/assemblyai.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/providers/stt/azure_speech.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/providers/stt/baidu.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/providers/stt/deepgram.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/providers/stt/elevenlabs.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/providers/stt/faster_whisper.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/providers/stt/google_cloud.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/providers/stt/openai.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/providers/stt/sherpa_onnx.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/providers/stt/tencent.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/providers/stt/volcengine.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/providers/stt/whisper.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/providers/stt/windows_speech.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/providers/tts/__init__.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/providers/tts/elevenlabs.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/providers/tts/fish_speech.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/providers/tts/iflytek.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/providers/tts/minimax.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/providers/tts/openai.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/providers/tts/piper.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/providers/tts/windows_sapi.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/server/__init__.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/server/middleware.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/server/routes/__init__.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/server/routes/tts.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/server/ws/__init__.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/server/ws/tts_stream.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/telemetry/__init__.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/telemetry/perf.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/utils/__init__.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/utils/audio_playback.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/openspeechapi/vendor_registry.yaml +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/scripts/engines/cloud/install.sh +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/scripts/engines/faster-whisper/native/install.sh +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/scripts/engines/fish-speech/native/install.sh +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/scripts/engines/macos-stt/macos_stt.swift +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/scripts/engines/macos-stt/request_auth.swift +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/scripts/engines/sherpa-onnx/native/install.sh +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/scripts/engines/sherpa-onnx/native/run_streaming_server.py +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/scripts/engines/whisper/native/install.sh +0 -0
- {openspeechapi-0.2.8 → openspeechapi-0.2.9}/scripts/engines/whisperlivekit/native/install.sh +0 -0
|
@@ -0,0 +1,763 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: openspeechapi
|
|
3
|
+
Version: 0.2.9
|
|
4
|
+
Summary: Unified speech interface for STT/TTS providers
|
|
5
|
+
Requires-Python: >=3.11
|
|
6
|
+
Requires-Dist: httpx>=0.27
|
|
7
|
+
Requires-Dist: loguru>=0.7
|
|
8
|
+
Requires-Dist: msgpack>=1.0
|
|
9
|
+
Requires-Dist: pydantic>=2.0
|
|
10
|
+
Requires-Dist: pyyaml>=6.0
|
|
11
|
+
Provides-Extra: alibaba
|
|
12
|
+
Provides-Extra: alibaba-stt
|
|
13
|
+
Provides-Extra: alibaba-tts
|
|
14
|
+
Provides-Extra: all
|
|
15
|
+
Requires-Dist: elevenlabs; extra == 'all'
|
|
16
|
+
Requires-Dist: faster-whisper; extra == 'all'
|
|
17
|
+
Requires-Dist: openai; extra == 'all'
|
|
18
|
+
Requires-Dist: openai-whisper; extra == 'all'
|
|
19
|
+
Requires-Dist: piper-tts; extra == 'all'
|
|
20
|
+
Requires-Dist: pyttsx3; (sys_platform == 'win32') and extra == 'all'
|
|
21
|
+
Requires-Dist: torchaudio; extra == 'all'
|
|
22
|
+
Requires-Dist: tts; extra == 'all'
|
|
23
|
+
Requires-Dist: websockets; extra == 'all'
|
|
24
|
+
Provides-Extra: assemblyai-stt
|
|
25
|
+
Provides-Extra: audio
|
|
26
|
+
Requires-Dist: numpy; extra == 'audio'
|
|
27
|
+
Requires-Dist: sounddevice; extra == 'audio'
|
|
28
|
+
Provides-Extra: azure
|
|
29
|
+
Provides-Extra: azure-stt
|
|
30
|
+
Provides-Extra: azure-tts
|
|
31
|
+
Provides-Extra: baidu
|
|
32
|
+
Provides-Extra: baidu-stt
|
|
33
|
+
Provides-Extra: baidu-tts
|
|
34
|
+
Provides-Extra: cloud
|
|
35
|
+
Requires-Dist: websockets; extra == 'cloud'
|
|
36
|
+
Provides-Extra: coqui-tts
|
|
37
|
+
Requires-Dist: tts; extra == 'coqui-tts'
|
|
38
|
+
Provides-Extra: cosyvoice-tts
|
|
39
|
+
Requires-Dist: torchaudio; extra == 'cosyvoice-tts'
|
|
40
|
+
Provides-Extra: deepgram
|
|
41
|
+
Requires-Dist: websockets; extra == 'deepgram'
|
|
42
|
+
Provides-Extra: deepgram-stt
|
|
43
|
+
Requires-Dist: websockets; extra == 'deepgram-stt'
|
|
44
|
+
Provides-Extra: deepgram-tts
|
|
45
|
+
Provides-Extra: dev
|
|
46
|
+
Requires-Dist: pytest-asyncio>=0.24; extra == 'dev'
|
|
47
|
+
Requires-Dist: pytest-cov; extra == 'dev'
|
|
48
|
+
Requires-Dist: pytest-dotenv; extra == 'dev'
|
|
49
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
50
|
+
Requires-Dist: ruff==0.15.*; extra == 'dev'
|
|
51
|
+
Provides-Extra: elevenlabs
|
|
52
|
+
Requires-Dist: elevenlabs; extra == 'elevenlabs'
|
|
53
|
+
Requires-Dist: websockets; extra == 'elevenlabs'
|
|
54
|
+
Provides-Extra: elevenlabs-stt
|
|
55
|
+
Requires-Dist: websockets; extra == 'elevenlabs-stt'
|
|
56
|
+
Provides-Extra: elevenlabs-tts
|
|
57
|
+
Requires-Dist: elevenlabs; extra == 'elevenlabs-tts'
|
|
58
|
+
Provides-Extra: faster-whisper-stt
|
|
59
|
+
Requires-Dist: faster-whisper; extra == 'faster-whisper-stt'
|
|
60
|
+
Provides-Extra: fish-speech-tts
|
|
61
|
+
Provides-Extra: gemma4-stt
|
|
62
|
+
Requires-Dist: mlx-vlm; (sys_platform == 'darwin') and extra == 'gemma4-stt'
|
|
63
|
+
Provides-Extra: google
|
|
64
|
+
Provides-Extra: google-stt
|
|
65
|
+
Provides-Extra: google-tts
|
|
66
|
+
Provides-Extra: iflytek
|
|
67
|
+
Requires-Dist: websockets; extra == 'iflytek'
|
|
68
|
+
Provides-Extra: iflytek-stt
|
|
69
|
+
Requires-Dist: websockets; extra == 'iflytek-stt'
|
|
70
|
+
Provides-Extra: iflytek-tts
|
|
71
|
+
Requires-Dist: websockets; extra == 'iflytek-tts'
|
|
72
|
+
Provides-Extra: macos-native
|
|
73
|
+
Provides-Extra: minimax-tts
|
|
74
|
+
Provides-Extra: openai
|
|
75
|
+
Requires-Dist: openai; extra == 'openai'
|
|
76
|
+
Provides-Extra: openai-stt
|
|
77
|
+
Requires-Dist: openai; extra == 'openai-stt'
|
|
78
|
+
Provides-Extra: openai-tts
|
|
79
|
+
Requires-Dist: openai; extra == 'openai-tts'
|
|
80
|
+
Provides-Extra: piper-tts
|
|
81
|
+
Requires-Dist: piper-tts; extra == 'piper-tts'
|
|
82
|
+
Provides-Extra: server
|
|
83
|
+
Requires-Dist: fastapi; extra == 'server'
|
|
84
|
+
Requires-Dist: python-multipart; extra == 'server'
|
|
85
|
+
Requires-Dist: uvicorn; extra == 'server'
|
|
86
|
+
Requires-Dist: websockets; extra == 'server'
|
|
87
|
+
Provides-Extra: sherpa-onnx-stt
|
|
88
|
+
Requires-Dist: websockets; extra == 'sherpa-onnx-stt'
|
|
89
|
+
Provides-Extra: tencent
|
|
90
|
+
Provides-Extra: tencent-stt
|
|
91
|
+
Provides-Extra: tencent-tts
|
|
92
|
+
Provides-Extra: tracing
|
|
93
|
+
Requires-Dist: opentelemetry-api; extra == 'tracing'
|
|
94
|
+
Requires-Dist: opentelemetry-sdk; extra == 'tracing'
|
|
95
|
+
Provides-Extra: volcengine
|
|
96
|
+
Provides-Extra: volcengine-stt
|
|
97
|
+
Provides-Extra: volcengine-tts
|
|
98
|
+
Provides-Extra: whisper-stt
|
|
99
|
+
Requires-Dist: openai-whisper; extra == 'whisper-stt'
|
|
100
|
+
Provides-Extra: whisperlivekit-stt
|
|
101
|
+
Requires-Dist: websockets; extra == 'whisperlivekit-stt'
|
|
102
|
+
Provides-Extra: windows-native
|
|
103
|
+
Requires-Dist: pyttsx3; (sys_platform == 'win32') and extra == 'windows-native'
|
|
104
|
+
Description-Content-Type: text/markdown
|
|
105
|
+
|
|
106
|
+
# OpenSpeechAPI
|
|
107
|
+
|
|
108
|
+
> Unified speech interface for STT/TTS providers — one API, multiple backends.
|
|
109
|
+
|
|
110
|
+
OpenSpeechAPI 提供统一的语音接口,通过字符串指定 provider 即可切换不同的 STT/TTS 后端(云端 API、本地模型),无需关心底层实现。
|
|
111
|
+
|
|
112
|
+
## Quick Start
|
|
113
|
+
|
|
114
|
+
### 安装
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
# 安装全部 provider
|
|
118
|
+
pip install -e ".[all]"
|
|
119
|
+
|
|
120
|
+
# 或按需安装
|
|
121
|
+
pip install -e ".[openai]" # OpenAI Whisper STT + TTS
|
|
122
|
+
pip install -e ".[faster-whisper-stt]" # 本地 faster-whisper STT
|
|
123
|
+
pip install -e ".[openai,faster-whisper-stt]" # 指定多个
|
|
124
|
+
|
|
125
|
+
# 仅核心包(不含任何 provider)
|
|
126
|
+
pip install -e .
|
|
127
|
+
|
|
128
|
+
# 开发环境
|
|
129
|
+
pip install -e ".[dev]"
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
### 30 秒上手 — TTS
|
|
133
|
+
|
|
134
|
+
```python
|
|
135
|
+
import asyncio
|
|
136
|
+
from openspeechapi import create_provider
|
|
137
|
+
|
|
138
|
+
async def main():
|
|
139
|
+
tts = create_provider("openai-tts", api_key="sk-...")
|
|
140
|
+
await tts.start()
|
|
141
|
+
|
|
142
|
+
audio = await tts.synthesize("Hello, OpenSpeechAPI!")
|
|
143
|
+
|
|
144
|
+
import wave
|
|
145
|
+
with wave.open("output.wav", "wb") as wf:
|
|
146
|
+
wf.setnchannels(audio.channels)
|
|
147
|
+
wf.setsampwidth(2)
|
|
148
|
+
wf.setframerate(audio.sample_rate)
|
|
149
|
+
wf.writeframes(audio.data)
|
|
150
|
+
|
|
151
|
+
await tts.stop()
|
|
152
|
+
|
|
153
|
+
asyncio.run(main())
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
### 30 秒上手 — STT
|
|
157
|
+
|
|
158
|
+
```python
|
|
159
|
+
import asyncio
|
|
160
|
+
from openspeechapi import create_provider, AudioData, AudioFormat
|
|
161
|
+
from pathlib import Path
|
|
162
|
+
|
|
163
|
+
async def main():
|
|
164
|
+
stt = create_provider("faster-whisper", model_size="tiny")
|
|
165
|
+
await stt.start()
|
|
166
|
+
|
|
167
|
+
audio = AudioData(
|
|
168
|
+
data=Path("output.wav").read_bytes(),
|
|
169
|
+
sample_rate=16000, channels=1, format=AudioFormat.WAV,
|
|
170
|
+
)
|
|
171
|
+
result = await stt.transcribe(audio)
|
|
172
|
+
print(result.text) # "Hello, OpenSpeechAPI!"
|
|
173
|
+
print(result.language) # "en"
|
|
174
|
+
print(result.confidence) # 0.98
|
|
175
|
+
|
|
176
|
+
await stt.stop()
|
|
177
|
+
|
|
178
|
+
asyncio.run(main())
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
### macOS 零依赖快速上手
|
|
182
|
+
|
|
183
|
+
在 macOS 上无需任何 API Key 或模型下载,开箱即用:
|
|
184
|
+
|
|
185
|
+
```bash
|
|
186
|
+
# 1. 克隆项目
|
|
187
|
+
git clone https://github.com/wingsfly/OpenSpeechAPI.git
|
|
188
|
+
cd OpenSpeechAPI
|
|
189
|
+
|
|
190
|
+
# 2. 安装(仅核心包 + 服务依赖)
|
|
191
|
+
pip install -e ".[server]"
|
|
192
|
+
|
|
193
|
+
# 3. 启动服务和 WebUI
|
|
194
|
+
python -m openspeechapi.cli --config providers.yaml serve
|
|
195
|
+
|
|
196
|
+
# 4. 浏览器打开 http://127.0.0.1:8600/ui/
|
|
197
|
+
# - TTS:选择 macos_tts → 选择发音人(如 Tingting)→ 输入文本 → Run TTS
|
|
198
|
+
# - STT:前往 Engine Catalog → macOS STT → Install(自动下载预编译包)
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
#### macOS STT 安装(通过 WebUI)
|
|
202
|
+
|
|
203
|
+
`macos-stt` 默认未写入配置(避免"假可用"),需通过 Engine Catalog 一键安装:
|
|
204
|
+
|
|
205
|
+
1. WebUI → **Engine Catalog** → macOS STT → **Install**
|
|
206
|
+
- 优先下载 CI 预构建的 universal `.app`(无需 Xcode,通过 `gh` 自动完成)
|
|
207
|
+
- 若 `gh` 不可用或资产下载失败,自动回退到本地编译(需 Xcode Command Line Tools)
|
|
208
|
+
2. 安装完成后,**手动授权语音识别**(每台机器一次):
|
|
209
|
+
```bash
|
|
210
|
+
open scripts/engines/macos-stt/MacOSSTTHelper.app
|
|
211
|
+
# 弹出对话框后点击"允许"
|
|
212
|
+
```
|
|
213
|
+
3. **手动下载听写语言模型**(每台机器一次):
|
|
214
|
+
系统设置 > 键盘 > 听写 > 下载所需语言模型(中文/英文等)
|
|
215
|
+
|
|
216
|
+
安装完成后 `macos_stt` 自动写入配置并热重载,Dashboard 显示 healthy。
|
|
217
|
+
|
|
218
|
+
> 授权和听写模型下载是 macOS TCC 系统限制,无法自动化,必须每台机器手动执行一次。
|
|
219
|
+
> 详细机制见 [docs/architecture/native-engine-install.md](docs/architecture/native-engine-install.md)。
|
|
220
|
+
|
|
221
|
+
## CLI Demo
|
|
222
|
+
|
|
223
|
+
无需写代码,直接在命令行体验:
|
|
224
|
+
|
|
225
|
+
```bash
|
|
226
|
+
# TTS:文本 → 语音
|
|
227
|
+
python -m openspeechapi.demo tts -t "Hello world" -o output.wav
|
|
228
|
+
|
|
229
|
+
# STT:语音 → 文本
|
|
230
|
+
python -m openspeechapi.demo stt -i output.wav -p faster-whisper
|
|
231
|
+
|
|
232
|
+
# Roundtrip:文本 → TTS → STT → 文本
|
|
233
|
+
python -m openspeechapi.demo roundtrip -t "Hello world"
|
|
234
|
+
|
|
235
|
+
# Compare:多引擎对比
|
|
236
|
+
python -m openspeechapi.demo compare -i output.wav -p openai,faster-whisper
|
|
237
|
+
|
|
238
|
+
# REPL:交互模式
|
|
239
|
+
python -m openspeechapi.demo repl
|
|
240
|
+
|
|
241
|
+
# WebUI(Phase A)
|
|
242
|
+
python -m openspeechapi.cli serve --host 0.0.0.0 --port 8600
|
|
243
|
+
# 浏览器打开 http://127.0.0.1:8600/ui
|
|
244
|
+
|
|
245
|
+
# 实时 STT:优先使用 WebSocket PCM 流式(/v1/stt/stream),
|
|
246
|
+
# 若浏览器或链路异常会自动回退到分片 HTTP 转写模式。
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
### 本地引擎管理(实验特性)
|
|
250
|
+
|
|
251
|
+
```bash
|
|
252
|
+
# 1) 下载/更新运行镜像
|
|
253
|
+
python -m openspeechapi.cli engine install --name fish-speech --runtime docker --follow
|
|
254
|
+
|
|
255
|
+
# 2) 启动本地引擎(含健康检查)
|
|
256
|
+
python -m openspeechapi.cli engine start --name fish-speech --runtime docker --follow
|
|
257
|
+
|
|
258
|
+
# 3) 查看运行状态/日志
|
|
259
|
+
python -m openspeechapi.cli engine status --name fish-speech --runtime docker
|
|
260
|
+
python -m openspeechapi.cli engine logs --name fish-speech --runtime docker --lines 200
|
|
261
|
+
|
|
262
|
+
# 4) 停止
|
|
263
|
+
python -m openspeechapi.cli engine stop --name fish-speech --runtime docker --follow
|
|
264
|
+
|
|
265
|
+
# 5) 跨进程查询任务
|
|
266
|
+
python -m openspeechapi.cli engine task list --name fish-speech --limit 20
|
|
267
|
+
python -m openspeechapi.cli engine task status --task-id <TASK_ID>
|
|
268
|
+
python -m openspeechapi.cli engine task follow --task-id <TASK_ID>
|
|
269
|
+
python -m openspeechapi.cli engine task cancel --task-id <TASK_ID>
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
进度反馈会显示 task id、阶段、百分比和当前消息,便于追踪长耗时任务。
|
|
273
|
+
|
|
274
|
+
#### STT 本地模型引擎(复用已有模型路径)
|
|
275
|
+
|
|
276
|
+
```bash
|
|
277
|
+
# faster-whisper 模型资产(native,无常驻服务)
|
|
278
|
+
python -m openspeechapi.cli engine install --name faster-whisper --runtime native --follow
|
|
279
|
+
python -m openspeechapi.cli engine start --name faster-whisper --runtime native --follow
|
|
280
|
+
python -m openspeechapi.cli engine status --name faster-whisper --runtime native
|
|
281
|
+
|
|
282
|
+
# whisper 模型资产(native,无常驻服务)
|
|
283
|
+
python -m openspeechapi.cli engine install --name whisper --runtime native --follow
|
|
284
|
+
python -m openspeechapi.cli engine start --name whisper --runtime native --follow
|
|
285
|
+
python -m openspeechapi.cli engine status --name whisper --runtime native
|
|
286
|
+
```
|
|
287
|
+
|
|
288
|
+
说明:安装会优先读取 `~/.aim/config.json` 与 `~/.aim/registry.json` 的 provision 信息来定位模型;若 AIM 未命中,再回退默认本地路径候选。若仍未找到,可按配置走“模拟下载”流程以验证安装进度。
|
|
289
|
+
|
|
290
|
+
### Demo 音频播放
|
|
291
|
+
|
|
292
|
+
```bash
|
|
293
|
+
# 合成后直接播放
|
|
294
|
+
python -m openspeechapi.demo tts -t "Hello world" --play
|
|
295
|
+
|
|
296
|
+
# 指定播放参数
|
|
297
|
+
python -m openspeechapi.demo tts -t "Hello world" --play \
|
|
298
|
+
--play-backend sounddevice --play-device 2 --play-volume 0.8
|
|
299
|
+
```
|
|
300
|
+
|
|
301
|
+
## Providers
|
|
302
|
+
|
|
303
|
+
### 已实现
|
|
304
|
+
|
|
305
|
+
| Provider | 类型 | 说明 | 执行模式 | 安装 |
|
|
306
|
+
|----------|------|------|----------|------|
|
|
307
|
+
| `openai-stt` | STT | OpenAI Whisper API(云端) | remote | `pip install -e ".[openai]"` |
|
|
308
|
+
| `faster-whisper` | STT | 本地 Whisper 推理(GPU/CPU) | subprocess | `pip install -e ".[faster-whisper-stt]"` |
|
|
309
|
+
| `whisper` | STT | OpenAI Whisper 本地推理(CPU/GPU) | subprocess | `pip install -e ".[whisper-stt]"` |
|
|
310
|
+
| `whisperlivekit-stt` | STT | WhisperLiveKit 本地服务(Deepgram 兼容 WS,支持 MLX 后端) | local | `pip install -e ".[whisperlivekit-stt]"` |
|
|
311
|
+
| `elevenlabs-stt` | STT | ElevenLabs Scribe API(云端,支持实时流式 WS + 批量) | remote | `pip install -e ".[elevenlabs-stt]"` |
|
|
312
|
+
| `deepgram` | STT | Deepgram API(云端,支持实时流式) | remote | `pip install -e ".[deepgram]"` |
|
|
313
|
+
| `gemma4` | STT | Google Gemma 4 多模态 ASR(macOS/MLX 本地,E4B 默认/12B 可选,>30s 自动分段,支持转写/翻译/理解) | subprocess | `pip install -e ".[gemma4-stt]"` |
|
|
314
|
+
| `openai-tts` | TTS | OpenAI Speech API(云端,支持流式) | remote | `pip install -e ".[openai]"` |
|
|
315
|
+
| `elevenlabs` | TTS | ElevenLabs 高质量语音(云端,支持 HTTP/WS 流式) | remote | `pip install -e ".[elevenlabs-tts]"` |
|
|
316
|
+
| `minimax` | TTS | Minimax 语音合成(云端) | remote | `pip install -e ".[minimax-tts]"` |
|
|
317
|
+
| `cosyvoice` | TTS | CosyVoice 本地中文语音合成(GPU) | subprocess | 需手动安装 CosyVoice |
|
|
318
|
+
| `fish-speech` | TTS | Fish-Speech 本地多语 TTS + voice clone | local | `pip install -e ".[fish-speech-tts]"` |
|
|
319
|
+
| `piper` | TTS | Piper 轻量级本地 TTS(CPU 即可) | in_process | `pip install -e ".[piper-tts]"` |
|
|
320
|
+
| `macos-say` | TTS | macOS 内置语音合成(`say` 命令,零依赖) | in_process | 无需安装,macOS 自带 |
|
|
321
|
+
| `macos-stt` | STT | macOS 内置语音识别(SFSpeechRecognizer) | in_process | WebUI Engine Catalog → Install(预编译优先,编译兜底) |
|
|
322
|
+
|
|
323
|
+
### Stub(待实现)
|
|
324
|
+
|
|
325
|
+
`coqui`
|
|
326
|
+
|
|
327
|
+
### 查看所有 provider
|
|
328
|
+
|
|
329
|
+
```python
|
|
330
|
+
from openspeechapi import list_providers
|
|
331
|
+
print(list_providers())
|
|
332
|
+
# ['coqui', 'cosyvoice', 'deepgram', 'elevenlabs', 'faster-whisper',
|
|
333
|
+
# 'fish-speech', 'minimax', 'openai-stt', 'openai-tts', 'piper', 'whisper',
|
|
334
|
+
# 'whisperlivekit-stt', ...]  # 示例输出(节选),实际列表以当前版本已注册的 provider 为准
|
|
335
|
+
```
|
|
336
|
+
|
|
337
|
+
## Provider 参数
|
|
338
|
+
|
|
339
|
+
### `openai-stt`
|
|
340
|
+
|
|
341
|
+
```python
|
|
342
|
+
create_provider("openai-stt",
|
|
343
|
+
api_key="sk-...", # 必填,OpenAI API Key
|
|
344
|
+
model="whisper-1", # 模型名称
|
|
345
|
+
)
|
|
346
|
+
```
|
|
347
|
+
|
|
348
|
+
转录选项通过 `STTOptions` 传入:
|
|
349
|
+
|
|
350
|
+
```python
|
|
351
|
+
from openspeechapi import STTOptions
|
|
352
|
+
result = await stt.transcribe(audio, STTOptions(
|
|
353
|
+
language="zh", # 语言提示
|
|
354
|
+
prompt="技术会议记录", # 上下文提示
|
|
355
|
+
temperature=0.0, # 0.0-1.0
|
|
356
|
+
))
|
|
357
|
+
```
|
|
358
|
+
|
|
359
|
+
### `faster-whisper`
|
|
360
|
+
|
|
361
|
+
```python
|
|
362
|
+
create_provider("faster-whisper",
|
|
363
|
+
model_size="base", # tiny / base / small / medium / large-v3
|
|
364
|
+
device="auto", # auto / cuda / cpu
|
|
365
|
+
compute_type="default", # default / int8 / float16
|
|
366
|
+
beam_size=5, # beam search 宽度
|
|
367
|
+
download_root=None, # 模型缓存目录
|
|
368
|
+
)
|
|
369
|
+
```
|
|
370
|
+
|
|
371
|
+
### `openai-tts`
|
|
372
|
+
|
|
373
|
+
```python
|
|
374
|
+
create_provider("openai-tts",
|
|
375
|
+
api_key="sk-...", # 必填,OpenAI API Key
|
|
376
|
+
model="tts-1", # tts-1 / tts-1-hd
|
|
377
|
+
voice="alloy", # alloy / echo / fable / onyx / nova / shimmer
|
|
378
|
+
response_format="pcm", # 输出格式
|
|
379
|
+
)
|
|
380
|
+
```
|
|
381
|
+
|
|
382
|
+
合成选项通过 `TTSOptions` 传入:
|
|
383
|
+
|
|
384
|
+
```python
|
|
385
|
+
from openspeechapi import TTSOptions
|
|
386
|
+
audio = await tts.synthesize("Hello", TTSOptions(
|
|
387
|
+
voice="nova", # 覆盖默认声音
|
|
388
|
+
speed=1.2, # 语速倍率
|
|
389
|
+
))
|
|
390
|
+
```
|
|
391
|
+
|
|
392
|
+
### `deepgram`
|
|
393
|
+
|
|
394
|
+
```python
|
|
395
|
+
create_provider("deepgram",
|
|
396
|
+
api_key="...", # 必填,Deepgram API Key
|
|
397
|
+
model="nova-2", # 模型名称
|
|
398
|
+
language="en", # 默认语言
|
|
399
|
+
punctuate=True, # 自动标点
|
|
400
|
+
smart_format=True, # 智能格式化
|
|
401
|
+
)
|
|
402
|
+
```
|
|
403
|
+
|
|
404
|
+
支持实时流式转录(`transcribe_stream`),详见[流式 STT](#流式-stt) 章节。
|
|
405
|
+
|
|
406
|
+
### `elevenlabs`
|
|
407
|
+
|
|
408
|
+
```python
|
|
409
|
+
create_provider("elevenlabs",
|
|
410
|
+
api_key="...", # 必填,ElevenLabs API Key
|
|
411
|
+
voice_id="21m00Tcm4TlvDq8ikWAM", # 声音 ID
|
|
412
|
+
model_id="eleven_monolingual_v1", # 模型
|
|
413
|
+
stability=0.5, # 声音稳定性
|
|
414
|
+
similarity_boost=0.75, # 相似度增强
|
|
415
|
+
)
|
|
416
|
+
```
|
|
417
|
+
|
|
418
|
+
### `minimax`
|
|
419
|
+
|
|
420
|
+
```python
|
|
421
|
+
create_provider("minimax",
|
|
422
|
+
api_key="...", # 必填,Minimax API Key
|
|
423
|
+
group_id="...", # 必填,Minimax Group ID
|
|
424
|
+
model="speech-01-turbo", # 模型
|
|
425
|
+
voice_id="male-qn-qingse", # 声音 ID
|
|
426
|
+
speed=1.0, # 语速
|
|
427
|
+
)
|
|
428
|
+
```
|
|
429
|
+
|
|
430
|
+
### `cosyvoice`
|
|
431
|
+
|
|
432
|
+
```python
|
|
433
|
+
create_provider("cosyvoice",
|
|
434
|
+
model_dir="/path/to/model", # 必填,本地模型目录
|
|
435
|
+
device="auto", # auto / cuda / cpu
|
|
436
|
+
spk_id="中文女", # 说话人 ID
|
|
437
|
+
)
|
|
438
|
+
```
|
|
439
|
+
|
|
440
|
+
### `fish-speech`
|
|
441
|
+
|
|
442
|
+
```python
|
|
443
|
+
create_provider("fish-speech",
|
|
444
|
+
api_url="http://localhost:8080", # Fish-Speech 本地服务地址
|
|
445
|
+
reference_audio=None, # 参考音频路径(voice clone)
|
|
446
|
+
)
|
|
447
|
+
```
|
|
448
|
+
|
|
449
|
+
### `piper`
|
|
450
|
+
|
|
451
|
+
```python
|
|
452
|
+
create_provider("piper",
|
|
453
|
+
model_path="/path/to/model.onnx", # 必填,模型文件路径
|
|
454
|
+
config_path="/path/to/config.json", # 必填,配置文件路径
|
|
455
|
+
use_cuda=False, # 是否使用 GPU
|
|
456
|
+
length_scale=1.0, # 语速(越大越慢)
|
|
457
|
+
noise_scale=0.667, # 噪声比例
|
|
458
|
+
)
|
|
459
|
+
```
|
|
460
|
+
|
|
461
|
+
### `macos-say`(macOS 原生 TTS)
|
|
462
|
+
|
|
463
|
+
零额外依赖,使用 macOS 内置 `say` 命令,支持系统所有发音人。
|
|
464
|
+
|
|
465
|
+
```python
|
|
466
|
+
create_provider("macos-say",
|
|
467
|
+
default_voice="Tingting", # 默认发音人(say -v '?' 查看全部)
|
|
468
|
+
default_rate=200, # 默认语速(words per minute)
|
|
469
|
+
)
|
|
470
|
+
```
|
|
471
|
+
|
|
472
|
+
支持通过 `list_voices()` 获取所有可用发音人(按语言分组)。合成时通过 `TTSOptions(voice="Samantha", speed=1.5)` 指定发音人和语速。
|
|
473
|
+
|
|
474
|
+
### `macos-stt`(macOS 原生 STT)
|
|
475
|
+
|
|
476
|
+
使用 macOS 内置 SFSpeechRecognizer,通过 Swift CLI 助手(`.app` bundle)实现。
|
|
477
|
+
**推荐通过 WebUI Engine Catalog 一键安装**(自动下载预编译 universal 包,无需 Xcode)。
|
|
478
|
+
|
|
479
|
+
```python
|
|
480
|
+
create_provider("macos-stt",
|
|
481
|
+
language="zh-CN", # 默认识别语言
|
|
482
|
+
binary_path="", # Swift 工具路径,空则自动检测
|
|
483
|
+
)
|
|
484
|
+
```
|
|
485
|
+
|
|
486
|
+
**安装方式(推荐):** WebUI → Engine Catalog → macOS STT → **Install**
|
|
487
|
+
|
|
488
|
+
安装流程:优先使用方案 B(下载预编译包,`gh release download`);方案 B 不可用时自动回退到方案 C(本地编译,`bash install.sh`,需 Xcode CLT)。
|
|
489
|
+
详见 [docs/architecture/native-engine-install.md](docs/architecture/native-engine-install.md)。
|
|
490
|
+
|
|
491
|
+
**每台机器必须手动完成一次(无法自动化):**
|
|
492
|
+
|
|
493
|
+
```bash
|
|
494
|
+
# 1. 授权语音识别权限(安装后运行,弹出对话框后点击"允许")
|
|
495
|
+
open scripts/engines/macos-stt/MacOSSTTHelper.app
|
|
496
|
+
|
|
497
|
+
# 2. 验证授权状态
|
|
498
|
+
scripts/engines/macos-stt/MacOSSTTHelper.app/Contents/MacOS/macos-stt-helper --check --language en-US
|
|
499
|
+
```
|
|
500
|
+
|
|
501
|
+
- **系统设置 > 键盘 > 听写** → 下载对应语言的离线听写模型(中文、英文等)
|
|
502
|
+
- macOS 13+ 支持完全离线识别,旧版本需联网
|
|
503
|
+
|
|
504
|
+
**高级 / 离线手动安装(不依赖 WebUI 或 `gh`):**
|
|
505
|
+
|
|
506
|
+
```bash
|
|
507
|
+
# 需要 Xcode Command Line Tools(xcode-select --install)
|
|
508
|
+
bash scripts/engines/macos-stt/install.sh
|
|
509
|
+
```
|
|
510
|
+
|
|
511
|
+
## HTTP 服务 + Client 模式
|
|
512
|
+
|
|
513
|
+
### 启动服务
|
|
514
|
+
|
|
515
|
+
```bash
|
|
516
|
+
openspeechapi serve --config providers.yaml --port 8600
|
|
517
|
+
```
|
|
518
|
+
|
|
519
|
+
### Python Client(与 Library 模式接口一致)
|
|
520
|
+
|
|
521
|
+
```python
|
|
522
|
+
from openspeechapi import Client
|
|
523
|
+
|
|
524
|
+
async with Client("http://localhost:8600") as c:
|
|
525
|
+
# STT
|
|
526
|
+
result = await c.stt.transcribe("faster-whisper", audio)
|
|
527
|
+
|
|
528
|
+
# TTS
|
|
529
|
+
audio = await c.tts.synthesize("openai-tts", "Hello world")
|
|
530
|
+
|
|
531
|
+
# FanOut
|
|
532
|
+
result = await c.stt.fanout(["openai-stt", "faster-whisper"], audio, strategy="collect_all")
|
|
533
|
+
|
|
534
|
+
# 管理
|
|
535
|
+
providers = await c.list_providers()
|
|
536
|
+
health = await c.health()
|
|
537
|
+
```
|
|
538
|
+
|
|
539
|
+
### REST API
|
|
540
|
+
|
|
541
|
+
```bash
|
|
542
|
+
# STT
|
|
543
|
+
curl -X POST http://localhost:8600/v1/stt/transcribe \
|
|
544
|
+
-F audio=@audio.wav -F provider=faster-whisper
|
|
545
|
+
|
|
546
|
+
# TTS
|
|
547
|
+
curl -X POST http://localhost:8600/v1/tts/synthesize \
|
|
548
|
+
-H "Content-Type: application/json" \
|
|
549
|
+
-d '{"text": "Hello", "provider": "openai-tts"}' --output out.wav
|
|
550
|
+
|
|
551
|
+
# 管理
|
|
552
|
+
curl http://localhost:8600/v1/providers
|
|
553
|
+
curl http://localhost:8600/v1/health
|
|
554
|
+
curl http://localhost:8600/v1/metrics
|
|
555
|
+
```
|
|
556
|
+
|
|
557
|
+
## 高级用法
|
|
558
|
+
|
|
559
|
+
### Config-Driven(YAML 配置)
|
|
560
|
+
|
|
561
|
+
```yaml
|
|
562
|
+
# providers.yaml
|
|
563
|
+
providers:
|
|
564
|
+
cloud-stt:
|
|
565
|
+
provider: openai-stt
|
|
566
|
+
exec_mode: remote
|
|
567
|
+
settings:
|
|
568
|
+
api_key: ${OPENAI_API_KEY}
|
|
569
|
+
|
|
570
|
+
local-stt:
|
|
571
|
+
provider: faster-whisper
|
|
572
|
+
exec_mode: subprocess # 独立进程,隔离 GPU 内存
|
|
573
|
+
settings:
|
|
574
|
+
model_size: large-v3
|
|
575
|
+
device: cuda
|
|
576
|
+
```
|
|
577
|
+
|
|
578
|
+
`exec_mode` 约定:
|
|
579
|
+
- `subprocess`:子进程模型推理(IPC)
|
|
580
|
+
- `local`:本地服务引擎(HTTP/HTTPS)
|
|
581
|
+
- `remote`:云端服务 API
|
|
582
|
+
- `in_process`:预留给真正进程内推理(兼容旧配置,建议迁移)
|
|
583
|
+
|
|
584
|
+
```python
|
|
585
|
+
from openspeechapi import ServiceDispatcher, ProviderRegistry
|
|
586
|
+
from openspeechapi.providers.stt.openai import OpenAISTT
|
|
587
|
+
from openspeechapi.providers.stt.faster_whisper import FasterWhisperSTT
|
|
588
|
+
|
|
589
|
+
registry = ProviderRegistry()
|
|
590
|
+
registry.register("openai-stt", OpenAISTT)
|
|
591
|
+
registry.register("faster-whisper", FasterWhisperSTT)
|
|
592
|
+
|
|
593
|
+
dispatcher = ServiceDispatcher.from_config("providers.yaml", registry)
|
|
594
|
+
await dispatcher.start()
|
|
595
|
+
|
|
596
|
+
result = await dispatcher.stt.transcribe("cloud-stt", audio)
|
|
597
|
+
await dispatcher.stop()
|
|
598
|
+
```
|
|
599
|
+
|
|
600
|
+
### FanOut — 多引擎并发
|
|
601
|
+
|
|
602
|
+
```python
|
|
603
|
+
from openspeechapi.dispatch.fanout import FirstCompleted, CollectAll
|
|
604
|
+
|
|
605
|
+
# 取最快返回的结果
|
|
606
|
+
result = await dispatcher.stt.fanout(
|
|
607
|
+
["cloud-stt", "local-stt"], audio, strategy=FirstCompleted()
|
|
608
|
+
)
|
|
609
|
+
|
|
610
|
+
# 收集所有结果对比
|
|
611
|
+
results = await dispatcher.stt.fanout(
|
|
612
|
+
["cloud-stt", "local-stt"], audio, strategy=CollectAll()
|
|
613
|
+
)
|
|
614
|
+
for name, t in results.successes.items():
|
|
615
|
+
print(f"{name}: {t.text}")
|
|
616
|
+
```
|
|
617
|
+
|
|
618
|
+
### Result Filters
|
|
619
|
+
|
|
620
|
+
```yaml
|
|
621
|
+
providers:
|
|
622
|
+
my-stt:
|
|
623
|
+
provider: faster-whisper
|
|
624
|
+
exec_mode: subprocess
|
|
625
|
+
settings:
|
|
626
|
+
model_size: base
|
|
627
|
+
filters:
|
|
628
|
+
- type: confidence
|
|
629
|
+
min: 0.8 # 过滤低置信度结果
|
|
630
|
+
- type: language
|
|
631
|
+
allow: ["zh", "en"] # 只保留中英文
|
|
632
|
+
```
|
|
633
|
+
|
|
634
|
+
### Observers(可观测性)
|
|
635
|
+
|
|
636
|
+
```python
|
|
637
|
+
from openspeechapi.observe.metrics import MetricsObserver
|
|
638
|
+
from openspeechapi.observe.debug import DebugLogObserver
|
|
639
|
+
|
|
640
|
+
dispatcher.add_observer(MetricsObserver()) # TTFB、耗时、吞吐
|
|
641
|
+
dispatcher.add_observer(DebugLogObserver()) # 详细日志
|
|
642
|
+
```
|
|
643
|
+
|
|
644
|
+
内置 5 个 Observer:`MetricsObserver` `LatencyObserver` `DebugLogObserver` `UsageObserver` `TracingObserver`
|
|
645
|
+
|
|
646
|
+
## 数据模型
|
|
647
|
+
|
|
648
|
+
```python
|
|
649
|
+
AudioData(data=bytes, sample_rate=int, channels=int, format=AudioFormat, duration_ms=int|None)
|
|
650
|
+
Transcription(text=str, language=str|None, confidence=float|None, words=list[Word]|None)
|
|
651
|
+
Word(text=str, start_ms=int, end_ms=int, confidence=float|None)
|
|
652
|
+
STTOptions(language=str|None, prompt=str|None, temperature=float|None)
|
|
653
|
+
TTSOptions(voice=str|None, speed=float, output_format=AudioFormat)
|
|
654
|
+
AudioFormat: PCM_16K | PCM_44K | WAV | AIFF | MP3 | OGG | FLAC | OPUS
|
|
655
|
+
```
|
|
656
|
+
|
|
657
|
+
## 项目结构
|
|
658
|
+
|
|
659
|
+
```
|
|
660
|
+
openspeechapi/
|
|
661
|
+
core/ # L1: Provider 抽象层(models, enums, base, registry)
|
|
662
|
+
providers/ # Provider 适配器(stt/ 5个含macos, tts/ 8个含macos)
|
|
663
|
+
utils/ # 工具模块(audio_converter, audio_playback)
|
|
664
|
+
dispatch/ # L2: 调度层(dispatcher, executors, fanout, filters)
|
|
665
|
+
observe/ # 可观测性(metrics, latency, debug, usage, tracing)
|
|
666
|
+
server/ # L3: FastAPI HTTP/WebSocket 服务
|
|
667
|
+
client/ # Python 薄客户端
|
|
668
|
+
factory.py # create_provider() 工厂函数
|
|
669
|
+
config.py # YAML 配置加载
|
|
670
|
+
cli.py # openspeechapi list / check / serve
|
|
671
|
+
demo.py # 交互式 demo CLI
|
|
672
|
+
examples/ # 示例脚本(Library 模式 + Client 模式)
|
|
673
|
+
tests/ # 332 tests(unit + integration + E2E)
|
|
674
|
+
Dockerfile # 容器化部署
|
|
675
|
+
docker-compose.yml
|
|
676
|
+
.github/workflows/ci.yml # GitHub Actions CI
|
|
677
|
+
```
|
|
678
|
+
|
|
679
|
+
## 环境变量
|
|
680
|
+
|
|
681
|
+
| 变量 | 用途 |
|
|
682
|
+
|------|------|
|
|
683
|
+
| `OPENAI_API_KEY` | OpenAI STT/TTS 所需的 API Key |
|
|
684
|
+
| `DEEPGRAM_API_KEY` | Deepgram STT 所需的 API Key |
|
|
685
|
+
| `ELEVENLABS_API_KEY` | ElevenLabs TTS 所需的 API Key |
|
|
686
|
+
| `MINIMAX_API_KEY` | Minimax TTS 所需的 API Key |
|
|
687
|
+
| `OPENSPEECH_API_KEY` | HTTP 服务 Bearer token 认证 Key |
|
|
688
|
+
|
|
689
|
+
支持 `.env` 文件自动加载(需 `python-dotenv`)。
|
|
690
|
+
|
|
691
|
+
## 部署
|
|
692
|
+
|
|
693
|
+
**Docker:**
|
|
694
|
+
```bash
|
|
695
|
+
# 构建并启动
|
|
696
|
+
docker-compose up -d
|
|
697
|
+
|
|
698
|
+
# 查看日志
|
|
699
|
+
docker-compose logs -f
|
|
700
|
+
|
|
701
|
+
# GPU 支持(编辑 docker-compose.yml 取消注释 openspeechapi-gpu 服务)
|
|
702
|
+
```
|
|
703
|
+
|
|
704
|
+
**直接启动:**
|
|
705
|
+
```bash
|
|
706
|
+
openspeechapi serve --config providers.yaml --port 8600
|
|
707
|
+
```
|
|
708
|
+
|
|
709
|
+
## 认证
|
|
710
|
+
|
|
711
|
+
在 `providers.yaml` 中配置 API Key 认证:
|
|
712
|
+
|
|
713
|
+
```yaml
|
|
714
|
+
server:
|
|
715
|
+
auth:
|
|
716
|
+
enabled: true
|
|
717
|
+
api_keys:
|
|
718
|
+
- ${OPENSPEECH_API_KEY}
|
|
719
|
+
```
|
|
720
|
+
|
|
721
|
+
启用后所有 REST 请求需携带 Bearer token:
|
|
722
|
+
```bash
|
|
723
|
+
curl -H "Authorization: Bearer your-key" http://localhost:8600/v1/providers
|
|
724
|
+
```
|
|
725
|
+
|
|
726
|
+
WebSocket 通过查询参数传递:
|
|
727
|
+
```
|
|
728
|
+
ws://localhost:8600/v1/stt/stream?provider=deepgram&token=your-key
|
|
729
|
+
```
|
|
730
|
+
|
|
731
|
+
`/v1/health` 端点免认证。不配置 `server.auth` 则无认证(开发模式)。
|
|
732
|
+
|
|
733
|
+
## 流式 STT
|
|
734
|
+
|
|
735
|
+
Deepgram 支持实时流式转录:
|
|
736
|
+
|
|
737
|
+
```python
|
|
738
|
+
async with Client("http://localhost:8600") as c:
|
|
739
|
+
async def audio_source():
|
|
740
|
+
# 从麦克风或文件读取 PCM 音频块
|
|
741
|
+
yield pcm_chunk
|
|
742
|
+
|
|
743
|
+
async for transcription in c.stt.transcribe_stream("deepgram", audio_source()):
|
|
744
|
+
print(transcription.text)
|
|
745
|
+
```
|
|
746
|
+
|
|
747
|
+
WebSocket 方式:
|
|
748
|
+
```
|
|
749
|
+
ws://localhost:8600/v1/stt/stream?provider=deepgram
|
|
750
|
+
# 发送: binary PCM audio frames
|
|
751
|
+
# 接收: {"type": "partial", "text": "..."}
|
|
752
|
+
```
|
|
753
|
+
|
|
754
|
+
## CI
|
|
755
|
+
|
|
756
|
+
项目使用 GitHub Actions 自动化测试。每次 push 到 main 或 PR 时自动运行:
|
|
757
|
+
- ruff lint
|
|
758
|
+
- 单元测试 + 集成测试
|
|
759
|
+
- 代码覆盖率检查(≥70%)
|
|
760
|
+
|
|
761
|
+
## License
|
|
762
|
+
|
|
763
|
+
Private — personal multi-project reuse.
|