openspeechapi 0.2.5__tar.gz → 0.2.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/PKG-INFO +1 -1
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/__init__.py +1 -1
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/core/models.py +19 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/stt/iflytek.py +224 -83
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/tts/iflytek.py +59 -22
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/providers.example.yaml +15 -2
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/pyproject.toml +1 -1
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/.dockerignore +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/.env.example +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/.github/workflows/ci.yml +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/.gitignore +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/.tmp/audio/en.aiff +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/.tmp/audio/en_16k.wav +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/.tmp/audio/en_16k_pad6.wav +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/.tmp/audio/en_long.aiff +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/.tmp/audio/en_long_16k.wav +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/.tmp/audio/en_mid.aiff +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/.tmp/audio/en_mid_16k.wav +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/.tmp/audio/zh.aiff +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/.tmp/audio/zh_16k.wav +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/.tmp/openspeech-8600.log +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/.tmp/openspeech-serve.log +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/.tmp/webui-server.log +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/.tmp/webui-server.pid +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/.tmp/wlk12101.log +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/.tmp/wlk12101.pid +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/.tmp/wlk12102.log +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/.tmp/wlk12102.pid +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/AGENTS.md +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/CLAUDE.md +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/Dockerfile +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/README.md +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/aibox-script/aibox-1.0.0-SNAPSHOT-stdout.log +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/aibox-script/aibox.2026-04-02.log +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/aibox-script/com.user.restart-jar.plist +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/aibox-script/restart-jar.sh +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/aibox-script.tar.gz +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/docker-compose.yml +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/docs/architecture/local-engine-manager.md +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/docs/architecture/logging-spec.md +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/docs/architecture/stt-engineering-optimization-guide.md +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/docs/architecture/stt-streaming-spec.md +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/docs/architecture/webui-phase-a.md +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/docs/engines/fish-speech-docker.md +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/docs/engines/fish-speech-native.md +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/docs/engines/stt-native-models.md +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/docs/superpowers/plans/2026-04-01-phase1-implementation.md +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/docs/superpowers/plans/2026-04-11-macos-native-tts-stt.md +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/docs/superpowers/specs/2026-04-01-openspeech-api-design.md +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/docs/superpowers/specs/2026-04-03-hot-lazy-loading.md +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/docs/superpowers/specs/2026-04-03-phase2-protocol-layer.md +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/docs/superpowers/specs/2026-04-03-phase3-production.md +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/docs/superpowers/specs/2026-04-11-macos-native-tts-stt-design.md +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/docs/superpowers/specs/2026-04-12-cloud-providers-webui-design.md +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/docs/superpowers/specs/2026-04-15-streaming-tts-stt-fixes-display-names.md +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/docs/superpowers/specs/2026-04-16-provider-management-engines-rename.md +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/examples/client_stt.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/examples/client_tts.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/examples/stt_simple.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/examples/tts_simple.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/__main__.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/cli.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/client/__init__.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/client/client.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/config.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/core/__init__.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/core/base.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/core/enums.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/core/registry.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/core/settings.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/demo.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/dispatch/__init__.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/dispatch/context.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/dispatch/dispatcher.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/dispatch/executors/__init__.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/dispatch/executors/base.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/dispatch/executors/in_process.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/dispatch/executors/remote.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/dispatch/executors/subprocess_exec.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/dispatch/fanout.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/dispatch/filters.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/dispatch/lifecycle.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/dispatch/watcher.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/engine_catalog.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/engine_registry.yaml +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/exceptions.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/factory.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/local_engines/__init__.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/local_engines/aim_resolver.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/local_engines/backends/__init__.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/local_engines/backends/docker_backend.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/local_engines/backends/native_backend.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/local_engines/base.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/local_engines/engines/__init__.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/local_engines/engines/faster_whisper.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/local_engines/engines/fish_speech.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/local_engines/engines/sherpa_onnx.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/local_engines/engines/whisper.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/local_engines/engines/whisperlivekit.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/local_engines/manager.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/local_engines/models.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/local_engines/progress.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/local_engines/registry.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/local_engines/task_store.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/local_engines/tasks.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/logging_config.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/observe/__init__.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/observe/base.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/observe/debug.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/observe/latency.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/observe/metrics.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/observe/tracing.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/observe/usage.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/__init__.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/_template.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/stt/__init__.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/stt/alibaba.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/stt/assemblyai.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/stt/azure_speech.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/stt/baidu.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/stt/deepgram.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/stt/elevenlabs.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/stt/faster_whisper.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/stt/google_cloud.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/stt/macos_speech.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/stt/openai.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/stt/sherpa_onnx.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/stt/tencent.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/stt/volcengine.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/stt/whisper.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/stt/whisperlivekit.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/stt/windows_speech.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/tts/__init__.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/tts/alibaba.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/tts/azure_speech.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/tts/baidu.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/tts/coqui.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/tts/cosyvoice.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/tts/deepgram.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/tts/elevenlabs.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/tts/fish_speech.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/tts/google_cloud.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/tts/macos_say.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/tts/minimax.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/tts/openai.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/tts/piper.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/tts/tencent.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/tts/volcengine.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/tts/windows_sapi.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/server/__init__.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/server/app.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/server/auth.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/server/middleware.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/server/routes/__init__.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/server/routes/management.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/server/routes/stt.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/server/routes/tts.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/server/routes/webui.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/server/webui/app.js +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/server/webui/index.html +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/server/webui/styles.css +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/server/ws/__init__.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/server/ws/stt_stream.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/server/ws/tts_stream.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/telemetry/__init__.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/telemetry/perf.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/utils/__init__.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/utils/audio_converter.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/utils/audio_playback.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/vendor_registry.yaml +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/output/output.wav +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/output.wav +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/scripts/engines/cloud/install.sh +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/scripts/engines/faster-whisper/native/install.sh +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/scripts/engines/fish-speech/native/install.sh +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/scripts/engines/macos-stt/install.sh +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/scripts/engines/macos-stt/macos_stt.swift +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/scripts/engines/macos-stt/request_auth.swift +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/scripts/engines/sherpa-onnx/native/install.sh +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/scripts/engines/sherpa-onnx/native/run_streaming_server.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/scripts/engines/whisper/native/install.sh +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/scripts/engines/whisperlivekit/native/install.sh +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/__init__.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/conftest.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/e2e/__init__.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/e2e/conftest.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/e2e/test_fanout_e2e.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/e2e/test_faster_whisper_e2e.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/e2e/test_openai_e2e.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/e2e/test_webui_e2e.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/fixtures/hello.wav +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/integration/__init__.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/integration/test_fanout_integration.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/integration/test_in_process_integration.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/integration/test_server_client.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/__init__.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_aim_resolver.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_audio_converter.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_audio_playback.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_base.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_cli.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_cli_engine.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_client.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_config.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_context.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_debug_observer.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_dispatcher.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_docker_backend_progress.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_engine_registry.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_enums.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_executor_base.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_fanout.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_filters.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_hot_reload.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_in_process.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_latency_observer.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_lifecycle.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_local_engine_task_store.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_local_engines_manager.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_logging.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_metrics_observer.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_models.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_native_backend.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_observer_base.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_plugin_mechanism.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_providers/__init__.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_providers/test_cloud_providers.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_providers/test_elevenlabs_stt.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_providers/test_macos_say.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_providers/test_macos_speech.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_providers/test_openai_base_url.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_providers/test_openai_stt.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_providers/test_openai_tts.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_providers/test_sherpa_onnx_stt.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_providers/test_stt_stubs.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_providers/test_tts_stubs.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_providers/test_whisperlivekit_stt.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_registry.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_remote.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_server/__init__.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_server/test_auth.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_server/test_config_api.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_server/test_routes.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_server/test_websocket.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_subprocess.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_usage_observer.py +0 -0
- {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_watcher.py +0 -0
|
@@ -60,6 +60,25 @@ class STTOptions:
|
|
|
60
60
|
# voice assistant. Providers that don't support VAD finalization
|
|
61
61
|
# (Whisper, Faster-Whisper) silently ignore this field.
|
|
62
62
|
vad_eos: int | None = None
|
|
63
|
+
# ── iFlytek IAT pass-through (matches Java AsrServiceImpl) ───────
|
|
64
|
+
# Wallex's Java AsrService forwards the client-supplied
|
|
65
|
+
# ``audio.common`` / ``audio.business`` / extra ``audio.data``
|
|
66
|
+
# fields verbatim to iFlytek's WS, treating the panel as the
|
|
67
|
+
# source of truth for ASR parameters. The Python pipeline now
|
|
68
|
+
# mirrors that contract: when these fields are non-None, the
|
|
69
|
+
# iFlytek provider uses them as the basis for the WS first frame
|
|
70
|
+
# (with ``setdefault`` fallback to its own settings for any keys
|
|
71
|
+
# the client omitted) instead of building the blocks purely from
|
|
72
|
+
# ``speech_providers.yaml``. ``None`` preserves the existing
|
|
73
|
+
# yaml-driven behaviour. Other STT providers ignore these fields.
|
|
74
|
+
iflytek_common: dict | None = None
|
|
75
|
+
iflytek_business: dict | None = None
|
|
76
|
+
# Extra fields to merge into the iFlytek ``data`` block beyond the
|
|
77
|
+
# canonical ``status``/``format``/``encoding``/``audio`` quadruple
|
|
78
|
+
# (e.g. panel-supplied ``data_type``). Keys that collide with the
|
|
79
|
+
# canonical set are overwritten (the provider's defaults still win,
|
|
80
|
+
# since the canonical set is required by the IAT spec).
|
|
81
|
+
iflytek_data_extras: dict | None = None
|
|
63
82
|
|
|
64
83
|
|
|
65
84
|
@dataclass
|
|
@@ -52,6 +52,54 @@ class IflytekSTTSettings(BaseSettings):
|
|
|
52
52
|
# via ``speech_providers.yaml`` so different sites can pick their
|
|
53
53
|
# own latency-vs-tolerance trade-off.
|
|
54
54
|
vad_eos: int = 2000
|
|
55
|
+
# ``ltc`` — sentence-level timestamp granularity sent in the
|
|
56
|
+
# business block of the IAT request (1 = sentence segments only;
|
|
57
|
+
# 2 = + word boundaries; 3 = + character boundaries). Java's
|
|
58
|
+
# ``AsrConfig.ltc`` defaults to 3; we mirror that so downstream
|
|
59
|
+
# consumers expecting per-character timing offsets keep working.
|
|
60
|
+
# Lower values shave a few bytes per response and slightly reduce
|
|
61
|
+
# post-processing work for callers that don't use the timestamps.
|
|
62
|
+
ltc: int = 3
|
|
63
|
+
# ``ws_host`` / ``ws_path`` — iFlytek IAT WebSocket endpoint. The
|
|
64
|
+
# default ``iat-api.xfyun.cn`` is the global endpoint; multi-region
|
|
65
|
+
# deployments (e.g. directed-domain endpoints such as
|
|
66
|
+
# ``ws-api-dx.xfyun.cn``) override these in yaml or via env var
|
|
67
|
+
# so the WS URL never requires a code change.
|
|
68
|
+
ws_host: str = "iat-api.xfyun.cn"
|
|
69
|
+
ws_path: str = "/v2/iat"
|
|
70
|
+
# ``timeout_secs`` — connect / read timeout for the underlying
|
|
71
|
+
# httpx AsyncClient. Java's AsrConfig defaults to 15s; we mirror
|
|
72
|
+
# that. Lower if the network has aggressive proxies, higher only
|
|
73
|
+
# if the iFlytek endpoint is consistently slow to handshake.
|
|
74
|
+
timeout_secs: int = 15
|
|
75
|
+
# ── Java AsrConfig parity (used as setdefault fallbacks) ────────
|
|
76
|
+
# When a wallex client (panel) supplies ``audio.business``/
|
|
77
|
+
# ``audio.common`` per-frame, those values flow through via
|
|
78
|
+
# ``STTOptions.iflytek_business``/``STTOptions.iflytek_common`` and
|
|
79
|
+
# become the WS first frame body. The settings below act as
|
|
80
|
+
# ``setdefault`` fallbacks for keys the client omits, mirroring
|
|
81
|
+
# Java ``AsrConfig``'s field set so the two implementations
|
|
82
|
+
# produce identical wire frames given the same panel payload.
|
|
83
|
+
#
|
|
84
|
+
# ``domain`` — iFlytek IAT domain. Java default ``iat``; a few
|
|
85
|
+
# vertical models (``medical`` / ``tv``) exist but most
|
|
86
|
+
# deployments stay on the general one.
|
|
87
|
+
domain: str = "iat"
|
|
88
|
+
# ``accent`` — only meaningful when ``language=="zh_cn"`` (selects
|
|
89
|
+
# mandarin vs. cantonese etc.). Java sends ``mandarin`` blindly;
|
|
90
|
+
# we use the same default, but the provider only adds it when the
|
|
91
|
+
# panel omits ``business.accent`` and the configured language is
|
|
92
|
+
# ``zh_cn``; for other languages the key is left out of the frame.
|
|
93
|
+
accent: str = "mandarin"
|
|
94
|
+
# ``dwa`` — dynamic word adjustment / wpgs (real-time correction). Java's
|
|
95
|
+
# default ``wpgs`` is the realtime-correction mode panels rely on
|
|
96
|
+
# for the partial-result protocol described in
|
|
97
|
+
# ``stt-streaming-spec.md``. Empty disables it.
|
|
98
|
+
dwa: str = "wpgs"
|
|
99
|
+
# ``sample_rate`` — required by the IAT directed-domain endpoint
|
|
100
|
+
# (``ws-api-dx.xfyun.cn``) which expects it in ``business``. Java
|
|
101
|
+
# AsrConfig.sampleRate=16000.
|
|
102
|
+
sample_rate: int = 16000
|
|
55
103
|
|
|
56
104
|
|
|
57
105
|
# iFlytek expects the full locale tag; common ISO short codes need to
|
|
@@ -92,9 +140,6 @@ class IflytekSTT(STTProvider):
|
|
|
92
140
|
"language": ["zh_cn", "en_us", "ja_jp", "ko_kr", "ru-ru"],
|
|
93
141
|
}
|
|
94
142
|
|
|
95
|
-
_WS_HOST = "iat-api.xfyun.cn"
|
|
96
|
-
_WS_PATH = "/v2/iat"
|
|
97
|
-
|
|
98
143
|
def __init__(self, settings: IflytekSTTSettings | None = None) -> None:
|
|
99
144
|
self.settings = settings or IflytekSTTSettings()
|
|
100
145
|
self._client: httpx.AsyncClient | None = None
|
|
@@ -106,7 +151,7 @@ class IflytekSTT(STTProvider):
|
|
|
106
151
|
|
|
107
152
|
async def start(self) -> None:
|
|
108
153
|
if self._client is None:
|
|
109
|
-
self._client = httpx.AsyncClient(timeout=
|
|
154
|
+
self._client = httpx.AsyncClient(timeout=float(self.settings.timeout_secs))
|
|
110
155
|
self._owns_client = True
|
|
111
156
|
# Surface the effective language (after alias mapping) and
|
|
112
157
|
# vad_eos at startup so deployments can verify the iFlytek model
|
|
@@ -142,10 +187,12 @@ class IflytekSTT(STTProvider):
|
|
|
142
187
|
now = datetime.now(tz=timezone.utc)
|
|
143
188
|
date = formatdate(timeval=now.timestamp(), localtime=False, usegmt=True)
|
|
144
189
|
|
|
190
|
+
host = self.settings.ws_host
|
|
191
|
+
path = self.settings.ws_path
|
|
145
192
|
signature_origin = (
|
|
146
|
-
f"host: {
|
|
193
|
+
f"host: {host}\n"
|
|
147
194
|
f"date: {date}\n"
|
|
148
|
-
f"GET {
|
|
195
|
+
f"GET {path} HTTP/1.1"
|
|
149
196
|
)
|
|
150
197
|
signature_sha = hmac.new(
|
|
151
198
|
self.settings.api_secret.encode("utf-8"),
|
|
@@ -165,9 +212,120 @@ class IflytekSTT(STTProvider):
|
|
|
165
212
|
).decode("utf-8")
|
|
166
213
|
|
|
167
214
|
params = urllib.parse.urlencode(
|
|
168
|
-
{"authorization": authorization, "date": date, "host":
|
|
215
|
+
{"authorization": authorization, "date": date, "host": host}
|
|
216
|
+
)
|
|
217
|
+
return f"wss://{host}{path}?{params}"
|
|
218
|
+
|
|
219
|
+
def _build_first_frame_blocks(
|
|
220
|
+
self,
|
|
221
|
+
opts: STTOptions | None,
|
|
222
|
+
*,
|
|
223
|
+
include_dwa: bool,
|
|
224
|
+
) -> tuple[dict, dict]:
|
|
225
|
+
"""Build the ``common`` / ``business`` blocks for the WS first frame.
|
|
226
|
+
|
|
227
|
+
Mirrors Java ``AsrServiceImpl.sendToAsr`` semantics: when
|
|
228
|
+
``opts.iflytek_common``/``opts.iflytek_business`` is provided
|
|
229
|
+
(typically by wallex relaying the panel's per-frame
|
|
230
|
+
``audio.common`` / ``audio.business``), those dicts are the
|
|
231
|
+
source of truth. We only ``setdefault`` keys the client omitted,
|
|
232
|
+
falling back to ``self.settings`` so a panel that misses a
|
|
233
|
+
single field doesn't get a malformed frame.
|
|
234
|
+
|
|
235
|
+
``include_dwa`` differs between ``transcribe()`` (batch — no
|
|
236
|
+
wpgs because there's no streaming protocol) and
|
|
237
|
+
``transcribe_stream()`` (always wpgs).
|
|
238
|
+
"""
|
|
239
|
+
canon = _canonical_language(self.settings.language)
|
|
240
|
+
eos = (opts.vad_eos
|
|
241
|
+
if opts is not None and opts.vad_eos is not None
|
|
242
|
+
else self.settings.vad_eos)
|
|
243
|
+
|
|
244
|
+
# ── business block ─────────────────────────────────────────
|
|
245
|
+
if opts is not None and opts.iflytek_business:
|
|
246
|
+
# Panel-supplied is authoritative; copy then fill missing
|
|
247
|
+
# keys from yaml so we never send a partial frame.
|
|
248
|
+
business = dict(opts.iflytek_business)
|
|
249
|
+
else:
|
|
250
|
+
business = {}
|
|
251
|
+
|
|
252
|
+
business.setdefault("language", canon)
|
|
253
|
+
business.setdefault("domain", self.settings.domain)
|
|
254
|
+
business.setdefault("vad_eos", eos)
|
|
255
|
+
business.setdefault("ltc", self.settings.ltc)
|
|
256
|
+
if include_dwa:
|
|
257
|
+
business.setdefault("dwa", self.settings.dwa)
|
|
258
|
+
# ``accent`` is only meaningful for the Chinese model. Java
|
|
259
|
+
# sends ``mandarin`` blindly; we keep that for byte-for-byte
|
|
260
|
+
# parity when the panel omits it AND language is zh_cn. For
|
|
261
|
+
# other languages we leave it out entirely (sending it is a
|
|
262
|
+
# no-op on iFlytek's side but confuses log readers).
|
|
263
|
+
if "accent" not in business and canon == "zh_cn":
|
|
264
|
+
business["accent"] = self.settings.accent
|
|
265
|
+
|
|
266
|
+
# ── common block ──────────────────────────────────────────
|
|
267
|
+
if opts is not None and opts.iflytek_common:
|
|
268
|
+
common = dict(opts.iflytek_common)
|
|
269
|
+
else:
|
|
270
|
+
common = {}
|
|
271
|
+
common.setdefault("app_id", self.settings.app_id)
|
|
272
|
+
|
|
273
|
+
return common, business
|
|
274
|
+
|
|
275
|
+
@staticmethod
|
|
276
|
+
def _build_data_block(
|
|
277
|
+
*, status: int, audio_b64: str, opts: STTOptions | None,
|
|
278
|
+
) -> dict:
|
|
279
|
+
"""Assemble the ``data`` block, merging panel-supplied extras.
|
|
280
|
+
|
|
281
|
+
Canonical keys (``status``/``format``/``encoding``/``audio``)
|
|
282
|
+
always win over ``iflytek_data_extras`` because the IAT spec
|
|
283
|
+
requires them in a specific shape; extras like the panel's
|
|
284
|
+
``data_type`` flow through.
|
|
285
|
+
"""
|
|
286
|
+
if opts is not None and opts.iflytek_data_extras:
|
|
287
|
+
data = dict(opts.iflytek_data_extras)
|
|
288
|
+
else:
|
|
289
|
+
data = {}
|
|
290
|
+
data["status"] = status
|
|
291
|
+
data["format"] = "audio/L16;rate=16000"
|
|
292
|
+
data["encoding"] = "raw"
|
|
293
|
+
data["audio"] = audio_b64
|
|
294
|
+
return data
|
|
295
|
+
|
|
296
|
+
async def _connect_with_retry(self) -> "websockets.ClientConnection":
|
|
297
|
+
"""Connect to iFlytek IAT WS with backoff, mirroring Java parity.
|
|
298
|
+
|
|
299
|
+
Java ``AsrServiceImpl.connectWithRetry`` does 4 attempts with
|
|
300
|
+
300/600/1200ms backoff before giving up. The previous Python
|
|
301
|
+
path was one-shot: a single TCP/handshake hiccup surfaced as a
|
|
302
|
+
hard ASR failure. Aligning the retry budget keeps wallex's
|
|
303
|
+
Python and Java front-ends behaviourally interchangeable on
|
|
304
|
+
flaky links.
|
|
305
|
+
"""
|
|
306
|
+
backoffs = (0.3, 0.6, 1.2) # delays AFTER attempts 1, 2, 3
|
|
307
|
+
last_exc: Exception | None = None
|
|
308
|
+
for attempt in range(4):
|
|
309
|
+
try:
|
|
310
|
+
url = self._build_auth_url()
|
|
311
|
+
ws = await websockets.connect(url)
|
|
312
|
+
if attempt > 0:
|
|
313
|
+
logger.info(
|
|
314
|
+
"{}: WS connected on attempt {}/4",
|
|
315
|
+
self.name, attempt + 1,
|
|
316
|
+
)
|
|
317
|
+
return ws
|
|
318
|
+
except Exception as e: # noqa: BLE001 — retry boundary
|
|
319
|
+
last_exc = e
|
|
320
|
+
logger.warning(
|
|
321
|
+
"{}: WS connect failed (attempt {}/4): {}",
|
|
322
|
+
self.name, attempt + 1, e,
|
|
323
|
+
)
|
|
324
|
+
if attempt < len(backoffs):
|
|
325
|
+
await asyncio.sleep(backoffs[attempt])
|
|
326
|
+
raise RuntimeError(
|
|
327
|
+
f"iFlytek STT connect failed after 4 attempts: {last_exc}"
|
|
169
328
|
)
|
|
170
|
-
return f"wss://{self._WS_HOST}{self._WS_PATH}?{params}"
|
|
171
329
|
|
|
172
330
|
async def transcribe(
|
|
173
331
|
self, audio: AudioData, opts: STTOptions | None = None
|
|
@@ -177,7 +335,6 @@ class IflytekSTT(STTProvider):
|
|
|
177
335
|
logger.info("{}: request received, audio={} bytes", self.name, len(audio.data))
|
|
178
336
|
_t0 = time.perf_counter()
|
|
179
337
|
|
|
180
|
-
url = self._build_auth_url()
|
|
181
338
|
audio_bytes = audio.data
|
|
182
339
|
# iFlytek recommends ~40ms per frame at 16kHz 16bit mono = 1280 bytes.
|
|
183
340
|
# Use larger frames (8000 bytes = ~250ms) with pacing to avoid server
|
|
@@ -190,7 +347,8 @@ class IflytekSTT(STTProvider):
|
|
|
190
347
|
|
|
191
348
|
result_texts: list[str] = []
|
|
192
349
|
|
|
193
|
-
|
|
350
|
+
ws = await self._connect_with_retry()
|
|
351
|
+
async with ws:
|
|
194
352
|
# Send audio in chunks with interleaved receive
|
|
195
353
|
total = len(audio_bytes)
|
|
196
354
|
offset = 0
|
|
@@ -209,46 +367,36 @@ class IflytekSTT(STTProvider):
|
|
|
209
367
|
frame_data = base64.b64encode(chunk).decode("utf-8")
|
|
210
368
|
|
|
211
369
|
if status == 0:
|
|
212
|
-
# First frame
|
|
213
|
-
#
|
|
214
|
-
#
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
# ``parameter.iat.eos`` through here so a kiosk
|
|
222
|
-
# can ship a tighter or looser silence threshold
|
|
223
|
-
# than the deployment yaml.
|
|
224
|
-
eos = (opts.vad_eos
|
|
225
|
-
if opts is not None and opts.vad_eos is not None
|
|
226
|
-
else self.settings.vad_eos)
|
|
227
|
-
business = {
|
|
228
|
-
"language": canon,
|
|
229
|
-
"domain": "iat",
|
|
230
|
-
"vad_eos": eos,
|
|
231
|
-
}
|
|
232
|
-
if canon == "zh_cn":
|
|
233
|
-
business["accent"] = "mandarin"
|
|
370
|
+
# First frame: panel-supplied common/business win;
|
|
371
|
+
# batch path doesn't carry wpgs (no streaming
|
|
372
|
+
# protocol) so include_dwa=False.
|
|
373
|
+
common, business = self._build_first_frame_blocks(
|
|
374
|
+
opts, include_dwa=False,
|
|
375
|
+
)
|
|
376
|
+
data_block = self._build_data_block(
|
|
377
|
+
status=0, audio_b64=frame_data, opts=opts,
|
|
378
|
+
)
|
|
234
379
|
msg = {
|
|
235
|
-
"common":
|
|
380
|
+
"common": common,
|
|
236
381
|
"business": business,
|
|
237
|
-
"data":
|
|
238
|
-
"status": 0,
|
|
239
|
-
"format": "audio/L16;rate=16000",
|
|
240
|
-
"encoding": "raw",
|
|
241
|
-
"audio": frame_data,
|
|
242
|
-
},
|
|
382
|
+
"data": data_block,
|
|
243
383
|
}
|
|
384
|
+
# Java parity: log the exact blocks we're about to
|
|
385
|
+
# ship to iFlytek. Debugging "wrong language /
|
|
386
|
+
# wrong endpoint" reports needs to see this from
|
|
387
|
+
# the log alone — Java's AsrServiceImpl prints the
|
|
388
|
+
# equivalent line at INFO.
|
|
389
|
+
logger.info(
|
|
390
|
+
"{}: ASR first frame business={}, common={}",
|
|
391
|
+
self.name,
|
|
392
|
+
json.dumps(business, ensure_ascii=False),
|
|
393
|
+
json.dumps(common, ensure_ascii=False),
|
|
394
|
+
)
|
|
244
395
|
else:
|
|
245
396
|
msg = {
|
|
246
|
-
"data":
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
"encoding": "raw",
|
|
250
|
-
"audio": frame_data,
|
|
251
|
-
}
|
|
397
|
+
"data": self._build_data_block(
|
|
398
|
+
status=status, audio_b64=frame_data, opts=opts,
|
|
399
|
+
)
|
|
252
400
|
}
|
|
253
401
|
|
|
254
402
|
await ws.send(json.dumps(msg))
|
|
@@ -320,7 +468,6 @@ class IflytekSTT(STTProvider):
|
|
|
320
468
|
if self._client is None:
|
|
321
469
|
raise RuntimeError("Provider not started — call start() first")
|
|
322
470
|
|
|
323
|
-
url = self._build_auth_url()
|
|
324
471
|
results: asyncio.Queue[Transcription | None] = asyncio.Queue()
|
|
325
472
|
_t0 = time.perf_counter()
|
|
326
473
|
_frames_sent = 0
|
|
@@ -332,7 +479,8 @@ class IflytekSTT(STTProvider):
|
|
|
332
479
|
_sender_stop = asyncio.Event()
|
|
333
480
|
|
|
334
481
|
logger.debug("{}: connecting to iFlytek WebSocket...", self.name)
|
|
335
|
-
|
|
482
|
+
ws = await self._connect_with_retry()
|
|
483
|
+
async with ws:
|
|
336
484
|
_t_connected = time.perf_counter()
|
|
337
485
|
logger.info("{}: WS connected in {:.0f}ms", self.name,
|
|
338
486
|
(_t_connected - _t0) * 1000)
|
|
@@ -350,42 +498,38 @@ class IflytekSTT(STTProvider):
|
|
|
350
498
|
break
|
|
351
499
|
frame_data = base64.b64encode(chunk).decode("utf-8")
|
|
352
500
|
if is_first:
|
|
353
|
-
#
|
|
354
|
-
#
|
|
355
|
-
|
|
356
|
-
#
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
"language": canon,
|
|
364
|
-
"domain": "iat",
|
|
365
|
-
"dwa": "wpgs",
|
|
366
|
-
"vad_eos": eos,
|
|
367
|
-
}
|
|
368
|
-
if canon == "zh_cn":
|
|
369
|
-
business["accent"] = "mandarin"
|
|
501
|
+
# First frame: panel-supplied common/business win;
|
|
502
|
+
# streaming path always carries wpgs (see
|
|
503
|
+
# stt-streaming-spec.md realtime-correction
|
|
504
|
+
# protocol) so include_dwa=True.
|
|
505
|
+
common, business = self._build_first_frame_blocks(
|
|
506
|
+
opts, include_dwa=True,
|
|
507
|
+
)
|
|
508
|
+
data_block = self._build_data_block(
|
|
509
|
+
status=0, audio_b64=frame_data, opts=opts,
|
|
510
|
+
)
|
|
370
511
|
msg = {
|
|
371
|
-
"common":
|
|
512
|
+
"common": common,
|
|
372
513
|
"business": business,
|
|
373
|
-
"data":
|
|
374
|
-
"status": 0,
|
|
375
|
-
"format": "audio/L16;rate=16000",
|
|
376
|
-
"encoding": "raw",
|
|
377
|
-
"audio": frame_data,
|
|
378
|
-
},
|
|
514
|
+
"data": data_block,
|
|
379
515
|
}
|
|
516
|
+
# Java parity (AsrServiceImpl line 221): log
|
|
517
|
+
# the first-frame business + common at INFO so
|
|
518
|
+
# operators can verify which language/eos/dwa
|
|
519
|
+
# the panel actually requested without
|
|
520
|
+
# rebuilding the call from yaml + STTOptions.
|
|
521
|
+
logger.info(
|
|
522
|
+
"{}: ASR first frame business={}, common={}",
|
|
523
|
+
self.name,
|
|
524
|
+
json.dumps(business, ensure_ascii=False),
|
|
525
|
+
json.dumps(common, ensure_ascii=False),
|
|
526
|
+
)
|
|
380
527
|
is_first = False
|
|
381
528
|
else:
|
|
382
529
|
msg = {
|
|
383
|
-
"data":
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
"encoding": "raw",
|
|
387
|
-
"audio": frame_data,
|
|
388
|
-
}
|
|
530
|
+
"data": self._build_data_block(
|
|
531
|
+
status=1, audio_b64=frame_data, opts=opts,
|
|
532
|
+
)
|
|
389
533
|
}
|
|
390
534
|
await ws.send(json.dumps(msg))
|
|
391
535
|
_frames_sent += 1
|
|
@@ -396,12 +540,9 @@ class IflytekSTT(STTProvider):
|
|
|
396
540
|
# Send empty last frame to signal end (only if WS still open)
|
|
397
541
|
if not _sender_stop.is_set():
|
|
398
542
|
last_msg = {
|
|
399
|
-
"data":
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
"encoding": "raw",
|
|
403
|
-
"audio": "",
|
|
404
|
-
}
|
|
543
|
+
"data": self._build_data_block(
|
|
544
|
+
status=2, audio_b64="", opts=opts,
|
|
545
|
+
)
|
|
405
546
|
}
|
|
406
547
|
await ws.send(json.dumps(last_msg))
|
|
407
548
|
except websockets.exceptions.ConnectionClosed:
|
|
@@ -29,16 +29,22 @@ class IflytekTTSSettings(BaseSettings):
|
|
|
29
29
|
voice: str = "xiaoyan"
|
|
30
30
|
speed: int = 50
|
|
31
31
|
# Audio output encoding requested from iFlytek.
|
|
32
|
-
# - "lame":
|
|
33
|
-
# - "raw":
|
|
34
|
-
#
|
|
35
|
-
#
|
|
36
|
-
#
|
|
37
|
-
#
|
|
38
|
-
#
|
|
39
|
-
#
|
|
32
|
+
# - "lame": MP3 frames (default; smaller, but caller must decode)
|
|
33
|
+
# - "raw": 16-bit PCM @ 16 kHz mono, big-endian L16 (drop-in
|
|
34
|
+
# playable as raw PCM; required by callers that wrap the
|
|
35
|
+
# bytes in a fixed-format wire envelope and assume PCM,
|
|
36
|
+
# e.g. wallex's RESP_VOICE which advertises
|
|
37
|
+
# encoding=raw/bitDepth=16/sampleRate=16000 to the
|
|
38
|
+
# panel — feeding MP3 bytes through that envelope plays
|
|
39
|
+
# back as pure noise on the speaker).
|
|
40
|
+
# - "speex"/"speex-wb-7": Speex narrowband / wideband (low-bitrate,
|
|
41
|
+
# used by some embedded Wallex panels with constrained
|
|
42
|
+
# uplink). Requires ``speex_size`` to declare the frame
|
|
43
|
+
# size iFlytek should produce. Caller must run a Speex
|
|
44
|
+
# decoder; not auto-handled by browsers.
|
|
40
45
|
# Default stays "lame" for backward-compat; deployments that need
|
|
41
|
-
# PCM (wallex / direct hardware playback)
|
|
46
|
+
# PCM (wallex / direct hardware playback) or Speex (embedded panels)
|
|
47
|
+
# override via yaml.
|
|
42
48
|
aue: str = "lame"
|
|
43
49
|
# Output sample rate for raw PCM mode (only meaningful when
|
|
44
50
|
# aue="raw"). 16000 matches what the panel and the iFlytek
|
|
@@ -50,6 +56,22 @@ class IflytekTTSSettings(BaseSettings):
|
|
|
50
56
|
volume: int = 50
|
|
51
57
|
# Pitch (0-100). Same rationale as volume.
|
|
52
58
|
pitch: int = 50
|
|
59
|
+
# Speex frame size (only meaningful when aue startswith "speex").
|
|
60
|
+
# iFlytek expects an integer that selects a Speex bitrate / frame
|
|
61
|
+
# mode; ``0`` is "auto-pick by aue tag". Leave 0 unless the client
|
|
62
|
+
# decoder requires a specific frame size. Mirrors Java
|
|
63
|
+
# ``AsrConfig.speex-size`` / ``TtsConfig`` parameter.
|
|
64
|
+
speex_size: int = 0
|
|
65
|
+
# ``ws_host`` / ``ws_path`` — iFlytek TTS WebSocket endpoint.
|
|
66
|
+
# Override in yaml (or via ``OPENSPEECH_IFLYTEK_TTS_HOST`` env var)
|
|
67
|
+
# for region-specific endpoints. Default is the global endpoint.
|
|
68
|
+
ws_host: str = "tts-api.xfyun.cn"
|
|
69
|
+
ws_path: str = "/v2/tts"
|
|
70
|
+
# ``timeout_secs`` — connect / read timeout for the underlying
|
|
71
|
+
# httpx AsyncClient. Java's TtsConfig defaults to 8s; we mirror
|
|
72
|
+
# that for parity. Increase only when the iFlytek endpoint is
|
|
73
|
+
# consistently slow to handshake.
|
|
74
|
+
timeout_secs: int = 8
|
|
53
75
|
|
|
54
76
|
class IflytekTTS(TTSProvider):
|
|
55
77
|
name = "iflytek-tts"
|
|
@@ -66,12 +88,9 @@ class IflytekTTS(TTSProvider):
|
|
|
66
88
|
# English assistant-style voices used by wallex deployments.
|
|
67
89
|
"x4_enuk_ashleigh_assist",
|
|
68
90
|
],
|
|
69
|
-
"aue": ["lame", "raw"],
|
|
91
|
+
"aue": ["lame", "raw", "speex", "speex-wb-7"],
|
|
70
92
|
}
|
|
71
93
|
|
|
72
|
-
_WS_HOST = "tts-api.xfyun.cn"
|
|
73
|
-
_WS_PATH = "/v2/tts"
|
|
74
|
-
|
|
75
94
|
def __init__(self, settings: IflytekTTSSettings | None = None) -> None:
|
|
76
95
|
self.settings = settings or IflytekTTSSettings()
|
|
77
96
|
self._client: httpx.AsyncClient | None = None
|
|
@@ -83,7 +102,7 @@ class IflytekTTS(TTSProvider):
|
|
|
83
102
|
|
|
84
103
|
async def start(self) -> None:
|
|
85
104
|
if self._client is None:
|
|
86
|
-
self._client = httpx.AsyncClient(timeout=
|
|
105
|
+
self._client = httpx.AsyncClient(timeout=float(self.settings.timeout_secs))
|
|
87
106
|
self._owns_client = True
|
|
88
107
|
|
|
89
108
|
async def stop(self) -> None:
|
|
@@ -99,10 +118,12 @@ class IflytekTTS(TTSProvider):
|
|
|
99
118
|
now = datetime.now(tz=timezone.utc)
|
|
100
119
|
date = formatdate(timeval=now.timestamp(), localtime=False, usegmt=True)
|
|
101
120
|
|
|
121
|
+
host = self.settings.ws_host
|
|
122
|
+
path = self.settings.ws_path
|
|
102
123
|
signature_origin = (
|
|
103
|
-
f"host: {
|
|
124
|
+
f"host: {host}\n"
|
|
104
125
|
f"date: {date}\n"
|
|
105
|
-
f"GET {
|
|
126
|
+
f"GET {path} HTTP/1.1"
|
|
106
127
|
)
|
|
107
128
|
signature_sha = hmac.new(
|
|
108
129
|
self.settings.api_secret.encode("utf-8"),
|
|
@@ -122,9 +143,9 @@ class IflytekTTS(TTSProvider):
|
|
|
122
143
|
).decode("utf-8")
|
|
123
144
|
|
|
124
145
|
params = urllib.parse.urlencode(
|
|
125
|
-
{"authorization": authorization, "date": date, "host":
|
|
146
|
+
{"authorization": authorization, "date": date, "host": host}
|
|
126
147
|
)
|
|
127
|
-
return f"wss://{
|
|
148
|
+
return f"wss://{host}{path}?{params}"
|
|
128
149
|
|
|
129
150
|
async def synthesize(
|
|
130
151
|
self, text: str, opts: TTSOptions | None = None
|
|
@@ -138,8 +159,17 @@ class IflytekTTS(TTSProvider):
|
|
|
138
159
|
# callers downstream may set wire-protocol encoding metadata from
|
|
139
160
|
# this field, and a wrong tag on the bytes plays back as noise on
|
|
140
161
|
# raw-PCM consumers.
|
|
141
|
-
|
|
142
|
-
|
|
162
|
+
aue = self.settings.aue
|
|
163
|
+
if aue == "raw":
|
|
164
|
+
fmt = "pcm_s16le"
|
|
165
|
+
sr = self.settings.auf_rate
|
|
166
|
+
elif aue.startswith("speex"):
|
|
167
|
+
# Speex narrowband is 8 kHz, wideband ("speex-wb-*") is 16 kHz.
|
|
168
|
+
fmt = "speex"
|
|
169
|
+
sr = 16000 if "wb" in aue else 8000
|
|
170
|
+
else:
|
|
171
|
+
fmt = "mp3"
|
|
172
|
+
sr = 16000
|
|
143
173
|
logger.info(
|
|
144
174
|
"iFlytek TTS: {} chunks, {} bytes total, format={}, sample_rate={}",
|
|
145
175
|
len(parts), len(audio_bytes), fmt, sr,
|
|
@@ -191,7 +221,8 @@ class IflytekTTS(TTSProvider):
|
|
|
191
221
|
"pitch": self.settings.pitch,
|
|
192
222
|
"tte": "UTF8",
|
|
193
223
|
}
|
|
194
|
-
|
|
224
|
+
aue = self.settings.aue
|
|
225
|
+
if aue == "lame":
|
|
195
226
|
# ``sfl=1`` (stream-frame-length) is an MP3-only knob that
|
|
196
227
|
# tells iFlytek to emit per-frame audio rather than waiting
|
|
197
228
|
# for the whole file. It has no meaning for raw PCM (raw is
|
|
@@ -199,11 +230,17 @@ class IflytekTTS(TTSProvider):
|
|
|
199
230
|
# combo with a code 10005 "invalid parameter" — so we only
|
|
200
231
|
# send it on the lame path.
|
|
201
232
|
business["sfl"] = 1
|
|
202
|
-
|
|
233
|
+
elif aue == "raw":
|
|
203
234
|
# Raw / L16 mode requires ``auf`` to declare the PCM
|
|
204
235
|
# sample-rate iFlytek should produce. Java wallex sends
|
|
205
236
|
# ``audio/L16;rate=16000`` here; we mirror that exactly.
|
|
206
237
|
business["auf"] = f"audio/L16;rate={self.settings.auf_rate}"
|
|
238
|
+
elif aue.startswith("speex"):
|
|
239
|
+
# Speex narrowband / wideband. ``speex_size`` is the iFlytek
|
|
240
|
+
# frame-size selector (0 = engine default; non-zero values
|
|
241
|
+
# match the Java ``TtsConfig.speex-size`` parameter).
|
|
242
|
+
if self.settings.speex_size:
|
|
243
|
+
business["speex_size"] = self.settings.speex_size
|
|
207
244
|
return {
|
|
208
245
|
"common": {"app_id": self.settings.app_id},
|
|
209
246
|
"business": business,
|
|
@@ -123,7 +123,12 @@ engines:
|
|
|
123
123
|
# exec_mode: remote
|
|
124
124
|
# preload: true
|
|
125
125
|
# settings:
|
|
126
|
-
# language: zh_cn
|
|
126
|
+
# language: zh_cn # zh_cn / en_us / ja_jp / ko_kr / ru-ru
|
|
127
|
+
# vad_eos: 2000 # ms of trailing silence before final
|
|
128
|
+
# ltc: 3 # 1 sentence / 2 +word / 3 +char timestamps
|
|
129
|
+
# ws_host: iat-api.xfyun.cn # override for region-specific endpoints
|
|
130
|
+
# ws_path: /v2/iat
|
|
131
|
+
# timeout_secs: 15
|
|
127
132
|
|
|
128
133
|
# # pip install 'openspeechapi[faster-whisper-stt]'
|
|
129
134
|
# faster_whisper_stt:
|
|
@@ -190,7 +195,15 @@ engines:
|
|
|
190
195
|
# exec_mode: remote
|
|
191
196
|
# settings:
|
|
192
197
|
# voice: xiaoyan
|
|
193
|
-
# speed: 50
|
|
198
|
+
# speed: 50 # 0-100
|
|
199
|
+
# volume: 50 # 0-100
|
|
200
|
+
# pitch: 50 # 0-100
|
|
201
|
+
# aue: lame # lame / raw / speex / speex-wb-7
|
|
202
|
+
# auf_rate: 16000 # only used when aue=raw (8000 / 16000 / 24000)
|
|
203
|
+
# speex_size: 0 # only used when aue startswith speex (0 = auto)
|
|
204
|
+
# ws_host: tts-api.xfyun.cn # override for region-specific endpoints
|
|
205
|
+
# ws_path: /v2/tts
|
|
206
|
+
# timeout_secs: 8
|
|
194
207
|
|
|
195
208
|
# # pip install 'openspeechapi[piper-tts]'
|
|
196
209
|
# piper_tts:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|