openspeechapi 0.2.5__tar.gz → 0.2.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (247) hide show
  1. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/PKG-INFO +1 -1
  2. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/__init__.py +1 -1
  3. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/core/models.py +19 -0
  4. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/stt/iflytek.py +224 -83
  5. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/tts/iflytek.py +59 -22
  6. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/providers.example.yaml +15 -2
  7. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/pyproject.toml +1 -1
  8. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/.dockerignore +0 -0
  9. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/.env.example +0 -0
  10. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/.github/workflows/ci.yml +0 -0
  11. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/.gitignore +0 -0
  12. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/.tmp/audio/en.aiff +0 -0
  13. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/.tmp/audio/en_16k.wav +0 -0
  14. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/.tmp/audio/en_16k_pad6.wav +0 -0
  15. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/.tmp/audio/en_long.aiff +0 -0
  16. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/.tmp/audio/en_long_16k.wav +0 -0
  17. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/.tmp/audio/en_mid.aiff +0 -0
  18. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/.tmp/audio/en_mid_16k.wav +0 -0
  19. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/.tmp/audio/zh.aiff +0 -0
  20. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/.tmp/audio/zh_16k.wav +0 -0
  21. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/.tmp/openspeech-8600.log +0 -0
  22. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/.tmp/openspeech-serve.log +0 -0
  23. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/.tmp/webui-server.log +0 -0
  24. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/.tmp/webui-server.pid +0 -0
  25. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/.tmp/wlk12101.log +0 -0
  26. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/.tmp/wlk12101.pid +0 -0
  27. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/.tmp/wlk12102.log +0 -0
  28. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/.tmp/wlk12102.pid +0 -0
  29. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/AGENTS.md +0 -0
  30. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/CLAUDE.md +0 -0
  31. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/Dockerfile +0 -0
  32. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/README.md +0 -0
  33. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/aibox-script/aibox-1.0.0-SNAPSHOT-stdout.log +0 -0
  34. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/aibox-script/aibox.2026-04-02.log +0 -0
  35. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/aibox-script/com.user.restart-jar.plist +0 -0
  36. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/aibox-script/restart-jar.sh +0 -0
  37. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/aibox-script.tar.gz +0 -0
  38. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/docker-compose.yml +0 -0
  39. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/docs/architecture/local-engine-manager.md +0 -0
  40. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/docs/architecture/logging-spec.md +0 -0
  41. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/docs/architecture/stt-engineering-optimization-guide.md +0 -0
  42. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/docs/architecture/stt-streaming-spec.md +0 -0
  43. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/docs/architecture/webui-phase-a.md +0 -0
  44. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/docs/engines/fish-speech-docker.md +0 -0
  45. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/docs/engines/fish-speech-native.md +0 -0
  46. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/docs/engines/stt-native-models.md +0 -0
  47. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/docs/superpowers/plans/2026-04-01-phase1-implementation.md +0 -0
  48. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/docs/superpowers/plans/2026-04-11-macos-native-tts-stt.md +0 -0
  49. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/docs/superpowers/specs/2026-04-01-openspeech-api-design.md +0 -0
  50. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/docs/superpowers/specs/2026-04-03-hot-lazy-loading.md +0 -0
  51. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/docs/superpowers/specs/2026-04-03-phase2-protocol-layer.md +0 -0
  52. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/docs/superpowers/specs/2026-04-03-phase3-production.md +0 -0
  53. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/docs/superpowers/specs/2026-04-11-macos-native-tts-stt-design.md +0 -0
  54. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/docs/superpowers/specs/2026-04-12-cloud-providers-webui-design.md +0 -0
  55. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/docs/superpowers/specs/2026-04-15-streaming-tts-stt-fixes-display-names.md +0 -0
  56. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/docs/superpowers/specs/2026-04-16-provider-management-engines-rename.md +0 -0
  57. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/examples/client_stt.py +0 -0
  58. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/examples/client_tts.py +0 -0
  59. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/examples/stt_simple.py +0 -0
  60. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/examples/tts_simple.py +0 -0
  61. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/__main__.py +0 -0
  62. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/cli.py +0 -0
  63. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/client/__init__.py +0 -0
  64. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/client/client.py +0 -0
  65. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/config.py +0 -0
  66. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/core/__init__.py +0 -0
  67. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/core/base.py +0 -0
  68. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/core/enums.py +0 -0
  69. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/core/registry.py +0 -0
  70. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/core/settings.py +0 -0
  71. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/demo.py +0 -0
  72. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/dispatch/__init__.py +0 -0
  73. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/dispatch/context.py +0 -0
  74. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/dispatch/dispatcher.py +0 -0
  75. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/dispatch/executors/__init__.py +0 -0
  76. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/dispatch/executors/base.py +0 -0
  77. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/dispatch/executors/in_process.py +0 -0
  78. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/dispatch/executors/remote.py +0 -0
  79. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/dispatch/executors/subprocess_exec.py +0 -0
  80. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/dispatch/fanout.py +0 -0
  81. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/dispatch/filters.py +0 -0
  82. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/dispatch/lifecycle.py +0 -0
  83. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/dispatch/watcher.py +0 -0
  84. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/engine_catalog.py +0 -0
  85. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/engine_registry.yaml +0 -0
  86. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/exceptions.py +0 -0
  87. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/factory.py +0 -0
  88. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/local_engines/__init__.py +0 -0
  89. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/local_engines/aim_resolver.py +0 -0
  90. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/local_engines/backends/__init__.py +0 -0
  91. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/local_engines/backends/docker_backend.py +0 -0
  92. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/local_engines/backends/native_backend.py +0 -0
  93. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/local_engines/base.py +0 -0
  94. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/local_engines/engines/__init__.py +0 -0
  95. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/local_engines/engines/faster_whisper.py +0 -0
  96. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/local_engines/engines/fish_speech.py +0 -0
  97. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/local_engines/engines/sherpa_onnx.py +0 -0
  98. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/local_engines/engines/whisper.py +0 -0
  99. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/local_engines/engines/whisperlivekit.py +0 -0
  100. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/local_engines/manager.py +0 -0
  101. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/local_engines/models.py +0 -0
  102. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/local_engines/progress.py +0 -0
  103. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/local_engines/registry.py +0 -0
  104. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/local_engines/task_store.py +0 -0
  105. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/local_engines/tasks.py +0 -0
  106. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/logging_config.py +0 -0
  107. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/observe/__init__.py +0 -0
  108. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/observe/base.py +0 -0
  109. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/observe/debug.py +0 -0
  110. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/observe/latency.py +0 -0
  111. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/observe/metrics.py +0 -0
  112. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/observe/tracing.py +0 -0
  113. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/observe/usage.py +0 -0
  114. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/__init__.py +0 -0
  115. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/_template.py +0 -0
  116. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/stt/__init__.py +0 -0
  117. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/stt/alibaba.py +0 -0
  118. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/stt/assemblyai.py +0 -0
  119. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/stt/azure_speech.py +0 -0
  120. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/stt/baidu.py +0 -0
  121. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/stt/deepgram.py +0 -0
  122. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/stt/elevenlabs.py +0 -0
  123. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/stt/faster_whisper.py +0 -0
  124. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/stt/google_cloud.py +0 -0
  125. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/stt/macos_speech.py +0 -0
  126. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/stt/openai.py +0 -0
  127. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/stt/sherpa_onnx.py +0 -0
  128. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/stt/tencent.py +0 -0
  129. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/stt/volcengine.py +0 -0
  130. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/stt/whisper.py +0 -0
  131. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/stt/whisperlivekit.py +0 -0
  132. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/stt/windows_speech.py +0 -0
  133. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/tts/__init__.py +0 -0
  134. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/tts/alibaba.py +0 -0
  135. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/tts/azure_speech.py +0 -0
  136. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/tts/baidu.py +0 -0
  137. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/tts/coqui.py +0 -0
  138. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/tts/cosyvoice.py +0 -0
  139. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/tts/deepgram.py +0 -0
  140. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/tts/elevenlabs.py +0 -0
  141. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/tts/fish_speech.py +0 -0
  142. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/tts/google_cloud.py +0 -0
  143. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/tts/macos_say.py +0 -0
  144. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/tts/minimax.py +0 -0
  145. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/tts/openai.py +0 -0
  146. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/tts/piper.py +0 -0
  147. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/tts/tencent.py +0 -0
  148. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/tts/volcengine.py +0 -0
  149. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/providers/tts/windows_sapi.py +0 -0
  150. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/server/__init__.py +0 -0
  151. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/server/app.py +0 -0
  152. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/server/auth.py +0 -0
  153. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/server/middleware.py +0 -0
  154. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/server/routes/__init__.py +0 -0
  155. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/server/routes/management.py +0 -0
  156. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/server/routes/stt.py +0 -0
  157. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/server/routes/tts.py +0 -0
  158. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/server/routes/webui.py +0 -0
  159. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/server/webui/app.js +0 -0
  160. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/server/webui/index.html +0 -0
  161. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/server/webui/styles.css +0 -0
  162. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/server/ws/__init__.py +0 -0
  163. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/server/ws/stt_stream.py +0 -0
  164. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/server/ws/tts_stream.py +0 -0
  165. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/telemetry/__init__.py +0 -0
  166. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/telemetry/perf.py +0 -0
  167. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/utils/__init__.py +0 -0
  168. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/utils/audio_converter.py +0 -0
  169. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/utils/audio_playback.py +0 -0
  170. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/openspeechapi/vendor_registry.yaml +0 -0
  171. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/output/output.wav +0 -0
  172. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/output.wav +0 -0
  173. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/scripts/engines/cloud/install.sh +0 -0
  174. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/scripts/engines/faster-whisper/native/install.sh +0 -0
  175. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/scripts/engines/fish-speech/native/install.sh +0 -0
  176. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/scripts/engines/macos-stt/install.sh +0 -0
  177. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/scripts/engines/macos-stt/macos_stt.swift +0 -0
  178. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/scripts/engines/macos-stt/request_auth.swift +0 -0
  179. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/scripts/engines/sherpa-onnx/native/install.sh +0 -0
  180. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/scripts/engines/sherpa-onnx/native/run_streaming_server.py +0 -0
  181. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/scripts/engines/whisper/native/install.sh +0 -0
  182. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/scripts/engines/whisperlivekit/native/install.sh +0 -0
  183. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/__init__.py +0 -0
  184. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/conftest.py +0 -0
  185. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/e2e/__init__.py +0 -0
  186. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/e2e/conftest.py +0 -0
  187. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/e2e/test_fanout_e2e.py +0 -0
  188. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/e2e/test_faster_whisper_e2e.py +0 -0
  189. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/e2e/test_openai_e2e.py +0 -0
  190. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/e2e/test_webui_e2e.py +0 -0
  191. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/fixtures/hello.wav +0 -0
  192. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/integration/__init__.py +0 -0
  193. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/integration/test_fanout_integration.py +0 -0
  194. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/integration/test_in_process_integration.py +0 -0
  195. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/integration/test_server_client.py +0 -0
  196. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/__init__.py +0 -0
  197. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_aim_resolver.py +0 -0
  198. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_audio_converter.py +0 -0
  199. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_audio_playback.py +0 -0
  200. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_base.py +0 -0
  201. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_cli.py +0 -0
  202. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_cli_engine.py +0 -0
  203. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_client.py +0 -0
  204. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_config.py +0 -0
  205. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_context.py +0 -0
  206. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_debug_observer.py +0 -0
  207. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_dispatcher.py +0 -0
  208. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_docker_backend_progress.py +0 -0
  209. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_engine_registry.py +0 -0
  210. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_enums.py +0 -0
  211. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_executor_base.py +0 -0
  212. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_fanout.py +0 -0
  213. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_filters.py +0 -0
  214. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_hot_reload.py +0 -0
  215. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_in_process.py +0 -0
  216. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_latency_observer.py +0 -0
  217. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_lifecycle.py +0 -0
  218. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_local_engine_task_store.py +0 -0
  219. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_local_engines_manager.py +0 -0
  220. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_logging.py +0 -0
  221. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_metrics_observer.py +0 -0
  222. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_models.py +0 -0
  223. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_native_backend.py +0 -0
  224. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_observer_base.py +0 -0
  225. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_plugin_mechanism.py +0 -0
  226. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_providers/__init__.py +0 -0
  227. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_providers/test_cloud_providers.py +0 -0
  228. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_providers/test_elevenlabs_stt.py +0 -0
  229. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_providers/test_macos_say.py +0 -0
  230. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_providers/test_macos_speech.py +0 -0
  231. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_providers/test_openai_base_url.py +0 -0
  232. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_providers/test_openai_stt.py +0 -0
  233. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_providers/test_openai_tts.py +0 -0
  234. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_providers/test_sherpa_onnx_stt.py +0 -0
  235. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_providers/test_stt_stubs.py +0 -0
  236. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_providers/test_tts_stubs.py +0 -0
  237. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_providers/test_whisperlivekit_stt.py +0 -0
  238. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_registry.py +0 -0
  239. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_remote.py +0 -0
  240. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_server/__init__.py +0 -0
  241. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_server/test_auth.py +0 -0
  242. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_server/test_config_api.py +0 -0
  243. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_server/test_routes.py +0 -0
  244. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_server/test_websocket.py +0 -0
  245. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_subprocess.py +0 -0
  246. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_usage_observer.py +0 -0
  247. {openspeechapi-0.2.5 → openspeechapi-0.2.7}/tests/unit/test_watcher.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: openspeechapi
3
- Version: 0.2.5
3
+ Version: 0.2.7
4
4
  Summary: Unified speech interface for STT/TTS providers
5
5
  Requires-Python: >=3.11
6
6
  Requires-Dist: httpx>=0.27
@@ -1,6 +1,6 @@
1
1
  """OpenSpeechAPI — Unified speech interface for STT/TTS providers."""
2
2
 
3
- __version__ = "0.2.5"
3
+ __version__ = "0.2.7"
4
4
 
5
5
  from openspeechapi.config import load_config
6
6
  from openspeechapi.core.base import SpeechProvider, STTProvider, TTSProvider
@@ -60,6 +60,25 @@ class STTOptions:
60
60
  # voice assistant. Providers that don't support VAD finalization
61
61
  # (Whisper, Faster-Whisper) silently ignore this field.
62
62
  vad_eos: int | None = None
63
+ # ── iFlytek IAT pass-through (matches Java AsrServiceImpl) ───────
64
+ # Wallex's Java AsrService forwards the client-supplied
65
+ # ``audio.common`` / ``audio.business`` / extra ``audio.data``
66
+ # fields verbatim to iFlytek's WS, treating the panel as the
67
+ # source of truth for ASR parameters. The Python pipeline now
68
+ # mirrors that contract: when these fields are non-None, the
69
+ # iFlytek provider uses them as the basis for the WS first frame
70
+ # (with ``setdefault`` fallback to its own settings for any keys
71
+ # the client omitted) instead of building the blocks purely from
72
+ # ``speech_providers.yaml``. ``None`` preserves the existing
73
+ # yaml-driven behaviour. Other STT providers ignore these fields.
74
+ iflytek_common: dict | None = None
75
+ iflytek_business: dict | None = None
76
+ # Extra fields to merge into the iFlytek ``data`` block beyond the
77
+ # canonical ``status``/``format``/``encoding``/``audio`` quadruple
78
+ # (e.g. panel-supplied ``data_type``). Keys that collide with the
79
+ # canonical set never override it (the provider's defaults still win,
80
+ # since the canonical set is required by the IAT spec).
81
+ iflytek_data_extras: dict | None = None
63
82
 
64
83
 
65
84
  @dataclass
@@ -52,6 +52,54 @@ class IflytekSTTSettings(BaseSettings):
52
52
  # via ``speech_providers.yaml`` so different sites can pick their
53
53
  # own latency-vs-tolerance trade-off.
54
54
  vad_eos: int = 2000
55
+ # ``ltc`` — sentence-level timestamp granularity sent in the
56
+ # business block of the IAT request (1 = sentence segments only;
57
+ # 2 = + word boundaries; 3 = + character boundaries). Java's
58
+ # ``AsrConfig.ltc`` defaults to 3; we mirror that so downstream
59
+ # consumers expecting per-character timing offsets keep working.
60
+ # Lower values shave a few bytes per response and slightly reduce
61
+ # post-processing work for callers that don't use the timestamps.
62
+ ltc: int = 3
63
+ # ``ws_host`` / ``ws_path`` — iFlytek IAT WebSocket endpoint. The
64
+ # default ``iat-api.xfyun.cn`` is the global endpoint; multi-region
65
+ # deployments (e.g. directed-domain endpoints such as
66
+ # ``ws-api-dx.xfyun.cn``) override these in yaml or via env var
67
+ # so the WS URL never requires a code change.
68
+ ws_host: str = "iat-api.xfyun.cn"
69
+ ws_path: str = "/v2/iat"
70
+ # ``timeout_secs`` — connect / read / write / pool timeout for the underlying
71
+ # httpx AsyncClient. Java's AsrConfig defaults to 15s; we mirror
72
+ # that. Lower if the network has aggressive proxies, higher only
73
+ # if the iFlytek endpoint is consistently slow to handshake.
74
+ timeout_secs: int = 15
75
+ # ── Java AsrConfig parity (used as setdefault fallbacks) ────────
76
+ # When a wallex client (panel) supplies ``audio.business``/
77
+ # ``audio.common`` per-frame, those values flow through via
78
+ # ``STTOptions.iflytek_business``/``STTOptions.iflytek_common`` and
79
+ # become the WS first frame body. The settings below act as
80
+ # ``setdefault`` fallbacks for keys the client omits, mirroring
81
+ # Java ``AsrConfig``'s field set so the two implementations
82
+ # produce identical wire frames given the same panel payload.
83
+ #
84
+ # ``domain`` — iFlytek IAT domain. Java default ``iat``; a few
85
+ # vertical models (``medical`` / ``tv``) exist but most
86
+ # deployments stay on the general one.
87
+ domain: str = "iat"
88
+ # ``accent`` — only meaningful when ``language=="zh_cn"`` (selects
89
+ # mandarin vs. cantonese etc.). Java sends ``mandarin`` blindly;
90
+ # we keep the same default so the WS frame matches Java byte-for-
91
+ # byte when the panel omits ``business.accent``. iFlytek treats
92
+ # it as a no-op for non-Chinese language codes.
93
+ accent: str = "mandarin"
94
+ # ``dwa`` — dynamic word adjustment / wpgs (实时纠错). Java's
95
+ # default ``wpgs`` is the realtime-correction mode panels rely on
96
+ # for the partial-result protocol described in
97
+ # ``stt-streaming-spec.md``. Empty disables it.
98
+ dwa: str = "wpgs"
99
+ # ``sample_rate`` — required by the IAT directed-domain endpoint
100
+ # (``ws-api-dx.xfyun.cn``) which expects it in ``business``. Java
101
+ # AsrConfig.sampleRate=16000.
102
+ sample_rate: int = 16000
55
103
 
56
104
 
57
105
  # iFlytek expects the full locale tag; common ISO short codes need to
@@ -92,9 +140,6 @@ class IflytekSTT(STTProvider):
92
140
  "language": ["zh_cn", "en_us", "ja_jp", "ko_kr", "ru-ru"],
93
141
  }
94
142
 
95
- _WS_HOST = "iat-api.xfyun.cn"
96
- _WS_PATH = "/v2/iat"
97
-
98
143
  def __init__(self, settings: IflytekSTTSettings | None = None) -> None:
99
144
  self.settings = settings or IflytekSTTSettings()
100
145
  self._client: httpx.AsyncClient | None = None
@@ -106,7 +151,7 @@ class IflytekSTT(STTProvider):
106
151
 
107
152
  async def start(self) -> None:
108
153
  if self._client is None:
109
- self._client = httpx.AsyncClient(timeout=60.0)
154
+ self._client = httpx.AsyncClient(timeout=float(self.settings.timeout_secs))
110
155
  self._owns_client = True
111
156
  # Surface the effective language (after alias mapping) and
112
157
  # vad_eos at startup so deployments can verify the iFlytek model
@@ -142,10 +187,12 @@ class IflytekSTT(STTProvider):
142
187
  now = datetime.now(tz=timezone.utc)
143
188
  date = formatdate(timeval=now.timestamp(), localtime=False, usegmt=True)
144
189
 
190
+ host = self.settings.ws_host
191
+ path = self.settings.ws_path
145
192
  signature_origin = (
146
- f"host: {self._WS_HOST}\n"
193
+ f"host: {host}\n"
147
194
  f"date: {date}\n"
148
- f"GET {self._WS_PATH} HTTP/1.1"
195
+ f"GET {path} HTTP/1.1"
149
196
  )
150
197
  signature_sha = hmac.new(
151
198
  self.settings.api_secret.encode("utf-8"),
@@ -165,9 +212,120 @@ class IflytekSTT(STTProvider):
165
212
  ).decode("utf-8")
166
213
 
167
214
  params = urllib.parse.urlencode(
168
- {"authorization": authorization, "date": date, "host": self._WS_HOST}
215
+ {"authorization": authorization, "date": date, "host": host}
216
+ )
217
+ return f"wss://{host}{path}?{params}"
218
+
219
+ def _build_first_frame_blocks(
220
+ self,
221
+ opts: STTOptions | None,
222
+ *,
223
+ include_dwa: bool,
224
+ ) -> tuple[dict, dict]:
225
+ """Build the ``common`` / ``business`` blocks for the WS first frame.
226
+
227
+ Mirrors Java ``AsrServiceImpl.sendToAsr`` semantics: when
228
+ ``opts.iflytek_common``/``opts.iflytek_business`` is provided
229
+ (typically by wallex relaying the panel's per-frame
230
+ ``audio.common`` / ``audio.business``), those dicts are the
231
+ source of truth. We only ``setdefault`` keys the client omitted,
232
+ falling back to ``self.settings`` so a panel that misses a
233
+ single field doesn't get a malformed frame.
234
+
235
+ ``include_dwa`` differs between ``transcribe()`` (batch — no
236
+ ``dwa`` default is added because there's no streaming protocol) and
237
+ ``transcribe_stream()`` (``dwa`` falls back to ``settings.dwa``).
238
+ """
239
+ canon = _canonical_language(self.settings.language)
240
+ eos = (opts.vad_eos
241
+ if opts is not None and opts.vad_eos is not None
242
+ else self.settings.vad_eos)
243
+
244
+ # ── business block ─────────────────────────────────────────
245
+ if opts is not None and opts.iflytek_business:
246
+ # Panel-supplied is authoritative; copy then fill missing
247
+ # keys from yaml so we never send a partial frame.
248
+ business = dict(opts.iflytek_business)
249
+ else:
250
+ business = {}
251
+
252
+ business.setdefault("language", canon)
253
+ business.setdefault("domain", self.settings.domain)
254
+ business.setdefault("vad_eos", eos)
255
+ business.setdefault("ltc", self.settings.ltc)
256
+ if include_dwa:
257
+ business.setdefault("dwa", self.settings.dwa)
258
+ # ``accent`` is only meaningful for the Chinese model. Java
259
+ # sends ``mandarin`` blindly; we keep that for byte-for-byte
260
+ # parity when the panel omits it AND ``settings.language`` is zh_cn
261
+ # (a panel-supplied ``language`` is not consulted here). For other
262
+ # languages we leave it out entirely (sending it is a no-op on iFlytek's side but confuses log readers).
263
+ if "accent" not in business and canon == "zh_cn":
264
+ business["accent"] = self.settings.accent
265
+
266
+ # ── common block ──────────────────────────────────────────
267
+ if opts is not None and opts.iflytek_common:
268
+ common = dict(opts.iflytek_common)
269
+ else:
270
+ common = {}
271
+ common.setdefault("app_id", self.settings.app_id)
272
+
273
+ return common, business
274
+
275
+ @staticmethod
276
+ def _build_data_block(
277
+ *, status: int, audio_b64: str, opts: STTOptions | None,
278
+ ) -> dict:
279
+ """Assemble the ``data`` block, merging panel-supplied extras.
280
+
281
+ Canonical keys (``status``/``format``/``encoding``/``audio``)
282
+ always win over ``iflytek_data_extras`` because the IAT spec
283
+ requires them in a specific shape; extras like the panel's
284
+ ``data_type`` flow through.
285
+ """
286
+ if opts is not None and opts.iflytek_data_extras:
287
+ data = dict(opts.iflytek_data_extras)
288
+ else:
289
+ data = {}
290
+ data["status"] = status
291
+ data["format"] = "audio/L16;rate=16000"
292
+ data["encoding"] = "raw"
293
+ data["audio"] = audio_b64
294
+ return data
295
+
296
+ async def _connect_with_retry(self) -> "websockets.ClientConnection":
297
+ """Connect to iFlytek IAT WS with backoff, mirroring Java parity.
298
+
299
+ Java ``AsrServiceImpl.connectWithRetry`` does 4 attempts with
300
+ 300/600/1200ms backoff before giving up. The previous Python
301
+ path was one-shot: a single TCP/handshake hiccup surfaced as a
302
+ hard ASR failure. Aligning the retry budget keeps wallex's
303
+ Python and Java front-ends behaviourally interchangeable on
304
+ flaky links.
305
+ """
306
+ backoffs = (0.3, 0.6, 1.2) # delays AFTER attempts 1, 2, 3
307
+ last_exc: Exception | None = None
308
+ for attempt in range(4):
309
+ try:
310
+ url = self._build_auth_url()
311
+ ws = await websockets.connect(url)
312
+ if attempt > 0:
313
+ logger.info(
314
+ "{}: WS connected on attempt {}/4",
315
+ self.name, attempt + 1,
316
+ )
317
+ return ws
318
+ except Exception as e: # noqa: BLE001 — retry boundary
319
+ last_exc = e
320
+ logger.warning(
321
+ "{}: WS connect failed (attempt {}/4): {}",
322
+ self.name, attempt + 1, e,
323
+ )
324
+ if attempt < len(backoffs):
325
+ await asyncio.sleep(backoffs[attempt])
326
+ raise RuntimeError(
327
+ f"iFlytek STT connect failed after 4 attempts: {last_exc}"
169
328
  )
170
- return f"wss://{self._WS_HOST}{self._WS_PATH}?{params}"
171
329
 
172
330
  async def transcribe(
173
331
  self, audio: AudioData, opts: STTOptions | None = None
@@ -177,7 +335,6 @@ class IflytekSTT(STTProvider):
177
335
  logger.info("{}: request received, audio={} bytes", self.name, len(audio.data))
178
336
  _t0 = time.perf_counter()
179
337
 
180
- url = self._build_auth_url()
181
338
  audio_bytes = audio.data
182
339
  # iFlytek recommends ~40ms per frame at 16kHz 16bit mono = 1280 bytes.
183
340
  # Use larger frames (8000 bytes = ~250ms) with pacing to avoid server
@@ -190,7 +347,8 @@ class IflytekSTT(STTProvider):
190
347
 
191
348
  result_texts: list[str] = []
192
349
 
193
- async with websockets.connect(url) as ws:
350
+ ws = await self._connect_with_retry()
351
+ async with ws:
194
352
  # Send audio in chunks with interleaved receive
195
353
  total = len(audio_bytes)
196
354
  offset = 0
@@ -209,46 +367,36 @@ class IflytekSTT(STTProvider):
209
367
  frame_data = base64.b64encode(chunk).decode("utf-8")
210
368
 
211
369
  if status == 0:
212
- # First frame includes common and business params.
213
- # ``accent="mandarin"`` is only meaningful for the
214
- # Chinese model; sending it on en_us / ja_jp / etc.
215
- # is a wire-level no-op on iFlytek's side but
216
- # confuses anyone reading the request body, so
217
- # gate it on the canonical language.
218
- canon = _canonical_language(self.settings.language)
219
- # Per-call override (``opts.vad_eos``) trumps the
220
- # provider default. Wallex routes the panel's
221
- # ``parameter.iat.eos`` through here so a kiosk
222
- # can ship a tighter or looser silence threshold
223
- # than the deployment yaml.
224
- eos = (opts.vad_eos
225
- if opts is not None and opts.vad_eos is not None
226
- else self.settings.vad_eos)
227
- business = {
228
- "language": canon,
229
- "domain": "iat",
230
- "vad_eos": eos,
231
- }
232
- if canon == "zh_cn":
233
- business["accent"] = "mandarin"
370
+ # First frame: panel-supplied common/business win;
371
+ # batch path doesn't carry wpgs (no streaming
372
+ # protocol) so include_dwa=False.
373
+ common, business = self._build_first_frame_blocks(
374
+ opts, include_dwa=False,
375
+ )
376
+ data_block = self._build_data_block(
377
+ status=0, audio_b64=frame_data, opts=opts,
378
+ )
234
379
  msg = {
235
- "common": {"app_id": self.settings.app_id},
380
+ "common": common,
236
381
  "business": business,
237
- "data": {
238
- "status": 0,
239
- "format": "audio/L16;rate=16000",
240
- "encoding": "raw",
241
- "audio": frame_data,
242
- },
382
+ "data": data_block,
243
383
  }
384
+ # Java parity: log the exact blocks we're about to
385
+ # ship to iFlytek. Debugging "wrong language /
386
+ # wrong endpoint" reports requires seeing this in
387
+ # the log alone — Java's AsrServiceImpl prints the
388
+ # equivalent line at INFO.
389
+ logger.info(
390
+ "{}: ASR first frame business={}, common={}",
391
+ self.name,
392
+ json.dumps(business, ensure_ascii=False),
393
+ json.dumps(common, ensure_ascii=False),
394
+ )
244
395
  else:
245
396
  msg = {
246
- "data": {
247
- "status": status,
248
- "format": "audio/L16;rate=16000",
249
- "encoding": "raw",
250
- "audio": frame_data,
251
- }
397
+ "data": self._build_data_block(
398
+ status=status, audio_b64=frame_data, opts=opts,
399
+ )
252
400
  }
253
401
 
254
402
  await ws.send(json.dumps(msg))
@@ -320,7 +468,6 @@ class IflytekSTT(STTProvider):
320
468
  if self._client is None:
321
469
  raise RuntimeError("Provider not started — call start() first")
322
470
 
323
- url = self._build_auth_url()
324
471
  results: asyncio.Queue[Transcription | None] = asyncio.Queue()
325
472
  _t0 = time.perf_counter()
326
473
  _frames_sent = 0
@@ -332,7 +479,8 @@ class IflytekSTT(STTProvider):
332
479
  _sender_stop = asyncio.Event()
333
480
 
334
481
  logger.debug("{}: connecting to iFlytek WebSocket...", self.name)
335
- async with websockets.connect(url) as ws:
482
+ ws = await self._connect_with_retry()
483
+ async with ws:
336
484
  _t_connected = time.perf_counter()
337
485
  logger.info("{}: WS connected in {:.0f}ms", self.name,
338
486
  (_t_connected - _t0) * 1000)
@@ -350,42 +498,38 @@ class IflytekSTT(STTProvider):
350
498
  break
351
499
  frame_data = base64.b64encode(chunk).decode("utf-8")
352
500
  if is_first:
353
- # See transcribe() for rationale on
354
- # canonicalizing language and gating accent.
355
- canon = _canonical_language(self.settings.language)
356
- # Per-call ``opts.vad_eos`` (e.g. wallex
357
- # forwarding the panel's ``parameter.iat.eos``)
358
- # trumps the provider's configured default.
359
- eos = (opts.vad_eos
360
- if opts is not None and opts.vad_eos is not None
361
- else self.settings.vad_eos)
362
- business = {
363
- "language": canon,
364
- "domain": "iat",
365
- "dwa": "wpgs",
366
- "vad_eos": eos,
367
- }
368
- if canon == "zh_cn":
369
- business["accent"] = "mandarin"
501
+ # First frame: panel-supplied common/business win;
502
+ # streaming path always carries wpgs (see
503
+ # stt-streaming-spec.md realtime-correction
504
+ # protocol) so include_dwa=True.
505
+ common, business = self._build_first_frame_blocks(
506
+ opts, include_dwa=True,
507
+ )
508
+ data_block = self._build_data_block(
509
+ status=0, audio_b64=frame_data, opts=opts,
510
+ )
370
511
  msg = {
371
- "common": {"app_id": self.settings.app_id},
512
+ "common": common,
372
513
  "business": business,
373
- "data": {
374
- "status": 0,
375
- "format": "audio/L16;rate=16000",
376
- "encoding": "raw",
377
- "audio": frame_data,
378
- },
514
+ "data": data_block,
379
515
  }
516
+ # Java parity (AsrServiceImpl line 221): log
517
+ # the first-frame business + common at INFO so
518
+ # operators can verify which language/eos/dwa
519
+ # the panel actually requested without
520
+ # rebuilding the call from yaml + STTOptions.
521
+ logger.info(
522
+ "{}: ASR first frame business={}, common={}",
523
+ self.name,
524
+ json.dumps(business, ensure_ascii=False),
525
+ json.dumps(common, ensure_ascii=False),
526
+ )
380
527
  is_first = False
381
528
  else:
382
529
  msg = {
383
- "data": {
384
- "status": 1,
385
- "format": "audio/L16;rate=16000",
386
- "encoding": "raw",
387
- "audio": frame_data,
388
- }
530
+ "data": self._build_data_block(
531
+ status=1, audio_b64=frame_data, opts=opts,
532
+ )
389
533
  }
390
534
  await ws.send(json.dumps(msg))
391
535
  _frames_sent += 1
@@ -396,12 +540,9 @@ class IflytekSTT(STTProvider):
396
540
  # Send empty last frame to signal end (only if WS still open)
397
541
  if not _sender_stop.is_set():
398
542
  last_msg = {
399
- "data": {
400
- "status": 2,
401
- "format": "audio/L16;rate=16000",
402
- "encoding": "raw",
403
- "audio": "",
404
- }
543
+ "data": self._build_data_block(
544
+ status=2, audio_b64="", opts=opts,
545
+ )
405
546
  }
406
547
  await ws.send(json.dumps(last_msg))
407
548
  except websockets.exceptions.ConnectionClosed:
@@ -29,16 +29,22 @@ class IflytekTTSSettings(BaseSettings):
29
29
  voice: str = "xiaoyan"
30
30
  speed: int = 50
31
31
  # Audio output encoding requested from iFlytek.
32
- # - "lame": MP3 frames (default; smaller, but caller must decode)
33
- # - "raw": 16-bit PCM @ 16 kHz mono, big-endian L16 (drop-in
34
- # playable as raw PCM; required by callers that wrap the
35
- # bytes in a fixed-format wire envelope and assume PCM,
36
- # e.g. wallex's RESP_VOICE which advertises
37
- # encoding=raw/bitDepth=16/sampleRate=16000 to the
38
- # panel — feeding MP3 bytes through that envelope plays
39
- # back as pure noise on the speaker).
32
+ # - "lame": MP3 frames (default; smaller, but caller must decode)
33
+ # - "raw": 16-bit PCM @ 16 kHz mono, big-endian L16 (drop-in
34
+ # playable as raw PCM; required by callers that wrap the
35
+ # bytes in a fixed-format wire envelope and assume PCM,
36
+ # e.g. wallex's RESP_VOICE which advertises
37
+ # encoding=raw/bitDepth=16/sampleRate=16000 to the
38
+ # panel — feeding MP3 bytes through that envelope plays
39
+ # back as pure noise on the speaker).
40
+ # - "speex"/"speex-wb-7": Speex narrowband / wideband (low-bitrate,
41
+ # used by some embedded Wallex panels with constrained
42
+ # uplink). Set ``speex_size`` only to pin the frame
43
+ # size iFlytek should produce (0 = engine default). Caller must run a Speex
44
+ # decoder; not auto-handled by browsers.
40
45
  # Default stays "lame" for backward-compat; deployments that need
41
- # PCM (wallex / direct hardware playback) override via yaml.
46
+ # PCM (wallex / direct hardware playback) or Speex (embedded panels)
47
+ # override via yaml.
42
48
  aue: str = "lame"
43
49
  # Output sample rate for raw PCM mode (only meaningful when
44
50
  # aue="raw"). 16000 matches what the panel and the iFlytek
@@ -50,6 +56,22 @@ class IflytekTTSSettings(BaseSettings):
50
56
  volume: int = 50
51
57
  # Pitch (0-100). Same rationale as volume.
52
58
  pitch: int = 50
59
+ # Speex frame size (only meaningful when aue startswith "speex").
60
+ # iFlytek expects an integer that selects a Speex bitrate / frame
61
+ # mode; ``0`` is "auto-pick by aue tag". Leave 0 unless the client
62
+ # decoder requires a specific frame size. Mirrors Java
63
+ # ``AsrConfig.speex-size`` / ``TtsConfig`` parameter.
64
+ speex_size: int = 0
65
+ # ``ws_host`` / ``ws_path`` — iFlytek TTS WebSocket endpoint.
66
+ # Override in yaml (or via ``OPENSPEECH_IFLYTEK_TTS_HOST`` env var)
67
+ # for region-specific endpoints. Default is the global endpoint.
68
+ ws_host: str = "tts-api.xfyun.cn"
69
+ ws_path: str = "/v2/tts"
70
+ # ``timeout_secs`` — connect / read timeout for the underlying
71
+ # httpx AsyncClient. Java's TtsConfig defaults to 8s; we mirror
72
+ # that for parity. Increase only when the iFlytek endpoint is
73
+ # consistently slow to handshake.
74
+ timeout_secs: int = 8
53
75
 
54
76
  class IflytekTTS(TTSProvider):
55
77
  name = "iflytek-tts"
@@ -66,12 +88,9 @@ class IflytekTTS(TTSProvider):
66
88
  # English assistant-style voices used by wallex deployments.
67
89
  "x4_enuk_ashleigh_assist",
68
90
  ],
69
- "aue": ["lame", "raw"],
91
+ "aue": ["lame", "raw", "speex", "speex-wb-7"],
70
92
  }
71
93
 
72
- _WS_HOST = "tts-api.xfyun.cn"
73
- _WS_PATH = "/v2/tts"
74
-
75
94
  def __init__(self, settings: IflytekTTSSettings | None = None) -> None:
76
95
  self.settings = settings or IflytekTTSSettings()
77
96
  self._client: httpx.AsyncClient | None = None
@@ -83,7 +102,7 @@ class IflytekTTS(TTSProvider):
83
102
 
84
103
  async def start(self) -> None:
85
104
  if self._client is None:
86
- self._client = httpx.AsyncClient(timeout=60.0)
105
+ self._client = httpx.AsyncClient(timeout=float(self.settings.timeout_secs))
87
106
  self._owns_client = True
88
107
 
89
108
  async def stop(self) -> None:
@@ -99,10 +118,12 @@ class IflytekTTS(TTSProvider):
99
118
  now = datetime.now(tz=timezone.utc)
100
119
  date = formatdate(timeval=now.timestamp(), localtime=False, usegmt=True)
101
120
 
121
+ host = self.settings.ws_host
122
+ path = self.settings.ws_path
102
123
  signature_origin = (
103
- f"host: {self._WS_HOST}\n"
124
+ f"host: {host}\n"
104
125
  f"date: {date}\n"
105
- f"GET {self._WS_PATH} HTTP/1.1"
126
+ f"GET {path} HTTP/1.1"
106
127
  )
107
128
  signature_sha = hmac.new(
108
129
  self.settings.api_secret.encode("utf-8"),
@@ -122,9 +143,9 @@ class IflytekTTS(TTSProvider):
122
143
  ).decode("utf-8")
123
144
 
124
145
  params = urllib.parse.urlencode(
125
- {"authorization": authorization, "date": date, "host": self._WS_HOST}
146
+ {"authorization": authorization, "date": date, "host": host}
126
147
  )
127
- return f"wss://{self._WS_HOST}{self._WS_PATH}?{params}"
148
+ return f"wss://{host}{path}?{params}"
128
149
 
129
150
  async def synthesize(
130
151
  self, text: str, opts: TTSOptions | None = None
@@ -138,8 +159,17 @@ class IflytekTTS(TTSProvider):
138
159
  # callers downstream may set wire-protocol encoding metadata from
139
160
  # this field, and a wrong tag on the bytes plays back as noise on
140
161
  # raw-PCM consumers.
141
- fmt = "pcm_s16le" if self.settings.aue == "raw" else "mp3"
142
- sr = self.settings.auf_rate if self.settings.aue == "raw" else 16000
162
+ aue = self.settings.aue
163
+ if aue == "raw":
164
+ fmt = "pcm_s16le"
165
+ sr = self.settings.auf_rate
166
+ elif aue.startswith("speex"):
167
+ # Speex narrowband is 8 kHz, wideband ("speex-wb-*") is 16 kHz.
168
+ fmt = "speex"
169
+ sr = 16000 if "wb" in aue else 8000
170
+ else:
171
+ fmt = "mp3"
172
+ sr = 16000
143
173
  logger.info(
144
174
  "iFlytek TTS: {} chunks, {} bytes total, format={}, sample_rate={}",
145
175
  len(parts), len(audio_bytes), fmt, sr,
@@ -191,7 +221,8 @@ class IflytekTTS(TTSProvider):
191
221
  "pitch": self.settings.pitch,
192
222
  "tte": "UTF8",
193
223
  }
194
- if self.settings.aue == "lame":
224
+ aue = self.settings.aue
225
+ if aue == "lame":
195
226
  # ``sfl=1`` (stream-frame-length) is an MP3-only knob that
196
227
  # tells iFlytek to emit per-frame audio rather than waiting
197
228
  # for the whole file. It has no meaning for raw PCM (raw is
@@ -199,11 +230,17 @@ class IflytekTTS(TTSProvider):
199
230
  # combo with a code 10005 "invalid parameter" — so we only
200
231
  # send it on the lame path.
201
232
  business["sfl"] = 1
202
- else:
233
+ elif aue == "raw":
203
234
  # Raw / L16 mode requires ``auf`` to declare the PCM
204
235
  # sample-rate iFlytek should produce. Java wallex sends
205
236
  # ``audio/L16;rate=16000`` here; we mirror that exactly.
206
237
  business["auf"] = f"audio/L16;rate={self.settings.auf_rate}"
238
+ elif aue.startswith("speex"):
239
+ # Speex narrowband / wideband. ``speex_size`` is the iFlytek
240
+ # frame-size selector (0 = engine default; non-zero values
241
+ # match the Java ``TtsConfig.speex-size`` parameter).
242
+ if self.settings.speex_size:
243
+ business["speex_size"] = self.settings.speex_size
207
244
  return {
208
245
  "common": {"app_id": self.settings.app_id},
209
246
  "business": business,
@@ -123,7 +123,12 @@ engines:
123
123
  # exec_mode: remote
124
124
  # preload: true
125
125
  # settings:
126
- # language: zh_cn
126
+ # language: zh_cn # zh_cn / en_us / ja_jp / ko_kr / ru-ru
127
+ # vad_eos: 2000 # ms of trailing silence before final
128
+ # ltc: 3 # 1 sentence / 2 +word / 3 +char timestamps
129
+ # ws_host: iat-api.xfyun.cn # override for region-specific endpoints
130
+ # ws_path: /v2/iat
131
+ # timeout_secs: 15
127
132
 
128
133
  # # pip install 'openspeechapi[faster-whisper-stt]'
129
134
  # faster_whisper_stt:
@@ -190,7 +195,15 @@ engines:
190
195
  # exec_mode: remote
191
196
  # settings:
192
197
  # voice: xiaoyan
193
- # speed: 50
198
+ # speed: 50 # 0-100
199
+ # volume: 50 # 0-100
200
+ # pitch: 50 # 0-100
201
+ # aue: lame # lame / raw / speex / speex-wb-7
202
+ # auf_rate: 16000 # only used when aue=raw (8000 / 16000 / 24000)
203
+ # speex_size: 0 # only used when aue startswith speex (0 = auto)
204
+ # ws_host: tts-api.xfyun.cn # override for region-specific endpoints
205
+ # ws_path: /v2/tts
206
+ # timeout_secs: 8
194
207
 
195
208
  # # pip install 'openspeechapi[piper-tts]'
196
209
  # piper_tts:
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "openspeechapi"
7
- version = "0.2.5"
7
+ version = "0.2.7"
8
8
  description = "Unified speech interface for STT/TTS providers"
9
9
  requires-python = ">=3.11"
10
10
  dependencies = [
File without changes