converse-framework 0.2.0__tar.gz → 0.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. {converse_framework-0.2.0 → converse_framework-0.2.3}/PKG-INFO +41 -4
  2. {converse_framework-0.2.0 → converse_framework-0.2.3}/README.md +37 -3
  3. {converse_framework-0.2.0 → converse_framework-0.2.3}/converse_framework/audio_utils.py +47 -0
  4. {converse_framework-0.2.0 → converse_framework-0.2.3}/converse_framework/providers/__init__.py +4 -3
  5. converse_framework-0.2.3/converse_framework/providers/audio_cpp.py +576 -0
  6. {converse_framework-0.2.0 → converse_framework-0.2.3}/converse_framework/providers/pocket_tts.py +25 -20
  7. {converse_framework-0.2.0 → converse_framework-0.2.3}/converse_framework/providers/unavailable.py +2 -0
  8. {converse_framework-0.2.0 → converse_framework-0.2.3}/converse_framework/registry.py +12 -0
  9. {converse_framework-0.2.0 → converse_framework-0.2.3}/pyproject.toml +4 -3
  10. converse_framework-0.2.3/tests/test_audio_cpp.py +566 -0
  11. {converse_framework-0.2.0 → converse_framework-0.2.3}/tests/test_audio_utils.py +52 -0
  12. {converse_framework-0.2.0 → converse_framework-0.2.3}/tests/test_providers.py +72 -20
  13. {converse_framework-0.2.0 → converse_framework-0.2.3}/tests/test_session.py +3 -3
  14. {converse_framework-0.2.0 → converse_framework-0.2.3}/.gitattributes +0 -0
  15. {converse_framework-0.2.0 → converse_framework-0.2.3}/.github/workflows/publish.yml +0 -0
  16. {converse_framework-0.2.0 → converse_framework-0.2.3}/.gitignore +0 -0
  17. {converse_framework-0.2.0 → converse_framework-0.2.3}/CHANGELOG.md +0 -0
  18. {converse_framework-0.2.0 → converse_framework-0.2.3}/LICENSE +0 -0
  19. {converse_framework-0.2.0 → converse_framework-0.2.3}/MIGRATION.md +0 -0
  20. {converse_framework-0.2.0 → converse_framework-0.2.3}/benchmarks/perf_compare.py +0 -0
  21. {converse_framework-0.2.0 → converse_framework-0.2.3}/converse_framework/__init__.py +0 -0
  22. {converse_framework-0.2.0 → converse_framework-0.2.3}/converse_framework/cuda_utils.py +0 -0
  23. {converse_framework-0.2.0 → converse_framework-0.2.3}/converse_framework/events.py +0 -0
  24. {converse_framework-0.2.0 → converse_framework-0.2.3}/converse_framework/examples/__init__.py +0 -0
  25. {converse_framework-0.2.0 → converse_framework-0.2.3}/converse_framework/examples/subprocess_provider.py +0 -0
  26. {converse_framework-0.2.0 → converse_framework-0.2.3}/converse_framework/examples/text_chat.py +0 -0
  27. {converse_framework-0.2.0 → converse_framework-0.2.3}/converse_framework/examples/voice_chat.py +0 -0
  28. {converse_framework-0.2.0 → converse_framework-0.2.3}/converse_framework/examples/websocket_voice_chat.py +0 -0
  29. {converse_framework-0.2.0 → converse_framework-0.2.3}/converse_framework/js/browser-voice-client.js +0 -0
  30. {converse_framework-0.2.0 → converse_framework-0.2.3}/converse_framework/js/mic-frame-sender.js +0 -0
  31. {converse_framework-0.2.0 → converse_framework-0.2.3}/converse_framework/js/speaker-echo-guard.js +0 -0
  32. {converse_framework-0.2.0 → converse_framework-0.2.3}/converse_framework/js/tts-audio-player.js +0 -0
  33. {converse_framework-0.2.0 → converse_framework-0.2.3}/converse_framework/pipeline.py +0 -0
  34. {converse_framework-0.2.0 → converse_framework-0.2.3}/converse_framework/protocols.py +0 -0
  35. {converse_framework-0.2.0 → converse_framework-0.2.3}/converse_framework/provider_events.py +0 -0
  36. {converse_framework-0.2.0 → converse_framework-0.2.3}/converse_framework/providers/faster_whisper.py +0 -0
  37. {converse_framework-0.2.0 → converse_framework-0.2.3}/converse_framework/providers/kokoro_onnx.py +0 -0
  38. {converse_framework-0.2.0 → converse_framework-0.2.3}/converse_framework/providers/llamacpp.py +0 -0
  39. {converse_framework-0.2.0 → converse_framework-0.2.3}/converse_framework/providers/mock.py +0 -0
  40. {converse_framework-0.2.0 → converse_framework-0.2.3}/converse_framework/providers/silero.py +0 -0
  41. {converse_framework-0.2.0 → converse_framework-0.2.3}/converse_framework/providers/whisper_cpp.py +0 -0
  42. {converse_framework-0.2.0 → converse_framework-0.2.3}/converse_framework/session.py +0 -0
  43. {converse_framework-0.2.0 → converse_framework-0.2.3}/converse_framework/transport.py +0 -0
  44. {converse_framework-0.2.0 → converse_framework-0.2.3}/converse_framework/utterance_collector.py +0 -0
  45. {converse_framework-0.2.0 → converse_framework-0.2.3}/plan.md +0 -0
  46. {converse_framework-0.2.0 → converse_framework-0.2.3}/tests/js/manual-smoke-test.html +0 -0
  47. {converse_framework-0.2.0 → converse_framework-0.2.3}/tests/js/test_helpers.mjs +0 -0
  48. {converse_framework-0.2.0 → converse_framework-0.2.3}/tests/js/test_speaker_echo_guard.mjs +0 -0
  49. {converse_framework-0.2.0 → converse_framework-0.2.3}/tests/test_cuda_utils.py +0 -0
  50. {converse_framework-0.2.0 → converse_framework-0.2.3}/tests/test_events.py +0 -0
  51. {converse_framework-0.2.0 → converse_framework-0.2.3}/tests/test_examples.py +0 -0
  52. {converse_framework-0.2.0 → converse_framework-0.2.3}/tests/test_pipeline.py +0 -0
  53. {converse_framework-0.2.0 → converse_framework-0.2.3}/tests/test_protocols.py +0 -0
  54. {converse_framework-0.2.0 → converse_framework-0.2.3}/tests/test_registry.py +0 -0
  55. {converse_framework-0.2.0 → converse_framework-0.2.3}/tests/test_transport.py +0 -0
  56. {converse_framework-0.2.0 → converse_framework-0.2.3}/tests/test_utterance_collector.py +0 -0
  57. {converse_framework-0.2.0 → converse_framework-0.2.3}/tests/test_whisper_cpp.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: converse-framework
3
- Version: 0.2.0
3
+ Version: 0.2.3
4
4
  Summary: Provider-agnostic speech stack for speech-to-speech applications
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -22,12 +22,15 @@ Requires-Dist: nvidia-cublas-cu12; (platform_system == 'Windows') and extra == '
22
22
  Provides-Extra: all-llm
23
23
  Requires-Dist: httpx>=0.28; extra == 'all-llm'
24
24
  Provides-Extra: all-tts
25
+ Requires-Dist: httpx>=0.28; extra == 'all-tts'
25
26
  Requires-Dist: kokoro-onnx>=0.5; (python_version < '3.14') and extra == 'all-tts'
26
27
  Requires-Dist: misaki>=0.7; extra == 'all-tts'
27
28
  Requires-Dist: pocket-tts>=2.1; extra == 'all-tts'
28
29
  Provides-Extra: all-vad
29
30
  Requires-Dist: onnxruntime>=1.20; extra == 'all-vad'
30
31
  Requires-Dist: silero-vad>=6.0; extra == 'all-vad'
32
+ Provides-Extra: audio-cpp
33
+ Requires-Dist: httpx>=0.28; extra == 'audio-cpp'
31
34
  Provides-Extra: faster-whisper
32
35
  Requires-Dist: faster-whisper>=1.2; extra == 'faster-whisper'
33
36
  Requires-Dist: nvidia-cublas-cu12; (platform_system == 'Windows') and extra == 'faster-whisper'
@@ -49,6 +52,38 @@ Description-Content-Type: text/markdown
49
52
 
50
53
  Provider-agnostic speech stack for speech-to-speech applications.
51
54
 
55
+ ## Table of Contents
56
+
57
+ - [Install](#install)
58
+ - [Missing dependency behavior](#missing-dependency-behavior)
59
+ - [Python version compatibility](#python-version-compatibility)
60
+ - [Quick Start](#quick-start)
61
+ - [Provider status semantics](#provider-status-semantics)
62
+ - [Recipes](#recipes)
63
+ - [Minimal mock text pipeline](#minimal-mock-text-pipeline)
64
+ - [Audio frame to utterance collector to pipeline](#audio-frame-to-utterance-collector-to-pipeline)
65
+ - [Custom provider registration](#custom-provider-registration)
66
+ - [Custom event sink](#custom-event-sink)
67
+ - [Browser playback](#browser-playback-js-reference-client)
68
+ - [Browser microphone capture](#browser-microphone-capture-js-reference-client)
69
+ - [Mobile browser microphone testing](#mobile-browser-microphone-testing)
70
+ - [Wrap an external CLI as a provider](#wrap-an-external-cli-as-a-provider)
71
+ - [Pocket TTS voice listing and configuration](#pocket-tts-voice-listing-and-configuration)
72
+ - [CUDA DLL helper](#cuda-dll-helper-windows)
73
+ - [Runtime Provider Updates](#runtime-provider-updates)
74
+ - [ProviderBundle.replace()](#providerbundlereplace)
75
+ - [ProviderBundle.unload_replaced()](#providerbundleunload_replaced)
76
+ - [SpeechPipeline.update_providers()](#speechpipelineupdate_providers)
77
+ - [AudioUtteranceCollector.update_vad_provider()](#audioutterancecollectorupdate_vad_provider)
78
+ - [End-to-end pattern](#end-to-end-pattern)
79
+ - [WebSocket Session Helper](#websocket-session-helper)
80
+ - [Examples](#examples)
81
+ - [Text chat](#text-chat-automated-test-covered)
82
+ - [Voice chat](#voice-chat-manual)
83
+ - [Framework / App Boundary](#framework--app-boundary)
84
+ - [Transport boundary](#transport-boundary)
85
+ - [Status](#status)
86
+
52
87
  ## Install
53
88
 
54
89
  ```bash
@@ -62,6 +97,7 @@ providers live behind optional extras:
62
97
  pip install converse-framework[silero] # Silero VAD
63
98
  pip install converse-framework[faster-whisper] # faster-whisper ASR
64
99
  pip install converse-framework[whisper-cpp] # whisper.cpp HTTP ASR
100
+ pip install converse-framework[audio-cpp] # audio.cpp HTTP ASR + TTS
65
101
  pip install converse-framework[llamacpp] # llama.cpp HTTP LLM
66
102
  pip install converse-framework[kokoro] # Kokoro ONNX TTS
67
103
  pip install converse-framework[pocket-tts] # Pocket TTS
@@ -115,12 +151,13 @@ own constraints (the table below mirrors the markers in
115
151
  | `faster-whisper` | 3.11+ | The `nvidia-cublas-cu12` wheel pins Windows. |
116
152
  | `llamacpp` | 3.11+ | `httpx` itself supports 3.9+, so 3.11+ is the only constraint. |
117
153
  | `whisper-cpp` | 3.11+ | Only needs `httpx`, which supports 3.9+. |
154
+ | `audio-cpp` | 3.11+ | Only needs `httpx`. Talks to a user-managed `audiocpp_server`. |
118
155
  | `kokoro` | 3.11 to <3.14 | `kokoro-onnx` 0.5.0 requires Python <3.14. The wheel build fails fast on 3.14+. |
119
156
  | `pocket-tts` | 3.11+ | No known upper bound. |
120
157
 
121
158
  The `kokoro` extra is the only one with an upper-bound marker today.
122
- If you are on Python 3.14+ and need a TTS provider, use `pocket-tts`
123
- or a mock provider. New providers should add their own
159
+ If you are on Python 3.14+ and need a TTS provider, use `pocket-tts`,
160
+ `audio-cpp`, or a mock provider. New providers should add their own
124
161
  `python_version` markers in `pyproject.toml` when their backend has a
125
162
  known limit.
126
163
 
@@ -644,7 +681,7 @@ for v in voices:
644
681
  Change voice (clears only the voice cache, preserves the loaded model):
645
682
 
646
683
  ```python
647
- result = provider.configure(voice="galileo")
684
+ result = provider.configure(voice="anna")
648
685
  print(result.changed, result.requires_reload)
649
686
  # True, False — model stays, voice state reloaded
650
687
  ```
@@ -2,6 +2,38 @@
2
2
 
3
3
  Provider-agnostic speech stack for speech-to-speech applications.
4
4
 
5
+ ## Table of Contents
6
+
7
+ - [Install](#install)
8
+ - [Missing dependency behavior](#missing-dependency-behavior)
9
+ - [Python version compatibility](#python-version-compatibility)
10
+ - [Quick Start](#quick-start)
11
+ - [Provider status semantics](#provider-status-semantics)
12
+ - [Recipes](#recipes)
13
+ - [Minimal mock text pipeline](#minimal-mock-text-pipeline)
14
+ - [Audio frame to utterance collector to pipeline](#audio-frame-to-utterance-collector-to-pipeline)
15
+ - [Custom provider registration](#custom-provider-registration)
16
+ - [Custom event sink](#custom-event-sink)
17
+ - [Browser playback](#browser-playback-js-reference-client)
18
+ - [Browser microphone capture](#browser-microphone-capture-js-reference-client)
19
+ - [Mobile browser microphone testing](#mobile-browser-microphone-testing)
20
+ - [Wrap an external CLI as a provider](#wrap-an-external-cli-as-a-provider)
21
+ - [Pocket TTS voice listing and configuration](#pocket-tts-voice-listing-and-configuration)
22
+ - [CUDA DLL helper](#cuda-dll-helper-windows)
23
+ - [Runtime Provider Updates](#runtime-provider-updates)
24
+ - [ProviderBundle.replace()](#providerbundlereplace)
25
+ - [ProviderBundle.unload_replaced()](#providerbundleunload_replaced)
26
+ - [SpeechPipeline.update_providers()](#speechpipelineupdate_providers)
27
+ - [AudioUtteranceCollector.update_vad_provider()](#audioutterancecollectorupdate_vad_provider)
28
+ - [End-to-end pattern](#end-to-end-pattern)
29
+ - [WebSocket Session Helper](#websocket-session-helper)
30
+ - [Examples](#examples)
31
+ - [Text chat](#text-chat-automated-test-covered)
32
+ - [Voice chat](#voice-chat-manual)
33
+ - [Framework / App Boundary](#framework--app-boundary)
34
+ - [Transport boundary](#transport-boundary)
35
+ - [Status](#status)
36
+
5
37
  ## Install
6
38
 
7
39
  ```bash
@@ -15,6 +47,7 @@ providers live behind optional extras:
15
47
  pip install converse-framework[silero] # Silero VAD
16
48
  pip install converse-framework[faster-whisper] # faster-whisper ASR
17
49
  pip install converse-framework[whisper-cpp] # whisper.cpp HTTP ASR
50
+ pip install converse-framework[audio-cpp] # audio.cpp HTTP ASR + TTS
18
51
  pip install converse-framework[llamacpp] # llama.cpp HTTP LLM
19
52
  pip install converse-framework[kokoro] # Kokoro ONNX TTS
20
53
  pip install converse-framework[pocket-tts] # Pocket TTS
@@ -68,12 +101,13 @@ own constraints (the table below mirrors the markers in
68
101
  | `faster-whisper` | 3.11+ | The `nvidia-cublas-cu12` wheel pins Windows. |
69
102
  | `llamacpp` | 3.11+ | `httpx` itself supports 3.9+, so 3.11+ is the only constraint. |
70
103
  | `whisper-cpp` | 3.11+ | Only needs `httpx`, which supports 3.9+. |
104
+ | `audio-cpp` | 3.11+ | Only needs `httpx`. Talks to a user-managed `audiocpp_server`. |
71
105
  | `kokoro` | 3.11 to <3.14 | `kokoro-onnx` 0.5.0 requires Python <3.14. The wheel build fails fast on 3.14+. |
72
106
  | `pocket-tts` | 3.11+ | No known upper bound. |
73
107
 
74
108
  The `kokoro` extra is the only one with an upper-bound marker today.
75
- If you are on Python 3.14+ and need a TTS provider, use `pocket-tts`
76
- or a mock provider. New providers should add their own
109
+ If you are on Python 3.14+ and need a TTS provider, use `pocket-tts`,
110
+ `audio-cpp`, or a mock provider. New providers should add their own
77
111
  `python_version` markers in `pyproject.toml` when their backend has a
78
112
  known limit.
79
113
 
@@ -597,7 +631,7 @@ for v in voices:
597
631
  Change voice (clears only the voice cache, preserves the loaded model):
598
632
 
599
633
  ```python
600
- result = provider.configure(voice="galileo")
634
+ result = provider.configure(voice="anna")
601
635
  print(result.changed, result.requires_reload)
602
636
  # True, False — model stays, voice state reloaded
603
637
  ```
@@ -175,6 +175,53 @@ def float_audio_to_wav_bytes(audio, sample_rate: int) -> bytes:
175
175
  return buffer.getvalue()
176
176
 
177
177
 
178
+ def wav_bytes_to_pcm_s16le(
179
+ wav_bytes: bytes,
180
+ ) -> tuple[bytes, int, int]:
181
+ """Decode a WAV byte string back into raw PCM s16le bytes and shape.
182
+
183
+ The inverse of :func:`float_audio_to_wav_bytes`: this reads a
184
+ complete 16-bit PCM WAV file from ``bytes`` and returns the raw
185
+ signed little-endian PCM body along with the sample rate and channel
186
+ count declared in the header. Providers that fetch WAV audio from
187
+ an HTTP backend (e.g. the audio.cpp ``/v1/audio/speech`` endpoint)
188
+ use this to turn the response into a wire-ready
189
+ :class:`~converse_framework.protocols.AudioChunk`.
190
+
191
+ Args:
192
+ wav_bytes: A complete WAV file as ``bytes`` (RIFF header + data).
193
+
194
+ Returns:
195
+ A ``(pcm_s16le, sample_rate, channels)`` tuple. ``pcm_s16le`` is
196
+ the raw 16-bit signed LE PCM bytes with no header;
197
+ ``sample_rate`` and ``channels`` come from the ``fmt `` chunk.
198
+ An empty or non-WAV input returns ``(b"", 0, 0)`` rather than
199
+ raising so callers can treat a failed decode as "no audio".
200
+
201
+ Raises:
202
+ ValueError: If ``wav_bytes`` starts like a WAV stream but cannot
203
+ be parsed, or if it is not 16-bit PCM audio.
204
+ """
205
+ if not wav_bytes or wav_bytes[:4] != b"RIFF":
206
+ return b"", 0, 0
207
+ buffer = BytesIO(wav_bytes)
208
+ try:
209
+ with wave.open(buffer, "rb") as wav:
210
+ sample_rate = wav.getframerate()
211
+ channels = wav.getnchannels()
212
+ sample_width = wav.getsampwidth()
213
+ if wav.getcomptype() != "NONE" or sample_width != 2:
214
+ bits = sample_width * 8
215
+ raise ValueError(
216
+ f"unsupported WAV format: expected 16-bit PCM, got {bits}-bit "
217
+ f"{wav.getcompname()}"
218
+ )
219
+ pcm = wav.readframes(wav.getnframes())
220
+ except wave.Error as exc:
221
+ raise ValueError(f"invalid WAV stream: {exc}") from exc
222
+ return pcm, sample_rate, channels
223
+
224
+
178
225
  def float_audio_to_pcm_s16le_bytes(audio) -> bytes:
179
226
  """Encode a float audio buffer as raw 16-bit signed LE PCM bytes.
180
227
 
@@ -2,9 +2,10 @@
2
2
 
3
3
  Mock and unavailable providers are imported eagerly because they have no
4
4
  heavy dependencies. The concrete providers (``silero``, ``faster-whisper``,
5
- ``llamacpp``, ``kokoro-onnx``, ``pocket-tts``) are not imported here --
6
- they are registered with :func:`converse_framework.registry.register_provider`
7
- by import string and loaded lazily on first use.
5
+ ``whisper-cpp``, ``llamacpp``, ``kokoro-onnx``, ``pocket-tts``,
6
+ ``audio-cpp``) are not imported here -- they are registered with
7
+ :func:`converse_framework.registry.register_provider` by import string
8
+ and loaded lazily on first use.
8
9
  """
9
10
 
10
11
  from converse_framework.providers.mock import (