converse-framework 0.2.2__tar.gz → 0.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {converse_framework-0.2.2 → converse_framework-0.2.3}/PKG-INFO +8 -3
- {converse_framework-0.2.2 → converse_framework-0.2.3}/README.md +4 -2
- {converse_framework-0.2.2 → converse_framework-0.2.3}/converse_framework/audio_utils.py +47 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/converse_framework/providers/__init__.py +4 -3
- converse_framework-0.2.3/converse_framework/providers/audio_cpp.py +576 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/converse_framework/providers/unavailable.py +2 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/converse_framework/registry.py +12 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/pyproject.toml +4 -3
- converse_framework-0.2.3/tests/test_audio_cpp.py +566 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/tests/test_audio_utils.py +52 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/tests/test_providers.py +28 -9
- {converse_framework-0.2.2 → converse_framework-0.2.3}/.gitattributes +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/.github/workflows/publish.yml +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/.gitignore +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/CHANGELOG.md +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/LICENSE +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/MIGRATION.md +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/benchmarks/perf_compare.py +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/converse_framework/__init__.py +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/converse_framework/cuda_utils.py +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/converse_framework/events.py +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/converse_framework/examples/__init__.py +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/converse_framework/examples/subprocess_provider.py +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/converse_framework/examples/text_chat.py +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/converse_framework/examples/voice_chat.py +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/converse_framework/examples/websocket_voice_chat.py +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/converse_framework/js/browser-voice-client.js +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/converse_framework/js/mic-frame-sender.js +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/converse_framework/js/speaker-echo-guard.js +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/converse_framework/js/tts-audio-player.js +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/converse_framework/pipeline.py +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/converse_framework/protocols.py +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/converse_framework/provider_events.py +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/converse_framework/providers/faster_whisper.py +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/converse_framework/providers/kokoro_onnx.py +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/converse_framework/providers/llamacpp.py +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/converse_framework/providers/mock.py +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/converse_framework/providers/pocket_tts.py +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/converse_framework/providers/silero.py +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/converse_framework/providers/whisper_cpp.py +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/converse_framework/session.py +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/converse_framework/transport.py +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/converse_framework/utterance_collector.py +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/plan.md +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/tests/js/manual-smoke-test.html +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/tests/js/test_helpers.mjs +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/tests/js/test_speaker_echo_guard.mjs +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/tests/test_cuda_utils.py +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/tests/test_events.py +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/tests/test_examples.py +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/tests/test_pipeline.py +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/tests/test_protocols.py +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/tests/test_registry.py +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/tests/test_session.py +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/tests/test_transport.py +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/tests/test_utterance_collector.py +0 -0
- {converse_framework-0.2.2 → converse_framework-0.2.3}/tests/test_whisper_cpp.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: converse-framework
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.2.3
|
|
4
4
|
Summary: Provider-agnostic speech stack for speech-to-speech applications
|
|
5
5
|
License: MIT
|
|
6
6
|
License-File: LICENSE
|
|
@@ -22,12 +22,15 @@ Requires-Dist: nvidia-cublas-cu12; (platform_system == 'Windows') and extra == '
|
|
|
22
22
|
Provides-Extra: all-llm
|
|
23
23
|
Requires-Dist: httpx>=0.28; extra == 'all-llm'
|
|
24
24
|
Provides-Extra: all-tts
|
|
25
|
+
Requires-Dist: httpx>=0.28; extra == 'all-tts'
|
|
25
26
|
Requires-Dist: kokoro-onnx>=0.5; (python_version < '3.14') and extra == 'all-tts'
|
|
26
27
|
Requires-Dist: misaki>=0.7; extra == 'all-tts'
|
|
27
28
|
Requires-Dist: pocket-tts>=2.1; extra == 'all-tts'
|
|
28
29
|
Provides-Extra: all-vad
|
|
29
30
|
Requires-Dist: onnxruntime>=1.20; extra == 'all-vad'
|
|
30
31
|
Requires-Dist: silero-vad>=6.0; extra == 'all-vad'
|
|
32
|
+
Provides-Extra: audio-cpp
|
|
33
|
+
Requires-Dist: httpx>=0.28; extra == 'audio-cpp'
|
|
31
34
|
Provides-Extra: faster-whisper
|
|
32
35
|
Requires-Dist: faster-whisper>=1.2; extra == 'faster-whisper'
|
|
33
36
|
Requires-Dist: nvidia-cublas-cu12; (platform_system == 'Windows') and extra == 'faster-whisper'
|
|
@@ -94,6 +97,7 @@ providers live behind optional extras:
|
|
|
94
97
|
pip install converse-framework[silero] # Silero VAD
|
|
95
98
|
pip install converse-framework[faster-whisper] # faster-whisper ASR
|
|
96
99
|
pip install converse-framework[whisper-cpp] # whisper.cpp HTTP ASR
|
|
100
|
+
pip install converse-framework[audio-cpp] # audio.cpp HTTP ASR + TTS
|
|
97
101
|
pip install converse-framework[llamacpp] # llama.cpp HTTP LLM
|
|
98
102
|
pip install converse-framework[kokoro] # Kokoro ONNX TTS
|
|
99
103
|
pip install converse-framework[pocket-tts] # Pocket TTS
|
|
@@ -147,12 +151,13 @@ own constraints (the table below mirrors the markers in
|
|
|
147
151
|
| `faster-whisper` | 3.11+ | The `nvidia-cublas-cu12` wheel pins Windows. |
|
|
148
152
|
| `llamacpp` | 3.11+ | `httpx` itself supports 3.9+, so 3.11+ is the only constraint. |
|
|
149
153
|
| `whisper-cpp` | 3.11+ | Only needs `httpx`, which supports 3.9+. |
|
|
154
|
+
| `audio-cpp` | 3.11+ | Only needs `httpx`. Talks to a user-managed `audiocpp_server`. |
|
|
150
155
|
| `kokoro` | 3.11 to <3.14 | `kokoro-onnx` 0.5.0 requires Python <3.14. The wheel build fails fast on 3.14+. |
|
|
151
156
|
| `pocket-tts` | 3.11+ | No known upper bound. |
|
|
152
157
|
|
|
153
158
|
The `kokoro` extra is the only one with an upper-bound marker today.
|
|
154
|
-
If you are on Python 3.14+ and need a TTS provider, use `pocket-tts`
|
|
155
|
-
or a mock provider. New providers should add their own
|
|
159
|
+
If you are on Python 3.14+ and need a TTS provider, use `pocket-tts`,
|
|
160
|
+
`audio-cpp`, or a mock provider. New providers should add their own
|
|
156
161
|
`python_version` markers in `pyproject.toml` when their backend has a
|
|
157
162
|
known limit.
|
|
158
163
|
|
|
@@ -47,6 +47,7 @@ providers live behind optional extras:
|
|
|
47
47
|
pip install converse-framework[silero] # Silero VAD
|
|
48
48
|
pip install converse-framework[faster-whisper] # faster-whisper ASR
|
|
49
49
|
pip install converse-framework[whisper-cpp] # whisper.cpp HTTP ASR
|
|
50
|
+
pip install converse-framework[audio-cpp] # audio.cpp HTTP ASR + TTS
|
|
50
51
|
pip install converse-framework[llamacpp] # llama.cpp HTTP LLM
|
|
51
52
|
pip install converse-framework[kokoro] # Kokoro ONNX TTS
|
|
52
53
|
pip install converse-framework[pocket-tts] # Pocket TTS
|
|
@@ -100,12 +101,13 @@ own constraints (the table below mirrors the markers in
|
|
|
100
101
|
| `faster-whisper` | 3.11+ | The `nvidia-cublas-cu12` wheel pins Windows. |
|
|
101
102
|
| `llamacpp` | 3.11+ | `httpx` itself supports 3.9+, so 3.11+ is the only constraint. |
|
|
102
103
|
| `whisper-cpp` | 3.11+ | Only needs `httpx`, which supports 3.9+. |
|
|
104
|
+
| `audio-cpp` | 3.11+ | Only needs `httpx`. Talks to a user-managed `audiocpp_server`. |
|
|
103
105
|
| `kokoro` | 3.11 to <3.14 | `kokoro-onnx` 0.5.0 requires Python <3.14. The wheel build fails fast on 3.14+. |
|
|
104
106
|
| `pocket-tts` | 3.11+ | No known upper bound. |
|
|
105
107
|
|
|
106
108
|
The `kokoro` extra is the only one with an upper-bound marker today.
|
|
107
|
-
If you are on Python 3.14+ and need a TTS provider, use `pocket-tts`
|
|
108
|
-
or a mock provider. New providers should add their own
|
|
109
|
+
If you are on Python 3.14+ and need a TTS provider, use `pocket-tts`,
|
|
110
|
+
`audio-cpp`, or a mock provider. New providers should add their own
|
|
109
111
|
`python_version` markers in `pyproject.toml` when their backend has a
|
|
110
112
|
known limit.
|
|
111
113
|
|
|
@@ -175,6 +175,53 @@ def float_audio_to_wav_bytes(audio, sample_rate: int) -> bytes:
|
|
|
175
175
|
return buffer.getvalue()
|
|
176
176
|
|
|
177
177
|
|
|
178
|
+
def wav_bytes_to_pcm_s16le(
    wav_bytes: bytes,
) -> tuple[bytes, int, int]:
    """Decode a WAV byte string back into raw PCM s16le bytes and shape.

    The inverse of :func:`float_audio_to_wav_bytes`: this reads a
    complete 16-bit PCM WAV file from ``bytes`` and returns the raw
    signed little-endian PCM body along with the sample rate and channel
    count declared in the header. Providers that fetch WAV audio from
    an HTTP backend (e.g. the audio.cpp ``/v1/audio/speech`` endpoint)
    use this to turn the response into a wire-ready
    :class:`~converse_framework.protocols.AudioChunk`.

    Args:
        wav_bytes: A complete WAV file as ``bytes`` (RIFF header + data).

    Returns:
        A ``(pcm_s16le, sample_rate, channels)`` tuple. ``pcm_s16le`` is
        the raw 16-bit signed LE PCM bytes with no header;
        ``sample_rate`` and ``channels`` come from the ``fmt `` chunk.
        An empty input, or one that does not start with the ``RIFF``
        magic, returns ``(b"", 0, 0)`` rather than raising so callers
        can treat a failed decode as "no audio".

    Raises:
        ValueError: If ``wav_bytes`` starts like a WAV stream but cannot
            be parsed (including a header that is cut short), or if it
            is not 16-bit PCM audio.
    """
    # Anything that does not even carry the RIFF magic is "no audio",
    # not an error.
    if not wav_bytes or wav_bytes[:4] != b"RIFF":
        return b"", 0, 0
    buffer = BytesIO(wav_bytes)
    try:
        with wave.open(buffer, "rb") as wav:
            sample_rate = wav.getframerate()
            channels = wav.getnchannels()
            sample_width = wav.getsampwidth()
            # Reject anything that is not uncompressed 16-bit PCM before
            # reading frames; callers assume two bytes per sample.
            if wav.getcomptype() != "NONE" or sample_width != 2:
                bits = sample_width * 8
                raise ValueError(
                    f"unsupported WAV format: expected 16-bit PCM, got {bits}-bit "
                    f"{wav.getcompname()}"
                )
            pcm = wav.readframes(wav.getnframes())
    except (wave.Error, EOFError) as exc:
        # The stdlib reader signals a truncated RIFF header or ``fmt ``
        # chunk with EOFError instead of wave.Error; translate both so
        # callers only ever have to handle the documented ValueError.
        raise ValueError(f"invalid WAV stream: {exc}") from exc
    return pcm, sample_rate, channels
|
|
223
|
+
|
|
224
|
+
|
|
178
225
|
def float_audio_to_pcm_s16le_bytes(audio) -> bytes:
|
|
179
226
|
"""Encode a float audio buffer as raw 16-bit signed LE PCM bytes.
|
|
180
227
|
|
{converse_framework-0.2.2 → converse_framework-0.2.3}/converse_framework/providers/__init__.py
RENAMED
|
@@ -2,9 +2,10 @@
|
|
|
2
2
|
|
|
3
3
|
Mock and unavailable providers are imported eagerly because they have no
|
|
4
4
|
heavy dependencies. The concrete providers (``silero``, ``faster-whisper``,
|
|
5
|
-
``llamacpp``, ``kokoro-onnx``, ``pocket-tts
|
|
6
|
-
they are registered with
|
|
7
|
-
by import string
|
|
5
|
+
``whisper-cpp``, ``llamacpp``, ``kokoro-onnx``, ``pocket-tts``,
|
|
6
|
+
``audio-cpp``) are not imported here -- they are registered with
|
|
7
|
+
:func:`converse_framework.registry.register_provider` by import string
|
|
8
|
+
and loaded lazily on first use.
|
|
8
9
|
"""
|
|
9
10
|
|
|
10
11
|
from converse_framework.providers.mock import (
|