converse-framework 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- converse_framework-0.2.0/.gitattributes +2 -0
- converse_framework-0.2.0/.github/workflows/publish.yml +36 -0
- converse_framework-0.2.0/.gitignore +23 -0
- converse_framework-0.2.0/CHANGELOG.md +63 -0
- converse_framework-0.2.0/LICENSE +21 -0
- converse_framework-0.2.0/MIGRATION.md +356 -0
- converse_framework-0.2.0/PKG-INFO +992 -0
- converse_framework-0.2.0/README.md +945 -0
- converse_framework-0.2.0/benchmarks/perf_compare.py +495 -0
- converse_framework-0.2.0/converse_framework/__init__.py +108 -0
- converse_framework-0.2.0/converse_framework/audio_utils.py +412 -0
- converse_framework-0.2.0/converse_framework/cuda_utils.py +176 -0
- converse_framework-0.2.0/converse_framework/events.py +94 -0
- converse_framework-0.2.0/converse_framework/examples/__init__.py +20 -0
- converse_framework-0.2.0/converse_framework/examples/subprocess_provider.py +439 -0
- converse_framework-0.2.0/converse_framework/examples/text_chat.py +308 -0
- converse_framework-0.2.0/converse_framework/examples/voice_chat.py +223 -0
- converse_framework-0.2.0/converse_framework/examples/websocket_voice_chat.py +174 -0
- converse_framework-0.2.0/converse_framework/js/browser-voice-client.js +248 -0
- converse_framework-0.2.0/converse_framework/js/mic-frame-sender.js +445 -0
- converse_framework-0.2.0/converse_framework/js/speaker-echo-guard.js +308 -0
- converse_framework-0.2.0/converse_framework/js/tts-audio-player.js +237 -0
- converse_framework-0.2.0/converse_framework/pipeline.py +620 -0
- converse_framework-0.2.0/converse_framework/protocols.py +382 -0
- converse_framework-0.2.0/converse_framework/provider_events.py +159 -0
- converse_framework-0.2.0/converse_framework/providers/__init__.py +28 -0
- converse_framework-0.2.0/converse_framework/providers/faster_whisper.py +290 -0
- converse_framework-0.2.0/converse_framework/providers/kokoro_onnx.py +391 -0
- converse_framework-0.2.0/converse_framework/providers/llamacpp.py +264 -0
- converse_framework-0.2.0/converse_framework/providers/mock.py +171 -0
- converse_framework-0.2.0/converse_framework/providers/pocket_tts.py +409 -0
- converse_framework-0.2.0/converse_framework/providers/silero.py +161 -0
- converse_framework-0.2.0/converse_framework/providers/unavailable.py +137 -0
- converse_framework-0.2.0/converse_framework/providers/whisper_cpp.py +322 -0
- converse_framework-0.2.0/converse_framework/registry.py +397 -0
- converse_framework-0.2.0/converse_framework/session.py +315 -0
- converse_framework-0.2.0/converse_framework/transport.py +54 -0
- converse_framework-0.2.0/converse_framework/utterance_collector.py +336 -0
- converse_framework-0.2.0/plan.md +635 -0
- converse_framework-0.2.0/pyproject.toml +50 -0
- converse_framework-0.2.0/tests/js/manual-smoke-test.html +392 -0
- converse_framework-0.2.0/tests/js/test_helpers.mjs +141 -0
- converse_framework-0.2.0/tests/js/test_speaker_echo_guard.mjs +233 -0
- converse_framework-0.2.0/tests/test_audio_utils.py +272 -0
- converse_framework-0.2.0/tests/test_cuda_utils.py +280 -0
- converse_framework-0.2.0/tests/test_events.py +155 -0
- converse_framework-0.2.0/tests/test_examples.py +398 -0
- converse_framework-0.2.0/tests/test_pipeline.py +484 -0
- converse_framework-0.2.0/tests/test_protocols.py +104 -0
- converse_framework-0.2.0/tests/test_providers.py +747 -0
- converse_framework-0.2.0/tests/test_registry.py +623 -0
- converse_framework-0.2.0/tests/test_session.py +440 -0
- converse_framework-0.2.0/tests/test_transport.py +58 -0
- converse_framework-0.2.0/tests/test_utterance_collector.py +832 -0
- converse_framework-0.2.0/tests/test_whisper_cpp.py +386 -0
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
workflow_dispatch:
|
|
7
|
+
|
|
8
|
+
permissions:
|
|
9
|
+
contents: read
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
publish:
|
|
13
|
+
name: Build and publish package
|
|
14
|
+
runs-on: ubuntu-latest
|
|
15
|
+
environment: pypi
|
|
16
|
+
permissions:
|
|
17
|
+
id-token: write
|
|
18
|
+
contents: read
|
|
19
|
+
|
|
20
|
+
steps:
|
|
21
|
+
- name: Check out repository
|
|
22
|
+
uses: actions/checkout@v4
|
|
23
|
+
|
|
24
|
+
- name: Set up Python
|
|
25
|
+
uses: actions/setup-python@v5
|
|
26
|
+
with:
|
|
27
|
+
python-version: "3.12"
|
|
28
|
+
|
|
29
|
+
- name: Install build backend
|
|
30
|
+
run: python -m pip install --upgrade build
|
|
31
|
+
|
|
32
|
+
- name: Build distributions
|
|
33
|
+
run: python -m build
|
|
34
|
+
|
|
35
|
+
- name: Publish distributions to PyPI
|
|
36
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
nul
|
|
2
|
+
/Reference-Repository-Conversational-AI-Harness
|
|
3
|
+
|
|
4
|
+
__pycache__/
|
|
5
|
+
*.pyc
|
|
6
|
+
*.pyo
|
|
7
|
+
|
|
8
|
+
.pi-lens/
|
|
9
|
+
.opencode/
|
|
10
|
+
.codegraph/
|
|
11
|
+
.mavis/
|
|
12
|
+
.harness/
|
|
13
|
+
|
|
14
|
+
.pytest_cache/
|
|
15
|
+
.ruff_cache/
|
|
16
|
+
|
|
17
|
+
dist/
|
|
18
|
+
*.egg-info/
|
|
19
|
+
build/
|
|
20
|
+
|
|
21
|
+
.venv/
|
|
22
|
+
venv/
|
|
23
|
+
env/
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## v0.2.0 — Provider lifecycle, session helper, and browser mic support
|
|
4
|
+
|
|
5
|
+
### Highlights
|
|
6
|
+
|
|
7
|
+
- **Provider lifecycle events** — `provider.loading`, `provider.loaded`,
|
|
8
|
+
`provider.error` events with a consistent cross-provider shape.
|
|
9
|
+
- **Provider status tiers** — `status` (cached property), `probe_status()`
|
|
10
|
+
(cheap probe, no model load), `load_status()` (may initialise resources).
|
|
11
|
+
- **Runtime provider swap** — `ProviderBundle.replace()`,
|
|
12
|
+
`SpeechPipeline.update_providers()`,
|
|
13
|
+
`AudioUtteranceCollector.update_vad_provider()` for safe in-flight
|
|
14
|
+
provider replacement.
|
|
15
|
+
- **Provider configuration** — `TTSProvider.configure()` and
|
|
16
|
+
`PocketTTSProvider.list_voices()` for first-class voice changes
|
|
17
|
+
without replacing the provider instance.
|
|
18
|
+
- **Reusable WebSocket session** — `WebSocketSession` at
|
|
19
|
+
`converse_framework.session` handles 7 built-in message types,
|
|
20
|
+
freeing apps from copying the recipe state machine.
|
|
21
|
+
- **Browser mic capture** — `mic-frame-sender.js` using `AudioWorklet`,
|
|
22
|
+
`speaker-echo-guard.js` for echo-aware frame gating, and
|
|
23
|
+
`browser-voice-client.js` combining mic, TTS playback, and echo
|
|
24
|
+
guard into one class.
|
|
25
|
+
- **CUDA DLL discovery** — `cuda_utils` helper for Windows NVIDIA wheel
|
|
26
|
+
DLL path resolution, auto-integrated into `FasterWhisperASRProvider`.
|
|
27
|
+
- **Richer error payloads** — `FrameworkEvent` payloads always include
|
|
28
|
+
`message` and `exception` fields for `turn.error` and
|
|
29
|
+
`provider.error` events.
|
|
30
|
+
- **Faster Whisper lazy load** — model loads on first audio turn, not
|
|
31
|
+
at instantiation, fixing stale-file assertion failures.
|
|
32
|
+
|
|
33
|
+
### Breaking changes
|
|
34
|
+
|
|
35
|
+
- `ProviderStatus` now carries optional `status_level` (`str`),
|
|
36
|
+
`backend` (`str | None`), and `voices` (`list[VoiceInfo] | None`).
|
|
37
|
+
Old field access (`status.ready`, `status.message`) is unchanged.
|
|
38
|
+
- `PocketTTSProvider.set_quantize()` is deprecated in favour of
|
|
39
|
+
`configure(quantize=...)`. The old method is kept for backward
|
|
40
|
+
compatibility in this release.
|
|
41
|
+
|
|
42
|
+
### New dependencies
|
|
43
|
+
|
|
44
|
+
None (all optional extras unchanged).
|
|
45
|
+
|
|
46
|
+
### Migration notes
|
|
47
|
+
|
|
48
|
+
- `check_status()` is kept for backward compatibility. New code should
|
|
49
|
+
prefer `probe_status()` for lightweight checks and `load_status()`
|
|
50
|
+
when models must be confirmed loaded.
|
|
51
|
+
- `ProviderBundle.check_statuses()` is kept as an alias for probe
|
|
52
|
+
semantics.
|
|
53
|
+
- `build_websocket_voice_runtime()` and `handle_websocket_message()`
|
|
54
|
+
in the WebSocket example remain importable. New apps should prefer
|
|
55
|
+
`WebSocketSession` from `converse_framework.session`.
|
|
56
|
+
- See `MIGRATION.md` for the full v0.1 → v0.2 transition guide.
|
|
57
|
+
|
|
58
|
+
### What's next
|
|
59
|
+
|
|
60
|
+
- Refining the `WebSocketSession` hook API based on real-world usage.
|
|
61
|
+
- Extending `configure()` to ASR and LLM providers.
|
|
62
|
+
- Browser automation tests for the JS helpers.
|
|
63
|
+
- Formalising the provider event schema as typed subclasses.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 thomas9120
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,356 @@
|
|
|
1
|
+
# Migration guide: adopting `converse-framework` v0.1
|
|
2
|
+
|
|
3
|
+
The framework is in 0.x pre-release (v0.2 as of this release). The public surface is the
|
|
4
|
+
explicit `__all__` in `converse_framework/__init__.py` — 34
|
|
5
|
+
symbols at the time of writing — governed by
|
|
6
|
+
`.harness/docs/standards.md` and the Boundary Decisions in
|
|
7
|
+
[`plan.md`](./plan.md). Anything not in `__all__` is internal and
|
|
8
|
+
may change without notice. The reference consumer in this repo is
|
|
9
|
+
`Reference-Repository-Conversational-AI-Harness/`.
|
|
10
|
+
|
|
11
|
+
## Who this is for
|
|
12
|
+
|
|
13
|
+
This guide is for application authors who previously copied or
|
|
14
|
+
re-implemented the speech stack (provider protocols, audio
|
|
15
|
+
utilities, event sink, speech pipeline, VAD utterance collector,
|
|
16
|
+
provider registry) inside their own repository and now want to
|
|
17
|
+
depend on `converse-framework` v0.1 as a separately installed
|
|
18
|
+
package. The payoff is a smaller app codebase, a stable public
|
|
19
|
+
API for the provider-agnostic core, and the ability to pick up
|
|
20
|
+
framework bug fixes and new providers without editing the app.
|
|
21
|
+
The app keeps ownership of its transport, profiles, settings,
|
|
22
|
+
character cards, companion-mode policy, and any other
|
|
23
|
+
application-level behavior — none of those move into the
|
|
24
|
+
framework.
|
|
25
|
+
|
|
26
|
+
## Install
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
pip install converse-framework
|
|
30
|
+
pip install "converse-framework[silero]" # Silero VAD
|
|
31
|
+
pip install "converse-framework[faster-whisper]" # faster-whisper ASR
|
|
32
|
+
pip install "converse-framework[llamacpp]" # llama.cpp HTTP LLM
|
|
33
|
+
pip install "converse-framework[kokoro]" # Kokoro ONNX TTS
|
|
34
|
+
pip install "converse-framework[pocket-tts]" # Pocket TTS
|
|
35
|
+
pip install "converse-framework[all]" # everything
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
For local development against an in-tree checkout, install the
|
|
39
|
+
package as editable from the framework root: `python -m pip install -e .`.
|
|
40
|
+
End-to-end install recipes (per-profile extras, CUDA
|
|
41
|
+
`cublas64_12.dll` workarounds, OS-specific `start.*` /
|
|
42
|
+
`install.*` scripts) live in `README.md` and in the consumer
|
|
43
|
+
app's own docs.
|
|
44
|
+
|
|
45
|
+
## Import changes
|
|
46
|
+
|
|
47
|
+
The framework's public surface is a single namespace
|
|
48
|
+
`converse_framework`. Replace your local copies of the speech
|
|
49
|
+
stack with imports from the package. The mapping below uses the
|
|
50
|
+
reference harness's old paths as concrete examples — replace the
|
|
51
|
+
`conversational_harness.*` prefix with your app's equivalent
|
|
52
|
+
module path.
|
|
53
|
+
|
|
54
|
+
| Old path (in the consumer app) | New path (in `converse_framework`) |
|
|
55
|
+
|---------------------------------------------------------------|-----------------------------------------------|
|
|
56
|
+
| `conversational_harness.providers.base` | `converse_framework.protocols` |
|
|
57
|
+
| `conversational_harness.events` | `converse_framework` (or `events`) |
|
|
58
|
+
| `conversational_harness.audio` | `converse_framework.audio_utils` |
|
|
59
|
+
| `conversational_harness.audio_frames` | `converse_framework.audio_utils` |
|
|
60
|
+
| `conversational_harness.providers.{mock,silero,faster_whisper,llamacpp,kokoro_onnx,pocket_tts,unavailable}` | `converse_framework.providers.<x>` |
|
|
61
|
+
| `conversational_harness.transport.WebSocketTransport` | **stays in the consumer app** (see below) |
|
|
62
|
+
| `conversational_harness.orchestrator.ConversationOrchestrator` | `converse_framework.pipeline.SpeechPipeline` (with a harness-side subclass if you need hooks) |
|
|
63
|
+
|
|
64
|
+
For the symbols themselves, the canonical import site is the
|
|
65
|
+
package root:
|
|
66
|
+
|
|
67
|
+
```python
|
|
68
|
+
from converse_framework import (
|
|
69
|
+
VADProvider, ASRProvider, LLMProvider, TTSProvider,
|
|
70
|
+
AudioChunk, TranscriptEvent, VADEvent,
|
|
71
|
+
ProviderCapabilities, ProviderStatus,
|
|
72
|
+
AudioFrame, AudioFrameStats,
|
|
73
|
+
pcm_s16le_to_float32, float_audio_to_pcm_s16le_bytes,
|
|
74
|
+
float_audio_to_wav_bytes, make_tone_wav,
|
|
75
|
+
compute_pcm16_level, trim_pcm16_silence, parse_audio_frame,
|
|
76
|
+
EventSink, QueueEventSink, FrameworkEvent,
|
|
77
|
+
SpeechPipeline, PipelineConfig,
|
|
78
|
+
AudioUtteranceCollector, UtteranceCollectorConfig,
|
|
79
|
+
ProviderBundle, register_provider, build_provider,
|
|
80
|
+
build_provider_bundle, is_provider_available,
|
|
81
|
+
Transport, QueueTransport,
|
|
82
|
+
extra_hint_for,
|
|
83
|
+
)
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
The full symbol list is `converse_framework.__all__`. The
|
|
87
|
+
reference harness's local compatibility shims under
|
|
88
|
+
`app/conversational_harness/{events,audio,audio_frames,providers/*}.py`
|
|
89
|
+
are now thin re-exports pointing at the framework. New consumers
|
|
90
|
+
can skip the shims and import directly from `converse_framework`;
|
|
91
|
+
existing shims can stay to avoid churning call sites during the
|
|
92
|
+
transition. `HarnessEvent` is exported as a compatibility alias
|
|
93
|
+
of `FrameworkEvent`. Use `FrameworkEvent` in new code; the alias
|
|
94
|
+
is kept for one minor version.
|
|
95
|
+
|
|
96
|
+
## Behavior the framework does NOT own
|
|
97
|
+
|
|
98
|
+
The framework draws a hard line at provider-agnostic mechanics.
|
|
99
|
+
The following stay in the consumer app. For each item, the
|
|
100
|
+
framework provides an injection point or a clearly defined
|
|
101
|
+
contract the app uses instead of baking the behavior in.
|
|
102
|
+
|
|
103
|
+
- **FastAPI app, REST endpoints, WebSocket handler.** The
|
|
104
|
+
framework has no FastAPI dependency. The app owns the HTTP
|
|
105
|
+
surface and the browser-facing endpoints. The only public
|
|
106
|
+
contract for moving events is the `Transport` protocol
|
|
107
|
+
(`send_event` / `receive_event` on `FrameworkEvent`).
|
|
108
|
+
- **WebSocket transport implementation.** Keep your
|
|
109
|
+
`WebSocketTransport` (or equivalent) in the consumer app; it
|
|
110
|
+
implements the framework's `Transport` protocol. The reference
|
|
111
|
+
harness exposes
|
|
112
|
+
`conversational_harness.transport.WebSocketTransport`.
|
|
113
|
+
|
|
114
|
+
As of v0.2 the framework also offers an optional reusable
|
|
115
|
+
`WebSocketSession` (``converse_framework.session``) that owns
|
|
116
|
+
the message-dispatch loop for browser-based voice apps,
|
|
117
|
+
including provider reload, status requests, settings updates,
|
|
118
|
+
and event dispatch. Apps that previously copied this loop
|
|
119
|
+
from the WebSocket recipe can switch to the session helper;
|
|
120
|
+
apps that already own their own handler can keep it unchanged.
|
|
121
|
+
The session class is **not** in the top-level
|
|
122
|
+
``__init__.py`` — apps opt in via an explicit import.
|
|
123
|
+
- **Profile file loading and layout.** The framework reads no
|
|
124
|
+
profile files. Pass the relevant sections into
|
|
125
|
+
`build_provider_bundle(config={...})` as a plain mapping.
|
|
126
|
+
- **Runtime settings persistence.** `user_settings.json`,
|
|
127
|
+
`RuntimeSettings`, and the three-tier sampler merge (server
|
|
128
|
+
`/props` → profile → user overrides) stay in the app. The
|
|
129
|
+
pipeline accepts an injected `system_prompt_builder` callable
|
|
130
|
+
that pulls settings into the prompt without the framework
|
|
131
|
+
importing `RuntimeSettings`.
|
|
132
|
+
- **Character card parsing and first-message seeding.** TavernAI
|
|
133
|
+
V2 PNG/JSON parsing, `{{user}}`/`{{char}}` substitution, and
|
|
134
|
+
`first_mes` seeding are app policy. The pipeline exposes
|
|
135
|
+
`messages_for_mode(mode)` so the app can read and prepend the
|
|
136
|
+
seed; the app emits `conversation.seeded` and skips TTS for
|
|
137
|
+
it.
|
|
138
|
+
- **Companion mode policy and memory store.** `memory.md` save /
|
|
139
|
+
summarize / clear, the Companion tab, and Companion sampler
|
|
140
|
+
overrides stay in the app. The framework treats modes as
|
|
141
|
+
opaque string keys; the pipeline keeps a separate history per
|
|
142
|
+
mode key. The app injects the companion-specific prompt
|
|
143
|
+
assembly and memory read through `system_prompt_builder`.
|
|
144
|
+
- **TTS preset manager and provider hot-swap UX.** The runtime
|
|
145
|
+
TTS selector, preset switching, and the
|
|
146
|
+
`/api/tts/{select,load,unload,voice}` endpoints are app UX.
|
|
147
|
+
The framework exposes `build_provider_bundle(..., tts_provider=...)`
|
|
148
|
+
so the app can inject a harness-managed TTS instance instead
|
|
149
|
+
of the registry-built one, and `provider.unload()` for the
|
|
150
|
+
lifecycle. As of v0.2 the framework also provides safe swap
|
|
151
|
+
mechanics (`ProviderBundle.replace()`, `pipeline.update_providers()`,
|
|
152
|
+
`collector.update_vad_provider()`) — the app still owns the
|
|
153
|
+
settings UX that triggers the swap, but the low-level
|
|
154
|
+
coordination (cancelling in-flight TTS, emitting lifecycle
|
|
155
|
+
events, unloading old providers) is now handled by the
|
|
156
|
+
framework.
|
|
157
|
+
- **`WebSocketTransport`**, **`config.py`**, **`runtime_settings.py`**, **`tts_runtime.py`**,
|
|
158
|
+
character card parser, memory store, and doctor / start /
|
|
159
|
+
install scripts all stay in the consumer. The framework never
|
|
160
|
+
imports them.
|
|
161
|
+
|
|
162
|
+
## Provider registration
|
|
163
|
+
|
|
164
|
+
The framework ships a built-in registry that knows the mock
|
|
165
|
+
providers and the optional concrete providers behind extras. The
|
|
166
|
+
consumer does not need to register the built-in providers — they
|
|
167
|
+
are already wired in `converse_framework.registry` (see the
|
|
168
|
+
`register_provider(...)` calls at the bottom of that module). The
|
|
169
|
+
consumer's job is to (1) read each kind's `provider` field from
|
|
170
|
+
the active profile, (2) call `build_provider_bundle({...})` with
|
|
171
|
+
sections shaped like `{"vad": {...}, "asr": {...}, "llm": {...},
|
|
172
|
+
"tts": {...}, "audio": {...}}`, (3) optionally pass a
|
|
173
|
+
harness-managed TTS via `tts_provider=` to override the
|
|
174
|
+
registry-built one, and (4) surface friendly missing-extra
|
|
175
|
+
messages built by `extra_hint_for(kind, name)` (e.g.
|
|
176
|
+
`pip install converse-framework[silero]`) in the status
|
|
177
|
+
endpoint and doctor script.
|
|
178
|
+
|
|
179
|
+
```python
|
|
180
|
+
from converse_framework import (
|
|
181
|
+
build_provider_bundle, extra_hint_for, is_provider_available,
|
|
182
|
+
)
|
|
183
|
+
|
|
184
|
+
profile_sections = {
|
|
185
|
+
"vad": {"provider": "silero", "speech_threshold": 0.6},
|
|
186
|
+
"asr": {"provider": "faster-whisper", "model": "large-v3-turbo"},
|
|
187
|
+
"llm": {"provider": "llamacpp", "base_url": "http://127.0.0.1:8080"},
|
|
188
|
+
"tts": {"provider": "pocket-tts", "voice": "azelma"},
|
|
189
|
+
"audio": {"sample_rate": 16000, "channels": 1, "frame_ms": 30},
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
if not is_provider_available("vad", "silero"):
|
|
193
|
+
print(f"Install silero VAD with: {extra_hint_for('vad', 'silero')}")
|
|
194
|
+
|
|
195
|
+
bundle = build_provider_bundle(profile_sections)
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
**Registering your own provider.** If you ship a custom provider
|
|
199
|
+
that does not live under `converse_framework.providers`, register
|
|
200
|
+
it with an import string and an optional `availability_probe`
|
|
201
|
+
(returns `True` only when the provider is genuinely ready to
|
|
202
|
+
use — heavy dep installed, model loaded, network reachable,
|
|
203
|
+
etc.; without it the registry falls back to a best-effort module
|
|
204
|
+
import):
|
|
205
|
+
|
|
206
|
+
```python
|
|
207
|
+
from converse_framework import register_provider
|
|
208
|
+
|
|
209
|
+
register_provider(
|
|
210
|
+
"tts", "my-cloud-tts",
|
|
211
|
+
"myapp.providers.my_cloud_tts:MyCloudTTSProvider",
|
|
212
|
+
availability_probe=lambda: _my_cloud_tts_sdk_importable(),
|
|
213
|
+
)
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
## Configuration surface that moved out of the framework
|
|
217
|
+
|
|
218
|
+
The framework does not own any application-level configuration.
|
|
219
|
+
The following items **used to be** either on the framework side
|
|
220
|
+
or duplicated in the consumer; after extraction they belong
|
|
221
|
+
solely in the consumer. The column on the right names the module
|
|
222
|
+
where the reference harness keeps it.
|
|
223
|
+
|
|
224
|
+
| Item | Lives in the consumer (reference harness path) |
|
|
225
|
+
|--------------------------------------------|--------------------------------------------------------------|
|
|
226
|
+
| Profile loading, `HarnessConfig`, `PROJECT_ROOT`, `DEFAULT_PROFILE` | `conversational_harness.config` |
|
|
227
|
+
| `RuntimeSettings`, `user_settings.json` persistence, sampler merge (`effective_sampler`, `sampler_display`) | `conversational_harness.runtime_settings` |
|
|
228
|
+
| TTS runtime / preset manager (long-lived TTS instance, model load/unload, voice selection) | `conversational_harness.tts_runtime` |
|
|
229
|
+
| `WebSocketTransport` (FastAPI adapter) | `conversational_harness.transport` |
|
|
230
|
+
| Doctor / readiness / pre-flight checks | `conversational_harness.doctor` and `doctor.{ps1,sh}` |
|
|
231
|
+
| Launch entry point | `conversational_harness.launch` |
|
|
232
|
+
| Character card parser (TavernAI V2 PNG / JSON) | app-local `character_cards` module (within `runtime_settings`) |
|
|
233
|
+
| Companion memory store (`memory.md`) | app-local `MemoryStore` (within `runtime_settings`) |
|
|
234
|
+
| `start.*` / `install.*` / `stop.*` / `update.*` / `tunnel.*` scripts | the harness repo root |
|
|
235
|
+
|
|
236
|
+
What moved **into** the framework, in case the consumer used to
|
|
237
|
+
carry its own copies:
|
|
238
|
+
|
|
239
|
+
- Provider protocols and shared dataclasses (`VADProvider`,
|
|
240
|
+
`ASRProvider`, `LLMProvider`, `TTSProvider`, `ProviderStatus`,
|
|
241
|
+
`ProviderCapabilities`, `AudioChunk`, `TranscriptEvent`,
|
|
242
|
+
`VADEvent`).
|
|
243
|
+
- Audio utilities (`AudioFrame`, `AudioFrameStats`,
|
|
244
|
+
`pcm_s16le_to_float32`, `float_audio_to_pcm_s16le_bytes`,
|
|
245
|
+
`float_audio_to_wav_bytes`, `make_tone_wav`,
|
|
246
|
+
`compute_pcm16_level`, `trim_pcm16_silence`,
|
|
247
|
+
`parse_audio_frame`).
|
|
248
|
+
- Event sink API and wire shape (`EventSink`, `QueueEventSink`,
|
|
249
|
+
`FrameworkEvent`).
|
|
250
|
+
- `SpeechPipeline` and `PipelineConfig` (turn orchestration).
|
|
251
|
+
- `AudioUtteranceCollector` and `UtteranceCollectorConfig` (VAD
|
|
252
|
+
state machine, pre-buffer, rejection gates, silence trimming).
|
|
253
|
+
- Lazy provider registry (`register_provider`, `build_provider`,
|
|
254
|
+
`build_provider_bundle`, `is_provider_available`,
|
|
255
|
+
`ProviderBundle`).
|
|
256
|
+
- `Transport` protocol and `QueueTransport` test double.
|
|
257
|
+
- `extra_hint_for` helper for friendly missing-dep messages.
|
|
258
|
+
|
|
259
|
+
## Event wire compatibility
|
|
260
|
+
|
|
261
|
+
The on-the-wire shape of a framework event is unchanged from the
|
|
262
|
+
pre-extraction harness: `{"type": event_type, "ts": timestamp,
|
|
263
|
+
"payload": payload}`. `FrameworkEvent.to_json()` produces exactly
|
|
264
|
+
that dict, and `QueueEventSink.emit` writes the same shape into
|
|
265
|
+
its queue, so any existing browser client that reads
|
|
266
|
+
`event.type`, `event.ts`, and `event.payload` keeps working. The
|
|
267
|
+
framework does not introduce typed event subclasses in v0.1; that
|
|
268
|
+
work is deferred to a later minor version. `HarnessEvent` is
|
|
269
|
+
exported as a temporary alias of `FrameworkEvent` for one minor
|
|
270
|
+
version; new code should import `FrameworkEvent` directly.
|
|
271
|
+
|
|
272
|
+
## Testing migration
|
|
273
|
+
|
|
274
|
+
Redirect the consumer's existing test suite to import from
|
|
275
|
+
`converse_framework` instead of the local copy: replace
|
|
276
|
+
`conversational_harness.providers.base` with
|
|
277
|
+
`converse_framework.protocols`, the local `audio` / `audio_frames`
|
|
278
|
+
shims with `converse_framework.audio_utils`, and the local
|
|
279
|
+
`events` shim with `converse_framework` (or `events`). Reuse
|
|
280
|
+
`QueueEventSink` and `QueueTransport` as test doubles and use
|
|
281
|
+
`build_provider_bundle({"vad": {"provider": "mock"}, ...})` as
|
|
282
|
+
the default fixture for unit tests. The framework's tests under
|
|
283
|
+
`tests/` are the canonical example of how to use the public API.
|
|
284
|
+
|
|
285
|
+
```bash
|
|
286
|
+
# Framework (from the converse-framework repo root)
|
|
287
|
+
python -m pytest
|
|
288
|
+
|
|
289
|
+
# Consumer (from inside the consumer repo)
|
|
290
|
+
python -m pytest
|
|
291
|
+
```
|
|
292
|
+
|
|
293
|
+
Both should pass against the same `converse_framework` version.
|
|
294
|
+
Pin the framework dependency in the consumer's
|
|
295
|
+
`requirements.txt` (or `pyproject.toml`) so test reproducibility
|
|
296
|
+
matches the framework release you validated against.
|
|
297
|
+
|
|
298
|
+
## Rollback plan
|
|
299
|
+
|
|
300
|
+
If a v0.1 release regresses the consumer, pin the consumer to the
|
|
301
|
+
last known-good in-tree version of the speech stack. Restore the
|
|
302
|
+
consumer's old local copies plus the small compatibility shims
|
|
303
|
+
under
|
|
304
|
+
`app/conversational_harness/{events,audio,audio_frames,providers/base,providers/silero,providers/faster_whisper,providers/llamacpp,providers/kokoro_onnx,providers/pocket_tts,providers/mock,providers/unavailable}.py`,
|
|
305
|
+
and remove the editable install (`pip uninstall converse-framework`
|
|
306
|
+
and drop `-e .` from the consumer's `requirements.txt`); the
|
|
307
|
+
shims fall back to importing the consumer's own local modules. No
|
|
308
|
+
data migration is involved — the rollback is purely an
|
|
309
|
+
import-source change.
|
|
310
|
+
|
|
311
|
+
## Probe vs Load Status
|
|
312
|
+
|
|
313
|
+
As of v0.2, provider status has three tiers:
|
|
314
|
+
|
|
315
|
+
- ``status`` (property): cached state, no I/O.
|
|
316
|
+
- ``probe_status()``: cheap check (import probe, HTTP reachability)
|
|
317
|
+
that does not load models. Used by ``status_only()`` and
|
|
318
|
+
``ProviderBundle.probe_statuses()``.
|
|
319
|
+
- ``load_status()``: may load or initialize heavy resources before
|
|
320
|
+
returning the status. Used by ``ProviderBundle.load_statuses()``.
|
|
321
|
+
|
|
322
|
+
The old ``check_status()`` is kept for backward compatibility.
|
|
323
|
+
Callers that only need a quick readiness check should prefer
|
|
324
|
+
``probe_status()``.
|
|
325
|
+
|
|
326
|
+
## Reference: from the harness
|
|
327
|
+
|
|
328
|
+
The reference harness in
|
|
329
|
+
`Reference-Repository-Conversational-AI-Harness/` is the
|
|
330
|
+
concrete worked example. `conversational_harness/providers/base.py`
|
|
331
|
+
is a 14-line shim that re-exports `ASRProvider`, `LLMProvider`,
|
|
332
|
+
`TTSProvider`, `VADProvider`, `ProviderStatus`,
|
|
333
|
+
`ProviderCapabilities`, `AudioChunk`, `TranscriptEvent`,
|
|
334
|
+
`VADEvent`, and `ProgressCallback` from
|
|
335
|
+
`converse_framework.protocols`. `conversational_harness/audio.py`
|
|
336
|
+
and `conversational_harness/audio_frames.py` are equally thin;
|
|
337
|
+
both pull their public names from
|
|
338
|
+
`converse_framework.audio_utils` so the rest of the harness can
|
|
339
|
+
keep importing `from conversational_harness.audio import
|
|
340
|
+
make_tone_wav` without edits, and `audio_frames.py` re-exports
|
|
341
|
+
`AudioFrame`, `AudioFrameStats`, `parse_audio_frame`,
|
|
342
|
+
`trim_pcm16_silence`, `compute_pcm16_level`, and
|
|
343
|
+
`SUPPORTED_ENCODING`. The harness's `orchestrator.py` is a thin
|
|
344
|
+
subclass of the framework `SpeechPipeline` that supplies the
|
|
345
|
+
app's `system_prompt_builder` (delegating to
|
|
346
|
+
`RuntimeSettings.effective_system_prompt`) and adds a
|
|
347
|
+
`seed_character_first_message` method that pokes the
|
|
348
|
+
character-card seed into `pipeline.state.messages`; turn
|
|
349
|
+
orchestration, streaming, cancellation, barge-in, and per-mode
|
|
350
|
+
history all come from the framework unchanged. The harness's
|
|
351
|
+
`transport.py` keeps the `WebSocketTransport` that implements
|
|
352
|
+
the framework's `Transport` protocol over a FastAPI `WebSocket`;
|
|
353
|
+
the framework never imports FastAPI, and `main.py` still owns
|
|
354
|
+
the `/ws/events` handler, the send/receive tasks, the pre-send
|
|
355
|
+
event queue, and ASR warmup — only the boundary object was
|
|
356
|
+
extracted.
|