converse-framework 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- converse_framework/__init__.py +108 -0
- converse_framework/audio_utils.py +412 -0
- converse_framework/cuda_utils.py +176 -0
- converse_framework/events.py +94 -0
- converse_framework/examples/__init__.py +20 -0
- converse_framework/examples/subprocess_provider.py +439 -0
- converse_framework/examples/text_chat.py +308 -0
- converse_framework/examples/voice_chat.py +223 -0
- converse_framework/examples/websocket_voice_chat.py +174 -0
- converse_framework/js/browser-voice-client.js +248 -0
- converse_framework/js/mic-frame-sender.js +445 -0
- converse_framework/js/speaker-echo-guard.js +308 -0
- converse_framework/js/tts-audio-player.js +237 -0
- converse_framework/pipeline.py +620 -0
- converse_framework/protocols.py +382 -0
- converse_framework/provider_events.py +159 -0
- converse_framework/providers/__init__.py +28 -0
- converse_framework/providers/faster_whisper.py +290 -0
- converse_framework/providers/kokoro_onnx.py +391 -0
- converse_framework/providers/llamacpp.py +264 -0
- converse_framework/providers/mock.py +171 -0
- converse_framework/providers/pocket_tts.py +409 -0
- converse_framework/providers/silero.py +161 -0
- converse_framework/providers/unavailable.py +137 -0
- converse_framework/providers/whisper_cpp.py +322 -0
- converse_framework/registry.py +397 -0
- converse_framework/session.py +315 -0
- converse_framework/transport.py +54 -0
- converse_framework/utterance_collector.py +336 -0
- converse_framework-0.2.0.dist-info/METADATA +992 -0
- converse_framework-0.2.0.dist-info/RECORD +33 -0
- converse_framework-0.2.0.dist-info/WHEEL +4 -0
- converse_framework-0.2.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
"""llama.cpp HTTP provider (OpenAI-compatible API).
|
|
2
|
+
|
|
3
|
+
The ``httpx`` package is imported lazily inside async methods so the base
|
|
4
|
+
:mod:`converse_framework` package stays light. Install with::
|
|
5
|
+
|
|
6
|
+
pip install 'converse-framework[llamacpp]'
|
|
7
|
+
|
|
8
|
+
Sampler values are provided either through the ``sampler`` config key at
|
|
9
|
+
construction time, or at runtime through :meth:`set_sampler_provider`, which
|
|
10
|
+
takes a zero-argument callable returning a dict of sampler overrides. The
|
|
11
|
+
framework never imports the harness ``RuntimeSettings`` -- the harness is
|
|
12
|
+
responsible for wiring the callable that resolves effective sampler values.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import json
|
|
18
|
+
from collections.abc import AsyncIterator, Callable
|
|
19
|
+
|
|
20
|
+
from converse_framework.protocols import (
|
|
21
|
+
LLMProvider,
|
|
22
|
+
ProviderCapabilities,
|
|
23
|
+
ProviderStatus,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
SamplerProvider = Callable[[], dict]
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class LlamaCppProvider(LLMProvider):
    """LLM provider that talks to a llama.cpp server over its OpenAI-compatible API.

    ``httpx`` is imported inside the methods that need it, so constructing
    the provider never requires the optional dependency.
    """

    # Shared wording for every "httpx is missing" message and error.
    _HTTPX_HINT = (
        "llama.cpp provider requires httpx; install with "
        "pip install 'converse-framework[llamacpp]'."
    )

    def __init__(self, config: dict):
        """Read connection and default sampler settings from *config*.

        Recognised keys (all optional): ``base_url``, ``model`` (``"auto"``
        selects the first model the server lists), ``temperature`` and
        ``max_tokens``.
        """
        # NOTE(review): the module docstring mentions a ``sampler`` config
        # key, but nothing here reads it -- confirm whether that is intended.
        self.base_url = str(config.get("base_url", "http://127.0.0.1:8080")).rstrip("/")
        self.model = str(config.get("model", "auto"))
        self.temperature = float(config.get("temperature", 0.7))
        self.max_tokens = int(config.get("max_tokens", 256))
        self._sampler_provider: SamplerProvider | None = None
        # Model id used for requests; resolved lazily and reset on failure.
        self._resolved_model: str | None = None

    def set_sampler_provider(self, provider: SamplerProvider | None) -> None:
        """Inject a callable that returns the current sampler overrides.

        The framework never imports the harness ``RuntimeSettings``; the
        harness wires this by passing ``RUNTIME_SETTINGS.effective_sampler``
        (a bound method) or an equivalent lambda. Pass ``None`` to go back
        to the configured ``temperature`` / ``max_tokens``.
        """
        self._sampler_provider = provider

    def _make_status(self, *, ready: bool, message: str, **extra) -> ProviderStatus:
        """Build a ``ProviderStatus`` with this provider's fixed identity fields.

        *extra* is forwarded verbatim (used for ``status_level``), so fields
        a caller does not pass keep their ``ProviderStatus`` defaults.
        """
        return ProviderStatus(
            name="llama.cpp",
            kind="llm",
            ready=ready,
            message=message,
            capabilities=ProviderCapabilities(),
            provider_id="llamacpp",
            **extra,
        )

    @property
    def status(self) -> ProviderStatus:
        """Return the static "configured, not yet verified" status (no I/O)."""
        return self._make_status(
            ready=False,
            message=(
                f"Configured for OpenAI-compatible llama.cpp server at {self.base_url}."
            ),
            status_level="configured",
        )

    async def check_status(self) -> ProviderStatus:
        """Query the server over HTTP and report whether it is ready."""
        return await self._http_check_status()

    async def probe_status(self) -> ProviderStatus:
        """Cheap probe: check httpx is importable; no HTTP call."""
        try:
            # The import itself is the probe; the module is not used here.
            import httpx  # type: ignore[import-not-found]  # noqa: F401
        except Exception as exc:  # pragma: no cover - import path
            return self._make_status(
                ready=False,
                message=f"{self._HTTPX_HINT} ({exc})",
                status_level="unavailable",
            )
        # httpx is available; report the static configured status.
        return self.status

    async def load_status(self) -> ProviderStatus:
        """Alias for probe_status - HTTP provider has no model loading."""
        return await self.probe_status()

    @staticmethod
    def _health_error_message(payload) -> str:
        """Extract a human-readable reason from a non-ok ``/health`` payload."""
        fallback = "server did not report ready"
        if not isinstance(payload, dict):
            return fallback
        error = payload.get("error", {})
        if isinstance(error, dict):
            return error.get("message", fallback)
        # Tolerate servers that report the error as a bare string/value.
        return str(error) if error else fallback

    async def _http_check_status(self) -> ProviderStatus:
        """Check ``/health`` and ``/v1/models`` and summarise the result.

        Never raises for connectivity or payload problems; each failure is
        reported as a not-ready ``ProviderStatus`` with an explanatory message.
        """
        try:
            import httpx  # type: ignore[import-not-found]
        except Exception as exc:  # pragma: no cover - import path
            return self._make_status(
                ready=False, message=f"{self._HTTPX_HINT} ({exc})"
            )

        # Short timeouts: this is a status check, not a generation request.
        timeout = httpx.Timeout(connect=1.0, read=2.0, write=1.0, pool=1.0)
        async with httpx.AsyncClient(timeout=timeout) as client:
            try:
                health = await client.get(f"{self.base_url}/health")
                health.raise_for_status()
                health_payload = health.json()
            except Exception as exc:
                return self._make_status(
                    ready=False,
                    message=f"Cannot reach llama.cpp at {self.base_url}: {exc}",
                )

            health_ok = (
                isinstance(health_payload, dict)
                and health_payload.get("status") == "ok"
            )
            if not health_ok:
                reason = self._health_error_message(health_payload)
                return self._make_status(
                    ready=False,
                    message=f"llama.cpp reachable but not ready: {reason}",
                )

            try:
                models = await client.get(f"{self.base_url}/v1/models")
                models.raise_for_status()
                models_payload = models.json()
            except Exception as exc:
                return self._make_status(
                    ready=False,
                    message=f"llama.cpp health OK, but /v1/models failed: {exc}",
                )

        # ``data`` may be missing or null; both mean "no models reported".
        entries = []
        if isinstance(models_payload, dict):
            entries = models_payload.get("data") or []
        model_ids = [str(item.get("id", "unknown")) for item in entries]
        if not model_ids:
            return self._make_status(
                ready=False,
                message=(
                    "llama.cpp health OK, but no loaded model was reported by "
                    "/v1/models."
                ),
            )

        model_list = ", ".join(model_ids[:3])
        auto = self.model == "auto"
        selected_model = model_ids[0] if auto else self.model
        if not auto and self.model not in model_ids:
            return self._make_status(
                ready=False,
                message=(
                    f"llama.cpp is ready, but configured model '{self.model}' is "
                    f"not in /v1/models. Loaded: {model_list}"
                ),
            )

        # Drop a cached model id that no longer matches what the server
        # reports, so the next request resolves it again.
        if self._resolved_model is not None and selected_model != self._resolved_model:
            self._resolved_model = None

        active = "auto-selected" if auto else "selected"
        return self._make_status(
            ready=True,
            message=(
                f"Ready at {self.base_url}; {active} model: {selected_model}; "
                f"loaded: {model_list}"
            ),
        )

    async def stream_response(
        self, messages: list[dict[str, str]]
    ) -> AsyncIterator[str]:
        """Stream assistant text for *messages* from ``/v1/chat/completions``.

        Yields each non-empty ``delta.content`` fragment as it arrives.

        Raises:
            RuntimeError: if ``httpx`` is not installed, or no model could be
                resolved in ``"auto"`` mode.
            Exception: any HTTP or JSON decoding error is propagated; the
                cached model id is cleared first so the next call re-resolves.
        """
        if self._resolved_model is None:
            self._resolved_model = await self._resolve_model()
        payload: dict = {
            "model": self._resolved_model,
            "messages": messages,
            "stream": True,
        }
        payload.update(self._build_sampler())
        url = f"{self.base_url}/v1/chat/completions"
        try:
            import httpx  # type: ignore[import-not-found]
        except Exception as exc:  # pragma: no cover - import path
            raise RuntimeError(self._HTTPX_HINT) from exc
        timeout = httpx.Timeout(connect=3.0, read=60.0, write=10.0, pool=3.0)
        try:
            async with httpx.AsyncClient(timeout=timeout) as client:
                async with client.stream("POST", url, json=payload) as response:
                    response.raise_for_status()
                    async for line in response.aiter_lines():
                        # SSE allows "data:" with or without a following space.
                        if not line.startswith("data:"):
                            continue
                        data = line[5:].strip()
                        if data == "[DONE]":
                            break
                        chunk = json.loads(data)
                        # Some chunks (e.g. usage-only ones) carry an empty
                        # ``choices`` list or a null ``delta``; skip them.
                        choices = chunk.get("choices") or [{}]
                        delta = choices[0].get("delta") or {}
                        content = delta.get("content")
                        if content:
                            yield content
        except Exception:
            self._resolved_model = None
            raise

    def _build_sampler(self) -> dict:
        """Return the sampler parameters to merge into the request payload.

        When a sampler provider is installed its result is used as-is;
        otherwise the configured ``temperature`` and ``max_tokens`` are used.
        A provider that returns ``None`` also falls back to the configured
        values.
        """
        if self._sampler_provider is not None:
            overrides = self._sampler_provider()
            if overrides is not None:
                return overrides
        return {
            "temperature": self.temperature,
            "max_tokens": self.max_tokens,
        }

    async def _resolve_model(self) -> str:
        """Return the model id to send with requests.

        A configured model name is returned unchanged. In ``"auto"`` mode the
        first entry of ``/v1/models`` is used.

        Raises:
            RuntimeError: if ``httpx`` is missing or the server lists no model.
        """
        if self.model != "auto":
            return self.model
        try:
            import httpx  # type: ignore[import-not-found]
        except Exception as exc:  # pragma: no cover - import path
            raise RuntimeError(self._HTTPX_HINT) from exc
        timeout = httpx.Timeout(connect=1.0, read=2.0, write=1.0, pool=1.0)
        async with httpx.AsyncClient(timeout=timeout) as client:
            response = await client.get(f"{self.base_url}/v1/models")
            response.raise_for_status()
            payload = response.json()
        # Treat a missing or null ``data`` field like an empty model list.
        model_data = payload.get("data") or []
        if not model_data:
            raise RuntimeError(
                "llama.cpp did not report a loaded model from /v1/models"
            )
        return str(model_data[0].get("id", "unknown"))

    async def unload(self) -> ProviderStatus:
        """Return the static status; this method releases nothing."""
        return self.status
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
"""Mock provider implementations with no external dependencies.
|
|
2
|
+
|
|
3
|
+
These are always available and useful for testing pipeline orchestration
|
|
4
|
+
without real model backends.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import asyncio
|
|
10
|
+
from collections.abc import AsyncIterator
|
|
11
|
+
|
|
12
|
+
from converse_framework.audio_utils import make_tone_wav
|
|
13
|
+
from converse_framework.protocols import (
|
|
14
|
+
ASRProvider,
|
|
15
|
+
AudioChunk,
|
|
16
|
+
LLMProvider,
|
|
17
|
+
ProviderCapabilities,
|
|
18
|
+
ProviderStatus,
|
|
19
|
+
TTSProvider,
|
|
20
|
+
TranscriptEvent,
|
|
21
|
+
VADProvider,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class MockVADProvider(VADProvider):
    """Dependency-free VAD stand-in that reports ready and emits no events."""

    def __init__(self, config: dict):
        # Stored as given; nothing in this class reads it afterwards.
        self.config = config

    @property
    def status(self) -> ProviderStatus:
        """Build the always-ready status record for the mock VAD."""
        barge_in_caps = ProviderCapabilities(supports_barge_in=True)
        return ProviderStatus(
            provider_id="mock",
            kind="vad",
            name="mock-vad",
            ready=True,
            message="Mock VAD accepts browser speech-start/speech-end events.",
            capabilities=barge_in_caps,
        )

    async def check_status(self) -> ProviderStatus:
        """Return the same record as :attr:`status`; no work is performed."""
        return self.status

    async def process_frame(self, frame) -> list:
        """Ignore *frame* and return an empty list."""
        return []

    async def unload(self) -> ProviderStatus:
        """Return the current status; the mock holds nothing to release."""
        return self.status
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class MockASRProvider(ASRProvider):
    """Dependency-free ASR stand-in that echoes text or emits a canned transcript."""

    def __init__(self, config: dict):
        # Stored as given; nothing in this class reads it afterwards.
        self.config = config

    @property
    def status(self) -> ProviderStatus:
        """Build the always-ready status record for the mock ASR."""
        partial_caps = ProviderCapabilities(supports_partials=True)
        return ProviderStatus(
            provider_id="mock",
            kind="asr",
            name="mock-asr",
            ready=True,
            message="Text input is treated as the final transcript.",
            capabilities=partial_caps,
        )

    async def check_status(self) -> ProviderStatus:
        """Return the same record as :attr:`status`."""
        return self.status

    async def load(self) -> ProviderStatus:
        """Return the current status; there is no model to load."""
        return self.status

    async def transcribe_text_input(self, text: str) -> AsyncIterator[TranscriptEvent]:
        """Replay *text* word by word as growing partials, then one final event.

        Whitespace-only input yields nothing. Every partial (including the one
        containing all words) is preceded by a 10 ms sleep.
        """
        tokens = text.strip().split()
        for upto in range(1, len(tokens) + 1):
            await asyncio.sleep(0.01)
            yield TranscriptEvent(text=" ".join(tokens[:upto]), final=False)
        if tokens:
            yield TranscriptEvent(text=" ".join(tokens), final=True)

    async def transcribe_audio(
        self, pcm_s16le: bytes, sample_rate: int, progress=None
    ) -> AsyncIterator[TranscriptEvent]:
        """Ignore the audio and yield one fixed final transcript.

        If *progress* is truthy it is awaited once with an ``asr.progress``
        event before the transcript is produced.
        """
        if progress:
            notice = {"stage": "mock", "message": "Mock ASR transcribing audio."}
            await progress("asr.progress", notice)
        yield TranscriptEvent(text="Mock ASR heard audio input.", final=True)

    async def unload(self) -> ProviderStatus:
        """Return the current status; the mock holds nothing to release."""
        return self.status
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
class MockLLMProvider(LLMProvider):
    """Dependency-free LLM stand-in that streams a fixed reply with delays."""

    def __init__(self, config: dict):
        # Delays are configured in milliseconds and stored in seconds.
        first_ms = float(config.get("first_token_delay_ms", 120))
        token_ms = float(config.get("token_delay_ms", 28))
        self.first_token_delay = first_ms / 1000
        self.token_delay = token_ms / 1000

    @property
    def status(self) -> ProviderStatus:
        """Build the always-ready status record for the mock LLM."""
        return ProviderStatus(
            provider_id="mock",
            kind="llm",
            name="mock-llm",
            ready=True,
            message="Mock LLM streams a deterministic local response.",
            capabilities=ProviderCapabilities(),
        )

    async def check_status(self) -> ProviderStatus:
        """Return the same record as :attr:`status`."""
        return self.status

    async def stream_response(
        self, messages: list[dict[str, str]]
    ) -> AsyncIterator[str]:
        """Stream a canned reply that quotes the last message's content.

        Sleeps ``first_token_delay`` before the first fragment and
        ``token_delay`` after every fragment. Each fragment is one
        space-separated piece of the reply followed by a single space.
        """
        if messages:
            heard = messages[-1]["content"]
        else:
            heard = ""
        reply = (
            f"I heard: {heard}. This is the harness running in mock mode, "
            "with provider boundaries and latency events active."
        )
        await asyncio.sleep(self.first_token_delay)
        for piece in reply.split(" "):
            yield piece + " "
            await asyncio.sleep(self.token_delay)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
class MockTTSProvider(TTSProvider):
    """Dependency-free TTS stand-in that emits a single WAV tone per request."""

    def __init__(self, config: dict):
        # Delays are configured in milliseconds and stored in seconds.
        first_ms = float(config.get("first_chunk_delay_ms", 80))
        chunk_ms = float(config.get("chunk_delay_ms", 40))
        self.first_chunk_delay = first_ms / 1000
        self.chunk_delay = chunk_ms / 1000

    @property
    def status(self) -> ProviderStatus:
        """Build the always-ready status record for the mock TTS."""
        streaming_caps = ProviderCapabilities(supports_streaming_tts=True)
        return ProviderStatus(
            provider_id="mock",
            kind="tts",
            name="mock-tts",
            ready=True,
            message="Mock TTS emits short WAV tones so playback paths can be tested.",
            capabilities=streaming_caps,
        )

    async def check_status(self) -> ProviderStatus:
        """Return the same record as :attr:`status`."""
        return self.status

    async def load(self) -> ProviderStatus:
        """Return the current status; there is no model to load."""
        return self.status

    async def unload(self) -> ProviderStatus:
        """Return the current status; the mock holds nothing to release."""
        return self.status

    async def stream_audio(self, text: str) -> AsyncIterator[AudioChunk]:
        """Yield one final WAV chunk whose tone frequency depends on ``len(text)``.

        Sleeps ``first_chunk_delay`` before the chunk and ``chunk_delay``
        after it.
        """
        await asyncio.sleep(self.first_chunk_delay)
        # The frequency passed to make_tone_wav lands in the 420..579 range.
        tone_frequency = 420 + (len(text) % 160)
        wav_bytes = make_tone_wav(frequency=tone_frequency)
        yield AudioChunk(data=wav_bytes, mime_type="audio/wav", final=True)
        await asyncio.sleep(self.chunk_delay)

    async def stream_audio_with_progress(
        self, text: str, progress=None
    ) -> AsyncIterator[AudioChunk]:
        """Like :meth:`stream_audio`, with an optional progress notification.

        If *progress* is truthy it is awaited once with a ``tts.progress``
        event before any audio is produced.
        """
        if progress:
            notice = {"stage": "mock", "message": "Mock TTS generating tone."}
            await progress("tts.progress", notice)
        async for chunk in self.stream_audio(text):
            yield chunk
|