converse-framework 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- converse_framework/__init__.py +108 -0
- converse_framework/audio_utils.py +412 -0
- converse_framework/cuda_utils.py +176 -0
- converse_framework/events.py +94 -0
- converse_framework/examples/__init__.py +20 -0
- converse_framework/examples/subprocess_provider.py +439 -0
- converse_framework/examples/text_chat.py +308 -0
- converse_framework/examples/voice_chat.py +223 -0
- converse_framework/examples/websocket_voice_chat.py +174 -0
- converse_framework/js/browser-voice-client.js +248 -0
- converse_framework/js/mic-frame-sender.js +445 -0
- converse_framework/js/speaker-echo-guard.js +308 -0
- converse_framework/js/tts-audio-player.js +237 -0
- converse_framework/pipeline.py +620 -0
- converse_framework/protocols.py +382 -0
- converse_framework/provider_events.py +159 -0
- converse_framework/providers/__init__.py +28 -0
- converse_framework/providers/faster_whisper.py +290 -0
- converse_framework/providers/kokoro_onnx.py +391 -0
- converse_framework/providers/llamacpp.py +264 -0
- converse_framework/providers/mock.py +171 -0
- converse_framework/providers/pocket_tts.py +409 -0
- converse_framework/providers/silero.py +161 -0
- converse_framework/providers/unavailable.py +137 -0
- converse_framework/providers/whisper_cpp.py +322 -0
- converse_framework/registry.py +397 -0
- converse_framework/session.py +315 -0
- converse_framework/transport.py +54 -0
- converse_framework/utterance_collector.py +336 -0
- converse_framework-0.2.0.dist-info/METADATA +992 -0
- converse_framework-0.2.0.dist-info/RECORD +33 -0
- converse_framework-0.2.0.dist-info/WHEEL +4 -0
- converse_framework-0.2.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,397 @@
|
|
|
1
|
+
"""Lazy provider registry for the speech stack.
|
|
2
|
+
|
|
3
|
+
Provider implementations are registered by import string and only loaded
|
|
4
|
+
on first use so that ``import converse_framework`` stays lightweight.
|
|
5
|
+
|
|
6
|
+
Each provider kind (``vad``/``asr``/``llm``/``tts``) supports a small set of
|
|
7
|
+
known names. The base install always provides the ``mock`` providers. Other
|
|
8
|
+
providers are registered with availability probes so the registry can
|
|
9
|
+
report friendlier error messages when an optional dependency is missing.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import importlib
|
|
15
|
+
from collections.abc import Callable, Mapping
|
|
16
|
+
from dataclasses import dataclass
|
|
17
|
+
from typing import Any
|
|
18
|
+
|
|
19
|
+
from converse_framework.protocols import (
|
|
20
|
+
ASRProvider,
|
|
21
|
+
LLMProvider,
|
|
22
|
+
ProviderStatus,
|
|
23
|
+
TTSProvider,
|
|
24
|
+
VADProvider,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass
class _ProviderEntry:
    """Registry record describing where one provider implementation lives.

    The record only stores its two fields; it never imports anything itself.
    """

    # Import string in ``"package.module:ClassName"`` form.
    import_path: str
    # Optional zero-argument callable reporting whether the provider is
    # usable; ``None`` means no dedicated probe was registered.
    availability_probe: Callable[[], bool] | None = None
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# Lookup tables: provider kind -> provider name -> registry entry.
# Each supported kind starts with its own empty table, so membership in this
# mapping also answers "is this a valid provider kind?".
_registry: dict[str, dict[str, _ProviderEntry]] = {
    kind: {} for kind in ("vad", "asr", "llm", "tts")
}
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def register_provider(
    kind: str,
    name: str,
    import_path: str,
    *,
    availability_probe: Callable[[], bool] | None = None,
) -> None:
    """Register a provider implementation by import string.

    Registration only records the entry; the module named by
    ``import_path`` is not imported here. Registering the same
    ``kind``/``name`` pair again replaces the earlier entry.

    Args:
        kind: Provider kind; must be one of ``vad``, ``asr``, ``llm`` or
            ``tts``.
        name: Name under which the provider is looked up.
        import_path: Import string in ``"package.module:ClassName"`` form.
        availability_probe: Optional zero-argument callable returning True
            when the provider is ready to use. It lets the registry give
            specific feedback about which optional dependency is missing.

    Raises:
        ValueError: If ``kind`` is not a known provider kind, or if
            ``import_path`` is not in ``"module:ClassName"`` form.
    """
    if kind not in _registry:
        raise ValueError(
            f"Unknown provider kind: {kind}. Must be one of vad/asr/llm/tts."
        )
    # Fail fast on a malformed import string: the lookup functions split it
    # on the last ":" and would otherwise fail much later with an opaque
    # tuple-unpacking error.
    module_path, separator, class_name = import_path.rpartition(":")
    if not (separator and module_path and class_name):
        raise ValueError(
            f"Invalid import path for {kind} provider {name!r}: "
            f"{import_path!r}. Expected 'package.module:ClassName'."
        )
    _registry[kind][name] = _ProviderEntry(import_path, availability_probe)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def is_provider_available(kind: str, name: str) -> bool:
    """Return True if the named provider can be loaded.

    Resolution order:

    * an unregistered ``kind``/``name`` pair is never available;
    * when the entry has an ``availability_probe``, the probe's result is
      returned as-is and the provider module is not imported here;
    * otherwise the provider is available when the module part of its
      import string imports without raising :class:`ImportError`.

    An import string without a usable ``module:ClassName`` split is
    reported as unavailable instead of raising.
    """
    entry = _registry.get(kind, {}).get(name)
    if entry is None:
        return False
    if entry.availability_probe is not None:
        # A registered probe is authoritative; there is no import fallback.
        return entry.availability_probe()
    module_path, separator, _ = entry.import_path.rpartition(":")
    if not (separator and module_path):
        # Nothing that could be imported, so the provider cannot be loaded.
        return False
    try:
        importlib.import_module(module_path)
    except ImportError:
        return False
    return True
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _probe_module(module_path: str) -> Callable[[], bool]:
    """Build an availability probe for the module named ``module_path``.

    Nothing is imported when the probe is created. Each call of the
    returned callable attempts ``importlib.import_module(module_path)`` and
    reports True on success or False when :class:`ImportError` is raised.
    """

    def probe() -> bool:
        try:
            importlib.import_module(module_path)
        except ImportError:
            importable = False
        else:
            importable = True
        return importable

    return probe
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def build_provider(
    kind: str, name: str, config: dict[str, Any] | None = None
) -> VADProvider | ASRProvider | LLMProvider | TTSProvider:
    """Instantiate the provider registered under ``kind`` and ``name``.

    Only the module named in the registered import string has to be
    importable; the entry's ``availability_probe`` is not consulted here.
    Use :func:`is_provider_available` when a probe-based answer is wanted.

    Args:
        kind: Provider kind used to select the registry table.
        name: Registered provider name within that kind.
        config: Settings passed as the single positional argument to the
            provider class. ``None`` is replaced by a new empty dict.

    Returns:
        An instance of the registered class, or an ``UnavailableProvider``
        built with ``kind`` and ``name`` when nothing is registered under
        that pair or the provider module raises :class:`ImportError`.
    """
    settings = {} if config is None else config

    registered = _registry.get(kind, {}).get(name)
    module = None
    if registered is not None:
        module_path, class_name = registered.import_path.rsplit(":", 1)
        try:
            module = importlib.import_module(module_path)
        except ImportError:
            module = None

    if module is None:
        # Imported lazily so the fallback module is only loaded when needed.
        from converse_framework.providers.unavailable import UnavailableProvider

        return UnavailableProvider(kind=kind, name=name)

    provider_cls = getattr(module, class_name)
    return provider_cls(settings)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
@dataclass
class ProviderBundle:
    """The four providers a :class:`SpeechPipeline` needs to run a turn.

    Bundles are usually produced by :func:`build_provider_bundle`
    from a config mapping, but apps can construct them by hand
    when they want to inject a provider that lives outside the
    registry (e.g. a harness-managed TTS runtime). The framework
    treats the four attributes as the canonical handles; the
    pipeline never falls back to the registry at runtime, so a
    bundle fully describes the providers a turn will use.

    Attributes:
        vad: VAD provider used by the utterance collector.
        asr: ASR provider used for both audio and text input.
        llm: LLM provider used to generate the assistant reply.
        tts: TTS provider used to synthesise the assistant reply.
    """

    vad: VADProvider
    asr: ASRProvider
    llm: LLMProvider
    tts: TTSProvider

    def statuses(self) -> list[dict[str, Any]]:
        """Serialize each provider's ``status`` attribute (vad, asr, llm, tts)."""
        items = [self.vad.status, self.asr.status, self.llm.status, self.tts.status]
        return _serialize_statuses(items)

    async def check_statuses(self) -> list[dict[str, Any]]:
        """Await ``check_status()`` on each provider in turn and serialize.

        Providers are awaited one after another in vad, asr, llm, tts order.
        """
        items = [
            await self.vad.check_status(),
            await self.asr.check_status(),
            await self.llm.check_status(),
            await self.tts.check_status(),
        ]
        return _serialize_statuses(items)

    async def probe_statuses(self) -> list[dict[str, Any]]:
        """Cheap readiness probe - does not load models."""
        items = [
            await self.vad.probe_status(),
            await self.asr.probe_status(),
            await self.llm.probe_status(),
            await self.tts.probe_status(),
        ]
        return _serialize_statuses(items)

    async def load_statuses(self) -> list[dict[str, Any]]:
        """May load or initialise heavy resources."""
        items = [
            await self.vad.load_status(),
            await self.asr.load_status(),
            await self.llm.load_status(),
            await self.tts.load_status(),
        ]
        return _serialize_statuses(items)

    def replace(
        self, **providers: VADProvider | ASRProvider | LLMProvider | TTSProvider
    ) -> ProviderBundle:
        """Return a new bundle with the given providers swapped in.

        Providers not specified are inherited from ``self``. This is a
        no-copy, no-side-effect operation -- the caller is responsible
        for calling :meth:`unload_replaced` if the old providers need
        lifecycle cleanup.

        Args:
            **providers: Replacement providers keyed by ``vad``, ``asr``,
                ``llm`` or ``tts``.

        Raises:
            TypeError: If a keyword other than ``vad``/``asr``/``llm``/``tts``
                is passed. A misspelled kind would otherwise be dropped
                silently and the bundle returned unchanged.

        Example::

            new_bundle = bundle.replace(tts=new_tts)
            await ProviderBundle.unload_replaced(bundle, new_bundle)
        """
        unknown = sorted(set(providers) - {"vad", "asr", "llm", "tts"})
        if unknown:
            raise TypeError(
                "replace() got unexpected provider kind(s): "
                + ", ".join(unknown)
                + ". Expected any of vad/asr/llm/tts."
            )
        return ProviderBundle(
            vad=providers.get("vad", self.vad),
            asr=providers.get("asr", self.asr),
            llm=providers.get("llm", self.llm),
            tts=providers.get("tts", self.tts),
        )

    @staticmethod
    async def unload_replaced(
        old_bundle: ProviderBundle,
        new_bundle: ProviderBundle,
    ) -> list[dict[str, Any]]:
        """Unload providers that were replaced between two bundles.

        Compares the ``vad``, ``asr`` and ``tts`` fields of *old_bundle*
        and *new_bundle* by identity. Providers that differ are unloaded
        from *old_bundle*. Providers that remain the same reference are
        left untouched. The LLM provider is never unloaded here, even
        when it was replaced.

        Returns:
            Serialized status dicts for every provider that was
            unloaded, in the order *vad*, *asr*, *tts*.
        """
        statuses: list[ProviderStatus] = []
        if old_bundle.vad is not new_bundle.vad:
            statuses.append(await old_bundle.vad.unload())
        if old_bundle.asr is not new_bundle.asr:
            statuses.append(await old_bundle.asr.unload())
        # LLMProvider does not expose unload()
        if old_bundle.tts is not new_bundle.tts:
            statuses.append(await old_bundle.tts.unload())
        return _serialize_statuses(statuses)
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def build_provider_bundle(
    config: Mapping[str, Mapping[str, Any]],
    *,
    tts_provider: TTSProvider | None = None,
) -> ProviderBundle:
    """Build a complete provider bundle from a nested config mapping.

    Expected config shape::

        {
            "vad": {"provider": "mock", ...},
            "asr": {"provider": "mock", ...},
            "llm": {"provider": "mock", ...},
            "tts": {"provider": "mock", ...},
        }

    A missing section, or a section without a ``"provider"`` key, selects
    the ``"mock"`` provider. Each provider receives its own copy of its
    section. The VAD section additionally gets a ``"sample_rate"`` default
    taken from ``config["audio"]["sample_rate"]`` (16000 when absent).

    Args:
        config: Nested provider configuration as described above.
        tts_provider: Ready-made TTS provider to use instead of building
            one from config, e.g. a harness-managed TTS runtime. When it
            is given, no TTS provider is built from the ``"tts"`` section.

    Returns:
        A :class:`ProviderBundle` holding the four providers.
    """
    vad_cfg = dict(config.get("vad", {}))
    audio_cfg = dict(config.get("audio", {}))
    vad_cfg.setdefault("sample_rate", int(audio_cfg.get("sample_rate", 16000)))
    vad = build_provider("vad", vad_cfg.get("provider", "mock"), vad_cfg)

    asr_cfg = dict(config.get("asr", {}))
    asr = build_provider("asr", asr_cfg.get("provider", "mock"), asr_cfg)

    llm_cfg = dict(config.get("llm", {}))
    llm = build_provider("llm", llm_cfg.get("provider", "mock"), llm_cfg)

    # Compare against None rather than testing truthiness: an injected
    # provider must be honoured even if the object evaluates as falsy.
    if tts_provider is not None:
        tts = tts_provider
    else:
        tts_cfg = dict(config.get("tts", {}))
        tts = build_provider("tts", tts_cfg.get("provider", "mock"), tts_cfg)

    return ProviderBundle(vad=vad, asr=asr, llm=llm, tts=tts)
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
async def status_only(
    config: Mapping[str, Mapping[str, Any]],
) -> list[dict[str, Any]]:
    """Report the probed status of each configured provider.

    For each kind in ``vad``, ``asr``, ``llm``, ``tts`` order, the provider
    named by that section's ``"provider"`` key is built with
    :func:`build_provider` and its ``probe_status()`` is awaited. No
    :class:`ProviderBundle` is constructed, and no ``load()``-style method
    is called by this function.

    Args:
        config: Nested provider configuration in the same shape
            :func:`build_provider_bundle` accepts. A missing section, or
            one without a ``"provider"`` key, selects the ``"mock"``
            provider.

    Returns:
        A list of four serialized status dicts, one per kind, in
        ``[vad, asr, llm, tts]`` order, in the same shape as the result of
        :meth:`ProviderBundle.statuses`.
    """

    async def probe(kind: str):
        # Copy the section so the provider gets its own mutable dict.
        section = dict(config.get(kind, {}))
        selected = str(section.get("provider", "mock"))
        return await build_provider(kind, selected, section).probe_status()

    # Awaited one at a time, so providers are probed sequentially in order.
    collected = [await probe(kind) for kind in ("vad", "asr", "llm", "tts")]
    return _serialize_statuses(collected)
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def _serialize_status(item: ProviderStatus) -> dict[str, Any]:
    """Convert one provider status object into a plain dict.

    Scalar attributes are copied by value. ``voices`` and ``models`` are
    copied into new lists, and ``capabilities`` into a new dict, so that
    changing the returned payload never alters the provider's own state.

    Args:
        item: Status object exposing the attributes read below.

    Returns:
        A dict with one key per status attribute.
    """
    return {
        "name": item.name,
        "kind": item.kind,
        "ready": item.ready,
        "message": item.message,
        "install_hint": item.install_hint,
        "missing_extra": item.missing_extra,
        # Shallow copy: handing out the live ``__dict__`` would let callers
        # mutate the capabilities object through the serialized payload.
        "capabilities": dict(item.capabilities.__dict__),
        "provider_id": item.provider_id,
        "selected": item.selected,
        "loaded": item.loaded,
        "managed_externally": item.managed_externally,
        "supports_model_management": item.supports_model_management,
        "supports_voice_selection": item.supports_voice_selection,
        "voices": list(item.voices),
        "active_voice": item.active_voice,
        "models": list(item.models),
        "active_model": item.active_model,
        "status_level": item.status_level,
    }
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
def _serialize_statuses(items) -> list[dict[str, Any]]:
    """Serialize every status in ``items``, preserving their order."""
    return list(map(_serialize_status, items))
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
# ---------------------------------------------------------------------------
# Built-in provider registrations
# ---------------------------------------------------------------------------
#
# Each row is (kind, name, import string, module to probe). The mock providers
# are registered without a probe. Every other provider lives in its own module
# and is paired with a probe for the module it depends on. Registering only
# stores the import string and the probe; nothing is imported until a provider
# is built or its probe is called.
#
# ``kokoro`` is the name used in profiles; the implementation lives in
# ``kokoro_onnx.py`` because that's the model family. ``kokoro-onnx`` is
# kept as a legacy alias for harness compatibility.

for _kind, _name, _import_path, _dependency in (
    ("vad", "mock", "converse_framework.providers.mock:MockVADProvider", None),
    ("asr", "mock", "converse_framework.providers.mock:MockASRProvider", None),
    ("llm", "mock", "converse_framework.providers.mock:MockLLMProvider", None),
    ("tts", "mock", "converse_framework.providers.mock:MockTTSProvider", None),
    (
        "vad",
        "silero",
        "converse_framework.providers.silero:SileroVADProvider",
        "silero_vad",
    ),
    (
        "asr",
        "faster-whisper",
        "converse_framework.providers.faster_whisper:FasterWhisperASRProvider",
        "faster_whisper",
    ),
    (
        "asr",
        "whisper-cpp",
        "converse_framework.providers.whisper_cpp:WhisperCppASRProvider",
        "httpx",
    ),
    (
        "llm",
        "llamacpp",
        "converse_framework.providers.llamacpp:LlamaCppProvider",
        "httpx",
    ),
    (
        "tts",
        "kokoro",
        "converse_framework.providers.kokoro_onnx:KokoroOnnxProvider",
        "kokoro_onnx",
    ),
    (
        "tts",
        "kokoro-onnx",
        "converse_framework.providers.kokoro_onnx:KokoroOnnxProvider",
        "kokoro_onnx",
    ),
    (
        "tts",
        "pocket-tts",
        "converse_framework.providers.pocket_tts:PocketTTSProvider",
        "pocket_tts",
    ),
):
    register_provider(
        _kind,
        _name,
        _import_path,
        availability_probe=(
            None if _dependency is None else _probe_module(_dependency)
        ),
    )
# Keep the module namespace free of the loop's temporary names.
del _kind, _name, _import_path, _dependency
|