@kaleidorg/mind 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/qvac/assistant.d.ts +73 -0
- package/dist/qvac/assistant.d.ts.map +1 -0
- package/dist/qvac/assistant.js +97 -0
- package/dist/qvac/assistant.js.map +1 -0
- package/dist/qvac/config.d.ts +64 -0
- package/dist/qvac/config.d.ts.map +1 -0
- package/dist/qvac/config.js +71 -0
- package/dist/qvac/config.js.map +1 -0
- package/dist/qvac/delegate.d.ts +48 -0
- package/dist/qvac/delegate.d.ts.map +1 -0
- package/dist/qvac/delegate.js +51 -0
- package/dist/qvac/delegate.js.map +1 -0
- package/dist/qvac/index.d.ts +19 -0
- package/dist/qvac/index.d.ts.map +1 -0
- package/dist/qvac/index.js +19 -0
- package/dist/qvac/index.js.map +1 -0
- package/dist/qvac/parse.d.ts +44 -0
- package/dist/qvac/parse.d.ts.map +1 -0
- package/dist/qvac/parse.js +28 -0
- package/dist/qvac/parse.js.map +1 -0
- package/dist/qvac/provider.d.ts +49 -0
- package/dist/qvac/provider.d.ts.map +1 -0
- package/dist/qvac/provider.js +68 -0
- package/dist/qvac/provider.js.map +1 -0
- package/dist/qvac/stream.d.ts +37 -0
- package/dist/qvac/stream.d.ts.map +1 -0
- package/dist/qvac/stream.js +29 -0
- package/dist/qvac/stream.js.map +1 -0
- package/dist/qvac/text.d.ts +19 -0
- package/dist/qvac/text.d.ts.map +1 -0
- package/dist/qvac/text.js +56 -0
- package/dist/qvac/text.js.map +1 -0
- package/dist/qvac/voice.d.ts +69 -0
- package/dist/qvac/voice.d.ts.map +1 -0
- package/dist/qvac/voice.js +51 -0
- package/dist/qvac/voice.js.map +1 -0
- package/package.json +15 -1
- package/src/qvac/assistant.test.ts +132 -0
- package/src/qvac/assistant.ts +146 -0
- package/src/qvac/config.test.ts +44 -0
- package/src/qvac/config.ts +76 -0
- package/src/qvac/delegate.test.ts +68 -0
- package/src/qvac/delegate.ts +71 -0
- package/src/qvac/index.ts +72 -0
- package/src/qvac/parse.test.ts +52 -0
- package/src/qvac/parse.ts +57 -0
- package/src/qvac/provider.test.ts +107 -0
- package/src/qvac/provider.ts +124 -0
- package/src/qvac/stream.test.ts +79 -0
- package/src/qvac/stream.ts +56 -0
- package/src/qvac/text.test.ts +70 -0
- package/src/qvac/text.ts +60 -0
- package/src/qvac/voice.test.ts +151 -0
- package/src/qvac/voice.ts +122 -0
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Hands-free voice-assistant loop — the transcribe → reason → speak cycle that
|
|
3
|
+
* QVAC's `transcribeStream()` makes possible, lifted into shared code so mobile
|
|
4
|
+
* and desktop run the same orchestration.
|
|
5
|
+
*
|
|
6
|
+
* The host owns the I/O: it opens the SDK session (`transcribeStream` with
|
|
7
|
+
* `DEFAULT_VOICE_STREAM_PARAMS`), feeds mic audio via `session.write()`, and
|
|
8
|
+
* supplies `respond` (LLM/funnel turn → reply text) + `speak` (synth + play).
|
|
9
|
+
* This loop does the parts that must be identical everywhere: filter Whisper's
|
|
10
|
+
* silence hallucinations, and gate the mic during playback so the assistant
|
|
11
|
+
* never transcribes its own voice (QVAC's reference uses a mic-gate, not
|
|
12
|
+
* barge-in — we mirror that).
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
/**
 * One event emitted by a `transcribeStream` conversation session.
 * `type` is the event discriminator; only `text` events carry an utterance.
 */
export interface VoiceTranscriptEvent {
  type: string;
  /** The committed utterance — present on `text` events. */
  text?: string;
}

/**
 * The transcription session supplied by the host: any async-iterable stream of
 * transcript events (the SDK's conversation session fits this shape).
 */
export type VoiceAssistantSession = AsyncIterable<VoiceTranscriptEvent>;
|
|
24
|
+
|
|
25
|
+
/** Phase of the assistant loop, reported to the host through `onState`. */
export type VoiceAssistantState = 'listening' | 'thinking' | 'speaking';

/** Callbacks the host supplies to drive one transcribe → reason → speak turn. */
export interface VoiceAssistantHandlers {
  /** Turn a user utterance into the assistant's reply text (wraps the LLM/funnel). */
  respond: (transcript: string) => Promise<string>;
  /** Synthesize and play the reply. Must resolve only once playback has finished. */
  speak: (text: string) => Promise<void>;
  /**
   * Mic gate, so the assistant does not transcribe its own voice. While gated
   * the host should drop audio rather than buffer it. Invoked with `true`
   * before speaking and with `false` once the post-playback cooldown is over.
   */
  setMicGated?: (gated: boolean) => void;
  /** Fired when an utterance has passed the filter and is about to be handled. */
  onUserText?: (text: string) => void;
  /** Fired with the assistant's reply before it is spoken. */
  onReply?: (text: string) => void;
  /** Fired on every UI state transition. */
  onState?: (state: VoiceAssistantState) => void;
}
|
|
45
|
+
|
|
46
|
+
/** Tuning knobs for the hands-free assistant loop. Every field is optional. */
export interface VoiceAssistantOptions {
  /** Shortest utterance (in characters) worth handling; drops "you", ".", etc. Default 3. */
  minChars?: number;
  /** Utterances to skip (case-insensitive, trailing punctuation stripped). */
  ignoredUtterances?: Iterable<string>;
  /** Delay after playback so speaker reverb settles before listening resumes. Default 300ms. */
  postPlaybackCooldownMs?: number;
  /** Delay function, injectable for tests; defaults to a `setTimeout`-based wait. */
  sleep?: (ms: number) => Promise<void>;
  /** Abort signal used to stop the loop early. */
  signal?: AbortSignal;
}
|
|
58
|
+
|
|
59
|
+
/**
 * Whisper frequently hallucinates these from silence — drop them so the
 * assistant doesn't answer phantom turns. (QVAC docs cite "you", ".", "Thanks.")
 */
export const DEFAULT_IGNORED_UTTERANCES: readonly string[] = [
  'you', 'thank you', 'thanks', 'bye', 'okay', '.',
];

/**
 * Canonical form used to compare an utterance with an ignore-list entry:
 * trimmed, lower-cased, with trailing `.`, `!`, `?` and `,` removed.
 */
function normalizeUtterance(text: string): string {
  return text.trim().toLowerCase().replace(/[.!?,]+$/, '').trim();
}

/**
 * Should this utterance be handled? Pure + exported so it's directly testable.
 *
 * @param text - Raw transcript text.
 * @param options - `minChars` is the minimum trimmed length (default 3);
 *   `ignoredUtterances` replaces {@link DEFAULT_IGNORED_UTTERANCES}.
 * @returns `false` when the trimmed text is shorter than `minChars`, when
 *   nothing is left after stripping trailing punctuation, or when it matches an
 *   ignored utterance; `true` otherwise.
 */
export function shouldHandleUtterance(
  text: string,
  options: { minChars?: number; ignoredUtterances?: Iterable<string> } = {},
): boolean {
  const trimmed = text.trim();
  if (trimmed.length < (options.minChars ?? 3)) return false;

  const norm = normalizeUtterance(trimmed);
  if (!norm) return false;

  // Ignore entries go through the same normalization as the utterance, so an
  // entry written as "Thanks." or " Bye " still matches as documented.
  for (const entry of options.ignoredUtterances ?? DEFAULT_IGNORED_UTTERANCES) {
    if (normalizeUtterance(entry) === norm) return false;
  }
  return true;
}
|
|
84
|
+
|
|
85
|
+
/**
 * Run the hands-free loop until the session ends or `signal` aborts. Only `text`
 * events drive a turn; `vad`/`segment`/`endOfTurn` events are ignored here (the
 * host can read them off the session separately for UI). Always leaves the mic
 * un-gated on exit.
 *
 * Each turn: filter the utterance → `respond` → gate the mic → `speak` →
 * cooldown → un-gate. Turns are processed strictly one at a time: the next
 * session event is not pulled until the current turn (including the cooldown)
 * has finished, so dropping audio captured during playback is the job of the
 * host's mic gate.
 *
 * @param session - Async-iterable stream of transcript events.
 * @param handlers - Host callbacks (`respond`, `speak`, optional gate/UI hooks).
 * @param options - Filter settings, cooldown, injectable `sleep`, abort signal.
 * @returns Resolves when the session is exhausted or the signal is aborted.
 *   Errors thrown by `respond` or `speak` end only the current turn; errors
 *   from the session iterator or the other callbacks propagate.
 */
export async function runVoiceAssistant(
  session: VoiceAssistantSession,
  handlers: VoiceAssistantHandlers,
  options: VoiceAssistantOptions = {},
): Promise<void> {
  const sleep =
    options.sleep ?? ((ms: number) => new Promise<void>((resolve) => setTimeout(resolve, ms)));
  const cooldown = options.postPlaybackCooldownMs ?? 300;
  const aborted = (): boolean => options.signal?.aborted === true;

  handlers.onState?.('listening');
  try {
    for await (const event of session) {
      if (aborted()) break;
      if (event.type !== 'text' || typeof event.text !== 'string') continue;

      const transcript = event.text.trim();
      if (!shouldHandleUtterance(transcript, options)) continue;

      handlers.onUserText?.(transcript);
      handlers.onState?.('thinking');

      let reply: string;
      try {
        reply = await handlers.respond(transcript);
      } catch {
        // Best-effort: a failed turn is dropped and the loop keeps listening.
        handlers.onState?.('listening');
        continue;
      }
      if (aborted()) break;
      if (!reply || !reply.trim()) {
        handlers.onState?.('listening');
        continue;
      }

      handlers.onReply?.(reply);
      handlers.setMicGated?.(true);
      handlers.onState?.('speaking');
      try {
        await handlers.speak(reply);
      } catch {
        /* keep the loop alive on a playback error */
      } finally {
        // Let speaker reverb settle before the mic is opened again.
        await sleep(cooldown);
        handlers.setMicGated?.(false);
        handlers.onState?.('listening');
      }

      // An abort raised during playback should stop the loop now instead of
      // waiting for the next (possibly never-arriving) session event.
      if (aborted()) break;
    }
  } finally {
    // Never leave the mic gated if the loop exits mid-turn.
    handlers.setMicGated?.(false);
  }
}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest';
|
|
2
|
+
import {
|
|
3
|
+
LOCAL_LLM_CONFIG,
|
|
4
|
+
LOCAL_LLM_CONFIG_GPU,
|
|
5
|
+
DELEGATE_LLM_CONFIG,
|
|
6
|
+
TTS_SAMPLE_RATE,
|
|
7
|
+
normalizeWhisperLang,
|
|
8
|
+
} from './config.js';
|
|
9
|
+
|
|
10
|
+
// Pins the shared model-load constants that hosts rely on.
describe('model configs', () => {
  it('CPU baseline runs on cpu with tools enabled', () => {
    const { device, tools } = LOCAL_LLM_CONFIG;
    expect(device).toBe('cpu');
    expect(tools).toBe(true);
  });

  it('GPU config offloads layers and grows the context', () => {
    const gpu = LOCAL_LLM_CONFIG_GPU;
    expect(gpu.device).toBe('gpu');
    expect(gpu.gpu_layers).toBe(99);
    expect(gpu.ctx_size).toBeGreaterThan(LOCAL_LLM_CONFIG.ctx_size);
  });

  it('delegate config gives the desktop the largest context', () => {
    const delegated = DELEGATE_LLM_CONFIG;
    expect(delegated.ctx_size).toBe(16384);
    expect(delegated.device).toBe('gpu');
  });

  it('TTS sample rate matches SUPERTONIC-2 output', () => {
    expect(TTS_SAMPLE_RATE).toBe(44100);
  });
});
|
|
31
|
+
|
|
32
|
+
// Locale → Whisper language code mapping, including the 'en' fallback.
describe('normalizeWhisperLang', () => {
  it('extracts a supported 2-letter code from a locale', () => {
    const italian = normalizeWhisperLang('it-IT');
    const english = normalizeWhisperLang('en_US');
    expect(italian).toBe('it');
    expect(english).toBe('en');
  });

  it('falls back to en for unsupported or missing locales', () => {
    const inputs: Array<string | null | undefined> = ['xx-YY', '', null, undefined];
    for (const input of inputs) {
      expect(normalizeWhisperLang(input)).toBe('en');
    }
  });
});
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* QVAC model-load configs and constants, shared across every host. These are
|
|
3
|
+
* plain data (no SDK import) so they stay portable and testable; callers merge
|
|
4
|
+
* in SDK-specific bits like `verbosity: VERBOSITY.ERROR` at load time.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/**
 * Load config for the local llamacpp model on CPU. It is both the fallback when
 * the GPU load fails and the base object the GPU config spreads and overrides.
 */
export const LOCAL_LLM_CONFIG = {
  device: 'cpu',
  gpu_layers: 0,
  ctx_size: 2048,
  tools: true,
} as const;

/**
 * GPU (Metal on iPhone) variant — far faster than CPU when llamacpp can init the
 * Metal context in the worklet. If the GPU load throws, fall back to
 * {@link LOCAL_LLM_CONFIG}. The 4096 context fits the agentic prompt (system +
 * tools + skills + a little history); 2048 overflowed immediately
 * ("prompt exceeds context").
 */
export const LOCAL_LLM_CONFIG_GPU = {
  ...LOCAL_LLM_CONFIG,
  device: 'gpu',
  // Offload all layers; llamacpp clamps to the model's count.
  gpu_layers: 99,
  ctx_size: 4096,
} as const;

/**
 * Variant for loads delegated to a desktop provider. The desktop has the RAM
 * for a big context, so the agentic prompt gets plenty of room (Qwen3-600M
 * supports up to 32k). 2048 overflowed with the system prompt + tool/skill
 * definitions alone.
 */
export const DELEGATE_LLM_CONFIG = {
  ...LOCAL_LLM_CONFIG_GPU,
  ctx_size: 16384,
} as const;
|
|
40
|
+
|
|
41
|
+
/**
 * Sample rate, in Hz, of the audio SUPERTONIC-2 TTS produces. Hosts use it when
 * building the WAV for playback.
 */
export const TTS_SAMPLE_RATE = 44100;
|
|
43
|
+
|
|
44
|
+
/**
 * Baseline params for a hands-free `transcribeStream()` voice session (Whisper).
 *
 * - `emitVadEvents` turns the session into a conversation stream
 *   (text + vad + endOfTurn events).
 * - `endOfTurnSilenceMs` is how long a pause must last before an utterance is
 *   committed; it is conservative so speakers are not cut off mid-sentence and
 *   TTS reverb does not trigger a turn.
 *
 * Hosts spread these and merge in `modelId`.
 */
export const DEFAULT_VOICE_STREAM_PARAMS = {
  emitVadEvents: true,
  endOfTurnSilenceMs: 700,
} as const;
|
|
55
|
+
|
|
56
|
+
/**
 * Whisper languages we request directly from the device locale. whisper.cpp
 * supports more, but the QVAC handler rejects "auto"/detect_language for these
 * tiny models, so we pass a concrete code (and fall back to 'en').
 */
export const WHISPER_LANGS: ReadonlySet<string> = new Set([
  'en', 'it', 'es', 'fr', 'de', 'pt', 'nl', 'ru', 'pl', 'uk', 'tr', 'ar',
  'zh', 'ja', 'ko', 'hi', 'id', 'sv', 'no', 'da', 'fi', 'cs', 'ro', 'el',
  'he', 'th', 'vi', 'hu', 'ca',
]);

/**
 * Locale language subtags that differ from the code Whisper expects:
 * - `nb` (Norwegian Bokmål, what OSes report for Norwegian) → `no`
 * - `iw` / `in` (legacy codes still emitted by older Java/Android runtimes for
 *   Hebrew / Indonesian) → `he` / `id`
 */
const WHISPER_LANG_ALIASES: ReadonlyMap<string, string> = new Map([
  ['nb', 'no'],
  ['iw', 'he'],
  ['in', 'id'],
]);

/**
 * Best-effort 2-letter Whisper language code from an OS locale string
 * (e.g. "it-IT" → "it"), restricted to codes in {@link WHISPER_LANGS}. Pure: the
 * host reads the locale (NativeModules etc.) and passes it here.
 *
 * @param locale - OS locale such as "it-IT", "en_US" or "nb-NO"; surrounding
 *   whitespace and letter case are ignored.
 * @returns The supported language code, or `'en'` when the locale is missing,
 *   empty, or its language is not supported.
 */
export function normalizeWhisperLang(locale: string | null | undefined): string {
  if (!locale) return 'en';
  // Primary language subtag: everything before the first "-" or "_".
  const primary = String(locale).trim().split(/[-_]/)[0]?.toLowerCase() ?? '';
  const code = WHISPER_LANG_ALIASES.get(primary) ?? primary;
  return WHISPER_LANGS.has(code) ? code : 'en';
}
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest';
|
|
2
|
+
import {
|
|
3
|
+
allowListFirewall,
|
|
4
|
+
denyListFirewall,
|
|
5
|
+
firewallFromKeyList,
|
|
6
|
+
buildDelegateConfig,
|
|
7
|
+
} from './delegate.js';
|
|
8
|
+
|
|
9
|
+
// Allow-list construction: keys are trimmed, blanks dropped, duplicates removed.
describe('allowListFirewall', () => {
  it('builds an allow-list, trimming + de-duping keys', () => {
    const firewall = allowListFirewall([' k1 ', 'k2', 'k1', '']);
    expect(firewall).toEqual({ mode: 'allow', publicKeys: ['k1', 'k2'] });
  });

  it('is empty for no keys (caller must decide: open vs refuse)', () => {
    const firewall = allowListFirewall([]);
    expect(firewall).toEqual({ mode: 'allow', publicKeys: [] });
  });
});
|
|
21
|
+
|
|
22
|
+
// Deny-list construction mirrors the allow-list builder with mode 'deny'.
describe('denyListFirewall', () => {
  it('builds a deny-list', () => {
    const firewall = denyListFirewall(['bad']);
    expect(firewall).toEqual({ mode: 'deny', publicKeys: ['bad'] });
  });
});
|
|
27
|
+
|
|
28
|
+
// Parsing a raw key list (env var / pairing store) into an allow-list.
describe('firewallFromKeyList', () => {
  it('parses comma/space/newline-separated keys into an allow-list', () => {
    const firewall = firewallFromKeyList('k1, k2\nk3 k4');
    expect(firewall).toEqual({ mode: 'allow', publicKeys: ['k1', 'k2', 'k3', 'k4'] });
  });

  it('returns undefined for empty/missing input (advertise openly)', () => {
    const inputs: Array<string | null | undefined> = ['', '   ', null, undefined];
    for (const input of inputs) {
      expect(firewallFromKeyList(input)).toBeUndefined();
    }
  });
});
|
|
43
|
+
|
|
44
|
+
// Consumer-side delegate config: defaults, pass-through, and omitted optionals.
describe('buildDelegateConfig', () => {
  it('defaults fallbackToLocal to false and trims the key', () => {
    const config = buildDelegateConfig(' pk ');
    expect(config).toEqual({ providerPublicKey: 'pk', fallbackToLocal: false });
  });

  it('passes through fallbackToLocal, timeout, forceNewConnection when set', () => {
    const config = buildDelegateConfig('pk', {
      fallbackToLocal: true,
      timeout: 60000,
      forceNewConnection: true,
    });
    expect(config).toEqual({
      providerPublicKey: 'pk',
      fallbackToLocal: true,
      timeout: 60000,
      forceNewConnection: true,
    });
  });

  it('omits optional fields that are not set', () => {
    const config = buildDelegateConfig('pk', { fallbackToLocal: false });
    expect('timeout' in config).toBe(false);
    expect('forceNewConnection' in config).toBe(false);
  });
});
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Delegation helpers — the provider firewall (who may connect) and the
|
|
3
|
+
* consumer-side delegate config. Pure data builders (no `@qvac/sdk` import) so
|
|
4
|
+
* they stay shared + testable; the host passes the result to
|
|
5
|
+
* `startQVACProvider({ firewall })` / `loadModel({ delegate })`.
|
|
6
|
+
*
|
|
7
|
+
* Security note: a QVAC provider is reachable by anyone who learns its
|
|
8
|
+
* Hyperswarm public key. Advertising with no firewall means any such peer can
|
|
9
|
+
* run inference on your machine. Use {@link allowListFirewall} so a desktop
|
|
10
|
+
* provider serves ONLY its paired phone(s).
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
/** Firewall passed to `startQVACProvider`: restricts which peers may delegate to this provider. */
export interface ProviderFirewall {
  mode: 'allow' | 'deny';
  publicKeys: string[];
}

/** Trim every key, drop blanks, and remove duplicates while keeping first-seen order. */
function normalizeKeys(keys: Iterable<string>): string[] {
  const unique = new Set<string>();
  for (const key of keys) {
    const cleaned = key.trim();
    if (cleaned) unique.add(cleaned);
  }
  return Array.from(unique);
}

/**
 * Zero-trust firewall: ONLY the given consumer public keys may delegate. Pass
 * the paired phone(s)' public keys so that nobody else can use the desktop
 * brain, even if they learn its public key.
 */
export function allowListFirewall(consumerPublicKeys: Iterable<string>): ProviderFirewall {
  const publicKeys = normalizeKeys(consumerPublicKeys);
  return { mode: 'allow', publicKeys };
}

/** Block the given consumer public keys; every other peer may connect. */
export function denyListFirewall(consumerPublicKeys: Iterable<string>): ProviderFirewall {
  const publicKeys = normalizeKeys(consumerPublicKeys);
  return { mode: 'deny', publicKeys };
}

/**
 * Build an allow-list firewall from a comma/space/newline-separated key list
 * (e.g. from an env var or a pairing store). Returns `undefined` when no key is
 * configured — the caller then advertises openly and should warn.
 */
export function firewallFromKeyList(raw: string | null | undefined): ProviderFirewall | undefined {
  if (!raw) return undefined;
  // Splitting on whitespace/commas leaves only empty pieces to discard.
  const pieces = raw.split(/[\s,]+/).filter((piece) => piece.length > 0);
  if (pieces.length === 0) return undefined;
  return allowListFirewall(pieces);
}
|
|
47
|
+
|
|
48
|
+
/** Consumer-side `delegate` config handed to `loadModel({ delegate })`. */
export interface DelegateConfig {
  providerPublicKey: string;
  fallbackToLocal: boolean;
  timeout?: number;
  forceNewConnection?: boolean;
}

/**
 * Assemble the `delegate` config for a delegated `loadModel`.
 *
 * The provider key is trimmed. `fallbackToLocal` defaults to false (the host
 * owns recovery), matching rate's existing LLM/Whisper/TTS delegated loads.
 * `timeout` and `forceNewConnection` appear in the result only when the caller
 * supplied a non-null value.
 */
export function buildDelegateConfig(
  providerPublicKey: string,
  opts: { fallbackToLocal?: boolean; timeout?: number; forceNewConnection?: boolean } = {},
): DelegateConfig {
  const config: DelegateConfig = {
    providerPublicKey: providerPublicKey.trim(),
    fallbackToLocal: opts.fallbackToLocal ?? false,
  };
  if (opts.timeout != null) {
    config.timeout = opts.timeout;
  }
  if (opts.forceNewConnection != null) {
    config.forceNewConnection = opts.forceNewConnection;
  }
  return config;
}
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @kaleidorg/mind-qvac — the single home for all @qvac/sdk logic behind
|
|
3
|
+
* @kaleidorg/mind. Hosts (rate mobile, desktop provider, cli) supply @qvac/sdk
|
|
4
|
+
* as a peer dependency; this package owns the orchestration so the logic lives
|
|
5
|
+
* in one place instead of drifting copies per host.
|
|
6
|
+
*
|
|
7
|
+
* It exports the platform-agnostic core (pure text helpers, model configs,
|
|
8
|
+
* completion parsing) together with the provider, voice, assistant-loop and
|
|
9
|
+
* delegation helpers that build on top of it.
|
|
10
|
+
*/
|
|
11
|
+
export {
|
|
12
|
+
cleanAssistantVisibleText,
|
|
13
|
+
sanitizeForSupertonic,
|
|
14
|
+
} from './text.js';
|
|
15
|
+
|
|
16
|
+
export {
|
|
17
|
+
LOCAL_LLM_CONFIG,
|
|
18
|
+
LOCAL_LLM_CONFIG_GPU,
|
|
19
|
+
DELEGATE_LLM_CONFIG,
|
|
20
|
+
TTS_SAMPLE_RATE,
|
|
21
|
+
DEFAULT_VOICE_STREAM_PARAMS,
|
|
22
|
+
WHISPER_LANGS,
|
|
23
|
+
normalizeWhisperLang,
|
|
24
|
+
} from './config.js';
|
|
25
|
+
|
|
26
|
+
export {
|
|
27
|
+
finalToTurn,
|
|
28
|
+
type QvacFinalLike,
|
|
29
|
+
type ParsedTurn,
|
|
30
|
+
} from './parse.js';
|
|
31
|
+
|
|
32
|
+
export {
|
|
33
|
+
consumeRun,
|
|
34
|
+
type CompletionEventLike,
|
|
35
|
+
type CompletionRunLike,
|
|
36
|
+
type StreamHandlers,
|
|
37
|
+
type ConsumedTurn,
|
|
38
|
+
} from './stream.js';
|
|
39
|
+
|
|
40
|
+
export {
|
|
41
|
+
createQvacProvider,
|
|
42
|
+
type QvacProviderOptions,
|
|
43
|
+
type QvacTurnInput,
|
|
44
|
+
} from './provider.js';
|
|
45
|
+
|
|
46
|
+
export {
|
|
47
|
+
createQvacVoice,
|
|
48
|
+
type QvacVoice,
|
|
49
|
+
type QvacVoiceOptions,
|
|
50
|
+
type VoiceSession,
|
|
51
|
+
type PcmAudio,
|
|
52
|
+
} from './voice.js';
|
|
53
|
+
|
|
54
|
+
export {
|
|
55
|
+
runVoiceAssistant,
|
|
56
|
+
shouldHandleUtterance,
|
|
57
|
+
DEFAULT_IGNORED_UTTERANCES,
|
|
58
|
+
type VoiceAssistantSession,
|
|
59
|
+
type VoiceAssistantHandlers,
|
|
60
|
+
type VoiceAssistantOptions,
|
|
61
|
+
type VoiceAssistantState,
|
|
62
|
+
type VoiceTranscriptEvent,
|
|
63
|
+
} from './assistant.js';
|
|
64
|
+
|
|
65
|
+
export {
|
|
66
|
+
allowListFirewall,
|
|
67
|
+
denyListFirewall,
|
|
68
|
+
firewallFromKeyList,
|
|
69
|
+
buildDelegateConfig,
|
|
70
|
+
type ProviderFirewall,
|
|
71
|
+
type DelegateConfig,
|
|
72
|
+
} from './delegate.js';
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest';
|
|
2
|
+
import { finalToTurn } from './parse.js';
|
|
3
|
+
|
|
4
|
+
// Covers the mapping from a QVAC completion `final` frame to a ParsedTurn.
describe('finalToTurn', () => {
  it('uses contentText for visible text and strips reasoning', () => {
    const turn = finalToTurn({ contentText: '<think>x</think>Hello' });
    expect(turn.text).toBe('Hello');
  });

  it('falls back to the streamed text when contentText is empty', () => {
    const turn = finalToTurn({ contentText: '' }, 'streamed answer');
    expect(turn.text).toBe('streamed answer');
  });

  it('prefers raw.fullText for rawContent (history push-back)', () => {
    const turn = finalToTurn({ contentText: 'Hi', raw: { fullText: 'FRAMED<tool/>Hi' } });
    expect(turn.rawContent).toBe('FRAMED<tool/>Hi');
    expect(turn.text).toBe('Hi');
  });

  it('falls back to the raw text for rawContent when no framed form', () => {
    const turn = finalToTurn({ contentText: 'Hi' });
    expect(turn.rawContent).toBe('Hi');
  });

  it('maps tool calls and defaults missing arguments to {}', () => {
    const turn = finalToTurn({
      contentText: '',
      toolCalls: [
        { id: 'a', name: 'get_balance' },
        { name: 'send', arguments: { sats: 5000 } },
      ],
    });
    expect(turn.toolCalls).toEqual([
      { id: 'a', name: 'get_balance', arguments: {} },
      { id: undefined, name: 'send', arguments: { sats: 5000 } },
    ]);
  });

  it('flags truncation when the SDK stops on length', () => {
    const turn = finalToTurn({ contentText: 'partial', stopReason: 'length' });
    expect(turn.truncated).toBe(true);
    expect(turn.stopReason).toBe('length');
  });

  it('does not flag truncation on a natural stop', () => {
    const turn = finalToTurn({ contentText: 'done' });
    expect(turn.truncated).toBe(false);
  });

  it('handles an empty final without throwing', () => {
    const turn = finalToTurn({});
    expect(turn).toEqual({ text: '', rawContent: '', toolCalls: [], truncated: false, stopReason: undefined });
  });
});
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pure mapping from a QVAC completion `final` frame to the shape the shared
|
|
3
|
+
* @kaleidorg/mind Engine consumes. Kept SDK-free (structural input type) so it
|
|
4
|
+
* is testable without loading a model, and so the same mapping runs on mobile,
|
|
5
|
+
* desktop, and the eval harness.
|
|
6
|
+
*/
|
|
7
|
+
import { cleanAssistantVisibleText } from './text.js';
|
|
8
|
+
|
|
9
|
+
/** The structural slice of a QVAC `completion().final` frame that this module reads. */
export interface QvacFinalLike {
  /** Assistant text meant for display (excludes `<think>` reasoning). */
  contentText?: string;
  /** Raw assistant frame, including tool-call framing, kept for history push-back. */
  raw?: { fullText?: string };
  /** Tool calls requested by the model this turn (empty ⇒ final answer). */
  toolCalls?: Array<{ id?: string; name: string; arguments?: Record<string, unknown> }>;
  /**
   * Reason generation stopped. QVAC 0.13 emits `"length"` when the token budget
   * is exhausted, `"cancelled"` on abort, and `undefined` on a natural stop. It
   * is surfaced so the funnel can tell a truncated tool-call from a complete one.
   */
  stopReason?: 'length' | 'cancelled' | string;
}

/** Result of mapping a completion `final` frame for the shared Engine. */
export interface ParsedTurn {
  /** Assistant content, cleaned for display. */
  text: string;
  /** Raw assistant frame to push back into history for the next turn. */
  rawContent: string;
  /** Tool calls requested by the model (arguments defaulted to `{}`). */
  toolCalls: Array<{ id?: string; name: string; arguments: Record<string, unknown> }>;
  /** True when the token budget cut generation off (incomplete output). */
  truncated: boolean;
  /** Stop reason exactly as reported by the SDK, when provided. */
  stopReason?: string;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Convert a completion `final` frame into a {@link ParsedTurn}.
 *
 * - Visible text comes from `final.contentText`, or from `streamed` when that is
 *   empty, and is passed through `cleanAssistantVisibleText`.
 * - `rawContent` prefers the SDK's framed `raw.fullText` so the Engine can
 *   anchor the next turn; it falls back to the uncleaned visible text when a
 *   provider has no raw form.
 * - Tool calls are copied, with missing `arguments` defaulted to `{}`.
 * - `truncated` is true only when `stopReason` is `'length'`.
 *
 * @param final - The completion's final frame.
 * @param streamed - Text accumulated from the stream, used as a fallback.
 */
export function finalToTurn(final: QvacFinalLike, streamed = ''): ParsedTurn {
  const { contentText, raw, toolCalls, stopReason } = final;
  const visibleSource = contentText || streamed;
  const text = cleanAssistantVisibleText(visibleSource);
  const calls = (toolCalls ?? []).map(({ id, name, arguments: args }) => ({
    id,
    name,
    arguments: args ?? {},
  }));

  return {
    text,
    rawContent: raw?.fullText ?? visibleSource,
    toolCalls: calls,
    truncated: stopReason === 'length',
    stopReason,
  };
}
|