@kaleidorg/mind 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/qvac/assistant.d.ts +73 -0
- package/dist/qvac/assistant.d.ts.map +1 -0
- package/dist/qvac/assistant.js +97 -0
- package/dist/qvac/assistant.js.map +1 -0
- package/dist/qvac/config.d.ts +64 -0
- package/dist/qvac/config.d.ts.map +1 -0
- package/dist/qvac/config.js +71 -0
- package/dist/qvac/config.js.map +1 -0
- package/dist/qvac/delegate.d.ts +48 -0
- package/dist/qvac/delegate.d.ts.map +1 -0
- package/dist/qvac/delegate.js +51 -0
- package/dist/qvac/delegate.js.map +1 -0
- package/dist/qvac/index.d.ts +19 -0
- package/dist/qvac/index.d.ts.map +1 -0
- package/dist/qvac/index.js +19 -0
- package/dist/qvac/index.js.map +1 -0
- package/dist/qvac/parse.d.ts +44 -0
- package/dist/qvac/parse.d.ts.map +1 -0
- package/dist/qvac/parse.js +28 -0
- package/dist/qvac/parse.js.map +1 -0
- package/dist/qvac/provider.d.ts +49 -0
- package/dist/qvac/provider.d.ts.map +1 -0
- package/dist/qvac/provider.js +68 -0
- package/dist/qvac/provider.js.map +1 -0
- package/dist/qvac/stream.d.ts +37 -0
- package/dist/qvac/stream.d.ts.map +1 -0
- package/dist/qvac/stream.js +29 -0
- package/dist/qvac/stream.js.map +1 -0
- package/dist/qvac/text.d.ts +19 -0
- package/dist/qvac/text.d.ts.map +1 -0
- package/dist/qvac/text.js +56 -0
- package/dist/qvac/text.js.map +1 -0
- package/dist/qvac/voice.d.ts +69 -0
- package/dist/qvac/voice.d.ts.map +1 -0
- package/dist/qvac/voice.js +51 -0
- package/dist/qvac/voice.js.map +1 -0
- package/package.json +15 -1
- package/src/qvac/assistant.test.ts +132 -0
- package/src/qvac/assistant.ts +146 -0
- package/src/qvac/config.test.ts +44 -0
- package/src/qvac/config.ts +76 -0
- package/src/qvac/delegate.test.ts +68 -0
- package/src/qvac/delegate.ts +71 -0
- package/src/qvac/index.ts +72 -0
- package/src/qvac/parse.test.ts +52 -0
- package/src/qvac/parse.ts +57 -0
- package/src/qvac/provider.test.ts +107 -0
- package/src/qvac/provider.ts +124 -0
- package/src/qvac/stream.test.ts +79 -0
- package/src/qvac/stream.ts +56 -0
- package/src/qvac/text.test.ts +70 -0
- package/src/qvac/text.ts +60 -0
- package/src/qvac/voice.test.ts +151 -0
- package/src/qvac/voice.ts +122 -0
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Voice runtime ops shared across hosts: one-shot transcription (Whisper) and
|
|
3
|
+
* speech synthesis (SUPERTONIC TTS). Like the provider, the SDK functions are
|
|
4
|
+
* injected (type-only `@qvac/sdk` import, erased at build) so this carries no
|
|
5
|
+
* runtime SDK dependency and is unit-testable with fakes.
|
|
6
|
+
*
|
|
7
|
+
* The host still owns model lifecycle (download, load, local-vs-delegated) and
|
|
8
|
+
* audio I/O (mic capture, playback). It passes the loaded model-id resolvers;
|
|
9
|
+
* this module does the SDK calls + the text gating that must be identical
|
|
10
|
+
* everywhere (payment-string redaction, U+0060 refusal, file:// stripping).
|
|
11
|
+
*
|
|
12
|
+
* The streaming voice-assistant loop (transcribeStream + VAD) builds on top of
|
|
13
|
+
* these in a later pass.
|
|
14
|
+
*/
|
|
15
|
+
import type * as QvacSdk from '@qvac/sdk';
|
|
16
|
+
import { sanitizeForSupertonic } from './text.js';
|
|
17
|
+
import { TTS_SAMPLE_RATE, DEFAULT_VOICE_STREAM_PARAMS } from './config.js';
|
|
18
|
+
import type { VoiceTranscriptEvent } from './assistant.js';
|
|
19
|
+
|
|
20
|
+
/** Signature of the SDK's one-shot `transcribe` function (type-only reference). */
type TranscribeFn = (typeof QvacSdk)['transcribe'];

/** Signature of the SDK's `textToSpeech` function (type-only reference). */
type TextToSpeechFn = (typeof QvacSdk)['textToSpeech'];

/** Signature of the SDK's `transcribeStream` function (type-only reference). */
type TranscribeStreamFn = (typeof QvacSdk)['transcribeStream'];
|
|
23
|
+
|
|
24
|
+
/**
 * Synthesized audio handed back to the host for playback: the 16-bit PCM
 * samples together with the rate they should be played at.
 */
export interface PcmAudio {
  /** PCM sample values. */
  pcm: Array<number>;
  /** Sample rate of `pcm`. */
  sampleRate: number;
}
|
|
29
|
+
|
|
30
|
+
/**
 * Handle on a live VAD transcription session.
 *
 * The host pushes microphone audio in with `write()`, async-iterates the
 * session to receive `text` / `vad` / `endOfTurn` events, and calls `end()`
 * once the audio stops. The session can be handed directly to
 * `runVoiceAssistant`.
 */
export interface VoiceSession {
  /** Feed one chunk of captured audio into the session. */
  write(audioChunk: Uint8Array): void;
  /** Signal that no more audio will be written. */
  end(): void;
  /** Tear the session down. */
  destroy(): void;
  /** Async iteration yields the session's transcript events. */
  [Symbol.asyncIterator](): AsyncIterator<VoiceTranscriptEvent>;
}
|
|
41
|
+
|
|
42
|
+
/**
 * Dependencies for {@link createQvacVoice}: the SDK functions (injected so the
 * module has no runtime SDK import) plus resolvers for the currently loaded
 * model ids.
 */
export interface QvacVoiceOptions {
  /** Injected SDK `transcribe`, used for one-shot file transcription. */
  transcribe: TranscribeFn;
  /** Injected SDK `textToSpeech`, used for speech synthesis. */
  textToSpeech: TextToSpeechFn;
  /** Injected SDK `transcribeStream`; optional, required only by `openVoiceSession`. */
  transcribeStream?: TranscribeStreamFn;
  /** Returns the loaded Whisper model id; `null` means not loaded, and callers of it throw. */
  getWhisperModelId: () => string | null;
  /** Returns the loaded TTS model id; `null` means not loaded, and synthesis returns `null`. */
  getTtsModelId: () => string | null;
  /** Sample rate reported for TTS output; falls back to `TTS_SAMPLE_RATE` when omitted. */
  ttsSampleRate?: number;
}
|
|
56
|
+
|
|
57
|
+
/** Voice operations returned by {@link createQvacVoice}. */
export interface QvacVoice {
  /** Turn an audio file, given as a filesystem path or a `file://` URI, into text. */
  transcribeAudio(audioUri: string): Promise<string>;
  /**
   * Produce speech audio for `text`.
   *
   * Resolves to the PCM samples and their sample rate. Resolves to `null`
   * when no TTS model is loaded or when nothing is left of the text after
   * sanitization, in which case the host is expected to fall back to the
   * system voice. Payment strings are redacted so they are never spoken.
   */
  synthesizeSpeech(text: string): Promise<PcmAudio | null>;
  /**
   * Start a hands-free (continuous, VAD-driven) transcription session.
   *
   * Only works when `transcribeStream` was supplied in the options. Entries
   * in `paramsOverride` are merged over {@link DEFAULT_VOICE_STREAM_PARAMS}.
   * The resulting session is meant to be passed to `runVoiceAssistant`.
   */
  openVoiceSession(paramsOverride?: Record<string, unknown>): Promise<VoiceSession>;
}
|
|
74
|
+
|
|
75
|
+
/**
 * Build the voice runtime ops (`transcribeAudio`, `synthesizeSpeech`,
 * `openVoiceSession`) on top of the injected SDK functions.
 *
 * Model ids are resolved through the option callbacks on every call, so a
 * model that is loaded after this factory runs is picked up automatically.
 *
 * @param options Injected SDK functions, model-id resolvers and an optional
 *   TTS sample rate (defaults to `TTS_SAMPLE_RATE`).
 * @returns The voice operations bound to `options`.
 */
export function createQvacVoice(options: QvacVoiceOptions): QvacVoice {
  const FILE_URI_PREFIX = 'file://';
  const sampleRate = options.ttsSampleRate ?? TTS_SAMPLE_RATE;

  return {
    /**
     * Transcribe an audio file to text.
     *
     * @param audioUri Filesystem path or `file://` URI of the audio file.
     * @returns The transcript produced by the injected `transcribe`.
     * @throws Error when no Whisper model is loaded.
     */
    async transcribeAudio(audioUri: string): Promise<string> {
      const modelId = options.getWhisperModelId();
      if (!modelId) throw new Error('Whisper model not loaded');
      // The SDK's native file reader wants a plain filesystem path, not a
      // `file://` URI — the URI raises AUDIO_FILE_NOT_FOUND even when present.
      // Only a *leading* scheme is stripped: a plain path that merely contains
      // the substring `file://` must reach the SDK untouched.
      // NOTE(review): percent-encoded URIs are not decoded here — confirm
      // whether hosts ever pass encoded paths.
      const audioChunk = audioUri.startsWith(FILE_URI_PREFIX)
        ? audioUri.slice(FILE_URI_PREFIX.length)
        : audioUri;
      return await options.transcribe({ modelId, audioChunk } as Parameters<TranscribeFn>[0]);
    },

    /**
     * Synthesize speech for `text`.
     *
     * @param text Raw text; it is passed through `sanitizeForSupertonic` first.
     * @returns PCM plus sample rate, or `null` when no TTS model is loaded,
     *   the sanitized text is empty, or it still contains U+0060.
     */
    async synthesizeSpeech(text: string): Promise<PcmAudio | null> {
      const modelId = options.getTtsModelId();
      if (!modelId) return null;

      const trimmed = sanitizeForSupertonic(text);
      if (!trimmed) return null;
      // Belt-and-suspenders: SUPERTONIC chokes on U+0060; sanitize already
      // strips it, so refuse if any slipped through rather than crash the voice.
      if (trimmed.includes('\u0060')) return null;

      const result = options.textToSpeech({
        modelId,
        text: trimmed,
        inputType: 'text',
        stream: false,
      } as Parameters<TextToSpeechFn>[0]);
      // The call itself is not awaited; the samples arrive via `result.buffer`.
      const pcm = await result.buffer;
      return { pcm, sampleRate };
    },

    /**
     * Open a streaming VAD transcription session.
     *
     * @param paramsOverride Entries merged over `DEFAULT_VOICE_STREAM_PARAMS`.
     * @returns The session returned by the injected `transcribeStream`.
     * @throws Error when `transcribeStream` was not provided, or when no
     *   Whisper model is loaded.
     */
    async openVoiceSession(paramsOverride: Record<string, unknown> = {}): Promise<VoiceSession> {
      if (!options.transcribeStream) {
        throw new Error('transcribeStream not provided — pass it in QvacVoiceOptions for voice sessions');
      }
      const modelId = options.getWhisperModelId();
      if (!modelId) throw new Error('Whisper model not loaded');
      // Spread order matters: overrides come last, so they win over the
      // defaults (and over `modelId`, should a caller supply one).
      const session = await options.transcribeStream({
        modelId,
        ...DEFAULT_VOICE_STREAM_PARAMS,
        ...paramsOverride,
      } as Parameters<TranscribeStreamFn>[0]);
      return session as unknown as VoiceSession;
    },
  };
}
|