@kaleidorg/mind 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/dist/qvac/assistant.d.ts +73 -0
  2. package/dist/qvac/assistant.d.ts.map +1 -0
  3. package/dist/qvac/assistant.js +97 -0
  4. package/dist/qvac/assistant.js.map +1 -0
  5. package/dist/qvac/config.d.ts +64 -0
  6. package/dist/qvac/config.d.ts.map +1 -0
  7. package/dist/qvac/config.js +71 -0
  8. package/dist/qvac/config.js.map +1 -0
  9. package/dist/qvac/delegate.d.ts +48 -0
  10. package/dist/qvac/delegate.d.ts.map +1 -0
  11. package/dist/qvac/delegate.js +51 -0
  12. package/dist/qvac/delegate.js.map +1 -0
  13. package/dist/qvac/index.d.ts +19 -0
  14. package/dist/qvac/index.d.ts.map +1 -0
  15. package/dist/qvac/index.js +19 -0
  16. package/dist/qvac/index.js.map +1 -0
  17. package/dist/qvac/parse.d.ts +44 -0
  18. package/dist/qvac/parse.d.ts.map +1 -0
  19. package/dist/qvac/parse.js +28 -0
  20. package/dist/qvac/parse.js.map +1 -0
  21. package/dist/qvac/provider.d.ts +49 -0
  22. package/dist/qvac/provider.d.ts.map +1 -0
  23. package/dist/qvac/provider.js +68 -0
  24. package/dist/qvac/provider.js.map +1 -0
  25. package/dist/qvac/stream.d.ts +37 -0
  26. package/dist/qvac/stream.d.ts.map +1 -0
  27. package/dist/qvac/stream.js +29 -0
  28. package/dist/qvac/stream.js.map +1 -0
  29. package/dist/qvac/text.d.ts +19 -0
  30. package/dist/qvac/text.d.ts.map +1 -0
  31. package/dist/qvac/text.js +56 -0
  32. package/dist/qvac/text.js.map +1 -0
  33. package/dist/qvac/voice.d.ts +69 -0
  34. package/dist/qvac/voice.d.ts.map +1 -0
  35. package/dist/qvac/voice.js +51 -0
  36. package/dist/qvac/voice.js.map +1 -0
  37. package/package.json +15 -1
  38. package/src/qvac/assistant.test.ts +132 -0
  39. package/src/qvac/assistant.ts +146 -0
  40. package/src/qvac/config.test.ts +44 -0
  41. package/src/qvac/config.ts +76 -0
  42. package/src/qvac/delegate.test.ts +68 -0
  43. package/src/qvac/delegate.ts +71 -0
  44. package/src/qvac/index.ts +72 -0
  45. package/src/qvac/parse.test.ts +52 -0
  46. package/src/qvac/parse.ts +57 -0
  47. package/src/qvac/provider.test.ts +107 -0
  48. package/src/qvac/provider.ts +124 -0
  49. package/src/qvac/stream.test.ts +79 -0
  50. package/src/qvac/stream.ts +56 -0
  51. package/src/qvac/text.test.ts +70 -0
  52. package/src/qvac/text.ts +60 -0
  53. package/src/qvac/voice.test.ts +151 -0
  54. package/src/qvac/voice.ts +122 -0
@@ -0,0 +1,122 @@
1
+ /**
2
+ * Voice runtime ops shared across hosts: one-shot transcription (Whisper) and
3
+ * speech synthesis (SUPERTONIC TTS). Like the provider, the SDK functions are
4
+ * injected (type-only `@qvac/sdk` import, erased at build) so this carries no
5
+ * runtime SDK dependency and is unit-testable with fakes.
6
+ *
7
+ * The host still owns model lifecycle (download, load, local-vs-delegated) and
8
+ * audio I/O (mic capture, playback). It passes the loaded model-id resolvers;
9
+ * this module does the SDK calls + the text gating that must be identical
10
+ * everywhere (payment-string redaction, U+0060 refusal, file:// stripping).
11
+ *
12
+ * The streaming voice-assistant loop (transcribeStream + VAD) builds on top of
13
+ * these in a later pass.
14
+ */
15
+ import type * as QvacSdk from '@qvac/sdk';
16
+ import { sanitizeForSupertonic } from './text.js';
17
+ import { TTS_SAMPLE_RATE, DEFAULT_VOICE_STREAM_PARAMS } from './config.js';
18
+ import type { VoiceTranscriptEvent } from './assistant.js';
19
+
20
// Signatures of the injected SDK functions. They are derived from the
// type-only `@qvac/sdk` namespace import, so they add nothing at runtime.

/** Signature of the SDK's `transcribe`. */
type TranscribeFn = (typeof QvacSdk)['transcribe'];

/** Signature of the SDK's `textToSpeech`. */
type TextToSpeechFn = (typeof QvacSdk)['textToSpeech'];

/** Signature of the SDK's `transcribeStream`. */
type TranscribeStreamFn = (typeof QvacSdk)['transcribeStream'];
23
+
24
/**
 * Synthesized audio handed back to the host for playback: the 16-bit PCM
 * sample values together with the rate they should be played at.
 */
export interface PcmAudio {
  /** The PCM sample values. */
  pcm: Array<number>;
  /** Sample rate that goes with `pcm`. */
  sampleRate: number;
}
29
+
30
/**
 * Handle on a running VAD transcription session. The host pushes captured mic
 * audio in with `write()`, calls `end()` once the audio stops, and receives
 * `text`/`vad`/`endOfTurn` events by async-iterating the session. The object
 * can be handed to `runVoiceAssistant` unchanged.
 */
export interface VoiceSession {
  /** Feed one chunk of mic audio into the session. */
  write(audioChunk: Uint8Array): void;
  /** Signal that no more audio is coming. */
  end(): void;
  /** Dispose of the session. NOTE(review): exact teardown semantics come from the SDK — confirm. */
  destroy(): void;
  /** Async iteration yields the transcript events. */
  [Symbol.asyncIterator](): AsyncIterator<VoiceTranscriptEvent>;
}
41
+
42
/**
 * Everything `createQvacVoice` needs from the host: the SDK functions (passed
 * in rather than imported) and resolvers for the currently loaded model ids.
 */
export interface QvacVoiceOptions {
  /** Injected SDK `transcribe`. */
  transcribe: TranscribeFn;
  /** Injected SDK `textToSpeech`. */
  textToSpeech: TextToSpeechFn;
  /** Injected SDK `transcribeStream`; optional, required only by `openVoiceSession`. */
  transcribeStream?: TranscribeStreamFn;
  /** Returns the loaded Whisper model id; `null` means not loaded, and the voice ops throw. */
  getWhisperModelId: () => string | null;
  /** Returns the loaded TTS model id; `null` means not loaded, and synthesis yields `null`. */
  getTtsModelId: () => string | null;
  /** Sample rate reported for TTS output; when omitted, SUPERTONIC-2's 44.1 kHz default applies. */
  ttsSampleRate?: number;
}
56
+
57
/** The voice operations produced by `createQvacVoice`. */
export interface QvacVoice {
  /**
   * Turn an audio file into text. `audioUri` may be a plain path or a
   * `file://` URI.
   */
  transcribeAudio(audioUri: string): Promise<string>;

  /**
   * Produce speech audio for `text`. Resolves to the PCM data and its sample
   * rate, or to `null` when TTS is unavailable or nothing is left of the text
   * after sanitization; the host then falls back to the system voice. Payment
   * strings are redacted first so they are never spoken.
   */
  synthesizeSpeech(text: string): Promise<PcmAudio | null>;

  /**
   * Start a hands-free (continuous) VAD transcription session. Only works when
   * `transcribeStream` was supplied in the options. Entries in
   * `paramsOverride` are merged over the defaults
   * ({@link DEFAULT_VOICE_STREAM_PARAMS}). The resulting session is meant to be
   * passed to `runVoiceAssistant`.
   */
  openVoiceSession(paramsOverride?: Record<string, unknown>): Promise<VoiceSession>;
}
74
+
75
/** URI scheme prefix that is removed before a path is handed to the SDK. */
const FILE_URI_PREFIX = 'file://';

/**
 * Reduce a `file://` URI to the path that follows the scheme. Only a leading
 * scheme is removed: a string that does not start with `file://` is returned
 * unchanged, even when that character sequence appears later in it.
 */
function stripFileScheme(audioUri: string): string {
  return audioUri.startsWith(FILE_URI_PREFIX)
    ? audioUri.slice(FILE_URI_PREFIX.length)
    : audioUri;
}

/**
 * Build the voice operations (one-shot transcription, speech synthesis, and
 * streaming voice sessions) on top of the injected SDK functions.
 *
 * @param options Injected SDK functions, model-id resolvers, and an optional
 *   TTS sample-rate override (falls back to `TTS_SAMPLE_RATE`).
 * @returns An object implementing {@link QvacVoice}.
 */
export function createQvacVoice(options: QvacVoiceOptions): QvacVoice {
  const sampleRate = options.ttsSampleRate ?? TTS_SAMPLE_RATE;

  return {
    /**
     * Transcribe the audio file at `audioUri` (plain path or `file://` URI).
     *
     * @throws Error when the Whisper model id resolver returns nothing.
     */
    async transcribeAudio(audioUri: string): Promise<string> {
      const modelId = options.getWhisperModelId();
      if (!modelId) throw new Error('Whisper model not loaded');
      // The SDK's native file reader wants a plain filesystem path, not a
      // `file://` URI — the URI raises AUDIO_FILE_NOT_FOUND even when present.
      // Strip the scheme only when it is a prefix, so a plain path that merely
      // contains "file://" somewhere is passed through intact.
      const audioChunk = stripFileScheme(audioUri);
      return await options.transcribe({ modelId, audioChunk } as Parameters<TranscribeFn>[0]);
    },

    /**
     * Synthesize speech for `text`.
     *
     * @returns The PCM buffer and sample rate, or `null` when no TTS model is
     *   loaded, when nothing remains after sanitization, or when a backtick
     *   survived sanitization.
     */
    async synthesizeSpeech(text: string): Promise<PcmAudio | null> {
      const modelId = options.getTtsModelId();
      if (!modelId) return null;

      const speakable = sanitizeForSupertonic(text);
      if (!speakable) return null;
      // Belt-and-suspenders: SUPERTONIC chokes on U+0060; sanitize already
      // strips it, so refuse if any slipped through rather than crash the voice.
      if (speakable.includes('\u0060')) return null;

      // The call itself is not awaited; the audio is awaited on `buffer`.
      const result = options.textToSpeech({
        modelId,
        text: speakable,
        inputType: 'text',
        stream: false,
      } as Parameters<TextToSpeechFn>[0]);
      const pcm = await result.buffer;
      return { pcm, sampleRate };
    },

    /**
     * Open a streaming transcription session.
     *
     * @param paramsOverride Extra parameters merged over
     *   `DEFAULT_VOICE_STREAM_PARAMS`.
     * @throws Error when `transcribeStream` was not injected, or when the
     *   Whisper model id resolver returns nothing.
     */
    async openVoiceSession(paramsOverride: Record<string, unknown> = {}): Promise<VoiceSession> {
      if (!options.transcribeStream) {
        throw new Error('transcribeStream not provided — pass it in QvacVoiceOptions for voice sessions');
      }
      const modelId = options.getWhisperModelId();
      if (!modelId) throw new Error('Whisper model not loaded');
      // Later spreads win: overrides beat the defaults, and would also replace
      // `modelId` if the caller put one in `paramsOverride`.
      const session = await options.transcribeStream({
        modelId,
        ...DEFAULT_VOICE_STREAM_PARAMS,
        ...paramsOverride,
      } as Parameters<TranscribeStreamFn>[0]);
      // The SDK's session type is narrowed to the host-facing VoiceSession shape.
      return session as unknown as VoiceSession;
    },
  };
}