@kaleidorg/mind 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130) hide show
  1. package/dist/capabilities.d.ts +4 -0
  2. package/dist/capabilities.d.ts.map +1 -1
  3. package/dist/capabilities.js +7 -0
  4. package/dist/capabilities.js.map +1 -1
  5. package/dist/engine.d.ts +9 -0
  6. package/dist/engine.d.ts.map +1 -1
  7. package/dist/engine.js +1 -0
  8. package/dist/engine.js.map +1 -1
  9. package/dist/funnel.d.ts +6 -0
  10. package/dist/funnel.d.ts.map +1 -1
  11. package/dist/funnel.js +26 -6
  12. package/dist/funnel.js.map +1 -1
  13. package/dist/index.d.ts +9 -1
  14. package/dist/index.d.ts.map +1 -1
  15. package/dist/index.js +8 -0
  16. package/dist/index.js.map +1 -1
  17. package/dist/kaleidoswap/contract.d.ts +72 -0
  18. package/dist/kaleidoswap/contract.d.ts.map +1 -0
  19. package/dist/kaleidoswap/contract.js +125 -0
  20. package/dist/kaleidoswap/contract.js.map +1 -0
  21. package/dist/knowledge/btc-map.d.ts +87 -0
  22. package/dist/knowledge/btc-map.d.ts.map +1 -0
  23. package/dist/knowledge/btc-map.js +365 -0
  24. package/dist/knowledge/btc-map.js.map +1 -0
  25. package/dist/lsps1/contract.d.ts +55 -0
  26. package/dist/lsps1/contract.d.ts.map +1 -0
  27. package/dist/lsps1/contract.js +91 -0
  28. package/dist/lsps1/contract.js.map +1 -0
  29. package/dist/memory/store.d.ts +7 -1
  30. package/dist/memory/store.d.ts.map +1 -1
  31. package/dist/memory/store.js +43 -3
  32. package/dist/memory/store.js.map +1 -1
  33. package/dist/memory/types.d.ts +12 -0
  34. package/dist/memory/types.d.ts.map +1 -1
  35. package/dist/qvac/assistant.d.ts +73 -0
  36. package/dist/qvac/assistant.d.ts.map +1 -0
  37. package/dist/qvac/assistant.js +97 -0
  38. package/dist/qvac/assistant.js.map +1 -0
  39. package/dist/qvac/config.d.ts +64 -0
  40. package/dist/qvac/config.d.ts.map +1 -0
  41. package/dist/qvac/config.js +71 -0
  42. package/dist/qvac/config.js.map +1 -0
  43. package/dist/qvac/delegate.d.ts +48 -0
  44. package/dist/qvac/delegate.d.ts.map +1 -0
  45. package/dist/qvac/delegate.js +51 -0
  46. package/dist/qvac/delegate.js.map +1 -0
  47. package/dist/qvac/index.d.ts +19 -0
  48. package/dist/qvac/index.d.ts.map +1 -0
  49. package/dist/qvac/index.js +19 -0
  50. package/dist/qvac/index.js.map +1 -0
  51. package/dist/qvac/parse.d.ts +44 -0
  52. package/dist/qvac/parse.d.ts.map +1 -0
  53. package/dist/qvac/parse.js +28 -0
  54. package/dist/qvac/parse.js.map +1 -0
  55. package/dist/qvac/provider.d.ts +49 -0
  56. package/dist/qvac/provider.d.ts.map +1 -0
  57. package/dist/qvac/provider.js +68 -0
  58. package/dist/qvac/provider.js.map +1 -0
  59. package/dist/qvac/stream.d.ts +37 -0
  60. package/dist/qvac/stream.d.ts.map +1 -0
  61. package/dist/qvac/stream.js +29 -0
  62. package/dist/qvac/stream.js.map +1 -0
  63. package/dist/qvac/text.d.ts +19 -0
  64. package/dist/qvac/text.d.ts.map +1 -0
  65. package/dist/qvac/text.js +56 -0
  66. package/dist/qvac/text.js.map +1 -0
  67. package/dist/qvac/voice.d.ts +69 -0
  68. package/dist/qvac/voice.d.ts.map +1 -0
  69. package/dist/qvac/voice.js +51 -0
  70. package/dist/qvac/voice.js.map +1 -0
  71. package/dist/recipe/kaleidoswap-atomic.d.ts +27 -0
  72. package/dist/recipe/kaleidoswap-atomic.d.ts.map +1 -0
  73. package/dist/recipe/kaleidoswap-atomic.js +111 -0
  74. package/dist/recipe/kaleidoswap-atomic.js.map +1 -0
  75. package/dist/recipe/runner.d.ts.map +1 -1
  76. package/dist/recipe/runner.js +13 -1
  77. package/dist/recipe/runner.js.map +1 -1
  78. package/dist/skills/registry.d.ts.map +1 -1
  79. package/dist/skills/registry.js +20 -2
  80. package/dist/skills/registry.js.map +1 -1
  81. package/dist/wallet/confirm.d.ts +12 -0
  82. package/dist/wallet/confirm.d.ts.map +1 -0
  83. package/dist/wallet/confirm.js +67 -0
  84. package/dist/wallet/confirm.js.map +1 -0
  85. package/package.json +16 -1
  86. package/skills/README.md +6 -1
  87. package/skills/kaleido-lsps/SKILL.md +56 -0
  88. package/skills/kaleido-trading/SKILL.md +85 -18
  89. package/skills/merchant-finder/SKILL.md +87 -0
  90. package/skills/paid-data/SKILL.md +12 -0
  91. package/skills/wallet-assistant/SKILL.md +38 -0
  92. package/src/capabilities.ts +12 -0
  93. package/src/context/context.test.ts +6 -2
  94. package/src/engine.ts +6 -0
  95. package/src/funnel.ts +32 -7
  96. package/src/index.ts +43 -0
  97. package/src/kaleidoswap/contract.test.ts +147 -0
  98. package/src/kaleidoswap/contract.ts +212 -0
  99. package/src/knowledge/btc-map.test.ts +188 -0
  100. package/src/knowledge/btc-map.ts +446 -0
  101. package/src/lsps1/contract.test.ts +81 -0
  102. package/src/lsps1/contract.ts +132 -0
  103. package/src/memory/memory.test.ts +55 -0
  104. package/src/memory/store.ts +49 -4
  105. package/src/memory/types.ts +13 -0
  106. package/src/qvac/assistant.test.ts +132 -0
  107. package/src/qvac/assistant.ts +146 -0
  108. package/src/qvac/config.test.ts +44 -0
  109. package/src/qvac/config.ts +76 -0
  110. package/src/qvac/delegate.test.ts +68 -0
  111. package/src/qvac/delegate.ts +71 -0
  112. package/src/qvac/index.ts +72 -0
  113. package/src/qvac/parse.test.ts +52 -0
  114. package/src/qvac/parse.ts +57 -0
  115. package/src/qvac/provider.test.ts +107 -0
  116. package/src/qvac/provider.ts +124 -0
  117. package/src/qvac/stream.test.ts +79 -0
  118. package/src/qvac/stream.ts +56 -0
  119. package/src/qvac/text.test.ts +70 -0
  120. package/src/qvac/text.ts +60 -0
  121. package/src/qvac/voice.test.ts +151 -0
  122. package/src/qvac/voice.ts +122 -0
  123. package/src/recipe/kaleidoswap-atomic.test.ts +138 -0
  124. package/src/recipe/kaleidoswap-atomic.ts +117 -0
  125. package/src/recipe/runner.ts +13 -1
  126. package/src/skills/registry.ts +21 -2
  127. package/src/skills/skills.test.ts +42 -0
  128. package/src/wallet/confirm.test.ts +57 -0
  129. package/src/wallet/confirm.ts +74 -0
  130. package/skills/kaleido-wallet/SKILL.md +0 -28
@@ -61,6 +61,61 @@ describe('InMemoryMemoryStore', () => {
61
61
  const hits = await store.search({ text: 'how many sats do I have', limit: 1 });
62
62
  expect(hits[0].text).toMatch(/wallet balance/);
63
63
  });
64
+
65
+ // Embedding-only dedup: same vector → near-dup → newer supersedes older. No LLM.
66
+ it('consolidate (dedup): near-duplicates supersede instead of appending', async () => {
67
+ const embed = async (text: string): Promise<number[]> =>
68
+ /eur/i.test(text) ? [1, 0] : [0, 1];
69
+ let t = 0;
70
+ const store = new InMemoryMemoryStore({
71
+ embed,
72
+ consolidate: { threshold: 0.9 },
73
+ now: () => ++t,
74
+ });
75
+ await store.add({ text: 'user prefers EUR', kind: 'preference' });
76
+ await store.add({ text: 'user prefers EUR for fiat display', kind: 'preference' });
77
+
78
+ const all = await store.all();
79
+ expect(all).toHaveLength(1); // folded, not appended
80
+ expect(all[0].text).toBe('user prefers EUR for fiat display'); // newer wins
81
+ });
82
+
83
+ it('consolidate (dedup): distinct facts are kept separate', async () => {
84
+ const embed = async (text: string): Promise<number[]> =>
85
+ /eur/i.test(text) ? [1, 0] : [0, 1];
86
+ const store = new InMemoryMemoryStore({ embed, consolidate: { threshold: 0.9 }, now: () => 1 });
87
+ await store.add({ text: 'user prefers EUR', kind: 'preference' });
88
+ await store.add({ text: 'it is sunny today', kind: 'note' });
89
+ expect(await store.all()).toHaveLength(2);
90
+ });
91
+
92
+ it('consolidate (dedup): different kinds are never merged', async () => {
93
+ const embed = async (): Promise<number[]> => [1, 0]; // identical vectors
94
+ const store = new InMemoryMemoryStore({ embed, consolidate: { threshold: 0.9 }, now: () => 1 });
95
+ await store.add({ text: 'EUR', kind: 'preference' });
96
+ await store.add({ text: 'EUR', kind: 'fact' });
97
+ expect(await store.all()).toHaveLength(2);
98
+ });
99
+
100
+ // LLM merge: injected merger rewrites old + new into one consolidated item, with unioned tags.
101
+ it('consolidate (merge): injected merger folds near-dups into one item', async () => {
102
+ const embed = async (): Promise<number[]> => [1, 0];
103
+ const merge = vi.fn(async (existing: string, incoming: string) => `${existing}; ${incoming}`);
104
+ let t = 0;
105
+ const store = new InMemoryMemoryStore({
106
+ embed,
107
+ consolidate: { threshold: 0.9, merge },
108
+ now: () => ++t,
109
+ });
110
+ await store.add({ text: 'likes EUR', kind: 'preference', tags: ['fiat'] });
111
+ await store.add({ text: 'and CHF', kind: 'preference', tags: ['currency'] });
112
+
113
+ expect(merge).toHaveBeenCalledWith('likes EUR', 'and CHF');
114
+ const all = await store.all();
115
+ expect(all).toHaveLength(1);
116
+ expect(all[0].text).toBe('likes EUR; and CHF');
117
+ expect(all[0].tags).toEqual(['fiat', 'currency']); // unioned
118
+ });
64
119
  });
65
120
 
66
121
  describe('memory tool source', () => {
@@ -9,6 +9,7 @@
9
9
 
10
10
  import { cosineSimilarity } from '../rag/vector-store.js';
11
11
  import type {
12
+ MemoryConsolidation,
12
13
  MemoryIO,
13
14
  MemoryItem,
14
15
  MemoryQuery,
@@ -16,11 +17,18 @@ import type {
16
17
  NewMemory,
17
18
  } from './types.js';
18
19
 
20
+ const DEFAULT_DEDUP_THRESHOLD = 0.92;
21
+
19
22
  export interface MemoryStoreOptions {
20
23
  /** Persistence (load on first use, save on writes). Omit for ephemeral memory. */
21
24
  io?: MemoryIO;
22
25
  /** Embed text for semantic recall. Omit to fall back to substring matching. */
23
26
  embed?: (text: string) => Promise<number[]>;
27
+ /**
28
+ * Fold near-duplicate writes into one item instead of appending. Needs an embedding for the write (from `embed`, or supplied on the item).
29
+ * Omit for append-only. See {@link MemoryConsolidation}.
30
+ */
31
+ consolidate?: MemoryConsolidation;
24
32
  /** Clock — injectable for deterministic tests. */
25
33
  now?: () => number;
26
34
  }
@@ -31,11 +39,13 @@ export class InMemoryMemoryStore implements MemoryStore {
31
39
  private counter = 0;
32
40
  private readonly io?: MemoryIO;
33
41
  private readonly embed?: (text: string) => Promise<number[]>;
42
+ private readonly consolidate?: MemoryConsolidation;
34
43
  private readonly now: () => number;
35
44
 
36
45
  constructor(opts: MemoryStoreOptions = {}) {
37
46
  this.io = opts.io;
38
47
  this.embed = opts.embed;
48
+ this.consolidate = opts.consolidate;
39
49
  this.now = opts.now ?? (() => Date.now());
40
50
  }
41
51
 
@@ -58,16 +68,45 @@ export class InMemoryMemoryStore implements MemoryStore {
58
68
 
59
69
  async add(item: NewMemory): Promise<MemoryItem> {
60
70
  await this.hydrate();
61
- const embedding =
62
- item.embedding ?? (this.embed ? await this.embed(item.text).catch(() => undefined) : undefined);
71
+ let text = item.text;
72
+ let embedding =
73
+ item.embedding ?? (this.embed ? await this.embed(text).catch(() => undefined) : undefined);
74
+ let tags = item.tags;
75
+ let supersedeId: string | undefined;
76
+
77
+ // Consolidation: fold a same-kind near-duplicate into this write instead of
78
+ // appending — embedding-only by default, LLM rewrite when `merge` is set.
79
+ if (this.consolidate && embedding) {
80
+ const threshold = this.consolidate.threshold ?? DEFAULT_DEDUP_THRESHOLD;
81
+ let best: { item: MemoryItem; score: number } | undefined;
82
+ for (const m of this.items) {
83
+ if (m.kind !== item.kind || !m.embedding) continue;
84
+ const score = cosineSimilarity(embedding, m.embedding);
85
+ if (!best || score > best.score) best = { item: m, score };
86
+ }
87
+ if (best && best.score >= threshold) {
88
+ supersedeId = best.item.id;
89
+ tags = unionTags(best.item.tags, item.tags);
90
+ if (this.consolidate.merge) {
91
+ const merged = await this.consolidate.merge(best.item.text, text).catch(() => null);
92
+ if (merged && merged.trim()) {
93
+ text = merged.trim();
94
+ if (this.embed) embedding = await this.embed(text).catch(() => embedding);
95
+ }
96
+ }
97
+ // No merger → the incoming (newer) text supersedes the older item as-is.
98
+ }
99
+ }
100
+
63
101
  const full: MemoryItem = {
64
102
  id: item.id ?? `mem_${this.now()}_${++this.counter}`,
65
- text: item.text,
103
+ text,
66
104
  kind: item.kind,
67
- tags: item.tags,
105
+ tags,
68
106
  createdAt: item.createdAt ?? this.now(),
69
107
  ...(embedding ? { embedding } : {}),
70
108
  };
109
+ if (supersedeId) this.items = this.items.filter((m) => m.id !== supersedeId);
71
110
  this.items.push(full);
72
111
  await this.persist();
73
112
  return full;
@@ -127,3 +166,9 @@ export class InMemoryMemoryStore implements MemoryStore {
127
166
  await this.persist();
128
167
  }
129
168
  }
169
+
170
/**
 * Combine two optional tag lists into a single list without repeats, keeping
 * first-seen order (entries of `a` before entries of `b`). Yields undefined when
 * neither list contributes any tag.
 */
function unionTags(a?: string[], b?: string[]): string[] | undefined {
  const left = a ?? [];
  const right = b ?? [];
  if (left.length === 0 && right.length === 0) return undefined;
  // A Set drops repeats while preserving insertion order.
  return Array.from(new Set(left.concat(right)));
}
@@ -61,3 +61,16 @@ export interface MemoryIO {
61
61
  load(): Promise<MemoryItem[]>;
62
62
  save(items: MemoryItem[]): Promise<void>;
63
63
  }
64
+
65
+ /**
66
+ * Consolidation — fold same-kind near-duplicate memories into one item instead
67
+ * of bloating with "user likes EUR" ×5. Needs embeddings (the dup check is
68
+ * cosine). Omit for append-only. Two tiers: embedding-only dedup (zero
69
+ * inference, mobile-safe) and, when `merge` is set, an LLM rewrite.
70
+ */
71
+ export interface MemoryConsolidation {
72
+ /** Cosine threshold at or above which two same-kind memories are "the same". Default 0.92. */
73
+ threshold?: number;
74
+ /** Optional LLM merger; without it the newer item simply supersedes the older. */
75
+ merge?: (existing: string, incoming: string) => Promise<string>;
76
+ }
@@ -0,0 +1,132 @@
1
+ import { describe, it, expect, vi } from 'vitest';
2
+ import {
3
+ shouldHandleUtterance,
4
+ runVoiceAssistant,
5
+ type VoiceTranscriptEvent,
6
+ } from './assistant.js';
7
+
8
+ const immediateSleep = async () => {};
9
+
10
/** Wrap a fixed list of transcript events as an async-iterable session; each iteration replays the list from the start. */
function sessionOf(events: VoiceTranscriptEvent[]): AsyncIterable<VoiceTranscriptEvent> {
  async function* replay(): AsyncGenerator<VoiceTranscriptEvent> {
    for (const event of events) {
      yield event;
    }
  }
  return { [Symbol.asyncIterator]: replay };
}
17
+
18
+ describe('shouldHandleUtterance', () => {
19
+ it('drops utterances shorter than minChars', () => {
20
+ expect(shouldHandleUtterance('hi')).toBe(false);
21
+ expect(shouldHandleUtterance('go!', { minChars: 5 })).toBe(false);
22
+ });
23
+
24
+ it('drops known Whisper hallucinations regardless of trailing punctuation/case', () => {
25
+ expect(shouldHandleUtterance('you')).toBe(false);
26
+ expect(shouldHandleUtterance('Thanks.')).toBe(false);
27
+ expect(shouldHandleUtterance('Thank you')).toBe(false);
28
+ expect(shouldHandleUtterance('.')).toBe(false);
29
+ });
30
+
31
+ it('keeps a real request', () => {
32
+ expect(shouldHandleUtterance('what is my balance')).toBe(true);
33
+ });
34
+
35
+ it('honours a custom ignore list', () => {
36
+ expect(shouldHandleUtterance('computer', { ignoredUtterances: ['computer'] })).toBe(false);
37
+ });
38
+ });
39
+
40
+ describe('runVoiceAssistant', () => {
41
+ it('handles only real utterances and ignores vad/short/hallucination events', async () => {
42
+ const respond = vi.fn(async (t: string) => `reply to ${t}`);
43
+ const speak = vi.fn(async () => {});
44
+ const session = sessionOf([
45
+ { type: 'vad', text: undefined },
46
+ { type: 'text', text: 'you' }, // hallucination → skipped
47
+ { type: 'text', text: 'what is my balance' }, // handled
48
+ { type: 'endOfTurn' },
49
+ ]);
50
+
51
+ await runVoiceAssistant(session, { respond, speak }, { sleep: immediateSleep });
52
+
53
+ expect(respond).toHaveBeenCalledTimes(1);
54
+ expect(respond).toHaveBeenCalledWith('what is my balance');
55
+ expect(speak).toHaveBeenCalledWith('reply to what is my balance');
56
+ });
57
+
58
+ it('gates the mic around playback and ends un-gated', async () => {
59
+ const gates: boolean[] = [];
60
+ const session = sessionOf([{ type: 'text', text: 'tell me a joke' }]);
61
+ await runVoiceAssistant(
62
+ session,
63
+ {
64
+ respond: async () => 'here is a joke',
65
+ speak: async () => {},
66
+ setMicGated: (g) => gates.push(g),
67
+ },
68
+ { sleep: immediateSleep },
69
+ );
70
+ // gated true before speaking, false after cooldown, false again on loop exit.
71
+ expect(gates).toEqual([true, false, false]);
72
+ });
73
+
74
+ it('emits listening → thinking → speaking → listening states', async () => {
75
+ const states: string[] = [];
76
+ const session = sessionOf([{ type: 'text', text: 'what time is it' }]);
77
+ await runVoiceAssistant(
78
+ session,
79
+ { respond: async () => 'noon', speak: async () => {}, onState: (s) => states.push(s) },
80
+ { sleep: immediateSleep },
81
+ );
82
+ expect(states).toEqual(['listening', 'thinking', 'speaking', 'listening']);
83
+ });
84
+
85
+ it('does not speak an empty reply', async () => {
86
+ const speak = vi.fn(async () => {});
87
+ const session = sessionOf([{ type: 'text', text: 'a vague mumble here' }]);
88
+ await runVoiceAssistant(
89
+ session,
90
+ { respond: async () => ' ', speak },
91
+ { sleep: immediateSleep },
92
+ );
93
+ expect(speak).not.toHaveBeenCalled();
94
+ });
95
+
96
+ it('survives a respond() error and keeps listening', async () => {
97
+ const speak = vi.fn(async () => {});
98
+ const session = sessionOf([
99
+ { type: 'text', text: 'first thing fails' },
100
+ { type: 'text', text: 'second thing works' },
101
+ ]);
102
+ const respond = vi
103
+ .fn<[string], Promise<string>>()
104
+ .mockRejectedValueOnce(new Error('boom'))
105
+ .mockResolvedValueOnce('ok');
106
+ await runVoiceAssistant(session, { respond, speak }, { sleep: immediateSleep });
107
+ expect(respond).toHaveBeenCalledTimes(2);
108
+ expect(speak).toHaveBeenCalledTimes(1);
109
+ expect(speak).toHaveBeenCalledWith('ok');
110
+ });
111
+
112
+ it('stops early when the signal is aborted', async () => {
113
+ const controller = new AbortController();
114
+ const respond = vi.fn(async (t: string) => {
115
+ controller.abort();
116
+ return `reply ${t}`;
117
+ });
118
+ const speak = vi.fn(async () => {});
119
+ const session = sessionOf([
120
+ { type: 'text', text: 'first utterance here' },
121
+ { type: 'text', text: 'second utterance here' },
122
+ ]);
123
+ await runVoiceAssistant(
124
+ session,
125
+ { respond, speak },
126
+ { sleep: immediateSleep, signal: controller.signal },
127
+ );
128
+ // Aborted during the first respond ⇒ never speaks, never handles the second.
129
+ expect(respond).toHaveBeenCalledTimes(1);
130
+ expect(speak).not.toHaveBeenCalled();
131
+ });
132
+ });
@@ -0,0 +1,146 @@
1
+ /**
2
+ * Hands-free voice-assistant loop — the transcribe → reason → speak cycle that
3
+ * QVAC's `transcribeStream()` makes possible, lifted into shared code so mobile
4
+ * and desktop run the same orchestration.
5
+ *
6
+ * The host owns the I/O: it opens the SDK session (`transcribeStream` with
7
+ * `DEFAULT_VOICE_STREAM_PARAMS`), feeds mic audio via `session.write()`, and
8
+ * supplies `respond` (LLM/funnel turn → reply text) + `speak` (synth + play).
9
+ * This loop does the parts that must be identical everywhere: filter Whisper's
10
+ * silence hallucinations, and gate the mic during playback so the assistant
11
+ * never transcribes its own voice (QVAC's reference uses a mic-gate, not
12
+ * barge-in — we mirror that).
13
+ */
14
+
15
+ /** A transcript event from a `transcribeStream` conversation session. */
16
+ export interface VoiceTranscriptEvent {
17
+ type: string;
18
+ /** Present on `text` events — a committed utterance. */
19
+ text?: string;
20
+ }
21
+
22
+ /** The host's transcription session (the SDK's conversation session fits). */
23
+ export type VoiceAssistantSession = AsyncIterable<VoiceTranscriptEvent>;
24
+
25
+ export type VoiceAssistantState = 'listening' | 'thinking' | 'speaking';
26
+
27
+ export interface VoiceAssistantHandlers {
28
+ /** Produce an assistant reply for a user utterance (wraps the LLM/funnel). */
29
+ respond: (transcript: string) => Promise<string>;
30
+ /** Speak the reply: synth + playback. Resolves when playback finishes. */
31
+ speak: (text: string) => Promise<void>;
32
+ /**
33
+ * Gate mic capture so the assistant doesn't hear itself. The host should drop
34
+ * (not buffer) audio while gated. Called `true` before speaking, `false` after
35
+ * the post-playback cooldown, and `false` once more when the loop exits.
36
+ */
37
+ setMicGated?: (gated: boolean) => void;
38
+ /** A user utterance passed the filter and is about to be handled. */
39
+ onUserText?: (text: string) => void;
40
+ /** The assistant's reply, before it is spoken. */
41
+ onReply?: (text: string) => void;
42
+ /** UI state transitions. */
43
+ onState?: (state: VoiceAssistantState) => void;
44
+ }
45
+
46
+ export interface VoiceAssistantOptions {
47
+ /** Minimum trimmed utterance length to handle (drops ".", "hi", etc.). Default 3. */
48
+ minChars?: number;
49
+ /** Utterances to ignore (case-insensitive, trailing punctuation stripped). */
50
+ ignoredUtterances?: Iterable<string>;
51
+ /** Pause after playback so speaker reverb settles before listening. Default 300ms. */
52
+ postPlaybackCooldownMs?: number;
53
+ /** Injected for tests; defaults to setTimeout. */
54
+ sleep?: (ms: number) => Promise<void>;
55
+ /** Stop the loop early. */
56
+ signal?: AbortSignal;
57
+ }
58
+
59
/**
 * Whisper frequently hallucinates these from silence — drop them so the
 * assistant doesn't answer phantom turns. (QVAC docs cite "you", ".", "Thanks.")
 */
export const DEFAULT_IGNORED_UTTERANCES: readonly string[] = [
  'you', 'thank you', 'thanks', 'bye', 'okay', '.',
];

/**
 * Decide whether a transcribed utterance deserves an assistant turn.
 *
 * Returns false when the trimmed text is shorter than `minChars`, when nothing
 * is left after stripping trailing punctuation, or when the normalised text is
 * on the ignore list. Ignore-list entries are normalised exactly like the
 * utterance (trimmed, lower-cased, trailing `.!?,` removed), so a custom entry
 * such as "Thanks." or " Computer " matches as documented. Pure + exported so
 * it's directly testable.
 *
 * @param text - Raw utterance text.
 * @param options - `minChars`: minimum trimmed length to handle (default 3);
 *   `ignoredUtterances`: phrases to drop (default {@link DEFAULT_IGNORED_UTTERANCES}).
 * @returns True when the utterance should be handled.
 */
export function shouldHandleUtterance(
  text: string,
  options: { minChars?: number; ignoredUtterances?: Iterable<string> } = {},
): boolean {
  // One normaliser for both sides of the comparison, so list entries and
  // utterances can never disagree on case, padding or trailing punctuation.
  const normalize = (s: string): string =>
    s.trim().toLowerCase().replace(/[.!?,]+$/, '').trim();

  // The length gate applies to the trimmed text, before punctuation is stripped.
  if (text.trim().length < (options.minChars ?? 3)) return false;

  const norm = normalize(text);
  if (!norm) return false; // punctuation only

  for (const entry of options.ignoredUtterances ?? DEFAULT_IGNORED_UTTERANCES) {
    if (normalize(entry) === norm) return false;
  }
  return true;
}
84
+
85
/**
 * Run the hands-free loop until the session ends or `signal` aborts. Only `text`
 * events drive a turn; `vad`/`segment`/`endOfTurn` events are ignored here (the
 * host can read them off the session separately for UI). Always leaves the mic
 * un-gated on exit.
 *
 * Events are consumed strictly one at a time: nothing is read from the session
 * while a turn is being answered or spoken, so anything the session buffers in
 * the meantime is handled afterwards rather than dropped. Keeping the assistant
 * from hearing itself is therefore the job of the host's mic gate (`setMicGated`).
 *
 * Abort is observed while waiting for the next event, after `respond` resolves,
 * and between turns — an idle session no longer keeps an aborted loop alive.
 *
 * @param session - Async-iterable stream of transcript events.
 * @param handlers - Host callbacks: `respond` and `speak` are required; the rest
 *   are optional notifications.
 * @param options - Utterance filter settings, post-playback cooldown, injectable
 *   `sleep`, and an optional `AbortSignal`.
 * @returns Resolves when the session is exhausted or the signal aborts. Errors
 *   thrown by the session itself propagate; `respond`/`speak` failures do not.
 */
export async function runVoiceAssistant(
  session: VoiceAssistantSession,
  handlers: VoiceAssistantHandlers,
  options: VoiceAssistantOptions = {},
): Promise<void> {
  const sleep = options.sleep ?? ((ms: number) => new Promise<void>((r) => setTimeout(r, ms)));
  const cooldown = options.postPlaybackCooldownMs ?? 300;
  const { signal } = options;

  // Resolves (with undefined) once the signal aborts; never settles without a signal.
  let detachAbort = (): void => {};
  const aborted = new Promise<undefined>((resolve) => {
    if (!signal) return;
    if (signal.aborted) {
      resolve(undefined);
      return;
    }
    const onAbort = (): void => resolve(undefined);
    signal.addEventListener('abort', onAbort, { once: true });
    detachAbort = () => signal.removeEventListener('abort', onAbort);
  });

  const iterator = session[Symbol.asyncIterator]();
  // The in-flight `next()` call, if any; set while we are waiting for an event.
  let pending: Promise<IteratorResult<VoiceTranscriptEvent>> | undefined;
  let exhausted = false;

  handlers.onState?.('listening');
  try {
    while (!signal?.aborted) {
      pending = iterator.next();
      // Race the next event against abort so an idle session cannot block shutdown.
      const step = await Promise.race([pending, aborted]);
      if (!step) break; // aborted while waiting; `pending` is still outstanding
      pending = undefined;
      if (step.done) {
        exhausted = true;
        break;
      }
      if (signal?.aborted) break;

      const event = step.value;
      if (event.type !== 'text' || typeof event.text !== 'string') continue;

      const transcript = event.text.trim();
      if (!shouldHandleUtterance(transcript, options)) continue;

      handlers.onUserText?.(transcript);
      handlers.onState?.('thinking');

      let reply: string;
      try {
        reply = await handlers.respond(transcript);
      } catch {
        // Deliberate best-effort: a failed turn must not end the conversation.
        handlers.onState?.('listening');
        continue;
      }
      if (signal?.aborted) break;
      if (!reply || !reply.trim()) {
        handlers.onState?.('listening');
        continue;
      }

      handlers.onReply?.(reply);
      handlers.setMicGated?.(true);
      handlers.onState?.('speaking');
      try {
        await handlers.speak(reply);
      } catch {
        /* keep the loop alive on a playback error */
      } finally {
        // Let speaker reverb settle before the mic is opened again.
        await sleep(cooldown);
        handlers.setMicGated?.(false);
        handlers.onState?.('listening');
      }
    }
  } finally {
    detachAbort();
    // Never leave the mic gated if the loop exits mid-turn.
    handlers.setMicGated?.(false);

    if (pending) {
      // A `next()` is still in flight: awaiting `return()` could wait on it
      // forever, so close the session without blocking and swallow late errors.
      pending.catch(() => {});
      try {
        void Promise.resolve(iterator.return?.()).catch(() => {});
      } catch {
        /* closing the session is best-effort */
      }
    } else if (!exhausted) {
      // Early exit between events: close the session like `for await … break` does.
      await iterator.return?.();
    }
  }
}
@@ -0,0 +1,44 @@
1
+ import { describe, it, expect } from 'vitest';
2
+ import {
3
+ LOCAL_LLM_CONFIG,
4
+ LOCAL_LLM_CONFIG_GPU,
5
+ DELEGATE_LLM_CONFIG,
6
+ TTS_SAMPLE_RATE,
7
+ normalizeWhisperLang,
8
+ } from './config.js';
9
+
10
+ describe('model configs', () => {
11
+ it('CPU baseline runs on cpu with tools enabled', () => {
12
+ expect(LOCAL_LLM_CONFIG.device).toBe('cpu');
13
+ expect(LOCAL_LLM_CONFIG.tools).toBe(true);
14
+ });
15
+
16
+ it('GPU config offloads layers and grows the context', () => {
17
+ expect(LOCAL_LLM_CONFIG_GPU.device).toBe('gpu');
18
+ expect(LOCAL_LLM_CONFIG_GPU.gpu_layers).toBe(99);
19
+ expect(LOCAL_LLM_CONFIG_GPU.ctx_size).toBeGreaterThan(LOCAL_LLM_CONFIG.ctx_size);
20
+ });
21
+
22
+ it('delegate config gives the desktop the largest context', () => {
23
+ expect(DELEGATE_LLM_CONFIG.ctx_size).toBe(16384);
24
+ expect(DELEGATE_LLM_CONFIG.device).toBe('gpu');
25
+ });
26
+
27
+ it('TTS sample rate matches SUPERTONIC-2 output', () => {
28
+ expect(TTS_SAMPLE_RATE).toBe(44100);
29
+ });
30
+ });
31
+
32
+ describe('normalizeWhisperLang', () => {
33
+ it('extracts a supported 2-letter code from a locale', () => {
34
+ expect(normalizeWhisperLang('it-IT')).toBe('it');
35
+ expect(normalizeWhisperLang('en_US')).toBe('en');
36
+ });
37
+
38
+ it('falls back to en for unsupported or missing locales', () => {
39
+ expect(normalizeWhisperLang('xx-YY')).toBe('en');
40
+ expect(normalizeWhisperLang('')).toBe('en');
41
+ expect(normalizeWhisperLang(null)).toBe('en');
42
+ expect(normalizeWhisperLang(undefined)).toBe('en');
43
+ });
44
+ });
@@ -0,0 +1,76 @@
1
+ /**
2
+ * QVAC model-load configs and constants, shared across every host. These are
3
+ * plain data (no SDK import) so they stay portable and testable; callers merge
4
+ * in SDK-specific bits like `verbosity: VERBOSITY.ERROR` at load time.
5
+ */
6
+
7
+ /**
8
+ * CPU baseline for the local llamacpp model. Used as the GPU fallback and as the
9
+ * base the GPU attempt overrides (device, gpu_layers and ctx_size).
10
+ */
11
+ export const LOCAL_LLM_CONFIG = {
12
+ device: 'cpu',
13
+ gpu_layers: 0,
14
+ ctx_size: 2048,
15
+ tools: true,
16
+ } as const;
17
+
18
+ /**
19
+ * GPU (Metal on iPhone) offload — far faster than CPU when llamacpp can init the
20
+ * Metal context in the worklet. Fall back to {@link LOCAL_LLM_CONFIG} if the GPU
21
+ * load throws. ctx 4096 fits the agentic prompt (system + tools + skills + a
22
+ * little history); 2048 overflowed immediately ("prompt exceeds context").
23
+ */
24
+ export const LOCAL_LLM_CONFIG_GPU = {
25
+ ...LOCAL_LLM_CONFIG,
26
+ device: 'gpu',
27
+ gpu_layers: 99, // offload all layers; llamacpp clamps to the model's count
28
+ ctx_size: 4096,
29
+ } as const;
30
+
31
+ /**
32
+ * Delegated to a desktop provider — it has the RAM to run a big context, so give
33
+ * the agentic prompt plenty of room (Qwen3-600M supports up to 32k). 2048
34
+ * overflowed with the system prompt + tool/skill definitions alone.
35
+ */
36
+ export const DELEGATE_LLM_CONFIG = {
37
+ ...LOCAL_LLM_CONFIG_GPU,
38
+ ctx_size: 16384,
39
+ } as const;
40
+
41
+ /** SUPERTONIC-2 TTS output sample rate (Hz). Used to build the WAV for playback. */
42
+ export const TTS_SAMPLE_RATE = 44100;
43
+
44
+ /**
45
+ * Default params for a hands-free `transcribeStream()` voice session (Whisper).
46
+ * `emitVadEvents` turns the session into a conversation stream (text + vad +
47
+ * endOfTurn events); `endOfTurnSilenceMs` is how long a pause must last before
48
+ * an utterance is committed — conservative so it doesn't cut speakers off mid
49
+ * sentence or trigger on TTS reverb. Hosts merge in `modelId` + spread these.
50
+ */
51
+ export const DEFAULT_VOICE_STREAM_PARAMS = {
52
+ emitVadEvents: true,
53
+ endOfTurnSilenceMs: 700,
54
+ } as const;
55
+
56
/**
 * Whisper languages we request directly from the device locale. whisper.cpp
 * supports more, but the QVAC handler rejects "auto"/detect_language for these
 * tiny models, so we pass a concrete code (and fall back to 'en').
 */
export const WHISPER_LANGS: ReadonlySet<string> = new Set([
  'en', 'it', 'es', 'fr', 'de', 'pt', 'nl', 'ru', 'pl', 'uk', 'tr', 'ar',
  'zh', 'ja', 'ko', 'hi', 'id', 'sv', 'no', 'da', 'fi', 'cs', 'ro', 'el',
  'he', 'th', 'vi', 'hu', 'ca',
]);

/**
 * Locale language subtags that denote a language in {@link WHISPER_LANGS} under
 * a different code. A Map (not an object literal) so inherited keys such as
 * "constructor" can never be mistaken for an alias.
 */
const WHISPER_LANG_ALIASES: ReadonlyMap<string, string> = new Map([
  ['iw', 'he'], // legacy code for Hebrew still reported by some platforms
  ['in', 'id'], // legacy code for Indonesian still reported by some platforms
  ['nb', 'no'], // Norwegian Bokmål → the Norwegian code in WHISPER_LANGS
]);

/**
 * Best-effort 2-letter Whisper language code from an OS locale string
 * (e.g. "it-IT" → "it"), restricted to codes in {@link WHISPER_LANGS}. Falls
 * back to 'en'. Pure: the host reads the locale (NativeModules etc.) and passes
 * it here.
 *
 * Surrounding whitespace is ignored, the language subtag ends at the first
 * `-`, `_`, `.` or `@` (so "fr.UTF-8" and "de_DE@euro" work), and known
 * alternate codes are mapped via {@link WHISPER_LANG_ALIASES}.
 *
 * @param locale - OS locale string; null/undefined/empty yields 'en'.
 * @returns A code contained in {@link WHISPER_LANGS}.
 */
export function normalizeWhisperLang(locale: string | null | undefined): string {
  if (!locale) return 'en';
  const subtag = String(locale).trim().split(/[-_.@]/)[0]?.toLowerCase() ?? '';
  const code = WHISPER_LANG_ALIASES.get(subtag) ?? subtag;
  return WHISPER_LANGS.has(code) ? code : 'en';
}
@@ -0,0 +1,68 @@
1
+ import { describe, it, expect } from 'vitest';
2
+ import {
3
+ allowListFirewall,
4
+ denyListFirewall,
5
+ firewallFromKeyList,
6
+ buildDelegateConfig,
7
+ } from './delegate.js';
8
+
9
+ describe('allowListFirewall', () => {
10
+ it('builds an allow-list, trimming + de-duping keys', () => {
11
+ expect(allowListFirewall([' k1 ', 'k2', 'k1', ''])).toEqual({
12
+ mode: 'allow',
13
+ publicKeys: ['k1', 'k2'],
14
+ });
15
+ });
16
+
17
+ it('is empty for no keys (caller must decide: open vs refuse)', () => {
18
+ expect(allowListFirewall([])).toEqual({ mode: 'allow', publicKeys: [] });
19
+ });
20
+ });
21
+
22
+ describe('denyListFirewall', () => {
23
+ it('builds a deny-list', () => {
24
+ expect(denyListFirewall(['bad'])).toEqual({ mode: 'deny', publicKeys: ['bad'] });
25
+ });
26
+ });
27
+
28
+ describe('firewallFromKeyList', () => {
29
+ it('parses comma/space/newline-separated keys into an allow-list', () => {
30
+ expect(firewallFromKeyList('k1, k2\nk3 k4')).toEqual({
31
+ mode: 'allow',
32
+ publicKeys: ['k1', 'k2', 'k3', 'k4'],
33
+ });
34
+ });
35
+
36
+ it('returns undefined for empty/missing input (advertise openly)', () => {
37
+ expect(firewallFromKeyList('')).toBeUndefined();
38
+ expect(firewallFromKeyList(' ')).toBeUndefined();
39
+ expect(firewallFromKeyList(null)).toBeUndefined();
40
+ expect(firewallFromKeyList(undefined)).toBeUndefined();
41
+ });
42
+ });
43
+
44
+ describe('buildDelegateConfig', () => {
45
+ it('defaults fallbackToLocal to false and trims the key', () => {
46
+ expect(buildDelegateConfig(' pk ')).toEqual({
47
+ providerPublicKey: 'pk',
48
+ fallbackToLocal: false,
49
+ });
50
+ });
51
+
52
+ it('passes through fallbackToLocal, timeout, forceNewConnection when set', () => {
53
+ expect(
54
+ buildDelegateConfig('pk', { fallbackToLocal: true, timeout: 60000, forceNewConnection: true }),
55
+ ).toEqual({
56
+ providerPublicKey: 'pk',
57
+ fallbackToLocal: true,
58
+ timeout: 60000,
59
+ forceNewConnection: true,
60
+ });
61
+ });
62
+
63
+ it('omits optional fields that are not set', () => {
64
+ const cfg = buildDelegateConfig('pk', { fallbackToLocal: false });
65
+ expect('timeout' in cfg).toBe(false);
66
+ expect('forceNewConnection' in cfg).toBe(false);
67
+ });
68
+ });