@vellumai/assistant 0.4.23 → 0.4.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. package/bun.lock +3 -0
  2. package/package.json +2 -1
  3. package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +0 -15
  4. package/src/__tests__/assistant-events-sse-hardening.test.ts +9 -3
  5. package/src/__tests__/call-controller.test.ts +80 -0
  6. package/src/__tests__/config-schema.test.ts +38 -178
  7. package/src/__tests__/conversation-routes-guardian-reply.test.ts +4 -1
  8. package/src/__tests__/credential-security-invariants.test.ts +0 -2
  9. package/src/__tests__/guardian-verify-setup-skill-regression.test.ts +2 -2
  10. package/src/__tests__/ipc-snapshot.test.ts +0 -9
  11. package/src/__tests__/onboarding-template-contract.test.ts +10 -20
  12. package/src/__tests__/relay-server.test.ts +3 -3
  13. package/src/__tests__/runtime-events-sse-parity.test.ts +10 -0
  14. package/src/__tests__/runtime-events-sse.test.ts +7 -0
  15. package/src/__tests__/session-runtime-assembly.test.ts +34 -8
  16. package/src/__tests__/system-prompt.test.ts +7 -1
  17. package/src/__tests__/trusted-contact-approval-notifier.test.ts +12 -8
  18. package/src/__tests__/twilio-routes-twiml.test.ts +2 -2
  19. package/src/__tests__/twilio-routes.test.ts +2 -3
  20. package/src/__tests__/voice-quality.test.ts +21 -132
  21. package/src/calls/call-controller.ts +34 -29
  22. package/src/calls/relay-server.ts +11 -5
  23. package/src/calls/twilio-routes.ts +4 -38
  24. package/src/calls/voice-quality.ts +7 -63
  25. package/src/config/bundled-skills/guardian-verify-setup/SKILL.md +7 -10
  26. package/src/config/bundled-skills/messaging/SKILL.md +3 -5
  27. package/src/config/bundled-skills/phone-calls/SKILL.md +144 -83
  28. package/src/config/bundled-skills/sms-setup/SKILL.md +0 -20
  29. package/src/config/bundled-skills/twilio-setup/SKILL.md +9 -17
  30. package/src/config/bundled-skills/voice-setup/SKILL.md +36 -1
  31. package/src/config/bundled-skills/voice-setup/icon.svg +20 -0
  32. package/src/config/calls-schema.ts +3 -53
  33. package/src/config/elevenlabs-schema.ts +33 -0
  34. package/src/config/schema.ts +183 -137
  35. package/src/config/types.ts +0 -1
  36. package/src/daemon/handlers/browser.ts +1 -6
  37. package/src/daemon/ipc-contract/browser.ts +5 -14
  38. package/src/daemon/ipc-contract-inventory.json +0 -2
  39. package/src/daemon/session-agent-loop-handlers.ts +3 -0
  40. package/src/daemon/session-runtime-assembly.ts +9 -7
  41. package/src/mcp/client.ts +2 -1
  42. package/src/memory/conversation-crud.ts +339 -166
  43. package/src/runtime/auth/middleware.ts +87 -26
  44. package/src/runtime/routes/events-routes.ts +7 -0
  45. package/src/runtime/routes/inbound-message-handler.ts +3 -4
  46. package/src/schedule/scheduler.ts +159 -45
  47. package/src/security/secure-keys.ts +3 -3
  48. package/src/tools/browser/browser-manager.ts +72 -228
  49. package/src/tools/browser/browser-screencast.ts +0 -5
  50. package/src/tools/network/script-proxy/certs.ts +7 -237
  51. package/src/tools/network/script-proxy/connect-tunnel.ts +1 -82
  52. package/src/tools/network/script-proxy/http-forwarder.ts +2 -151
  53. package/src/tools/network/script-proxy/logging.ts +12 -196
  54. package/src/tools/network/script-proxy/mitm-handler.ts +2 -270
  55. package/src/tools/network/script-proxy/policy.ts +4 -152
  56. package/src/tools/network/script-proxy/router.ts +2 -60
  57. package/src/tools/network/script-proxy/server.ts +5 -137
  58. package/src/tools/network/script-proxy/types.ts +19 -125
  59. package/src/tools/system/voice-config.ts +23 -1
  60. package/src/util/logger.ts +4 -1
  61. package/src/__tests__/elevenlabs-config.test.ts +0 -95
  62. package/src/__tests__/twilio-routes-elevenlabs.test.ts +0 -407
  63. package/src/calls/elevenlabs-config.ts +0 -32
@@ -106,6 +106,11 @@ async function publishAndReadFrame(
106
106
  await assistantEventHub.publish(event);
107
107
 
108
108
  const reader = response.body!.getReader();
109
+
110
+ // The first chunk is the immediate heartbeat comment enqueued in start().
111
+ await reader.read();
112
+
113
+ // The second chunk is the actual assistant event.
109
114
  const { value } = await reader.read();
110
115
  ac.abort();
111
116
 
@@ -366,6 +371,11 @@ describe("SSE IPC parity — streaming/delta message types", () => {
366
371
  await assistantEventHub.publish(published);
367
372
 
368
373
  const reader = response.body!.getReader();
374
+
375
+ // The first chunk is the immediate heartbeat comment enqueued in start().
376
+ await reader.read();
377
+
378
+ // The second chunk is the actual assistant event.
369
379
  const { value } = await reader.read();
370
380
  ac.abort();
371
381
 
@@ -168,6 +168,13 @@ describe("SSE assistant-events endpoint", () => {
168
168
 
169
169
  // Read the first frame directly from the response body stream.
170
170
  const reader = response.body!.getReader();
171
+
172
+ // The first chunk is the immediate heartbeat comment enqueued in start().
173
+ const initial = await reader.read();
174
+ expect(initial.done).toBe(false);
175
+ expect(new TextDecoder().decode(initial.value)).toBe(": heartbeat\n\n");
176
+
177
+ // The second chunk is the actual assistant event.
171
178
  const { value, done } = await reader.read();
172
179
  ac.abort();
173
180
 
@@ -44,6 +44,14 @@ describe("resolveChannelCapabilities", () => {
44
44
  expect(caps.supportsVoiceInput).toBe(true);
45
45
  });
46
46
 
47
+ test("vellum channel with vellum interface supports dynamic UI", () => {
48
+ const caps = resolveChannelCapabilities("vellum", "vellum");
49
+ expect(caps.channel).toBe("vellum");
50
+ expect(caps.dashboardCapable).toBe(false);
51
+ expect(caps.supportsDynamicUi).toBe(true);
52
+ expect(caps.supportsVoiceInput).toBe(false);
53
+ });
54
+
47
55
  test("defaults to vellum for null source channel", () => {
48
56
  const caps = resolveChannelCapabilities(null);
49
57
  expect(caps.channel).toBe("vellum");
@@ -407,6 +415,24 @@ describe("trust-gating via channel capabilities", () => {
407
415
  expect(injected).toContain("Present information as well-formatted text");
408
416
  expect(injected).toContain("desktop app");
409
417
  });
418
+
419
+ test("vellum web interface allows dynamic UI but constrains dashboard references", () => {
420
+ const caps = resolveChannelCapabilities("vellum", "vellum");
421
+ const message: Message = {
422
+ role: "user",
423
+ content: [{ type: "text", text: "Show me a form" }],
424
+ };
425
+
426
+ const result = injectChannelCapabilityContext(message, caps);
427
+ const injected = (result.content[0] as { type: "text"; text: string }).text;
428
+
429
+ expect(injected).toContain("CHANNEL CONSTRAINTS");
430
+ expect(injected).toContain("Do NOT reference the dashboard UI");
431
+ expect(injected).not.toContain("Do NOT use ui_show");
432
+ expect(injected).not.toContain("Present information as well-formatted text");
433
+ expect(injected).toContain("supports_dynamic_ui: true");
434
+ expect(injected).toContain("dashboard_capable: false");
435
+ });
410
436
  });
411
437
 
412
438
  // ---------------------------------------------------------------------------
@@ -994,12 +1020,12 @@ describe("sanitizePttActivationKey", () => {
994
1020
  expect(sanitizePttActivationKey("none")).toBe("none");
995
1021
  });
996
1022
 
997
- test('returns "unknown" for invalid keys', () => {
998
- expect(sanitizePttActivationKey("malicious\nprompt injection")).toBe(
999
- "unknown",
1000
- );
1001
- expect(sanitizePttActivationKey("arbitrary_value")).toBe("unknown");
1002
- expect(sanitizePttActivationKey("")).toBe("unknown");
1023
+ test("returns undefined for invalid keys", () => {
1024
+ expect(
1025
+ sanitizePttActivationKey("malicious\nprompt injection"),
1026
+ ).toBeUndefined();
1027
+ expect(sanitizePttActivationKey("arbitrary_value")).toBeUndefined();
1028
+ expect(sanitizePttActivationKey("")).toBeUndefined();
1003
1029
  });
1004
1030
  });
1005
1031
 
@@ -1015,11 +1041,11 @@ describe("resolveChannelCapabilities with PTT metadata", () => {
1015
1041
  expect(caps.pttActivationKey).toBe("fn");
1016
1042
  });
1017
1043
 
1018
- test("sanitizes invalid pttActivationKey to unknown", () => {
1044
+ test("sanitizes invalid pttActivationKey to undefined", () => {
1019
1045
  const caps = resolveChannelCapabilities("macos", "macos", {
1020
1046
  pttActivationKey: "evil\nprompt",
1021
1047
  });
1022
- expect(caps.pttActivationKey).toBe("unknown");
1048
+ expect(caps.pttActivationKey).toBeUndefined();
1023
1049
  });
1024
1050
 
1025
1051
  test("passes through microphonePermissionGranted", () => {
@@ -75,9 +75,15 @@ const {
75
75
  buildPhoneCallsRoutingSection,
76
76
  } = await import("../config/system-prompt.js");
77
77
 
78
- /** Strip the Configuration and Skills sections so base-prompt tests stay focused. */
78
+ /** Strip the Configuration, Skills, and hardcoded preamble sections so base-prompt tests stay focused. */
79
79
  function basePrompt(result: string): string {
80
80
  let s = result;
81
+ // Strip the hardcoded em-dash instruction preamble
82
+ const emDashLine =
83
+ "IMPORTANT: Never use em dashes (\u2014) in your messages. Use commas, periods, or just start a new sentence instead.";
84
+ if (s.startsWith(emDashLine)) {
85
+ s = s.slice(emDashLine.length).replace(/^\n\n/, "");
86
+ }
81
87
  for (const heading of [
82
88
  "## Configuration",
83
89
  "## Skills Catalog",
@@ -120,8 +120,14 @@ mock.module("../config/env.js", () => ({
120
120
  getGatewayInternalBaseUrl: () => "http://localhost:3000",
121
121
  }));
122
122
 
123
+ // ── User reference mock ──
124
+ mock.module("../config/user-reference.js", () => ({
125
+ resolveUserReference: () => "my human",
126
+ }));
127
+
123
128
  // Import module under test AFTER mocks are set up
124
129
  import type { ChannelId } from "../channels/types.js";
130
+ import { resolveUserReference } from "../config/user-reference.js";
125
131
  import type { GuardianContext } from "../runtime/guardian-context-resolver.js";
126
132
 
127
133
  // We need to test the private functions by importing the module.
@@ -220,9 +226,7 @@ async function simulateNotifierPoll(params: {
220
226
  }
221
227
  }
222
228
 
223
- const waitingText = guardianName
224
- ? `Waiting for ${guardianName}'s approval...`
225
- : "Waiting for your guardian's approval...";
229
+ const waitingText = `Waiting for ${guardianName ?? resolveUserReference()}'s approval...`;
226
230
 
227
231
  try {
228
232
  await deliverChannelReply(
@@ -330,7 +334,7 @@ describe("trusted-contact pending-approval notifier", () => {
330
334
  );
331
335
  });
332
336
 
333
- test("uses generic phrasing when no guardian name is available", async () => {
337
+ test("falls back to user reference when no guardian name is available", async () => {
334
338
  mockPendingApprovals = [
335
339
  {
336
340
  requestId: "req-3",
@@ -359,11 +363,11 @@ describe("trusted-contact pending-approval notifier", () => {
359
363
 
360
364
  expect(deliveredReplies).toHaveLength(1);
361
365
  expect(deliveredReplies[0].payload.text).toBe(
362
- "Waiting for your guardian's approval...",
366
+ "Waiting for my human's approval...",
363
367
  );
364
368
  });
365
369
 
366
- test("uses generic phrasing when no guardian binding exists", async () => {
370
+ test("falls back to user reference when no guardian binding exists", async () => {
367
371
  mockPendingApprovals = [
368
372
  {
369
373
  requestId: "req-4",
@@ -388,7 +392,7 @@ describe("trusted-contact pending-approval notifier", () => {
388
392
 
389
393
  expect(deliveredReplies).toHaveLength(1);
390
394
  expect(deliveredReplies[0].payload.text).toBe(
391
- "Waiting for your guardian's approval...",
395
+ "Waiting for my human's approval...",
392
396
  );
393
397
  });
394
398
 
@@ -736,7 +740,7 @@ describe("trusted-contact pending-approval notifier", () => {
736
740
  expect(deliveredReplies).toHaveLength(1);
737
741
  // Falls back to generic phrasing
738
742
  expect(deliveredReplies[0].payload.text).toBe(
739
- "Waiting for your guardian's approval...",
743
+ "Waiting for my human's approval...",
740
744
  );
741
745
  });
742
746
  });
@@ -46,7 +46,7 @@ describe("generateTwiML with voice quality profile", () => {
46
46
  expect(twiml).toContain('voice="voice123-turbo_v2_5-1_0.5_0.75"');
47
47
  });
48
48
 
49
- test("voice attribute reflects configured voice for twilio_standard mode", () => {
49
+ test("voice attribute reflects configured Google voice", () => {
50
50
  const twiml = generateTwiML(callSessionId, relayUrl, welcomeGreeting, {
51
51
  language: "en-US",
52
52
  transcriptionProvider: "Deepgram",
@@ -57,7 +57,7 @@ describe("generateTwiML with voice quality profile", () => {
57
57
  expect(twiml).toContain('voice="Google.en-US-Journey-O"');
58
58
  });
59
59
 
60
- test("voice attribute reflects configured voice for twilio_elevenlabs_tts mode", () => {
60
+ test("voice attribute reflects configured ElevenLabs voice", () => {
61
61
  const twiml = generateTwiML(callSessionId, relayUrl, welcomeGreeting, {
62
62
  language: "en-US",
63
63
  transcriptionProvider: "Deepgram",
@@ -60,13 +60,12 @@ const mockConfigObj = {
60
60
  memory: { enabled: false },
61
61
  rateLimit: { maxRequestsPerMinute: 0, maxTokensPerSession: 0 },
62
62
  secretDetection: { enabled: false },
63
+ elevenlabs: { voiceId: "21m00Tcm4TlvDq8ikWAM" },
63
64
  calls: {
64
65
  voice: {
65
- mode: "twilio_standard",
66
66
  language: "en-US",
67
67
  transcriptionProvider: "Deepgram",
68
- fallbackToStandardOnError: true,
69
- elevenlabs: { voiceId: "" },
68
+ elevenlabs: {},
70
69
  },
71
70
  },
72
71
  };
@@ -8,7 +8,6 @@ mock.module("../config/loader.js", () => ({
8
8
 
9
9
  import {
10
10
  buildElevenLabsVoiceSpec,
11
- isVoiceProfileValid,
12
11
  resolveVoiceQualityProfile,
13
12
  } from "../calls/voice-quality.js";
14
13
 
@@ -62,176 +61,66 @@ describe("buildElevenLabsVoiceSpec", () => {
62
61
  });
63
62
 
64
63
  describe("resolveVoiceQualityProfile", () => {
65
- test("returns standard profile for twilio_standard mode", () => {
64
+ test("always returns ElevenLabs ttsProvider", () => {
66
65
  mockConfig = {
66
+ elevenlabs: { voiceId: "21m00Tcm4TlvDq8ikWAM" },
67
67
  calls: {
68
68
  voice: {
69
- mode: "twilio_standard",
70
69
  language: "en-US",
71
- transcriptionProvider: "Google",
72
- fallbackToStandardOnError: false,
73
- elevenlabs: {},
74
- },
75
- },
76
- };
77
- const profile = resolveVoiceQualityProfile();
78
- expect(profile.mode).toBe("twilio_standard");
79
- expect(profile.ttsProvider).toBe("Google");
80
- expect(profile.voice).toBe("Google.en-US-Journey-O");
81
- expect(profile.validationErrors).toHaveLength(0);
82
- });
83
-
84
- test("returns elevenlabs profile for twilio_elevenlabs_tts mode", () => {
85
- mockConfig = {
86
- calls: {
87
- voice: {
88
- mode: "twilio_elevenlabs_tts",
89
- language: "en-US",
90
- transcriptionProvider: "Google",
91
- fallbackToStandardOnError: false,
92
- elevenlabs: { voiceId: "elvoice1" },
70
+ transcriptionProvider: "Deepgram",
93
71
  },
94
72
  },
95
73
  };
96
74
  const profile = resolveVoiceQualityProfile();
97
- expect(profile.mode).toBe("twilio_elevenlabs_tts");
98
75
  expect(profile.ttsProvider).toBe("ElevenLabs");
99
- expect(profile.voice).toBe("elvoice1");
100
- expect(profile.validationErrors).toHaveLength(0);
101
- });
102
-
103
- test("falls back to standard when voiceId missing and fallback enabled", () => {
104
- mockConfig = {
105
- calls: {
106
- voice: {
107
- mode: "twilio_elevenlabs_tts",
108
- language: "en-US",
109
- transcriptionProvider: "Google",
110
- fallbackToStandardOnError: true,
111
- elevenlabs: { voiceId: "" },
112
- },
113
- },
114
- };
115
- const profile = resolveVoiceQualityProfile();
116
- expect(profile.mode).toBe("twilio_standard");
117
- expect(profile.validationErrors.length).toBeGreaterThan(0);
118
- expect(profile.validationErrors[0]).toContain("falling back");
119
- });
120
-
121
- test("returns validation error when voiceId missing and fallback disabled", () => {
122
- mockConfig = {
123
- calls: {
124
- voice: {
125
- mode: "twilio_elevenlabs_tts",
126
- language: "en-US",
127
- transcriptionProvider: "Google",
128
- fallbackToStandardOnError: false,
129
- elevenlabs: { voiceId: "" },
130
- },
131
- },
132
- };
133
- const profile = resolveVoiceQualityProfile();
134
- expect(profile.mode).toBe("twilio_elevenlabs_tts");
135
- expect(profile.validationErrors.length).toBeGreaterThan(0);
136
- expect(profile.validationErrors[0]).toContain("voiceId is required");
137
76
  });
138
77
 
139
- test("returns elevenlabs_agent profile with agentId", () => {
78
+ test("voice ID comes from elevenlabs.voiceId", () => {
140
79
  mockConfig = {
80
+ elevenlabs: { voiceId: "custom-voice-123" },
141
81
  calls: {
142
82
  voice: {
143
- mode: "elevenlabs_agent",
144
83
  language: "en-US",
145
- transcriptionProvider: "Google",
146
- fallbackToStandardOnError: false,
147
- elevenlabs: { voiceId: "voice1", agentId: "agent123" },
84
+ transcriptionProvider: "Deepgram",
148
85
  },
149
86
  },
150
87
  };
151
88
  const profile = resolveVoiceQualityProfile();
152
- expect(profile.mode).toBe("elevenlabs_agent");
153
- expect(profile.agentId).toBe("agent123");
154
- expect(profile.validationErrors).toHaveLength(0);
89
+ expect(profile.voice).toBe("custom-voice-123");
155
90
  });
156
91
 
157
- test("falls back to standard when agentId missing and fallback enabled", () => {
92
+ test("uses language from calls.voice config", () => {
158
93
  mockConfig = {
94
+ elevenlabs: { voiceId: "abc" },
159
95
  calls: {
160
96
  voice: {
161
- mode: "elevenlabs_agent",
162
- language: "en-US",
97
+ language: "es-MX",
163
98
  transcriptionProvider: "Google",
164
- fallbackToStandardOnError: true,
165
- elevenlabs: { voiceId: "voice1", agentId: "" },
166
99
  },
167
100
  },
168
101
  };
169
102
  const profile = resolveVoiceQualityProfile();
170
- expect(profile.mode).toBe("twilio_standard");
171
- expect(profile.validationErrors[0]).toContain("falling back");
103
+ expect(profile.language).toBe("es-MX");
104
+ expect(profile.transcriptionProvider).toBe("Google");
172
105
  });
173
106
 
174
- test("returns validation error when agentId missing and fallback disabled", () => {
107
+ test("builds voice spec with model and tuning params", () => {
175
108
  mockConfig = {
176
- calls: {
177
- voice: {
178
- mode: "elevenlabs_agent",
179
- language: "en-US",
180
- transcriptionProvider: "Google",
181
- fallbackToStandardOnError: false,
182
- elevenlabs: { voiceId: "voice1", agentId: "" },
183
- },
109
+ elevenlabs: {
110
+ voiceId: "voice1",
111
+ voiceModelId: "turbo_v2_5",
112
+ speed: 0.9,
113
+ stability: 0.8,
114
+ similarityBoost: 0.9,
184
115
  },
185
- };
186
- const profile = resolveVoiceQualityProfile();
187
- expect(profile.mode).toBe("elevenlabs_agent");
188
- expect(profile.validationErrors.length).toBeGreaterThan(0);
189
- expect(profile.validationErrors[0]).toContain("agentId is required");
190
- });
191
-
192
- test("returns standard profile for unknown mode", () => {
193
- mockConfig = {
194
116
  calls: {
195
117
  voice: {
196
- mode: "unknown_mode",
197
118
  language: "en-US",
198
- transcriptionProvider: "Google",
199
- fallbackToStandardOnError: false,
200
- elevenlabs: {},
119
+ transcriptionProvider: "Deepgram",
201
120
  },
202
121
  },
203
122
  };
204
123
  const profile = resolveVoiceQualityProfile();
205
- expect(profile.mode).toBe("twilio_standard");
206
- });
207
- });
208
-
209
- describe("isVoiceProfileValid", () => {
210
- test("returns true for profile with no errors", () => {
211
- expect(
212
- isVoiceProfileValid({
213
- mode: "twilio_standard",
214
- language: "en-US",
215
- transcriptionProvider: "Google",
216
- ttsProvider: "Google",
217
- voice: "Google.en-US-Journey-O",
218
- fallbackToStandardOnError: false,
219
- validationErrors: [],
220
- }),
221
- ).toBe(true);
222
- });
223
-
224
- test("returns false for profile with errors", () => {
225
- expect(
226
- isVoiceProfileValid({
227
- mode: "twilio_elevenlabs_tts",
228
- language: "en-US",
229
- transcriptionProvider: "Google",
230
- ttsProvider: "ElevenLabs",
231
- voice: "",
232
- fallbackToStandardOnError: false,
233
- validationErrors: ["voiceId is required"],
234
- }),
235
- ).toBe(false);
124
+ expect(profile.voice).toBe("voice1-turbo_v2_5-0.9_0.8_0.9");
236
125
  });
237
126
  });
@@ -44,11 +44,13 @@ const log = getLogger('call-controller');
44
44
  type ControllerState = 'idle' | 'processing' | 'speaking';
45
45
 
46
46
  /**
47
- * Tracks a pending guardian consultation independently of the controller's
47
+ * Tracks a pending guardian input request independently of the controller's
48
48
  * turn state. This allows the call to continue normal turn processing
49
- * (idle -> processing -> speaking) while a consultation is outstanding.
49
+ * (idle -> processing -> speaking) while a guardian consultation is outstanding.
50
+ * Also used to suppress the silence nudge ("Are you still there?") while
51
+ * the caller is waiting on a guardian decision.
50
52
  */
51
- interface PendingConsultation {
53
+ interface PendingGuardianInput {
52
54
  questionText: string;
53
55
  questionId: string;
54
56
  toolApprovalMeta: { toolName: string; inputDigest: string } | null;
@@ -191,16 +193,17 @@ export class CallController {
191
193
  private durationTimer: ReturnType<typeof setTimeout> | null = null;
192
194
  private durationWarningTimer: ReturnType<typeof setTimeout> | null = null;
193
195
  /**
194
- * Tracks the currently pending guardian consultation, if any. Decoupled
196
+ * Tracks the currently pending guardian input request, if any. Decoupled
195
197
  * from the controller's turn state so callers can continue to trigger
196
- * normal turns while consultation is outstanding.
198
+ * normal turns while a guardian consultation is outstanding. Also
199
+ * suppresses the silence nudge while non-null.
197
200
  */
198
- private pendingConsultation: PendingConsultation | null = null;
201
+ private pendingGuardianInput: PendingGuardianInput | null = null;
199
202
  private durationEndTimer: ReturnType<typeof setTimeout> | null = null;
200
203
  private task: string | null;
201
204
  /** True when the call session was created via the inbound path (no outbound task). */
202
205
  private isInbound: boolean;
203
- /** Instructions queued while an LLM turn is in-flight or during pending consultation */
206
+ /** Instructions queued while an LLM turn is in-flight or during pending guardian input */
204
207
  private pendingInstructions: string[] = [];
205
208
  /** Ensures the call opener is triggered at most once per call. */
206
209
  private initialGreetingStarted = false;
@@ -271,7 +274,7 @@ export class CallController {
271
274
  * incoming answers to the correct consultation record.
272
275
  */
273
276
  getPendingConsultationQuestionId(): string | null {
274
- return this.pendingConsultation?.questionId ?? null;
277
+ return this.pendingGuardianInput?.questionId ?? null;
275
278
  }
276
279
 
277
280
  /**
@@ -357,7 +360,7 @@ export class CallController {
357
360
  * speaking.
358
361
  */
359
362
  async handleUserAnswer(answerText: string): Promise<boolean> {
360
- if (!this.pendingConsultation) {
363
+ if (!this.pendingGuardianInput) {
361
364
  log.warn(
362
365
  { callSessionId: this.callSessionId, state: this.state },
363
366
  'handleUserAnswer called but no pending consultation exists',
@@ -366,8 +369,8 @@ export class CallController {
366
369
  }
367
370
 
368
371
  // Clear the consultation timeout and record
369
- clearTimeout(this.pendingConsultation.timer);
370
- this.pendingConsultation = null;
372
+ clearTimeout(this.pendingGuardianInput.timer);
373
+ this.pendingGuardianInput = null;
371
374
 
372
375
  updateCallSession(this.callSessionId, { status: 'in_progress' });
373
376
 
@@ -436,7 +439,7 @@ export class CallController {
436
439
  if (this.silenceTimer) clearTimeout(this.silenceTimer);
437
440
  if (this.durationTimer) clearTimeout(this.durationTimer);
438
441
  if (this.durationWarningTimer) clearTimeout(this.durationWarningTimer);
439
- if (this.pendingConsultation) { clearTimeout(this.pendingConsultation.timer); this.pendingConsultation = null; }
442
+ if (this.pendingGuardianInput) { clearTimeout(this.pendingGuardianInput.timer); this.pendingGuardianInput = null; }
440
443
  if (this.durationEndTimer) { clearTimeout(this.durationEndTimer); this.durationEndTimer = null; }
441
444
  this.llmRunVersion++;
442
445
  this.abortCurrentTurn();
@@ -713,30 +716,30 @@ export class CallController {
713
716
  // the prior pending consultation (preserves tool scope on re-asks).
714
717
  const effectiveToolMeta = toolApprovalMeta
715
718
  ? { toolName: toolApprovalMeta.toolName, inputDigest: toolApprovalMeta.inputDigest }
716
- : this.pendingConsultation?.toolApprovalMeta ?? null;
719
+ : this.pendingGuardianInput?.toolApprovalMeta ?? null;
717
720
 
718
721
  // Coalesce repeated identical asks: if a consultation is already
719
722
  // pending for the same tool/action (or same informational question),
720
723
  // avoid churning requests and just keep the existing one.
721
- if (this.pendingConsultation) {
724
+ if (this.pendingGuardianInput) {
722
725
  const isSameToolAction =
723
- effectiveToolMeta && this.pendingConsultation.toolApprovalMeta
724
- ? effectiveToolMeta.toolName === this.pendingConsultation.toolApprovalMeta.toolName
725
- && effectiveToolMeta.inputDigest === this.pendingConsultation.toolApprovalMeta.inputDigest
726
- : !effectiveToolMeta && !this.pendingConsultation.toolApprovalMeta;
726
+ effectiveToolMeta && this.pendingGuardianInput.toolApprovalMeta
727
+ ? effectiveToolMeta.toolName === this.pendingGuardianInput.toolApprovalMeta.toolName
728
+ && effectiveToolMeta.inputDigest === this.pendingGuardianInput.toolApprovalMeta.inputDigest
729
+ : !effectiveToolMeta && !this.pendingGuardianInput.toolApprovalMeta;
727
730
 
728
731
  if (isSameToolAction) {
729
732
  // Same tool/action — coalesce. Keep the existing consultation
730
733
  // alive and skip creating a new request.
731
734
  log.info(
732
- { callSessionId: this.callSessionId, questionId: this.pendingConsultation.questionId },
735
+ { callSessionId: this.callSessionId, questionId: this.pendingGuardianInput.questionId },
733
736
  'Coalescing repeated ASK_GUARDIAN — same tool/action already pending',
734
737
  );
735
738
  recordCallEvent(this.callSessionId, 'guardian_consult_coalesced', { question: questionText });
736
739
  // Fall through to normal turn completion (idle + flushPendingInstructions)
737
740
  } else {
738
741
  // Materially different intent — supersede the old consultation.
739
- clearTimeout(this.pendingConsultation.timer);
742
+ clearTimeout(this.pendingGuardianInput.timer);
740
743
 
741
744
  // Expire the previous consultation's storage records so stale
742
745
  // guardian answers cannot match the old request.
@@ -752,7 +755,7 @@ export class CallController {
752
755
  );
753
756
  }
754
757
 
755
- this.pendingConsultation = null;
758
+ this.pendingGuardianInput = null;
756
759
 
757
760
  // Dispatch the new consultation with effective tool metadata.
758
761
  // The previous request ID is passed through so the dispatch
@@ -773,10 +776,10 @@ export class CallController {
773
776
  // Without this, the consultation timeout can fire on an already-ended
774
777
  // call, overwriting 'completed' status back to 'in_progress' and
775
778
  // starting a new LLM turn on a dead session. Similarly, a late
776
- // handleUserAnswer could be accepted since pendingConsultation is
779
+ // handleUserAnswer could be accepted since pendingGuardianInput is
777
780
  // still non-null.
778
- if (this.pendingConsultation) {
779
- clearTimeout(this.pendingConsultation.timer);
781
+ if (this.pendingGuardianInput) {
782
+ clearTimeout(this.pendingGuardianInput.timer);
780
783
 
781
784
  // Expire store-side consultation records so clients don't observe
782
785
  // a completed call with a dangling pendingQuestion, and guardian
@@ -787,7 +790,7 @@ export class CallController {
787
790
  expireCanonicalGuardianRequest(previousRequest.id);
788
791
  }
789
792
 
790
- this.pendingConsultation = null;
793
+ this.pendingGuardianInput = null;
791
794
  }
792
795
 
793
796
  const currentSession = getCallSession(this.callSessionId);
@@ -928,7 +931,7 @@ export class CallController {
928
931
  // record, not the global controller state.
929
932
  const consultationTimer = setTimeout(() => {
930
933
  // Only fire if this consultation is still the active one
931
- if (!this.pendingConsultation || this.pendingConsultation.questionId !== pendingQuestion.id) return;
934
+ if (!this.pendingGuardianInput || this.pendingGuardianInput.questionId !== pendingQuestion.id) return;
932
935
 
933
936
  log.info({ callSessionId: this.callSessionId }, 'Guardian consultation timed out');
934
937
 
@@ -960,7 +963,7 @@ export class CallController {
960
963
 
961
964
  // Expire pending questions and update call state
962
965
  expirePendingQuestions(this.callSessionId);
963
- this.pendingConsultation = null;
966
+ this.pendingGuardianInput = null;
964
967
  updateCallSession(this.callSessionId, { status: 'in_progress' });
965
968
  this.guardianUnavailableForCall = true;
966
969
  recordCallEvent(this.callSessionId, 'guardian_consultation_timed_out', { question: questionText });
@@ -982,7 +985,7 @@ export class CallController {
982
985
  }
983
986
  }, getUserConsultationTimeoutMs());
984
987
 
985
- this.pendingConsultation = {
988
+ this.pendingGuardianInput = {
986
989
  questionText,
987
990
  questionId: pendingQuestion.id,
988
991
  toolApprovalMeta: effectiveToolMeta,
@@ -1067,7 +1070,9 @@ export class CallController {
1067
1070
  // During guardian wait states, the relay heartbeat timer handles
1068
1071
  // periodic updates — suppress the generic "Are you still there?"
1069
1072
  // which is confusing when the caller is waiting on a decision.
1070
- if (this.relay.getConnectionState() === 'awaiting_guardian_decision') {
1073
+ // Two paths: in-call consultation (pendingGuardianInput) and
1074
+ // inbound access-request wait (relay state).
1075
+ if (this.pendingGuardianInput || this.relay.getConnectionState() === 'awaiting_guardian_decision') {
1071
1076
  log.debug({ callSessionId: this.callSessionId }, 'Silence timeout suppressed during guardian wait');
1072
1077
  return;
1073
1078
  }
@@ -11,6 +11,7 @@ import { randomInt } from 'node:crypto';
11
11
  import type { ServerWebSocket } from 'bun';
12
12
 
13
13
  import { getConfig } from '../config/loader.js';
14
+ import { resolveUserReference } from '../config/user-reference.js';
14
15
  import { getAssistantName } from '../daemon/identity-helpers.js';
15
16
  import { getCanonicalGuardianRequest } from '../memory/canonical-guardian-store.js';
16
17
  import { listActiveBindingsByAssistant } from '../memory/channel-guardian-store.js';
@@ -1214,10 +1215,14 @@ export class RelayConnection {
1214
1215
 
1215
1216
  updateCallSession(this.callSessionId, { status: 'waiting_on_user' });
1216
1217
 
1217
- // Start the heartbeat timer for periodic progress updates
1218
- this.accessRequestWaitStartedAt = Date.now();
1218
+ // Start the heartbeat timer for periodic progress updates.
1219
+ // Delay the first heartbeat by the estimated TTS playback duration so
1220
+ // the initial hold message finishes before any heartbeat fires.
1219
1221
  this.heartbeatSequence = 0;
1220
- this.scheduleNextHeartbeat();
1222
+ this.accessRequestHeartbeatTimer = setTimeout(() => {
1223
+ this.accessRequestWaitStartedAt = Date.now();
1224
+ this.scheduleNextHeartbeat();
1225
+ }, getTtsPlaybackDelayMs());
1221
1226
 
1222
1227
  // Poll the canonical request status
1223
1228
  this.accessRequestPollTimer = setInterval(() => {
@@ -1658,7 +1663,7 @@ export class RelayConnection {
1658
1663
  /**
1659
1664
  * Resolve a human-readable guardian label for voice wait copy.
1660
1665
  * Prefers displayName from the guardian binding metadata, falls back
1661
- * to @username, then "my guardian".
1666
+ * to @username, then the user's preferred name from USER.md.
1662
1667
  */
1663
1668
  private resolveGuardianLabel(): string {
1664
1669
  const assistantId = this.accessRequestAssistantId ?? DAEMON_INTERNAL_ASSISTANT_ID;
@@ -1690,7 +1695,8 @@ export class RelayConnection {
1690
1695
  // ignore malformed metadata
1691
1696
  }
1692
1697
  }
1693
- return 'my guardian';
1698
+
1699
+ return resolveUserReference();
1694
1700
  }
1695
1701
 
1696
1702
  /**