@vellumai/assistant 0.3.19 → 0.3.21

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (199)
  1. package/ARCHITECTURE.md +151 -15
  2. package/Dockerfile +1 -0
  3. package/README.md +40 -4
  4. package/bun.lock +139 -2
  5. package/docs/architecture/integrations.md +7 -11
  6. package/package.json +2 -1
  7. package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +54 -0
  8. package/src/__tests__/approval-primitive.test.ts +540 -0
  9. package/src/__tests__/assistant-feature-flag-guard.test.ts +206 -0
  10. package/src/__tests__/assistant-feature-flag-guardrails.test.ts +198 -0
  11. package/src/__tests__/assistant-feature-flags-integration.test.ts +272 -0
  12. package/src/__tests__/call-controller.test.ts +439 -108
  13. package/src/__tests__/channel-invite-transport.test.ts +264 -0
  14. package/src/__tests__/cli.test.ts +42 -1
  15. package/src/__tests__/config-schema.test.ts +11 -127
  16. package/src/__tests__/config-watcher.test.ts +0 -8
  17. package/src/__tests__/daemon-lifecycle.test.ts +1 -0
  18. package/src/__tests__/daemon-server-session-init.test.ts +8 -2
  19. package/src/__tests__/diff.test.ts +22 -0
  20. package/src/__tests__/guardian-action-copy-generator.test.ts +5 -0
  21. package/src/__tests__/guardian-action-grant-mint-consume.test.ts +300 -32
  22. package/src/__tests__/guardian-action-late-reply.test.ts +546 -1
  23. package/src/__tests__/guardian-actions-endpoint.test.ts +774 -0
  24. package/src/__tests__/guardian-control-plane-policy.test.ts +36 -3
  25. package/src/__tests__/guardian-dispatch.test.ts +124 -0
  26. package/src/__tests__/guardian-grant-minting.test.ts +6 -17
  27. package/src/__tests__/inbound-invite-redemption.test.ts +367 -0
  28. package/src/__tests__/invite-redemption-service.test.ts +306 -0
  29. package/src/__tests__/ipc-snapshot.test.ts +57 -0
  30. package/src/__tests__/notification-decision-fallback.test.ts +88 -0
  31. package/src/__tests__/sandbox-diagnostics.test.ts +6 -249
  32. package/src/__tests__/sandbox-host-parity.test.ts +6 -13
  33. package/src/__tests__/scoped-approval-grants.test.ts +6 -6
  34. package/src/__tests__/scoped-grant-security-matrix.test.ts +5 -4
  35. package/src/__tests__/script-proxy-session-manager.test.ts +1 -19
  36. package/src/__tests__/session-load-history-repair.test.ts +169 -2
  37. package/src/__tests__/session-runtime-assembly.test.ts +33 -5
  38. package/src/__tests__/skill-feature-flags-integration.test.ts +171 -0
  39. package/src/__tests__/skill-feature-flags.test.ts +188 -0
  40. package/src/__tests__/skill-load-feature-flag.test.ts +141 -0
  41. package/src/__tests__/skill-mirror-parity.test.ts +1 -0
  42. package/src/__tests__/skill-projection-feature-flag.test.ts +363 -0
  43. package/src/__tests__/system-prompt.test.ts +1 -1
  44. package/src/__tests__/terminal-sandbox.test.ts +142 -9
  45. package/src/__tests__/terminal-tools.test.ts +2 -93
  46. package/src/__tests__/thread-seed-composer.test.ts +18 -0
  47. package/src/__tests__/tool-approval-handler.test.ts +350 -0
  48. package/src/__tests__/trusted-contact-lifecycle-notifications.test.ts +8 -10
  49. package/src/__tests__/voice-scoped-grant-consumer.test.ts +46 -84
  50. package/src/agent/loop.ts +36 -1
  51. package/src/approvals/approval-primitive.ts +381 -0
  52. package/src/approvals/guardian-decision-primitive.ts +191 -0
  53. package/src/calls/call-controller.ts +252 -209
  54. package/src/calls/call-domain.ts +44 -6
  55. package/src/calls/guardian-dispatch.ts +48 -0
  56. package/src/calls/types.ts +1 -1
  57. package/src/calls/voice-session-bridge.ts +46 -30
  58. package/src/cli/core-commands.ts +0 -4
  59. package/src/cli/mcp.ts +58 -0
  60. package/src/cli.ts +76 -34
  61. package/src/config/__tests__/feature-flag-registry-guard.test.ts +179 -0
  62. package/src/config/assistant-feature-flags.ts +162 -0
  63. package/src/config/bundled-skills/api-mapping/icon.svg +18 -0
  64. package/src/config/bundled-skills/messaging/TOOLS.json +30 -0
  65. package/src/config/bundled-skills/messaging/tools/slack-delete-message.ts +24 -0
  66. package/src/config/bundled-skills/notifications/SKILL.md +1 -1
  67. package/src/config/bundled-skills/reminder/SKILL.md +49 -2
  68. package/src/config/bundled-skills/time-based-actions/SKILL.md +49 -2
  69. package/src/config/bundled-skills/voice-setup/SKILL.md +122 -0
  70. package/src/config/core-schema.ts +1 -1
  71. package/src/config/env-registry.ts +10 -0
  72. package/src/config/feature-flag-registry.json +61 -0
  73. package/src/config/loader.ts +22 -1
  74. package/src/config/mcp-schema.ts +46 -0
  75. package/src/config/sandbox-schema.ts +0 -39
  76. package/src/config/schema.ts +18 -2
  77. package/src/config/skill-state.ts +34 -0
  78. package/src/config/skills-schema.ts +0 -1
  79. package/src/config/skills.ts +9 -0
  80. package/src/config/system-prompt.ts +110 -46
  81. package/src/config/templates/SOUL.md +1 -1
  82. package/src/config/types.ts +19 -1
  83. package/src/config/vellum-skills/catalog.json +1 -1
  84. package/src/config/vellum-skills/guardian-verify-setup/SKILL.md +1 -0
  85. package/src/config/vellum-skills/sms-setup/SKILL.md +1 -1
  86. package/src/config/vellum-skills/telegram-setup/SKILL.md +6 -5
  87. package/src/config/vellum-skills/trusted-contacts/SKILL.md +105 -3
  88. package/src/config/vellum-skills/twilio-setup/SKILL.md +1 -1
  89. package/src/daemon/config-watcher.ts +0 -1
  90. package/src/daemon/daemon-control.ts +1 -1
  91. package/src/daemon/guardian-invite-intent.ts +124 -0
  92. package/src/daemon/handlers/avatar.ts +68 -0
  93. package/src/daemon/handlers/browser.ts +2 -2
  94. package/src/daemon/handlers/guardian-actions.ts +120 -0
  95. package/src/daemon/handlers/index.ts +4 -0
  96. package/src/daemon/handlers/sessions.ts +19 -0
  97. package/src/daemon/handlers/shared.ts +3 -1
  98. package/src/daemon/install-cli-launchers.ts +58 -13
  99. package/src/daemon/ipc-contract/guardian-actions.ts +53 -0
  100. package/src/daemon/ipc-contract/sessions.ts +8 -2
  101. package/src/daemon/ipc-contract/settings.ts +25 -2
  102. package/src/daemon/ipc-contract-inventory.json +10 -0
  103. package/src/daemon/ipc-contract.ts +4 -0
  104. package/src/daemon/lifecycle.ts +14 -2
  105. package/src/daemon/main.ts +1 -0
  106. package/src/daemon/providers-setup.ts +26 -1
  107. package/src/daemon/server.ts +1 -0
  108. package/src/daemon/session-lifecycle.ts +52 -7
  109. package/src/daemon/session-memory.ts +45 -0
  110. package/src/daemon/session-process.ts +258 -432
  111. package/src/daemon/session-runtime-assembly.ts +12 -0
  112. package/src/daemon/session-skill-tools.ts +14 -1
  113. package/src/daemon/session-tool-setup.ts +5 -0
  114. package/src/daemon/session.ts +11 -0
  115. package/src/daemon/shutdown-handlers.ts +11 -0
  116. package/src/daemon/tool-side-effects.ts +35 -9
  117. package/src/index.ts +2 -2
  118. package/src/mcp/client.ts +152 -0
  119. package/src/mcp/manager.ts +139 -0
  120. package/src/memory/conversation-display-order-migration.ts +44 -0
  121. package/src/memory/conversation-queries.ts +2 -0
  122. package/src/memory/conversation-store.ts +91 -0
  123. package/src/memory/db-init.ts +5 -1
  124. package/src/memory/embedding-local.ts +13 -8
  125. package/src/memory/guardian-action-store.ts +125 -2
  126. package/src/memory/ingress-invite-store.ts +95 -1
  127. package/src/memory/migrations/035-guardian-action-supersession.ts +23 -0
  128. package/src/memory/migrations/index.ts +2 -1
  129. package/src/memory/schema.ts +5 -1
  130. package/src/memory/scoped-approval-grants.ts +14 -5
  131. package/src/messaging/providers/slack/client.ts +12 -0
  132. package/src/messaging/providers/slack/types.ts +5 -0
  133. package/src/notifications/decision-engine.ts +49 -12
  134. package/src/notifications/emit-signal.ts +7 -0
  135. package/src/notifications/signal.ts +7 -0
  136. package/src/notifications/thread-seed-composer.ts +2 -1
  137. package/src/runtime/channel-approval-types.ts +16 -6
  138. package/src/runtime/channel-approvals.ts +19 -15
  139. package/src/runtime/channel-invite-transport.ts +85 -0
  140. package/src/runtime/channel-invite-transports/telegram.ts +105 -0
  141. package/src/runtime/guardian-action-grant-minter.ts +92 -35
  142. package/src/runtime/guardian-action-message-composer.ts +30 -0
  143. package/src/runtime/guardian-decision-types.ts +91 -0
  144. package/src/runtime/http-server.ts +23 -1
  145. package/src/runtime/ingress-service.ts +22 -0
  146. package/src/runtime/invite-redemption-service.ts +181 -0
  147. package/src/runtime/invite-redemption-templates.ts +39 -0
  148. package/src/runtime/routes/call-routes.ts +2 -1
  149. package/src/runtime/routes/guardian-action-routes.ts +206 -0
  150. package/src/runtime/routes/guardian-approval-interception.ts +66 -190
  151. package/src/runtime/routes/identity-routes.ts +73 -0
  152. package/src/runtime/routes/inbound-message-handler.ts +486 -394
  153. package/src/runtime/routes/pairing-routes.ts +4 -0
  154. package/src/security/encrypted-store.ts +31 -17
  155. package/src/security/keychain.ts +176 -2
  156. package/src/security/secure-keys.ts +97 -0
  157. package/src/security/tool-approval-digest.ts +1 -1
  158. package/src/tools/browser/browser-execution.ts +2 -2
  159. package/src/tools/browser/browser-manager.ts +46 -32
  160. package/src/tools/browser/browser-screencast.ts +2 -2
  161. package/src/tools/calls/call-start.ts +1 -1
  162. package/src/tools/executor.ts +22 -17
  163. package/src/tools/mcp/mcp-tool-factory.ts +100 -0
  164. package/src/tools/network/script-proxy/session-manager.ts +1 -5
  165. package/src/tools/registry.ts +64 -1
  166. package/src/tools/skills/load.ts +22 -8
  167. package/src/tools/system/avatar-generator.ts +119 -0
  168. package/src/tools/system/navigate-settings.ts +65 -0
  169. package/src/tools/system/open-system-settings.ts +75 -0
  170. package/src/tools/system/voice-config.ts +121 -32
  171. package/src/tools/terminal/backends/native.ts +40 -19
  172. package/src/tools/terminal/backends/types.ts +3 -3
  173. package/src/tools/terminal/parser.ts +1 -1
  174. package/src/tools/terminal/sandbox-diagnostics.ts +6 -87
  175. package/src/tools/terminal/sandbox.ts +1 -12
  176. package/src/tools/terminal/shell.ts +3 -31
  177. package/src/tools/tool-approval-handler.ts +141 -3
  178. package/src/tools/tool-manifest.ts +6 -0
  179. package/src/tools/types.ts +10 -2
  180. package/src/util/diff.ts +36 -13
  181. package/Dockerfile.sandbox +0 -5
  182. package/src/__tests__/doordash-client.test.ts +0 -187
  183. package/src/__tests__/doordash-session.test.ts +0 -154
  184. package/src/__tests__/signup-e2e.test.ts +0 -354
  185. package/src/__tests__/terminal-sandbox-docker.test.ts +0 -1065
  186. package/src/__tests__/terminal-sandbox.integration.test.ts +0 -180
  187. package/src/cli/doordash.ts +0 -1057
  188. package/src/config/bundled-skills/doordash/SKILL.md +0 -163
  189. package/src/config/templates/LOOKS.md +0 -25
  190. package/src/doordash/cart-queries.ts +0 -787
  191. package/src/doordash/client.ts +0 -1016
  192. package/src/doordash/order-queries.ts +0 -85
  193. package/src/doordash/queries.ts +0 -13
  194. package/src/doordash/query-extractor.ts +0 -94
  195. package/src/doordash/search-queries.ts +0 -203
  196. package/src/doordash/session.ts +0 -84
  197. package/src/doordash/store-queries.ts +0 -246
  198. package/src/doordash/types.ts +0 -367
  199. package/src/tools/terminal/backends/docker.ts +0 -379
@@ -12,11 +12,15 @@ import { getGatewayInternalBaseUrl } from '../config/env.js';
12
12
  import type { ServerMessage } from '../daemon/ipc-contract.js';
13
13
  import type { GuardianRuntimeContext } from '../daemon/session-runtime-assembly.js';
14
14
  import {
15
+ backfillSupersessionMetadata,
16
+ expireGuardianActionRequest,
17
+ getByPendingQuestionId,
15
18
  getDeliveriesByRequestId,
16
19
  getPendingRequestByCallSessionId,
17
20
  markTimedOutWithReason,
18
21
  } from '../memory/guardian-action-store.js';
19
22
  import { revokeScopedApprovalGrantsForContext } from '../memory/scoped-approval-grants.js';
23
+ import { computeToolApprovalDigest } from '../security/tool-approval-digest.js';
20
24
  import { getLogger } from '../util/logger.js';
21
25
  import { readHttpToken } from '../util/platform.js';
22
26
  import { getMaxCallDurationMs, getUserConsultationTimeoutMs, SILENCE_TIMEOUT_MS } from './call-constants.js';
@@ -30,7 +34,6 @@ import {
30
34
  recordCallEvent,
31
35
  updateCallSession,
32
36
  } from './call-store.js';
33
- import { computeToolApprovalDigest } from '../security/tool-approval-digest.js';
34
37
  import { sendGuardianExpiryNotices } from './guardian-action-sweep.js';
35
38
  import { dispatchGuardianQuestion } from './guardian-dispatch.js';
36
39
  import type { RelayConnection } from './relay-server.js';
@@ -39,7 +42,19 @@ import { startVoiceTurn, type VoiceTurnHandle } from './voice-session-bridge.js'
39
42
 
40
43
  const log = getLogger('call-controller');
41
44
 
42
- type ControllerState = 'idle' | 'processing' | 'waiting_on_user' | 'speaking';
45
+ type ControllerState = 'idle' | 'processing' | 'speaking';
46
+
47
+ /**
48
+ * Tracks a pending guardian consultation independently of the controller's
49
+ * turn state. This allows the call to continue normal turn processing
50
+ * (idle -> processing -> speaking) while a consultation is outstanding.
51
+ */
52
+ interface PendingConsultation {
53
+ questionText: string;
54
+ questionId: string;
55
+ toolApprovalMeta: { toolName: string; inputDigest: string } | null;
56
+ timer: ReturnType<typeof setTimeout>;
57
+ }
43
58
 
44
59
  const ASK_GUARDIAN_CAPTURE_REGEX = /\[ASK_GUARDIAN:\s*(.+?)\]/;
45
60
  const ASK_GUARDIAN_MARKER_REGEX = /\[ASK_GUARDIAN:\s*.+?\]/g;
@@ -176,15 +191,18 @@ export class CallController {
176
191
  private silenceTimer: ReturnType<typeof setTimeout> | null = null;
177
192
  private durationTimer: ReturnType<typeof setTimeout> | null = null;
178
193
  private durationWarningTimer: ReturnType<typeof setTimeout> | null = null;
179
- private consultationTimer: ReturnType<typeof setTimeout> | null = null;
194
+ /**
195
+ * Tracks the currently pending guardian consultation, if any. Decoupled
196
+ * from the controller's turn state so callers can continue to trigger
197
+ * normal turns while consultation is outstanding.
198
+ */
199
+ private pendingConsultation: PendingConsultation | null = null;
180
200
  private durationEndTimer: ReturnType<typeof setTimeout> | null = null;
181
201
  private task: string | null;
182
202
  /** True when the call session was created via the inbound path (no outbound task). */
183
203
  private isInbound: boolean;
184
- /** Instructions queued while an LLM turn is in-flight or during waiting_on_user */
204
+ /** Instructions queued while an LLM turn is in-flight or during pending consultation */
185
205
  private pendingInstructions: string[] = [];
186
- /** Caller utterances queued while waiting_on_user to prevent re-entrant turns */
187
- private pendingCallerUtterances: Array<{transcript: string, speaker?: PromptSpeakerContext}> = [];
188
206
  /** Ensures the call opener is triggered at most once per call. */
189
207
  private initialGreetingStarted = false;
190
208
  /** Marks that the next caller turn should be treated as an opening acknowledgment. */
@@ -248,6 +266,15 @@ export class CallController {
248
266
  return this.state;
249
267
  }
250
268
 
269
+ /**
270
+ * Returns the question ID of the currently pending guardian consultation,
271
+ * or null if no consultation is active. Used by answerCall to match
272
+ * incoming answers to the correct consultation record.
273
+ */
274
+ getPendingConsultationQuestionId(): string | null {
275
+ return this.pendingConsultation?.questionId ?? null;
276
+ }
277
+
251
278
  /**
252
279
  * Update guardian trust context for subsequent LLM turns.
253
280
  */
@@ -270,19 +297,10 @@ export class CallController {
270
297
 
271
298
  /**
272
299
  * Handle a final caller utterance from the ConversationRelay.
300
+ * Caller utterances always trigger normal turns, even when a guardian
301
+ * consultation is pending — the consultation is tracked separately.
273
302
  */
274
303
  async handleCallerUtterance(transcript: string, speaker?: PromptSpeakerContext): Promise<void> {
275
- // Do not start a new turn while waiting for guardian input — queue
276
- // the utterance so it can be processed after the answer arrives.
277
- if (this.state === 'waiting_on_user') {
278
- log.warn(
279
- { callSessionId: this.callSessionId },
280
- 'Caller utterance received while waiting_on_user — queued for after answer.',
281
- );
282
- this.pendingCallerUtterances.push({ transcript, speaker });
283
- return;
284
- }
285
-
286
304
  const interruptedInFlight = this.state === 'processing' || this.state === 'speaking';
287
305
  // If we're already processing or speaking, abort the in-flight generation
288
306
  if (interruptedInFlight) {
@@ -318,66 +336,39 @@ export class CallController {
318
336
  }
319
337
 
320
338
  /**
321
- * Called when the user (in the chat UI) answers a pending question.
339
+ * Called when the guardian (via chat UI or channel) answers a pending
340
+ * consultation question. Acceptance is gated on having an active
341
+ * pending consultation record, not on controller turn state — so
342
+ * answers can arrive while the controller is idle, processing, or
343
+ * speaking.
322
344
  */
323
345
  async handleUserAnswer(answerText: string): Promise<boolean> {
324
- if (this.state !== 'waiting_on_user') {
346
+ if (!this.pendingConsultation) {
325
347
  log.warn(
326
348
  { callSessionId: this.callSessionId, state: this.state },
327
- 'handleUserAnswer called but controller is not in waiting_on_user state',
349
+ 'handleUserAnswer called but no pending consultation exists',
328
350
  );
329
351
  return false;
330
352
  }
331
353
 
332
- // Clear the consultation timeout
333
- if (this.consultationTimer) {
334
- clearTimeout(this.consultationTimer);
335
- this.consultationTimer = null;
336
- }
337
-
338
- // Defensive: await any lingering turn promise before starting a new one.
339
- if (this.currentTurnPromise) {
340
- const teardownPromise = this.currentTurnPromise;
341
- this.currentTurnPromise = null;
342
- await Promise.race([
343
- teardownPromise.catch(() => {}),
344
- new Promise<void>(resolve => setTimeout(resolve, 2000)),
345
- ]);
346
- }
354
+ // Clear the consultation timeout and record
355
+ clearTimeout(this.pendingConsultation.timer);
356
+ this.pendingConsultation = null;
347
357
 
348
- this.state = 'processing';
349
358
  updateCallSession(this.callSessionId, { status: 'in_progress' });
350
359
 
351
- // Merge any instructions that were queued during the waiting_on_user
352
- // state into a single user message alongside the answer to avoid
353
- // consecutive user-role messages (which violate API role-alternation
354
- // requirements).
355
- const parts: string[] = [];
356
- for (const instr of this.pendingInstructions) {
357
- parts.push(`[USER_INSTRUCTION: ${instr}]`);
358
- }
359
- this.pendingInstructions = [];
360
- parts.push(`[USER_ANSWERED: ${answerText}]`);
360
+ // Inject the answer as a queued instruction so it merges into the
361
+ // next turn naturally, respecting role-alternation. If the controller
362
+ // is idle the instruction flush will fire a turn immediately.
363
+ this.pendingInstructions.push(`[USER_ANSWERED: ${answerText}]`);
361
364
 
362
- const content = parts.join('\n');
365
+ // If the controller is idle, flush instructions immediately to
366
+ // deliver the answer. If processing/speaking, the answer will be
367
+ // delivered when the current turn completes via flushPendingInstructions.
368
+ if (this.state === 'idle') {
369
+ this.flushPendingInstructions();
370
+ }
363
371
 
364
- // Fire-and-forget: unblock the caller so the HTTP response and answer
365
- // persistence happen immediately, before LLM streaming begins.
366
- this.runTurn(content)
367
- .then(() => {
368
- // If the answer turn ended the call (e.g. [END_CALL]), don't drain
369
- // queued utterances — just discard them to avoid starting a fresh
370
- // turn on a dead session.
371
- if (this.state === 'idle' && this.isCallCompleted()) {
372
- this.pendingCallerUtterances = [];
373
- return;
374
- }
375
- this.drainPendingCallerUtterances();
376
- })
377
- .catch((err) => {
378
- this.pendingCallerUtterances = [];
379
- log.error({ err, callSessionId: this.callSessionId }, 'runTurn failed after user answer');
380
- });
381
372
  return true;
382
373
  }
383
374
 
@@ -386,17 +377,16 @@ export class CallController {
386
377
  * The instruction is formatted as a dedicated marker that the system prompt
387
378
  * tells the model to treat as high-priority steering input.
388
379
  *
389
- * When the LLM is actively processing or speaking, or when the controller
390
- * is waiting on a user answer, the instruction is queued and spliced into
391
- * the conversation at the correct chronological position once the current
392
- * turn completes.
380
+ * When the LLM is actively processing or speaking, the instruction is
381
+ * queued and spliced into the conversation at the correct chronological
382
+ * position once the current turn completes.
393
383
  */
394
384
  async handleUserInstruction(instructionText: string): Promise<void> {
395
385
  recordCallEvent(this.callSessionId, 'user_instruction_relayed', { instruction: instructionText });
396
386
 
397
387
  // Queue the instruction when it cannot be safely appended right now
398
- if (this.state === 'processing' || this.state === 'speaking' || this.state === 'waiting_on_user') {
399
- this.pendingInstructions.push(instructionText);
388
+ if (this.state === 'processing' || this.state === 'speaking') {
389
+ this.pendingInstructions.push(`[USER_INSTRUCTION: ${instructionText}]`);
400
390
  return;
401
391
  }
402
392
 
@@ -432,7 +422,7 @@ export class CallController {
432
422
  if (this.silenceTimer) clearTimeout(this.silenceTimer);
433
423
  if (this.durationTimer) clearTimeout(this.durationTimer);
434
424
  if (this.durationWarningTimer) clearTimeout(this.durationWarningTimer);
435
- if (this.consultationTimer) clearTimeout(this.consultationTimer);
425
+ if (this.pendingConsultation) { clearTimeout(this.pendingConsultation.timer); this.pendingConsultation = null; }
436
426
  if (this.durationEndTimer) { clearTimeout(this.durationEndTimer); this.durationEndTimer = null; }
437
427
  this.llmRunVersion++;
438
428
  this.abortCurrentTurn();
@@ -692,105 +682,101 @@ export class CallController {
692
682
  + `The unanswered question was: "${questionText}"`,
693
683
  );
694
684
  // Fall through to normal turn completion (idle + flushPendingInstructions)
685
+ } else if (this.pendingInstructions.some((instr) => instr.startsWith('[USER_ANSWERED:'))) {
686
+ // A guardian answer arrived mid-turn and is queued in
687
+ // pendingInstructions but hasn't been flushed yet. The in-flight
688
+ // LLM response was generated without knowledge of this answer, so
689
+ // creating a new consultation now would supersede the old one and
690
+ // desynchronize the flow. Skip this consultation — the answer will
691
+ // be flushed on the next turn, and if the model still needs to
692
+ // consult a guardian, it will emit another ASK_GUARDIAN then.
693
+ log.info({ callSessionId: this.callSessionId }, 'Deferring ASK_GUARDIAN — queued USER_ANSWERED pending');
694
+ recordCallEvent(this.callSessionId, 'guardian_consult_deferred', { question: questionText });
695
+ // Fall through to normal turn completion (idle + flushPendingInstructions)
695
696
  } else {
696
- const pendingQuestion = createPendingQuestion(this.callSessionId, questionText);
697
- this.state = 'waiting_on_user';
698
- updateCallSession(this.callSessionId, { status: 'waiting_on_user' });
699
- recordCallEvent(this.callSessionId, 'user_question_asked', { question: questionText });
700
-
701
- // Notify the conversation that a question was asked
702
- const session = getCallSession(this.callSessionId);
703
- if (session) {
704
- fireCallQuestionNotifier(session.conversationId, this.callSessionId, questionText);
705
-
706
- // Dispatch guardian action request to all configured channels
707
- void dispatchGuardianQuestion({
708
- callSessionId: this.callSessionId,
709
- conversationId: session.conversationId,
710
- assistantId: this.assistantId,
711
- pendingQuestion,
712
- toolName: toolApprovalMeta?.toolName,
713
- inputDigest: toolApprovalMeta?.inputDigest,
714
- });
715
- }
716
-
717
- // Set a consultation timeout
718
- this.consultationTimer = setTimeout(() => {
719
- if (this.state !== 'waiting_on_user') return;
720
-
721
- log.info({ callSessionId: this.callSessionId }, 'User consultation timed out');
722
-
723
- // Mark the linked guardian action request as timed out and
724
- // send expiry notices to guardian destinations. Deliveries
725
- // must be captured before markTimedOutWithReason changes
726
- // their status.
727
- const pendingActionRequest = getPendingRequestByCallSessionId(this.callSessionId);
728
- if (pendingActionRequest) {
729
- const deliveries = getDeliveriesByRequestId(pendingActionRequest.id);
730
- markTimedOutWithReason(pendingActionRequest.id, 'call_timeout');
697
+ // Determine the effective tool metadata for this ask. If the new
698
+ // ask has structured tool metadata, use it; otherwise inherit from
699
+ // the prior pending consultation (preserves tool scope on re-asks).
700
+ const effectiveToolMeta = toolApprovalMeta
701
+ ? { toolName: toolApprovalMeta.toolName, inputDigest: toolApprovalMeta.inputDigest }
702
+ : this.pendingConsultation?.toolApprovalMeta ?? null;
703
+
704
+ // Coalesce repeated identical asks: if a consultation is already
705
+ // pending for the same tool/action (or same informational question),
706
+ // avoid churning requests and just keep the existing one.
707
+ if (this.pendingConsultation) {
708
+ const isSameToolAction =
709
+ effectiveToolMeta && this.pendingConsultation.toolApprovalMeta
710
+ ? effectiveToolMeta.toolName === this.pendingConsultation.toolApprovalMeta.toolName
711
+ && effectiveToolMeta.inputDigest === this.pendingConsultation.toolApprovalMeta.inputDigest
712
+ : !effectiveToolMeta && !this.pendingConsultation.toolApprovalMeta
713
+ && questionText === this.pendingConsultation.questionText;
714
+
715
+ if (isSameToolAction) {
716
+ // Same tool/action — coalesce. Keep the existing consultation
717
+ // alive and skip creating a new request.
731
718
  log.info(
732
- { callSessionId: this.callSessionId, requestId: pendingActionRequest.id },
733
- 'Marked guardian action request as timed out',
719
+ { callSessionId: this.callSessionId, questionId: this.pendingConsultation.questionId },
720
+ 'Coalescing repeated ASK_GUARDIAN same tool/action already pending',
734
721
  );
735
- void sendGuardianExpiryNotices(
736
- deliveries,
737
- pendingActionRequest.assistantId,
738
- getGatewayInternalBaseUrl(),
739
- readHttpToken() ?? undefined,
740
- ).catch((err) => {
741
- log.error(
742
- { err, callSessionId: this.callSessionId, requestId: pendingActionRequest.id },
743
- 'Failed to send guardian action expiry notices after call timeout',
722
+ recordCallEvent(this.callSessionId, 'guardian_consult_coalesced', { question: questionText });
723
+ // Fall through to normal turn completion (idle + flushPendingInstructions)
724
+ } else {
725
+ // Materially different intent — supersede the old consultation.
726
+ clearTimeout(this.pendingConsultation.timer);
727
+
728
+ // Expire the previous consultation's storage records so stale
729
+ // guardian answers cannot match the old request.
730
+ expirePendingQuestions(this.callSessionId);
731
+ const previousRequest = getPendingRequestByCallSessionId(this.callSessionId);
732
+ if (previousRequest) {
733
+ // Immediately expire with 'superseded' reason to prevent
734
+ // stale answers from resolving the old request.
735
+ expireGuardianActionRequest(previousRequest.id, 'superseded');
736
+ log.info(
737
+ { callSessionId: this.callSessionId, requestId: previousRequest.id },
738
+ 'Superseded guardian action request (materially different intent)',
744
739
  );
745
- });
746
- }
747
-
748
- // Expire pending questions and update call state
749
- expirePendingQuestions(this.callSessionId);
750
- this.state = 'idle';
751
- updateCallSession(this.callSessionId, { status: 'in_progress' });
752
- this.guardianUnavailableForCall = true;
753
- recordCallEvent(this.callSessionId, 'guardian_consultation_timed_out', { question: questionText });
754
-
755
- // Restart silence detection before firing the generated turn
756
- this.resetSilenceTimer();
757
-
758
- // Build a generated turn instruction instead of hardcoded text.
759
- // Merge any queued instructions and caller utterances into the
760
- // timeout turn to avoid concurrent-turn races.
761
- const timeoutInstruction =
762
- `[GUARDIAN_TIMEOUT] Your guardian did not respond in time to your question: "${questionText}". `
763
- + `Apologize to the caller for the delay, let them know you were unable to reach your guardian, `
764
- + `ask if they would like to leave a message or receive a callback, `
765
- + `and ask if there are any other questions you can help with right now.`;
766
-
767
- const parts: string[] = [];
768
- for (const instr of this.pendingInstructions) {
769
- parts.push(`[USER_INSTRUCTION: ${instr}]`);
770
- }
771
- this.pendingInstructions = [];
772
- parts.push(`[USER_INSTRUCTION: ${timeoutInstruction}]`);
773
-
774
- if (this.pendingCallerUtterances.length > 0) {
775
- const latest = this.pendingCallerUtterances[this.pendingCallerUtterances.length - 1];
776
- this.pendingCallerUtterances = [];
777
- const callerContent = this.formatCallerUtterance(latest.transcript, latest.speaker);
778
- if (callerContent.length > 0) {
779
- parts.push(callerContent);
780
740
  }
781
- }
782
741
 
783
- const content = parts.join('\n');
784
- this.runTurn(content).catch((err) =>
785
- log.error({ err, callSessionId: this.callSessionId }, 'runTurn failed after guardian consultation timeout'),
786
- );
787
- }, getUserConsultationTimeoutMs());
788
- return;
742
+ this.pendingConsultation = null;
743
+
744
+ // Dispatch the new consultation with effective tool metadata.
745
+ // The previous request ID is passed through so the dispatch
746
+ // can backfill supersession chain metadata (superseded_by_request_id)
747
+ // once the new request has been created.
748
+ this.dispatchNewConsultation(questionText, effectiveToolMeta, previousRequest?.id ?? null);
749
+ }
750
+ } else {
751
+ // No prior consultation — dispatch fresh
752
+ this.dispatchNewConsultation(questionText, effectiveToolMeta, null);
753
+ }
789
754
  }
790
755
  }
791
756
 
792
757
  // Check for END_CALL marker
793
758
  if (responseText.includes(END_CALL_MARKER)) {
759
+ // Clear any pending consultation before completing the call.
760
+ // Without this, the consultation timeout can fire on an already-ended
761
+ // call, overwriting 'completed' status back to 'in_progress' and
762
+ // starting a new LLM turn on a dead session. Similarly, a late
763
+ // handleUserAnswer could be accepted since pendingConsultation is
764
+ // still non-null.
765
+ if (this.pendingConsultation) {
766
+ clearTimeout(this.pendingConsultation.timer);
767
+
768
+ // Expire store-side consultation records so clients don't observe
769
+ // a completed call with a dangling pendingQuestion, and guardian
770
+ // replies are cleanly rejected instead of hitting answerCall failures.
771
+ expirePendingQuestions(this.callSessionId);
772
+ const previousRequest = getPendingRequestByCallSessionId(this.callSessionId);
773
+ if (previousRequest) {
774
+ expireGuardianActionRequest(previousRequest.id, 'cancelled');
775
+ }
776
+
777
+ this.pendingConsultation = null;
778
+ }
779
+
794
780
  const currentSession = getCallSession(this.callSessionId);
795
781
  const shouldNotifyCompletion = currentSession
796
782
  ? currentSession.status !== 'completed' && currentSession.status !== 'failed' && currentSession.status !== 'cancelled'
@@ -875,14 +861,114 @@ export class CallController {
875
861
  }
876
862
 
877
863
  /**
878
- * Check whether the underlying call session has already ended.
879
- * Used to guard against post-completion work (e.g. draining queued
880
- * utterances after an [END_CALL] turn).
864
+ * Create a new consultation: persist a pending question, dispatch
865
+ * guardian action request to channels, and start the consultation timer.
866
+ *
867
+ * If `supersededRequestId` is provided, backfills the supersession
868
+ * chain after the new request is created.
881
869
  */
882
- private isCallCompleted(): boolean {
870
+ private dispatchNewConsultation(
871
+ questionText: string,
872
+ effectiveToolMeta: { toolName: string; inputDigest: string } | null,
873
+ supersededRequestId: string | null,
874
+ ): void {
875
+ const pendingQuestion = createPendingQuestion(this.callSessionId, questionText);
876
+ updateCallSession(this.callSessionId, { status: 'waiting_on_user' });
877
+ recordCallEvent(this.callSessionId, 'user_question_asked', { question: questionText });
878
+
879
+ // Notify the conversation that a question was asked
883
880
  const session = getCallSession(this.callSessionId);
884
- if (!session) return true;
885
- return session.status === 'completed' || session.status === 'failed' || session.status === 'cancelled';
881
+ if (session) {
882
+ fireCallQuestionNotifier(session.conversationId, this.callSessionId, questionText);
883
+
884
+ // Dispatch guardian action request to all configured channels
885
+ // Capture the pending question ID in a closure for stable lookup
886
+ // after the async dispatch completes — avoids a racy
887
+ // getPendingRequestByCallSessionId lookup that could return a
888
+ // different request if another supersession occurs during the gap.
889
+ const stablePendingQuestionId = pendingQuestion.id;
890
+ void dispatchGuardianQuestion({
891
+ callSessionId: this.callSessionId,
892
+ conversationId: session.conversationId,
893
+ assistantId: this.assistantId,
894
+ pendingQuestion,
895
+ toolName: effectiveToolMeta?.toolName,
896
+ inputDigest: effectiveToolMeta?.inputDigest,
897
+ }).then(() => {
898
+ // Backfill supersession chain: now that the new request exists in
899
+ // the store, update the old request's superseded_by_request_id.
900
+ if (supersededRequestId) {
901
+ const newRequest = getByPendingQuestionId(stablePendingQuestionId);
902
+ if (newRequest) {
903
+ backfillSupersessionMetadata(supersededRequestId, newRequest.id);
904
+ }
905
+ }
906
+ });
907
+ }
908
+
909
+ // Set a consultation timeout tied to this specific consultation
910
+ // record, not the global controller state.
911
+ const consultationTimer = setTimeout(() => {
912
+ // Only fire if this consultation is still the active one
913
+ if (!this.pendingConsultation || this.pendingConsultation.questionId !== pendingQuestion.id) return;
914
+
915
+ log.info({ callSessionId: this.callSessionId }, 'Guardian consultation timed out');
916
+
917
+ // Mark the linked guardian action request as timed out and
918
+ // send expiry notices to guardian destinations. Deliveries
919
+ // must be captured before markTimedOutWithReason changes
920
+ // their status.
921
+ const pendingActionRequest = getPendingRequestByCallSessionId(this.callSessionId);
922
+ if (pendingActionRequest) {
923
+ const deliveries = getDeliveriesByRequestId(pendingActionRequest.id);
924
+ markTimedOutWithReason(pendingActionRequest.id, 'call_timeout');
925
+ log.info(
926
+ { callSessionId: this.callSessionId, requestId: pendingActionRequest.id },
927
+ 'Marked guardian action request as timed out',
928
+ );
929
+ void sendGuardianExpiryNotices(
930
+ deliveries,
931
+ pendingActionRequest.assistantId,
932
+ getGatewayInternalBaseUrl(),
933
+ readHttpToken() ?? undefined,
934
+ ).catch((err) => {
935
+ log.error(
936
+ { err, callSessionId: this.callSessionId, requestId: pendingActionRequest.id },
937
+ 'Failed to send guardian action expiry notices after call timeout',
938
+ );
939
+ });
940
+ }
941
+
942
+ // Expire pending questions and update call state
943
+ expirePendingQuestions(this.callSessionId);
944
+ this.pendingConsultation = null;
945
+ updateCallSession(this.callSessionId, { status: 'in_progress' });
946
+ this.guardianUnavailableForCall = true;
947
+ recordCallEvent(this.callSessionId, 'guardian_consultation_timed_out', { question: questionText });
948
+
949
+ // Inject timeout instruction so the model addresses it on the
950
+ // next turn. If idle, flush immediately; otherwise it merges
951
+ // into the next turn completion.
952
+ const timeoutInstruction =
953
+ `[GUARDIAN_TIMEOUT] Your guardian did not respond in time to your question: "${questionText}". `
954
+ + `Apologize to the caller for the delay, let them know you were unable to reach your guardian, `
955
+ + `ask if they would like to leave a message or receive a callback, `
956
+ + `and ask if there are any other questions you can help with right now.`;
957
+
958
+ this.pendingInstructions.push(timeoutInstruction);
959
+
960
+ if (this.state === 'idle') {
961
+ this.resetSilenceTimer();
962
+ this.flushPendingInstructions();
963
+ }
964
+ }, getUserConsultationTimeoutMs());
965
+
966
+ this.pendingConsultation = {
967
+ questionText,
968
+ questionId: pendingQuestion.id,
969
+ toolApprovalMeta: effectiveToolMeta,
970
+ timer: consultationTimer,
971
+ };
886
972
  }
887
973
 
888
974
  /**
@@ -892,7 +978,7 @@ export class CallController {
892
978
  if (this.pendingInstructions.length === 0) return;
893
979
 
894
980
  const parts = this.pendingInstructions.map(
895
- (instr) => `[USER_INSTRUCTION: ${instr}]`,
981
+ (instr) => instr.startsWith('[') ? instr : `[USER_INSTRUCTION: ${instr}]`,
896
982
  );
897
983
  this.pendingInstructions = [];
898
984
 
@@ -906,49 +992,6 @@ export class CallController {
906
992
  );
907
993
  }
908
994
 
909
- /**
910
- * Drain caller utterances that were queued while waiting_on_user.
911
- * Only the most recent utterance is processed — older ones are discarded
912
- * as stale since the caller likely moved on.
913
- *
914
- * @param contentPrefix — optional string (e.g. instruction markers) to
915
- * prepend to the turn content so instructions and the caller utterance
916
- * are sent as a single turn, avoiding concurrent-turn races.
917
- */
918
- private drainPendingCallerUtterances(contentPrefix?: string): void {
919
- if (this.pendingCallerUtterances.length === 0) return;
920
-
921
- // Keep only the most recent utterance; discard stale older ones
922
- const latest = this.pendingCallerUtterances[this.pendingCallerUtterances.length - 1];
923
- this.pendingCallerUtterances = [];
924
-
925
- if (contentPrefix) {
926
- // Merge prefix content with the caller utterance into a single turn
927
- let callerContent = this.formatCallerUtterance(latest.transcript, latest.speaker);
928
-
929
- // Preserve opening-ack semantics when draining bypasses handleCallerUtterance
930
- if (this.awaitingOpeningAck) {
931
- callerContent = callerContent.length > 0
932
- ? `${CALL_OPENING_ACK_MARKER}\n${callerContent}`
933
- : CALL_OPENING_ACK_MARKER;
934
- this.awaitingOpeningAck = false;
935
- this.lastSentWasOpener = false;
936
- }
937
-
938
- const combined = `${contentPrefix}\n${callerContent}`;
939
- this.resetSilenceTimer();
940
- this.runTurn(combined).catch((err) =>
941
- log.error({ err, callSessionId: this.callSessionId }, 'runTurn failed after draining queued caller utterance with prefix'),
942
- );
943
- return;
944
- }
945
-
946
- // Fire-and-forget so we don't block the current turn's cleanup.
947
- this.handleCallerUtterance(latest.transcript, latest.speaker).catch((err) =>
948
- log.error({ err, callSessionId: this.callSessionId }, 'runTurn failed after draining queued caller utterance'),
949
- );
950
- }
951
-
952
995
  private startDurationTimer(): void {
953
996
  const maxDurationMs = getMaxCallDurationMs();
954
997
  const warningMs = maxDurationMs - 2 * 60 * 1000; // 2 minutes before max