npm - @vellumai/assistant - Versions diffs - 0.3.19 → 0.3.21 - Mend

@vellumai/assistant 0.3.19 → 0.3.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (199)

package/ARCHITECTURE.md +151 -15
package/Dockerfile +1 -0
package/README.md +40 -4
package/bun.lock +139 -2
package/docs/architecture/integrations.md +7 -11
package/package.json +2 -1
package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +54 -0
package/src/__tests__/approval-primitive.test.ts +540 -0
package/src/__tests__/assistant-feature-flag-guard.test.ts +206 -0
package/src/__tests__/assistant-feature-flag-guardrails.test.ts +198 -0
package/src/__tests__/assistant-feature-flags-integration.test.ts +272 -0
package/src/__tests__/call-controller.test.ts +439 -108
package/src/__tests__/channel-invite-transport.test.ts +264 -0
package/src/__tests__/cli.test.ts +42 -1
package/src/__tests__/config-schema.test.ts +11 -127
package/src/__tests__/config-watcher.test.ts +0 -8
package/src/__tests__/daemon-lifecycle.test.ts +1 -0
package/src/__tests__/daemon-server-session-init.test.ts +8 -2
package/src/__tests__/diff.test.ts +22 -0
package/src/__tests__/guardian-action-copy-generator.test.ts +5 -0
package/src/__tests__/guardian-action-grant-mint-consume.test.ts +300 -32
package/src/__tests__/guardian-action-late-reply.test.ts +546 -1
package/src/__tests__/guardian-actions-endpoint.test.ts +774 -0
package/src/__tests__/guardian-control-plane-policy.test.ts +36 -3
package/src/__tests__/guardian-dispatch.test.ts +124 -0
package/src/__tests__/guardian-grant-minting.test.ts +6 -17
package/src/__tests__/inbound-invite-redemption.test.ts +367 -0
package/src/__tests__/invite-redemption-service.test.ts +306 -0
package/src/__tests__/ipc-snapshot.test.ts +57 -0
package/src/__tests__/notification-decision-fallback.test.ts +88 -0
package/src/__tests__/sandbox-diagnostics.test.ts +6 -249
package/src/__tests__/sandbox-host-parity.test.ts +6 -13
package/src/__tests__/scoped-approval-grants.test.ts +6 -6
package/src/__tests__/scoped-grant-security-matrix.test.ts +5 -4
package/src/__tests__/script-proxy-session-manager.test.ts +1 -19
package/src/__tests__/session-load-history-repair.test.ts +169 -2
package/src/__tests__/session-runtime-assembly.test.ts +33 -5
package/src/__tests__/skill-feature-flags-integration.test.ts +171 -0
package/src/__tests__/skill-feature-flags.test.ts +188 -0
package/src/__tests__/skill-load-feature-flag.test.ts +141 -0
package/src/__tests__/skill-mirror-parity.test.ts +1 -0
package/src/__tests__/skill-projection-feature-flag.test.ts +363 -0
package/src/__tests__/system-prompt.test.ts +1 -1
package/src/__tests__/terminal-sandbox.test.ts +142 -9
package/src/__tests__/terminal-tools.test.ts +2 -93
package/src/__tests__/thread-seed-composer.test.ts +18 -0
package/src/__tests__/tool-approval-handler.test.ts +350 -0
package/src/__tests__/trusted-contact-lifecycle-notifications.test.ts +8 -10
package/src/__tests__/voice-scoped-grant-consumer.test.ts +46 -84
package/src/agent/loop.ts +36 -1
package/src/approvals/approval-primitive.ts +381 -0
package/src/approvals/guardian-decision-primitive.ts +191 -0
package/src/calls/call-controller.ts +252 -209
package/src/calls/call-domain.ts +44 -6
package/src/calls/guardian-dispatch.ts +48 -0
package/src/calls/types.ts +1 -1
package/src/calls/voice-session-bridge.ts +46 -30
package/src/cli/core-commands.ts +0 -4
package/src/cli/mcp.ts +58 -0
package/src/cli.ts +76 -34
package/src/config/__tests__/feature-flag-registry-guard.test.ts +179 -0
package/src/config/assistant-feature-flags.ts +162 -0
package/src/config/bundled-skills/api-mapping/icon.svg +18 -0
package/src/config/bundled-skills/messaging/TOOLS.json +30 -0
package/src/config/bundled-skills/messaging/tools/slack-delete-message.ts +24 -0
package/src/config/bundled-skills/notifications/SKILL.md +1 -1
package/src/config/bundled-skills/reminder/SKILL.md +49 -2
package/src/config/bundled-skills/time-based-actions/SKILL.md +49 -2
package/src/config/bundled-skills/voice-setup/SKILL.md +122 -0
package/src/config/core-schema.ts +1 -1
package/src/config/env-registry.ts +10 -0
package/src/config/feature-flag-registry.json +61 -0
package/src/config/loader.ts +22 -1
package/src/config/mcp-schema.ts +46 -0
package/src/config/sandbox-schema.ts +0 -39
package/src/config/schema.ts +18 -2
package/src/config/skill-state.ts +34 -0
package/src/config/skills-schema.ts +0 -1
package/src/config/skills.ts +9 -0
package/src/config/system-prompt.ts +110 -46
package/src/config/templates/SOUL.md +1 -1
package/src/config/types.ts +19 -1
package/src/config/vellum-skills/catalog.json +1 -1
package/src/config/vellum-skills/guardian-verify-setup/SKILL.md +1 -0
package/src/config/vellum-skills/sms-setup/SKILL.md +1 -1
package/src/config/vellum-skills/telegram-setup/SKILL.md +6 -5
package/src/config/vellum-skills/trusted-contacts/SKILL.md +105 -3
package/src/config/vellum-skills/twilio-setup/SKILL.md +1 -1
package/src/daemon/config-watcher.ts +0 -1
package/src/daemon/daemon-control.ts +1 -1
package/src/daemon/guardian-invite-intent.ts +124 -0
package/src/daemon/handlers/avatar.ts +68 -0
package/src/daemon/handlers/browser.ts +2 -2
package/src/daemon/handlers/guardian-actions.ts +120 -0
package/src/daemon/handlers/index.ts +4 -0
package/src/daemon/handlers/sessions.ts +19 -0
package/src/daemon/handlers/shared.ts +3 -1
package/src/daemon/install-cli-launchers.ts +58 -13
package/src/daemon/ipc-contract/guardian-actions.ts +53 -0
package/src/daemon/ipc-contract/sessions.ts +8 -2
package/src/daemon/ipc-contract/settings.ts +25 -2
package/src/daemon/ipc-contract-inventory.json +10 -0
package/src/daemon/ipc-contract.ts +4 -0
package/src/daemon/lifecycle.ts +14 -2
package/src/daemon/main.ts +1 -0
package/src/daemon/providers-setup.ts +26 -1
package/src/daemon/server.ts +1 -0
package/src/daemon/session-lifecycle.ts +52 -7
package/src/daemon/session-memory.ts +45 -0
package/src/daemon/session-process.ts +258 -432
package/src/daemon/session-runtime-assembly.ts +12 -0
package/src/daemon/session-skill-tools.ts +14 -1
package/src/daemon/session-tool-setup.ts +5 -0
package/src/daemon/session.ts +11 -0
package/src/daemon/shutdown-handlers.ts +11 -0
package/src/daemon/tool-side-effects.ts +35 -9
package/src/index.ts +2 -2
package/src/mcp/client.ts +152 -0
package/src/mcp/manager.ts +139 -0
package/src/memory/conversation-display-order-migration.ts +44 -0
package/src/memory/conversation-queries.ts +2 -0
package/src/memory/conversation-store.ts +91 -0
package/src/memory/db-init.ts +5 -1
package/src/memory/embedding-local.ts +13 -8
package/src/memory/guardian-action-store.ts +125 -2
package/src/memory/ingress-invite-store.ts +95 -1
package/src/memory/migrations/035-guardian-action-supersession.ts +23 -0
package/src/memory/migrations/index.ts +2 -1
package/src/memory/schema.ts +5 -1
package/src/memory/scoped-approval-grants.ts +14 -5
package/src/messaging/providers/slack/client.ts +12 -0
package/src/messaging/providers/slack/types.ts +5 -0
package/src/notifications/decision-engine.ts +49 -12
package/src/notifications/emit-signal.ts +7 -0
package/src/notifications/signal.ts +7 -0
package/src/notifications/thread-seed-composer.ts +2 -1
package/src/runtime/channel-approval-types.ts +16 -6
package/src/runtime/channel-approvals.ts +19 -15
package/src/runtime/channel-invite-transport.ts +85 -0
package/src/runtime/channel-invite-transports/telegram.ts +105 -0
package/src/runtime/guardian-action-grant-minter.ts +92 -35
package/src/runtime/guardian-action-message-composer.ts +30 -0
package/src/runtime/guardian-decision-types.ts +91 -0
package/src/runtime/http-server.ts +23 -1
package/src/runtime/ingress-service.ts +22 -0
package/src/runtime/invite-redemption-service.ts +181 -0
package/src/runtime/invite-redemption-templates.ts +39 -0
package/src/runtime/routes/call-routes.ts +2 -1
package/src/runtime/routes/guardian-action-routes.ts +206 -0
package/src/runtime/routes/guardian-approval-interception.ts +66 -190
package/src/runtime/routes/identity-routes.ts +73 -0
package/src/runtime/routes/inbound-message-handler.ts +486 -394
package/src/runtime/routes/pairing-routes.ts +4 -0
package/src/security/encrypted-store.ts +31 -17
package/src/security/keychain.ts +176 -2
package/src/security/secure-keys.ts +97 -0
package/src/security/tool-approval-digest.ts +1 -1
package/src/tools/browser/browser-execution.ts +2 -2
package/src/tools/browser/browser-manager.ts +46 -32
package/src/tools/browser/browser-screencast.ts +2 -2
package/src/tools/calls/call-start.ts +1 -1
package/src/tools/executor.ts +22 -17
package/src/tools/mcp/mcp-tool-factory.ts +100 -0
package/src/tools/network/script-proxy/session-manager.ts +1 -5
package/src/tools/registry.ts +64 -1
package/src/tools/skills/load.ts +22 -8
package/src/tools/system/avatar-generator.ts +119 -0
package/src/tools/system/navigate-settings.ts +65 -0
package/src/tools/system/open-system-settings.ts +75 -0
package/src/tools/system/voice-config.ts +121 -32
package/src/tools/terminal/backends/native.ts +40 -19
package/src/tools/terminal/backends/types.ts +3 -3
package/src/tools/terminal/parser.ts +1 -1
package/src/tools/terminal/sandbox-diagnostics.ts +6 -87
package/src/tools/terminal/sandbox.ts +1 -12
package/src/tools/terminal/shell.ts +3 -31
package/src/tools/tool-approval-handler.ts +141 -3
package/src/tools/tool-manifest.ts +6 -0
package/src/tools/types.ts +10 -2
package/src/util/diff.ts +36 -13
package/Dockerfile.sandbox +0 -5
package/src/__tests__/doordash-client.test.ts +0 -187
package/src/__tests__/doordash-session.test.ts +0 -154
package/src/__tests__/signup-e2e.test.ts +0 -354
package/src/__tests__/terminal-sandbox-docker.test.ts +0 -1065
package/src/__tests__/terminal-sandbox.integration.test.ts +0 -180
package/src/cli/doordash.ts +0 -1057
package/src/config/bundled-skills/doordash/SKILL.md +0 -163
package/src/config/templates/LOOKS.md +0 -25
package/src/doordash/cart-queries.ts +0 -787
package/src/doordash/client.ts +0 -1016
package/src/doordash/order-queries.ts +0 -85
package/src/doordash/queries.ts +0 -13
package/src/doordash/query-extractor.ts +0 -94
package/src/doordash/search-queries.ts +0 -203
package/src/doordash/session.ts +0 -84
package/src/doordash/store-queries.ts +0 -246
package/src/doordash/types.ts +0 -367
package/src/tools/terminal/backends/docker.ts +0 -379

package/src/calls/call-controller.ts CHANGED

@@ -12,11 +12,15 @@ import { getGatewayInternalBaseUrl } from '../config/env.js';
 import type { ServerMessage } from '../daemon/ipc-contract.js';
 import type { GuardianRuntimeContext } from '../daemon/session-runtime-assembly.js';
 import {
+  backfillSupersessionMetadata,
+  expireGuardianActionRequest,
+  getByPendingQuestionId,
   getDeliveriesByRequestId,
   getPendingRequestByCallSessionId,
   markTimedOutWithReason,
 } from '../memory/guardian-action-store.js';
 import { revokeScopedApprovalGrantsForContext } from '../memory/scoped-approval-grants.js';
+import { computeToolApprovalDigest } from '../security/tool-approval-digest.js';
 import { getLogger } from '../util/logger.js';
 import { readHttpToken } from '../util/platform.js';
 import { getMaxCallDurationMs, getUserConsultationTimeoutMs, SILENCE_TIMEOUT_MS } from './call-constants.js';
@@ -30,7 +34,6 @@ import {
   recordCallEvent,
   updateCallSession,
 } from './call-store.js';
-import { computeToolApprovalDigest } from '../security/tool-approval-digest.js';
 import { sendGuardianExpiryNotices } from './guardian-action-sweep.js';
 import { dispatchGuardianQuestion } from './guardian-dispatch.js';
 import type { RelayConnection } from './relay-server.js';
@@ -39,7 +42,19 @@ import { startVoiceTurn, type VoiceTurnHandle } from './voice-session-bridge.js'
 const log = getLogger('call-controller');
-type ControllerState = 'idle' | 'processing' | 'waiting_on_user' | 'speaking';
+type ControllerState = 'idle' | 'processing' | 'speaking';
+/**
+ * Tracks a pending guardian consultation independently of the controller's
+ * turn state. This allows the call to continue normal turn processing
+ * (idle -> processing -> speaking) while a consultation is outstanding.
+ */
+interface PendingConsultation {
+  questionText: string;
+  questionId: string;
+  toolApprovalMeta: { toolName: string; inputDigest: string } | null;
+  timer: ReturnType<typeof setTimeout>;
+}
 const ASK_GUARDIAN_CAPTURE_REGEX = /\[ASK_GUARDIAN:\s*(.+?)\]/;
 const ASK_GUARDIAN_MARKER_REGEX = /\[ASK_GUARDIAN:\s*.+?\]/g;
@@ -176,15 +191,18 @@ export class CallController {
   private silenceTimer: ReturnType<typeof setTimeout> | null = null;
   private durationTimer: ReturnType<typeof setTimeout> | null = null;
   private durationWarningTimer: ReturnType<typeof setTimeout> | null = null;
-  private consultationTimer: ReturnType<typeof setTimeout> | null = null;
+  /**
+   * Tracks the currently pending guardian consultation, if any. Decoupled
+   * from the controller's turn state so callers can continue to trigger
+   * normal turns while consultation is outstanding.
+   */
+  private pendingConsultation: PendingConsultation | null = null;
   private durationEndTimer: ReturnType<typeof setTimeout> | null = null;
   private task: string | null;
   /** True when the call session was created via the inbound path (no outbound task). */
   private isInbound: boolean;
-  /** Instructions queued while an LLM turn is in-flight or during waiting_on_user */
+  /** Instructions queued while an LLM turn is in-flight or during pending consultation */
   private pendingInstructions: string[] = [];
-  /** Caller utterances queued while waiting_on_user to prevent re-entrant turns */
-  private pendingCallerUtterances: Array<{transcript: string, speaker?: PromptSpeakerContext}> = [];
   /** Ensures the call opener is triggered at most once per call. */
   private initialGreetingStarted = false;
   /** Marks that the next caller turn should be treated as an opening acknowledgment. */
@@ -248,6 +266,15 @@ export class CallController {
     return this.state;
   }
+  /**
+   * Returns the question ID of the currently pending guardian consultation,
+   * or null if no consultation is active. Used by answerCall to match
+   * incoming answers to the correct consultation record.
+   */
+  getPendingConsultationQuestionId(): string | null {
+    return this.pendingConsultation?.questionId ?? null;
+  }
   /**
    * Update guardian trust context for subsequent LLM turns.
    */
@@ -270,19 +297,10 @@ export class CallController {
   /**
    * Handle a final caller utterance from the ConversationRelay.
+   * Caller utterances always trigger normal turns, even when a guardian
+   * consultation is pending — the consultation is tracked separately.
    */
   async handleCallerUtterance(transcript: string, speaker?: PromptSpeakerContext): Promise<void> {
-    // Do not start a new turn while waiting for guardian input — queue
-    // the utterance so it can be processed after the answer arrives.
-    if (this.state === 'waiting_on_user') {
-      log.warn(
-        { callSessionId: this.callSessionId },
-        'Caller utterance received while waiting_on_user — queued for after answer.',
-      );
-      this.pendingCallerUtterances.push({ transcript, speaker });
-      return;
-    }
     const interruptedInFlight = this.state === 'processing' || this.state === 'speaking';
     // If we're already processing or speaking, abort the in-flight generation
     if (interruptedInFlight) {
@@ -318,66 +336,39 @@ export class CallController {
   }
   /**
-   * Called when the user (in the chat UI) answers a pending question.
+   * Called when the guardian (via chat UI or channel) answers a pending
+   * consultation question. Acceptance is gated on having an active
+   * pending consultation record, not on controller turn state — so
+   * answers can arrive while the controller is idle, processing, or
+   * speaking.
    */
   async handleUserAnswer(answerText: string): Promise<boolean> {
-    if (this.state !== 'waiting_on_user') {
+    if (!this.pendingConsultation) {
       log.warn(
         { callSessionId: this.callSessionId, state: this.state },
-        'handleUserAnswer called but controller is not in waiting_on_user state',
+        'handleUserAnswer called but no pending consultation exists',
       );
       return false;
     }
-    // Clear the consultation timeout
-    if (this.consultationTimer) {
-      clearTimeout(this.consultationTimer);
-      this.consultationTimer = null;
-    }
-    // Defensive: await any lingering turn promise before starting a new one.
-    if (this.currentTurnPromise) {
-      const teardownPromise = this.currentTurnPromise;
-      this.currentTurnPromise = null;
-      await Promise.race([
-        teardownPromise.catch(() => {}),
-        new Promise<void>(resolve => setTimeout(resolve, 2000)),
-      ]);
-    }
+    // Clear the consultation timeout and record
+    clearTimeout(this.pendingConsultation.timer);
+    this.pendingConsultation = null;
-    this.state = 'processing';
     updateCallSession(this.callSessionId, { status: 'in_progress' });
-    // Merge any instructions that were queued during the waiting_on_user
-    // state into a single user message alongside the answer to avoid
-    // consecutive user-role messages (which violate API role-alternation
-    // requirements).
-    const parts: string[] = [];
-    for (const instr of this.pendingInstructions) {
-      parts.push(`[USER_INSTRUCTION: ${instr}]`);
-    }
-    this.pendingInstructions = [];
-    parts.push(`[USER_ANSWERED: ${answerText}]`);
+    // Inject the answer as a queued instruction so it merges into the
+    // next turn naturally, respecting role-alternation. If the controller
+    // is idle the instruction flush will fire a turn immediately.
+    this.pendingInstructions.push(`[USER_ANSWERED: ${answerText}]`);
-    const content = parts.join('\n');
+    // If the controller is idle, flush instructions immediately to
+    // deliver the answer. If processing/speaking, the answer will be
+    // delivered when the current turn completes via flushPendingInstructions.
+    if (this.state === 'idle') {
+      this.flushPendingInstructions();
+    }
-    // Fire-and-forget: unblock the caller so the HTTP response and answer
-    // persistence happen immediately, before LLM streaming begins.
-    this.runTurn(content)
-      .then(() => {
-        // If the answer turn ended the call (e.g. [END_CALL]), don't drain
-        // queued utterances — just discard them to avoid starting a fresh
-        // turn on a dead session.
-        if (this.state === 'idle' && this.isCallCompleted()) {
-          this.pendingCallerUtterances = [];
-          return;
-        }
-        this.drainPendingCallerUtterances();
-      })
-      .catch((err) => {
-        this.pendingCallerUtterances = [];
-        log.error({ err, callSessionId: this.callSessionId }, 'runTurn failed after user answer');
-      });
     return true;
   }
@@ -386,17 +377,16 @@ export class CallController {
    * The instruction is formatted as a dedicated marker that the system prompt
    * tells the model to treat as high-priority steering input.
    *
-   * When the LLM is actively processing or speaking, or when the controller
-   * is waiting on a user answer, the instruction is queued and spliced into
-   * the conversation at the correct chronological position once the current
-   * turn completes.
+   * When the LLM is actively processing or speaking, the instruction is
+   * queued and spliced into the conversation at the correct chronological
+   * position once the current turn completes.
    */
   async handleUserInstruction(instructionText: string): Promise<void> {
     recordCallEvent(this.callSessionId, 'user_instruction_relayed', { instruction: instructionText });
     // Queue the instruction when it cannot be safely appended right now
-    if (this.state === 'processing' || this.state === 'speaking' || this.state === 'waiting_on_user') {
-      this.pendingInstructions.push(instructionText);
+    if (this.state === 'processing' || this.state === 'speaking') {
+      this.pendingInstructions.push(`[USER_INSTRUCTION: ${instructionText}]`);
       return;
     }
@@ -432,7 +422,7 @@ export class CallController {
     if (this.silenceTimer) clearTimeout(this.silenceTimer);
     if (this.durationTimer) clearTimeout(this.durationTimer);
     if (this.durationWarningTimer) clearTimeout(this.durationWarningTimer);
-    if (this.consultationTimer) clearTimeout(this.consultationTimer);
+    if (this.pendingConsultation) { clearTimeout(this.pendingConsultation.timer); this.pendingConsultation = null; }
     if (this.durationEndTimer) { clearTimeout(this.durationEndTimer); this.durationEndTimer = null; }
     this.llmRunVersion++;
     this.abortCurrentTurn();
@@ -692,105 +682,101 @@ export class CallController {
             + `The unanswered question was: "${questionText}"`,
           );
           // Fall through to normal turn completion (idle + flushPendingInstructions)
+        } else if (this.pendingInstructions.some((instr) => instr.startsWith('[USER_ANSWERED:'))) {
+          // A guardian answer arrived mid-turn and is queued in
+          // pendingInstructions but hasn't been flushed yet. The in-flight
+          // LLM response was generated without knowledge of this answer, so
+          // creating a new consultation now would supersede the old one and
+          // desynchronize the flow. Skip this consultation — the answer will
+          // be flushed on the next turn, and if the model still needs to
+          // consult a guardian, it will emit another ASK_GUARDIAN then.
+          log.info({ callSessionId: this.callSessionId }, 'Deferring ASK_GUARDIAN — queued USER_ANSWERED pending');
+          recordCallEvent(this.callSessionId, 'guardian_consult_deferred', { question: questionText });
+          // Fall through to normal turn completion (idle + flushPendingInstructions)
         } else {
-          const pendingQuestion = createPendingQuestion(this.callSessionId, questionText);
-          this.state = 'waiting_on_user';
-          updateCallSession(this.callSessionId, { status: 'waiting_on_user' });
-          recordCallEvent(this.callSessionId, 'user_question_asked', { question: questionText });
-          // Notify the conversation that a question was asked
-          const session = getCallSession(this.callSessionId);
-          if (session) {
-            fireCallQuestionNotifier(session.conversationId, this.callSessionId, questionText);
-            // Dispatch guardian action request to all configured channels
-            void dispatchGuardianQuestion({
-              callSessionId: this.callSessionId,
-              conversationId: session.conversationId,
-              assistantId: this.assistantId,
-              pendingQuestion,
-              toolName: toolApprovalMeta?.toolName,
-              inputDigest: toolApprovalMeta?.inputDigest,
-            });
-          }
-          // Set a consultation timeout
-          this.consultationTimer = setTimeout(() => {
-            if (this.state !== 'waiting_on_user') return;
-            log.info({ callSessionId: this.callSessionId }, 'User consultation timed out');
-            // Mark the linked guardian action request as timed out and
-            // send expiry notices to guardian destinations. Deliveries
-            // must be captured before markTimedOutWithReason changes
-            // their status.
-            const pendingActionRequest = getPendingRequestByCallSessionId(this.callSessionId);
-            if (pendingActionRequest) {
-              const deliveries = getDeliveriesByRequestId(pendingActionRequest.id);
-              markTimedOutWithReason(pendingActionRequest.id, 'call_timeout');
+          // Determine the effective tool metadata for this ask. If the new
+          // ask has structured tool metadata, use it; otherwise inherit from
+          // the prior pending consultation (preserves tool scope on re-asks).
+          const effectiveToolMeta = toolApprovalMeta
+            ? { toolName: toolApprovalMeta.toolName, inputDigest: toolApprovalMeta.inputDigest }
+            : this.pendingConsultation?.toolApprovalMeta ?? null;
+          // Coalesce repeated identical asks: if a consultation is already
+          // pending for the same tool/action (or same informational question),
+          // avoid churning requests and just keep the existing one.
+          if (this.pendingConsultation) {
+            const isSameToolAction =
+              effectiveToolMeta && this.pendingConsultation.toolApprovalMeta
+                ? effectiveToolMeta.toolName === this.pendingConsultation.toolApprovalMeta.toolName
+                  && effectiveToolMeta.inputDigest === this.pendingConsultation.toolApprovalMeta.inputDigest
+                : !effectiveToolMeta && !this.pendingConsultation.toolApprovalMeta
+                  && questionText === this.pendingConsultation.questionText;
+            if (isSameToolAction) {
+              // Same tool/action — coalesce. Keep the existing consultation
+              // alive and skip creating a new request.
               log.info(
-                { callSessionId: this.callSessionId, requestId: pendingActionRequest.id },
-                'Marked guardian action request as timed out',
+                { callSessionId: this.callSessionId, questionId: this.pendingConsultation.questionId },
+                'Coalescing repeated ASK_GUARDIAN — same tool/action already pending',
               );
-              void sendGuardianExpiryNotices(
-                deliveries,
-                pendingActionRequest.assistantId,
-                getGatewayInternalBaseUrl(),
-                readHttpToken() ?? undefined,
-              ).catch((err) => {
-                log.error(
-                  { err, callSessionId: this.callSessionId, requestId: pendingActionRequest.id },
-                  'Failed to send guardian action expiry notices after call timeout',
+              recordCallEvent(this.callSessionId, 'guardian_consult_coalesced', { question: questionText });
+              // Fall through to normal turn completion (idle + flushPendingInstructions)
+            } else {
+              // Materially different intent — supersede the old consultation.
+              clearTimeout(this.pendingConsultation.timer);
+              // Expire the previous consultation's storage records so stale
+              // guardian answers cannot match the old request.
+              expirePendingQuestions(this.callSessionId);
+              const previousRequest = getPendingRequestByCallSessionId(this.callSessionId);
+              if (previousRequest) {
+                // Immediately expire with 'superseded' reason to prevent
+                // stale answers from resolving the old request.
+                expireGuardianActionRequest(previousRequest.id, 'superseded');
+                log.info(
+                  { callSessionId: this.callSessionId, requestId: previousRequest.id },
+                  'Superseded guardian action request (materially different intent)',
                 );
-              });
-            }
-            // Expire pending questions and update call state
-            expirePendingQuestions(this.callSessionId);
-            this.state = 'idle';
-            updateCallSession(this.callSessionId, { status: 'in_progress' });
-            this.guardianUnavailableForCall = true;
-            recordCallEvent(this.callSessionId, 'guardian_consultation_timed_out', { question: questionText });
-            // Restart silence detection before firing the generated turn
-            this.resetSilenceTimer();
-            // Build a generated turn instruction instead of hardcoded text.
-            // Merge any queued instructions and caller utterances into the
-            // timeout turn to avoid concurrent-turn races.
-            const timeoutInstruction =
-              `[GUARDIAN_TIMEOUT] Your guardian did not respond in time to your question: "${questionText}". `
-              + `Apologize to the caller for the delay, let them know you were unable to reach your guardian, `
-              + `ask if they would like to leave a message or receive a callback, `
-              + `and ask if there are any other questions you can help with right now.`;
-            const parts: string[] = [];
-            for (const instr of this.pendingInstructions) {
-              parts.push(`[USER_INSTRUCTION: ${instr}]`);
-            }
-            this.pendingInstructions = [];
-            parts.push(`[USER_INSTRUCTION: ${timeoutInstruction}]`);
-            if (this.pendingCallerUtterances.length > 0) {
-              const latest = this.pendingCallerUtterances[this.pendingCallerUtterances.length - 1];
-              this.pendingCallerUtterances = [];
-              const callerContent = this.formatCallerUtterance(latest.transcript, latest.speaker);
-              if (callerContent.length > 0) {
-                parts.push(callerContent);
               }
-            }
-            const content = parts.join('\n');
-            this.runTurn(content).catch((err) =>
-              log.error({ err, callSessionId: this.callSessionId }, 'runTurn failed after guardian consultation timeout'),
-            );
-          }, getUserConsultationTimeoutMs());
-          return;
+              this.pendingConsultation = null;
+              // Dispatch the new consultation with effective tool metadata.
+              // The previous request ID is passed through so the dispatch
+              // can backfill supersession chain metadata (superseded_by_request_id)
+              // once the new request has been created.
+              this.dispatchNewConsultation(questionText, effectiveToolMeta, previousRequest?.id ?? null);
+            }
+          } else {
+            // No prior consultation — dispatch fresh
+            this.dispatchNewConsultation(questionText, effectiveToolMeta, null);
+          }
         }
       }
       // Check for END_CALL marker
       if (responseText.includes(END_CALL_MARKER)) {
+        // Clear any pending consultation before completing the call.
+        // Without this, the consultation timeout can fire on an already-ended
+        // call, overwriting 'completed' status back to 'in_progress' and
+        // starting a new LLM turn on a dead session. Similarly, a late
+        // handleUserAnswer could be accepted since pendingConsultation is
+        // still non-null.
+        if (this.pendingConsultation) {
+          clearTimeout(this.pendingConsultation.timer);
+          // Expire store-side consultation records so clients don't observe
+          // a completed call with a dangling pendingQuestion, and guardian
+          // replies are cleanly rejected instead of hitting answerCall failures.
+          expirePendingQuestions(this.callSessionId);
+          const previousRequest = getPendingRequestByCallSessionId(this.callSessionId);
+          if (previousRequest) {
+            expireGuardianActionRequest(previousRequest.id, 'cancelled');
+          }
+          this.pendingConsultation = null;
+        }
         const currentSession = getCallSession(this.callSessionId);
         const shouldNotifyCompletion = currentSession
           ? currentSession.status !== 'completed' && currentSession.status !== 'failed' && currentSession.status !== 'cancelled'
@@ -875,14 +861,114 @@ export class CallController {
   }
   /**
-   * Check whether the underlying call session has already ended.
-   * Used to guard against post-completion work (e.g. draining queued
-   * utterances after an [END_CALL] turn).
+   * Create a new consultation: persist a pending question, dispatch
+   * guardian action request to channels, and start the consultation timer.
+   *
+   * If `supersededRequestId` is provided, backfills the supersession
+   * chain after the new request is created.
    */
-  private isCallCompleted(): boolean {
+  private dispatchNewConsultation(
+    questionText: string,
+    effectiveToolMeta: { toolName: string; inputDigest: string } | null,
+    supersededRequestId: string | null,
+  ): void {
+    const pendingQuestion = createPendingQuestion(this.callSessionId, questionText);
+    updateCallSession(this.callSessionId, { status: 'waiting_on_user' });
+    recordCallEvent(this.callSessionId, 'user_question_asked', { question: questionText });
+    // Notify the conversation that a question was asked
     const session = getCallSession(this.callSessionId);
-    if (!session) return true;
-    return session.status === 'completed' || session.status === 'failed' || session.status === 'cancelled';
+    if (session) {
+      fireCallQuestionNotifier(session.conversationId, this.callSessionId, questionText);
+      // Dispatch guardian action request to all configured channels
+      // Capture the pending question ID in a closure for stable lookup
+      // after the async dispatch completes — avoids a racy
+      // getPendingRequestByCallSessionId lookup that could return a
+      // different request if another supersession occurs during the gap.
+      const stablePendingQuestionId = pendingQuestion.id;
+      void dispatchGuardianQuestion({
+        callSessionId: this.callSessionId,
+        conversationId: session.conversationId,
+        assistantId: this.assistantId,
+        pendingQuestion,
+        toolName: effectiveToolMeta?.toolName,
+        inputDigest: effectiveToolMeta?.inputDigest,
+      }).then(() => {
+        // Backfill supersession chain: now that the new request exists in
+        // the store, update the old request's superseded_by_request_id.
+        if (supersededRequestId) {
+          const newRequest = getByPendingQuestionId(stablePendingQuestionId);
+          if (newRequest) {
+            backfillSupersessionMetadata(supersededRequestId, newRequest.id);
+          }
+        }
+      });
+    }
+    // Set a consultation timeout tied to this specific consultation
+    // record, not the global controller state.
+    const consultationTimer = setTimeout(() => {
+      // Only fire if this consultation is still the active one
+      if (!this.pendingConsultation || this.pendingConsultation.questionId !== pendingQuestion.id) return;
+      log.info({ callSessionId: this.callSessionId }, 'Guardian consultation timed out');
+      // Mark the linked guardian action request as timed out and
+      // send expiry notices to guardian destinations. Deliveries
+      // must be captured before markTimedOutWithReason changes
+      // their status.
+      const pendingActionRequest = getPendingRequestByCallSessionId(this.callSessionId);
+      if (pendingActionRequest) {
+        const deliveries = getDeliveriesByRequestId(pendingActionRequest.id);
+        markTimedOutWithReason(pendingActionRequest.id, 'call_timeout');
+        log.info(
+          { callSessionId: this.callSessionId, requestId: pendingActionRequest.id },
+          'Marked guardian action request as timed out',
+        );
+        void sendGuardianExpiryNotices(
+          deliveries,
+          pendingActionRequest.assistantId,
+          getGatewayInternalBaseUrl(),
+          readHttpToken() ?? undefined,
+        ).catch((err) => {
+          log.error(
+            { err, callSessionId: this.callSessionId, requestId: pendingActionRequest.id },
+            'Failed to send guardian action expiry notices after call timeout',
+          );
+        });
+      }
+      // Expire pending questions and update call state
+      expirePendingQuestions(this.callSessionId);
+      this.pendingConsultation = null;
+      updateCallSession(this.callSessionId, { status: 'in_progress' });
+      this.guardianUnavailableForCall = true;
+      recordCallEvent(this.callSessionId, 'guardian_consultation_timed_out', { question: questionText });
+      // Inject timeout instruction so the model addresses it on the
+      // next turn. If idle, flush immediately; otherwise it merges
+      // into the next turn completion.
+      const timeoutInstruction =
+        `[GUARDIAN_TIMEOUT] Your guardian did not respond in time to your question: "${questionText}". `
+        + `Apologize to the caller for the delay, let them know you were unable to reach your guardian, `
+        + `ask if they would like to leave a message or receive a callback, `
+        + `and ask if there are any other questions you can help with right now.`;
+      this.pendingInstructions.push(timeoutInstruction);
+      if (this.state === 'idle') {
+        this.resetSilenceTimer();
+        this.flushPendingInstructions();
+      }
+    }, getUserConsultationTimeoutMs());
+    this.pendingConsultation = {
+      questionText,
+      questionId: pendingQuestion.id,
+      toolApprovalMeta: effectiveToolMeta,
+      timer: consultationTimer,
+    };
   }
   /**
@@ -892,7 +978,7 @@ export class CallController {
     if (this.pendingInstructions.length === 0) return;
     const parts = this.pendingInstructions.map(
-      (instr) => `[USER_INSTRUCTION: ${instr}]`,
+      (instr) => instr.startsWith('[') ? instr : `[USER_INSTRUCTION: ${instr}]`,
     );
     this.pendingInstructions = [];
@@ -906,49 +992,6 @@ export class CallController {
     );
   }
-  /**
-   * Drain caller utterances that were queued while waiting_on_user.
-   * Only the most recent utterance is processed — older ones are discarded
-   * as stale since the caller likely moved on.
-   *
-   * @param contentPrefix — optional string (e.g. instruction markers) to
-   *   prepend to the turn content so instructions and the caller utterance
-   *   are sent as a single turn, avoiding concurrent-turn races.
-   */
-  private drainPendingCallerUtterances(contentPrefix?: string): void {
-    if (this.pendingCallerUtterances.length === 0) return;
-    // Keep only the most recent utterance; discard stale older ones
-    const latest = this.pendingCallerUtterances[this.pendingCallerUtterances.length - 1];
-    this.pendingCallerUtterances = [];
-    if (contentPrefix) {
-      // Merge prefix content with the caller utterance into a single turn
-      let callerContent = this.formatCallerUtterance(latest.transcript, latest.speaker);
-      // Preserve opening-ack semantics when draining bypasses handleCallerUtterance
-      if (this.awaitingOpeningAck) {
-        callerContent = callerContent.length > 0
-          ? `${CALL_OPENING_ACK_MARKER}\n${callerContent}`
-          : CALL_OPENING_ACK_MARKER;
-        this.awaitingOpeningAck = false;
-        this.lastSentWasOpener = false;
-      }
-      const combined = `${contentPrefix}\n${callerContent}`;
-      this.resetSilenceTimer();
-      this.runTurn(combined).catch((err) =>
-        log.error({ err, callSessionId: this.callSessionId }, 'runTurn failed after draining queued caller utterance with prefix'),
-      );
-      return;
-    }
-    // Fire-and-forget so we don't block the current turn's cleanup.
-    this.handleCallerUtterance(latest.transcript, latest.speaker).catch((err) =>
-      log.error({ err, callSessionId: this.callSessionId }, 'runTurn failed after draining queued caller utterance'),
-    );
-  }
   private startDurationTimer(): void {
     const maxDurationMs = getMaxCallDurationMs();
     const warningMs = maxDurationMs - 2 * 60 * 1000; // 2 minutes before max