npm - @vellumai/assistant - Versions diffs - 0.4.25 → 0.4.26 - Mend

@vellumai/assistant 0.4.25 → 0.4.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/package.json +1 -1
package/src/__tests__/call-controller.test.ts +80 -0
package/src/calls/call-controller.ts +34 -29
package/src/config/bundled-skills/phone-calls/SKILL.md +1 -1
package/src/runtime/auth/middleware.ts +87 -26

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@vellumai/assistant",
-  "version": "0.4.25",
+  "version": "0.4.26",
   "type": "module",
   "bin": {
     "vellum": "./src/index.ts"

package/src/__tests__/call-controller.test.ts CHANGED

@@ -2138,6 +2138,86 @@ describe("call-controller", () => {
     controller.destroy();
   });
+  test('silence timeout suppressed during in-call guardian consultation (pendingGuardianInput)', async () => {
+    mockSilenceTimeoutMs = 50; // Short timeout for testing
+    mockConsultationTimeoutMs = 10_000; // Long enough to not interfere
+    // LLM emits an ASK_GUARDIAN marker so the controller creates a pendingGuardianInput
+    mockStartVoiceTurn.mockImplementation(
+      createMockVoiceTurn(["Let me check with your guardian. [ASK_GUARDIAN: Can this caller access the account?]"]),
+    );
+    const { relay, controller } = setupController();
+    // Trigger a turn that creates a pending guardian input request
+    await controller.handleCallerUtterance("I need to access the account");
+    // Allow turn to complete
+    await new Promise((r) => setTimeout(r, 50));
+    // Verify a guardian input request is now pending
+    expect(controller.getPendingConsultationQuestionId()).not.toBeNull();
+    // Relay state is still 'connected' (not 'awaiting_guardian_decision')
+    expect(relay.mockConnectionState).toBe("connected");
+    // Clear any tokens from the turn itself
+    relay.sentTokens.length = 0;
+    // Wait for the silence timeout to fire
+    await new Promise((r) => setTimeout(r, 200));
+    // "Are you still there?" should NOT have been sent because
+    // pendingGuardianInput is active
+    const silenceTokens = relay.sentTokens.filter((t) =>
+      t.token.includes("Are you still there?"),
+    );
+    expect(silenceTokens.length).toBe(0);
+    controller.destroy();
+  });
+  test('silence nudge resumes after guardian consultation resolves', async () => {
+    mockSilenceTimeoutMs = 50; // Short timeout for testing
+    mockConsultationTimeoutMs = 10_000; // Long enough to not interfere
+    // LLM emits an ASK_GUARDIAN marker so the controller creates a pendingGuardianInput
+    mockStartVoiceTurn.mockImplementation(
+      createMockVoiceTurn(["Let me check. [ASK_GUARDIAN: Is this approved?]"]),
+    );
+    const { relay, controller } = setupController();
+    // Trigger a turn that creates a pending guardian input request
+    await controller.handleCallerUtterance("Can I do this?");
+    await new Promise((r) => setTimeout(r, 50));
+    // Verify guardian input request is pending
+    expect(controller.getPendingConsultationQuestionId()).not.toBeNull();
+    // Now resolve the consultation by providing an answer
+    // Mock the next LLM turn for the answer-driven follow-up
+    mockStartVoiceTurn.mockImplementation(
+      createMockVoiceTurn(["Great news, your guardian approved the request."]),
+    );
+    await controller.handleUserAnswer("Yes, approved");
+    // Allow the answer-driven turn to complete
+    await new Promise((r) => setTimeout(r, 100));
+    // Guardian input request should now be cleared
+    expect(controller.getPendingConsultationQuestionId()).toBeNull();
+    // Clear tokens from the answer turn
+    relay.sentTokens.length = 0;
+    // Wait for the silence timeout to fire again
+    await new Promise((r) => setTimeout(r, 200));
+    // "Are you still there?" SHOULD fire now that guardian wait is resolved
+    const silenceTokens = relay.sentTokens.filter((t) =>
+      t.token.includes("Are you still there?"),
+    );
+    expect(silenceTokens.length).toBe(1);
+    controller.destroy();
+  });
   // ── Pointer message regression tests ─────────────────────────────
   test("END_CALL marker writes completed pointer to origin conversation", async () => {

package/src/calls/call-controller.ts CHANGED

@@ -44,11 +44,13 @@ const log = getLogger('call-controller');
 type ControllerState = 'idle' | 'processing' | 'speaking';
 /**
- * Tracks a pending guardian consultation independently of the controller's
+ * Tracks a pending guardian input request independently of the controller's
  * turn state. This allows the call to continue normal turn processing
- * (idle -> processing -> speaking) while a consultation is outstanding.
+ * (idle -> processing -> speaking) while a guardian consultation is outstanding.
+ * Also used to suppress the silence nudge ("Are you still there?") while
+ * the caller is waiting on a guardian decision.
  */
-interface PendingConsultation {
+interface PendingGuardianInput {
   questionText: string;
   questionId: string;
   toolApprovalMeta: { toolName: string; inputDigest: string } | null;
@@ -191,16 +193,17 @@ export class CallController {
   private durationTimer: ReturnType<typeof setTimeout> | null = null;
   private durationWarningTimer: ReturnType<typeof setTimeout> | null = null;
   /**
-   * Tracks the currently pending guardian consultation, if any. Decoupled
+   * Tracks the currently pending guardian input request, if any. Decoupled
    * from the controller's turn state so callers can continue to trigger
-   * normal turns while consultation is outstanding.
+   * normal turns while a guardian consultation is outstanding. Also
+   * suppresses the silence nudge while non-null.
    */
-  private pendingConsultation: PendingConsultation | null = null;
+  private pendingGuardianInput: PendingGuardianInput | null = null;
   private durationEndTimer: ReturnType<typeof setTimeout> | null = null;
   private task: string | null;
   /** True when the call session was created via the inbound path (no outbound task). */
   private isInbound: boolean;
-  /** Instructions queued while an LLM turn is in-flight or during pending consultation */
+  /** Instructions queued while an LLM turn is in-flight or during pending guardian input */
   private pendingInstructions: string[] = [];
   /** Ensures the call opener is triggered at most once per call. */
   private initialGreetingStarted = false;
@@ -271,7 +274,7 @@ export class CallController {
    * incoming answers to the correct consultation record.
    */
   getPendingConsultationQuestionId(): string | null {
-    return this.pendingConsultation?.questionId ?? null;
+    return this.pendingGuardianInput?.questionId ?? null;
   }
   /**
@@ -357,7 +360,7 @@ export class CallController {
    * speaking.
    */
   async handleUserAnswer(answerText: string): Promise<boolean> {
-    if (!this.pendingConsultation) {
+    if (!this.pendingGuardianInput) {
       log.warn(
         { callSessionId: this.callSessionId, state: this.state },
         'handleUserAnswer called but no pending consultation exists',
@@ -366,8 +369,8 @@ export class CallController {
     }
     // Clear the consultation timeout and record
-    clearTimeout(this.pendingConsultation.timer);
-    this.pendingConsultation = null;
+    clearTimeout(this.pendingGuardianInput.timer);
+    this.pendingGuardianInput = null;
     updateCallSession(this.callSessionId, { status: 'in_progress' });
@@ -436,7 +439,7 @@ export class CallController {
     if (this.silenceTimer) clearTimeout(this.silenceTimer);
     if (this.durationTimer) clearTimeout(this.durationTimer);
     if (this.durationWarningTimer) clearTimeout(this.durationWarningTimer);
-    if (this.pendingConsultation) { clearTimeout(this.pendingConsultation.timer); this.pendingConsultation = null; }
+    if (this.pendingGuardianInput) { clearTimeout(this.pendingGuardianInput.timer); this.pendingGuardianInput = null; }
     if (this.durationEndTimer) { clearTimeout(this.durationEndTimer); this.durationEndTimer = null; }
     this.llmRunVersion++;
     this.abortCurrentTurn();
@@ -713,30 +716,30 @@ export class CallController {
           // the prior pending consultation (preserves tool scope on re-asks).
           const effectiveToolMeta = toolApprovalMeta
             ? { toolName: toolApprovalMeta.toolName, inputDigest: toolApprovalMeta.inputDigest }
-            : this.pendingConsultation?.toolApprovalMeta ?? null;
+            : this.pendingGuardianInput?.toolApprovalMeta ?? null;
           // Coalesce repeated identical asks: if a consultation is already
           // pending for the same tool/action (or same informational question),
           // avoid churning requests and just keep the existing one.
-          if (this.pendingConsultation) {
+          if (this.pendingGuardianInput) {
             const isSameToolAction =
-              effectiveToolMeta && this.pendingConsultation.toolApprovalMeta
-                ? effectiveToolMeta.toolName === this.pendingConsultation.toolApprovalMeta.toolName
-                  && effectiveToolMeta.inputDigest === this.pendingConsultation.toolApprovalMeta.inputDigest
-                : !effectiveToolMeta && !this.pendingConsultation.toolApprovalMeta;
+              effectiveToolMeta && this.pendingGuardianInput.toolApprovalMeta
+                ? effectiveToolMeta.toolName === this.pendingGuardianInput.toolApprovalMeta.toolName
+                  && effectiveToolMeta.inputDigest === this.pendingGuardianInput.toolApprovalMeta.inputDigest
+                : !effectiveToolMeta && !this.pendingGuardianInput.toolApprovalMeta;
             if (isSameToolAction) {
               // Same tool/action — coalesce. Keep the existing consultation
               // alive and skip creating a new request.
               log.info(
-                { callSessionId: this.callSessionId, questionId: this.pendingConsultation.questionId },
+                { callSessionId: this.callSessionId, questionId: this.pendingGuardianInput.questionId },
                 'Coalescing repeated ASK_GUARDIAN — same tool/action already pending',
               );
               recordCallEvent(this.callSessionId, 'guardian_consult_coalesced', { question: questionText });
               // Fall through to normal turn completion (idle + flushPendingInstructions)
             } else {
               // Materially different intent — supersede the old consultation.
-              clearTimeout(this.pendingConsultation.timer);
+              clearTimeout(this.pendingGuardianInput.timer);
               // Expire the previous consultation's storage records so stale
               // guardian answers cannot match the old request.
@@ -752,7 +755,7 @@ export class CallController {
                 );
               }
-              this.pendingConsultation = null;
+              this.pendingGuardianInput = null;
               // Dispatch the new consultation with effective tool metadata.
               // The previous request ID is passed through so the dispatch
@@ -773,10 +776,10 @@ export class CallController {
         // Without this, the consultation timeout can fire on an already-ended
         // call, overwriting 'completed' status back to 'in_progress' and
         // starting a new LLM turn on a dead session. Similarly, a late
-        // handleUserAnswer could be accepted since pendingConsultation is
+        // handleUserAnswer could be accepted since pendingGuardianInput is
         // still non-null.
-        if (this.pendingConsultation) {
-          clearTimeout(this.pendingConsultation.timer);
+        if (this.pendingGuardianInput) {
+          clearTimeout(this.pendingGuardianInput.timer);
           // Expire store-side consultation records so clients don't observe
           // a completed call with a dangling pendingQuestion, and guardian
@@ -787,7 +790,7 @@ export class CallController {
             expireCanonicalGuardianRequest(previousRequest.id);
           }
-          this.pendingConsultation = null;
+          this.pendingGuardianInput = null;
         }
         const currentSession = getCallSession(this.callSessionId);
@@ -928,7 +931,7 @@ export class CallController {
     // record, not the global controller state.
     const consultationTimer = setTimeout(() => {
       // Only fire if this consultation is still the active one
-      if (!this.pendingConsultation || this.pendingConsultation.questionId !== pendingQuestion.id) return;
+      if (!this.pendingGuardianInput || this.pendingGuardianInput.questionId !== pendingQuestion.id) return;
       log.info({ callSessionId: this.callSessionId }, 'Guardian consultation timed out');
@@ -960,7 +963,7 @@ export class CallController {
       // Expire pending questions and update call state
       expirePendingQuestions(this.callSessionId);
-      this.pendingConsultation = null;
+      this.pendingGuardianInput = null;
       updateCallSession(this.callSessionId, { status: 'in_progress' });
       this.guardianUnavailableForCall = true;
       recordCallEvent(this.callSessionId, 'guardian_consultation_timed_out', { question: questionText });
@@ -982,7 +985,7 @@ export class CallController {
       }
     }, getUserConsultationTimeoutMs());
-    this.pendingConsultation = {
+    this.pendingGuardianInput = {
       questionText,
       questionId: pendingQuestion.id,
       toolApprovalMeta: effectiveToolMeta,
@@ -1067,7 +1070,9 @@ export class CallController {
       // During guardian wait states, the relay heartbeat timer handles
       // periodic updates — suppress the generic "Are you still there?"
       // which is confusing when the caller is waiting on a decision.
-      if (this.relay.getConnectionState() === 'awaiting_guardian_decision') {
+      // Two paths: in-call consultation (pendingGuardianInput) and
+      // inbound access-request wait (relay state).
+      if (this.pendingGuardianInput || this.relay.getConnectionState() === 'awaiting_guardian_decision') {
         log.debug({ callSessionId: this.callSessionId }, 'Silence timeout suppressed during guardian wait');
         return;
       }

package/src/config/bundled-skills/phone-calls/SKILL.md CHANGED

@@ -629,7 +629,7 @@ Or re-run the public-ingress skill to auto-detect and save the new URL.
 ### Call drops after 30 seconds of silence
-The system has a 30-second silence timeout. If nobody speaks for 30 seconds, the agent will ask "Are you still there?" This is expected behavior.
+The system has a 30-second silence timeout. If nobody speaks for 30 seconds during normal conversation, the agent will ask "Are you still there?" This is expected behavior. During guardian wait states (inbound access-request wait or in-call guardian consultation wait), this generic silence nudge is suppressed — the guardian-wait heartbeat messaging is used instead.
 ### Call quality sounds off

package/src/runtime/auth/middleware.ts CHANGED

@@ -1,8 +1,18 @@
 /**
  * JWT bearer auth middleware for the runtime HTTP server.
  *
- * Extracts `Authorization: Bearer <token>`, verifies the JWT with
- * `aud=vellum-daemon`, and builds an AuthContext from the claims.
+ * Extracts `Authorization: Bearer <token>`, verifies the JWT, and
+ * builds an AuthContext from the claims.
+ *
+ * Accepts two JWT audiences:
+ *   - `vellum-daemon` — primary audience, used by the gateway's runtime
+ *     proxy after token exchange and by daemon-minted delivery tokens.
+ *   - `vellum-gateway` — fallback audience, used by direct local clients
+ *     (e.g., the macOS app's SettingsStore) that hold a guardian-issued
+ *     JWT but call daemon endpoints directly without routing through the
+ *     gateway's runtime proxy. Both daemon and gateway share the same
+ *     HMAC signing key (~/.vellum/protected/actor-token-signing-key),
+ *     so the signature is valid regardless of audience.
  *
  * Replaces both the legacy bearer shared-secret check and the
  * actor-token HMAC middleware with a single JWT verification path.
@@ -12,16 +22,16 @@
  * so downstream code always has a typed context to consume.
  */
-import { isHttpAuthDisabled } from '../../config/env.js';
-import { getLogger } from '../../util/logger.js';
-import { DAEMON_INTERNAL_ASSISTANT_ID } from '../assistant-scope.js';
-import { extractBearerToken } from '../middleware/auth.js';
-import { buildAuthContext } from './context.js';
-import { resolveScopeProfile } from './scopes.js';
-import { verifyToken } from './token-service.js';
-import type { AuthContext } from './types.js';
+import { isHttpAuthDisabled } from "../../config/env.js";
+import { getLogger } from "../../util/logger.js";
+import { DAEMON_INTERNAL_ASSISTANT_ID } from "../assistant-scope.js";
+import { extractBearerToken } from "../middleware/auth.js";
+import { buildAuthContext } from "./context.js";
+import { resolveScopeProfile } from "./scopes.js";
+import { verifyToken } from "./token-service.js";
+import type { AuthContext } from "./types.js";
-const log = getLogger('auth-middleware');
+const log = getLogger("auth-middleware");
 // ---------------------------------------------------------------------------
 // Result type
@@ -43,11 +53,11 @@ export type AuthenticateResult =
 function buildDevBypassContext(): AuthContext {
   return {
     subject: `actor:${DAEMON_INTERNAL_ASSISTANT_ID}:dev-bypass`,
-    principalType: 'actor',
+    principalType: "actor",
     assistantId: DAEMON_INTERNAL_ASSISTANT_ID,
-    actorPrincipalId: 'dev-bypass',
-    scopeProfile: 'actor_client_v1',
-    scopes: resolveScopeProfile('actor_client_v1'),
+    actorPrincipalId: "dev-bypass",
+    scopeProfile: "actor_client_v1",
+    scopes: resolveScopeProfile("actor_client_v1"),
     policyEpoch: Number.MAX_SAFE_INTEGER,
   };
 }
@@ -72,36 +82,82 @@ export function authenticateRequest(req: Request): AuthenticateResult {
   const rawToken = extractBearerToken(req);
   if (!rawToken) {
-    log.warn({ reason: 'missing_token', path }, 'Auth denied: missing Authorization header');
+    log.warn(
+      { reason: "missing_token", path },
+      "Auth denied: missing Authorization header",
+    );
     return {
       ok: false,
       response: Response.json(
-        { error: { code: 'UNAUTHORIZED', message: 'Missing Authorization header' } },
+        {
+          error: {
+            code: "UNAUTHORIZED",
+            message: "Missing Authorization header",
+          },
+        },
         { status: 401 },
       ),
     };
   }
-  // Verify the JWT with audience = vellum-daemon
-  const verifyResult = verifyToken(rawToken, 'vellum-daemon');
+  // Verify the JWT — prefer vellum-daemon audience (gateway-proxied requests
+  // and daemon-minted tokens), but also accept vellum-gateway audience for
+  // direct local clients (macOS SettingsStore) that hold a guardian-issued JWT
+  // and call daemon endpoints without routing through the gateway runtime proxy.
+  let verifyResult = verifyToken(rawToken, "vellum-daemon");
+  if (
+    !verifyResult.ok &&
+    verifyResult.reason?.startsWith("audience_mismatch")
+  ) {
+    verifyResult = verifyToken(rawToken, "vellum-gateway");
+    // Normalize gateway-audience claims to daemon context so that
+    // buildAuthContext applies the same assistantId normalization
+    // (aud=vellum-daemon → assistantId='self') that gateway-exchanged
+    // tokens receive. Without this rewrite, the external assistant ID
+    // from the guardian-issued JWT would leak into daemon-internal
+    // scoping (storage keys, routing), violating the invariant
+    // documented in context.ts:30-33.
+    if (verifyResult.ok) {
+      verifyResult = {
+        ok: true,
+        claims: { ...verifyResult.claims, aud: "vellum-daemon" },
+      };
+    }
+  }
   if (!verifyResult.ok) {
     // Stale policy epoch gets a specific error code so clients can refresh
-    if (verifyResult.reason === 'stale_policy_epoch') {
-      log.warn({ reason: 'stale_policy_epoch', path }, 'Auth denied: stale policy epoch');
+    if (verifyResult.reason === "stale_policy_epoch") {
+      log.warn(
+        { reason: "stale_policy_epoch", path },
+        "Auth denied: stale policy epoch",
+      );
       return {
         ok: false,
         response: Response.json(
-          { error: { code: 'refresh_required', message: 'Token policy epoch is stale; refresh required' } },
+          {
+            error: {
+              code: "refresh_required",
+              message: "Token policy epoch is stale; refresh required",
+            },
+          },
           { status: 401 },
         ),
       };
     }
-    log.warn({ reason: verifyResult.reason, path }, 'Auth denied: JWT verification failed');
+    log.warn(
+      { reason: verifyResult.reason, path },
+      "Auth denied: JWT verification failed",
+    );
     return {
       ok: false,
       response: Response.json(
-        { error: { code: 'UNAUTHORIZED', message: `Invalid token: ${verifyResult.reason}` } },
+        {
+          error: {
+            code: "UNAUTHORIZED",
+            message: `Invalid token: ${verifyResult.reason}`,
+          },
+        },
         { status: 401 },
       ),
     };
@@ -112,12 +168,17 @@ export function authenticateRequest(req: Request): AuthenticateResult {
   if (!contextResult.ok) {
     log.warn(
       { reason: contextResult.reason, path, sub: verifyResult.claims.sub },
-      'Auth denied: invalid JWT claims',
+      "Auth denied: invalid JWT claims",
     );
     return {
       ok: false,
       response: Response.json(
-        { error: { code: 'UNAUTHORIZED', message: `Invalid token claims: ${contextResult.reason}` } },
+        {
+          error: {
+            code: "UNAUTHORIZED",
+            message: `Invalid token claims: ${contextResult.reason}`,
+          },
+        },
         { status: 401 },
       ),
     };