@vellumai/assistant 0.4.25 → 0.4.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@vellumai/assistant",
3
- "version": "0.4.25",
3
+ "version": "0.4.26",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "vellum": "./src/index.ts"
@@ -2138,6 +2138,86 @@ describe("call-controller", () => {
2138
2138
  controller.destroy();
2139
2139
  });
2140
2140
 
2141
+ test('silence timeout suppressed during in-call guardian consultation (pendingGuardianInput)', async () => {
2142
+ mockSilenceTimeoutMs = 50; // Short timeout for testing
2143
+ mockConsultationTimeoutMs = 10_000; // Long enough to not interfere
2144
+
2145
+ // LLM emits an ASK_GUARDIAN marker so the controller creates a pendingGuardianInput
2146
+ mockStartVoiceTurn.mockImplementation(
2147
+ createMockVoiceTurn(["Let me check with your guardian. [ASK_GUARDIAN: Can this caller access the account?]"]),
2148
+ );
2149
+ const { relay, controller } = setupController();
2150
+
2151
+ // Trigger a turn that creates a pending guardian input request
2152
+ await controller.handleCallerUtterance("I need to access the account");
2153
+ // Allow turn to complete
2154
+ await new Promise((r) => setTimeout(r, 50));
2155
+
2156
+ // Verify a guardian input request is now pending
2157
+ expect(controller.getPendingConsultationQuestionId()).not.toBeNull();
2158
+ // Relay state is still 'connected' (not 'awaiting_guardian_decision')
2159
+ expect(relay.mockConnectionState).toBe("connected");
2160
+
2161
+ // Clear any tokens from the turn itself
2162
+ relay.sentTokens.length = 0;
2163
+
2164
+ // Wait for the silence timeout to fire
2165
+ await new Promise((r) => setTimeout(r, 200));
2166
+
2167
+ // "Are you still there?" should NOT have been sent because
2168
+ // pendingGuardianInput is active
2169
+ const silenceTokens = relay.sentTokens.filter((t) =>
2170
+ t.token.includes("Are you still there?"),
2171
+ );
2172
+ expect(silenceTokens.length).toBe(0);
2173
+
2174
+ controller.destroy();
2175
+ });
2176
+
2177
+ test('silence nudge resumes after guardian consultation resolves', async () => {
2178
+ mockSilenceTimeoutMs = 50; // Short timeout for testing
2179
+ mockConsultationTimeoutMs = 10_000; // Long enough to not interfere
2180
+
2181
+ // LLM emits an ASK_GUARDIAN marker so the controller creates a pendingGuardianInput
2182
+ mockStartVoiceTurn.mockImplementation(
2183
+ createMockVoiceTurn(["Let me check. [ASK_GUARDIAN: Is this approved?]"]),
2184
+ );
2185
+ const { relay, controller } = setupController();
2186
+
2187
+ // Trigger a turn that creates a pending guardian input request
2188
+ await controller.handleCallerUtterance("Can I do this?");
2189
+ await new Promise((r) => setTimeout(r, 50));
2190
+
2191
+ // Verify guardian input request is pending
2192
+ expect(controller.getPendingConsultationQuestionId()).not.toBeNull();
2193
+
2194
+ // Now resolve the consultation by providing an answer
2195
+ // Mock the next LLM turn for the answer-driven follow-up
2196
+ mockStartVoiceTurn.mockImplementation(
2197
+ createMockVoiceTurn(["Great news, your guardian approved the request."]),
2198
+ );
2199
+ await controller.handleUserAnswer("Yes, approved");
2200
+ // Allow the answer-driven turn to complete
2201
+ await new Promise((r) => setTimeout(r, 100));
2202
+
2203
+ // Guardian input request should now be cleared
2204
+ expect(controller.getPendingConsultationQuestionId()).toBeNull();
2205
+
2206
+ // Clear tokens from the answer turn
2207
+ relay.sentTokens.length = 0;
2208
+
2209
+ // Wait for the silence timeout to fire again
2210
+ await new Promise((r) => setTimeout(r, 200));
2211
+
2212
+ // "Are you still there?" SHOULD fire now that guardian wait is resolved
2213
+ const silenceTokens = relay.sentTokens.filter((t) =>
2214
+ t.token.includes("Are you still there?"),
2215
+ );
2216
+ expect(silenceTokens.length).toBe(1);
2217
+
2218
+ controller.destroy();
2219
+ });
2220
+
2141
2221
  // ── Pointer message regression tests ─────────────────────────────
2142
2222
 
2143
2223
  test("END_CALL marker writes completed pointer to origin conversation", async () => {
@@ -44,11 +44,13 @@ const log = getLogger('call-controller');
44
44
  type ControllerState = 'idle' | 'processing' | 'speaking';
45
45
 
46
46
  /**
47
- * Tracks a pending guardian consultation independently of the controller's
47
+ * Tracks a pending guardian input request independently of the controller's
48
48
  * turn state. This allows the call to continue normal turn processing
49
- * (idle -> processing -> speaking) while a consultation is outstanding.
49
+ * (idle -> processing -> speaking) while a guardian consultation is outstanding.
50
+ * Also used to suppress the silence nudge ("Are you still there?") while
51
+ * the caller is waiting on a guardian decision.
50
52
  */
51
- interface PendingConsultation {
53
+ interface PendingGuardianInput {
52
54
  questionText: string;
53
55
  questionId: string;
54
56
  toolApprovalMeta: { toolName: string; inputDigest: string } | null;
@@ -191,16 +193,17 @@ export class CallController {
191
193
  private durationTimer: ReturnType<typeof setTimeout> | null = null;
192
194
  private durationWarningTimer: ReturnType<typeof setTimeout> | null = null;
193
195
  /**
194
- * Tracks the currently pending guardian consultation, if any. Decoupled
196
+ * Tracks the currently pending guardian input request, if any. Decoupled
195
197
  * from the controller's turn state so callers can continue to trigger
196
- * normal turns while consultation is outstanding.
198
+ * normal turns while a guardian consultation is outstanding. Also
199
+ * suppresses the silence nudge while non-null.
197
200
  */
198
- private pendingConsultation: PendingConsultation | null = null;
201
+ private pendingGuardianInput: PendingGuardianInput | null = null;
199
202
  private durationEndTimer: ReturnType<typeof setTimeout> | null = null;
200
203
  private task: string | null;
201
204
  /** True when the call session was created via the inbound path (no outbound task). */
202
205
  private isInbound: boolean;
203
- /** Instructions queued while an LLM turn is in-flight or during pending consultation */
206
+ /** Instructions queued while an LLM turn is in-flight or during pending guardian input */
204
207
  private pendingInstructions: string[] = [];
205
208
  /** Ensures the call opener is triggered at most once per call. */
206
209
  private initialGreetingStarted = false;
@@ -271,7 +274,7 @@ export class CallController {
271
274
  * incoming answers to the correct consultation record.
272
275
  */
273
276
  getPendingConsultationQuestionId(): string | null {
274
- return this.pendingConsultation?.questionId ?? null;
277
+ return this.pendingGuardianInput?.questionId ?? null;
275
278
  }
276
279
 
277
280
  /**
@@ -357,7 +360,7 @@ export class CallController {
357
360
  * speaking.
358
361
  */
359
362
  async handleUserAnswer(answerText: string): Promise<boolean> {
360
- if (!this.pendingConsultation) {
363
+ if (!this.pendingGuardianInput) {
361
364
  log.warn(
362
365
  { callSessionId: this.callSessionId, state: this.state },
363
366
  'handleUserAnswer called but no pending consultation exists',
@@ -366,8 +369,8 @@ export class CallController {
366
369
  }
367
370
 
368
371
  // Clear the consultation timeout and record
369
- clearTimeout(this.pendingConsultation.timer);
370
- this.pendingConsultation = null;
372
+ clearTimeout(this.pendingGuardianInput.timer);
373
+ this.pendingGuardianInput = null;
371
374
 
372
375
  updateCallSession(this.callSessionId, { status: 'in_progress' });
373
376
 
@@ -436,7 +439,7 @@ export class CallController {
436
439
  if (this.silenceTimer) clearTimeout(this.silenceTimer);
437
440
  if (this.durationTimer) clearTimeout(this.durationTimer);
438
441
  if (this.durationWarningTimer) clearTimeout(this.durationWarningTimer);
439
- if (this.pendingConsultation) { clearTimeout(this.pendingConsultation.timer); this.pendingConsultation = null; }
442
+ if (this.pendingGuardianInput) { clearTimeout(this.pendingGuardianInput.timer); this.pendingGuardianInput = null; }
440
443
  if (this.durationEndTimer) { clearTimeout(this.durationEndTimer); this.durationEndTimer = null; }
441
444
  this.llmRunVersion++;
442
445
  this.abortCurrentTurn();
@@ -713,30 +716,30 @@ export class CallController {
713
716
  // the prior pending consultation (preserves tool scope on re-asks).
714
717
  const effectiveToolMeta = toolApprovalMeta
715
718
  ? { toolName: toolApprovalMeta.toolName, inputDigest: toolApprovalMeta.inputDigest }
716
- : this.pendingConsultation?.toolApprovalMeta ?? null;
719
+ : this.pendingGuardianInput?.toolApprovalMeta ?? null;
717
720
 
718
721
  // Coalesce repeated identical asks: if a consultation is already
719
722
  // pending for the same tool/action (or same informational question),
720
723
  // avoid churning requests and just keep the existing one.
721
- if (this.pendingConsultation) {
724
+ if (this.pendingGuardianInput) {
722
725
  const isSameToolAction =
723
- effectiveToolMeta && this.pendingConsultation.toolApprovalMeta
724
- ? effectiveToolMeta.toolName === this.pendingConsultation.toolApprovalMeta.toolName
725
- && effectiveToolMeta.inputDigest === this.pendingConsultation.toolApprovalMeta.inputDigest
726
- : !effectiveToolMeta && !this.pendingConsultation.toolApprovalMeta;
726
+ effectiveToolMeta && this.pendingGuardianInput.toolApprovalMeta
727
+ ? effectiveToolMeta.toolName === this.pendingGuardianInput.toolApprovalMeta.toolName
728
+ && effectiveToolMeta.inputDigest === this.pendingGuardianInput.toolApprovalMeta.inputDigest
729
+ : !effectiveToolMeta && !this.pendingGuardianInput.toolApprovalMeta;
727
730
 
728
731
  if (isSameToolAction) {
729
732
  // Same tool/action — coalesce. Keep the existing consultation
730
733
  // alive and skip creating a new request.
731
734
  log.info(
732
- { callSessionId: this.callSessionId, questionId: this.pendingConsultation.questionId },
735
+ { callSessionId: this.callSessionId, questionId: this.pendingGuardianInput.questionId },
733
736
  'Coalescing repeated ASK_GUARDIAN — same tool/action already pending',
734
737
  );
735
738
  recordCallEvent(this.callSessionId, 'guardian_consult_coalesced', { question: questionText });
736
739
  // Fall through to normal turn completion (idle + flushPendingInstructions)
737
740
  } else {
738
741
  // Materially different intent — supersede the old consultation.
739
- clearTimeout(this.pendingConsultation.timer);
742
+ clearTimeout(this.pendingGuardianInput.timer);
740
743
 
741
744
  // Expire the previous consultation's storage records so stale
742
745
  // guardian answers cannot match the old request.
@@ -752,7 +755,7 @@ export class CallController {
752
755
  );
753
756
  }
754
757
 
755
- this.pendingConsultation = null;
758
+ this.pendingGuardianInput = null;
756
759
 
757
760
  // Dispatch the new consultation with effective tool metadata.
758
761
  // The previous request ID is passed through so the dispatch
@@ -773,10 +776,10 @@ export class CallController {
773
776
  // Without this, the consultation timeout can fire on an already-ended
774
777
  // call, overwriting 'completed' status back to 'in_progress' and
775
778
  // starting a new LLM turn on a dead session. Similarly, a late
776
- // handleUserAnswer could be accepted since pendingConsultation is
779
+ // handleUserAnswer could be accepted since pendingGuardianInput is
777
780
  // still non-null.
778
- if (this.pendingConsultation) {
779
- clearTimeout(this.pendingConsultation.timer);
781
+ if (this.pendingGuardianInput) {
782
+ clearTimeout(this.pendingGuardianInput.timer);
780
783
 
781
784
  // Expire store-side consultation records so clients don't observe
782
785
  // a completed call with a dangling pendingQuestion, and guardian
@@ -787,7 +790,7 @@ export class CallController {
787
790
  expireCanonicalGuardianRequest(previousRequest.id);
788
791
  }
789
792
 
790
- this.pendingConsultation = null;
793
+ this.pendingGuardianInput = null;
791
794
  }
792
795
 
793
796
  const currentSession = getCallSession(this.callSessionId);
@@ -928,7 +931,7 @@ export class CallController {
928
931
  // record, not the global controller state.
929
932
  const consultationTimer = setTimeout(() => {
930
933
  // Only fire if this consultation is still the active one
931
- if (!this.pendingConsultation || this.pendingConsultation.questionId !== pendingQuestion.id) return;
934
+ if (!this.pendingGuardianInput || this.pendingGuardianInput.questionId !== pendingQuestion.id) return;
932
935
 
933
936
  log.info({ callSessionId: this.callSessionId }, 'Guardian consultation timed out');
934
937
 
@@ -960,7 +963,7 @@ export class CallController {
960
963
 
961
964
  // Expire pending questions and update call state
962
965
  expirePendingQuestions(this.callSessionId);
963
- this.pendingConsultation = null;
966
+ this.pendingGuardianInput = null;
964
967
  updateCallSession(this.callSessionId, { status: 'in_progress' });
965
968
  this.guardianUnavailableForCall = true;
966
969
  recordCallEvent(this.callSessionId, 'guardian_consultation_timed_out', { question: questionText });
@@ -982,7 +985,7 @@ export class CallController {
982
985
  }
983
986
  }, getUserConsultationTimeoutMs());
984
987
 
985
- this.pendingConsultation = {
988
+ this.pendingGuardianInput = {
986
989
  questionText,
987
990
  questionId: pendingQuestion.id,
988
991
  toolApprovalMeta: effectiveToolMeta,
@@ -1067,7 +1070,9 @@ export class CallController {
1067
1070
  // During guardian wait states, the relay heartbeat timer handles
1068
1071
  // periodic updates — suppress the generic "Are you still there?"
1069
1072
  // which is confusing when the caller is waiting on a decision.
1070
- if (this.relay.getConnectionState() === 'awaiting_guardian_decision') {
1073
+ // Two paths: in-call consultation (pendingGuardianInput) and
1074
+ // inbound access-request wait (relay state).
1075
+ if (this.pendingGuardianInput || this.relay.getConnectionState() === 'awaiting_guardian_decision') {
1071
1076
  log.debug({ callSessionId: this.callSessionId }, 'Silence timeout suppressed during guardian wait');
1072
1077
  return;
1073
1078
  }
@@ -629,7 +629,7 @@ Or re-run the public-ingress skill to auto-detect and save the new URL.
629
629
 
630
630
  ### Call drops after 30 seconds of silence
631
631
 
632
- The system has a 30-second silence timeout. If nobody speaks for 30 seconds, the agent will ask "Are you still there?" This is expected behavior.
632
+ The system has a 30-second silence timeout. If nobody speaks for 30 seconds during normal conversation, the agent will ask "Are you still there?" This is expected behavior. During guardian wait states (inbound access-request wait or in-call guardian consultation wait), this generic silence nudge is suppressed — the guardian-wait heartbeat messaging is used instead.
633
633
 
634
634
  ### Call quality sounds off
635
635
 
@@ -1,8 +1,18 @@
1
1
  /**
2
2
  * JWT bearer auth middleware for the runtime HTTP server.
3
3
  *
4
- * Extracts `Authorization: Bearer <token>`, verifies the JWT with
5
- * `aud=vellum-daemon`, and builds an AuthContext from the claims.
4
+ * Extracts `Authorization: Bearer <token>`, verifies the JWT, and
5
+ * builds an AuthContext from the claims.
6
+ *
7
+ * Accepts two JWT audiences:
8
+ * - `vellum-daemon` — primary audience, used by the gateway's runtime
9
+ * proxy after token exchange and by daemon-minted delivery tokens.
10
+ * - `vellum-gateway` — fallback audience, used by direct local clients
11
+ * (e.g., the macOS app's SettingsStore) that hold a guardian-issued
12
+ * JWT but call daemon endpoints directly without routing through the
13
+ * gateway's runtime proxy. Both daemon and gateway share the same
14
+ * HMAC signing key (~/.vellum/protected/actor-token-signing-key),
15
+ * so the signature is valid regardless of audience.
6
16
  *
7
17
  * Replaces both the legacy bearer shared-secret check and the
8
18
  * actor-token HMAC middleware with a single JWT verification path.
@@ -12,16 +22,16 @@
12
22
  * so downstream code always has a typed context to consume.
13
23
  */
14
24
 
15
- import { isHttpAuthDisabled } from '../../config/env.js';
16
- import { getLogger } from '../../util/logger.js';
17
- import { DAEMON_INTERNAL_ASSISTANT_ID } from '../assistant-scope.js';
18
- import { extractBearerToken } from '../middleware/auth.js';
19
- import { buildAuthContext } from './context.js';
20
- import { resolveScopeProfile } from './scopes.js';
21
- import { verifyToken } from './token-service.js';
22
- import type { AuthContext } from './types.js';
25
+ import { isHttpAuthDisabled } from "../../config/env.js";
26
+ import { getLogger } from "../../util/logger.js";
27
+ import { DAEMON_INTERNAL_ASSISTANT_ID } from "../assistant-scope.js";
28
+ import { extractBearerToken } from "../middleware/auth.js";
29
+ import { buildAuthContext } from "./context.js";
30
+ import { resolveScopeProfile } from "./scopes.js";
31
+ import { verifyToken } from "./token-service.js";
32
+ import type { AuthContext } from "./types.js";
23
33
 
24
- const log = getLogger('auth-middleware');
34
+ const log = getLogger("auth-middleware");
25
35
 
26
36
  // ---------------------------------------------------------------------------
27
37
  // Result type
@@ -43,11 +53,11 @@ export type AuthenticateResult =
43
53
  function buildDevBypassContext(): AuthContext {
44
54
  return {
45
55
  subject: `actor:${DAEMON_INTERNAL_ASSISTANT_ID}:dev-bypass`,
46
- principalType: 'actor',
56
+ principalType: "actor",
47
57
  assistantId: DAEMON_INTERNAL_ASSISTANT_ID,
48
- actorPrincipalId: 'dev-bypass',
49
- scopeProfile: 'actor_client_v1',
50
- scopes: resolveScopeProfile('actor_client_v1'),
58
+ actorPrincipalId: "dev-bypass",
59
+ scopeProfile: "actor_client_v1",
60
+ scopes: resolveScopeProfile("actor_client_v1"),
51
61
  policyEpoch: Number.MAX_SAFE_INTEGER,
52
62
  };
53
63
  }
@@ -72,36 +82,82 @@ export function authenticateRequest(req: Request): AuthenticateResult {
72
82
 
73
83
  const rawToken = extractBearerToken(req);
74
84
  if (!rawToken) {
75
- log.warn({ reason: 'missing_token', path }, 'Auth denied: missing Authorization header');
85
+ log.warn(
86
+ { reason: "missing_token", path },
87
+ "Auth denied: missing Authorization header",
88
+ );
76
89
  return {
77
90
  ok: false,
78
91
  response: Response.json(
79
- { error: { code: 'UNAUTHORIZED', message: 'Missing Authorization header' } },
92
+ {
93
+ error: {
94
+ code: "UNAUTHORIZED",
95
+ message: "Missing Authorization header",
96
+ },
97
+ },
80
98
  { status: 401 },
81
99
  ),
82
100
  };
83
101
  }
84
102
 
85
- // Verify the JWT with audience = vellum-daemon
86
- const verifyResult = verifyToken(rawToken, 'vellum-daemon');
103
+ // Verify the JWT: prefer vellum-daemon audience (gateway-proxied requests
104
+ // and daemon-minted tokens), but also accept vellum-gateway audience for
105
+ // direct local clients (macOS SettingsStore) that hold a guardian-issued JWT
106
+ // and call daemon endpoints without routing through the gateway runtime proxy.
107
+ let verifyResult = verifyToken(rawToken, "vellum-daemon");
108
+ if (
109
+ !verifyResult.ok &&
110
+ verifyResult.reason?.startsWith("audience_mismatch")
111
+ ) {
112
+ verifyResult = verifyToken(rawToken, "vellum-gateway");
113
+ // Normalize gateway-audience claims to daemon context so that
114
+ // buildAuthContext applies the same assistantId normalization
115
+ // (aud=vellum-daemon → assistantId='self') that gateway-exchanged
116
+ // tokens receive. Without this rewrite, the external assistant ID
117
+ // from the guardian-issued JWT would leak into daemon-internal
118
+ // scoping (storage keys, routing), violating the invariant
119
+ // documented in context.ts:30-33.
120
+ if (verifyResult.ok) {
121
+ verifyResult = {
122
+ ok: true,
123
+ claims: { ...verifyResult.claims, aud: "vellum-daemon" },
124
+ };
125
+ }
126
+ }
87
127
  if (!verifyResult.ok) {
88
128
  // Stale policy epoch gets a specific error code so clients can refresh
89
- if (verifyResult.reason === 'stale_policy_epoch') {
90
- log.warn({ reason: 'stale_policy_epoch', path }, 'Auth denied: stale policy epoch');
129
+ if (verifyResult.reason === "stale_policy_epoch") {
130
+ log.warn(
131
+ { reason: "stale_policy_epoch", path },
132
+ "Auth denied: stale policy epoch",
133
+ );
91
134
  return {
92
135
  ok: false,
93
136
  response: Response.json(
94
- { error: { code: 'refresh_required', message: 'Token policy epoch is stale; refresh required' } },
137
+ {
138
+ error: {
139
+ code: "refresh_required",
140
+ message: "Token policy epoch is stale; refresh required",
141
+ },
142
+ },
95
143
  { status: 401 },
96
144
  ),
97
145
  };
98
146
  }
99
147
 
100
- log.warn({ reason: verifyResult.reason, path }, 'Auth denied: JWT verification failed');
148
+ log.warn(
149
+ { reason: verifyResult.reason, path },
150
+ "Auth denied: JWT verification failed",
151
+ );
101
152
  return {
102
153
  ok: false,
103
154
  response: Response.json(
104
- { error: { code: 'UNAUTHORIZED', message: `Invalid token: ${verifyResult.reason}` } },
155
+ {
156
+ error: {
157
+ code: "UNAUTHORIZED",
158
+ message: `Invalid token: ${verifyResult.reason}`,
159
+ },
160
+ },
105
161
  { status: 401 },
106
162
  ),
107
163
  };
@@ -112,12 +168,17 @@ export function authenticateRequest(req: Request): AuthenticateResult {
112
168
  if (!contextResult.ok) {
113
169
  log.warn(
114
170
  { reason: contextResult.reason, path, sub: verifyResult.claims.sub },
115
- 'Auth denied: invalid JWT claims',
171
+ "Auth denied: invalid JWT claims",
116
172
  );
117
173
  return {
118
174
  ok: false,
119
175
  response: Response.json(
120
- { error: { code: 'UNAUTHORIZED', message: `Invalid token claims: ${contextResult.reason}` } },
176
+ {
177
+ error: {
178
+ code: "UNAUTHORIZED",
179
+ message: `Invalid token claims: ${contextResult.reason}`,
180
+ },
181
+ },
121
182
  { status: 401 },
122
183
  ),
123
184
  };