@vellumai/assistant 0.6.1 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. package/docker-entrypoint.sh +12 -2
  2. package/node_modules/@vellumai/ces-contracts/src/handles.ts +7 -9
  3. package/openapi.yaml +1 -1
  4. package/package.json +1 -1
  5. package/src/__tests__/assistant-event-hub.test.ts +30 -0
  6. package/src/__tests__/checker.test.ts +104 -170
  7. package/src/__tests__/cli-command-risk-guard.test.ts +1 -1
  8. package/src/__tests__/context-overflow-approval.test.ts +5 -5
  9. package/src/__tests__/conversation-analysis-routes.test.ts +169 -0
  10. package/src/__tests__/conversation-directories-parse.test.ts +105 -0
  11. package/src/__tests__/credential-execution-approval-bridge.test.ts +0 -2
  12. package/src/__tests__/init-feature-flag-overrides.test.ts +167 -0
  13. package/src/__tests__/inline-command-runner.test.ts +7 -5
  14. package/src/__tests__/log-export-workspace.test.ts +190 -0
  15. package/src/__tests__/managed-credential-catalog-cli.test.ts +12 -14
  16. package/src/__tests__/navigate-settings-tab.test.ts +14 -1
  17. package/src/__tests__/notification-broadcaster.test.ts +65 -0
  18. package/src/__tests__/onboarding-template-contract.test.ts +5 -4
  19. package/src/__tests__/pkb-autoinject.test.ts +96 -0
  20. package/src/__tests__/require-fresh-approval.test.ts +0 -2
  21. package/src/__tests__/sandbox-diagnostics.test.ts +1 -32
  22. package/src/__tests__/terminal-sandbox.test.ts +1 -1
  23. package/src/__tests__/terminal-tools.test.ts +2 -5
  24. package/src/__tests__/test-preload.ts +14 -0
  25. package/src/__tests__/tool-domain-event-publisher.test.ts +0 -1
  26. package/src/__tests__/tool-executor-lifecycle-events.test.ts +1 -8
  27. package/src/__tests__/tool-executor.test.ts +0 -1
  28. package/src/__tests__/transport-hints-queue.test.ts +77 -0
  29. package/src/__tests__/trust-store.test.ts +4 -4
  30. package/src/__tests__/workspace-migration-030-seed-pkb-autoinject.test.ts +168 -0
  31. package/src/__tests__/workspace-policy.test.ts +2 -7
  32. package/src/agent/loop.ts +0 -29
  33. package/src/channels/types.ts +5 -0
  34. package/src/cli/__tests__/run-assistant-command.ts +34 -7
  35. package/src/cli/__tests__/unknown-command.test.ts +33 -0
  36. package/src/cli/commands/default-action.ts +68 -1
  37. package/src/cli/commands/oauth/__tests__/connect.test.ts +27 -0
  38. package/src/cli/commands/oauth/connect.ts +11 -0
  39. package/src/cli/commands/platform/__tests__/connect.test.ts +1 -1
  40. package/src/cli/commands/platform/__tests__/disconnect.test.ts +1 -1
  41. package/src/cli/commands/platform/__tests__/status.test.ts +1 -1
  42. package/src/cli/program.ts +9 -2
  43. package/src/config/assistant-feature-flags.ts +59 -55
  44. package/src/config/bundled-skills/app-builder/SKILL.md +87 -4
  45. package/src/config/bundled-skills/gmail/SKILL.md +11 -6
  46. package/src/config/bundled-skills/gmail/TOOLS.json +1 -1
  47. package/src/config/bundled-skills/gmail/tools/gmail-sender-digest.ts +2 -1
  48. package/src/config/bundled-skills/settings/TOOLS.json +1 -1
  49. package/src/config/bundled-skills/settings/tools/navigate-settings-tab.ts +8 -3
  50. package/src/config/feature-flag-registry.json +2 -2
  51. package/src/config/schemas/services.ts +8 -0
  52. package/src/credential-execution/approval-bridge.ts +0 -1
  53. package/src/credential-execution/managed-catalog.ts +3 -7
  54. package/src/daemon/config-watcher.ts +6 -2
  55. package/src/daemon/context-overflow-approval.ts +0 -1
  56. package/src/daemon/conversation-agent-loop.ts +33 -12
  57. package/src/daemon/conversation-attachments.ts +0 -1
  58. package/src/daemon/conversation-messaging.ts +3 -0
  59. package/src/daemon/conversation-process.ts +18 -2
  60. package/src/daemon/conversation-queue-manager.ts +8 -0
  61. package/src/daemon/conversation-runtime-assembly.ts +64 -7
  62. package/src/daemon/conversation-surfaces.ts +65 -0
  63. package/src/daemon/conversation-tool-setup.ts +0 -3
  64. package/src/daemon/conversation.ts +3 -5
  65. package/src/daemon/handlers/conversations.ts +2 -1
  66. package/src/daemon/handlers/shared.ts +7 -0
  67. package/src/daemon/lifecycle.ts +21 -1
  68. package/src/daemon/message-types/conversations.ts +4 -0
  69. package/src/daemon/message-types/messages.ts +0 -1
  70. package/src/daemon/message-types/notifications.ts +12 -0
  71. package/src/daemon/message-types/settings.ts +12 -0
  72. package/src/daemon/server.ts +21 -24
  73. package/src/daemon/transport-hints.ts +33 -0
  74. package/src/index.ts +1 -1
  75. package/src/memory/conversation-crud.ts +15 -10
  76. package/src/memory/conversation-directories.ts +39 -0
  77. package/src/memory/conversation-group-migration.ts +65 -5
  78. package/src/memory/embedding-local.ts +1 -1
  79. package/src/memory/graph/capability-seed.ts +3 -5
  80. package/src/memory/group-crud.ts +25 -9
  81. package/src/messaging/provider.ts +1 -1
  82. package/src/notifications/broadcaster.ts +6 -0
  83. package/src/notifications/conversation-pairing.ts +12 -4
  84. package/src/notifications/emit-signal.ts +14 -0
  85. package/src/notifications/signal.ts +11 -0
  86. package/src/oauth/platform-connection.test.ts +2 -2
  87. package/src/oauth/seed-providers.ts +1 -0
  88. package/src/permissions/checker.ts +3 -3
  89. package/src/permissions/defaults.ts +7 -8
  90. package/src/permissions/prompter.ts +0 -2
  91. package/src/platform/client.ts +1 -1
  92. package/src/prompts/templates/BOOTSTRAP.md +14 -5
  93. package/src/prompts/templates/SOUL.md +11 -11
  94. package/src/runtime/assistant-event-hub.ts +22 -0
  95. package/src/runtime/auth/token-service.ts +8 -0
  96. package/src/runtime/routes/conversation-analysis-routes.ts +18 -6
  97. package/src/runtime/routes/conversation-routes.ts +9 -3
  98. package/src/runtime/routes/group-routes.ts +22 -8
  99. package/src/runtime/routes/log-export/AGENTS.md +104 -0
  100. package/src/runtime/routes/log-export/__tests__/workspace-allowlist-error-contract.test.ts +103 -0
  101. package/src/runtime/routes/log-export/__tests__/workspace-allowlist.test.ts +716 -0
  102. package/src/runtime/routes/log-export/workspace-allowlist.ts +458 -0
  103. package/src/runtime/routes/log-export-routes.ts +18 -3
  104. package/src/skills/inline-command-runner.ts +12 -14
  105. package/src/tools/permission-checker.ts +0 -18
  106. package/src/tools/secret-detection-handler.ts +0 -1
  107. package/src/tools/skills/sandbox-runner.ts +3 -6
  108. package/src/tools/terminal/sandbox-diagnostics.ts +4 -4
  109. package/src/tools/terminal/sandbox.ts +4 -1
  110. package/src/tools/terminal/shell.ts +3 -5
  111. package/src/tools/types.ts +0 -3
  112. package/src/watcher/provider-types.ts +1 -1
  113. package/src/workspace/migrations/029-seed-pkb.ts +1 -0
  114. package/src/workspace/migrations/030-seed-pkb-autoinject.ts +73 -0
  115. package/src/workspace/migrations/registry.ts +2 -0
@@ -20,7 +20,6 @@ import { existsSync, readFileSync } from "node:fs";
20
20
  import { homedir } from "node:os";
21
21
  import { dirname, join } from "node:path";
22
22
 
23
- import { getIsContainerized } from "./env-registry.js";
24
23
  import type { AssistantConfig } from "./schema.js";
25
24
 
26
25
  // ---------------------------------------------------------------------------
@@ -173,61 +172,49 @@ function loadOverridesFromFile(): Record<string, boolean> {
173
172
  }
174
173
 
175
174
  /**
176
- * Load override values from the gateway via synchronous HTTP call.
175
+ * Fetch override values from the gateway via async HTTP.
177
176
  *
178
- * Follows the trust-client pattern: uses `Bun.spawnSync` + `curl` to make
179
- * a blocking GET request to the gateway's feature-flags endpoint. The
180
- * gateway returns `{ flags: Array<{ key, enabled, ... }> }` and we extract
181
- * just the key → enabled map.
177
+ * Returns the gateway's merged feature flag map (persisted > remote >
178
+ * registry), or an empty record on any failure (network, auth, parse).
182
179
  */
183
- function loadOverridesFromGateway(): Record<string, boolean> {
180
+ async function fetchOverridesFromGateway(): Promise<Record<string, boolean>> {
184
181
  try {
185
182
  // Lazy-import to avoid circular dependency and keep this module
186
183
  // importable from bootstrap code when not in containerized mode.
187
184
  const { getGatewayInternalBaseUrl } =
188
185
  // eslint-disable-next-line @typescript-eslint/no-require-imports
189
186
  require("./env.js") as typeof import("./env.js");
190
- const { mintEdgeRelayToken } =
187
+ const {
188
+ mintEdgeRelayToken,
189
+ isSigningKeyInitialized,
190
+ initAuthSigningKey,
191
+ resolveSigningKey,
192
+ } =
191
193
  // eslint-disable-next-line @typescript-eslint/no-require-imports
192
194
  require("../runtime/auth/token-service.js") as typeof import("../runtime/auth/token-service.js");
193
195
 
196
+ // CLI subprocesses don't run daemon startup, so the signing key
197
+ // may not be initialized yet. Initialize it now so mintEdgeRelayToken
198
+ // can produce a valid JWT for the gateway request.
199
+ if (!isSigningKeyInitialized()) {
200
+ initAuthSigningKey(resolveSigningKey());
201
+ }
202
+
194
203
  const url = `${getGatewayInternalBaseUrl()}/v1/feature-flags`;
195
204
  const token = mintEdgeRelayToken();
196
205
 
197
- const proc = Bun.spawnSync(
198
- [
199
- "curl",
200
- "-s",
201
- "-S",
202
- "-X",
203
- "GET",
204
- "--max-time",
205
- "10",
206
- "-H",
207
- `Authorization: Bearer ${token}`,
208
- "-H",
209
- "Accept: application/json",
210
- "-w",
211
- "\n%{http_code}",
212
- url,
213
- ],
214
- { stdout: "pipe", stderr: "pipe" },
215
- );
216
-
217
- if (proc.exitCode !== 0) return {};
218
-
219
- const output = proc.stdout.toString().trim();
220
- const lastNewline = output.lastIndexOf("\n");
221
- const responseBody = lastNewline >= 0 ? output.slice(0, lastNewline) : "";
222
- const statusCode = parseInt(
223
- lastNewline >= 0 ? output.slice(lastNewline + 1) : output,
224
- 10,
225
- );
226
-
227
- if (statusCode < 200 || statusCode >= 300) return {};
228
- if (!responseBody) return {};
229
-
230
- const parsed = JSON.parse(responseBody) as {
206
+ const response = await fetch(url, {
207
+ method: "GET",
208
+ headers: {
209
+ Authorization: `Bearer ${token}`,
210
+ Accept: "application/json",
211
+ },
212
+ signal: AbortSignal.timeout(10_000),
213
+ });
214
+
215
+ if (!response.ok) return {};
216
+
217
+ const parsed = (await response.json()) as {
231
218
  flags?: Array<{ key: string; enabled: boolean }>;
232
219
  };
233
220
  if (!Array.isArray(parsed.flags)) return {};
@@ -245,25 +232,42 @@ function loadOverridesFromGateway(): Record<string, boolean> {
245
232
  }
246
233
 
247
234
  /**
248
- * Load overrides, preferring the gateway HTTP API.
235
+ * Pre-populate the override cache from the gateway (async).
249
236
  *
250
- * In containerized mode, always uses the gateway. In local mode, tries
251
- * the gateway first and falls back to `loadOverridesFromFile()` when
252
- * the gateway is not yet available (startup race).
237
+ * Call this once during startup (daemon or CLI entry) before any sync
238
+ * `isAssistantFeatureFlagEnabled` calls. In containerized mode, always
239
+ * uses the gateway. In local mode, falls back to the local file when
240
+ * the gateway is unreachable.
253
241
  *
254
- * Results are cached at module level.
242
+ * On failure, the cache is left unset so subsequent sync calls fall
243
+ * through to the file-based fallback rather than caching an empty map
244
+ * that masks all overrides for the process lifetime.
255
245
  */
256
- function loadOverrides(): Record<string, boolean> {
257
- if (cachedOverrides != null) return cachedOverrides;
258
-
259
- const gatewayOverrides = loadOverridesFromGateway();
260
- if (Object.keys(gatewayOverrides).length > 0 || getIsContainerized()) {
246
+ export async function initFeatureFlagOverrides(): Promise<void> {
247
+ const gatewayOverrides = await fetchOverridesFromGateway();
248
+ if (Object.keys(gatewayOverrides).length > 0) {
261
249
  cachedOverrides = gatewayOverrides;
262
- return cachedOverrides;
250
+ return;
263
251
  }
264
252
 
265
- // Graceful fallback: in local mode, if the gateway hasn't started yet
266
- // (empty response), read overrides from file as a temporary measure.
253
+ // Gateway returned empty or failed. Leave the cache unset so
254
+ // loadOverrides() falls through to file on the next sync read,
255
+ // regardless of containerized vs local mode.
256
+ }
257
+
258
+ /**
259
+ * Read cached overrides synchronously.
260
+ *
261
+ * If `initFeatureFlagOverrides()` was called at startup, this returns the
262
+ * pre-populated cache. Otherwise falls back to the local file — this
263
+ * ensures the resolver never blocks on a network call.
264
+ */
265
+ function loadOverrides(): Record<string, boolean> {
266
+ if (cachedOverrides != null) return cachedOverrides;
267
+
268
+ // Cache not yet populated (initFeatureFlagOverrides wasn't called or
269
+ // hasn't finished). Fall back to the local file so the resolver still
270
+ // works, just without gateway data.
267
271
  cachedOverrides = loadOverridesFromFile();
268
272
  return cachedOverrides;
269
273
  }
@@ -448,9 +448,91 @@ Important:
448
448
  - All operations are async - use `async/await`
449
449
  - Wrap all calls in `try/catch`
450
450
 
451
+ #### Custom route handlers (user-defined routes)
452
+
453
+ When the app needs server-side persistence, custom API logic, or workspace file access, use **user-defined routes**. Route handlers are TypeScript or JavaScript files that live in the workspace `routes/` directory and are served under the `/v1/x/` URL path.
454
+
455
+ **Common use cases:** CRUD storage, file-based persistence, search/aggregation, external API proxying, webhook receivers.
456
+
457
+ **Handler file convention:**
458
+
459
+ Each handler file exports named functions for the HTTP methods it supports (`GET`, `POST`, `PUT`, `PATCH`, `DELETE`). Handlers use the standard Web API `Request`/`Response` signature.
460
+
461
+ ```
462
+ {workspaceDir}/routes/
463
+ items.ts # Handles /v1/x/items
464
+ items/
465
+ [id].ts # Not supported — use query params instead
466
+ index.ts # Also handles /v1/x/items (index convention)
467
+ ```
468
+
469
+ **Example handler — JSON file persistence:**
470
+
471
+ ```typescript
472
+ // routes/items.ts
473
+ import { readFileSync, writeFileSync, mkdirSync, existsSync } from "node:fs";
474
+ import { join } from "node:path";
475
+
476
+ export const description = "Item CRUD — stores records as a JSON file";
477
+
478
+ const DATA_DIR = join(process.env.VELLUM_WORKSPACE_DIR!, "data");
479
+ const DATA_FILE = join(DATA_DIR, "items.json");
480
+
481
+ function loadItems(): Array<Record<string, unknown>> {
482
+ mkdirSync(DATA_DIR, { recursive: true });
483
+ if (!existsSync(DATA_FILE)) return [];
484
+ return JSON.parse(readFileSync(DATA_FILE, "utf-8"));
485
+ }
486
+
487
+ function saveItems(items: Array<Record<string, unknown>>): void {
488
+ mkdirSync(DATA_DIR, { recursive: true });
489
+ writeFileSync(DATA_FILE, JSON.stringify(items, null, 2));
490
+ }
491
+
492
+ export function GET(): Response {
493
+ return Response.json(loadItems());
494
+ }
495
+
496
+ export async function POST(request: Request): Promise<Response> {
497
+ const body = await request.json();
498
+ const items = loadItems();
499
+ const item = { id: crypto.randomUUID(), ...body, createdAt: new Date().toISOString() };
500
+ items.push(item);
501
+ saveItems(items);
502
+ return Response.json(item, { status: 201 });
503
+ }
504
+ ```
505
+
506
+ **Calling routes from the app frontend:**
507
+
508
+ Apps call custom routes via `fetch()` using the `/v1/x/` prefix. The assistant's runtime HTTP server requires the `/v1/` namespace for all API requests.
509
+
510
+ ```typescript
511
+ // In a TSX component or HTML script
512
+ const res = await fetch("/v1/x/items");
513
+ const items = await res.json();
514
+
515
+ // Create a new item
516
+ await fetch("/v1/x/items", {
517
+ method: "POST",
518
+ headers: { "Content-Type": "application/json" },
519
+ body: JSON.stringify({ name: "New item", status: "active" }),
520
+ });
521
+ ```
522
+
523
+ **Key rules:**
524
+
525
+ - Always create the route handler files via `file_write` before calling `app_refresh`
526
+ - Export an optional `description` string for CLI discoverability (`assistant routes list`)
527
+ - Handlers have full Node.js API access — `fs`, `path`, `crypto`, etc.
528
+ - Handlers get a 30-second timeout per request
529
+ - Files are hot-reloaded on change (mtime-based cache)
530
+ - Use `.ts` (preferred) or `.js` extensions
531
+ - Route resolution: `routes/foo.ts` → `/v1/x/foo`, `routes/bar/index.ts` → `/v1/x/bar`
532
+
451
533
  #### Client-side state management
452
534
 
453
- `localStorage` and `sessionStorage` are available for ephemeral UI state (filters, view modes, collapsed state, preferences, form drafts). Use `window.vellum.data` for persistent app records, `localStorage` for UI preferences.
535
+ `localStorage` and `sessionStorage` are available for ephemeral UI state (filters, view modes, collapsed state, preferences, form drafts). Use custom routes for persistent app records, `localStorage` for UI preferences.
454
536
 
455
537
  <!-- feature:app-builder-multifile:alt -->
456
538
 
@@ -467,7 +549,8 @@ let allRecords = [];
467
549
 
468
550
  async function loadRecords() {
469
551
  try {
470
- allRecords = await window.vellum.data.query();
552
+ const res = await fetch("/v1/x/records");
553
+ allRecords = await res.json();
471
554
  render();
472
555
  } catch (err) {
473
556
  console.error("Failed to load:", err);
@@ -556,7 +639,7 @@ Every app must meet these baselines:
556
639
 
557
640
  ## Presentation Slide Design
558
641
 
559
- Slides are a different domain from apps. Skip app-specific patterns (contextual headers, search/filter, toast notifications, form validation, data bridge). Slides are static content — build navigation and layouts with custom HTML/CSS.
642
+ Slides are a different domain from apps. Skip app-specific patterns (contextual headers, search/filter, toast notifications, form validation, custom routes). Slides are static content — build navigation and layouts with custom HTML/CSS.
560
643
 
561
644
  **Key principles:**
562
645
 
@@ -569,7 +652,7 @@ Slides are a different domain from apps. Skip app-specific patterns (contextual
569
652
 
570
653
  ## Error Handling
571
654
 
572
- - All `window.vellum.data` calls must be wrapped in `try/catch` with user-friendly feedback.
655
+ - All `fetch()` calls to custom routes must be wrapped in `try/catch` with user-friendly feedback.
573
656
  - Never let a failed operation silently pass - always show a toast or inline error.
574
657
  - If the page loads with no data, show a designed empty state (`.v-empty-state`).
575
658
  - For forms, show validation errors inline next to the relevant field.
@@ -110,22 +110,27 @@ When a user asks to declutter, clean up, or organize their email - start scannin
110
110
 
111
111
  ### Workflow
112
112
 
113
- 1. **Scan**: Call `gmail_sender_digest`. Default query targets promotions from the last 90 days.
113
+ 1. **Scan**: Call `gmail_sender_digest`. Default query targets promotions currently in the inbox from the last 90 days (`in:inbox category:promotions newer_than:90d`). Counts shown in the table reflect only what is currently in the inbox — these are the emails that will be archived.
114
114
  2. **Present**: Show results as a `ui_show` table with `selectionMode: "multiple"`:
115
115
  - **Columns (exactly 3)**: Sender, Emails Found, Unsub?
116
116
  - **Unsub? cell values**: Use rich cell format: `{ "text": "Yes", "icon": "checkmark.circle.fill", "iconColor": "success" }` when `has_unsubscribe` is true, `{ "text": "No", "icon": "minus.circle", "iconColor": "muted" }` when false.
117
117
  - **Pre-select all rows** (`selected: true`) - users deselect what they want to keep
118
118
  - **Caption**: Include two parts separated by a newline: (1) data scope, e.g. "Newsletters, notifications, and outreach from last 90 days. Deselect anything you want to keep." (adjusted to match the query used), and (2) the Unsub? column legend: "Unsub? - \"Yes\" means these emails contain an unsubscribe link, so I can opt you out automatically. \"No\" means no unsubscribe link was found - these will be archived but you may continue receiving them."
119
119
  - **Action buttons (exactly 2)**: "Archive & Unsubscribe" (primary), "Archive Only" (secondary). **NEVER offer Delete, Trash, or any destructive action.**
120
- 3. **Wait for user action**: Stop and wait. Do NOT proceed to archiving or unsubscribing until the user clicks one of the action buttons on the table. When the user clicks an action button:
120
+ 3. **Embed scan_id in button data**: When constructing the action buttons in `ui_show`, include the `scan_id` from the `gmail_sender_digest` result in each button's `data` field. This ensures `scan_id` is forwarded automatically when the user clicks, so the LLM does not need to recall it from earlier context:
121
+ ```json
122
+ { "id": "archive_unsubscribe", "label": "Archive & Unsubscribe", "style": "primary", "data": { "scan_id": "<scan_id value here>" } }
123
+ ```
124
+ 4. **Wait for user action**: Stop and wait. Do NOT proceed to archiving or unsubscribing until the user clicks one of the action buttons on the table. When the user clicks an action button you will receive a surface action message containing `action data: { scan_id, selectedIds }`:
125
+ - `selectedIds` are **sender IDs** (the `id` values from the scan result rows, base64-encoded email addresses) — NOT Gmail message IDs. Always use them as `sender_ids` with `scan_id`, never as `message_ids`.
121
126
  - **Dismiss the table immediately** with `ui_dismiss` - it collapses to a completion chip
122
127
  - **Show a `task_progress` card** with steps for each phase (e.g., "Archiving 89 senders (2,400 emails)", "Unsubscribing from 72 senders"). Update each step from `in_progress` → `completed` as each phase finishes.
123
128
  - When all senders are processed, set the progress card's `status: "completed"`.
124
- 4. **Act on selection** - batch, don't loop:
125
- - **Archive all at once**: Call `gmail_archive` **once** with `scan_id` + **all** selected senders' `id` values in the `sender_ids` array. The tool resolves message IDs server-side and batches the Gmail API calls internally - never loop sender-by-sender.
129
+ 5. **Act on selection** - batch, don't loop:
130
+ - **Archive all at once**: Call `gmail_archive` **once** with `scan_id` (from action data) + `sender_ids` set to all `selectedIds` from the action data. The tool resolves message IDs server-side and batches the Gmail API calls internally - never loop sender-by-sender. **Never** pass `selectedIds` as `message_ids` — they are sender IDs, not Gmail message IDs.
126
131
  - **Unsubscribe in bulk**: If the action is "Archive & Unsubscribe", call `gmail_unsubscribe` for each sender that has `has_unsubscribe: true` - but emit **all** unsubscribe tool calls in a **single assistant response** (parallel tool use) rather than one-at-a-time across separate turns.
127
- 5. **Accurate summary**: The scan counts are exact - the `message_count` shown in the table matches the number of messages archived. Format: "Cleaned up [total_archived] emails from [sender_count] senders. Unsubscribed from [unsub_count]."
128
- 6. **Ongoing protection offer**: After reporting results, offer auto-archive filters:
132
+ 6. **Accurate summary**: The scan counts are exact - the `message_count` shown in the table matches the number of messages archived. Format: "Cleaned up [total_archived] emails from [sender_count] senders. Unsubscribed from [unsub_count]."
133
+ 7. **Ongoing protection offer**: After reporting results, offer auto-archive filters:
129
134
  - "Want me to set up auto-archive filters so future emails from these senders skip your inbox?"
130
135
  - If yes, call `gmail_filters` with `action: "create"` for each sender with `from` set to the sender's email and `remove_label_ids: ["INBOX"]`.
131
136
  - Then offer a recurring declutter schedule: "Want me to scan for new clutter monthly?" If yes, use `schedule_create` to set up a monthly declutter check.
@@ -490,7 +490,7 @@
490
490
  "properties": {
491
491
  "query": {
492
492
  "type": "string",
493
- "description": "Gmail search query (default 'category:promotions newer_than:90d')"
493
+ "description": "Gmail search query (default 'in:inbox category:promotions newer_than:90d')"
494
494
  },
495
495
  "max_messages": {
496
496
  "type": "number",
@@ -49,7 +49,8 @@ export async function run(
49
49
  _context: ToolContext,
50
50
  ): Promise<ToolExecutionResult> {
51
51
  const account = input.account as string | undefined;
52
- const query = (input.query as string) ?? "category:promotions newer_than:90d";
52
+ const query =
53
+ (input.query as string) ?? "in:inbox category:promotions newer_than:90d";
53
54
  const maxMessages = Math.min(
54
55
  (input.max_messages as number) ?? 5000,
55
56
  MAX_MESSAGES_CAP,
@@ -72,7 +72,7 @@
72
72
  "Sounds",
73
73
  "Permissions & Privacy",
74
74
  "Billing",
75
- "Archived Conversations",
75
+ "Archive",
76
76
  "Schedules",
77
77
  "Developer"
78
78
  ],
@@ -10,21 +10,26 @@ const SETTINGS_TABS = [
10
10
  "Sounds",
11
11
  "Permissions & Privacy",
12
12
  "Billing",
13
- "Archived Conversations",
13
+ "Archive",
14
14
  "Schedules",
15
15
  "Developer",
16
16
  ] as const;
17
17
 
18
18
  type SettingsTab = (typeof SETTINGS_TABS)[number];
19
19
 
20
+ const LEGACY_TAB_ALIASES: Record<string, SettingsTab> = {
21
+ "Archived Conversations": "Archive",
22
+ };
23
+
20
24
  export async function run(
21
25
  input: Record<string, unknown>,
22
26
  context: ToolContext,
23
27
  ): Promise<ToolExecutionResult> {
24
- const tab = input.tab as string;
28
+ const rawTab = input.tab as string;
29
+ const tab = LEGACY_TAB_ALIASES[rawTab] ?? rawTab;
25
30
  if (!SETTINGS_TABS.includes(tab as SettingsTab)) {
26
31
  return {
27
- content: `Error: unknown tab "${tab}". Valid tabs: ${SETTINGS_TABS.join(
32
+ content: `Error: unknown tab "${rawTab}". Valid tabs: ${SETTINGS_TABS.join(
28
33
  ", ",
29
34
  )}`,
30
35
  isError: true,
@@ -126,8 +126,8 @@
126
126
  "scope": "macos",
127
127
  "key": "referral-codes",
128
128
  "label": "Referral Codes",
129
- "description": "Show the referral invite link and stats panel on the Billing tab in Settings",
130
- "defaultEnabled": false
129
+ "description": "Surface the Earn Credits referral entry points (sidebar drawer row and Billing tab button) that open the referral modal",
130
+ "defaultEnabled": true
131
131
  },
132
132
  {
133
133
  "id": "managed-sign-in",
@@ -56,6 +56,11 @@ export const OutlookOAuthServiceSchema = BaseServiceSchema.extend({
56
56
  });
57
57
  export type OutlookOAuthService = z.infer<typeof OutlookOAuthServiceSchema>;
58
58
 
59
+ export const LinearOAuthServiceSchema = BaseServiceSchema.extend({
60
+ mode: ServiceModeSchema.default("your-own"),
61
+ });
62
+ export type LinearOAuthService = z.infer<typeof LinearOAuthServiceSchema>;
63
+
59
64
  export const ServicesSchema = z.object({
60
65
  inference: InferenceServiceSchema.default(InferenceServiceSchema.parse({})),
61
66
  "image-generation": ImageGenerationServiceSchema.default(
@@ -70,5 +75,8 @@ export const ServicesSchema = z.object({
70
75
  "outlook-oauth": OutlookOAuthServiceSchema.default(
71
76
  OutlookOAuthServiceSchema.parse({}),
72
77
  ),
78
+ "linear-oauth": LinearOAuthServiceSchema.default(
79
+ LinearOAuthServiceSchema.parse({}),
80
+ ),
73
81
  });
74
82
  export type Services = z.infer<typeof ServicesSchema>;
@@ -220,7 +220,6 @@ export async function bridgeCesApproval(
220
220
  [], // No allowlist options — CES manages its own grant patterns
221
221
  [], // No scope options — CES manages scope internally
222
222
  undefined, // No file diff
223
- undefined, // Not sandboxed
224
223
  options?.conversationId,
225
224
  "host", // CES operations target the host
226
225
  false, // Persistent decisions are managed by CES, not trust.json
@@ -130,16 +130,12 @@ export async function fetchManagedCatalog(): Promise<FetchManagedCatalogResult>
130
130
 
131
131
  return { ok: true, descriptors };
132
132
  } catch (err) {
133
- const message = err instanceof Error ? err.message : String(err);
134
- const safeMessage = message.replace(
135
- /Api-Key\s+\S+/gi,
136
- "Api-Key [REDACTED]",
137
- );
138
- log.warn(`Failed to fetch managed CES catalog: ${safeMessage}`);
133
+ const errorName = err instanceof Error ? err.constructor.name : "Unknown";
134
+ log.warn(`Failed to fetch managed CES catalog (${errorName})`);
139
135
  return {
140
136
  ok: false,
141
137
  descriptors: [],
142
- error: `Failed to fetch managed CES catalog: ${safeMessage}`,
138
+ error: `Failed to fetch managed CES catalog (${errorName})`,
143
139
  };
144
140
  }
145
141
  }
@@ -118,6 +118,8 @@ export class ConfigWatcher {
118
118
  onIdentityChanged?: () => void,
119
119
  onSoundsConfigChanged?: () => void,
120
120
  onAvatarChanged?: () => void,
121
+ onConfigChanged?: () => void,
122
+ onFeatureFlagsChanged?: () => void,
121
123
  ): void {
122
124
  const workspaceDir = getWorkspaceDir();
123
125
 
@@ -130,6 +132,7 @@ export class ConfigWatcher {
130
132
  const changed = await this.refreshConfigFromSources();
131
133
  if (changed) {
132
134
  onConversationEvict();
135
+ onConfigChanged?.();
133
136
  const newConfig = getConfig();
134
137
  const newMcpFingerprint = JSON.stringify(newConfig.mcp ?? {});
135
138
  if (newMcpFingerprint !== prevMcpFingerprint) {
@@ -190,7 +193,7 @@ export class ConfigWatcher {
190
193
  this.startAvatarWatcher(onAvatarChanged);
191
194
  }
192
195
 
193
- this.startFeatureFlagsWatcher();
196
+ this.startFeatureFlagsWatcher(onFeatureFlagsChanged);
194
197
  this.startSignalsWatcher();
195
198
  this.startSkillsWatchers(onConversationEvict);
196
199
  }
@@ -266,7 +269,7 @@ export class ConfigWatcher {
266
269
  }
267
270
  }
268
271
 
269
- private startFeatureFlagsWatcher(): void {
272
+ private startFeatureFlagsWatcher(onFeatureFlagsChanged?: () => void): void {
270
273
  const protectedDir = process.env.GATEWAY_SECURITY_DIR
271
274
  ? process.env.GATEWAY_SECURITY_DIR
272
275
  : join(homedir(), ".vellum", "protected");
@@ -297,6 +300,7 @@ export class ConfigWatcher {
297
300
  "Feature flags file changed, invalidating cache",
298
301
  );
299
302
  clearFeatureFlagOverridesCache();
303
+ onFeatureFlagsChanged?.();
300
304
  },
301
305
  500,
302
306
  );
@@ -39,7 +39,6 @@ export async function requestCompressionApproval(
39
39
  undefined,
40
40
  undefined,
41
41
  undefined,
42
- undefined,
43
42
  false,
44
43
  opts?.signal,
45
44
  );
@@ -102,6 +102,7 @@ import {
102
102
  applyRuntimeInjections,
103
103
  buildUnifiedTurnContextBlock,
104
104
  findLastInjectedNowContent,
105
+ findLastInjectedPkbContent,
105
106
  inboundActorContextFromTrust,
106
107
  inboundActorContextFromTrustContext,
107
108
  readNowScratchpad,
@@ -109,6 +110,7 @@ import {
109
110
  stripInjectionsForCompaction,
110
111
  } from "./conversation-runtime-assembly.js";
111
112
  import type { SkillProjectionCache } from "./conversation-skill-tools.js";
113
+ import { markSurfaceCompleted } from "./conversation-surfaces.js";
112
114
  import { resolveTrustClass } from "./conversation-tool-setup.js";
113
115
  import { recordUsage } from "./conversation-usage.js";
114
116
  import { formatTurnTimestamp } from "./date-context.js";
@@ -438,6 +440,7 @@ export async function runAgentLoopImpl(
438
440
  surfaceId,
439
441
  summary: "Dismissed",
440
442
  });
443
+ markSurfaceCompleted(ctx, surfaceId, "Dismissed");
441
444
  ctx.pendingSurfaceActions.delete(surfaceId);
442
445
  }
443
446
  }
@@ -784,8 +787,16 @@ export async function runAgentLoopImpl(
784
787
  const nowScratchpad =
785
788
  currentNowContent !== lastInjectedNow ? currentNowContent : null;
786
789
 
787
- // Read PKB always-loaded files (INDEX, essentials, threads, buffer)
790
+ // Only inject PKB if it changed since the last injection in the
791
+ // conversation. Keeping the previous injection in place avoids mutating
792
+ // historical user messages and preserves the cached prefix.
793
+ // Note: injectPkbContext escapes </pkb> sequences before writing to history,
794
+ // so we must apply the same escaping before comparing to avoid false mismatches.
788
795
  const currentPkbContent = readPkbContext();
796
+ const lastInjectedPkb = findLastInjectedPkbContent(ctx.messages);
797
+ const escapedCurrentPkb = currentPkbContent?.replace(/<\/pkb\s*>/gi, "&lt;/pkb&gt;") ?? null;
798
+ const pkbContext =
799
+ escapedCurrentPkb !== lastInjectedPkb ? currentPkbContent : null;
789
800
 
790
801
  // Shared injection options — reused whenever we need to re-inject after reduction.
791
802
  const injectionOpts = {
@@ -796,7 +807,7 @@ export async function runAgentLoopImpl(
796
807
  channelCapabilities: ctx.channelCapabilities ?? null,
797
808
  channelCommandContext: ctx.commandIntent ?? null,
798
809
  unifiedTurnContext: unifiedTurnContextStr,
799
- pkbContext: currentPkbContent,
810
+ pkbContext,
800
811
  nowScratchpad,
801
812
  voiceCallControlPrompt: ctx.voiceCallControlPrompt ?? null,
802
813
  transportHints: ctx.transportHints ?? null,
@@ -922,7 +933,7 @@ export async function runAgentLoopImpl(
922
933
  // value from injectionOpts to avoid duplicate injection.
923
934
  runMessages = applyRuntimeInjections(ctx.messages, {
924
935
  ...injectionOpts,
925
- pkbContext: currentPkbContent,
936
+ ...(step.compactionResult?.compacted && { pkbContext: currentPkbContent }),
926
937
  ...(step.compactionResult?.compacted && { nowScratchpad: currentNowContent }),
927
938
  workspaceTopLevelContext: shouldInjectWorkspace
928
939
  ? ctx.workspaceTopLevelContext
@@ -1202,8 +1213,16 @@ export async function runAgentLoopImpl(
1202
1213
  // limit), incorporate those new messages into ctx.messages so the
1203
1214
  // convergence loop operates on the full (larger) history.
1204
1215
  if (state.contextTooLargeDetected) {
1216
+ // Track whether ctx.messages was actually stripped so we know if
1217
+ // NOW.md (and other injections) need to be re-injected. When the
1218
+ // provider rejects before adding any messages, the strip is skipped
1219
+ // and ctx.messages still contains the previous injection — blindly
1220
+ // re-injecting would duplicate the NOW.md block.
1221
+ let convergenceStripped = false;
1222
+
1205
1223
  if (updatedHistory.length > preRunHistoryLength) {
1206
1224
  ctx.messages = stripInjectionsForCompaction(updatedHistory);
1225
+ convergenceStripped = true;
1207
1226
  preRepairMessages = updatedHistory;
1208
1227
  preRunHistoryLength = updatedHistory.length;
1209
1228
  }
@@ -1326,12 +1345,13 @@ export async function runAgentLoopImpl(
1326
1345
  shouldInjectWorkspace = true;
1327
1346
  }
1328
1347
 
1329
- // ctx.messages has been stripped (line 1206/1373) so NOW.md must
1330
- // always be re-injected regardless of whether compaction ran.
1348
+ // Only re-inject NOW.md when ctx.messages was actually stripped;
1349
+ // otherwise the existing NOW.md block is still present and
1350
+ // re-injecting would duplicate it.
1331
1351
  runMessages = applyRuntimeInjections(ctx.messages, {
1332
1352
  ...injectionOpts,
1333
1353
  pkbContext: currentPkbContent,
1334
- nowScratchpad: currentNowContent,
1354
+ nowScratchpad: convergenceStripped ? currentNowContent : null,
1335
1355
  workspaceTopLevelContext: shouldInjectWorkspace
1336
1356
  ? ctx.workspaceTopLevelContext
1337
1357
  : null,
@@ -1373,6 +1393,7 @@ export async function runAgentLoopImpl(
1373
1393
  // pre-rerun messages.
1374
1394
  if (updatedHistory.length > preRunHistoryLength) {
1375
1395
  ctx.messages = stripInjectionsForCompaction(updatedHistory);
1396
+ convergenceStripped = true;
1376
1397
  preRepairMessages = updatedHistory;
1377
1398
  preRunHistoryLength = updatedHistory.length;
1378
1399
  }
@@ -1448,12 +1469,12 @@ export async function runAgentLoopImpl(
1448
1469
  shouldInjectWorkspace = true;
1449
1470
  }
1450
1471
 
1451
- // ctx.messages was already stripped before the convergence
1452
- // loop, so NOW.md must always be re-injected here.
1472
+ // Only re-inject NOW.md when ctx.messages was actually stripped;
1473
+ // otherwise the existing block is still present.
1453
1474
  runMessages = applyRuntimeInjections(ctx.messages, {
1454
1475
  ...injectionOpts,
1455
1476
  pkbContext: currentPkbContent,
1456
- nowScratchpad: currentNowContent,
1477
+ nowScratchpad: convergenceStripped ? currentNowContent : null,
1457
1478
  workspaceTopLevelContext: shouldInjectWorkspace
1458
1479
  ? ctx.workspaceTopLevelContext
1459
1480
  : null,
@@ -1568,12 +1589,12 @@ export async function runAgentLoopImpl(
1568
1589
  shouldInjectWorkspace = true;
1569
1590
  }
1570
1591
 
1571
- // ctx.messages was already stripped before the convergence
1572
- // loop, so NOW.md must always be re-injected here.
1592
+ // Only re-inject NOW.md when ctx.messages was actually stripped;
1593
+ // otherwise the existing block is still present.
1573
1594
  runMessages = applyRuntimeInjections(ctx.messages, {
1574
1595
  ...injectionOpts,
1575
1596
  pkbContext: currentPkbContent,
1576
- nowScratchpad: currentNowContent,
1597
+ nowScratchpad: convergenceStripped ? currentNowContent : null,
1577
1598
  workspaceTopLevelContext: shouldInjectWorkspace
1578
1599
  ? ctx.workspaceTopLevelContext
1579
1600
  : null,
@@ -71,7 +71,6 @@ export async function approveHostAttachmentRead(
71
71
  await generateAllowlistOptions(toolName, input),
72
72
  generateScopeOptions(workingDir, toolName),
73
73
  undefined,
74
- undefined,
75
74
  conversationId,
76
75
  "host",
77
76
  );