@vellumai/assistant 0.4.57 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/package.json +1 -1
  2. package/src/__tests__/assistant-feature-flags-integration.test.ts +7 -9
  3. package/src/__tests__/conversation-runtime-assembly.test.ts +28 -21
  4. package/src/__tests__/credential-execution-feature-gates.test.ts +3 -3
  5. package/src/__tests__/encrypted-store.test.ts +24 -12
  6. package/src/__tests__/file-read-tool.test.ts +40 -0
  7. package/src/__tests__/filesystem-tools.test.ts +4 -2
  8. package/src/__tests__/history-repair.test.ts +71 -0
  9. package/src/__tests__/host-file-read-tool.test.ts +87 -0
  10. package/src/__tests__/identity-intro-cache.test.ts +209 -0
  11. package/src/__tests__/model-intents.test.ts +1 -1
  12. package/src/__tests__/non-member-access-request.test.ts +3 -3
  13. package/src/__tests__/skill-feature-flags-integration.test.ts +18 -17
  14. package/src/__tests__/skill-feature-flags.test.ts +13 -13
  15. package/src/__tests__/skill-load-feature-flag.test.ts +4 -4
  16. package/src/__tests__/skill-memory.test.ts +14 -12
  17. package/src/__tests__/system-prompt.test.ts +8 -0
  18. package/src/config/feature-flag-registry.json +9 -1
  19. package/src/daemon/conversation-agent-loop-handlers.ts +2 -39
  20. package/src/daemon/conversation-runtime-assembly.ts +4 -3
  21. package/src/daemon/history-repair.ts +28 -8
  22. package/src/daemon/trace-emitter.ts +3 -2
  23. package/src/memory/search/staleness.ts +4 -1
  24. package/src/notifications/decision-engine.ts +43 -2
  25. package/src/notifications/emit-signal.ts +1 -0
  26. package/src/permissions/checker.ts +0 -20
  27. package/src/prompts/system-prompt.ts +2 -0
  28. package/src/prompts/templates/BOOTSTRAP.md +10 -4
  29. package/src/prompts/templates/IDENTITY.md +1 -2
  30. package/src/providers/anthropic/client.ts +5 -17
  31. package/src/runtime/access-request-helper.ts +15 -1
  32. package/src/runtime/guardian-vellum-migration.ts +1 -3
  33. package/src/runtime/routes/btw-routes.ts +84 -0
  34. package/src/runtime/routes/identity-intro-cache.ts +105 -0
  35. package/src/runtime/routes/identity-routes.ts +51 -0
  36. package/src/runtime/routes/settings-routes.ts +1 -1
  37. package/src/security/encrypted-store.ts +1 -2
  38. package/src/skills/skill-memory.ts +5 -3
  39. package/src/telemetry/usage-telemetry-reporter.test.ts +6 -1
  40. package/src/telemetry/usage-telemetry-reporter.ts +2 -0
  41. package/src/tools/filesystem/read.ts +14 -3
  42. package/src/tools/host-filesystem/read.ts +17 -1
  43. package/src/tools/shared/filesystem/format-diff.ts +4 -16
  44. package/src/util/pricing.ts +4 -0
@@ -65,7 +65,7 @@ describe("model intents", () => {
65
65
  "claude-opus-4-6",
66
66
  );
67
67
  expect(resolveModelIntent("openai", "latency-optimized")).toBe(
68
- "gpt-4o-mini",
68
+ "gpt-5.4-nano",
69
69
  );
70
70
  });
71
71
 
@@ -623,7 +623,7 @@ describe("access-request-helper unit tests", () => {
623
623
  expect(telegram!.status).toBe("sent");
624
624
  });
625
625
 
626
- test("notifyGuardianOfAccessRequest records failed vellum fallback when pipeline has no vellum delivery", async () => {
626
+ test("notifyGuardianOfAccessRequest skips vellum fallback for same-channel-only routing (telegram)", async () => {
627
627
  mockEmitResult = {
628
628
  signalId: "sig-no-vellum",
629
629
  deduplicated: false,
@@ -657,8 +657,8 @@ describe("access-request-helper unit tests", () => {
657
657
  (d) => d.destinationChannel === "telegram",
658
658
  );
659
659
 
660
- expect(vellum).toBeDefined();
661
- expect(vellum!.status).toBe("failed");
660
+ // Same-channel routing skips vellum delivery entirely — no fallback record
661
+ expect(vellum).toBeUndefined();
662
662
  expect(telegram).toBeDefined();
663
663
  expect(telegram!.destinationChatId).toBe("guardian-chat-456");
664
664
  expect(telegram!.status).toBe("sent");
@@ -138,14 +138,15 @@ describe("frontmatter feature-flag integration", () => {
138
138
  expect(key).toBeUndefined();
139
139
  });
140
140
 
141
- test("resolveSkillStates gates skill with featureFlag when flag is OFF", () => {
141
+ test("resolveSkillStates includes skill with featureFlag when flag defaults to ON", () => {
142
142
  const skill = buildSkillSummary("contacts", SKILL_MD_WITH_FLAG)!;
143
- // "contacts" is in the registry with defaultEnabled: false
143
+ // "contacts" is in the registry with defaultEnabled: true
144
144
  const config = makeConfig();
145
145
 
146
146
  const resolved = resolveSkillStates([skill], config);
147
- // Flag defaults to false → skill is filtered out
148
- expect(resolved.length).toBe(0);
147
+ // Flag defaults to true → skill passes through
148
+ expect(resolved.length).toBe(1);
149
+ expect(resolved[0].summary.id).toBe("contacts");
149
150
  });
150
151
 
151
152
  test("resolveSkillStates includes skill with featureFlag when flag is ON", () => {
@@ -192,22 +193,22 @@ describe("frontmatter feature-flag integration", () => {
192
193
  const key = skillFlagKey(skill);
193
194
  expect(key).toBe("feature_flags.contacts.enabled");
194
195
 
195
- // Step 4: Check flag state — "contacts" has defaultEnabled: false in registry
196
- const configOff = makeConfig();
197
- expect(isAssistantFeatureFlagEnabled(key!, configOff)).toBe(false);
196
+ // Step 4: Check flag state — "contacts" has defaultEnabled: true in registry
197
+ const configDefault = makeConfig();
198
+ expect(isAssistantFeatureFlagEnabled(key!, configDefault)).toBe(true);
198
199
 
199
- // Step 5: resolveSkillStates correctly filters it out
200
- const resolvedOff = resolveSkillStates([skill], configOff);
201
- expect(resolvedOff.length).toBe(0);
200
+ // Step 5: resolveSkillStates includes it by default
201
+ const resolvedDefault = resolveSkillStates([skill], configDefault);
202
+ expect(resolvedDefault.length).toBe(1);
203
+ expect(resolvedDefault[0].summary.id).toBe("contacts");
202
204
 
203
- // Step 6: With override enabled, skill passes through
204
- const configOn = makeConfig({
205
- assistantFeatureFlagValues: { [key!]: true },
205
+ // Step 6: With override disabled, skill is filtered out
206
+ const configOff = makeConfig({
207
+ assistantFeatureFlagValues: { [key!]: false },
206
208
  });
207
- expect(isAssistantFeatureFlagEnabled(key!, configOn)).toBe(true);
209
+ expect(isAssistantFeatureFlagEnabled(key!, configOff)).toBe(false);
208
210
 
209
- const resolvedOn = resolveSkillStates([skill], configOn);
210
- expect(resolvedOn.length).toBe(1);
211
- expect(resolvedOn[0].summary.id).toBe("contacts");
211
+ const resolvedOff = resolveSkillStates([skill], configOff);
212
+ expect(resolvedOff.length).toBe(0);
212
213
  });
213
214
  });
@@ -81,14 +81,14 @@ describe("skillFlagKey", () => {
81
81
  // ---------------------------------------------------------------------------
82
82
 
83
83
  describe("isAssistantFeatureFlagEnabled with skillFlagKey", () => {
84
- test("returns false when no flag overrides (registry default is false)", () => {
84
+ test("returns true when no flag overrides (registry default is true)", () => {
85
85
  const config = makeConfig();
86
86
  expect(
87
87
  isAssistantFeatureFlagEnabled(
88
88
  skillFlagKey({ featureFlag: DECLARED_FLAG_ID })!,
89
89
  config,
90
90
  ),
91
- ).toBe(false);
91
+ ).toBe(true);
92
92
  });
93
93
 
94
94
  test("returns true when skill key is explicitly true", () => {
@@ -140,10 +140,8 @@ describe("isAssistantFeatureFlagEnabled", () => {
140
140
 
141
141
  test("falls back to registry default when no override", () => {
142
142
  const config = makeConfig();
143
- // contacts defaults to false in the registry
144
- expect(isAssistantFeatureFlagEnabled(DECLARED_FLAG_KEY, config)).toBe(
145
- false,
146
- );
143
+ // contacts defaults to true in the registry
144
+ expect(isAssistantFeatureFlagEnabled(DECLARED_FLAG_KEY, config)).toBe(true);
147
145
  });
148
146
 
149
147
  test("respects persisted overrides for undeclared keys", () => {
@@ -207,13 +205,14 @@ describe("resolveSkillStates with feature flags", () => {
207
205
  expect(ids).toContain("browser");
208
206
  });
209
207
 
210
- test("declared flag key defaults to registry value (false)", () => {
208
+ test("declared flag key defaults to registry value (true)", () => {
211
209
  const catalog = [makeSkill(DECLARED_SKILL_ID, "bundled", DECLARED_FLAG_ID)];
212
210
  const config = makeConfig();
213
211
 
214
212
  const resolved = resolveSkillStates(catalog, config);
215
- // contacts registry default is false, so it's filtered out
216
- expect(resolved.length).toBe(0);
213
+ // contacts registry default is true, so it passes through
214
+ expect(resolved.length).toBe(1);
215
+ expect(resolved[0].summary.id).toBe(DECLARED_SKILL_ID);
217
216
  });
218
217
 
219
218
  test("skill without featureFlag is never flag-gated", () => {
@@ -280,14 +279,15 @@ describe("resolveSkillStates with feature flags", () => {
280
279
  // ---------------------------------------------------------------------------
281
280
 
282
281
  describe("resolveSkillStates with frontmatter featureFlag", () => {
283
- test("skill with featureFlag (defaultEnabled: false) is filtered when no config override", () => {
284
- // contacts has defaultEnabled: false in the registry
282
+ test("skill with featureFlag (defaultEnabled: true) is included when no config override", () => {
283
+ // contacts has defaultEnabled: true in the registry
285
284
  const catalog = [makeSkill(DECLARED_SKILL_ID, "bundled", DECLARED_FLAG_ID)];
286
285
  const config = makeConfig();
287
286
 
288
287
  const resolved = resolveSkillStates(catalog, config);
289
- // No override, registry default is false → filtered out
290
- expect(resolved.length).toBe(0);
288
+ // No override, registry default is true → passes through
289
+ expect(resolved.length).toBe(1);
290
+ expect(resolved[0].summary.id).toBe(DECLARED_SKILL_ID);
291
291
  });
292
292
 
293
293
  test("skill with featureFlag is included when config override enables it", () => {
@@ -166,7 +166,7 @@ describe("skill_load feature flag enforcement", () => {
166
166
  expect(result.content).toContain("Skill: Contacts");
167
167
  });
168
168
 
169
- test("rejects skill when flag key is absent (registry defaults to disabled)", async () => {
169
+ test("loads skill when flag key is absent (registry defaults to enabled)", async () => {
170
170
  writeSkill(
171
171
  DECLARED_SKILL_ID,
172
172
  "Contacts",
@@ -184,8 +184,8 @@ describe("skill_load feature flag enforcement", () => {
184
184
 
185
185
  const result = await executeSkillLoad({ skill: DECLARED_SKILL_ID });
186
186
 
187
- // contacts is declared in the registry with defaultEnabled: false
188
- expect(result.isError).toBe(true);
189
- expect(result.content).toContain("disabled by feature flag");
187
+ // contacts is declared in the registry with defaultEnabled: true
188
+ expect(result.isError).toBe(false);
189
+ expect(result.content).toContain("Skill: Contacts");
190
190
  });
191
191
  });
@@ -1,14 +1,7 @@
1
1
  import { mkdtempSync, rmSync } from "node:fs";
2
2
  import { tmpdir } from "node:os";
3
3
  import { join } from "node:path";
4
- import {
5
- afterAll,
6
- beforeEach,
7
- describe,
8
- expect,
9
- mock,
10
- test,
11
- } from "bun:test";
4
+ import { afterAll, beforeEach, describe, expect, mock, test } from "bun:test";
12
5
 
13
6
  import { eq } from "drizzle-orm";
14
7
 
@@ -46,8 +39,9 @@ mock.module("../memory/qdrant-client.js", () => ({
46
39
  }));
47
40
 
48
41
  // Controllable mock for resolveCatalog used by seedCatalogSkillMemories
49
- let mockResolveCatalog: () => Promise<import("../skills/catalog-install.js").CatalogSkill[]> =
50
- async () => [];
42
+ let mockResolveCatalog: () => Promise<
43
+ import("../skills/catalog-install.js").CatalogSkill[]
44
+ > = async () => [];
51
45
 
52
46
  mock.module("../skills/catalog-install.js", () => ({
53
47
  resolveCatalog: (..._args: unknown[]) => mockResolveCatalog(),
@@ -453,7 +447,11 @@ describe("seedCatalogSkillMemories", () => {
453
447
 
454
448
  test("skips skills whose feature flag is disabled", async () => {
455
449
  const skills: CatalogSkill[] = [
456
- makeSkill({ id: "unflagged-skill", name: "Unflagged", description: "No flag" }),
450
+ makeSkill({
451
+ id: "unflagged-skill",
452
+ name: "Unflagged",
453
+ description: "No flag",
454
+ }),
457
455
  makeSkill({
458
456
  id: "flagged-skill",
459
457
  name: "Flagged",
@@ -485,7 +483,11 @@ describe("seedCatalogSkillMemories", () => {
485
483
  test("prunes pre-existing capability for a skill whose flag becomes disabled", async () => {
486
484
  // First seed with both skills, all flags enabled
487
485
  const skills: CatalogSkill[] = [
488
- makeSkill({ id: "unflagged-skill", name: "Unflagged", description: "No flag" }),
486
+ makeSkill({
487
+ id: "unflagged-skill",
488
+ name: "Unflagged",
489
+ description: "No flag",
490
+ }),
489
491
  makeSkill({
490
492
  id: "flagged-skill",
491
493
  name: "Flagged",
@@ -237,6 +237,14 @@ describe("buildSystemPrompt", () => {
237
237
  expect(result).toContain("browser automation as last resort");
238
238
  });
239
239
 
240
+ test("includes inline media attachment guidance", () => {
241
+ const result = buildSystemPrompt();
242
+ expect(result).toContain(
243
+ "Image and video attachments can render inline in chat.",
244
+ );
245
+ expect(result).toContain("attach it instead of only printing its path");
246
+ });
247
+
240
248
  test("does not include removed sections", () => {
241
249
  const result = buildSystemPrompt();
242
250
  expect(result).not.toContain("## External Communications Identity");
@@ -23,7 +23,7 @@
23
23
  "key": "feature_flags.contacts.enabled",
24
24
  "label": "Contacts",
25
25
  "description": "Show the Contacts tab in Settings for viewing and managing contacts",
26
- "defaultEnabled": false
26
+ "defaultEnabled": true
27
27
  },
28
28
  {
29
29
  "id": "email-channel",
@@ -256,6 +256,14 @@
256
256
  "label": "Quick Input",
257
257
  "description": "Enable the Quick Input popover on right-click of the menu bar icon",
258
258
  "defaultEnabled": false
259
+ },
260
+ {
261
+ "id": "expand-completed-steps",
262
+ "scope": "macos",
263
+ "key": "expand_completed_steps",
264
+ "label": "Expand Completed Steps",
265
+ "description": "Auto-expand completed tool call step groups instead of showing them collapsed",
266
+ "defaultEnabled": false
259
267
  }
260
268
  ]
261
269
  }
@@ -167,30 +167,6 @@ export function emitLlmCallStartedIfNeeded(
167
167
  );
168
168
  }
169
169
 
170
- // ── Client Payload Size Caps ─────────────────────────────────────────
171
- // The client truncates tool results anyway (20 000 chars in ChatViewModel),
172
- // but the full string can be megabytes (file_read, bash output). Capping
173
- // here avoids sending oversized payloads which get decoded on the
174
- // client's main thread.
175
-
176
- const TOOL_RESULT_MAX_CHARS = 20_000;
177
- const TOOL_RESULT_TRUNCATION_SUFFIX = "...[truncated]";
178
-
179
- // tool_input_delta streams accumulated JSON as tools run. For non-app
180
- // tools the client discards it (extractCodePreview only handles app tools),
181
- // so we cap it aggressively to avoid excessive client traffic.
182
- const TOOL_INPUT_DELTA_MAX_CHARS = 50_000;
183
- const APP_TOOL_NAMES = new Set(["app_create", "app_update"]);
184
-
185
- function truncateForClient(
186
- value: string,
187
- maxChars: number,
188
- suffix: string,
189
- ): string {
190
- if (value.length <= maxChars) return value;
191
- return value.slice(0, maxChars - suffix.length) + suffix;
192
- }
193
-
194
170
  // ── Friendly Tool Names ──────────────────────────────────────────────
195
171
 
196
172
  const TOOL_FRIENDLY_NAMES: Record<string, string> = {
@@ -409,19 +385,10 @@ export function handleInputJsonDelta(
409
385
  deps: EventHandlerDeps,
410
386
  event: Extract<AgentEvent, { type: "input_json_delta" }>,
411
387
  ): void {
412
- // Cap non-app tool input deltas — the client only uses this data for
413
- // app_create/app_update code previews; all other tools discard it.
414
- const content = APP_TOOL_NAMES.has(event.toolName)
415
- ? event.accumulatedJson
416
- : truncateForClient(
417
- event.accumulatedJson,
418
- TOOL_INPUT_DELTA_MAX_CHARS,
419
- TOOL_RESULT_TRUNCATION_SUFFIX,
420
- );
421
388
  deps.onEvent({
422
389
  type: "tool_input_delta",
423
390
  toolName: event.toolName,
424
- content,
391
+ content: event.accumulatedJson,
425
392
  conversationId: deps.ctx.conversationId,
426
393
  toolUseId: event.toolUseId,
427
394
  });
@@ -438,11 +405,7 @@ export function handleToolResult(
438
405
  deps.onEvent({
439
406
  type: "tool_result",
440
407
  toolName: "",
441
- result: truncateForClient(
442
- event.content,
443
- TOOL_RESULT_MAX_CHARS,
444
- TOOL_RESULT_TRUNCATION_SUFFIX,
445
- ),
408
+ result: event.content,
446
409
  isError: event.isError,
447
410
  diff: event.diff,
448
411
  status: event.status,
@@ -655,7 +655,6 @@ export function injectTurnContext(
655
655
  };
656
656
  }
657
657
 
658
-
659
658
  /**
660
659
  * Build the `<inbound_actor_context>` text block used for model grounding.
661
660
  *
@@ -737,7 +736,10 @@ export function buildInboundActorContextBlock(
737
736
  }
738
737
  // Contact metadata - only included when the sender has a contact record
739
738
  // with non-default values.
740
- if (ctx.contactNotes && sanitizeInlineContextValue(ctx.contactNotes) !== ctx.trustClass) {
739
+ if (
740
+ ctx.contactNotes &&
741
+ sanitizeInlineContextValue(ctx.contactNotes) !== ctx.trustClass
742
+ ) {
741
743
  lines.push(
742
744
  `contact_notes: ${sanitizeInlineContextValue(ctx.contactNotes)}`,
743
745
  );
@@ -932,7 +934,6 @@ export interface InterfaceTurnContextParams {
932
934
  conversationOriginInterface: InterfaceId | null;
933
935
  }
934
936
 
935
-
936
937
  /** Strip interface turn context blocks (both legacy separate and unified). */
937
938
  export function stripInterfaceTurnContext(messages: Message[]): Message[] {
938
939
  return stripUserTextBlocksByPrefix(messages, [
@@ -69,7 +69,10 @@ export function repairHistory(messages: Message[]): RepairResult {
69
69
  }
70
70
 
71
71
  // Ensure every server_tool_use has a paired web_search_tool_result
72
- // in the same assistant message (handles interrupted streams)
72
+ // in the same assistant message (handles interrupted streams).
73
+ // Synthetic results are inserted IMMEDIATELY AFTER their corresponding
74
+ // server_tool_use block — not appended to the end — so that
75
+ // ensureToolPairing's split at tool_use boundaries cannot separate them.
73
76
  const serverToolIds = new Set(
74
77
  cleanedContent
75
78
  .filter(
@@ -82,18 +85,35 @@ export function repairHistory(messages: Message[]): RepairResult {
82
85
  .filter((b) => b.type === "web_search_tool_result")
83
86
  .map((b) => (b as { tool_use_id: string }).tool_use_id),
84
87
  );
88
+ const orphanedServerIds = new Set<string>();
85
89
  for (const id of serverToolIds) {
86
90
  if (!matchedServerIds.has(id)) {
87
- cleanedContent.push({
88
- type: "web_search_tool_result",
89
- tool_use_id: id,
90
- content: SYNTHETIC_WEB_SEARCH_ERROR,
91
- });
92
- stats.missingToolResultsInserted++;
91
+ orphanedServerIds.add(id);
93
92
  }
94
93
  }
95
94
 
96
- result.push({ role: "assistant", content: cleanedContent });
95
+ let repairedContent: ContentBlock[];
96
+ if (orphanedServerIds.size > 0) {
97
+ repairedContent = [];
98
+ for (const block of cleanedContent) {
99
+ repairedContent.push(block);
100
+ if (
101
+ block.type === "server_tool_use" &&
102
+ orphanedServerIds.has(block.id)
103
+ ) {
104
+ repairedContent.push({
105
+ type: "web_search_tool_result",
106
+ tool_use_id: block.id,
107
+ content: SYNTHETIC_WEB_SEARCH_ERROR,
108
+ });
109
+ stats.missingToolResultsInserted++;
110
+ }
111
+ }
112
+ } else {
113
+ repairedContent = cleanedContent;
114
+ }
115
+
116
+ result.push({ role: "assistant", content: repairedContent });
97
117
 
98
118
  // Only track client-side tool_use IDs as pending (not server_tool_use)
99
119
  pendingToolUseIds = new Set(
@@ -67,13 +67,14 @@ export class TraceEmitter {
67
67
  attributes,
68
68
  };
69
69
 
70
+ // Send to client first so synchronous DB writes don't block SSE delivery.
71
+ this.sendToClient(event);
72
+
70
73
  try {
71
74
  persistTraceEvent(event as TraceEvent);
72
75
  } catch (err) {
73
76
  log.warn({ err, eventId }, "Failed to persist trace event");
74
77
  }
75
-
76
- this.sendToClient(event);
77
78
  }
78
79
  }
79
80
 
@@ -22,7 +22,10 @@ export function computeStaleness(
22
22
  now: number,
23
23
  ): { level: StalenessLevel; ratio: number } {
24
24
  const baseLifetime = BASE_LIFETIME_MS[item.kind] ?? DEFAULT_LIFETIME_MS;
25
- const reinforcement = Math.max(1, 1 + 0.3 * (item.sourceConversationCount - 1));
25
+ const reinforcement = Math.max(
26
+ 1,
27
+ 1 + 0.3 * (item.sourceConversationCount - 1),
28
+ );
26
29
  const effectiveLifetime = baseLifetime * reinforcement;
27
30
  const age = now - item.firstSeenAt;
28
31
  const ratio = age / effectiveLifetime;
@@ -852,17 +852,58 @@ async function classifyWithLLM(
852
852
  *
853
853
  * - `all_channels`: force selected channels to all connected channels.
854
854
  * - `multi_channel`: ensure at least 2 channels when 2+ are connected.
855
- * - `single_channel`: no override (default behavior).
855
+ * - `single_channel`: cap to a single channel. When explicitly set, reduces
856
+ * selected channels to one — preferring the source channel if present.
856
857
  */
857
858
  export function enforceRoutingIntent(
858
859
  decision: NotificationDecision,
859
860
  routingIntent: RoutingIntent | undefined,
860
861
  connectedChannels: NotificationChannel[],
862
+ sourceChannel?: string,
861
863
  ): NotificationDecision {
862
- if (!routingIntent || routingIntent === "single_channel") {
864
+ if (!routingIntent) {
863
865
  return decision;
864
866
  }
865
867
 
868
+ if (routingIntent === "single_channel") {
869
+ if (!decision.shouldNotify) {
870
+ return decision;
871
+ }
872
+
873
+ // Force delivery to the source channel only. If the source channel
874
+ // is among the connected channels, use it regardless of what the LLM
875
+ // picked (even if the LLM picked exactly one wrong channel).
876
+ // Otherwise fall back to capping at the first selected channel.
877
+ const sourceIsConnected =
878
+ sourceChannel &&
879
+ connectedChannels.includes(sourceChannel as NotificationChannel);
880
+ const preferred = sourceIsConnected
881
+ ? (sourceChannel as NotificationChannel)
882
+ : decision.selectedChannels[0];
883
+
884
+ // No change needed if the decision already matches.
885
+ if (
886
+ decision.selectedChannels.length === 1 &&
887
+ decision.selectedChannels[0] === preferred
888
+ ) {
889
+ return decision;
890
+ }
891
+
892
+ const enforced = { ...decision };
893
+ enforced.selectedChannels = [preferred];
894
+ enforced.reasoningSummary = `${decision.reasoningSummary} [routing_intent=single_channel enforced: capped to ${preferred}]`;
895
+ log.info(
896
+ {
897
+ routingIntent,
898
+ sourceChannel,
899
+ originalChannels: decision.selectedChannels,
900
+ enforcedChannel: preferred,
901
+ },
902
+ "Routing intent enforcement: single_channel → capped to one channel",
903
+ );
904
+ return enforced;
905
+ }
906
+
866
907
  if (!decision.shouldNotify) {
867
908
  return decision;
868
909
  }
@@ -256,6 +256,7 @@ export async function emitNotificationSignal<TEventName extends string>(
256
256
  decision,
257
257
  signal.routingIntent,
258
258
  connectedChannels,
259
+ signal.sourceChannel,
259
260
  );
260
261
 
261
262
  // Re-persist the decision if routing intent enforcement changed it,
@@ -197,15 +197,6 @@ const LOW_RISK_GIT_SUBCOMMANDS = new Set([
197
197
  "reflog",
198
198
  ]);
199
199
 
200
- // Vellum/assistant CLI subcommands that are low-risk (read-only)
201
- const LOW_RISK_CLI_SUBCOMMANDS = new Set([
202
- "ps",
203
- "doctor",
204
- "audit",
205
- "completions",
206
- "map",
207
- ]);
208
-
209
200
  // Commands that wrap another program — the real program appears as the first
210
201
  // non-flag argument. When one of these is the segment program we look through
211
202
  // its args to find the effective program (e.g. `env curl …` → curl).
@@ -671,17 +662,6 @@ async function classifyRiskUncached(
671
662
  continue;
672
663
  }
673
664
 
674
- if (prog === "vellum" || prog === "assistant") {
675
- const subcommand = firstPositionalArg(seg.args);
676
- if (subcommand && LOW_RISK_CLI_SUBCOMMANDS.has(subcommand)) {
677
- // Read-only subcommands stay at current risk
678
- continue;
679
- }
680
- // Mutating subcommands are medium
681
- maxRisk = RiskLevel.Medium;
682
- continue;
683
- }
684
-
685
665
  if (!LOW_RISK_PROGRAMS.has(prog)) {
686
666
  // Unknown program → medium
687
667
  if (maxRisk === RiskLevel.Low) {
@@ -206,6 +206,8 @@ function buildAttachmentSection(): string {
206
206
  "",
207
207
  'Use `source="host"` with an absolute path for host filesystem files. Optional attributes: `filename` (display name override), `mime_type` (override auto-detection).',
208
208
  "",
209
+ "Image and video attachments can render inline in chat. If the user asks to preview a media file here, attach it instead of only printing its path.",
210
+ "",
209
211
  "Embed images/GIFs inline using markdown: `![description](URL)`.",
210
212
  ].join("\n");
211
213
  }
@@ -37,14 +37,16 @@ Onboarding has two phases. Phase 1 is about proving value. Phase 2 is about maki
37
37
 
38
38
  ### Phase 1: Prove It (Priority: HIGH)
39
39
 
40
- **Goal:** The user should be actively working on a meaningful task within the first few exchanges. They don't need to finish it immediately, but they should be on their way and thinking "oh, this thing is actually useful."
40
+ **Goal:** Complete whatever task the user wants to do. Once they've gotten initial value, bridge to Phase 2. Phase 1 is done when the task is done, and the user is thinking "oh, this thing is actually useful."
41
41
 
42
42
  **Keep Phase 1 tasks small and fast.** The goal is to show value quickly, not to impress with depth. A quick file summary, a fast web lookup, a simple app or tool, a short piece of writing. Do NOT kick off long research tasks, deep multi-step pipelines, or anything that takes more than a minute or two. If the user asks for something heavyweight, acknowledge it and suggest a lighter first win instead: "That's a bigger one. Let me show you something quick first so you can see how I work, then we'll dig in." New users start with $5 of AI credits. The full onboarding should fit comfortably within that budget, so bias toward lighter tasks.
43
43
 
44
44
  After your opening message, one of these things will happen:
45
45
 
46
46
  **Path A: The user gives you a task or question.**
47
- Great. Do it. Do it well. This is your audition. While you work on their task, quietly observe what you can learn about them (name, interests, work context, communication style). Save what you learn to USER.md silently. After completing the task, transition naturally to Phase 2.
47
+ Great. Do it. Do it well. This is your audition. While you work on their task, quietly observe what you can learn about them (name, interests, work context, communication style). Save what you learn to USER.md silently. Once the task is done, bridge to Phase 2 immediately — in that same response or the very next one. Do NOT wait for the user to ask for more. Do NOT treat "that's all" or "thanks" as a goodbye. Treat it as your cue to bridge.
48
+
49
+ If the user's first message is vague (e.g. "I'm new here, can you help with that?"), you may ask one clarifying question to scope the task. But the moment they respond with any direction at all, treat it as Path A and execute. Do not keep probing.
48
50
 
49
51
  **Path B: The user asks "what can you do?" or seems unsure.**
50
52
  Don't dump a paragraph of capabilities. Instead, use the `ui_show` tool to show them a structured card. You MUST call the `ui_show` tool (not write prose or a list). Present the actions in the exact order shown below. Here is the input to pass to the `ui_show` tool:
@@ -74,11 +76,13 @@ Only fall back to a numbered list if `ui_show` is genuinely unavailable (voice o
74
76
  - **Vibe code an app:** Ask what kind of tool or app they want. Build it using the app builder skill. Make it look great.
75
77
  - **Photo or video:** Use the media processing or image studio skills. They can analyze a video, pull insights from a photo, or generate something new. Ask what they have and what they want to do with it.
76
78
 
79
+ Once the task is complete, bridge to Phase 2 immediately — in that same response or the very next one. Do NOT wait for the user to ask for more. Do NOT treat "that's all" or "thanks" as a goodbye. Treat it as your cue to bridge.
80
+
77
81
  **Path C: The user wants to chat or explore.**
78
- That's fine. Roll with it. Be interesting. But steer toward action within 3-4 exchanges. You can weave in something like: "I'm enjoying this, but I'm itching to actually do something for you. Got anything I can sink my teeth into?"
82
+ That's fine. Roll with it. Be interesting. But steer toward action within 3-4 exchanges. You can weave in something like: "I'm enjoying this, but I'm itching to actually do something for you. Got anything I can sink my teeth into?" At that point, follow Path A instructions.
79
83
 
80
84
  **Path D: The user immediately wants to set up your identity/name.**
81
- Great, skip to Phase 2. Some people want the personality game first. Let them lead.
85
+ Great, skip to Phase 2. Some people want the personality game first. Let them lead. If you go down this path, come back to Phase 1 after that.
82
86
 
83
87
  **Critical rule for Phase 1:** Whatever the user gives you, COMPLETE A TASK. Even a small one. Summarize something, look something up, build something quick. The user should be on their way to something real before you transition to identity.
84
88
 
@@ -196,6 +200,8 @@ Do it quietly. Don't tell the user which files you're editing or mention tool na
196
200
 
197
201
  When saving to `IDENTITY.md`, be specific about the tone, energy, and conversational style you discovered during onboarding. This file persists after onboarding, so everything about how you should come across needs to be captured there. Not just your name, but the full vibe: how you talk, how much energy you bring, whether you're blunt or gentle, funny or serious.
198
202
 
203
+ When saving to `SOUL.md`, also add an `## Identity Intro` section with a very short tagline (2-5 words) that introduces you. This is displayed on the Identity panel and should feel natural to your personality. Examples: "It's [name].", "[name] here.", "[name], at your service." Write it as a single line under the heading (not a bullet list). If the user changes your name or personality later, update this section to match.
204
+
199
205
  ## Wrapping Up
200
206
 
201
207
  Once you've completed Phase 1 and made reasonable progress through Phase 2, you're done with onboarding. Use your best judgment on when the conversation has naturally moved past the bootstrap stage. There's no hard checklist. The goal is that the user feels set up and ready to work, not that every box is ticked.
@@ -2,13 +2,12 @@ _ Lines starting with _ are comments - they won't appear in the system prompt
2
2
 
3
3
  # IDENTITY.md
4
4
 
5
- This file is yours. Add sections, restructure it, make it reflect who you are. Name, Emoji, Role, Personality, and Home are parsed by the app - keep their `- **Label:**` format. Everything else is freeform.
5
+ This file is yours. Add sections, restructure it, make it reflect who you are. Name, Emoji, Role, and Personality are parsed by the app - keep their `- **Label:**` format. Everything else is freeform.
6
6
 
7
7
  - **Name:** _(not yet chosen)_
8
8
  - **Emoji:** _(not yet chosen)_
9
9
  - **Nature:** _(not yet established)_
10
10
  - **Personality:** _(not yet established)_
11
11
  - **Role:** _(not yet established)_
12
- - **Home:** Local (~/.vellum/workspace)
13
12
 
14
13
  ## Avatar