npm - @vellumai/assistant - Versions diffs - 0.4.57 → 0.5.1 - Mend

@vellumai/assistant 0.4.57 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44) hide show

package/package.json +1 -1
package/src/__tests__/assistant-feature-flags-integration.test.ts +7 -9
package/src/__tests__/conversation-runtime-assembly.test.ts +28 -21
package/src/__tests__/credential-execution-feature-gates.test.ts +3 -3
package/src/__tests__/encrypted-store.test.ts +24 -12
package/src/__tests__/file-read-tool.test.ts +40 -0
package/src/__tests__/filesystem-tools.test.ts +4 -2
package/src/__tests__/history-repair.test.ts +71 -0
package/src/__tests__/host-file-read-tool.test.ts +87 -0
package/src/__tests__/identity-intro-cache.test.ts +209 -0
package/src/__tests__/model-intents.test.ts +1 -1
package/src/__tests__/non-member-access-request.test.ts +3 -3
package/src/__tests__/skill-feature-flags-integration.test.ts +18 -17
package/src/__tests__/skill-feature-flags.test.ts +13 -13
package/src/__tests__/skill-load-feature-flag.test.ts +4 -4
package/src/__tests__/skill-memory.test.ts +14 -12
package/src/__tests__/system-prompt.test.ts +8 -0
package/src/config/feature-flag-registry.json +9 -1
package/src/daemon/conversation-agent-loop-handlers.ts +2 -39
package/src/daemon/conversation-runtime-assembly.ts +4 -3
package/src/daemon/history-repair.ts +28 -8
package/src/daemon/trace-emitter.ts +3 -2
package/src/memory/search/staleness.ts +4 -1
package/src/notifications/decision-engine.ts +43 -2
package/src/notifications/emit-signal.ts +1 -0
package/src/permissions/checker.ts +0 -20
package/src/prompts/system-prompt.ts +2 -0
package/src/prompts/templates/BOOTSTRAP.md +10 -4
package/src/prompts/templates/IDENTITY.md +1 -2
package/src/providers/anthropic/client.ts +5 -17
package/src/runtime/access-request-helper.ts +15 -1
package/src/runtime/guardian-vellum-migration.ts +1 -3
package/src/runtime/routes/btw-routes.ts +84 -0
package/src/runtime/routes/identity-intro-cache.ts +105 -0
package/src/runtime/routes/identity-routes.ts +51 -0
package/src/runtime/routes/settings-routes.ts +1 -1
package/src/security/encrypted-store.ts +1 -2
package/src/skills/skill-memory.ts +5 -3
package/src/telemetry/usage-telemetry-reporter.test.ts +6 -1
package/src/telemetry/usage-telemetry-reporter.ts +2 -0
package/src/tools/filesystem/read.ts +14 -3
package/src/tools/host-filesystem/read.ts +17 -1
package/src/tools/shared/filesystem/format-diff.ts +4 -16
package/src/util/pricing.ts +4 -0

package/src/__tests__/model-intents.test.ts CHANGED Viewed

@@ -65,7 +65,7 @@ describe("model intents", () => {
       "claude-opus-4-6",
     );
     expect(resolveModelIntent("openai", "latency-optimized")).toBe(
-      "gpt-4o-mini",
+      "gpt-5.4-nano",
     );
   });

package/src/__tests__/non-member-access-request.test.ts CHANGED Viewed

@@ -623,7 +623,7 @@ describe("access-request-helper unit tests", () => {
     expect(telegram!.status).toBe("sent");
   });
-  test("notifyGuardianOfAccessRequest records failed vellum fallback when pipeline has no vellum delivery", async () => {
+  test("notifyGuardianOfAccessRequest skips vellum fallback for same-channel-only routing (telegram)", async () => {
     mockEmitResult = {
       signalId: "sig-no-vellum",
       deduplicated: false,
@@ -657,8 +657,8 @@ describe("access-request-helper unit tests", () => {
       (d) => d.destinationChannel === "telegram",
     );
-    expect(vellum).toBeDefined();
-    expect(vellum!.status).toBe("failed");
+    // Same-channel routing skips vellum delivery entirely — no fallback record
+    expect(vellum).toBeUndefined();
     expect(telegram).toBeDefined();
     expect(telegram!.destinationChatId).toBe("guardian-chat-456");
     expect(telegram!.status).toBe("sent");

package/src/__tests__/skill-feature-flags-integration.test.ts CHANGED Viewed

@@ -138,14 +138,15 @@ describe("frontmatter feature-flag integration", () => {
     expect(key).toBeUndefined();
   });
-  test("resolveSkillStates gates skill with featureFlag when flag is OFF", () => {
+  test("resolveSkillStates includes skill with featureFlag when flag defaults to ON", () => {
     const skill = buildSkillSummary("contacts", SKILL_MD_WITH_FLAG)!;
-    // "contacts" is in the registry with defaultEnabled: false
+    // "contacts" is in the registry with defaultEnabled: true
     const config = makeConfig();
     const resolved = resolveSkillStates([skill], config);
-    // Flag defaults to false → skill is filtered out
-    expect(resolved.length).toBe(0);
+    // Flag defaults to true → skill passes through
+    expect(resolved.length).toBe(1);
+    expect(resolved[0].summary.id).toBe("contacts");
   });
   test("resolveSkillStates includes skill with featureFlag when flag is ON", () => {
@@ -192,22 +193,22 @@ describe("frontmatter feature-flag integration", () => {
     const key = skillFlagKey(skill);
     expect(key).toBe("feature_flags.contacts.enabled");
-    // Step 4: Check flag state — "contacts" has defaultEnabled: false in registry
-    const configOff = makeConfig();
-    expect(isAssistantFeatureFlagEnabled(key!, configOff)).toBe(false);
+    // Step 4: Check flag state — "contacts" has defaultEnabled: true in registry
+    const configDefault = makeConfig();
+    expect(isAssistantFeatureFlagEnabled(key!, configDefault)).toBe(true);
-    // Step 5: resolveSkillStates correctly filters it out
-    const resolvedOff = resolveSkillStates([skill], configOff);
-    expect(resolvedOff.length).toBe(0);
+    // Step 5: resolveSkillStates includes it by default
+    const resolvedDefault = resolveSkillStates([skill], configDefault);
+    expect(resolvedDefault.length).toBe(1);
+    expect(resolvedDefault[0].summary.id).toBe("contacts");
-    // Step 6: With override enabled, skill passes through
-    const configOn = makeConfig({
-      assistantFeatureFlagValues: { [key!]: true },
+    // Step 6: With override disabled, skill is filtered out
+    const configOff = makeConfig({
+      assistantFeatureFlagValues: { [key!]: false },
     });
-    expect(isAssistantFeatureFlagEnabled(key!, configOn)).toBe(true);
+    expect(isAssistantFeatureFlagEnabled(key!, configOff)).toBe(false);
-    const resolvedOn = resolveSkillStates([skill], configOn);
-    expect(resolvedOn.length).toBe(1);
-    expect(resolvedOn[0].summary.id).toBe("contacts");
+    const resolvedOff = resolveSkillStates([skill], configOff);
+    expect(resolvedOff.length).toBe(0);
   });
 });

package/src/__tests__/skill-feature-flags.test.ts CHANGED Viewed

@@ -81,14 +81,14 @@ describe("skillFlagKey", () => {
 // ---------------------------------------------------------------------------
 describe("isAssistantFeatureFlagEnabled with skillFlagKey", () => {
-  test("returns false when no flag overrides (registry default is false)", () => {
+  test("returns true when no flag overrides (registry default is true)", () => {
     const config = makeConfig();
     expect(
       isAssistantFeatureFlagEnabled(
         skillFlagKey({ featureFlag: DECLARED_FLAG_ID })!,
         config,
       ),
-    ).toBe(false);
+    ).toBe(true);
   });
   test("returns true when skill key is explicitly true", () => {
@@ -140,10 +140,8 @@ describe("isAssistantFeatureFlagEnabled", () => {
   test("falls back to registry default when no override", () => {
     const config = makeConfig();
-    // contacts defaults to false in the registry
-    expect(isAssistantFeatureFlagEnabled(DECLARED_FLAG_KEY, config)).toBe(
-      false,
-    );
+    // contacts defaults to true in the registry
+    expect(isAssistantFeatureFlagEnabled(DECLARED_FLAG_KEY, config)).toBe(true);
   });
   test("respects persisted overrides for undeclared keys", () => {
@@ -207,13 +205,14 @@ describe("resolveSkillStates with feature flags", () => {
     expect(ids).toContain("browser");
   });
-  test("declared flag key defaults to registry value (false)", () => {
+  test("declared flag key defaults to registry value (true)", () => {
     const catalog = [makeSkill(DECLARED_SKILL_ID, "bundled", DECLARED_FLAG_ID)];
     const config = makeConfig();
     const resolved = resolveSkillStates(catalog, config);
-    // contacts registry default is false, so it's filtered out
-    expect(resolved.length).toBe(0);
+    // contacts registry default is true, so it passes through
+    expect(resolved.length).toBe(1);
+    expect(resolved[0].summary.id).toBe(DECLARED_SKILL_ID);
   });
   test("skill without featureFlag is never flag-gated", () => {
@@ -280,14 +279,15 @@ describe("resolveSkillStates with feature flags", () => {
 // ---------------------------------------------------------------------------
 describe("resolveSkillStates with frontmatter featureFlag", () => {
-  test("skill with featureFlag (defaultEnabled: false) is filtered when no config override", () => {
-    // contacts has defaultEnabled: false in the registry
+  test("skill with featureFlag (defaultEnabled: true) is included when no config override", () => {
+    // contacts has defaultEnabled: true in the registry
     const catalog = [makeSkill(DECLARED_SKILL_ID, "bundled", DECLARED_FLAG_ID)];
     const config = makeConfig();
     const resolved = resolveSkillStates(catalog, config);
-    // No override, registry default is false → filtered out
-    expect(resolved.length).toBe(0);
+    // No override, registry default is true → passes through
+    expect(resolved.length).toBe(1);
+    expect(resolved[0].summary.id).toBe(DECLARED_SKILL_ID);
   });
   test("skill with featureFlag is included when config override enables it", () => {

package/src/__tests__/skill-load-feature-flag.test.ts CHANGED Viewed

@@ -166,7 +166,7 @@ describe("skill_load feature flag enforcement", () => {
     expect(result.content).toContain("Skill: Contacts");
   });
-  test("rejects skill when flag key is absent (registry defaults to disabled)", async () => {
+  test("loads skill when flag key is absent (registry defaults to enabled)", async () => {
     writeSkill(
       DECLARED_SKILL_ID,
       "Contacts",
@@ -184,8 +184,8 @@ describe("skill_load feature flag enforcement", () => {
     const result = await executeSkillLoad({ skill: DECLARED_SKILL_ID });
-    // contacts is declared in the registry with defaultEnabled: false
-    expect(result.isError).toBe(true);
-    expect(result.content).toContain("disabled by feature flag");
+    // contacts is declared in the registry with defaultEnabled: true
+    expect(result.isError).toBe(false);
+    expect(result.content).toContain("Skill: Contacts");
   });
 });

package/src/__tests__/skill-memory.test.ts CHANGED Viewed

@@ -1,14 +1,7 @@
 import { mkdtempSync, rmSync } from "node:fs";
 import { tmpdir } from "node:os";
 import { join } from "node:path";
-import {
-  afterAll,
-  beforeEach,
-  describe,
-  expect,
-  mock,
-  test,
-} from "bun:test";
+import { afterAll, beforeEach, describe, expect, mock, test } from "bun:test";
 import { eq } from "drizzle-orm";
@@ -46,8 +39,9 @@ mock.module("../memory/qdrant-client.js", () => ({
 }));
 // Controllable mock for resolveCatalog used by seedCatalogSkillMemories
-let mockResolveCatalog: () => Promise<import("../skills/catalog-install.js").CatalogSkill[]> =
-  async () => [];
+let mockResolveCatalog: () => Promise<
+  import("../skills/catalog-install.js").CatalogSkill[]
+> = async () => [];
 mock.module("../skills/catalog-install.js", () => ({
   resolveCatalog: (..._args: unknown[]) => mockResolveCatalog(),
@@ -453,7 +447,11 @@ describe("seedCatalogSkillMemories", () => {
   test("skips skills whose feature flag is disabled", async () => {
     const skills: CatalogSkill[] = [
-      makeSkill({ id: "unflagged-skill", name: "Unflagged", description: "No flag" }),
+      makeSkill({
+        id: "unflagged-skill",
+        name: "Unflagged",
+        description: "No flag",
+      }),
       makeSkill({
         id: "flagged-skill",
         name: "Flagged",
@@ -485,7 +483,11 @@ describe("seedCatalogSkillMemories", () => {
   test("prunes pre-existing capability for a skill whose flag becomes disabled", async () => {
     // First seed with both skills, all flags enabled
     const skills: CatalogSkill[] = [
-      makeSkill({ id: "unflagged-skill", name: "Unflagged", description: "No flag" }),
+      makeSkill({
+        id: "unflagged-skill",
+        name: "Unflagged",
+        description: "No flag",
+      }),
       makeSkill({
         id: "flagged-skill",
         name: "Flagged",

package/src/__tests__/system-prompt.test.ts CHANGED Viewed

@@ -237,6 +237,14 @@ describe("buildSystemPrompt", () => {
     expect(result).toContain("browser automation as last resort");
   });
+  test("includes inline media attachment guidance", () => {
+    const result = buildSystemPrompt();
+    expect(result).toContain(
+      "Image and video attachments can render inline in chat.",
+    );
+    expect(result).toContain("attach it instead of only printing its path");
+  });
   test("does not include removed sections", () => {
     const result = buildSystemPrompt();
     expect(result).not.toContain("## External Communications Identity");

package/src/config/feature-flag-registry.json CHANGED Viewed

@@ -23,7 +23,7 @@
       "key": "feature_flags.contacts.enabled",
       "label": "Contacts",
       "description": "Show the Contacts tab in Settings for viewing and managing contacts",
-      "defaultEnabled": false
+      "defaultEnabled": true
     },
     {
       "id": "email-channel",
@@ -256,6 +256,14 @@
       "label": "Quick Input",
       "description": "Enable the Quick Input popover on right-click of the menu bar icon",
       "defaultEnabled": false
+    },
+    {
+      "id": "expand-completed-steps",
+      "scope": "macos",
+      "key": "expand_completed_steps",
+      "label": "Expand Completed Steps",
+      "description": "Auto-expand completed tool call step groups instead of showing them collapsed",
+      "defaultEnabled": false
     }
   ]
 }

package/src/daemon/conversation-agent-loop-handlers.ts CHANGED Viewed

@@ -167,30 +167,6 @@ export function emitLlmCallStartedIfNeeded(
   );
 }
-// ── Client Payload Size Caps ─────────────────────────────────────────
-// The client truncates tool results anyway (20 000 chars in ChatViewModel),
-// but the full string can be megabytes (file_read, bash output). Capping
-// here avoids sending oversized payloads which get decoded on the
-// client's main thread.
-const TOOL_RESULT_MAX_CHARS = 20_000;
-const TOOL_RESULT_TRUNCATION_SUFFIX = "...[truncated]";
-// tool_input_delta streams accumulated JSON as tools run. For non-app
-// tools the client discards it (extractCodePreview only handles app tools),
-// so we cap it aggressively to avoid excessive client traffic.
-const TOOL_INPUT_DELTA_MAX_CHARS = 50_000;
-const APP_TOOL_NAMES = new Set(["app_create", "app_update"]);
-function truncateForClient(
-  value: string,
-  maxChars: number,
-  suffix: string,
-): string {
-  if (value.length <= maxChars) return value;
-  return value.slice(0, maxChars - suffix.length) + suffix;
-}
 // ── Friendly Tool Names ──────────────────────────────────────────────
 const TOOL_FRIENDLY_NAMES: Record<string, string> = {
@@ -409,19 +385,10 @@ export function handleInputJsonDelta(
   deps: EventHandlerDeps,
   event: Extract<AgentEvent, { type: "input_json_delta" }>,
 ): void {
-  // Cap non-app tool input deltas — the client only uses this data for
-  // app_create/app_update code previews; all other tools discard it.
-  const content = APP_TOOL_NAMES.has(event.toolName)
-    ? event.accumulatedJson
-    : truncateForClient(
-        event.accumulatedJson,
-        TOOL_INPUT_DELTA_MAX_CHARS,
-        TOOL_RESULT_TRUNCATION_SUFFIX,
-      );
   deps.onEvent({
     type: "tool_input_delta",
     toolName: event.toolName,
-    content,
+    content: event.accumulatedJson,
     conversationId: deps.ctx.conversationId,
     toolUseId: event.toolUseId,
   });
@@ -438,11 +405,7 @@ export function handleToolResult(
   deps.onEvent({
     type: "tool_result",
     toolName: "",
-    result: truncateForClient(
-      event.content,
-      TOOL_RESULT_MAX_CHARS,
-      TOOL_RESULT_TRUNCATION_SUFFIX,
-    ),
+    result: event.content,
     isError: event.isError,
     diff: event.diff,
     status: event.status,

package/src/daemon/conversation-runtime-assembly.ts CHANGED Viewed

@@ -655,7 +655,6 @@ export function injectTurnContext(
   };
 }
 /**
  * Build the `<inbound_actor_context>` text block used for model grounding.
  *
@@ -737,7 +736,10 @@ export function buildInboundActorContextBlock(
   }
   // Contact metadata - only included when the sender has a contact record
   // with non-default values.
-  if (ctx.contactNotes && sanitizeInlineContextValue(ctx.contactNotes) !== ctx.trustClass) {
+  if (
+    ctx.contactNotes &&
+    sanitizeInlineContextValue(ctx.contactNotes) !== ctx.trustClass
+  ) {
     lines.push(
       `contact_notes: ${sanitizeInlineContextValue(ctx.contactNotes)}`,
     );
@@ -932,7 +934,6 @@ export interface InterfaceTurnContextParams {
   conversationOriginInterface: InterfaceId | null;
 }
 /** Strip interface turn context blocks (both legacy separate and unified). */
 export function stripInterfaceTurnContext(messages: Message[]): Message[] {
   return stripUserTextBlocksByPrefix(messages, [

package/src/daemon/history-repair.ts CHANGED Viewed

@@ -69,7 +69,10 @@ export function repairHistory(messages: Message[]): RepairResult {
       }
       // Ensure every server_tool_use has a paired web_search_tool_result
-      // in the same assistant message (handles interrupted streams)
+      // in the same assistant message (handles interrupted streams).
+      // Synthetic results are inserted IMMEDIATELY AFTER their corresponding
+      // server_tool_use block — not appended to the end — so that
+      // ensureToolPairing's split at tool_use boundaries cannot separate them.
       const serverToolIds = new Set(
         cleanedContent
           .filter(
@@ -82,18 +85,35 @@ export function repairHistory(messages: Message[]): RepairResult {
           .filter((b) => b.type === "web_search_tool_result")
           .map((b) => (b as { tool_use_id: string }).tool_use_id),
       );
+      const orphanedServerIds = new Set<string>();
       for (const id of serverToolIds) {
         if (!matchedServerIds.has(id)) {
-          cleanedContent.push({
-            type: "web_search_tool_result",
-            tool_use_id: id,
-            content: SYNTHETIC_WEB_SEARCH_ERROR,
-          });
-          stats.missingToolResultsInserted++;
+          orphanedServerIds.add(id);
         }
       }
-      result.push({ role: "assistant", content: cleanedContent });
+      let repairedContent: ContentBlock[];
+      if (orphanedServerIds.size > 0) {
+        repairedContent = [];
+        for (const block of cleanedContent) {
+          repairedContent.push(block);
+          if (
+            block.type === "server_tool_use" &&
+            orphanedServerIds.has(block.id)
+          ) {
+            repairedContent.push({
+              type: "web_search_tool_result",
+              tool_use_id: block.id,
+              content: SYNTHETIC_WEB_SEARCH_ERROR,
+            });
+            stats.missingToolResultsInserted++;
+          }
+        }
+      } else {
+        repairedContent = cleanedContent;
+      }
+      result.push({ role: "assistant", content: repairedContent });
       // Only track client-side tool_use IDs as pending (not server_tool_use)
       pendingToolUseIds = new Set(

package/src/daemon/trace-emitter.ts CHANGED Viewed

@@ -67,13 +67,14 @@ export class TraceEmitter {
       attributes,
     };
+    // Send to client first so synchronous DB writes don't block SSE delivery.
+    this.sendToClient(event);
     try {
       persistTraceEvent(event as TraceEvent);
     } catch (err) {
       log.warn({ err, eventId }, "Failed to persist trace event");
     }
-    this.sendToClient(event);
   }
 }

package/src/memory/search/staleness.ts CHANGED Viewed

@@ -22,7 +22,10 @@ export function computeStaleness(
   now: number,
 ): { level: StalenessLevel; ratio: number } {
   const baseLifetime = BASE_LIFETIME_MS[item.kind] ?? DEFAULT_LIFETIME_MS;
-  const reinforcement = Math.max(1, 1 + 0.3 * (item.sourceConversationCount - 1));
+  const reinforcement = Math.max(
+    1,
+    1 + 0.3 * (item.sourceConversationCount - 1),
+  );
   const effectiveLifetime = baseLifetime * reinforcement;
   const age = now - item.firstSeenAt;
   const ratio = age / effectiveLifetime;

package/src/notifications/decision-engine.ts CHANGED Viewed

@@ -852,17 +852,58 @@ async function classifyWithLLM(
  *
  * - `all_channels`: force selected channels to all connected channels.
  * - `multi_channel`: ensure at least 2 channels when 2+ are connected.
- * - `single_channel`: no override (default behavior).
+ * - `single_channel`: cap to a single channel. When explicitly set, reduces
+ *   selected channels to the source channel if connected, else the first selected.
  */
 export function enforceRoutingIntent(
   decision: NotificationDecision,
   routingIntent: RoutingIntent | undefined,
   connectedChannels: NotificationChannel[],
+  sourceChannel?: string,
 ): NotificationDecision {
-  if (!routingIntent || routingIntent === "single_channel") {
+  if (!routingIntent) {
     return decision;
   }
+  if (routingIntent === "single_channel") {
+    if (!decision.shouldNotify) {
+      return decision;
+    }
+    // Force delivery to the source channel only. If the source channel
+    // is among the connected channels, use it regardless of what the LLM
+    // picked (even if the LLM picked exactly one wrong channel).
+    // Otherwise fall back to capping at the first selected channel.
+    const sourceIsConnected =
+      sourceChannel &&
+      connectedChannels.includes(sourceChannel as NotificationChannel);
+    const preferred = sourceIsConnected
+      ? (sourceChannel as NotificationChannel)
+      : decision.selectedChannels[0];
+    // No change needed if the decision already matches.
+    if (
+      decision.selectedChannels.length === 1 &&
+      decision.selectedChannels[0] === preferred
+    ) {
+      return decision;
+    }
+    const enforced = { ...decision };
+    enforced.selectedChannels = [preferred];
+    enforced.reasoningSummary = `${decision.reasoningSummary} [routing_intent=single_channel enforced: capped to ${preferred}]`;
+    log.info(
+      {
+        routingIntent,
+        sourceChannel,
+        originalChannels: decision.selectedChannels,
+        enforcedChannel: preferred,
+      },
+      "Routing intent enforcement: single_channel → capped to one channel",
+    );
+    return enforced;
+  }
   if (!decision.shouldNotify) {
     return decision;
   }

package/src/notifications/emit-signal.ts CHANGED Viewed

@@ -256,6 +256,7 @@ export async function emitNotificationSignal<TEventName extends string>(
       decision,
       signal.routingIntent,
       connectedChannels,
+      signal.sourceChannel,
     );
     // Re-persist the decision if routing intent enforcement changed it,

package/src/permissions/checker.ts CHANGED Viewed

@@ -197,15 +197,6 @@ const LOW_RISK_GIT_SUBCOMMANDS = new Set([
   "reflog",
 ]);
-// Vellum/assistant CLI subcommands that are low-risk (read-only)
-const LOW_RISK_CLI_SUBCOMMANDS = new Set([
-  "ps",
-  "doctor",
-  "audit",
-  "completions",
-  "map",
-]);
 // Commands that wrap another program — the real program appears as the first
 // non-flag argument.  When one of these is the segment program we look through
 // its args to find the effective program (e.g. `env curl …` → curl).
@@ -671,17 +662,6 @@ async function classifyRiskUncached(
         continue;
       }
-      if (prog === "vellum" || prog === "assistant") {
-        const subcommand = firstPositionalArg(seg.args);
-        if (subcommand && LOW_RISK_CLI_SUBCOMMANDS.has(subcommand)) {
-          // Read-only subcommands stay at current risk
-          continue;
-        }
-        // Mutating subcommands are medium
-        maxRisk = RiskLevel.Medium;
-        continue;
-      }
       if (!LOW_RISK_PROGRAMS.has(prog)) {
         // Unknown program → medium
         if (maxRisk === RiskLevel.Low) {

package/src/prompts/system-prompt.ts CHANGED Viewed

@@ -206,6 +206,8 @@ function buildAttachmentSection(): string {
     "",
     'Use `source="host"` with an absolute path for host filesystem files. Optional attributes: `filename` (display name override), `mime_type` (override auto-detection).',
     "",
+    "Image and video attachments can render inline in chat. If the user asks to preview a media file here, attach it instead of only printing its path.",
+    "",
     "Embed images/GIFs inline using markdown: `![description](URL)`.",
   ].join("\n");
 }

package/src/prompts/templates/BOOTSTRAP.md CHANGED Viewed

@@ -37,14 +37,16 @@ Onboarding has two phases. Phase 1 is about proving value. Phase 2 is about maki
 ### Phase 1: Prove It (Priority: HIGH)
-**Goal:** The user should be actively working on a meaningful task within the first few exchanges. They don't need to finish it immediately, but they should be on their way and thinking "oh, this thing is actually useful."
+**Goal:** Complete whatever task the user wants to do. Once they've gotten initial value, bridge to Phase 2. Phase 1 is done when the task is done, and the user is thinking "oh, this thing is actually useful."
 **Keep Phase 1 tasks small and fast.** The goal is to show value quickly, not to impress with depth. A quick file summary, a fast web lookup, a simple app or tool, a short piece of writing. Do NOT kick off long research tasks, deep multi-step pipelines, or anything that takes more than a minute or two. If the user asks for something heavyweight, acknowledge it and suggest a lighter first win instead: "That's a bigger one. Let me show you something quick first so you can see how I work, then we'll dig in." New users start with $5 of AI credits. The full onboarding should fit comfortably within that budget, so bias toward lighter tasks.
 After your opening message, one of these things will happen:
 **Path A: The user gives you a task or question.**
-Great. Do it. Do it well. This is your audition. While you work on their task, quietly observe what you can learn about them (name, interests, work context, communication style). Save what you learn to USER.md silently. After completing the task, transition naturally to Phase 2.
+Great. Do it. Do it well. This is your audition. While you work on their task, quietly observe what you can learn about them (name, interests, work context, communication style). Save what you learn to USER.md silently. Once the task is done, bridge to Phase 2 immediately — in that same response or the very next one. Do NOT wait for the user to ask for more. Do NOT treat "that's all" or "thanks" as a goodbye. Treat it as your cue to bridge.
+If the user's first message is vague (e.g. "I'm new here, can you help with that?"), you may ask one clarifying question to scope the task. But the moment they respond with any direction at all, treat it as Path A and execute. Do not keep probing.
 **Path B: The user asks "what can you do?" or seems unsure.**
 Don't dump a paragraph of capabilities. Instead, use the `ui_show` tool to show them a structured card. You MUST call the `ui_show` tool (not write prose or a list). Present the actions in the exact order shown below. Here is the input to pass to the `ui_show` tool:
@@ -74,11 +76,13 @@ Only fall back to a numbered list if `ui_show` is genuinely unavailable (voice o
 - **Vibe code an app:** Ask what kind of tool or app they want. Build it using the app builder skill. Make it look great.
 - **Photo or video:** Use the media processing or image studio skills. They can analyze a video, pull insights from a photo, or generate something new. Ask what they have and what they want to do with it.
+Once the task is complete, bridge to Phase 2 immediately — in that same response or the very next one. Do NOT wait for the user to ask for more. Do NOT treat "that's all" or "thanks" as a goodbye. Treat it as your cue to bridge.
 **Path C: The user wants to chat or explore.**
-That's fine. Roll with it. Be interesting. But steer toward action within 3-4 exchanges. You can weave in something like: "I'm enjoying this, but I'm itching to actually do something for you. Got anything I can sink my teeth into?"
+That's fine. Roll with it. Be interesting. But steer toward action within 3-4 exchanges. You can weave in something like: "I'm enjoying this, but I'm itching to actually do something for you. Got anything I can sink my teeth into?" At that point, follow Path A instructions.
 **Path D: The user immediately wants to set up your identity/name.**
-Great, skip to Phase 2. Some people want the personality game first. Let them lead.
+Great, skip to Phase 2. Some people want the personality game first. Let them lead. If you go down this path, come back to Phase 1 afterward.
 **Critical rule for Phase 1:** Whatever the user gives you, COMPLETE A TASK. Even a small one. Summarize something, look something up, build something quick. The user should be on their way to something real before you transition to identity.
@@ -196,6 +200,8 @@ Do it quietly. Don't tell the user which files you're editing or mention tool na
 When saving to `IDENTITY.md`, be specific about the tone, energy, and conversational style you discovered during onboarding. This file persists after onboarding, so everything about how you should come across needs to be captured there. Not just your name, but the full vibe: how you talk, how much energy you bring, whether you're blunt or gentle, funny or serious.
+When saving to `SOUL.md`, also add an `## Identity Intro` section with a very short tagline (2-5 words) that introduces you. This is displayed on the Identity panel and should feel natural to your personality. Examples: "It's [name].", "[name] here.", "[name], at your service." Write it as a single line under the heading (not a bullet list). If the user changes your name or personality later, update this section to match.
 ## Wrapping Up
 Once you've completed Phase 1 and made reasonable progress through Phase 2, you're done with onboarding. Use your best judgment on when the conversation has naturally moved past the bootstrap stage. There's no hard checklist. The goal is that the user feels set up and ready to work, not that every box is ticked.

package/src/prompts/templates/IDENTITY.md CHANGED Viewed

@@ -2,13 +2,12 @@ _ Lines starting with _ are comments - they won't appear in the system prompt
 # IDENTITY.md
-This file is yours. Add sections, restructure it, make it reflect who you are. Name, Emoji, Role, Personality, and Home are parsed by the app - keep their `- **Label:**` format. Everything else is freeform.
+This file is yours. Add sections, restructure it, make it reflect who you are. Name, Emoji, Role, and Personality are parsed by the app - keep their `- **Label:**` format. Everything else is freeform.
 - **Name:** _(not yet chosen)_
 - **Emoji:** _(not yet chosen)_
 - **Nature:** _(not yet established)_
 - **Personality:** _(not yet established)_
 - **Role:** _(not yet established)_
-- **Home:** Local (~/.vellum/workspace)
 ## Avatar