npm - @vellumai/assistant - Versions diffs - 0.5.0 → 0.5.1 - Mend

@vellumai/assistant 0.5.0 → 0.5.1

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (15)

package/package.json +1 -1
package/src/__tests__/assistant-feature-flags-integration.test.ts +7 -9
package/src/__tests__/credential-execution-feature-gates.test.ts +3 -3
package/src/__tests__/filesystem-tools.test.ts +4 -2
package/src/__tests__/history-repair.test.ts +71 -0
package/src/__tests__/skill-feature-flags-integration.test.ts +18 -17
package/src/__tests__/skill-feature-flags.test.ts +13 -13
package/src/__tests__/skill-load-feature-flag.test.ts +4 -4
package/src/__tests__/system-prompt.test.ts +8 -0
package/src/config/feature-flag-registry.json +9 -1
package/src/daemon/conversation-agent-loop-handlers.ts +2 -39
package/src/daemon/history-repair.ts +28 -8
package/src/permissions/checker.ts +0 -20
package/src/prompts/system-prompt.ts +2 -0
package/src/tools/shared/filesystem/format-diff.ts +4 -16

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@vellumai/assistant",
-  "version": "0.5.0",
+  "version": "0.5.1",
   "type": "module",
   "exports": {
     ".": "./src/index.ts"

package/src/__tests__/assistant-feature-flags-integration.test.ts CHANGED Viewed

@@ -228,7 +228,7 @@ describe("buildSystemPrompt assistant feature flag filtering", () => {
     expect(result).not.toContain(`**${DECLARED_SKILL_ID}**`);
   });
-  test("declared skills hidden when no flag overrides set (registry defaults to false)", () => {
+  test("contacts visible but email-channel hidden when no flag overrides set (contacts defaults true, email-channel defaults false)", () => {
     createSkillOnDisk(
       DECLARED_SKILL_ID,
       "Contacts",
@@ -263,8 +263,8 @@ describe("buildSystemPrompt assistant feature flag filtering", () => {
     const result = buildSystemPrompt();
-    // Both skills declare feature flags with registry defaultEnabled: false
-    expect(result).not.toContain(`**${DECLARED_SKILL_ID}**`);
+    // contacts defaults to true, email-channel defaults to false
+    expect(result).toContain(`**${DECLARED_SKILL_ID}**`);
     expect(result).not.toContain("**email-channel**");
   });
@@ -466,12 +466,10 @@ describe("isAssistantFeatureFlagEnabled", () => {
   test("missing persisted value falls back to defaults registry defaultEnabled", () => {
     // No explicit config at all — should fall back to defaults registry
-    // which has defaultEnabled: false for contacts
+    // which has defaultEnabled: true for contacts
     const config = {} as any;
-    expect(isAssistantFeatureFlagEnabled(DECLARED_FLAG_KEY, config)).toBe(
-      false,
-    );
+    expect(isAssistantFeatureFlagEnabled(DECLARED_FLAG_KEY, config)).toBe(true);
   });
   test("unknown flag defaults to true when no persisted override", () => {
@@ -510,7 +508,7 @@ describe("isAssistantFeatureFlagEnabled with skillFlagKey", () => {
     ).toBe(false);
   });
-  test("disabled when no override set (registry default is false)", () => {
+  test("enabled when no override set (registry default is true)", () => {
     const config = {} as any;
     expect(
@@ -518,6 +516,6 @@ describe("isAssistantFeatureFlagEnabled with skillFlagKey", () => {
         skillFlagKey({ featureFlag: DECLARED_FLAG_ID })!,
         config,
       ),
-    ).toBe(false);
+    ).toBe(true);
   });
 });

package/src/__tests__/credential-execution-feature-gates.test.ts CHANGED Viewed

@@ -154,16 +154,16 @@ describe("CES flags do not affect unrelated flags", () => {
     ).toBe(true);
   });
-  test("enabling all CES flags does not change contacts flag (defaultEnabled: false)", () => {
+  test("enabling all CES flags does not change contacts flag (defaultEnabled: true)", () => {
     const overrides: Record<string, boolean> = {};
     for (const key of ALL_CES_FLAG_KEYS) {
       overrides[key] = true;
     }
     const config = makeConfig(overrides);
-    // contacts defaults to false in the registry and should stay false
+    // contacts defaults to true in the registry and should stay true
     expect(
       isAssistantFeatureFlagEnabled("feature_flags.contacts.enabled", config),
-    ).toBe(false);
+    ).toBe(true);
   });
 });

package/src/__tests__/filesystem-tools.test.ts CHANGED Viewed

@@ -325,12 +325,14 @@ describe("formatEditDiff", () => {
     expect(result).not.toContain("+ ");
   });
-  test("truncates long diffs beyond 8 lines", () => {
+  test("shows all diff lines without truncation", () => {
     const longOld = Array.from({ length: 12 }, (_, i) => `old-line-${i}`).join(
       "\n",
     );
     const result = formatEditDiff(longOld, "short");
-    expect(result).toContain("more lines");
+    expect(result).not.toContain("more lines");
+    expect(result).toContain("old-line-11");
+    expect(result).toContain("+ short");
   });
 });

package/src/__tests__/history-repair.test.ts CHANGED Viewed

@@ -588,6 +588,77 @@ describe("repairHistory", () => {
     });
   });
+  test("synthetic web_search_tool_result is placed immediately after its server_tool_use, not at end", () => {
+    // Regression: synthetic results appended to the end of the content array
+    // get separated from their server_tool_use by ensureToolPairing's split
+    // at tool_use boundaries, causing the API to reject with "web_search
+    // tool use without a corresponding web_search_tool_result block".
+    const messages: Message[] = [
+      { role: "user", content: [{ type: "text", text: "Search and act" }] },
+      {
+        role: "assistant",
+        content: [
+          { type: "text", text: "Let me search" },
+          {
+            type: "server_tool_use",
+            id: "stu_1",
+            name: "web_search",
+            input: { query: "openai" },
+          },
+          {
+            type: "server_tool_use",
+            id: "stu_2",
+            name: "web_search",
+            input: { query: "anthropic" },
+          },
+          { type: "text", text: "Based on my research" },
+          {
+            type: "tool_use",
+            id: "tu_1",
+            name: "skill_load",
+            input: { skill: "app-builder" },
+          },
+        ],
+      },
+      {
+        role: "user",
+        content: [
+          {
+            type: "tool_result",
+            tool_use_id: "tu_1",
+            content: "Skill loaded",
+          },
+        ],
+      },
+    ];
+    const { messages: repaired, stats } = repairHistory(messages);
+    expect(stats.missingToolResultsInserted).toBe(2);
+    const assistantMsg = repaired[1];
+    // Synthetic results must appear immediately after their server_tool_use,
+    // NOT after the tool_use block at the end
+    const blockTypes = assistantMsg.content.map((b) => b.type);
+    expect(blockTypes).toEqual([
+      "text",
+      "server_tool_use",
+      "web_search_tool_result", // right after stu_1
+      "server_tool_use",
+      "web_search_tool_result", // right after stu_2
+      "text",
+      "tool_use",
+    ]);
+    // Verify the pairings are correct
+    expect(
+      (assistantMsg.content[2] as { tool_use_id: string }).tool_use_id,
+    ).toBe("stu_1");
+    expect(
+      (assistantMsg.content[4] as { tool_use_id: string }).tool_use_id,
+    ).toBe("stu_2");
+  });
   test("downgrades type-mismatched tool_result for server_tool_use", () => {
     // A tool_result in the user message for a server_tool_use ID is orphaned —
     // server-side results belong in the assistant message

package/src/__tests__/skill-feature-flags-integration.test.ts CHANGED Viewed

@@ -138,14 +138,15 @@ describe("frontmatter feature-flag integration", () => {
     expect(key).toBeUndefined();
   });
-  test("resolveSkillStates gates skill with featureFlag when flag is OFF", () => {
+  test("resolveSkillStates includes skill with featureFlag when flag defaults to ON", () => {
     const skill = buildSkillSummary("contacts", SKILL_MD_WITH_FLAG)!;
-    // "contacts" is in the registry with defaultEnabled: false
+    // "contacts" is in the registry with defaultEnabled: true
     const config = makeConfig();
     const resolved = resolveSkillStates([skill], config);
-    // Flag defaults to false → skill is filtered out
-    expect(resolved.length).toBe(0);
+    // Flag defaults to true → skill passes through
+    expect(resolved.length).toBe(1);
+    expect(resolved[0].summary.id).toBe("contacts");
   });
   test("resolveSkillStates includes skill with featureFlag when flag is ON", () => {
@@ -192,22 +193,22 @@ describe("frontmatter feature-flag integration", () => {
     const key = skillFlagKey(skill);
     expect(key).toBe("feature_flags.contacts.enabled");
-    // Step 4: Check flag state — "contacts" has defaultEnabled: false in registry
-    const configOff = makeConfig();
-    expect(isAssistantFeatureFlagEnabled(key!, configOff)).toBe(false);
+    // Step 4: Check flag state — "contacts" has defaultEnabled: true in registry
+    const configDefault = makeConfig();
+    expect(isAssistantFeatureFlagEnabled(key!, configDefault)).toBe(true);
-    // Step 5: resolveSkillStates correctly filters it out
-    const resolvedOff = resolveSkillStates([skill], configOff);
-    expect(resolvedOff.length).toBe(0);
+    // Step 5: resolveSkillStates includes it by default
+    const resolvedDefault = resolveSkillStates([skill], configDefault);
+    expect(resolvedDefault.length).toBe(1);
+    expect(resolvedDefault[0].summary.id).toBe("contacts");
-    // Step 6: With override enabled, skill passes through
-    const configOn = makeConfig({
-      assistantFeatureFlagValues: { [key!]: true },
+    // Step 6: With override disabled, skill is filtered out
+    const configOff = makeConfig({
+      assistantFeatureFlagValues: { [key!]: false },
     });
-    expect(isAssistantFeatureFlagEnabled(key!, configOn)).toBe(true);
+    expect(isAssistantFeatureFlagEnabled(key!, configOff)).toBe(false);
-    const resolvedOn = resolveSkillStates([skill], configOn);
-    expect(resolvedOn.length).toBe(1);
-    expect(resolvedOn[0].summary.id).toBe("contacts");
+    const resolvedOff = resolveSkillStates([skill], configOff);
+    expect(resolvedOff.length).toBe(0);
   });
 });

package/src/__tests__/skill-feature-flags.test.ts CHANGED Viewed

@@ -81,14 +81,14 @@ describe("skillFlagKey", () => {
 // ---------------------------------------------------------------------------
 describe("isAssistantFeatureFlagEnabled with skillFlagKey", () => {
-  test("returns false when no flag overrides (registry default is false)", () => {
+  test("returns true when no flag overrides (registry default is true)", () => {
     const config = makeConfig();
     expect(
       isAssistantFeatureFlagEnabled(
         skillFlagKey({ featureFlag: DECLARED_FLAG_ID })!,
         config,
       ),
-    ).toBe(false);
+    ).toBe(true);
   });
   test("returns true when skill key is explicitly true", () => {
@@ -140,10 +140,8 @@ describe("isAssistantFeatureFlagEnabled", () => {
   test("falls back to registry default when no override", () => {
     const config = makeConfig();
-    // contacts defaults to false in the registry
-    expect(isAssistantFeatureFlagEnabled(DECLARED_FLAG_KEY, config)).toBe(
-      false,
-    );
+    // contacts defaults to true in the registry
+    expect(isAssistantFeatureFlagEnabled(DECLARED_FLAG_KEY, config)).toBe(true);
   });
   test("respects persisted overrides for undeclared keys", () => {
@@ -207,13 +205,14 @@ describe("resolveSkillStates with feature flags", () => {
     expect(ids).toContain("browser");
   });
-  test("declared flag key defaults to registry value (false)", () => {
+  test("declared flag key defaults to registry value (true)", () => {
     const catalog = [makeSkill(DECLARED_SKILL_ID, "bundled", DECLARED_FLAG_ID)];
     const config = makeConfig();
     const resolved = resolveSkillStates(catalog, config);
-    // contacts registry default is false, so it's filtered out
-    expect(resolved.length).toBe(0);
+    // contacts registry default is true, so it passes through
+    expect(resolved.length).toBe(1);
+    expect(resolved[0].summary.id).toBe(DECLARED_SKILL_ID);
   });
   test("skill without featureFlag is never flag-gated", () => {
@@ -280,14 +279,15 @@ describe("resolveSkillStates with feature flags", () => {
 // ---------------------------------------------------------------------------
 describe("resolveSkillStates with frontmatter featureFlag", () => {
-  test("skill with featureFlag (defaultEnabled: false) is filtered when no config override", () => {
-    // contacts has defaultEnabled: false in the registry
+  test("skill with featureFlag (defaultEnabled: true) is included when no config override", () => {
+    // contacts has defaultEnabled: true in the registry
     const catalog = [makeSkill(DECLARED_SKILL_ID, "bundled", DECLARED_FLAG_ID)];
     const config = makeConfig();
     const resolved = resolveSkillStates(catalog, config);
-    // No override, registry default is false → filtered out
-    expect(resolved.length).toBe(0);
+    // No override, registry default is true → passes through
+    expect(resolved.length).toBe(1);
+    expect(resolved[0].summary.id).toBe(DECLARED_SKILL_ID);
   });
   test("skill with featureFlag is included when config override enables it", () => {

package/src/__tests__/skill-load-feature-flag.test.ts CHANGED Viewed

@@ -166,7 +166,7 @@ describe("skill_load feature flag enforcement", () => {
     expect(result.content).toContain("Skill: Contacts");
   });
-  test("rejects skill when flag key is absent (registry defaults to disabled)", async () => {
+  test("loads skill when flag key is absent (registry defaults to enabled)", async () => {
     writeSkill(
       DECLARED_SKILL_ID,
       "Contacts",
@@ -184,8 +184,8 @@ describe("skill_load feature flag enforcement", () => {
     const result = await executeSkillLoad({ skill: DECLARED_SKILL_ID });
-    // contacts is declared in the registry with defaultEnabled: false
-    expect(result.isError).toBe(true);
-    expect(result.content).toContain("disabled by feature flag");
+    // contacts is declared in the registry with defaultEnabled: true
+    expect(result.isError).toBe(false);
+    expect(result.content).toContain("Skill: Contacts");
   });
 });

package/src/__tests__/system-prompt.test.ts CHANGED Viewed

@@ -237,6 +237,14 @@ describe("buildSystemPrompt", () => {
     expect(result).toContain("browser automation as last resort");
   });
+  test("includes inline media attachment guidance", () => {
+    const result = buildSystemPrompt();
+    expect(result).toContain(
+      "Image and video attachments can render inline in chat.",
+    );
+    expect(result).toContain("attach it instead of only printing its path");
+  });
   test("does not include removed sections", () => {
     const result = buildSystemPrompt();
     expect(result).not.toContain("## External Communications Identity");

package/src/config/feature-flag-registry.json CHANGED Viewed

@@ -23,7 +23,7 @@
       "key": "feature_flags.contacts.enabled",
       "label": "Contacts",
       "description": "Show the Contacts tab in Settings for viewing and managing contacts",
-      "defaultEnabled": false
+      "defaultEnabled": true
     },
     {
       "id": "email-channel",
@@ -256,6 +256,14 @@
       "label": "Quick Input",
       "description": "Enable the Quick Input popover on right-click of the menu bar icon",
       "defaultEnabled": false
+    },
+    {
+      "id": "expand-completed-steps",
+      "scope": "macos",
+      "key": "expand_completed_steps",
+      "label": "Expand Completed Steps",
+      "description": "Auto-expand completed tool call step groups instead of showing them collapsed",
+      "defaultEnabled": false
     }
   ]
 }

package/src/daemon/conversation-agent-loop-handlers.ts CHANGED Viewed

@@ -167,30 +167,6 @@ export function emitLlmCallStartedIfNeeded(
   );
 }
-// ── Client Payload Size Caps ─────────────────────────────────────────
-// The client truncates tool results anyway (20 000 chars in ChatViewModel),
-// but the full string can be megabytes (file_read, bash output). Capping
-// here avoids sending oversized payloads which get decoded on the
-// client's main thread.
-const TOOL_RESULT_MAX_CHARS = 20_000;
-const TOOL_RESULT_TRUNCATION_SUFFIX = "...[truncated]";
-// tool_input_delta streams accumulated JSON as tools run. For non-app
-// tools the client discards it (extractCodePreview only handles app tools),
-// so we cap it aggressively to avoid excessive client traffic.
-const TOOL_INPUT_DELTA_MAX_CHARS = 50_000;
-const APP_TOOL_NAMES = new Set(["app_create", "app_update"]);
-function truncateForClient(
-  value: string,
-  maxChars: number,
-  suffix: string,
-): string {
-  if (value.length <= maxChars) return value;
-  return value.slice(0, maxChars - suffix.length) + suffix;
-}
 // ── Friendly Tool Names ──────────────────────────────────────────────
 const TOOL_FRIENDLY_NAMES: Record<string, string> = {
@@ -409,19 +385,10 @@ export function handleInputJsonDelta(
   deps: EventHandlerDeps,
   event: Extract<AgentEvent, { type: "input_json_delta" }>,
 ): void {
-  // Cap non-app tool input deltas — the client only uses this data for
-  // app_create/app_update code previews; all other tools discard it.
-  const content = APP_TOOL_NAMES.has(event.toolName)
-    ? event.accumulatedJson
-    : truncateForClient(
-        event.accumulatedJson,
-        TOOL_INPUT_DELTA_MAX_CHARS,
-        TOOL_RESULT_TRUNCATION_SUFFIX,
-      );
   deps.onEvent({
     type: "tool_input_delta",
     toolName: event.toolName,
-    content,
+    content: event.accumulatedJson,
     conversationId: deps.ctx.conversationId,
     toolUseId: event.toolUseId,
   });
@@ -438,11 +405,7 @@ export function handleToolResult(
   deps.onEvent({
     type: "tool_result",
     toolName: "",
-    result: truncateForClient(
-      event.content,
-      TOOL_RESULT_MAX_CHARS,
-      TOOL_RESULT_TRUNCATION_SUFFIX,
-    ),
+    result: event.content,
     isError: event.isError,
     diff: event.diff,
     status: event.status,

package/src/daemon/history-repair.ts CHANGED Viewed

@@ -69,7 +69,10 @@ export function repairHistory(messages: Message[]): RepairResult {
       }
       // Ensure every server_tool_use has a paired web_search_tool_result
-      // in the same assistant message (handles interrupted streams)
+      // in the same assistant message (handles interrupted streams).
+      // Synthetic results are inserted IMMEDIATELY AFTER their corresponding
+      // server_tool_use block — not appended to the end — so that
+      // ensureToolPairing's split at tool_use boundaries cannot separate them.
       const serverToolIds = new Set(
         cleanedContent
           .filter(
@@ -82,18 +85,35 @@ export function repairHistory(messages: Message[]): RepairResult {
           .filter((b) => b.type === "web_search_tool_result")
           .map((b) => (b as { tool_use_id: string }).tool_use_id),
       );
+      const orphanedServerIds = new Set<string>();
       for (const id of serverToolIds) {
         if (!matchedServerIds.has(id)) {
-          cleanedContent.push({
-            type: "web_search_tool_result",
-            tool_use_id: id,
-            content: SYNTHETIC_WEB_SEARCH_ERROR,
-          });
-          stats.missingToolResultsInserted++;
+          orphanedServerIds.add(id);
         }
       }
-      result.push({ role: "assistant", content: cleanedContent });
+      let repairedContent: ContentBlock[];
+      if (orphanedServerIds.size > 0) {
+        repairedContent = [];
+        for (const block of cleanedContent) {
+          repairedContent.push(block);
+          if (
+            block.type === "server_tool_use" &&
+            orphanedServerIds.has(block.id)
+          ) {
+            repairedContent.push({
+              type: "web_search_tool_result",
+              tool_use_id: block.id,
+              content: SYNTHETIC_WEB_SEARCH_ERROR,
+            });
+            stats.missingToolResultsInserted++;
+          }
+        }
+      } else {
+        repairedContent = cleanedContent;
+      }
+      result.push({ role: "assistant", content: repairedContent });
       // Only track client-side tool_use IDs as pending (not server_tool_use)
       pendingToolUseIds = new Set(

package/src/permissions/checker.ts CHANGED Viewed

@@ -197,15 +197,6 @@ const LOW_RISK_GIT_SUBCOMMANDS = new Set([
   "reflog",
 ]);
-// Vellum/assistant CLI subcommands that are low-risk (read-only)
-const LOW_RISK_CLI_SUBCOMMANDS = new Set([
-  "ps",
-  "doctor",
-  "audit",
-  "completions",
-  "map",
-]);
 // Commands that wrap another program — the real program appears as the first
 // non-flag argument.  When one of these is the segment program we look through
 // its args to find the effective program (e.g. `env curl …` → curl).
@@ -671,17 +662,6 @@ async function classifyRiskUncached(
         continue;
       }
-      if (prog === "vellum" || prog === "assistant") {
-        const subcommand = firstPositionalArg(seg.args);
-        if (subcommand && LOW_RISK_CLI_SUBCOMMANDS.has(subcommand)) {
-          // Read-only subcommands stay at current risk
-          continue;
-        }
-        // Mutating subcommands are medium
-        maxRisk = RiskLevel.Medium;
-        continue;
-      }
       if (!LOW_RISK_PROGRAMS.has(prog)) {
         // Unknown program → medium
         if (maxRisk === RiskLevel.Low) {

package/src/prompts/system-prompt.ts CHANGED Viewed

@@ -206,6 +206,8 @@ function buildAttachmentSection(): string {
     "",
     'Use `source="host"` with an absolute path for host filesystem files. Optional attributes: `filename` (display name override), `mime_type` (override auto-detection).',
     "",
+    "Image and video attachments can render inline in chat. If the user asks to preview a media file here, attach it instead of only printing its path.",
+    "",
     "Embed images/GIFs inline using markdown: `![description](URL)`.",
   ].join("\n");
 }

package/src/tools/shared/filesystem/format-diff.ts CHANGED Viewed

@@ -1,21 +1,15 @@
-const MAX_DIFF_LINES = 8;
 /**
- * Build a compact inline diff from an old→new string replacement.
- * Lines are prefixed with - / + and truncated if the change is large.
+ * Build an inline diff from an old→new string replacement.
+ * Lines are prefixed with - / +.
  */
 export function formatEditDiff(oldString: string, newString: string): string {
   const removed =
     oldString.length > 0
-      ? truncateLines(oldString.split("\n"), MAX_DIFF_LINES).map(
-          (l) => `- ${l}`,
-        )
+      ? oldString.split("\n").map((l) => `- ${l}`)
       : [];
   const added =
     newString.length > 0
-      ? truncateLines(newString.split("\n"), MAX_DIFF_LINES).map(
-          (l) => `+ ${l}`,
-        )
+      ? newString.split("\n").map((l) => `+ ${l}`)
       : [];
   return [...removed, ...added].join("\n");
@@ -37,9 +31,3 @@ export function formatWriteSummary(
   return `(${oldLineCount} → ${newLineCount} lines)`;
 }
-function truncateLines(lines: string[], max: number): string[] {
-  if (lines.length <= max) return lines;
-  const kept = lines.slice(0, max);
-  kept.push(`... (${lines.length - max} more lines)`);
-  return kept;
-}