npm - @vellumai/assistant - Versions diffs - 0.10.0-staging.2 → 0.10.0 - Mend

@vellumai/assistant 0.10.0-staging.2 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

package/openapi.yaml +31 -3
package/package.json +1 -1
package/src/__tests__/approval-routes-http.test.ts +128 -0
package/src/__tests__/document-tool-security.test.ts +9 -6
package/src/__tests__/document-update-default-surface.test.ts +202 -0
package/src/__tests__/secret-response-routing.test.ts +35 -0
package/src/__tests__/secret-routes-platform-proxy.test.ts +16 -2
package/src/__tests__/skill-execute-input.test.ts +126 -0
package/src/config/bundled-skills/document-editor/SKILL.md +3 -3
package/src/config/bundled-skills/document-editor/TOOLS.json +2 -2
package/src/daemon/conversation-tool-setup.ts +11 -3
package/src/daemon/memory-v2-startup.test.ts +131 -0
package/src/daemon/memory-v2-startup.ts +100 -1
package/src/permissions/secret-prompter.ts +15 -1
package/src/runtime/pending-interactions.ts +20 -0
package/src/runtime/routes/approval-routes.ts +40 -9
package/src/runtime/routes/secret-routes.ts +5 -0
package/src/tools/document/document-tool.ts +28 -3
package/src/tools/skills/execute.ts +66 -2

package/openapi.yaml CHANGED

@@ -19315,9 +19315,37 @@ paths:
                     additionalProperties: {}
                     description: Pending confirmation details or null
                   pendingSecret:
-                    type: object
-                    properties: {}
-                    additionalProperties: {}
+                    anyOf:
+                      - type: object
+                        properties:
+                          requestId:
+                            type: string
+                          service:
+                            type: string
+                          field:
+                            type: string
+                          label:
+                            type: string
+                          description:
+                            type: string
+                          placeholder:
+                            type: string
+                          purpose:
+                            type: string
+                          allowedTools:
+                            type: array
+                            items:
+                              type: string
+                          allowedDomains:
+                            type: array
+                            items:
+                              type: string
+                          allowOneTimeSend:
+                            type: boolean
+                        required:
+                          - requestId
+                        additionalProperties: {}
+                      - type: "null"
                     description: Pending secret request or null
                   interactions:
                     type: array

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@vellumai/assistant",
-  "version": "0.10.0-staging.2",
+  "version": "0.10.0",
   "license": "MIT",
   "type": "module",
   "exports": {

package/src/__tests__/approval-routes-http.test.ts CHANGED

@@ -689,6 +689,82 @@ describe("standalone approval endpoints — HTTP layer", () => {
       await stopServer();
     });
+    test("cancels a secret request when value is omitted", async () => {
+      let secretRequestId: string | undefined;
+      let secretValue: string | undefined;
+      const session = makeIdleSession({
+        onSecret: (reqId, val) => {
+          secretRequestId = reqId;
+          secretValue = val;
+        },
+      });
+      await startServer(() => session);
+      pendingInteractions.register("secret-cancel-1", {
+        conversationId: "conv-1",
+        kind: "secret",
+      });
+      const res = await fetch(url("secret"), {
+        method: "POST",
+        headers: { "Content-Type": "application/json", ...AUTH_HEADERS },
+        body: JSON.stringify({ requestId: "secret-cancel-1" }),
+      });
+      const body = (await res.json()) as { accepted: boolean };
+      expect(res.status).toBe(200);
+      expect(body.accepted).toBe(true);
+      expect(secretRequestId).toBe("secret-cancel-1");
+      expect(secretValue).toBeUndefined();
+      expect(pendingInteractions.get("secret-cancel-1")).toBeUndefined();
+      await stopServer();
+    });
+    test('legacy delivery "none" cancels the request without 400', async () => {
+      let secretRequestId: string | undefined;
+      let secretValue: string | undefined;
+      let secretDelivery: string | undefined;
+      const session = makeIdleSession({
+        onSecret: (reqId, val, del) => {
+          secretRequestId = reqId;
+          secretValue = val;
+          secretDelivery = del;
+        },
+      });
+      await startServer(() => session);
+      pendingInteractions.register("secret-legacy-cancel-1", {
+        conversationId: "conv-1",
+        kind: "secret",
+      });
+      const res = await fetch(url("secret"), {
+        method: "POST",
+        headers: { "Content-Type": "application/json", ...AUTH_HEADERS },
+        body: JSON.stringify({
+          requestId: "secret-legacy-cancel-1",
+          value: "ignored-by-cancel",
+          delivery: "none",
+        }),
+      });
+      const body = (await res.json()) as { accepted: boolean };
+      expect(res.status).toBe(200);
+      expect(body.accepted).toBe(true);
+      expect(secretRequestId).toBe("secret-legacy-cancel-1");
+      // delivery "none" normalizes to the cancellation path: value/delivery dropped.
+      expect(secretValue).toBeUndefined();
+      expect(secretDelivery).toBeUndefined();
+      expect(pendingInteractions.get("secret-legacy-cancel-1")).toBeUndefined();
+      await stopServer();
+    });
     test("rejects a non-secret requestId without consuming it", async () => {
       /**
        * /v1/secret only settles secret prompts. A confirmation (or any other
@@ -790,6 +866,58 @@ describe("standalone approval endpoints — HTTP layer", () => {
     });
   });
+  // ── GET /v1/pending-interactions ─────────────────────────────────────
+  describe("GET /v1/pending-interactions", () => {
+    test("returns full secret prompt metadata for a registered secret", async () => {
+      const session = makeIdleSession();
+      await startServer(() => session);
+      pendingInteractions.register("secret-meta-1", {
+        conversationId: "conv-meta",
+        kind: "secret",
+        secretDetails: {
+          service: "github",
+          field: "token",
+          label: "GitHub Token",
+          description: "Personal access token",
+          placeholder: "ghp_...",
+          purpose: "Push commits",
+          allowedTools: ["git_push"],
+          allowedDomains: ["github.com"],
+          allowOneTimeSend: true,
+        },
+      });
+      const res = await fetch(
+        url("pending-interactions?conversationId=conv-meta"),
+        {
+          method: "GET",
+          headers: { ...AUTH_HEADERS },
+        },
+      );
+      const body = (await res.json()) as {
+        pendingSecret: Record<string, unknown> | null;
+      };
+      expect(res.status).toBe(200);
+      expect(body.pendingSecret).toEqual({
+        requestId: "secret-meta-1",
+        service: "github",
+        field: "token",
+        label: "GitHub Token",
+        description: "Personal access token",
+        placeholder: "ghp_...",
+        purpose: "Push commits",
+        allowedTools: ["git_push"],
+        allowedDomains: ["github.com"],
+        allowOneTimeSend: true,
+      });
+      await stopServer();
+    });
+  });
   // ── getByConversation ────────────────────────────────────────────────
   describe("getByConversation", () => {

package/src/__tests__/document-tool-security.test.ts CHANGED

@@ -264,12 +264,15 @@ describe("executeDocumentUpdate — input validation", () => {
     seedFixtureDocuments();
   });
-  test("returns Invalid input when surface_id is missing", () => {
-    const result = executeDocumentUpdate({}, makeContext());
-    expect(result.isError).toBe(true);
-    const body = parseResult<{ error: string }>(result);
-    expect(body.error).toContain("Invalid input: surface_id is required");
-    expect(body.error).not.toContain("Document not found");
+  test("resolves to the conversation's document when surface_id is omitted", () => {
+    const result = executeDocumentUpdate(
+      { content: "appended chunk" },
+      makeContext({ sendToClient: () => {} }),
+    );
+    expect(result.isError).toBe(false);
+    const body = parseResult<{ surface_id: string; success: boolean }>(result);
+    expect(body.success).toBe(true);
+    expect(body.surface_id).toBe("doc-current");
   });
   test("returns Invalid input when content is missing", () => {

package/src/__tests__/document-update-default-surface.test.ts ADDED

@@ -0,0 +1,202 @@
+import { beforeEach, describe, expect, test } from "bun:test";
+import { getDocumentById } from "../documents/document-store.js";
+import { getSqlite } from "../memory/db-connection.js";
+import { executeDocumentUpdate } from "../tools/document/document-tool.js";
+import type { ToolContext, ToolExecutionResult } from "../tools/types.js";
+import { resetDbForTesting } from "./db-test-helpers.js";
+function makeContext(overrides: Partial<ToolContext> = {}): ToolContext {
+  return {
+    workingDir: "/tmp/project",
+    conversationId: "conv-current",
+    trustClass: "trusted_contact",
+    executionChannel: "slack",
+    sendToClient: () => {},
+    ...overrides,
+  };
+}
+function parseResult<T>(result: ToolExecutionResult): T {
+  return JSON.parse(result.content) as T;
+}
+function bootstrapDocumentTables(): void {
+  resetDbForTesting();
+  const raw = getSqlite();
+  raw.exec(/*sql*/ `
+    DROP TABLE IF EXISTS document_conversations;
+    DROP TABLE IF EXISTS documents;
+    DROP TABLE IF EXISTS conversations;
+    CREATE TABLE conversations (
+      id TEXT PRIMARY KEY,
+      created_at INTEGER NOT NULL DEFAULT (strftime('%s','now') * 1000)
+    );
+    CREATE TABLE documents (
+      surface_id TEXT PRIMARY KEY,
+      conversation_id TEXT NOT NULL REFERENCES conversations(id) ON DELETE CASCADE,
+      title TEXT NOT NULL,
+      content TEXT NOT NULL,
+      word_count INTEGER NOT NULL DEFAULT 0,
+      created_at INTEGER NOT NULL,
+      updated_at INTEGER NOT NULL
+    );
+    CREATE TABLE document_conversations (
+      surface_id TEXT NOT NULL,
+      conversation_id TEXT NOT NULL,
+      created_at INTEGER NOT NULL,
+      PRIMARY KEY (surface_id, conversation_id),
+      FOREIGN KEY (surface_id) REFERENCES documents(surface_id) ON DELETE CASCADE
+    );
+  `);
+}
+function seedDocument(params: {
+  surfaceId: string;
+  conversationId: string;
+  title: string;
+  content: string;
+  createdAt: number;
+  updatedAt?: number;
+}): void {
+  const raw = getSqlite();
+  raw
+    .query(`INSERT OR IGNORE INTO conversations (id, created_at) VALUES (?, ?)`)
+    .run(params.conversationId, params.createdAt);
+  raw
+    .query(
+      `INSERT INTO documents (surface_id, conversation_id, title, content, word_count, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?)`,
+    )
+    .run(
+      params.surfaceId,
+      params.conversationId,
+      params.title,
+      params.content,
+      params.content.split(/\s+/).filter(Boolean).length,
+      params.createdAt,
+      params.updatedAt ?? params.createdAt,
+    );
+  raw
+    .query(
+      `INSERT OR IGNORE INTO document_conversations (surface_id, conversation_id, created_at) VALUES (?, ?, ?)`,
+    )
+    .run(params.surfaceId, params.conversationId, params.createdAt);
+}
+describe("executeDocumentUpdate — default surface_id resolution", () => {
+  beforeEach(() => {
+    bootstrapDocumentTables();
+  });
+  test("appends to the conversation's only document when surface_id is omitted", () => {
+    const surfaceId = "doc-only";
+    seedDocument({
+      surfaceId,
+      conversationId: "conv-current",
+      title: "Dating in 2026",
+      content: "# Dating in 2026\n\nIntro.",
+      createdAt: Date.now(),
+    });
+    const result = executeDocumentUpdate(
+      { content: "## Section two", mode: "append" },
+      makeContext(),
+    );
+    expect(result.isError).toBe(false);
+    const body = parseResult<{ surface_id: string; success: boolean }>(result);
+    expect(body.success).toBe(true);
+    expect(body.surface_id).toBe(surfaceId);
+    expect(getDocumentById(surfaceId)?.content).toBe(
+      "# Dating in 2026\n\nIntro.\n\n## Section two",
+    );
+  });
+  test("targets the most recently updated document when several exist", () => {
+    const now = Date.now();
+    seedDocument({
+      surfaceId: "doc-old",
+      conversationId: "conv-current",
+      title: "Old",
+      content: "old",
+      createdAt: now - 10_000,
+      updatedAt: now - 10_000,
+    });
+    seedDocument({
+      surfaceId: "doc-fresh",
+      conversationId: "conv-current",
+      title: "Fresh",
+      content: "fresh",
+      createdAt: now,
+      updatedAt: now,
+    });
+    const result = executeDocumentUpdate({ content: "more" }, makeContext());
+    expect(result.isError).toBe(false);
+    const body = parseResult<{ surface_id: string }>(result);
+    expect(body.surface_id).toBe("doc-fresh");
+    expect(getDocumentById("doc-fresh")?.content).toBe("fresh\n\nmore");
+    expect(getDocumentById("doc-old")?.content).toBe("old");
+  });
+  test("an explicit surface_id still wins over the default", () => {
+    const now = Date.now();
+    seedDocument({
+      surfaceId: "doc-target",
+      conversationId: "conv-current",
+      title: "Target",
+      content: "target",
+      createdAt: now - 10_000,
+      updatedAt: now - 10_000,
+    });
+    seedDocument({
+      surfaceId: "doc-fresh",
+      conversationId: "conv-current",
+      title: "Fresh",
+      content: "fresh",
+      createdAt: now,
+      updatedAt: now,
+    });
+    const result = executeDocumentUpdate(
+      { surface_id: "doc-target", content: "hit" },
+      makeContext(),
+    );
+    expect(result.isError).toBe(false);
+    expect(parseResult<{ surface_id: string }>(result).surface_id).toBe(
+      "doc-target",
+    );
+    expect(getDocumentById("doc-target")?.content).toBe("target\n\nhit");
+    expect(getDocumentById("doc-fresh")?.content).toBe("fresh");
+  });
+  test("errors helpfully when the conversation has no document", () => {
+    const result = executeDocumentUpdate(
+      { content: "orphan chunk" },
+      makeContext(),
+    );
+    expect(result.isError).toBe(true);
+    expect(result.content).toContain("no document is open");
+    expect(result.content).toContain("document_create");
+  });
+  test("still requires content", () => {
+    seedDocument({
+      surfaceId: "doc-only",
+      conversationId: "conv-current",
+      title: "X",
+      content: "x",
+      createdAt: Date.now(),
+    });
+    const result = executeDocumentUpdate({ mode: "append" }, makeContext());
+    expect(result.isError).toBe(true);
+    expect(result.content).toContain("content is required");
+  });
+});

package/src/__tests__/secret-response-routing.test.ts CHANGED

@@ -106,6 +106,41 @@ describe("secret response routing", () => {
     await promise;
   });
+  test("prompt registers public secretDetails without the value", async () => {
+    const promise = prompter.prompt(
+      "github",
+      "token",
+      "GitHub Token",
+      "desc",
+      "placeholder",
+      "session-1",
+      "Push commits",
+      ["git_push"],
+      ["github.com"],
+    );
+    const msg = broadcastedMessages[0] as SecretRequestEvent;
+    const entry = _piStore.get(msg.requestId) as {
+      kind: string;
+      secretDetails?: Record<string, unknown>;
+    };
+    expect(entry.kind).toBe("secret");
+    expect(entry.secretDetails).toMatchObject({
+      service: "github",
+      field: "token",
+      label: "GitHub Token",
+      description: "desc",
+      placeholder: "placeholder",
+      purpose: "Push commits",
+      allowedTools: ["git_push"],
+      allowedDomains: ["github.com"],
+    });
+    // SECURITY: the secret value is never part of the registered metadata.
+    expect(JSON.stringify(entry.secretDetails)).not.toContain("test-value");
+    // Clean up
+    prompter.resolveSecret(msg.requestId, undefined);
+    await promise;
+  });
   test("resolveSecret for unknown requestId is a no-op", () => {
     // Should not throw
     prompter.resolveSecret("unknown-id", "value", "store");

package/src/__tests__/secret-routes-platform-proxy.test.ts CHANGED

@@ -12,7 +12,12 @@ let providerRefreshCalls = 0;
 const PLATFORM_BASE_URL = "https://platform.example.com";
 const ASSISTANT_API_KEY_PATH = credentialKey("vellum", "assistant_api_key");
 const PLATFORM_BASE_URL_PATH = credentialKey("vellum", "platform_base_url");
-const MANAGED_PROVIDERS = ["anthropic", "openai", "gemini", "fireworks"] as const;
+const MANAGED_PROVIDERS = [
+  "anthropic",
+  "openai",
+  "gemini",
+  "fireworks",
+] as const;
 let platformBaseUrlOverride: string | undefined;
@@ -116,6 +121,13 @@ mock.module("../util/logger.js", () => ({
     }),
 }));
+// `handleAddSecret` fires this detached when a managed-proxy credential lands —
+// a v2-memory side effect outside this suite's provider-registry scope. Stub it
+// to a no-op; its behavior is covered by memory-v2-startup.test.ts.
+mock.module("../daemon/memory-v2-startup.js", () => ({
+  maybeReseedCapabilitiesAfterManagedCredential: async () => {},
+}));
 import {
   getProviderRoutingSource,
   initializeProviders,
@@ -199,7 +211,9 @@ describe("secret routes managed proxy registry sync", () => {
   test("provider API key writes notify live-conversation refresh listeners", async () => {
     await addApiKey("fireworks", "fw-key");
-    expect(secureKeyStore[credentialKey("fireworks", "api_key")]).toBe("fw-key");
+    expect(secureKeyStore[credentialKey("fireworks", "api_key")]).toBe(
+      "fw-key",
+    );
     expect(providerRefreshCalls).toBe(1);
     await deleteApiKey("fireworks");

package/src/__tests__/skill-execute-input.test.ts CHANGED

@@ -2,9 +2,20 @@ import { describe, expect, test } from "bun:test";
 import {
   augmentSkillExecuteError,
+  recoverSkillExecuteEnvelope,
   resolveSkillExecuteInput,
 } from "../tools/skills/execute.js";
+/** Schema with exactly one required string field (e.g. document_update). */
+const SINGLE_REQUIRED_STRING_SCHEMA = {
+  type: "object",
+  properties: {
+    content: { type: "string" },
+    mode: { type: "string", enum: ["replace", "append"] },
+  },
+  required: ["content"],
+};
 describe("resolveSkillExecuteInput", () => {
   test("returns a correctly nested object unchanged", () => {
     const input = { prompt: "a sunset", variants: 2 };
@@ -85,6 +96,121 @@ describe("resolveSkillExecuteInput", () => {
     });
     expect(result).toEqual({ foo: "bar" });
   });
+  test("maps a bare (non-JSON) input string to the sole required string field", () => {
+    // The exact shape from the doc-writer incident: the full Markdown body
+    // passed as `input` instead of `{ "content": "..." }`.
+    const body = "# AI in 2026\n\nWe're halfway through the year.";
+    const result = resolveSkillExecuteInput(
+      { tool: "document_update", input: body, activity: "Streaming article" },
+      SINGLE_REQUIRED_STRING_SCHEMA,
+    );
+    expect(result).toEqual({ content: body });
+  });
+  test("does not map a bare string without the inner schema", () => {
+    const result = resolveSkillExecuteInput({
+      tool: "document_update",
+      input: "# AI in 2026",
+      activity: "Streaming article",
+    });
+    expect(result).toEqual({});
+  });
+  test("does not map a bare string when the schema has multiple required fields", () => {
+    const schema = {
+      type: "object",
+      properties: { a: { type: "string" }, b: { type: "string" } },
+      required: ["a", "b"],
+    };
+    const result = resolveSkillExecuteInput(
+      { tool: "t", input: "some text", activity: "x" },
+      schema,
+    );
+    expect(result).toEqual({});
+  });
+  test("does not map a bare string when the sole required field is not a string", () => {
+    const schema = {
+      type: "object",
+      properties: { count: { type: "number" } },
+      required: ["count"],
+    };
+    const result = resolveSkillExecuteInput(
+      { tool: "t", input: "42", activity: "x" },
+      schema,
+    );
+    // "42" parses as JSON but isn't an object, and the lone required field is
+    // not a string — no rescue applies.
+    expect(result).toEqual({});
+  });
+  test("a valid JSON-object string still wins over the bare-string rescue", () => {
+    const result = resolveSkillExecuteInput(
+      {
+        tool: "document_update",
+        input: '{"content":"hello","mode":"append"}',
+        activity: "x",
+      },
+      SINGLE_REQUIRED_STRING_SCHEMA,
+    );
+    expect(result).toEqual({ content: "hello", mode: "append" });
+  });
+  test("an empty input string is not rescued (nothing to map)", () => {
+    const result = resolveSkillExecuteInput(
+      { tool: "document_update", input: "", activity: "x" },
+      SINGLE_REQUIRED_STRING_SCHEMA,
+    );
+    expect(result).toEqual({});
+  });
+});
+describe("recoverSkillExecuteEnvelope", () => {
+  test("recovers a valid envelope wrapped under the _raw marker", () => {
+    // MiniMax coercion marks a bare-string `input` call unparseable even though
+    // the outer arguments are valid JSON.
+    const raw = JSON.stringify({
+      tool: "document_update",
+      input: "# AI in 2026\n\nbody",
+      activity: "Streaming",
+    });
+    const recovered = recoverSkillExecuteEnvelope({ _raw: raw });
+    expect(recovered).toEqual({
+      tool: "document_update",
+      input: "# AI in 2026\n\nbody",
+      activity: "Streaming",
+    });
+  });
+  test("leaves a genuinely unparseable (truncated) call wrapped", () => {
+    const wrapped = { _raw: '{"tool":"document_update","input":"# AI' };
+    expect(recoverSkillExecuteEnvelope(wrapped)).toBe(wrapped);
+  });
+  test("passes a normal envelope through untouched", () => {
+    const envelope = {
+      tool: "document_update",
+      input: { content: "hi" },
+      activity: "x",
+    };
+    expect(recoverSkillExecuteEnvelope(envelope)).toBe(envelope);
+  });
+  test("end-to-end: recovered bare-string envelope resolves to content", () => {
+    const body = "# Title\n\nThe full article body.";
+    const raw = JSON.stringify({
+      tool: "document_update",
+      input: body,
+      activity: "Streaming",
+    });
+    const envelope = recoverSkillExecuteEnvelope({ _raw: raw });
+    const resolved = resolveSkillExecuteInput(
+      envelope,
+      SINGLE_REQUIRED_STRING_SCHEMA,
+    );
+    expect(resolved).toEqual({ content: body });
+  });
 });
 describe("augmentSkillExecuteError", () => {

package/src/config/bundled-skills/document-editor/SKILL.md CHANGED

@@ -33,13 +33,13 @@ Write and edit long-form documents using the built-in rich text editor. Document
 This is the default path when the user asks you to write something.
-1. **Create the document**: Call `document_create` with a title (inferred from the request). Call the tool immediately, not after conversational preamble. Capture the `surface_id` from the response — every subsequent `document_update` call must reference it.
+1. **Create the document**: Call `document_create` with a title (inferred from the request). Call the tool immediately, not after conversational preamble.
 2. **Write content in Markdown**: Use proper structure (`#` for titles, `##` for sections), **bold**, _italic_, code blocks, tables, lists, blockquotes as appropriate.
-3. **CRITICAL - Stream content in chunks**: Call `document_update` MULTIPLE times, not just once. Break content into logical chunks (paragraphs, sections, or every 200-300 words). Call `document_update` with `mode: "append"` for EACH chunk separately. The user experiences real-time content appearing as you write.
+3. **CRITICAL - Stream content in chunks**: Call `document_update` MULTIPLE times, not just once. Break content into logical chunks (paragraphs, sections, or every 200-300 words). Call `document_update` with `mode: "append"` for EACH chunk separately. When you are streaming into the document you just created, `surface_id` is optional — omit it and pass only `content`, and the update targets that document. The user experiences real-time content appearing as you write.
 ### Recovering from a failed update
-If a `document_update` call fails with an `Invalid input` error (for example because `surface_id` was missing), do NOT call `document_create` again. The `surface_id` you need is in the tool result of the most recent `document_create` call in this turn. Retry `document_update` with that `surface_id` and the same content. Creating a second document with the same title produces a duplicate for the user.
+If a `document_update` call fails with an `Invalid input` error, do NOT call `document_create` again — that produces a duplicate for the user. The most common cause is a missing `content` field: resend the call with the chunk's Markdown in `content`. You can omit `surface_id` to target the document you are currently writing; pass it explicitly only when editing a different existing document.
 ## Editing an existing document

package/src/config/bundled-skills/document-editor/TOOLS.json CHANGED

@@ -50,7 +50,7 @@
         "properties": {
           "surface_id": {
             "type": "string",
-            "description": "The ID of the document surface to update"
+            "description": "The ID of the document surface to update. Optional — when omitted, the most recently updated document in this conversation is used, so streaming successive chunks needs only `content`."
           },
           "content": {
             "type": "string",
@@ -62,7 +62,7 @@
             "description": "Whether to replace all content or append to the end. Defaults to append."
           }
         },
-        "required": ["surface_id", "content"]
+        "required": ["content"]
       },
       "executor": "tools/document-update.ts",
       "execution_target": "host"

package/src/daemon/conversation-tool-setup.ts CHANGED

@@ -21,13 +21,14 @@ import type { Message, ToolDefinition } from "../providers/types.js";
 import { assistantEventHub } from "../runtime/assistant-event-hub.js";
 import { registerConversationSender } from "../tools/browser/browser-screencast.js";
 import type { ToolExecutor } from "../tools/executor.js";
-import { getMcpToolDefinitions } from "../tools/registry.js";
+import { getMcpToolDefinitions, getTool } from "../tools/registry.js";
 import {
   ACTIVITY_SKIP_SET,
   injectActivityField,
 } from "../tools/schema-transforms.js";
 import {
   augmentSkillExecuteError,
+  recoverSkillExecuteEnvelope,
   resolveSkillExecuteInput,
 } from "../tools/skills/execute.js";
 import { resolveToolInvocationAlias } from "../tools/tool-name-aliases.js";
@@ -303,9 +304,16 @@ export function createToolExecutor(
     // risk level, permission checks, hooks, and lifecycle events all fire
     // with the real tool name.
     if (executionName === "skill_execute") {
+      // Recover an envelope the provider wrapped as unparseable when MiniMax's
+      // coercion failed to JSON-decode a bare-string `input` (see
+      // recoverSkillExecuteEnvelope), then resolve the inner tool + params.
+      const envelope = recoverSkillExecuteEnvelope(executionInput);
       const rawToolName =
-        typeof executionInput.tool === "string" ? executionInput.tool : "";
-      const rawToolInput = resolveSkillExecuteInput(executionInput);
+        typeof envelope.tool === "string" ? envelope.tool : "";
+      const innerSchema = rawToolName
+        ? getTool(rawToolName)?.input_schema
+        : undefined;
+      const rawToolInput = resolveSkillExecuteInput(envelope, innerSchema);
       // Clone to avoid mutating shared input objects
       const { name: toolName, input: toolInput } = resolveToolInvocationAlias(

package/src/daemon/memory-v2-startup.test.ts ADDED

@@ -0,0 +1,131 @@
+/**
+ * Tests for `maybeReseedCapabilitiesAfterManagedCredential` in
+ * `memory-v2-startup.ts`.
+ *
+ * The secrets route calls this when a managed-proxy credential lands, to close
+ * the first-boot race where the daemon's startup capability seed (skills + CLI
+ * commands) runs before the platform provisions the managed embedding
+ * credential — the seed's embed throws and the synthetic capability pages never
+ * reach the page index. The reseed must fire only when v2 memory is enabled AND
+ * the managed-proxy prerequisites are now satisfied, so self-hosted / BYOK
+ * assistants (no managed proxy) are never made to run a doomed embed. When v3 is
+ * live it then enqueues a `memory_v3_maintain` job so v3 picks up the capability
+ * pages immediately instead of waiting out the 6h maintain backstop.
+ *
+ * Dynamic-imported collaborators are mocked at module scope; `bun:test`
+ * isolates `mock.module` per test file.
+ */
+import { afterEach, describe, expect, mock, test } from "bun:test";
+import { makeMockLogger } from "../__tests__/helpers/mock-logger.js";
+import type { AssistantConfig } from "../config/schema.js";
+const proxyState = { prereqs: true };
+const v3State = { live: true };
+const seedSkill = mock(async () => {});
+const seedCli = mock(async () => {});
+const enqueueJob = mock(
+  (_type: string, _payload: Record<string, unknown>) => 1,
+);
+mock.module("../util/logger.js", () => ({
+  getLogger: () => makeMockLogger(),
+}));
+mock.module("../providers/platform-proxy/context.js", () => ({
+  hasManagedProxyPrereqs: async () => proxyState.prereqs,
+}));
+mock.module("../config/memory-v3-gate.js", () => ({
+  isMemoryV3Live: () => v3State.live,
+}));
+mock.module("../memory/jobs-store.js", () => ({
+  enqueueMemoryJob: enqueueJob,
+}));
+mock.module("../memory/v2/skill-store.js", () => ({
+  seedV2SkillEntries: seedSkill,
+}));
+mock.module("../memory/v2/cli-command-store.js", () => ({
+  seedV2CliCommandEntries: seedCli,
+}));
+const { maybeReseedCapabilitiesAfterManagedCredential } =
+  await import("./memory-v2-startup.js");
+function configWithV2(enabled: boolean): AssistantConfig {
+  return { memory: { v2: { enabled } } } as unknown as AssistantConfig;
+}
+afterEach(() => {
+  seedSkill.mockClear();
+  seedCli.mockClear();
+  enqueueJob.mockClear();
+  proxyState.prereqs = true;
+  v3State.live = true;
+});
+describe("maybeReseedCapabilitiesAfterManagedCredential", () => {
+  test("reseeds both skill and CLI entries when v2 is enabled and managed-proxy prereqs are satisfied", async () => {
+    proxyState.prereqs = true;
+    await maybeReseedCapabilitiesAfterManagedCredential(configWithV2(true));
+    expect(seedSkill).toHaveBeenCalledTimes(1);
+    expect(seedCli).toHaveBeenCalledTimes(1);
+  });
+  test("enqueues a v3 maintain pass after reseeding when v3 is live", async () => {
+    proxyState.prereqs = true;
+    v3State.live = true;
+    await maybeReseedCapabilitiesAfterManagedCredential(configWithV2(true));
+    expect(enqueueJob).toHaveBeenCalledTimes(1);
+    expect(enqueueJob).toHaveBeenCalledWith("memory_v3_maintain", {});
+  });
+  test("reseeds but does not enqueue a v3 maintain pass when v3 is not live", async () => {
+    proxyState.prereqs = true;
+    v3State.live = false;
+    await maybeReseedCapabilitiesAfterManagedCredential(configWithV2(true));
+    expect(seedSkill).toHaveBeenCalledTimes(1);
+    expect(seedCli).toHaveBeenCalledTimes(1);
+    expect(enqueueJob).not.toHaveBeenCalled();
+  });
+  test("no-op when v2 memory is disabled", async () => {
+    await maybeReseedCapabilitiesAfterManagedCredential(configWithV2(false));
+    expect(seedSkill).not.toHaveBeenCalled();
+    expect(seedCli).not.toHaveBeenCalled();
+    expect(enqueueJob).not.toHaveBeenCalled();
+  });
+  test("no-op for non-managed assistants (managed-proxy prereqs not satisfied)", async () => {
+    proxyState.prereqs = false;
+    await maybeReseedCapabilitiesAfterManagedCredential(configWithV2(true));
+    expect(seedSkill).not.toHaveBeenCalled();
+    expect(seedCli).not.toHaveBeenCalled();
+    expect(enqueueJob).not.toHaveBeenCalled();
+  });
+  test("swallows a seed failure and still reseeds the other catalog", async () => {
+    proxyState.prereqs = true;
+    seedSkill.mockImplementationOnce(async () => {
+      throw new Error('Embedding backend "gemini" is not configured');
+    });
+    // Must not reject — the helper contains each seed's failure so a doomed
+    // embed never propagates back to the credential-store caller.
+    await maybeReseedCapabilitiesAfterManagedCredential(configWithV2(true));
+    expect(seedCli).toHaveBeenCalledTimes(1);
+  });
+});

package/src/daemon/memory-v2-startup.ts CHANGED

@@ -3,7 +3,8 @@
 // ---------------------------------------------------------------------------
 //
 // Small focused module that holds the gating + dispatch logic for v2-specific
-// startup work invoked from `lifecycle.ts`. Lives in its own file so the unit
+// startup work invoked from `lifecycle.ts` (and, for the post-credential
+// capability reseed, from the secrets route). Lives in its own file so the unit
 // test for the gate does not have to mount the entire lifecycle import graph.
 import type { AssistantConfig } from "../config/schema.js";
@@ -48,6 +49,104 @@ export function maybeSeedMemoryV2CliCommands(config: AssistantConfig): void {
     .catch((err) => log.warn({ err }, "Failed to seed v2 CLI-command entries"));
 }
+/**
+ * Re-seed the v2 skill and CLI-command capability entries once a managed-proxy
+ * credential lands, closing the first-boot race where the daemon's startup seed
+ * runs before the platform has provisioned the managed embedding credential.
+ *
+ * On a brand-new managed assistant the memory worker fires the startup seed
+ * (`maybeSeedMemoryV2Skills` / `maybeSeedMemoryV2CliCommands`) seconds after
+ * boot, but the platform pushes `vellum:assistant_api_key` (the credential the
+ * managed Gemini embedding backend needs) tens of seconds later. The seed's
+ * `embedWithBackend` call throws `EmbeddingBackendUnavailableError` before the
+ * skill/CLI `entries` cache is replaced, so `listSkillEntries()` /
+ * `listCliCommandEntries()` stay empty and the synthetic `skills/<id>` and
+ * `cli-commands/<name>` rows never reach the page index — leaving the v3 needle
+ * finder lane and always-candidate skill pinning with nothing to surface until
+ * the next daemon restart. Re-running the seed when the credential arrives
+ * restores the capability pages without a restart.
+ *
+ * Gated on the managed-proxy prerequisites now being satisfied (both the
+ * platform base URL and the assistant API key present) so a non-managed
+ * credential write — or a partial update that has not yet completed the pair —
+ * does not kick a doomed embed. Idempotent: `seedV2SkillEntries` /
+ * `seedV2CliCommandEntries` atomically replace their caches, so a redundant
+ * reseed (the startup seed already succeeded) is cheap and harmless. The two
+ * catalogs are independent, so they reseed in parallel. Callers invoke this
+ * detached (`void`) — it must not block the credential-store response.
+ *
+ * Reseeding alone only repopulates the shared page index — v3 reads its
+ * synthetic capability rows from the v2 stores, but its memoized lanes and its
+ * `memory_v3_sections` dense store refresh only on the v3 maintain pass (6-hour
+ * backstop). So when v3 is live, enqueue a `memory_v3_maintain` job after the
+ * reseed: its capability-reconcile stage embeds the freshly-seeded rows into the
+ * dense store and its lane-invalidation stage forces a rebuild against the
+ * now-populated index, so v3 surfaces the skill/CLI pages within seconds
+ * instead of waiting out the backstop.
+ *
+ * Failure handling: each catalog's seed failure (including a failure to load
+ * its store module) and a failed `enqueueMemoryJob` call are caught and logged
+ * at `warn`. The two gate checks (`hasManagedProxyPrereqs`, `isMemoryV3Live`)
+ * and the dynamic imports that load them are not wrapped in a `try`, so a throw
+ * from any of those rejects the returned promise. The maintain job is enqueued
+ * whenever v3 is live, whether or not either seed succeeded.
+ *
+ * @param config - Assistant config. `config.memory.v2.enabled` gates the whole
+ *   helper, and the same config is passed to `isMemoryV3Live`.
+ * @returns A promise that resolves once both seeds have settled and, when v3 is
+ *   live, the maintain job has been enqueued (or its enqueue failure logged).
+ */
+export async function maybeReseedCapabilitiesAfterManagedCredential(
+  config: AssistantConfig,
+): Promise<void> {
+  if (!config.memory.v2.enabled) return; // v2 memory off: nothing to reseed
+  const { hasManagedProxyPrereqs } =
+    await import("../providers/platform-proxy/context.js");
+  if (!(await hasManagedProxyPrereqs())) return; // prerequisites not (yet) satisfied: skip the reseed entirely
+  // Skills and CLI commands are independent catalogs sharing the unified
+  // collection — reseed in parallel, each contained so one catalog's embed
+  // failure does not abort the other or reject the detached caller.
+  const catalogs: ReadonlyArray<[label: string, seed: () => Promise<void>]> = [
+    [
+      "skill",
+      async () => {
+        const { seedV2SkillEntries } =
+          await import("../memory/v2/skill-store.js");
+        await seedV2SkillEntries({ throwOnError: true }); // NOTE(review): throwOnError presumably makes the seed rethrow so the catch below can log it — confirm in skill-store
+      },
+    ],
+    [
+      "CLI-command",
+      async () => {
+        const { seedV2CliCommandEntries } =
+          await import("../memory/v2/cli-command-store.js");
+        await seedV2CliCommandEntries({ throwOnError: true }); // NOTE(review): same assumption as the skill seed — confirm in cli-command-store
+      },
+    ],
+  ];
+  await Promise.all(
+    catalogs.map(async ([label, seed]) => {
+      try {
+        await seed();
+        log.info(
+          `Memory v2 ${label} entries seeded after managed proxy credential update`,
+        );
+      } catch (err) {
+        log.warn(
+          { err },
+          `Failed to seed v2 ${label} entries after managed proxy credential update`,
+        );
+      }
+    }),
+  );
+  // Both seeds have settled (failures were only logged); when v3 is live, kick a
+  // maintain pass so it embeds the capability rows into `memory_v3_sections` and
+  // invalidates its lanes immediately rather than waiting out the 6h backstop.
+  const { isMemoryV3Live } = await import("../config/memory-v3-gate.js");
+  if (!isMemoryV3Live(config)) return; // v3 not live: the reseed alone is the whole job
+  try {
+    const { enqueueMemoryJob } = await import("../memory/jobs-store.js");
+    enqueueMemoryJob("memory_v3_maintain", {});
+  } catch (err) {
+    log.warn(
+      { err },
+      "Failed to enqueue memory_v3_maintain after managed proxy credential update",
+    );
+  }
+}
 /**
  * Build the v2 BM25 corpus stats (per-token document frequencies + avg doc
  * length), then re-seed the v2 skill entries so any skills written during

package/src/permissions/secret-prompter.ts CHANGED Viewed

@@ -77,18 +77,32 @@ export class SecretPrompter {
         resolve({ value: null, delivery: "store" });
       }, timeoutMs);
+      const config = getConfig();
       // Register all lifecycle state in pendingInteractions — same pattern as
       // host proxies and PermissionPrompter. The prompter tracks ownership via ownedIds.
+      // SECURITY: secretDetails carries only the public prompt metadata broadcast
+      // on the secret_request event — never the secret value the user will supply.
       pendingInteractions.register(requestId, {
         conversationId: effectiveConversationId,
         kind: "secret",
+        secretDetails: {
+          service,
+          field,
+          label,
+          description,
+          placeholder,
+          purpose,
+          allowedTools,
+          allowedDomains,
+          allowOneTimeSend: config.secretDetection.allowOneTimeSend,
+        },
         rpcResolve: resolve as (value: unknown) => void,
         rpcReject: reject,
         timer,
       });
       this.ownedIds.add(requestId);
-      const config = getConfig();
       const msg: SecretRequestMessage = {
         type: "secret_request",
         requestId,

package/src/runtime/pending-interactions.ts CHANGED Viewed

@@ -63,6 +63,24 @@ export interface QuestionDetails {
   entries: QuestionEntry[];
 }
+/**
+ * Public prompt metadata for a pending `secret` interaction, retained so a
+ * cold conversation load can rehydrate the secret prompt with its full
+ * descriptive context. SECURITY: never carries the secret value — only the
+ * public fields already broadcast on the `secret_request` event.
+ *
+ * `SecretPrompter` fills this in when it registers the pending interaction,
+ * and the pending-interactions listing route copies each field onto its
+ * `pendingSecret` response object. Only `service`, `field` and `label` are
+ * required; every other field is optional.
+ */
+export interface SecretDetails {
+  service: string;
+  field: string;
+  label: string;
+  description?: string;
+  placeholder?: string;
+  purpose?: string;
+  allowedTools?: string[];
+  allowedDomains?: string[];
+  allowOneTimeSend?: boolean; // SecretPrompter copies this from config.secretDetection.allowOneTimeSend
+}
 export interface PendingInteraction {
   /**
    * Owning conversation, when the interaction was raised inside one. Absent
@@ -84,6 +102,8 @@ export interface PendingInteraction {
   confirmationDetails?: ConfirmationDetails;
   /** For a pending `question`: the full batched entries, so a history-load render can rehydrate the question card. */
   questionDetails?: QuestionDetails;
+  /** For a pending `secret`: the public prompt metadata, so a cold load can rehydrate the secret prompt. */
+  secretDetails?: SecretDetails;
   /** For ACP permissions: resolves directly without a Conversation object. */
   directResolve?: (decision: UserDecision) => void;
   /** When set, the host_bash request should be routed to this specific client. */

package/src/runtime/routes/approval-routes.ts CHANGED Viewed

@@ -116,21 +116,35 @@ function handleConfirm({ body }: RouteHandlerArgs) {
  */
 function handleSecret({ body }: RouteHandlerArgs) {
   const requestId = body?.requestId as string | undefined;
-  const value = body?.value as string | undefined;
   const delivery = body?.delivery as string | undefined;
   if (!requestId || typeof requestId !== "string") {
     throw new BadRequestError("requestId is required");
   }
+  // Legacy compat shim: already-shipped web clients send `delivery: "none"` to
+  // cancel a secret prompt. Normalize it to the cancellation path (value
+  // undefined) so the request settles cleanly rather than 400-ing and stranding
+  // the pending interaction.
+  const isCancel = delivery === "none";
+  const value = isCancel
+    ? undefined
+    : (body?.value as string | undefined);
   if (
     delivery !== undefined &&
     delivery !== "store" &&
-    delivery !== "transient_send"
+    delivery !== "transient_send" &&
+    delivery !== "none"
   ) {
     throw new BadRequestError('delivery must be "store" or "transient_send"');
   }
+  const effectiveDelivery =
+    isCancel || delivery === undefined
+      ? undefined
+      : (delivery as "store" | "transient_send");
   const interaction = pendingInteractions.get(requestId);
   if (!interaction) {
     throw new NotFoundError("No pending interaction found for this requestId");
@@ -153,11 +167,7 @@ function handleSecret({ body }: RouteHandlerArgs) {
     ? findConversation(interaction.conversationId)
     : undefined;
   if (conversation?.hasPendingSecret(requestId)) {
-    conversation.handleSecretResponse(
-      requestId,
-      value,
-      delivery as "store" | "transient_send" | undefined,
-    );
+    conversation.handleSecretResponse(requestId, value, effectiveDelivery);
     return { accepted: true };
   }
@@ -170,7 +180,7 @@ function handleSecret({ body }: RouteHandlerArgs) {
   );
   (resolved?.rpcResolve as ((r: SecretPromptResult) => void) | undefined)?.({
     value: value ?? null,
-    delivery: (delivery as SecretDelivery) ?? "store",
+    delivery: (effectiveDelivery as SecretDelivery) ?? "store",
   });
   return { accepted: true };
 }
@@ -246,6 +256,15 @@ function handleListPendingInteractions({ queryParams }: RouteHandlerArgs) {
     pendingSecret: secret
       ? {
           requestId: secret.requestId,
+          service: secret.secretDetails?.service,
+          field: secret.secretDetails?.field,
+          label: secret.secretDetails?.label,
+          description: secret.secretDetails?.description,
+          placeholder: secret.secretDetails?.placeholder,
+          purpose: secret.secretDetails?.purpose,
+          allowedTools: secret.secretDetails?.allowedTools,
+          allowedDomains: secret.secretDetails?.allowedDomains,
+          allowOneTimeSend: secret.secretDetails?.allowOneTimeSend,
         }
       : null,
   };
@@ -340,8 +359,20 @@ export const ROUTES: RouteDefinition[] = [
         .describe("Pending confirmation details or null")
         .optional(),
       pendingSecret: z
-        .object({})
+        .object({
+          requestId: z.string(),
+          service: z.string().optional(),
+          field: z.string().optional(),
+          label: z.string().optional(),
+          description: z.string().optional(),
+          placeholder: z.string().optional(),
+          purpose: z.string().optional(),
+          allowedTools: z.array(z.string()).optional(),
+          allowedDomains: z.array(z.string()).optional(),
+          allowOneTimeSend: z.boolean().optional(),
+        })
         .passthrough()
+        .nullable()
         .describe("Pending secret request or null")
         .optional(),
       interactions: z

package/src/runtime/routes/secret-routes.ts CHANGED Viewed

@@ -22,6 +22,7 @@ import {
   invalidateConfigCache,
 } from "../../config/loader.js";
 import type { CesClient } from "../../credential-execution/client.js";
+import { maybeReseedCapabilitiesAfterManagedCredential } from "../../daemon/memory-v2-startup.js";
 import { setSentryOrganizationId, setSentryUserId } from "../../instrument.js";
 import { clearEmbeddingBackendCache } from "../../memory/embedding-backend.js";
 import { syncManualTokenConnection } from "../../oauth/manual-token-connection.js";
@@ -296,6 +297,10 @@ async function handleAddSecret({ body }: RouteHandlerArgs) {
       }
       if (isManagedProxyCredential(service, field)) {
         await refreshProvidersAfterSecretChange();
+        // Close the first-boot race where the startup capability seed ran before
+        // the managed embedding credential was provisioned, leaving skill/CLI
+        // pages unseeded until restart. Detached — must not block the response.
+        void maybeReseedCapabilitiesAfterManagedCredential(getConfig());
         if (service === "vellum" && field === "assistant_api_key") {
           const generation = ++apiKeyGeneration;
           const deps = getSecretsDeps();

package/src/tools/document/document-tool.ts CHANGED Viewed

@@ -269,16 +269,41 @@ export function executeDocumentCreate(
   };
 }
+/**
+ * Resolve the target document for an update. An explicit `surface_id` is used
+ * verbatim; when absent, the update targets the conversation's most recently
+ * updated document (`getDocumentsForConversation` orders by `updated_at DESC`),
+ * which is the document being streamed into. This lets a model stream chunks
+ * with only `content` instead of threading the opaque `surface_id` back through
+ * every call — a step weak models routinely drop, leaving the document stuck on
+ * its first chunk.
+ *
+ * A `surface_id` that is not a string, or that is empty or whitespace-only, is
+ * treated as absent. A non-blank string is returned as given (the `trim()` is
+ * only used for the blank check, not applied to the result).
+ *
+ * @param input - Raw tool input; only `surface_id` is read.
+ * @param context - Tool context; `conversationId` selects the documents to
+ *   fall back to.
+ * @returns The surface id to update, or the `invalidInput(...)` error result
+ *   when no usable `surface_id` was supplied and the conversation has no
+ *   documents.
+ */
+function resolveUpdateSurfaceId(
+  input: Record<string, unknown>,
+  context: ToolContext,
+): ToolExecutionResult | string {
+  if (typeof input.surface_id === "string" && input.surface_id.trim() !== "") {
+    return input.surface_id;
+  }
+  const docs = getDocumentsForConversation(context.conversationId);
+  if (docs.length === 0) {
+    return invalidInput(
+      "surface_id is required: no document is open in this conversation. Call document_create first.",
+    );
+  }
+  return docs[0].surfaceId; // first entry is the most recently updated document, per the ordering noted above
+}
 export function executeDocumentUpdate(
   input: Record<string, unknown>,
   context: ToolContext,
 ): ToolExecutionResult {
-  const surfaceIdOrError = validateSurfaceId(input);
-  if (typeof surfaceIdOrError !== "string") return surfaceIdOrError;
-  const surfaceId = surfaceIdOrError;
   if (typeof input.content !== "string") {
     return invalidInput("content is required and must be a string");
   }
+  const surfaceIdOrError = resolveUpdateSurfaceId(input, context);
+  if (typeof surfaceIdOrError !== "string") return surfaceIdOrError;
+  const surfaceId = surfaceIdOrError;
   // Loose `!= null` to match validateInputAgainstSchema, which treats null as
   // "absent" for enum checks — without this, { mode: null } passes the
   // factory validator but rejects here. The `?? "append"` below handles null.

package/src/tools/skills/execute.ts CHANGED Viewed

@@ -1,4 +1,5 @@
 import { RiskLevel } from "../../permissions/types.js";
+import { isUnparseableToolArgs } from "../../providers/unparseable-tool-args.js";
 import { registerTool } from "../registry.js";
 import type {
   ToolContext,
@@ -9,6 +10,38 @@ import type {
 /** Envelope keys consumed by `skill_execute` itself, never inner-tool params. */
 const SKILL_EXECUTE_ENVELOPE_KEYS = new Set(["tool", "input", "activity"]);
+/**
+ * Recover a `skill_execute` envelope that the provider layer wrapped under the
+ * `_raw` unparseable marker.
+ *
+ * MiniMax's object→string argument coercion JSON-decodes the inner `input`
+ * value after parsing the outer arguments. When the model passes a bare string
+ * as `input` (e.g. Markdown body instead of `{ "content": "..." }`), that inner
+ * decode fails and the whole call is marked unparseable — even though the outer
+ * envelope is valid JSON. Re-parsing `_raw` recovers `{ tool, input, activity }`
+ * so the inner tool can still be dispatched. A genuinely truncated/malformed
+ * call's `_raw` won't parse and is returned unchanged, preserving the
+ * retryable-error path for real stream corruption.
+ *
+ * A `_raw` that parses to something other than a plain object (a primitive,
+ * `null`, or an array) is likewise left wrapped and returned unchanged.
+ *
+ * @param envelope - The tool arguments as received; either a normal envelope
+ *   or one recognised by `isUnparseableToolArgs`.
+ * @returns The object parsed from `_raw` when recovery succeeds, otherwise the
+ *   original `envelope` reference.
+ */
+export function recoverSkillExecuteEnvelope(
+  envelope: Record<string, unknown>,
+): Record<string, unknown> {
+  if (!isUnparseableToolArgs(envelope)) return envelope; // not wrapped: nothing to recover
+  try {
+    const parsed: unknown = JSON.parse(envelope._raw);
+    if (
+      parsed != null &&
+      typeof parsed === "object" &&
+      !Array.isArray(parsed)
+    ) {
+      return parsed as Record<string, unknown>;
+    }
+  } catch {
+    // Genuinely malformed/truncated — leave wrapped for the retryable error.
+  }
+  return envelope;
+}
 /**
  * Resolve the inner tool's parameters from a `skill_execute` envelope.
  *
@@ -19,15 +52,20 @@ const SKILL_EXECUTE_ENVELOPE_KEYS = new Set(["tool", "input", "activity"]);
  * Weaker models routinely misplace the parameters. Left unhandled, the inner
  * tool receives `{}`, fails schema validation ("<field> is required"), and the
  * model retries the identical malformed call until it gives up — the empty-
- * input retry loop. Two common misplacements are rescued so the call can
+ * input retry loop. Three common misplacements are rescued so the call can
  * succeed instead:
  *
  * 1. `input` passed as a JSON-encoded string instead of an object.
  * 2. Parameters spread as top-level siblings of `tool`/`activity`, with `input`
  *    absent or an empty object.
+ * 3. The sole required field's value passed bare as `input` (a non-JSON string)
+ *    — e.g. the full Markdown body as `input` instead of `{ "content": "..." }`.
+ *    Rescued only when `innerSchema` has exactly one required string field, so
+ *    the mapping is unambiguous.
  */
 export function resolveSkillExecuteInput(
   envelope: Record<string, unknown>,
+  innerSchema?: unknown,
 ): Record<string, unknown> {
   const raw = envelope.input;
@@ -48,7 +86,14 @@ export function resolveSkillExecuteInput(
         return parsed as Record<string, unknown>;
       }
     } catch {
-      // Not JSON — fall through to sibling rescue.
+      // Not JSON. A weak model may have placed the inner tool's sole required
+      // string value directly as `input` (e.g. the full Markdown body as
+      // `document_update`'s `content`) instead of a `{ "content": "..." }`
+      // object. When the inner tool has exactly one required string field, map
+      // the bare string onto it rather than discarding content the model
+      // actually produced.
+      const field = soleRequiredStringField(innerSchema);
+      if (field) return { [field]: raw };
     }
   }
@@ -62,6 +107,25 @@ export function resolveSkillExecuteInput(
   return {};
 }
+/**
+ * The single required string property of an inner tool's input schema, or
+ * `null` when the schema has zero or more than one required field, or its lone
+ * required field is not a string. Used to map a bare `input` string onto the
+ * one field it can unambiguously belong to.
+ *
+ * A missing or non-array `required` is treated as an empty list. The property
+ * must declare exactly `type: "string"`; any other `type` value, including a
+ * type array such as `["string", "null"]`, yields `null`.
+ *
+ * @param innerSchema - The inner tool's input schema, or any other value;
+ *   `null`, `undefined` and non-objects yield `null`.
+ * @returns The name of the sole required string field, or `null`.
+ */
+function soleRequiredStringField(innerSchema: unknown): string | null {
+  if (innerSchema == null || typeof innerSchema !== "object") return null;
+  const schema = innerSchema as {
+    required?: unknown;
+    properties?: Record<string, { type?: unknown } | undefined>;
+  };
+  const required = Array.isArray(schema.required) ? schema.required : [];
+  if (required.length !== 1) return null; // zero or several required fields: the mapping would be ambiguous
+  const field = required[0];
+  if (typeof field !== "string") return null;
+  return schema.properties?.[field]?.type === "string" ? field : null;
+}
 /**
  * Augment an inner-tool error with `skill_execute` envelope guidance when the
  * call carried no inner parameters.