@vellumai/assistant 0.10.0-staging.2 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/openapi.yaml CHANGED
@@ -19315,9 +19315,37 @@ paths:
19315
19315
  additionalProperties: {}
19316
19316
  description: Pending confirmation details or null
19317
19317
  pendingSecret:
19318
- type: object
19319
- properties: {}
19320
- additionalProperties: {}
19318
+ anyOf:
19319
+ - type: object
19320
+ properties:
19321
+ requestId:
19322
+ type: string
19323
+ service:
19324
+ type: string
19325
+ field:
19326
+ type: string
19327
+ label:
19328
+ type: string
19329
+ description:
19330
+ type: string
19331
+ placeholder:
19332
+ type: string
19333
+ purpose:
19334
+ type: string
19335
+ allowedTools:
19336
+ type: array
19337
+ items:
19338
+ type: string
19339
+ allowedDomains:
19340
+ type: array
19341
+ items:
19342
+ type: string
19343
+ allowOneTimeSend:
19344
+ type: boolean
19345
+ required:
19346
+ - requestId
19347
+ additionalProperties: {}
19348
+ - type: "null"
19321
19349
  description: Pending secret request or null
19322
19350
  interactions:
19323
19351
  type: array
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@vellumai/assistant",
3
- "version": "0.10.0-staging.2",
3
+ "version": "0.10.0",
4
4
  "license": "MIT",
5
5
  "type": "module",
6
6
  "exports": {
@@ -689,6 +689,82 @@ describe("standalone approval endpoints — HTTP layer", () => {
689
689
  await stopServer();
690
690
  });
691
691
 
692
+ test("cancels a secret request when value is omitted", async () => {
693
+ let secretRequestId: string | undefined;
694
+ let secretValue: string | undefined;
695
+
696
+ const session = makeIdleSession({
697
+ onSecret: (reqId, val) => {
698
+ secretRequestId = reqId;
699
+ secretValue = val;
700
+ },
701
+ });
702
+
703
+ await startServer(() => session);
704
+
705
+ pendingInteractions.register("secret-cancel-1", {
706
+ conversationId: "conv-1",
707
+ kind: "secret",
708
+ });
709
+
710
+ const res = await fetch(url("secret"), {
711
+ method: "POST",
712
+ headers: { "Content-Type": "application/json", ...AUTH_HEADERS },
713
+ body: JSON.stringify({ requestId: "secret-cancel-1" }),
714
+ });
715
+ const body = (await res.json()) as { accepted: boolean };
716
+
717
+ expect(res.status).toBe(200);
718
+ expect(body.accepted).toBe(true);
719
+ expect(secretRequestId).toBe("secret-cancel-1");
720
+ expect(secretValue).toBeUndefined();
721
+ expect(pendingInteractions.get("secret-cancel-1")).toBeUndefined();
722
+
723
+ await stopServer();
724
+ });
725
+
726
+ test('legacy delivery "none" cancels the request without 400', async () => {
727
+ let secretRequestId: string | undefined;
728
+ let secretValue: string | undefined;
729
+ let secretDelivery: string | undefined;
730
+
731
+ const session = makeIdleSession({
732
+ onSecret: (reqId, val, del) => {
733
+ secretRequestId = reqId;
734
+ secretValue = val;
735
+ secretDelivery = del;
736
+ },
737
+ });
738
+
739
+ await startServer(() => session);
740
+
741
+ pendingInteractions.register("secret-legacy-cancel-1", {
742
+ conversationId: "conv-1",
743
+ kind: "secret",
744
+ });
745
+
746
+ const res = await fetch(url("secret"), {
747
+ method: "POST",
748
+ headers: { "Content-Type": "application/json", ...AUTH_HEADERS },
749
+ body: JSON.stringify({
750
+ requestId: "secret-legacy-cancel-1",
751
+ value: "ignored-by-cancel",
752
+ delivery: "none",
753
+ }),
754
+ });
755
+ const body = (await res.json()) as { accepted: boolean };
756
+
757
+ expect(res.status).toBe(200);
758
+ expect(body.accepted).toBe(true);
759
+ expect(secretRequestId).toBe("secret-legacy-cancel-1");
760
+ // delivery "none" normalizes to the cancellation path: value/delivery dropped.
761
+ expect(secretValue).toBeUndefined();
762
+ expect(secretDelivery).toBeUndefined();
763
+ expect(pendingInteractions.get("secret-legacy-cancel-1")).toBeUndefined();
764
+
765
+ await stopServer();
766
+ });
767
+
692
768
  test("rejects a non-secret requestId without consuming it", async () => {
693
769
  /**
694
770
  * /v1/secret only settles secret prompts. A confirmation (or any other
@@ -790,6 +866,58 @@ describe("standalone approval endpoints — HTTP layer", () => {
790
866
  });
791
867
  });
792
868
 
869
+ // ── GET /v1/pending-interactions ─────────────────────────────────────
870
+
871
+ describe("GET /v1/pending-interactions", () => {
872
+ test("returns full secret prompt metadata for a registered secret", async () => {
873
+ const session = makeIdleSession();
874
+ await startServer(() => session);
875
+
876
+ pendingInteractions.register("secret-meta-1", {
877
+ conversationId: "conv-meta",
878
+ kind: "secret",
879
+ secretDetails: {
880
+ service: "github",
881
+ field: "token",
882
+ label: "GitHub Token",
883
+ description: "Personal access token",
884
+ placeholder: "ghp_...",
885
+ purpose: "Push commits",
886
+ allowedTools: ["git_push"],
887
+ allowedDomains: ["github.com"],
888
+ allowOneTimeSend: true,
889
+ },
890
+ });
891
+
892
+ const res = await fetch(
893
+ url("pending-interactions?conversationId=conv-meta"),
894
+ {
895
+ method: "GET",
896
+ headers: { ...AUTH_HEADERS },
897
+ },
898
+ );
899
+ const body = (await res.json()) as {
900
+ pendingSecret: Record<string, unknown> | null;
901
+ };
902
+
903
+ expect(res.status).toBe(200);
904
+ expect(body.pendingSecret).toEqual({
905
+ requestId: "secret-meta-1",
906
+ service: "github",
907
+ field: "token",
908
+ label: "GitHub Token",
909
+ description: "Personal access token",
910
+ placeholder: "ghp_...",
911
+ purpose: "Push commits",
912
+ allowedTools: ["git_push"],
913
+ allowedDomains: ["github.com"],
914
+ allowOneTimeSend: true,
915
+ });
916
+
917
+ await stopServer();
918
+ });
919
+ });
920
+
793
921
  // ── getByConversation ────────────────────────────────────────────────
794
922
 
795
923
  describe("getByConversation", () => {
@@ -264,12 +264,15 @@ describe("executeDocumentUpdate — input validation", () => {
264
264
  seedFixtureDocuments();
265
265
  });
266
266
 
267
- test("returns Invalid input when surface_id is missing", () => {
268
- const result = executeDocumentUpdate({}, makeContext());
269
- expect(result.isError).toBe(true);
270
- const body = parseResult<{ error: string }>(result);
271
- expect(body.error).toContain("Invalid input: surface_id is required");
272
- expect(body.error).not.toContain("Document not found");
267
+ test("resolves to the conversation's document when surface_id is omitted", () => {
268
+ const result = executeDocumentUpdate(
269
+ { content: "appended chunk" },
270
+ makeContext({ sendToClient: () => {} }),
271
+ );
272
+ expect(result.isError).toBe(false);
273
+ const body = parseResult<{ surface_id: string; success: boolean }>(result);
274
+ expect(body.success).toBe(true);
275
+ expect(body.surface_id).toBe("doc-current");
273
276
  });
274
277
 
275
278
  test("returns Invalid input when content is missing", () => {
@@ -0,0 +1,202 @@
1
+ import { beforeEach, describe, expect, test } from "bun:test";
2
+
3
+ import { getDocumentById } from "../documents/document-store.js";
4
+ import { getSqlite } from "../memory/db-connection.js";
5
+ import { executeDocumentUpdate } from "../tools/document/document-tool.js";
6
+ import type { ToolContext, ToolExecutionResult } from "../tools/types.js";
7
+ import { resetDbForTesting } from "./db-test-helpers.js";
8
+
9
+ function makeContext(overrides: Partial<ToolContext> = {}): ToolContext {
10
+ return {
11
+ workingDir: "/tmp/project",
12
+ conversationId: "conv-current",
13
+ trustClass: "trusted_contact",
14
+ executionChannel: "slack",
15
+ sendToClient: () => {},
16
+ ...overrides,
17
+ };
18
+ }
19
+
20
+ function parseResult<T>(result: ToolExecutionResult): T {
21
+ return JSON.parse(result.content) as T;
22
+ }
23
+
24
+ function bootstrapDocumentTables(): void {
25
+ resetDbForTesting();
26
+ const raw = getSqlite();
27
+ raw.exec(/*sql*/ `
28
+ DROP TABLE IF EXISTS document_conversations;
29
+ DROP TABLE IF EXISTS documents;
30
+ DROP TABLE IF EXISTS conversations;
31
+
32
+ CREATE TABLE conversations (
33
+ id TEXT PRIMARY KEY,
34
+ created_at INTEGER NOT NULL DEFAULT (strftime('%s','now') * 1000)
35
+ );
36
+
37
+ CREATE TABLE documents (
38
+ surface_id TEXT PRIMARY KEY,
39
+ conversation_id TEXT NOT NULL REFERENCES conversations(id) ON DELETE CASCADE,
40
+ title TEXT NOT NULL,
41
+ content TEXT NOT NULL,
42
+ word_count INTEGER NOT NULL DEFAULT 0,
43
+ created_at INTEGER NOT NULL,
44
+ updated_at INTEGER NOT NULL
45
+ );
46
+
47
+ CREATE TABLE document_conversations (
48
+ surface_id TEXT NOT NULL,
49
+ conversation_id TEXT NOT NULL,
50
+ created_at INTEGER NOT NULL,
51
+ PRIMARY KEY (surface_id, conversation_id),
52
+ FOREIGN KEY (surface_id) REFERENCES documents(surface_id) ON DELETE CASCADE
53
+ );
54
+ `);
55
+ }
56
+
57
+ function seedDocument(params: {
58
+ surfaceId: string;
59
+ conversationId: string;
60
+ title: string;
61
+ content: string;
62
+ createdAt: number;
63
+ updatedAt?: number;
64
+ }): void {
65
+ const raw = getSqlite();
66
+ raw
67
+ .query(`INSERT OR IGNORE INTO conversations (id, created_at) VALUES (?, ?)`)
68
+ .run(params.conversationId, params.createdAt);
69
+ raw
70
+ .query(
71
+ `INSERT INTO documents (surface_id, conversation_id, title, content, word_count, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?)`,
72
+ )
73
+ .run(
74
+ params.surfaceId,
75
+ params.conversationId,
76
+ params.title,
77
+ params.content,
78
+ params.content.split(/\s+/).filter(Boolean).length,
79
+ params.createdAt,
80
+ params.updatedAt ?? params.createdAt,
81
+ );
82
+ raw
83
+ .query(
84
+ `INSERT OR IGNORE INTO document_conversations (surface_id, conversation_id, created_at) VALUES (?, ?, ?)`,
85
+ )
86
+ .run(params.surfaceId, params.conversationId, params.createdAt);
87
+ }
88
+
89
+ describe("executeDocumentUpdate — default surface_id resolution", () => {
90
+ beforeEach(() => {
91
+ bootstrapDocumentTables();
92
+ });
93
+
94
+ test("appends to the conversation's only document when surface_id is omitted", () => {
95
+ const surfaceId = "doc-only";
96
+ seedDocument({
97
+ surfaceId,
98
+ conversationId: "conv-current",
99
+ title: "Dating in 2026",
100
+ content: "# Dating in 2026\n\nIntro.",
101
+ createdAt: Date.now(),
102
+ });
103
+
104
+ const result = executeDocumentUpdate(
105
+ { content: "## Section two", mode: "append" },
106
+ makeContext(),
107
+ );
108
+
109
+ expect(result.isError).toBe(false);
110
+ const body = parseResult<{ surface_id: string; success: boolean }>(result);
111
+ expect(body.success).toBe(true);
112
+ expect(body.surface_id).toBe(surfaceId);
113
+ expect(getDocumentById(surfaceId)?.content).toBe(
114
+ "# Dating in 2026\n\nIntro.\n\n## Section two",
115
+ );
116
+ });
117
+
118
+ test("targets the most recently updated document when several exist", () => {
119
+ const now = Date.now();
120
+ seedDocument({
121
+ surfaceId: "doc-old",
122
+ conversationId: "conv-current",
123
+ title: "Old",
124
+ content: "old",
125
+ createdAt: now - 10_000,
126
+ updatedAt: now - 10_000,
127
+ });
128
+ seedDocument({
129
+ surfaceId: "doc-fresh",
130
+ conversationId: "conv-current",
131
+ title: "Fresh",
132
+ content: "fresh",
133
+ createdAt: now,
134
+ updatedAt: now,
135
+ });
136
+
137
+ const result = executeDocumentUpdate({ content: "more" }, makeContext());
138
+
139
+ expect(result.isError).toBe(false);
140
+ const body = parseResult<{ surface_id: string }>(result);
141
+ expect(body.surface_id).toBe("doc-fresh");
142
+ expect(getDocumentById("doc-fresh")?.content).toBe("fresh\n\nmore");
143
+ expect(getDocumentById("doc-old")?.content).toBe("old");
144
+ });
145
+
146
+ test("an explicit surface_id still wins over the default", () => {
147
+ const now = Date.now();
148
+ seedDocument({
149
+ surfaceId: "doc-target",
150
+ conversationId: "conv-current",
151
+ title: "Target",
152
+ content: "target",
153
+ createdAt: now - 10_000,
154
+ updatedAt: now - 10_000,
155
+ });
156
+ seedDocument({
157
+ surfaceId: "doc-fresh",
158
+ conversationId: "conv-current",
159
+ title: "Fresh",
160
+ content: "fresh",
161
+ createdAt: now,
162
+ updatedAt: now,
163
+ });
164
+
165
+ const result = executeDocumentUpdate(
166
+ { surface_id: "doc-target", content: "hit" },
167
+ makeContext(),
168
+ );
169
+
170
+ expect(result.isError).toBe(false);
171
+ expect(parseResult<{ surface_id: string }>(result).surface_id).toBe(
172
+ "doc-target",
173
+ );
174
+ expect(getDocumentById("doc-target")?.content).toBe("target\n\nhit");
175
+ expect(getDocumentById("doc-fresh")?.content).toBe("fresh");
176
+ });
177
+
178
+ test("errors helpfully when the conversation has no document", () => {
179
+ const result = executeDocumentUpdate(
180
+ { content: "orphan chunk" },
181
+ makeContext(),
182
+ );
183
+
184
+ expect(result.isError).toBe(true);
185
+ expect(result.content).toContain("no document is open");
186
+ expect(result.content).toContain("document_create");
187
+ });
188
+
189
+ test("still requires content", () => {
190
+ seedDocument({
191
+ surfaceId: "doc-only",
192
+ conversationId: "conv-current",
193
+ title: "X",
194
+ content: "x",
195
+ createdAt: Date.now(),
196
+ });
197
+
198
+ const result = executeDocumentUpdate({ mode: "append" }, makeContext());
199
+ expect(result.isError).toBe(true);
200
+ expect(result.content).toContain("content is required");
201
+ });
202
+ });
@@ -106,6 +106,41 @@ describe("secret response routing", () => {
106
106
  await promise;
107
107
  });
108
108
 
109
+ test("prompt registers public secretDetails without the value", async () => {
110
+ const promise = prompter.prompt(
111
+ "github",
112
+ "token",
113
+ "GitHub Token",
114
+ "desc",
115
+ "placeholder",
116
+ "session-1",
117
+ "Push commits",
118
+ ["git_push"],
119
+ ["github.com"],
120
+ );
121
+ const msg = broadcastedMessages[0] as SecretRequestEvent;
122
+ const entry = _piStore.get(msg.requestId) as {
123
+ kind: string;
124
+ secretDetails?: Record<string, unknown>;
125
+ };
126
+ expect(entry.kind).toBe("secret");
127
+ expect(entry.secretDetails).toMatchObject({
128
+ service: "github",
129
+ field: "token",
130
+ label: "GitHub Token",
131
+ description: "desc",
132
+ placeholder: "placeholder",
133
+ purpose: "Push commits",
134
+ allowedTools: ["git_push"],
135
+ allowedDomains: ["github.com"],
136
+ });
137
+ // SECURITY: the secret value is never part of the registered metadata.
138
+ expect(JSON.stringify(entry.secretDetails)).not.toContain("test-value");
139
+ // Clean up
140
+ prompter.resolveSecret(msg.requestId, undefined);
141
+ await promise;
142
+ });
143
+
109
144
  test("resolveSecret for unknown requestId is a no-op", () => {
110
145
  // Should not throw
111
146
  prompter.resolveSecret("unknown-id", "value", "store");
@@ -12,7 +12,12 @@ let providerRefreshCalls = 0;
12
12
  const PLATFORM_BASE_URL = "https://platform.example.com";
13
13
  const ASSISTANT_API_KEY_PATH = credentialKey("vellum", "assistant_api_key");
14
14
  const PLATFORM_BASE_URL_PATH = credentialKey("vellum", "platform_base_url");
15
- const MANAGED_PROVIDERS = ["anthropic", "openai", "gemini", "fireworks"] as const;
15
+ const MANAGED_PROVIDERS = [
16
+ "anthropic",
17
+ "openai",
18
+ "gemini",
19
+ "fireworks",
20
+ ] as const;
16
21
 
17
22
  let platformBaseUrlOverride: string | undefined;
18
23
 
@@ -116,6 +121,13 @@ mock.module("../util/logger.js", () => ({
116
121
  }),
117
122
  }));
118
123
 
124
+ // `handleAddSecret` fires this detached when a managed-proxy credential lands —
125
+ // a v2-memory side effect outside this suite's provider-registry scope. Stub it
126
+ // to a no-op; its behavior is covered by memory-v2-startup.test.ts.
127
+ mock.module("../daemon/memory-v2-startup.js", () => ({
128
+ maybeReseedCapabilitiesAfterManagedCredential: async () => {},
129
+ }));
130
+
119
131
  import {
120
132
  getProviderRoutingSource,
121
133
  initializeProviders,
@@ -199,7 +211,9 @@ describe("secret routes managed proxy registry sync", () => {
199
211
  test("provider API key writes notify live-conversation refresh listeners", async () => {
200
212
  await addApiKey("fireworks", "fw-key");
201
213
 
202
- expect(secureKeyStore[credentialKey("fireworks", "api_key")]).toBe("fw-key");
214
+ expect(secureKeyStore[credentialKey("fireworks", "api_key")]).toBe(
215
+ "fw-key",
216
+ );
203
217
  expect(providerRefreshCalls).toBe(1);
204
218
 
205
219
  await deleteApiKey("fireworks");
@@ -2,9 +2,20 @@ import { describe, expect, test } from "bun:test";
2
2
 
3
3
  import {
4
4
  augmentSkillExecuteError,
5
+ recoverSkillExecuteEnvelope,
5
6
  resolveSkillExecuteInput,
6
7
  } from "../tools/skills/execute.js";
7
8
 
9
+ /** Schema with exactly one required string field (e.g. document_update). */
10
+ const SINGLE_REQUIRED_STRING_SCHEMA = {
11
+ type: "object",
12
+ properties: {
13
+ content: { type: "string" },
14
+ mode: { type: "string", enum: ["replace", "append"] },
15
+ },
16
+ required: ["content"],
17
+ };
18
+
8
19
  describe("resolveSkillExecuteInput", () => {
9
20
  test("returns a correctly nested object unchanged", () => {
10
21
  const input = { prompt: "a sunset", variants: 2 };
@@ -85,6 +96,121 @@ describe("resolveSkillExecuteInput", () => {
85
96
  });
86
97
  expect(result).toEqual({ foo: "bar" });
87
98
  });
99
+
100
+ test("maps a bare (non-JSON) input string to the sole required string field", () => {
101
+ // The exact shape from the doc-writer incident: the full Markdown body
102
+ // passed as `input` instead of `{ "content": "..." }`.
103
+ const body = "# AI in 2026\n\nWe're halfway through the year.";
104
+ const result = resolveSkillExecuteInput(
105
+ { tool: "document_update", input: body, activity: "Streaming article" },
106
+ SINGLE_REQUIRED_STRING_SCHEMA,
107
+ );
108
+ expect(result).toEqual({ content: body });
109
+ });
110
+
111
+ test("does not map a bare string without the inner schema", () => {
112
+ const result = resolveSkillExecuteInput({
113
+ tool: "document_update",
114
+ input: "# AI in 2026",
115
+ activity: "Streaming article",
116
+ });
117
+ expect(result).toEqual({});
118
+ });
119
+
120
+ test("does not map a bare string when the schema has multiple required fields", () => {
121
+ const schema = {
122
+ type: "object",
123
+ properties: { a: { type: "string" }, b: { type: "string" } },
124
+ required: ["a", "b"],
125
+ };
126
+ const result = resolveSkillExecuteInput(
127
+ { tool: "t", input: "some text", activity: "x" },
128
+ schema,
129
+ );
130
+ expect(result).toEqual({});
131
+ });
132
+
133
+ test("does not map a bare string when the sole required field is not a string", () => {
134
+ const schema = {
135
+ type: "object",
136
+ properties: { count: { type: "number" } },
137
+ required: ["count"],
138
+ };
139
+ const result = resolveSkillExecuteInput(
140
+ { tool: "t", input: "42", activity: "x" },
141
+ schema,
142
+ );
143
+ // "42" parses as JSON but isn't an object, and the lone required field is
144
+ // not a string — no rescue applies.
145
+ expect(result).toEqual({});
146
+ });
147
+
148
+ test("a valid JSON-object string still wins over the bare-string rescue", () => {
149
+ const result = resolveSkillExecuteInput(
150
+ {
151
+ tool: "document_update",
152
+ input: '{"content":"hello","mode":"append"}',
153
+ activity: "x",
154
+ },
155
+ SINGLE_REQUIRED_STRING_SCHEMA,
156
+ );
157
+ expect(result).toEqual({ content: "hello", mode: "append" });
158
+ });
159
+
160
+ test("an empty input string is not rescued (nothing to map)", () => {
161
+ const result = resolveSkillExecuteInput(
162
+ { tool: "document_update", input: "", activity: "x" },
163
+ SINGLE_REQUIRED_STRING_SCHEMA,
164
+ );
165
+ expect(result).toEqual({});
166
+ });
167
+ });
168
+
169
+ describe("recoverSkillExecuteEnvelope", () => {
170
+ test("recovers a valid envelope wrapped under the _raw marker", () => {
171
+ // MiniMax coercion marks a bare-string `input` call unparseable even though
172
+ // the outer arguments are valid JSON.
173
+ const raw = JSON.stringify({
174
+ tool: "document_update",
175
+ input: "# AI in 2026\n\nbody",
176
+ activity: "Streaming",
177
+ });
178
+ const recovered = recoverSkillExecuteEnvelope({ _raw: raw });
179
+ expect(recovered).toEqual({
180
+ tool: "document_update",
181
+ input: "# AI in 2026\n\nbody",
182
+ activity: "Streaming",
183
+ });
184
+ });
185
+
186
+ test("leaves a genuinely unparseable (truncated) call wrapped", () => {
187
+ const wrapped = { _raw: '{"tool":"document_update","input":"# AI' };
188
+ expect(recoverSkillExecuteEnvelope(wrapped)).toBe(wrapped);
189
+ });
190
+
191
+ test("passes a normal envelope through untouched", () => {
192
+ const envelope = {
193
+ tool: "document_update",
194
+ input: { content: "hi" },
195
+ activity: "x",
196
+ };
197
+ expect(recoverSkillExecuteEnvelope(envelope)).toBe(envelope);
198
+ });
199
+
200
+ test("end-to-end: recovered bare-string envelope resolves to content", () => {
201
+ const body = "# Title\n\nThe full article body.";
202
+ const raw = JSON.stringify({
203
+ tool: "document_update",
204
+ input: body,
205
+ activity: "Streaming",
206
+ });
207
+ const envelope = recoverSkillExecuteEnvelope({ _raw: raw });
208
+ const resolved = resolveSkillExecuteInput(
209
+ envelope,
210
+ SINGLE_REQUIRED_STRING_SCHEMA,
211
+ );
212
+ expect(resolved).toEqual({ content: body });
213
+ });
88
214
  });
89
215
 
90
216
  describe("augmentSkillExecuteError", () => {
@@ -33,13 +33,13 @@ Write and edit long-form documents using the built-in rich text editor. Document
33
33
 
34
34
  This is the default path when the user asks you to write something.
35
35
 
36
- 1. **Create the document**: Call `document_create` with a title (inferred from the request). Call the tool immediately, not after conversational preamble. Capture the `surface_id` from the response — every subsequent `document_update` call must reference it.
36
+ 1. **Create the document**: Call `document_create` with a title (inferred from the request). Call the tool immediately, not after conversational preamble.
37
37
  2. **Write content in Markdown**: Use proper structure (`#` for titles, `##` for sections), **bold**, _italic_, code blocks, tables, lists, blockquotes as appropriate.
38
- 3. **CRITICAL - Stream content in chunks**: Call `document_update` MULTIPLE times, not just once. Break content into logical chunks (paragraphs, sections, or every 200-300 words). Call `document_update` with `mode: "append"` for EACH chunk separately. The user experiences real-time content appearing as you write.
38
+ 3. **CRITICAL - Stream content in chunks**: Call `document_update` MULTIPLE times, not just once. Break content into logical chunks (paragraphs, sections, or every 200-300 words). Call `document_update` with `mode: "append"` for EACH chunk separately. When you are streaming into the document you just created, `surface_id` is optional — omit it and pass only `content`, and the update targets that document. The user experiences real-time content appearing as you write.
39
39
 
40
40
  ### Recovering from a failed update
41
41
 
42
- If a `document_update` call fails with an `Invalid input` error (for example because `surface_id` was missing), do NOT call `document_create` again. The `surface_id` you need is in the tool result of the most recent `document_create` call in this turn. Retry `document_update` with that `surface_id` and the same content. Creating a second document with the same title produces a duplicate for the user.
42
+ If a `document_update` call fails with an `Invalid input` error, do NOT call `document_create` again — that produces a duplicate for the user. The most common cause is a missing `content` field: resend the call with the chunk's Markdown in `content`. You can omit `surface_id` to target the document you are currently writing; pass it explicitly only when editing a different existing document.
43
43
 
44
44
  ## Editing an existing document
45
45
 
@@ -50,7 +50,7 @@
50
50
  "properties": {
51
51
  "surface_id": {
52
52
  "type": "string",
53
- "description": "The ID of the document surface to update"
53
+ "description": "The ID of the document surface to update. Optional — when omitted, the most recently updated document in this conversation is used, so streaming successive chunks needs only `content`."
54
54
  },
55
55
  "content": {
56
56
  "type": "string",
@@ -62,7 +62,7 @@
62
62
  "description": "Whether to replace all content or append to the end. Defaults to append."
63
63
  }
64
64
  },
65
- "required": ["surface_id", "content"]
65
+ "required": ["content"]
66
66
  },
67
67
  "executor": "tools/document-update.ts",
68
68
  "execution_target": "host"
@@ -21,13 +21,14 @@ import type { Message, ToolDefinition } from "../providers/types.js";
21
21
  import { assistantEventHub } from "../runtime/assistant-event-hub.js";
22
22
  import { registerConversationSender } from "../tools/browser/browser-screencast.js";
23
23
  import type { ToolExecutor } from "../tools/executor.js";
24
- import { getMcpToolDefinitions } from "../tools/registry.js";
24
+ import { getMcpToolDefinitions, getTool } from "../tools/registry.js";
25
25
  import {
26
26
  ACTIVITY_SKIP_SET,
27
27
  injectActivityField,
28
28
  } from "../tools/schema-transforms.js";
29
29
  import {
30
30
  augmentSkillExecuteError,
31
+ recoverSkillExecuteEnvelope,
31
32
  resolveSkillExecuteInput,
32
33
  } from "../tools/skills/execute.js";
33
34
  import { resolveToolInvocationAlias } from "../tools/tool-name-aliases.js";
@@ -303,9 +304,16 @@ export function createToolExecutor(
303
304
  // risk level, permission checks, hooks, and lifecycle events all fire
304
305
  // with the real tool name.
305
306
  if (executionName === "skill_execute") {
307
+ // Recover an envelope the provider wrapped as unparseable when MiniMax's
308
+ // coercion failed to JSON-decode a bare-string `input` (see
309
+ // recoverSkillExecuteEnvelope), then resolve the inner tool + params.
310
+ const envelope = recoverSkillExecuteEnvelope(executionInput);
306
311
  const rawToolName =
307
- typeof executionInput.tool === "string" ? executionInput.tool : "";
308
- const rawToolInput = resolveSkillExecuteInput(executionInput);
312
+ typeof envelope.tool === "string" ? envelope.tool : "";
313
+ const innerSchema = rawToolName
314
+ ? getTool(rawToolName)?.input_schema
315
+ : undefined;
316
+ const rawToolInput = resolveSkillExecuteInput(envelope, innerSchema);
309
317
 
310
318
  // Clone to avoid mutating shared input objects
311
319
  const { name: toolName, input: toolInput } = resolveToolInvocationAlias(
@@ -0,0 +1,131 @@
1
+ /**
2
+ * Tests for `maybeReseedCapabilitiesAfterManagedCredential` in
3
+ * `memory-v2-startup.ts`.
4
+ *
5
+ * The secrets route calls this when a managed-proxy credential lands, to close
6
+ * the first-boot race where the daemon's startup capability seed (skills + CLI
7
+ * commands) runs before the platform provisions the managed embedding
8
+ * credential — the seed's embed throws and the synthetic capability pages never
9
+ * reach the page index. The reseed must fire only when v2 memory is enabled AND
10
+ * the managed-proxy prerequisites are now satisfied, so self-hosted / BYOK
11
+ * assistants (no managed proxy) are never made to run a doomed embed. When v3 is
12
+ * live it then enqueues a `memory_v3_maintain` job so v3 picks up the capability
13
+ * pages immediately instead of waiting out the 6h maintain backstop.
14
+ *
15
+ * Dynamic-imported collaborators are mocked at module scope; `bun:test`
16
+ * isolates `mock.module` per test file.
17
+ */
18
+ import { afterEach, describe, expect, mock, test } from "bun:test";
19
+
20
+ import { makeMockLogger } from "../__tests__/helpers/mock-logger.js";
21
+ import type { AssistantConfig } from "../config/schema.js";
22
+
23
+ const proxyState = { prereqs: true };
24
+ const v3State = { live: true };
25
+ const seedSkill = mock(async () => {});
26
+ const seedCli = mock(async () => {});
27
+ const enqueueJob = mock(
28
+ (_type: string, _payload: Record<string, unknown>) => 1,
29
+ );
30
+
31
+ mock.module("../util/logger.js", () => ({
32
+ getLogger: () => makeMockLogger(),
33
+ }));
34
+
35
+ mock.module("../providers/platform-proxy/context.js", () => ({
36
+ hasManagedProxyPrereqs: async () => proxyState.prereqs,
37
+ }));
38
+
39
+ mock.module("../config/memory-v3-gate.js", () => ({
40
+ isMemoryV3Live: () => v3State.live,
41
+ }));
42
+
43
+ mock.module("../memory/jobs-store.js", () => ({
44
+ enqueueMemoryJob: enqueueJob,
45
+ }));
46
+
47
+ mock.module("../memory/v2/skill-store.js", () => ({
48
+ seedV2SkillEntries: seedSkill,
49
+ }));
50
+
51
+ mock.module("../memory/v2/cli-command-store.js", () => ({
52
+ seedV2CliCommandEntries: seedCli,
53
+ }));
54
+
55
+ const { maybeReseedCapabilitiesAfterManagedCredential } =
56
+ await import("./memory-v2-startup.js");
57
+
58
+ function configWithV2(enabled: boolean): AssistantConfig {
59
+ return { memory: { v2: { enabled } } } as unknown as AssistantConfig;
60
+ }
61
+
62
+ afterEach(() => {
63
+ seedSkill.mockClear();
64
+ seedCli.mockClear();
65
+ enqueueJob.mockClear();
66
+ proxyState.prereqs = true;
67
+ v3State.live = true;
68
+ });
69
+
70
+ describe("maybeReseedCapabilitiesAfterManagedCredential", () => {
71
+ test("reseeds both skill and CLI entries when v2 is enabled and managed-proxy prereqs are satisfied", async () => {
72
+ proxyState.prereqs = true;
73
+
74
+ await maybeReseedCapabilitiesAfterManagedCredential(configWithV2(true));
75
+
76
+ expect(seedSkill).toHaveBeenCalledTimes(1);
77
+ expect(seedCli).toHaveBeenCalledTimes(1);
78
+ });
79
+
80
+ test("enqueues a v3 maintain pass after reseeding when v3 is live", async () => {
81
+ proxyState.prereqs = true;
82
+ v3State.live = true;
83
+
84
+ await maybeReseedCapabilitiesAfterManagedCredential(configWithV2(true));
85
+
86
+ expect(enqueueJob).toHaveBeenCalledTimes(1);
87
+ expect(enqueueJob).toHaveBeenCalledWith("memory_v3_maintain", {});
88
+ });
89
+
90
+ test("reseeds but does not enqueue a v3 maintain pass when v3 is not live", async () => {
91
+ proxyState.prereqs = true;
92
+ v3State.live = false;
93
+
94
+ await maybeReseedCapabilitiesAfterManagedCredential(configWithV2(true));
95
+
96
+ expect(seedSkill).toHaveBeenCalledTimes(1);
97
+ expect(seedCli).toHaveBeenCalledTimes(1);
98
+ expect(enqueueJob).not.toHaveBeenCalled();
99
+ });
100
+
101
+ test("no-op when v2 memory is disabled", async () => {
102
+ await maybeReseedCapabilitiesAfterManagedCredential(configWithV2(false));
103
+
104
+ expect(seedSkill).not.toHaveBeenCalled();
105
+ expect(seedCli).not.toHaveBeenCalled();
106
+ expect(enqueueJob).not.toHaveBeenCalled();
107
+ });
108
+
109
+ test("no-op for non-managed assistants (managed-proxy prereqs not satisfied)", async () => {
110
+ proxyState.prereqs = false;
111
+
112
+ await maybeReseedCapabilitiesAfterManagedCredential(configWithV2(true));
113
+
114
+ expect(seedSkill).not.toHaveBeenCalled();
115
+ expect(seedCli).not.toHaveBeenCalled();
116
+ expect(enqueueJob).not.toHaveBeenCalled();
117
+ });
118
+
119
+ test("swallows a seed failure and still reseeds the other catalog", async () => {
120
+ proxyState.prereqs = true;
121
+ seedSkill.mockImplementationOnce(async () => {
122
+ throw new Error('Embedding backend "gemini" is not configured');
123
+ });
124
+
125
+ // Must not reject — the helper contains each seed's failure so a doomed
126
+ // embed never propagates back to the credential-store caller.
127
+ await maybeReseedCapabilitiesAfterManagedCredential(configWithV2(true));
128
+
129
+ expect(seedCli).toHaveBeenCalledTimes(1);
130
+ });
131
+ });
@@ -3,7 +3,8 @@
3
3
  // ---------------------------------------------------------------------------
4
4
  //
5
5
  // Small focused module that holds the gating + dispatch logic for v2-specific
6
- // startup work invoked from `lifecycle.ts`. Lives in its own file so the unit
6
+ // startup work invoked from `lifecycle.ts` (and, for the post-credential
7
+ // capability reseed, from the secrets route). Lives in its own file so the unit
7
8
  // test for the gate does not have to mount the entire lifecycle import graph.
8
9
 
9
10
  import type { AssistantConfig } from "../config/schema.js";
@@ -48,6 +49,104 @@ export function maybeSeedMemoryV2CliCommands(config: AssistantConfig): void {
48
49
  .catch((err) => log.warn({ err }, "Failed to seed v2 CLI-command entries"));
49
50
  }
50
51
 
52
/**
 * Re-seed the v2 skill and CLI-command capability entries once a managed-proxy
 * credential lands, closing the first-boot race where the daemon's startup seed
 * runs before the platform has provisioned the managed embedding credential.
 *
 * On a brand-new managed assistant the memory worker fires the startup seed
 * (`maybeSeedMemoryV2Skills` / `maybeSeedMemoryV2CliCommands`) seconds after
 * boot, but the platform pushes `vellum:assistant_api_key` (the credential the
 * managed Gemini embedding backend needs) tens of seconds later. The seed's
 * `embedWithBackend` call throws `EmbeddingBackendUnavailableError` before the
 * skill/CLI `entries` cache is replaced, so `listSkillEntries()` /
 * `listCliCommandEntries()` stay empty and the synthetic `skills/<id>` and
 * `cli-commands/<name>` rows never reach the page index — leaving the v3 needle
 * finder lane and always-candidate skill pinning with nothing to surface until
 * the next daemon restart. Re-running the seed when the credential arrives
 * restores the capability pages without a restart.
 *
 * Gated on the managed-proxy prerequisites now being satisfied (both the
 * platform base URL and the assistant API key present) so a non-managed
 * credential write — or a partial update that has not yet completed the pair —
 * does not kick a doomed embed. Idempotent: `seedV2SkillEntries` /
 * `seedV2CliCommandEntries` atomically replace their caches, so a redundant
 * reseed (the startup seed already succeeded) is cheap and harmless. The two
 * catalogs are independent, so they reseed in parallel.
 *
 * Reseeding alone only repopulates the shared page index — v3 reads its
 * synthetic capability rows from the v2 stores, but its memoized lanes and its
 * `memory_v3_sections` dense store refresh only on the v3 maintain pass (6-hour
 * backstop). So when v3 is live, a `memory_v3_maintain` job is enqueued after
 * the reseed: its capability-reconcile stage embeds the freshly-seeded rows
 * into the dense store and its lane-invalidation stage forces a rebuild against
 * the now-populated index, so v3 surfaces the skill/CLI pages within seconds
 * instead of waiting out the backstop.
 *
 * Error containment: callers invoke this detached (`void`) so it must not block
 * the credential-store response, and therefore nothing awaits or catches the
 * returned promise. Every fallible step — the prerequisite gate, each catalog
 * seed, the v3 gate, and the job enqueue — is caught and logged as a warning,
 * so a failure never surfaces as an unhandled rejection.
 *
 * @param config - Assistant configuration; `memory.v2.enabled` gates the whole
 *   helper and the config is also handed to the v3 liveness gate.
 * @returns A promise that settles once the reseed (and optional enqueue) has
 *   been attempted.
 */
export async function maybeReseedCapabilitiesAfterManagedCredential(
  config: AssistantConfig,
): Promise<void> {
  if (!config.memory.v2.enabled) return;

  // The prerequisite gate is contained as well: a failing dynamic import or
  // credential lookup must not reject the detached caller.
  try {
    const { hasManagedProxyPrereqs } = await import(
      "../providers/platform-proxy/context.js"
    );
    if (!(await hasManagedProxyPrereqs())) return;
  } catch (err) {
    log.warn(
      { err },
      "Failed to check managed proxy prerequisites before reseeding v2 capability entries",
    );
    return;
  }

  // Skills and CLI commands are independent catalogs sharing the unified
  // collection — reseed in parallel, each contained so one catalog's embed
  // failure does not abort the other or reject the detached caller.
  const catalogs: ReadonlyArray<[label: string, seed: () => Promise<void>]> = [
    [
      "skill",
      async () => {
        const { seedV2SkillEntries } = await import(
          "../memory/v2/skill-store.js"
        );
        await seedV2SkillEntries({ throwOnError: true });
      },
    ],
    [
      "CLI-command",
      async () => {
        const { seedV2CliCommandEntries } = await import(
          "../memory/v2/cli-command-store.js"
        );
        await seedV2CliCommandEntries({ throwOnError: true });
      },
    ],
  ];

  await Promise.all(
    catalogs.map(async ([label, seed]) => {
      try {
        await seed();
        log.info(
          `Memory v2 ${label} entries seeded after managed proxy credential update`,
        );
      } catch (err) {
        log.warn(
          { err },
          `Failed to seed v2 ${label} entries after managed proxy credential update`,
        );
      }
    }),
  );

  // The stores (and the page index) are now populated; when v3 is live, kick a
  // maintain pass so it embeds the capability rows into `memory_v3_sections` and
  // invalidates its lanes immediately rather than waiting out the 6h backstop.
  // The liveness gate sits inside the same guard as the enqueue so neither can
  // reject the detached promise.
  try {
    const { isMemoryV3Live } = await import("../config/memory-v3-gate.js");
    if (!isMemoryV3Live(config)) return;
    const { enqueueMemoryJob } = await import("../memory/jobs-store.js");
    enqueueMemoryJob("memory_v3_maintain", {});
  } catch (err) {
    log.warn(
      { err },
      "Failed to enqueue memory_v3_maintain after managed proxy credential update",
    );
  }
}
149
+
51
150
  /**
52
151
  * Build the v2 BM25 corpus stats (per-token document frequencies + avg doc
53
152
  * length), then re-seed the v2 skill entries so any skills written during
@@ -77,18 +77,32 @@ export class SecretPrompter {
77
77
  resolve({ value: null, delivery: "store" });
78
78
  }, timeoutMs);
79
79
 
80
+ const config = getConfig();
81
+
80
82
  // Register all lifecycle state in pendingInteractions — same pattern as
81
83
  // host proxies and PermissionPrompter. The prompter tracks ownership via ownedIds.
84
+ // SECURITY: secretDetails carries only the public prompt metadata broadcast
85
+ // on the secret_request event — never the secret value the user will supply.
82
86
  pendingInteractions.register(requestId, {
83
87
  conversationId: effectiveConversationId,
84
88
  kind: "secret",
89
+ secretDetails: {
90
+ service,
91
+ field,
92
+ label,
93
+ description,
94
+ placeholder,
95
+ purpose,
96
+ allowedTools,
97
+ allowedDomains,
98
+ allowOneTimeSend: config.secretDetection.allowOneTimeSend,
99
+ },
85
100
  rpcResolve: resolve as (value: unknown) => void,
86
101
  rpcReject: reject,
87
102
  timer,
88
103
  });
89
104
  this.ownedIds.add(requestId);
90
105
 
91
- const config = getConfig();
92
106
  const msg: SecretRequestMessage = {
93
107
  type: "secret_request",
94
108
  requestId,
@@ -63,6 +63,24 @@ export interface QuestionDetails {
63
63
  entries: QuestionEntry[];
64
64
  }
65
65
 
66
/**
 * Non-sensitive prompt metadata attached to a pending `secret` interaction.
 *
 * It is kept with the pending interaction so that a cold conversation load can
 * rebuild the secret prompt with its full descriptive context.
 *
 * SECURITY: the secret value itself is never stored here. This shape mirrors
 * only the public fields already broadcast on the `secret_request` event.
 */
export interface SecretDetails {
  // --- Always present: what is being asked for and how it is labelled. ---
  service: string;
  field: string;
  label: string;

  // --- Optional descriptive context for the prompt. ---
  description?: string;
  placeholder?: string;
  purpose?: string;

  // --- Optional usage scoping carried with the prompt. ---
  allowedTools?: string[];
  allowedDomains?: string[];

  /**
   * Value of `config.secretDetection.allowOneTimeSend` captured when the
   * prompt was registered.
   */
  allowOneTimeSend?: boolean;
}
+
66
84
  export interface PendingInteraction {
67
85
  /**
68
86
  * Owning conversation, when the interaction was raised inside one. Absent
@@ -84,6 +102,8 @@ export interface PendingInteraction {
84
102
  confirmationDetails?: ConfirmationDetails;
85
103
  /** For a pending `question`: the full batched entries, so a history-load render can rehydrate the question card. */
86
104
  questionDetails?: QuestionDetails;
105
+ /** For a pending `secret`: the public prompt metadata, so a cold load can rehydrate the secret prompt. */
106
+ secretDetails?: SecretDetails;
87
107
  /** For ACP permissions: resolves directly without a Conversation object. */
88
108
  directResolve?: (decision: UserDecision) => void;
89
109
  /** When set, the host_bash request should be routed to this specific client. */
@@ -116,21 +116,35 @@ function handleConfirm({ body }: RouteHandlerArgs) {
116
116
  */
117
117
  function handleSecret({ body }: RouteHandlerArgs) {
118
118
  const requestId = body?.requestId as string | undefined;
119
- const value = body?.value as string | undefined;
120
119
  const delivery = body?.delivery as string | undefined;
121
120
 
122
121
  if (!requestId || typeof requestId !== "string") {
123
122
  throw new BadRequestError("requestId is required");
124
123
  }
125
124
 
125
+ // Legacy compat shim: already-shipped web clients send `delivery: "none"` to
126
+ // cancel a secret prompt. Normalize it to the cancellation path (value
127
+ // undefined) so the request settles cleanly rather than 400-ing and stranding
128
+ // the pending interaction.
129
+ const isCancel = delivery === "none";
130
+ const value = isCancel
131
+ ? undefined
132
+ : (body?.value as string | undefined);
133
+
126
134
  if (
127
135
  delivery !== undefined &&
128
136
  delivery !== "store" &&
129
- delivery !== "transient_send"
137
+ delivery !== "transient_send" &&
138
+ delivery !== "none"
130
139
  ) {
131
140
  throw new BadRequestError('delivery must be "store" or "transient_send"');
132
141
  }
133
142
 
143
+ const effectiveDelivery =
144
+ isCancel || delivery === undefined
145
+ ? undefined
146
+ : (delivery as "store" | "transient_send");
147
+
134
148
  const interaction = pendingInteractions.get(requestId);
135
149
  if (!interaction) {
136
150
  throw new NotFoundError("No pending interaction found for this requestId");
@@ -153,11 +167,7 @@ function handleSecret({ body }: RouteHandlerArgs) {
153
167
  ? findConversation(interaction.conversationId)
154
168
  : undefined;
155
169
  if (conversation?.hasPendingSecret(requestId)) {
156
- conversation.handleSecretResponse(
157
- requestId,
158
- value,
159
- delivery as "store" | "transient_send" | undefined,
160
- );
170
+ conversation.handleSecretResponse(requestId, value, effectiveDelivery);
161
171
  return { accepted: true };
162
172
  }
163
173
 
@@ -170,7 +180,7 @@ function handleSecret({ body }: RouteHandlerArgs) {
170
180
  );
171
181
  (resolved?.rpcResolve as ((r: SecretPromptResult) => void) | undefined)?.({
172
182
  value: value ?? null,
173
- delivery: (delivery as SecretDelivery) ?? "store",
183
+ delivery: (effectiveDelivery as SecretDelivery) ?? "store",
174
184
  });
175
185
  return { accepted: true };
176
186
  }
@@ -246,6 +256,15 @@ function handleListPendingInteractions({ queryParams }: RouteHandlerArgs) {
246
256
  pendingSecret: secret
247
257
  ? {
248
258
  requestId: secret.requestId,
259
+ service: secret.secretDetails?.service,
260
+ field: secret.secretDetails?.field,
261
+ label: secret.secretDetails?.label,
262
+ description: secret.secretDetails?.description,
263
+ placeholder: secret.secretDetails?.placeholder,
264
+ purpose: secret.secretDetails?.purpose,
265
+ allowedTools: secret.secretDetails?.allowedTools,
266
+ allowedDomains: secret.secretDetails?.allowedDomains,
267
+ allowOneTimeSend: secret.secretDetails?.allowOneTimeSend,
249
268
  }
250
269
  : null,
251
270
  };
@@ -340,8 +359,20 @@ export const ROUTES: RouteDefinition[] = [
340
359
  .describe("Pending confirmation details or null")
341
360
  .optional(),
342
361
  pendingSecret: z
343
- .object({})
362
+ .object({
363
+ requestId: z.string(),
364
+ service: z.string().optional(),
365
+ field: z.string().optional(),
366
+ label: z.string().optional(),
367
+ description: z.string().optional(),
368
+ placeholder: z.string().optional(),
369
+ purpose: z.string().optional(),
370
+ allowedTools: z.array(z.string()).optional(),
371
+ allowedDomains: z.array(z.string()).optional(),
372
+ allowOneTimeSend: z.boolean().optional(),
373
+ })
344
374
  .passthrough()
375
+ .nullable()
345
376
  .describe("Pending secret request or null")
346
377
  .optional(),
347
378
  interactions: z
@@ -22,6 +22,7 @@ import {
22
22
  invalidateConfigCache,
23
23
  } from "../../config/loader.js";
24
24
  import type { CesClient } from "../../credential-execution/client.js";
25
+ import { maybeReseedCapabilitiesAfterManagedCredential } from "../../daemon/memory-v2-startup.js";
25
26
  import { setSentryOrganizationId, setSentryUserId } from "../../instrument.js";
26
27
  import { clearEmbeddingBackendCache } from "../../memory/embedding-backend.js";
27
28
  import { syncManualTokenConnection } from "../../oauth/manual-token-connection.js";
@@ -296,6 +297,10 @@ async function handleAddSecret({ body }: RouteHandlerArgs) {
296
297
  }
297
298
  if (isManagedProxyCredential(service, field)) {
298
299
  await refreshProvidersAfterSecretChange();
300
+ // Close the first-boot race where the startup capability seed ran before
301
+ // the managed embedding credential was provisioned, leaving skill/CLI
302
+ // pages unseeded until restart. Detached — must not block the response.
303
+ void maybeReseedCapabilitiesAfterManagedCredential(getConfig());
299
304
  if (service === "vellum" && field === "assistant_api_key") {
300
305
  const generation = ++apiKeyGeneration;
301
306
  const deps = getSecretsDeps();
@@ -269,16 +269,41 @@ export function executeDocumentCreate(
269
269
  };
270
270
  }
271
271
 
272
/**
 * Pick the document that an update call should be applied to.
 *
 * A non-blank string `surface_id` on the input wins and is returned exactly as
 * given. Otherwise the update falls back to the first entry returned by
 * `getDocumentsForConversation` — the conversation's most recently updated
 * document (that helper orders by `updated_at DESC`), i.e. the one currently
 * being streamed into. This lets a model stream chunks with only `content`
 * rather than threading the opaque `surface_id` back through every call, a
 * step weak models routinely drop, which leaves the document stuck on its
 * first chunk.
 *
 * @param input - Raw tool input; only `surface_id` is inspected.
 * @param context - Tool context supplying the owning `conversationId`.
 * @returns The resolved surface id, or an invalid-input result when no id was
 *   given and the conversation has no documents.
 */
function resolveUpdateSurfaceId(
  input: Record<string, unknown>,
  context: ToolContext,
): ToolExecutionResult | string {
  const requested = input.surface_id;
  if (typeof requested === "string" && requested.trim().length > 0) {
    return requested;
  }

  // No usable explicit id: fall back to the newest document, if there is one.
  const documents = getDocumentsForConversation(context.conversationId);
  if (documents.length > 0) {
    return documents[0].surfaceId;
  }

  return invalidInput(
    "surface_id is required: no document is open in this conversation. Call document_create first.",
  );
}
296
+
272
297
  export function executeDocumentUpdate(
273
298
  input: Record<string, unknown>,
274
299
  context: ToolContext,
275
300
  ): ToolExecutionResult {
276
- const surfaceIdOrError = validateSurfaceId(input);
277
- if (typeof surfaceIdOrError !== "string") return surfaceIdOrError;
278
- const surfaceId = surfaceIdOrError;
279
301
  if (typeof input.content !== "string") {
280
302
  return invalidInput("content is required and must be a string");
281
303
  }
304
+ const surfaceIdOrError = resolveUpdateSurfaceId(input, context);
305
+ if (typeof surfaceIdOrError !== "string") return surfaceIdOrError;
306
+ const surfaceId = surfaceIdOrError;
282
307
  // Loose `!= null` to match validateInputAgainstSchema, which treats null as
283
308
  // "absent" for enum checks — without this, { mode: null } passes the
284
309
  // factory validator but rejects here. The `?? "append"` below handles null.
@@ -1,4 +1,5 @@
1
1
  import { RiskLevel } from "../../permissions/types.js";
2
+ import { isUnparseableToolArgs } from "../../providers/unparseable-tool-args.js";
2
3
  import { registerTool } from "../registry.js";
3
4
  import type {
4
5
  ToolContext,
@@ -9,6 +10,38 @@ import type {
9
10
  /** Envelope keys consumed by `skill_execute` itself, never inner-tool params. */
10
11
  const SKILL_EXECUTE_ENVELOPE_KEYS = new Set(["tool", "input", "activity"]);
11
12
 
13
/**
 * Unwrap a `skill_execute` envelope that the provider layer tagged with the
 * `_raw` unparseable marker, when the raw text is in fact a valid JSON object.
 *
 * Background: MiniMax's object→string argument coercion JSON-decodes the inner
 * `input` value after parsing the outer arguments. If the model supplies a bare
 * string as `input` (for example a Markdown body rather than
 * `{ "content": "..." }`), that inner decode fails and the entire call is
 * marked unparseable even though the outer envelope is well-formed JSON.
 * Parsing `_raw` again yields `{ tool, input, activity }`, so the inner tool
 * can still be dispatched.
 *
 * Anything that is not marked unparseable, whose `_raw` does not parse, or
 * whose `_raw` parses to something other than a plain object is handed back
 * untouched. That keeps the retryable-error path intact for calls that really
 * were truncated or corrupted in the stream.
 *
 * @param envelope - The arguments object received for `skill_execute`.
 * @returns The re-parsed envelope when recovery succeeds, otherwise the
 *   original `envelope` reference.
 */
export function recoverSkillExecuteEnvelope(
  envelope: Record<string, unknown>,
): Record<string, unknown> {
  if (isUnparseableToolArgs(envelope)) {
    let reparsed: unknown;
    try {
      reparsed = JSON.parse(envelope._raw);
    } catch {
      // Genuinely malformed/truncated — leave wrapped for the retryable error.
      return envelope;
    }

    // Only a plain object can be an envelope; arrays, primitives and null are not.
    const isPlainObject =
      typeof reparsed === "object" &&
      reparsed !== null &&
      !Array.isArray(reparsed);
    if (isPlainObject) {
      return reparsed as Record<string, unknown>;
    }
  }
  return envelope;
}
44
+
12
45
  /**
13
46
  * Resolve the inner tool's parameters from a `skill_execute` envelope.
14
47
  *
@@ -19,15 +52,20 @@ const SKILL_EXECUTE_ENVELOPE_KEYS = new Set(["tool", "input", "activity"]);
19
52
  * Weaker models routinely misplace the parameters. Left unhandled, the inner
20
53
  * tool receives `{}`, fails schema validation ("<field> is required"), and the
21
54
  * model retries the identical malformed call until it gives up — the empty-
22
- * input retry loop. Two common misplacements are rescued so the call can
55
+ * input retry loop. Three common misplacements are rescued so the call can
23
56
  * succeed instead:
24
57
  *
25
58
  * 1. `input` passed as a JSON-encoded string instead of an object.
26
59
  * 2. Parameters spread as top-level siblings of `tool`/`activity`, with `input`
27
60
  * absent or an empty object.
61
+ * 3. The sole required field's value passed bare as `input` (a non-JSON string)
62
+ * — e.g. the full Markdown body as `input` instead of `{ "content": "..." }`.
63
+ * Rescued only when `innerSchema` has exactly one required string field, so
64
+ * the mapping is unambiguous.
28
65
  */
29
66
  export function resolveSkillExecuteInput(
30
67
  envelope: Record<string, unknown>,
68
+ innerSchema?: unknown,
31
69
  ): Record<string, unknown> {
32
70
  const raw = envelope.input;
33
71
 
@@ -48,7 +86,14 @@ export function resolveSkillExecuteInput(
48
86
  return parsed as Record<string, unknown>;
49
87
  }
50
88
  } catch {
51
- // Not JSON fall through to sibling rescue.
89
+ // Not JSON. A weak model may have placed the inner tool's sole required
90
+ // string value directly as `input` (e.g. the full Markdown body as
91
+ // `document_update`'s `content`) instead of a `{ "content": "..." }`
92
+ // object. When the inner tool has exactly one required string field, map
93
+ // the bare string onto it rather than discarding content the model
94
+ // actually produced.
95
+ const field = soleRequiredStringField(innerSchema);
96
+ if (field) return { [field]: raw };
52
97
  }
53
98
  }
54
99
 
@@ -62,6 +107,25 @@ export function resolveSkillExecuteInput(
62
107
  return {};
63
108
  }
64
109
 
110
/**
 * Find the one field a bare `input` string can unambiguously be mapped onto.
 *
 * Returns the name of the inner tool schema's only required property when that
 * property is declared with `type: "string"`. Returns `null` in every other
 * case: the schema is not an object, `required` is missing or not an array,
 * there are zero or several required fields, the lone entry is not a string
 * name, or the named property is absent or not string-typed.
 *
 * @param innerSchema - The inner tool's input schema, shape unverified.
 * @returns The required string field's name, or `null` when there is no
 *   unambiguous match.
 */
function soleRequiredStringField(innerSchema: unknown): string | null {
  if (typeof innerSchema !== "object" || innerSchema === null) {
    return null;
  }

  const schema = innerSchema as {
    required?: unknown;
    properties?: Record<string, { type?: unknown } | undefined>;
  };

  const requiredNames = schema.required;
  if (!Array.isArray(requiredNames) || requiredNames.length !== 1) {
    return null;
  }

  const candidate: unknown = requiredNames[0];
  if (typeof candidate !== "string") {
    return null;
  }

  const declaredType = schema.properties?.[candidate]?.type;
  if (declaredType === "string") {
    return candidate;
  }
  return null;
}
128
+
65
129
  /**
66
130
  * Augment an inner-tool error with `skill_execute` envelope guidance when the
67
131
  * call carried no inner parameters.