@vellumai/assistant 0.5.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/__tests__/assistant-feature-flags-integration.test.ts +7 -9
- package/src/__tests__/credential-execution-feature-gates.test.ts +3 -3
- package/src/__tests__/filesystem-tools.test.ts +4 -2
- package/src/__tests__/history-repair.test.ts +71 -0
- package/src/__tests__/skill-feature-flags-integration.test.ts +18 -17
- package/src/__tests__/skill-feature-flags.test.ts +13 -13
- package/src/__tests__/skill-load-feature-flag.test.ts +4 -4
- package/src/__tests__/system-prompt.test.ts +8 -0
- package/src/config/feature-flag-registry.json +9 -1
- package/src/daemon/conversation-agent-loop-handlers.ts +2 -39
- package/src/daemon/history-repair.ts +28 -8
- package/src/permissions/checker.ts +0 -20
- package/src/prompts/system-prompt.ts +2 -0
- package/src/tools/shared/filesystem/format-diff.ts +4 -16
package/package.json
CHANGED
|
@@ -228,7 +228,7 @@ describe("buildSystemPrompt assistant feature flag filtering", () => {
|
|
|
228
228
|
expect(result).not.toContain(`**${DECLARED_SKILL_ID}**`);
|
|
229
229
|
});
|
|
230
230
|
|
|
231
|
-
test("
|
|
231
|
+
test("contacts visible but email-channel hidden when no flag overrides set (contacts defaults true, email-channel defaults false)", () => {
|
|
232
232
|
createSkillOnDisk(
|
|
233
233
|
DECLARED_SKILL_ID,
|
|
234
234
|
"Contacts",
|
|
@@ -263,8 +263,8 @@ describe("buildSystemPrompt assistant feature flag filtering", () => {
|
|
|
263
263
|
|
|
264
264
|
const result = buildSystemPrompt();
|
|
265
265
|
|
|
266
|
-
//
|
|
267
|
-
expect(result).
|
|
266
|
+
// contacts defaults to true, email-channel defaults to false
|
|
267
|
+
expect(result).toContain(`**${DECLARED_SKILL_ID}**`);
|
|
268
268
|
expect(result).not.toContain("**email-channel**");
|
|
269
269
|
});
|
|
270
270
|
|
|
@@ -466,12 +466,10 @@ describe("isAssistantFeatureFlagEnabled", () => {
|
|
|
466
466
|
|
|
467
467
|
test("missing persisted value falls back to defaults registry defaultEnabled", () => {
|
|
468
468
|
// No explicit config at all — should fall back to defaults registry
|
|
469
|
-
// which has defaultEnabled:
|
|
469
|
+
// which has defaultEnabled: true for contacts
|
|
470
470
|
const config = {} as any;
|
|
471
471
|
|
|
472
|
-
expect(isAssistantFeatureFlagEnabled(DECLARED_FLAG_KEY, config)).toBe(
|
|
473
|
-
false,
|
|
474
|
-
);
|
|
472
|
+
expect(isAssistantFeatureFlagEnabled(DECLARED_FLAG_KEY, config)).toBe(true);
|
|
475
473
|
});
|
|
476
474
|
|
|
477
475
|
test("unknown flag defaults to true when no persisted override", () => {
|
|
@@ -510,7 +508,7 @@ describe("isAssistantFeatureFlagEnabled with skillFlagKey", () => {
|
|
|
510
508
|
).toBe(false);
|
|
511
509
|
});
|
|
512
510
|
|
|
513
|
-
test("
|
|
511
|
+
test("enabled when no override set (registry default is true)", () => {
|
|
514
512
|
const config = {} as any;
|
|
515
513
|
|
|
516
514
|
expect(
|
|
@@ -518,6 +516,6 @@ describe("isAssistantFeatureFlagEnabled with skillFlagKey", () => {
|
|
|
518
516
|
skillFlagKey({ featureFlag: DECLARED_FLAG_ID })!,
|
|
519
517
|
config,
|
|
520
518
|
),
|
|
521
|
-
).toBe(
|
|
519
|
+
).toBe(true);
|
|
522
520
|
});
|
|
523
521
|
});
|
|
@@ -154,16 +154,16 @@ describe("CES flags do not affect unrelated flags", () => {
|
|
|
154
154
|
).toBe(true);
|
|
155
155
|
});
|
|
156
156
|
|
|
157
|
-
test("enabling all CES flags does not change contacts flag (defaultEnabled:
|
|
157
|
+
test("enabling all CES flags does not change contacts flag (defaultEnabled: true)", () => {
|
|
158
158
|
const overrides: Record<string, boolean> = {};
|
|
159
159
|
for (const key of ALL_CES_FLAG_KEYS) {
|
|
160
160
|
overrides[key] = true;
|
|
161
161
|
}
|
|
162
162
|
const config = makeConfig(overrides);
|
|
163
163
|
|
|
164
|
-
// contacts defaults to
|
|
164
|
+
// contacts defaults to true in the registry and should stay true
|
|
165
165
|
expect(
|
|
166
166
|
isAssistantFeatureFlagEnabled("feature_flags.contacts.enabled", config),
|
|
167
|
-
).toBe(
|
|
167
|
+
).toBe(true);
|
|
168
168
|
});
|
|
169
169
|
});
|
|
@@ -325,12 +325,14 @@ describe("formatEditDiff", () => {
|
|
|
325
325
|
expect(result).not.toContain("+ ");
|
|
326
326
|
});
|
|
327
327
|
|
|
328
|
-
test("
|
|
328
|
+
test("shows all diff lines without truncation", () => {
|
|
329
329
|
const longOld = Array.from({ length: 12 }, (_, i) => `old-line-${i}`).join(
|
|
330
330
|
"\n",
|
|
331
331
|
);
|
|
332
332
|
const result = formatEditDiff(longOld, "short");
|
|
333
|
-
expect(result).toContain("more lines");
|
|
333
|
+
expect(result).not.toContain("more lines");
|
|
334
|
+
expect(result).toContain("old-line-11");
|
|
335
|
+
expect(result).toContain("+ short");
|
|
334
336
|
});
|
|
335
337
|
});
|
|
336
338
|
|
|
@@ -588,6 +588,77 @@ describe("repairHistory", () => {
|
|
|
588
588
|
});
|
|
589
589
|
});
|
|
590
590
|
|
|
591
|
+
test("synthetic web_search_tool_result is placed immediately after its server_tool_use, not at end", () => {
|
|
592
|
+
// Regression: synthetic results appended to the end of the content array
|
|
593
|
+
// get separated from their server_tool_use by ensureToolPairing's split
|
|
594
|
+
// at tool_use boundaries, causing the API to reject with "web_search
|
|
595
|
+
// tool use without a corresponding web_search_tool_result block".
|
|
596
|
+
const messages: Message[] = [
|
|
597
|
+
{ role: "user", content: [{ type: "text", text: "Search and act" }] },
|
|
598
|
+
{
|
|
599
|
+
role: "assistant",
|
|
600
|
+
content: [
|
|
601
|
+
{ type: "text", text: "Let me search" },
|
|
602
|
+
{
|
|
603
|
+
type: "server_tool_use",
|
|
604
|
+
id: "stu_1",
|
|
605
|
+
name: "web_search",
|
|
606
|
+
input: { query: "openai" },
|
|
607
|
+
},
|
|
608
|
+
{
|
|
609
|
+
type: "server_tool_use",
|
|
610
|
+
id: "stu_2",
|
|
611
|
+
name: "web_search",
|
|
612
|
+
input: { query: "anthropic" },
|
|
613
|
+
},
|
|
614
|
+
{ type: "text", text: "Based on my research" },
|
|
615
|
+
{
|
|
616
|
+
type: "tool_use",
|
|
617
|
+
id: "tu_1",
|
|
618
|
+
name: "skill_load",
|
|
619
|
+
input: { skill: "app-builder" },
|
|
620
|
+
},
|
|
621
|
+
],
|
|
622
|
+
},
|
|
623
|
+
{
|
|
624
|
+
role: "user",
|
|
625
|
+
content: [
|
|
626
|
+
{
|
|
627
|
+
type: "tool_result",
|
|
628
|
+
tool_use_id: "tu_1",
|
|
629
|
+
content: "Skill loaded",
|
|
630
|
+
},
|
|
631
|
+
],
|
|
632
|
+
},
|
|
633
|
+
];
|
|
634
|
+
|
|
635
|
+
const { messages: repaired, stats } = repairHistory(messages);
|
|
636
|
+
|
|
637
|
+
expect(stats.missingToolResultsInserted).toBe(2);
|
|
638
|
+
|
|
639
|
+
const assistantMsg = repaired[1];
|
|
640
|
+
// Synthetic results must appear immediately after their server_tool_use,
|
|
641
|
+
// NOT after the tool_use block at the end
|
|
642
|
+
const blockTypes = assistantMsg.content.map((b) => b.type);
|
|
643
|
+
expect(blockTypes).toEqual([
|
|
644
|
+
"text",
|
|
645
|
+
"server_tool_use",
|
|
646
|
+
"web_search_tool_result", // right after stu_1
|
|
647
|
+
"server_tool_use",
|
|
648
|
+
"web_search_tool_result", // right after stu_2
|
|
649
|
+
"text",
|
|
650
|
+
"tool_use",
|
|
651
|
+
]);
|
|
652
|
+
|
|
653
|
+
// Verify the pairings are correct
|
|
654
|
+
expect(
|
|
655
|
+
(assistantMsg.content[2] as { tool_use_id: string }).tool_use_id,
|
|
656
|
+
).toBe("stu_1");
|
|
657
|
+
expect(
|
|
658
|
+
(assistantMsg.content[4] as { tool_use_id: string }).tool_use_id,
|
|
659
|
+
).toBe("stu_2");
|
|
660
|
+
});
|
|
661
|
+
|
|
591
662
|
test("downgrades type-mismatched tool_result for server_tool_use", () => {
|
|
592
663
|
// A tool_result in the user message for a server_tool_use ID is orphaned —
|
|
593
664
|
// server-side results belong in the assistant message
|
|
@@ -138,14 +138,15 @@ describe("frontmatter feature-flag integration", () => {
|
|
|
138
138
|
expect(key).toBeUndefined();
|
|
139
139
|
});
|
|
140
140
|
|
|
141
|
-
test("resolveSkillStates
|
|
141
|
+
test("resolveSkillStates includes skill with featureFlag when flag defaults to ON", () => {
|
|
142
142
|
const skill = buildSkillSummary("contacts", SKILL_MD_WITH_FLAG)!;
|
|
143
|
-
// "contacts" is in the registry with defaultEnabled:
|
|
143
|
+
// "contacts" is in the registry with defaultEnabled: true
|
|
144
144
|
const config = makeConfig();
|
|
145
145
|
|
|
146
146
|
const resolved = resolveSkillStates([skill], config);
|
|
147
|
-
// Flag defaults to
|
|
148
|
-
expect(resolved.length).toBe(
|
|
147
|
+
// Flag defaults to true → skill passes through
|
|
148
|
+
expect(resolved.length).toBe(1);
|
|
149
|
+
expect(resolved[0].summary.id).toBe("contacts");
|
|
149
150
|
});
|
|
150
151
|
|
|
151
152
|
test("resolveSkillStates includes skill with featureFlag when flag is ON", () => {
|
|
@@ -192,22 +193,22 @@ describe("frontmatter feature-flag integration", () => {
|
|
|
192
193
|
const key = skillFlagKey(skill);
|
|
193
194
|
expect(key).toBe("feature_flags.contacts.enabled");
|
|
194
195
|
|
|
195
|
-
// Step 4: Check flag state — "contacts" has defaultEnabled:
|
|
196
|
-
const
|
|
197
|
-
expect(isAssistantFeatureFlagEnabled(key!,
|
|
196
|
+
// Step 4: Check flag state — "contacts" has defaultEnabled: true in registry
|
|
197
|
+
const configDefault = makeConfig();
|
|
198
|
+
expect(isAssistantFeatureFlagEnabled(key!, configDefault)).toBe(true);
|
|
198
199
|
|
|
199
|
-
// Step 5: resolveSkillStates
|
|
200
|
-
const
|
|
201
|
-
expect(
|
|
200
|
+
// Step 5: resolveSkillStates includes it by default
|
|
201
|
+
const resolvedDefault = resolveSkillStates([skill], configDefault);
|
|
202
|
+
expect(resolvedDefault.length).toBe(1);
|
|
203
|
+
expect(resolvedDefault[0].summary.id).toBe("contacts");
|
|
202
204
|
|
|
203
|
-
// Step 6: With override
|
|
204
|
-
const
|
|
205
|
-
assistantFeatureFlagValues: { [key!]:
|
|
205
|
+
// Step 6: With override disabled, skill is filtered out
|
|
206
|
+
const configOff = makeConfig({
|
|
207
|
+
assistantFeatureFlagValues: { [key!]: false },
|
|
206
208
|
});
|
|
207
|
-
expect(isAssistantFeatureFlagEnabled(key!,
|
|
209
|
+
expect(isAssistantFeatureFlagEnabled(key!, configOff)).toBe(false);
|
|
208
210
|
|
|
209
|
-
const
|
|
210
|
-
expect(
|
|
211
|
-
expect(resolvedOn[0].summary.id).toBe("contacts");
|
|
211
|
+
const resolvedOff = resolveSkillStates([skill], configOff);
|
|
212
|
+
expect(resolvedOff.length).toBe(0);
|
|
212
213
|
});
|
|
213
214
|
});
|
|
@@ -81,14 +81,14 @@ describe("skillFlagKey", () => {
|
|
|
81
81
|
// ---------------------------------------------------------------------------
|
|
82
82
|
|
|
83
83
|
describe("isAssistantFeatureFlagEnabled with skillFlagKey", () => {
|
|
84
|
-
test("returns
|
|
84
|
+
test("returns true when no flag overrides (registry default is true)", () => {
|
|
85
85
|
const config = makeConfig();
|
|
86
86
|
expect(
|
|
87
87
|
isAssistantFeatureFlagEnabled(
|
|
88
88
|
skillFlagKey({ featureFlag: DECLARED_FLAG_ID })!,
|
|
89
89
|
config,
|
|
90
90
|
),
|
|
91
|
-
).toBe(
|
|
91
|
+
).toBe(true);
|
|
92
92
|
});
|
|
93
93
|
|
|
94
94
|
test("returns true when skill key is explicitly true", () => {
|
|
@@ -140,10 +140,8 @@ describe("isAssistantFeatureFlagEnabled", () => {
|
|
|
140
140
|
|
|
141
141
|
test("falls back to registry default when no override", () => {
|
|
142
142
|
const config = makeConfig();
|
|
143
|
-
// contacts defaults to
|
|
144
|
-
expect(isAssistantFeatureFlagEnabled(DECLARED_FLAG_KEY, config)).toBe(
|
|
145
|
-
false,
|
|
146
|
-
);
|
|
143
|
+
// contacts defaults to true in the registry
|
|
144
|
+
expect(isAssistantFeatureFlagEnabled(DECLARED_FLAG_KEY, config)).toBe(true);
|
|
147
145
|
});
|
|
148
146
|
|
|
149
147
|
test("respects persisted overrides for undeclared keys", () => {
|
|
@@ -207,13 +205,14 @@ describe("resolveSkillStates with feature flags", () => {
|
|
|
207
205
|
expect(ids).toContain("browser");
|
|
208
206
|
});
|
|
209
207
|
|
|
210
|
-
test("declared flag key defaults to registry value (
|
|
208
|
+
test("declared flag key defaults to registry value (true)", () => {
|
|
211
209
|
const catalog = [makeSkill(DECLARED_SKILL_ID, "bundled", DECLARED_FLAG_ID)];
|
|
212
210
|
const config = makeConfig();
|
|
213
211
|
|
|
214
212
|
const resolved = resolveSkillStates(catalog, config);
|
|
215
|
-
// contacts registry default is
|
|
216
|
-
expect(resolved.length).toBe(
|
|
213
|
+
// contacts registry default is true, so it passes through
|
|
214
|
+
expect(resolved.length).toBe(1);
|
|
215
|
+
expect(resolved[0].summary.id).toBe(DECLARED_SKILL_ID);
|
|
217
216
|
});
|
|
218
217
|
|
|
219
218
|
test("skill without featureFlag is never flag-gated", () => {
|
|
@@ -280,14 +279,15 @@ describe("resolveSkillStates with feature flags", () => {
|
|
|
280
279
|
// ---------------------------------------------------------------------------
|
|
281
280
|
|
|
282
281
|
describe("resolveSkillStates with frontmatter featureFlag", () => {
|
|
283
|
-
test("skill with featureFlag (defaultEnabled:
|
|
284
|
-
// contacts has defaultEnabled:
|
|
282
|
+
test("skill with featureFlag (defaultEnabled: true) is included when no config override", () => {
|
|
283
|
+
// contacts has defaultEnabled: true in the registry
|
|
285
284
|
const catalog = [makeSkill(DECLARED_SKILL_ID, "bundled", DECLARED_FLAG_ID)];
|
|
286
285
|
const config = makeConfig();
|
|
287
286
|
|
|
288
287
|
const resolved = resolveSkillStates(catalog, config);
|
|
289
|
-
// No override, registry default is
|
|
290
|
-
expect(resolved.length).toBe(
|
|
288
|
+
// No override, registry default is true → passes through
|
|
289
|
+
expect(resolved.length).toBe(1);
|
|
290
|
+
expect(resolved[0].summary.id).toBe(DECLARED_SKILL_ID);
|
|
291
291
|
});
|
|
292
292
|
|
|
293
293
|
test("skill with featureFlag is included when config override enables it", () => {
|
|
@@ -166,7 +166,7 @@ describe("skill_load feature flag enforcement", () => {
|
|
|
166
166
|
expect(result.content).toContain("Skill: Contacts");
|
|
167
167
|
});
|
|
168
168
|
|
|
169
|
-
test("
|
|
169
|
+
test("loads skill when flag key is absent (registry defaults to enabled)", async () => {
|
|
170
170
|
writeSkill(
|
|
171
171
|
DECLARED_SKILL_ID,
|
|
172
172
|
"Contacts",
|
|
@@ -184,8 +184,8 @@ describe("skill_load feature flag enforcement", () => {
|
|
|
184
184
|
|
|
185
185
|
const result = await executeSkillLoad({ skill: DECLARED_SKILL_ID });
|
|
186
186
|
|
|
187
|
-
// contacts is declared in the registry with defaultEnabled:
|
|
188
|
-
expect(result.isError).toBe(
|
|
189
|
-
expect(result.content).toContain("
|
|
187
|
+
// contacts is declared in the registry with defaultEnabled: true
|
|
188
|
+
expect(result.isError).toBe(false);
|
|
189
|
+
expect(result.content).toContain("Skill: Contacts");
|
|
190
190
|
});
|
|
191
191
|
});
|
|
@@ -237,6 +237,14 @@ describe("buildSystemPrompt", () => {
|
|
|
237
237
|
expect(result).toContain("browser automation as last resort");
|
|
238
238
|
});
|
|
239
239
|
|
|
240
|
+
test("includes inline media attachment guidance", () => {
|
|
241
|
+
const result = buildSystemPrompt();
|
|
242
|
+
expect(result).toContain(
|
|
243
|
+
"Image and video attachments can render inline in chat.",
|
|
244
|
+
);
|
|
245
|
+
expect(result).toContain("attach it instead of only printing its path");
|
|
246
|
+
});
|
|
247
|
+
|
|
240
248
|
test("does not include removed sections", () => {
|
|
241
249
|
const result = buildSystemPrompt();
|
|
242
250
|
expect(result).not.toContain("## External Communications Identity");
|
|
@@ -23,7 +23,7 @@
|
|
|
23
23
|
"key": "feature_flags.contacts.enabled",
|
|
24
24
|
"label": "Contacts",
|
|
25
25
|
"description": "Show the Contacts tab in Settings for viewing and managing contacts",
|
|
26
|
-
"defaultEnabled":
|
|
26
|
+
"defaultEnabled": true
|
|
27
27
|
},
|
|
28
28
|
{
|
|
29
29
|
"id": "email-channel",
|
|
@@ -256,6 +256,14 @@
|
|
|
256
256
|
"label": "Quick Input",
|
|
257
257
|
"description": "Enable the Quick Input popover on right-click of the menu bar icon",
|
|
258
258
|
"defaultEnabled": false
|
|
259
|
+
},
|
|
260
|
+
{
|
|
261
|
+
"id": "expand-completed-steps",
|
|
262
|
+
"scope": "macos",
|
|
263
|
+
"key": "expand_completed_steps",
|
|
264
|
+
"label": "Expand Completed Steps",
|
|
265
|
+
"description": "Auto-expand completed tool call step groups instead of showing them collapsed",
|
|
266
|
+
"defaultEnabled": false
|
|
259
267
|
}
|
|
260
268
|
]
|
|
261
269
|
}
|
|
@@ -167,30 +167,6 @@ export function emitLlmCallStartedIfNeeded(
|
|
|
167
167
|
);
|
|
168
168
|
}
|
|
169
169
|
|
|
170
|
-
// ── Client Payload Size Caps ─────────────────────────────────────────
|
|
171
|
-
// The client truncates tool results anyway (20 000 chars in ChatViewModel),
|
|
172
|
-
// but the full string can be megabytes (file_read, bash output). Capping
|
|
173
|
-
// here avoids sending oversized payloads which get decoded on the
|
|
174
|
-
// client's main thread.
|
|
175
|
-
|
|
176
|
-
const TOOL_RESULT_MAX_CHARS = 20_000;
|
|
177
|
-
const TOOL_RESULT_TRUNCATION_SUFFIX = "...[truncated]";
|
|
178
|
-
|
|
179
|
-
// tool_input_delta streams accumulated JSON as tools run. For non-app
|
|
180
|
-
// tools the client discards it (extractCodePreview only handles app tools),
|
|
181
|
-
// so we cap it aggressively to avoid excessive client traffic.
|
|
182
|
-
const TOOL_INPUT_DELTA_MAX_CHARS = 50_000;
|
|
183
|
-
const APP_TOOL_NAMES = new Set(["app_create", "app_update"]);
|
|
184
|
-
|
|
185
|
-
function truncateForClient(
|
|
186
|
-
value: string,
|
|
187
|
-
maxChars: number,
|
|
188
|
-
suffix: string,
|
|
189
|
-
): string {
|
|
190
|
-
if (value.length <= maxChars) return value;
|
|
191
|
-
return value.slice(0, maxChars - suffix.length) + suffix;
|
|
192
|
-
}
|
|
193
|
-
|
|
194
170
|
// ── Friendly Tool Names ──────────────────────────────────────────────
|
|
195
171
|
|
|
196
172
|
const TOOL_FRIENDLY_NAMES: Record<string, string> = {
|
|
@@ -409,19 +385,10 @@ export function handleInputJsonDelta(
|
|
|
409
385
|
deps: EventHandlerDeps,
|
|
410
386
|
event: Extract<AgentEvent, { type: "input_json_delta" }>,
|
|
411
387
|
): void {
|
|
412
|
-
// Cap non-app tool input deltas — the client only uses this data for
|
|
413
|
-
// app_create/app_update code previews; all other tools discard it.
|
|
414
|
-
const content = APP_TOOL_NAMES.has(event.toolName)
|
|
415
|
-
? event.accumulatedJson
|
|
416
|
-
: truncateForClient(
|
|
417
|
-
event.accumulatedJson,
|
|
418
|
-
TOOL_INPUT_DELTA_MAX_CHARS,
|
|
419
|
-
TOOL_RESULT_TRUNCATION_SUFFIX,
|
|
420
|
-
);
|
|
421
388
|
deps.onEvent({
|
|
422
389
|
type: "tool_input_delta",
|
|
423
390
|
toolName: event.toolName,
|
|
424
|
-
content,
|
|
391
|
+
content: event.accumulatedJson,
|
|
425
392
|
conversationId: deps.ctx.conversationId,
|
|
426
393
|
toolUseId: event.toolUseId,
|
|
427
394
|
});
|
|
@@ -438,11 +405,7 @@ export function handleToolResult(
|
|
|
438
405
|
deps.onEvent({
|
|
439
406
|
type: "tool_result",
|
|
440
407
|
toolName: "",
|
|
441
|
-
result:
|
|
442
|
-
event.content,
|
|
443
|
-
TOOL_RESULT_MAX_CHARS,
|
|
444
|
-
TOOL_RESULT_TRUNCATION_SUFFIX,
|
|
445
|
-
),
|
|
408
|
+
result: event.content,
|
|
446
409
|
isError: event.isError,
|
|
447
410
|
diff: event.diff,
|
|
448
411
|
status: event.status,
|
|
@@ -69,7 +69,10 @@ export function repairHistory(messages: Message[]): RepairResult {
|
|
|
69
69
|
}
|
|
70
70
|
|
|
71
71
|
// Ensure every server_tool_use has a paired web_search_tool_result
|
|
72
|
-
// in the same assistant message (handles interrupted streams)
|
|
72
|
+
// in the same assistant message (handles interrupted streams).
|
|
73
|
+
// Synthetic results are inserted IMMEDIATELY AFTER their corresponding
|
|
74
|
+
// server_tool_use block — not appended to the end — so that
|
|
75
|
+
// ensureToolPairing's split at tool_use boundaries cannot separate them.
|
|
73
76
|
const serverToolIds = new Set(
|
|
74
77
|
cleanedContent
|
|
75
78
|
.filter(
|
|
@@ -82,18 +85,35 @@ export function repairHistory(messages: Message[]): RepairResult {
|
|
|
82
85
|
.filter((b) => b.type === "web_search_tool_result")
|
|
83
86
|
.map((b) => (b as { tool_use_id: string }).tool_use_id),
|
|
84
87
|
);
|
|
88
|
+
const orphanedServerIds = new Set<string>();
|
|
85
89
|
for (const id of serverToolIds) {
|
|
86
90
|
if (!matchedServerIds.has(id)) {
|
|
87
|
-
|
|
88
|
-
type: "web_search_tool_result",
|
|
89
|
-
tool_use_id: id,
|
|
90
|
-
content: SYNTHETIC_WEB_SEARCH_ERROR,
|
|
91
|
-
});
|
|
92
|
-
stats.missingToolResultsInserted++;
|
|
91
|
+
orphanedServerIds.add(id);
|
|
93
92
|
}
|
|
94
93
|
}
|
|
95
94
|
|
|
96
|
-
|
|
95
|
+
let repairedContent: ContentBlock[];
|
|
96
|
+
if (orphanedServerIds.size > 0) {
|
|
97
|
+
repairedContent = [];
|
|
98
|
+
for (const block of cleanedContent) {
|
|
99
|
+
repairedContent.push(block);
|
|
100
|
+
if (
|
|
101
|
+
block.type === "server_tool_use" &&
|
|
102
|
+
orphanedServerIds.has(block.id)
|
|
103
|
+
) {
|
|
104
|
+
repairedContent.push({
|
|
105
|
+
type: "web_search_tool_result",
|
|
106
|
+
tool_use_id: block.id,
|
|
107
|
+
content: SYNTHETIC_WEB_SEARCH_ERROR,
|
|
108
|
+
});
|
|
109
|
+
stats.missingToolResultsInserted++;
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
} else {
|
|
113
|
+
repairedContent = cleanedContent;
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
result.push({ role: "assistant", content: repairedContent });
|
|
97
117
|
|
|
98
118
|
// Only track client-side tool_use IDs as pending (not server_tool_use)
|
|
99
119
|
pendingToolUseIds = new Set(
|
|
@@ -197,15 +197,6 @@ const LOW_RISK_GIT_SUBCOMMANDS = new Set([
|
|
|
197
197
|
"reflog",
|
|
198
198
|
]);
|
|
199
199
|
|
|
200
|
-
// Vellum/assistant CLI subcommands that are low-risk (read-only)
|
|
201
|
-
const LOW_RISK_CLI_SUBCOMMANDS = new Set([
|
|
202
|
-
"ps",
|
|
203
|
-
"doctor",
|
|
204
|
-
"audit",
|
|
205
|
-
"completions",
|
|
206
|
-
"map",
|
|
207
|
-
]);
|
|
208
|
-
|
|
209
200
|
// Commands that wrap another program — the real program appears as the first
|
|
210
201
|
// non-flag argument. When one of these is the segment program we look through
|
|
211
202
|
// its args to find the effective program (e.g. `env curl …` → curl).
|
|
@@ -671,17 +662,6 @@ async function classifyRiskUncached(
|
|
|
671
662
|
continue;
|
|
672
663
|
}
|
|
673
664
|
|
|
674
|
-
if (prog === "vellum" || prog === "assistant") {
|
|
675
|
-
const subcommand = firstPositionalArg(seg.args);
|
|
676
|
-
if (subcommand && LOW_RISK_CLI_SUBCOMMANDS.has(subcommand)) {
|
|
677
|
-
// Read-only subcommands stay at current risk
|
|
678
|
-
continue;
|
|
679
|
-
}
|
|
680
|
-
// Mutating subcommands are medium
|
|
681
|
-
maxRisk = RiskLevel.Medium;
|
|
682
|
-
continue;
|
|
683
|
-
}
|
|
684
|
-
|
|
685
665
|
if (!LOW_RISK_PROGRAMS.has(prog)) {
|
|
686
666
|
// Unknown program → medium
|
|
687
667
|
if (maxRisk === RiskLevel.Low) {
|
|
@@ -206,6 +206,8 @@ function buildAttachmentSection(): string {
|
|
|
206
206
|
"",
|
|
207
207
|
'Use `source="host"` with an absolute path for host filesystem files. Optional attributes: `filename` (display name override), `mime_type` (override auto-detection).',
|
|
208
208
|
"",
|
|
209
|
+
"Image and video attachments can render inline in chat. If the user asks to preview a media file here, attach it instead of only printing its path.",
|
|
210
|
+
"",
|
|
209
211
|
"Embed images/GIFs inline using markdown: ``.",
|
|
210
212
|
].join("\n");
|
|
211
213
|
}
|
|
@@ -1,21 +1,15 @@
|
|
|
1
|
-
const MAX_DIFF_LINES = 8;
|
|
2
|
-
|
|
3
1
|
/**
|
|
4
|
-
* Build
|
|
5
|
-
* Lines are prefixed with - /
|
|
2
|
+
* Build an inline diff from an old→new string replacement.
|
|
3
|
+
* Lines are prefixed with - / +.
|
|
6
4
|
*/
|
|
7
5
|
export function formatEditDiff(oldString: string, newString: string): string {
|
|
8
6
|
const removed =
|
|
9
7
|
oldString.length > 0
|
|
10
|
-
?
|
|
11
|
-
(l) => `- ${l}`,
|
|
12
|
-
)
|
|
8
|
+
? oldString.split("\n").map((l) => `- ${l}`)
|
|
13
9
|
: [];
|
|
14
10
|
const added =
|
|
15
11
|
newString.length > 0
|
|
16
|
-
?
|
|
17
|
-
(l) => `+ ${l}`,
|
|
18
|
-
)
|
|
12
|
+
? newString.split("\n").map((l) => `+ ${l}`)
|
|
19
13
|
: [];
|
|
20
14
|
|
|
21
15
|
return [...removed, ...added].join("\n");
|
|
@@ -37,9 +31,3 @@ export function formatWriteSummary(
|
|
|
37
31
|
return `(${oldLineCount} → ${newLineCount} lines)`;
|
|
38
32
|
}
|
|
39
33
|
|
|
40
|
-
function truncateLines(lines: string[], max: number): string[] {
|
|
41
|
-
if (lines.length <= max) return lines;
|
|
42
|
-
const kept = lines.slice(0, max);
|
|
43
|
-
kept.push(`... (${lines.length - max} more lines)`);
|
|
44
|
-
return kept;
|
|
45
|
-
}
|