@vellumai/assistant 0.4.19 → 0.4.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/__tests__/system-prompt.test.ts +2 -7
- package/src/__tests__/tool-execution-abort-cleanup.test.ts +0 -1
- package/src/agent/loop.ts +324 -163
- package/src/config/bundled-skills/app-builder/SKILL.md +7 -5
- package/src/config/bundled-skills/app-builder/TOOLS.json +2 -2
- package/src/config/system-prompt.ts +563 -539
- package/src/daemon/session-surfaces.ts +28 -0
- package/src/daemon/session.ts +255 -191
- package/src/daemon/tool-side-effects.ts +3 -13
- package/src/security/secure-keys.ts +27 -3
- package/src/tools/apps/definitions.ts +5 -0
- package/src/tools/apps/executors.ts +18 -22
- package/src/__tests__/response-tier.test.ts +0 -195
- package/src/daemon/response-tier.ts +0 -250
package/src/agent/loop.ts
CHANGED
|
@@ -1,20 +1,31 @@
|
|
|
1
|
-
import * as Sentry from
|
|
2
|
-
|
|
3
|
-
import { truncateOversizedToolResults } from
|
|
4
|
-
import { getHookManager } from
|
|
5
|
-
import type {
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
1
|
+
import * as Sentry from "@sentry/node";
|
|
2
|
+
|
|
3
|
+
import { truncateOversizedToolResults } from "../context/tool-result-truncation.js";
|
|
4
|
+
import { getHookManager } from "../hooks/manager.js";
|
|
5
|
+
import type {
|
|
6
|
+
ContentBlock,
|
|
7
|
+
Message,
|
|
8
|
+
Provider,
|
|
9
|
+
ToolDefinition,
|
|
10
|
+
} from "../providers/types.js";
|
|
11
|
+
import type { ToolResultContent } from "../providers/types.js";
|
|
12
|
+
import type { SensitiveOutputBinding } from "../tools/sensitive-output-placeholders.js";
|
|
13
|
+
import {
|
|
14
|
+
applyStreamingSubstitution,
|
|
15
|
+
applySubstitutions,
|
|
16
|
+
} from "../tools/sensitive-output-placeholders.js";
|
|
17
|
+
import { getLogger, isDebug, truncateForLog } from "../util/logger.js";
|
|
18
|
+
|
|
19
|
+
const log = getLogger("agent-loop");
|
|
12
20
|
|
|
13
21
|
export interface AgentLoopConfig {
|
|
14
22
|
maxTokens: number;
|
|
15
23
|
maxInputTokens?: number; // context window size for tool result truncation
|
|
16
24
|
thinking?: { enabled: boolean; budgetTokens: number };
|
|
17
|
-
toolChoice?:
|
|
25
|
+
toolChoice?:
|
|
26
|
+
| { type: "auto" }
|
|
27
|
+
| { type: "any" }
|
|
28
|
+
| { type: "tool"; name: string };
|
|
18
29
|
maxToolUseTurns?: number;
|
|
19
30
|
/** Minimum interval (ms) between consecutive LLM calls to prevent spin when tools return instantly */
|
|
20
31
|
minTurnIntervalMs?: number;
|
|
@@ -26,18 +37,46 @@ export interface CheckpointInfo {
|
|
|
26
37
|
hasToolUse: boolean;
|
|
27
38
|
}
|
|
28
39
|
|
|
29
|
-
export type CheckpointDecision =
|
|
40
|
+
export type CheckpointDecision = "continue" | "yield";
|
|
30
41
|
|
|
31
42
|
export type AgentEvent =
|
|
32
|
-
| { type:
|
|
33
|
-
| { type:
|
|
34
|
-
| { type:
|
|
35
|
-
| {
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
43
|
+
| { type: "text_delta"; text: string }
|
|
44
|
+
| { type: "thinking_delta"; thinking: string }
|
|
45
|
+
| { type: "message_complete"; message: Message }
|
|
46
|
+
| {
|
|
47
|
+
type: "tool_use";
|
|
48
|
+
id: string;
|
|
49
|
+
name: string;
|
|
50
|
+
input: Record<string, unknown>;
|
|
51
|
+
}
|
|
52
|
+
| { type: "tool_output_chunk"; toolUseId: string; chunk: string }
|
|
53
|
+
| {
|
|
54
|
+
type: "tool_result";
|
|
55
|
+
toolUseId: string;
|
|
56
|
+
content: string;
|
|
57
|
+
isError: boolean;
|
|
58
|
+
diff?: {
|
|
59
|
+
filePath: string;
|
|
60
|
+
oldContent: string;
|
|
61
|
+
newContent: string;
|
|
62
|
+
isNewFile: boolean;
|
|
63
|
+
};
|
|
64
|
+
status?: string;
|
|
65
|
+
contentBlocks?: ContentBlock[];
|
|
66
|
+
}
|
|
67
|
+
| { type: "input_json_delta"; toolName: string; accumulatedJson: string }
|
|
68
|
+
| { type: "error"; error: Error }
|
|
69
|
+
| {
|
|
70
|
+
type: "usage";
|
|
71
|
+
inputTokens: number;
|
|
72
|
+
outputTokens: number;
|
|
73
|
+
cacheCreationInputTokens?: number;
|
|
74
|
+
cacheReadInputTokens?: number;
|
|
75
|
+
model: string;
|
|
76
|
+
providerDurationMs: number;
|
|
77
|
+
rawRequest?: unknown;
|
|
78
|
+
rawResponse?: unknown;
|
|
79
|
+
};
|
|
41
80
|
|
|
42
81
|
const DEFAULT_CONFIG: AgentLoopConfig = {
|
|
43
82
|
maxTokens: 16000,
|
|
@@ -46,11 +85,13 @@ const DEFAULT_CONFIG: AgentLoopConfig = {
|
|
|
46
85
|
};
|
|
47
86
|
|
|
48
87
|
const PROGRESS_CHECK_INTERVAL = 5;
|
|
49
|
-
const PROGRESS_CHECK_REMINDER =
|
|
88
|
+
const PROGRESS_CHECK_REMINDER =
|
|
89
|
+
"You have been using tools for several turns. Check whether you are making meaningful progress toward the user's goal. If you are stuck in a loop or not making progress, summarize what you have tried and ask the user for guidance instead of continuing.";
|
|
50
90
|
|
|
51
91
|
// Warn the model N turns before the hard limit so it can wrap up gracefully
|
|
52
92
|
const APPROACHING_LIMIT_OFFSET = 5;
|
|
53
|
-
const APPROACHING_LIMIT_WARNING =
|
|
93
|
+
const APPROACHING_LIMIT_WARNING =
|
|
94
|
+
"You are approaching the tool-use turn limit. You have {remaining} turns remaining. Wrap up your current task — summarize progress and present results to the user. If you cannot finish, explain what remains and ask the user how to proceed.";
|
|
54
95
|
|
|
55
96
|
export interface ResolvedSystemPrompt {
|
|
56
97
|
systemPrompt: string;
|
|
@@ -64,15 +105,53 @@ export class AgentLoop {
|
|
|
64
105
|
private config: AgentLoopConfig;
|
|
65
106
|
private tools: ToolDefinition[];
|
|
66
107
|
private resolveTools: ((history: Message[]) => ToolDefinition[]) | null;
|
|
67
|
-
private resolveSystemPrompt:
|
|
68
|
-
|
|
108
|
+
private resolveSystemPrompt:
|
|
109
|
+
| ((history: Message[]) => ResolvedSystemPrompt)
|
|
110
|
+
| null;
|
|
111
|
+
private toolExecutor:
|
|
112
|
+
| ((
|
|
113
|
+
name: string,
|
|
114
|
+
input: Record<string, unknown>,
|
|
115
|
+
onOutput?: (chunk: string) => void,
|
|
116
|
+
) => Promise<{
|
|
117
|
+
content: string;
|
|
118
|
+
isError: boolean;
|
|
119
|
+
diff?: {
|
|
120
|
+
filePath: string;
|
|
121
|
+
oldContent: string;
|
|
122
|
+
newContent: string;
|
|
123
|
+
isNewFile: boolean;
|
|
124
|
+
};
|
|
125
|
+
status?: string;
|
|
126
|
+
contentBlocks?: ContentBlock[];
|
|
127
|
+
sensitiveBindings?: SensitiveOutputBinding[];
|
|
128
|
+
yieldToUser?: boolean;
|
|
129
|
+
}>)
|
|
130
|
+
| null;
|
|
69
131
|
|
|
70
132
|
constructor(
|
|
71
133
|
provider: Provider,
|
|
72
134
|
systemPrompt: string,
|
|
73
135
|
config?: Partial<AgentLoopConfig>,
|
|
74
136
|
tools?: ToolDefinition[],
|
|
75
|
-
toolExecutor?: (
|
|
137
|
+
toolExecutor?: (
|
|
138
|
+
name: string,
|
|
139
|
+
input: Record<string, unknown>,
|
|
140
|
+
onOutput?: (chunk: string) => void,
|
|
141
|
+
) => Promise<{
|
|
142
|
+
content: string;
|
|
143
|
+
isError: boolean;
|
|
144
|
+
diff?: {
|
|
145
|
+
filePath: string;
|
|
146
|
+
oldContent: string;
|
|
147
|
+
newContent: string;
|
|
148
|
+
isNewFile: boolean;
|
|
149
|
+
};
|
|
150
|
+
status?: string;
|
|
151
|
+
contentBlocks?: ContentBlock[];
|
|
152
|
+
sensitiveBindings?: SensitiveOutputBinding[];
|
|
153
|
+
yieldToUser?: boolean;
|
|
154
|
+
}>,
|
|
76
155
|
resolveTools?: (history: Message[]) => ToolDefinition[],
|
|
77
156
|
resolveSystemPrompt?: (history: Message[]) => ResolvedSystemPrompt,
|
|
78
157
|
) {
|
|
@@ -103,13 +182,13 @@ export class AgentLoop {
|
|
|
103
182
|
// Bindings are accumulated from tool results; placeholders are
|
|
104
183
|
// resolved in streamed deltas and final assistant message text.
|
|
105
184
|
const substitutionMap = new Map<string, string>();
|
|
106
|
-
let streamingPending =
|
|
185
|
+
let streamingPending = "";
|
|
107
186
|
|
|
108
187
|
while (true) {
|
|
109
188
|
if (signal?.aborted) break;
|
|
110
189
|
|
|
111
190
|
const turnStart = Date.now();
|
|
112
|
-
let toolUseBlocks: Extract<ContentBlock, { type:
|
|
191
|
+
let toolUseBlocks: Extract<ContentBlock, { type: "tool_use" }>[] = [];
|
|
113
192
|
|
|
114
193
|
try {
|
|
115
194
|
// Resolve tools for this turn: use the dynamic resolver if provided,
|
|
@@ -126,19 +205,21 @@ export class AgentLoop {
|
|
|
126
205
|
const turnMaxTokens = resolved?.maxTokens ?? this.config.maxTokens;
|
|
127
206
|
const turnModel = resolved?.model;
|
|
128
207
|
|
|
129
|
-
const providerConfig: Record<string, unknown> = {
|
|
208
|
+
const providerConfig: Record<string, unknown> = {
|
|
209
|
+
max_tokens: turnMaxTokens,
|
|
210
|
+
};
|
|
130
211
|
if (turnModel) {
|
|
131
212
|
providerConfig.model = turnModel;
|
|
132
213
|
}
|
|
133
214
|
if (this.config.thinking?.enabled && turnMaxTokens >= 4000) {
|
|
134
|
-
// Skip thinking when turnMaxTokens is too low
|
|
135
|
-
//
|
|
215
|
+
// Skip thinking when turnMaxTokens is too low to avoid the
|
|
216
|
+
// thinking budget consuming nearly all output tokens.
|
|
136
217
|
const budgetTokens = Math.min(
|
|
137
218
|
this.config.thinking.budgetTokens,
|
|
138
219
|
Math.floor(turnMaxTokens * 0.75),
|
|
139
220
|
);
|
|
140
221
|
providerConfig.thinking = {
|
|
141
|
-
type:
|
|
222
|
+
type: "enabled",
|
|
142
223
|
budget_tokens: budgetTokens,
|
|
143
224
|
};
|
|
144
225
|
}
|
|
@@ -148,25 +229,34 @@ export class AgentLoop {
|
|
|
148
229
|
}
|
|
149
230
|
|
|
150
231
|
if (debug) {
|
|
151
|
-
rlog.debug(
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
232
|
+
rlog.debug(
|
|
233
|
+
{
|
|
234
|
+
systemPrompt: truncateForLog(turnSystemPrompt, 200),
|
|
235
|
+
messageCount: history.length,
|
|
236
|
+
lastMessage:
|
|
237
|
+
history.length > 0
|
|
238
|
+
? summarizeMessage(history[history.length - 1])
|
|
239
|
+
: null,
|
|
240
|
+
toolCount: currentTools.length,
|
|
241
|
+
config: providerConfig,
|
|
242
|
+
},
|
|
243
|
+
"Sending request to provider",
|
|
244
|
+
);
|
|
160
245
|
}
|
|
161
246
|
|
|
162
|
-
const preLlmResult = await getHookManager().trigger(
|
|
247
|
+
const preLlmResult = await getHookManager().trigger("pre-llm-call", {
|
|
163
248
|
systemPrompt: turnSystemPrompt,
|
|
164
249
|
messages: history,
|
|
165
250
|
toolCount: currentTools.length,
|
|
166
251
|
});
|
|
167
252
|
|
|
168
253
|
if (preLlmResult.blocked) {
|
|
169
|
-
onEvent({
|
|
254
|
+
onEvent({
|
|
255
|
+
type: "error",
|
|
256
|
+
error: new Error(
|
|
257
|
+
`LLM call blocked by hook "${preLlmResult.blockedBy}"`,
|
|
258
|
+
),
|
|
259
|
+
});
|
|
170
260
|
break;
|
|
171
261
|
}
|
|
172
262
|
|
|
@@ -195,22 +285,29 @@ export class AgentLoop {
|
|
|
195
285
|
{
|
|
196
286
|
config: providerConfig,
|
|
197
287
|
onEvent: (event) => {
|
|
198
|
-
if (event.type ===
|
|
288
|
+
if (event.type === "text_delta") {
|
|
199
289
|
// Apply sensitive-output placeholder substitution (chunk-safe)
|
|
200
290
|
if (substitutionMap.size > 0) {
|
|
201
291
|
const combined = streamingPending + event.text;
|
|
202
|
-
const { emit, pending } = applyStreamingSubstitution(
|
|
292
|
+
const { emit, pending } = applyStreamingSubstitution(
|
|
293
|
+
combined,
|
|
294
|
+
substitutionMap,
|
|
295
|
+
);
|
|
203
296
|
streamingPending = pending;
|
|
204
297
|
if (emit.length > 0) {
|
|
205
|
-
onEvent({ type:
|
|
298
|
+
onEvent({ type: "text_delta", text: emit });
|
|
206
299
|
}
|
|
207
300
|
} else {
|
|
208
|
-
onEvent({ type:
|
|
301
|
+
onEvent({ type: "text_delta", text: event.text });
|
|
209
302
|
}
|
|
210
|
-
} else if (event.type ===
|
|
211
|
-
onEvent({ type:
|
|
212
|
-
} else if (event.type ===
|
|
213
|
-
onEvent({
|
|
303
|
+
} else if (event.type === "thinking_delta") {
|
|
304
|
+
onEvent({ type: "thinking_delta", thinking: event.thinking });
|
|
305
|
+
} else if (event.type === "input_json_delta") {
|
|
306
|
+
onEvent({
|
|
307
|
+
type: "input_json_delta",
|
|
308
|
+
toolName: event.toolName,
|
|
309
|
+
accumulatedJson: event.accumulatedJson,
|
|
310
|
+
});
|
|
214
311
|
}
|
|
215
312
|
},
|
|
216
313
|
signal,
|
|
@@ -220,24 +317,34 @@ export class AgentLoop {
|
|
|
220
317
|
const providerDurationMs = Date.now() - providerStart;
|
|
221
318
|
|
|
222
319
|
if (debug) {
|
|
223
|
-
rlog.debug(
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
320
|
+
rlog.debug(
|
|
321
|
+
{
|
|
322
|
+
providerDurationMs,
|
|
323
|
+
model: response.model,
|
|
324
|
+
stopReason: response.stopReason,
|
|
325
|
+
inputTokens: response.usage.inputTokens,
|
|
326
|
+
outputTokens: response.usage.outputTokens,
|
|
327
|
+
cacheCreationInputTokens: response.usage.cacheCreationInputTokens,
|
|
328
|
+
cacheReadInputTokens: response.usage.cacheReadInputTokens,
|
|
329
|
+
contentBlocks: response.content.map((b) => ({
|
|
330
|
+
type: b.type,
|
|
331
|
+
...(b.type === "text"
|
|
332
|
+
? { text: truncateForLog(b.text, 1200) }
|
|
333
|
+
: {}),
|
|
334
|
+
...(b.type === "tool_use"
|
|
335
|
+
? {
|
|
336
|
+
name: b.name,
|
|
337
|
+
input: truncateForLog(JSON.stringify(b.input), 1200),
|
|
338
|
+
}
|
|
339
|
+
: {}),
|
|
340
|
+
})),
|
|
341
|
+
},
|
|
342
|
+
"Provider response received",
|
|
343
|
+
);
|
|
237
344
|
}
|
|
238
345
|
|
|
239
346
|
onEvent({
|
|
240
|
-
type:
|
|
347
|
+
type: "usage",
|
|
241
348
|
inputTokens: response.usage.inputTokens,
|
|
242
349
|
outputTokens: response.usage.outputTokens,
|
|
243
350
|
cacheCreationInputTokens: response.usage.cacheCreationInputTokens,
|
|
@@ -248,7 +355,7 @@ export class AgentLoop {
|
|
|
248
355
|
rawResponse: response.rawResponse,
|
|
249
356
|
});
|
|
250
357
|
|
|
251
|
-
void getHookManager().trigger(
|
|
358
|
+
void getHookManager().trigger("post-llm-call", {
|
|
252
359
|
model: response.model,
|
|
253
360
|
inputTokens: response.usage.inputTokens,
|
|
254
361
|
outputTokens: response.usage.outputTokens,
|
|
@@ -260,9 +367,9 @@ export class AgentLoop {
|
|
|
260
367
|
if (streamingPending.length > 0) {
|
|
261
368
|
const flushed = applySubstitutions(streamingPending, substitutionMap);
|
|
262
369
|
if (flushed.length > 0) {
|
|
263
|
-
onEvent({ type:
|
|
370
|
+
onEvent({ type: "text_delta", text: flushed });
|
|
264
371
|
}
|
|
265
|
-
streamingPending =
|
|
372
|
+
streamingPending = "";
|
|
266
373
|
}
|
|
267
374
|
|
|
268
375
|
// Build the assistant message with placeholder-only text.
|
|
@@ -271,40 +378,41 @@ export class AgentLoop {
|
|
|
271
378
|
// on subsequent loop turns nor on session reload from the database.
|
|
272
379
|
// Substitution to real values happens only in streamed text_delta events.
|
|
273
380
|
const assistantMessage: Message = {
|
|
274
|
-
role:
|
|
381
|
+
role: "assistant",
|
|
275
382
|
content: response.content,
|
|
276
383
|
};
|
|
277
384
|
history.push(assistantMessage);
|
|
278
385
|
|
|
279
|
-
await onEvent({ type:
|
|
386
|
+
await onEvent({ type: "message_complete", message: assistantMessage });
|
|
280
387
|
|
|
281
388
|
// Check for tool use
|
|
282
389
|
toolUseBlocks = response.content.filter(
|
|
283
|
-
(block): block is Extract<ContentBlock, { type:
|
|
284
|
-
block.type ===
|
|
390
|
+
(block): block is Extract<ContentBlock, { type: "tool_use" }> =>
|
|
391
|
+
block.type === "tool_use",
|
|
285
392
|
);
|
|
286
393
|
|
|
287
394
|
// Check if the assistant turn contained any visible text (used for
|
|
288
395
|
// both the empty-response nudge and the anti-repetition notice).
|
|
289
396
|
const hasTextBlock = response.content.some(
|
|
290
|
-
(block) => block.type ===
|
|
397
|
+
(block) => block.type === "text" && block.text.trim().length > 0,
|
|
291
398
|
);
|
|
292
399
|
|
|
293
400
|
if (toolUseBlocks.length === 0 || !this.toolExecutor) {
|
|
294
401
|
// Check if the LLM returned no text after tool results — nudge it to respond
|
|
295
|
-
const lastUserMsg =
|
|
402
|
+
const lastUserMsg =
|
|
403
|
+
history.length >= 2 ? history[history.length - 2] : undefined;
|
|
296
404
|
const lastWasToolResult =
|
|
297
|
-
lastUserMsg?.role ===
|
|
298
|
-
lastUserMsg.content.some((block) => block.type ===
|
|
405
|
+
lastUserMsg?.role === "user" &&
|
|
406
|
+
lastUserMsg.content.some((block) => block.type === "tool_result");
|
|
299
407
|
|
|
300
408
|
if (!hasTextBlock && lastWasToolResult && !nudgedForEmptyResponse) {
|
|
301
409
|
nudgedForEmptyResponse = true;
|
|
302
410
|
history.push({
|
|
303
|
-
role:
|
|
411
|
+
role: "user",
|
|
304
412
|
content: [
|
|
305
413
|
{
|
|
306
|
-
type:
|
|
307
|
-
text:
|
|
414
|
+
type: "text",
|
|
415
|
+
text: "<system_notice>You executed tools but didn't tell the user what happened. Provide a brief, conversational summary of the results.</system_notice>",
|
|
308
416
|
},
|
|
309
417
|
],
|
|
310
418
|
});
|
|
@@ -318,29 +426,34 @@ export class AgentLoop {
|
|
|
318
426
|
// Emit all tool_use events upfront, then execute tools in parallel
|
|
319
427
|
for (const toolUse of toolUseBlocks) {
|
|
320
428
|
onEvent({
|
|
321
|
-
type:
|
|
429
|
+
type: "tool_use",
|
|
322
430
|
id: toolUse.id,
|
|
323
431
|
name: toolUse.name,
|
|
324
432
|
input: toolUse.input,
|
|
325
433
|
});
|
|
326
434
|
|
|
327
435
|
if (debug) {
|
|
328
|
-
rlog.debug(
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
436
|
+
rlog.debug(
|
|
437
|
+
{
|
|
438
|
+
tool: toolUse.name,
|
|
439
|
+
input: truncateForLog(JSON.stringify(toolUse.input), 300),
|
|
440
|
+
},
|
|
441
|
+
"Executing tool",
|
|
442
|
+
);
|
|
332
443
|
}
|
|
333
444
|
}
|
|
334
445
|
|
|
335
446
|
// If already cancelled, synthesize cancelled results and stop
|
|
336
447
|
if (signal?.aborted) {
|
|
337
|
-
const cancelledBlocks: ContentBlock[] = toolUseBlocks.map(
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
448
|
+
const cancelledBlocks: ContentBlock[] = toolUseBlocks.map(
|
|
449
|
+
(toolUse) => ({
|
|
450
|
+
type: "tool_result" as const,
|
|
451
|
+
tool_use_id: toolUse.id,
|
|
452
|
+
content: "Cancelled by user",
|
|
453
|
+
is_error: true,
|
|
454
|
+
}),
|
|
455
|
+
);
|
|
456
|
+
history.push({ role: "user", content: cancelledBlocks });
|
|
344
457
|
break;
|
|
345
458
|
}
|
|
346
459
|
|
|
@@ -349,12 +462,12 @@ export class AgentLoop {
|
|
|
349
462
|
// in the same response create competing browser sessions/windows and can
|
|
350
463
|
// thrash renderer CPU. Reject browser_* calls in that turn.
|
|
351
464
|
const hasComputerUseEscalation = toolUseBlocks.some(
|
|
352
|
-
(toolUse) => toolUse.name ===
|
|
465
|
+
(toolUse) => toolUse.name === "computer_use_request_control",
|
|
353
466
|
);
|
|
354
467
|
const blockedBrowserToolIds = hasComputerUseEscalation
|
|
355
468
|
? new Set(
|
|
356
469
|
toolUseBlocks
|
|
357
|
-
.filter((toolUse) => toolUse.name.startsWith(
|
|
470
|
+
.filter((toolUse) => toolUse.name.startsWith("browser_"))
|
|
358
471
|
.map((toolUse) => toolUse.id),
|
|
359
472
|
)
|
|
360
473
|
: new Set<string>();
|
|
@@ -365,7 +478,7 @@ export class AgentLoop {
|
|
|
365
478
|
blockedBrowserToolCount: blockedBrowserToolIds.size,
|
|
366
479
|
toolNames: toolUseBlocks.map((toolUse) => toolUse.name),
|
|
367
480
|
},
|
|
368
|
-
|
|
481
|
+
"Blocking browser_* tools: computer_use_request_control was requested in same turn",
|
|
369
482
|
);
|
|
370
483
|
}
|
|
371
484
|
|
|
@@ -380,25 +493,37 @@ export class AgentLoop {
|
|
|
380
493
|
return {
|
|
381
494
|
toolUse,
|
|
382
495
|
result: {
|
|
383
|
-
content:
|
|
496
|
+
content:
|
|
497
|
+
"Error: browser_* tools cannot run in the same turn as computer_use_request_control. Continue using the foreground computer-use session only.",
|
|
384
498
|
isError: true,
|
|
385
499
|
},
|
|
386
500
|
};
|
|
387
501
|
}
|
|
388
502
|
|
|
389
|
-
const result = await this.toolExecutor!(
|
|
390
|
-
|
|
391
|
-
|
|
503
|
+
const result = await this.toolExecutor!(
|
|
504
|
+
toolUse.name,
|
|
505
|
+
toolUse.input,
|
|
506
|
+
(chunk) => {
|
|
507
|
+
onEvent({
|
|
508
|
+
type: "tool_output_chunk",
|
|
509
|
+
toolUseId: toolUse.id,
|
|
510
|
+
chunk,
|
|
511
|
+
});
|
|
512
|
+
},
|
|
513
|
+
);
|
|
392
514
|
|
|
393
515
|
const toolDurationMs = Date.now() - toolStart;
|
|
394
516
|
|
|
395
517
|
if (debug) {
|
|
396
|
-
rlog.debug(
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
518
|
+
rlog.debug(
|
|
519
|
+
{
|
|
520
|
+
tool: toolUse.name,
|
|
521
|
+
toolDurationMs,
|
|
522
|
+
isError: result.isError,
|
|
523
|
+
output: truncateForLog(result.content, 300),
|
|
524
|
+
},
|
|
525
|
+
"Tool execution complete",
|
|
526
|
+
);
|
|
402
527
|
}
|
|
403
528
|
|
|
404
529
|
return { toolUse, result };
|
|
@@ -409,13 +534,19 @@ export class AgentLoop {
|
|
|
409
534
|
if (signal && !signal.aborted) {
|
|
410
535
|
let abortHandler!: () => void;
|
|
411
536
|
const abortPromise = new Promise<never>((_, reject) => {
|
|
412
|
-
abortHandler = () =>
|
|
413
|
-
|
|
537
|
+
abortHandler = () =>
|
|
538
|
+
reject(
|
|
539
|
+
new DOMException("The operation was aborted", "AbortError"),
|
|
540
|
+
);
|
|
541
|
+
signal.addEventListener("abort", abortHandler, { once: true });
|
|
414
542
|
});
|
|
415
543
|
try {
|
|
416
|
-
toolResults = await Promise.race([
|
|
544
|
+
toolResults = await Promise.race([
|
|
545
|
+
toolExecutionPromise,
|
|
546
|
+
abortPromise,
|
|
547
|
+
]);
|
|
417
548
|
} finally {
|
|
418
|
-
signal.removeEventListener(
|
|
549
|
+
signal.removeEventListener("abort", abortHandler);
|
|
419
550
|
// Suppress unhandled rejection from abandoned tool executions
|
|
420
551
|
toolExecutionPromise.catch(() => {});
|
|
421
552
|
}
|
|
@@ -435,21 +566,28 @@ export class AgentLoop {
|
|
|
435
566
|
}
|
|
436
567
|
|
|
437
568
|
// Collect result blocks preserving tool_use order (Promise.all maintains order)
|
|
438
|
-
const rawResultBlocks: ContentBlock[] = toolResults.map(
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
569
|
+
const rawResultBlocks: ContentBlock[] = toolResults.map(
|
|
570
|
+
({ toolUse, result }) => ({
|
|
571
|
+
type: "tool_result" as const,
|
|
572
|
+
tool_use_id: toolUse.id,
|
|
573
|
+
content: result.content,
|
|
574
|
+
is_error: result.isError,
|
|
575
|
+
...(result.contentBlocks
|
|
576
|
+
? { contentBlocks: result.contentBlocks }
|
|
577
|
+
: {}),
|
|
578
|
+
}),
|
|
579
|
+
);
|
|
445
580
|
|
|
446
581
|
// Pre-emptively truncate oversized tool results to prevent context overflow
|
|
447
|
-
const { blocks: resultBlocks, truncatedCount } =
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
582
|
+
const { blocks: resultBlocks, truncatedCount } =
|
|
583
|
+
truncateOversizedToolResults(
|
|
584
|
+
rawResultBlocks,
|
|
585
|
+
this.config.maxInputTokens ?? 180_000,
|
|
586
|
+
);
|
|
451
587
|
if (truncatedCount > 0) {
|
|
452
|
-
log.warn(
|
|
588
|
+
log.warn(
|
|
589
|
+
`Truncated ${truncatedCount} oversized tool result(s) to prevent context overflow`,
|
|
590
|
+
);
|
|
453
591
|
}
|
|
454
592
|
|
|
455
593
|
// Emit tool_result events AFTER truncation so downstream consumers
|
|
@@ -457,13 +595,14 @@ export class AgentLoop {
|
|
|
457
595
|
for (const { toolUse, result } of toolResults) {
|
|
458
596
|
// Look up the (possibly truncated) content from resultBlocks
|
|
459
597
|
const truncatedBlock = resultBlocks.find(
|
|
460
|
-
(b) => b.type ===
|
|
598
|
+
(b) => b.type === "tool_result" && b.tool_use_id === toolUse.id,
|
|
461
599
|
);
|
|
462
|
-
const emitContent =
|
|
463
|
-
|
|
464
|
-
|
|
600
|
+
const emitContent =
|
|
601
|
+
truncatedBlock && truncatedBlock.type === "tool_result"
|
|
602
|
+
? truncatedBlock.content
|
|
603
|
+
: result.content;
|
|
465
604
|
onEvent({
|
|
466
|
-
type:
|
|
605
|
+
type: "tool_result",
|
|
467
606
|
toolUseId: toolUse.id,
|
|
468
607
|
content: emitContent,
|
|
469
608
|
isError: result.isError,
|
|
@@ -475,39 +614,44 @@ export class AgentLoop {
|
|
|
475
614
|
|
|
476
615
|
// If cancelled during execution, push completed results and stop
|
|
477
616
|
if (signal?.aborted) {
|
|
478
|
-
history.push({ role:
|
|
617
|
+
history.push({ role: "user", content: resultBlocks });
|
|
479
618
|
break;
|
|
480
619
|
}
|
|
481
620
|
|
|
482
621
|
// If any tool result requests yielding to the user (e.g. interactive
|
|
483
622
|
// surface awaiting a button click), push results and stop the loop.
|
|
484
623
|
if (toolResults.some(({ result }) => result.yieldToUser)) {
|
|
485
|
-
history.push({ role:
|
|
624
|
+
history.push({ role: "user", content: resultBlocks });
|
|
486
625
|
break;
|
|
487
626
|
}
|
|
488
627
|
|
|
489
628
|
// Track tool-use turns and inject progress reminder every N turns
|
|
490
629
|
toolUseTurns++;
|
|
491
|
-
if (
|
|
630
|
+
if (
|
|
631
|
+
this.config.maxToolUseTurns &&
|
|
632
|
+
this.config.maxToolUseTurns > 0 &&
|
|
633
|
+
toolUseTurns >= this.config.maxToolUseTurns
|
|
634
|
+
) {
|
|
492
635
|
const limitMessage = `Tool-use turn limit reached (${this.config.maxToolUseTurns}). Stopping to prevent runaway loops; ask the user for guidance.`;
|
|
493
|
-
onEvent({ type:
|
|
636
|
+
onEvent({ type: "error", error: new Error(limitMessage) });
|
|
494
637
|
resultBlocks.push({
|
|
495
|
-
type:
|
|
638
|
+
type: "text",
|
|
496
639
|
text: `<system_notice>${limitMessage}</system_notice>`,
|
|
497
640
|
});
|
|
498
|
-
history.push({ role:
|
|
641
|
+
history.push({ role: "user", content: resultBlocks });
|
|
499
642
|
break;
|
|
500
643
|
}
|
|
501
644
|
// Soft warning a few turns before the hard limit
|
|
502
|
-
const softLimit =
|
|
645
|
+
const softLimit =
|
|
646
|
+
(this.config.maxToolUseTurns ?? 0) - APPROACHING_LIMIT_OFFSET;
|
|
503
647
|
if (softLimit > 0 && toolUseTurns === softLimit) {
|
|
504
648
|
resultBlocks.push({
|
|
505
|
-
type:
|
|
506
|
-
text: `<system_notice>${APPROACHING_LIMIT_WARNING.replace(
|
|
649
|
+
type: "text",
|
|
650
|
+
text: `<system_notice>${APPROACHING_LIMIT_WARNING.replace("{remaining}", String(APPROACHING_LIMIT_OFFSET))}</system_notice>`,
|
|
507
651
|
});
|
|
508
652
|
} else if (toolUseTurns % PROGRESS_CHECK_INTERVAL === 0) {
|
|
509
653
|
resultBlocks.push({
|
|
510
|
-
type:
|
|
654
|
+
type: "text",
|
|
511
655
|
text: `<system_notice>${PROGRESS_CHECK_REMINDER}</system_notice>`,
|
|
512
656
|
});
|
|
513
657
|
}
|
|
@@ -515,22 +659,25 @@ export class AgentLoop {
|
|
|
515
659
|
// Remind the LLM not to repeat text it already streamed
|
|
516
660
|
if (hasTextBlock) {
|
|
517
661
|
resultBlocks.push({
|
|
518
|
-
type:
|
|
662
|
+
type: "text",
|
|
519
663
|
text: '<system_notice>Your previous text was already shown to the user in real time. Do not repeat or rephrase it. Do not narrate retries or internal process chatter ("let me try", "that didn\'t work"). Keep working with tools silently unless you need user input, and only send user-facing text when you have concrete progress or final results.</system_notice>',
|
|
520
664
|
});
|
|
521
665
|
}
|
|
522
666
|
|
|
523
667
|
// Add tool results as a user message and continue the loop
|
|
524
|
-
history.push({ role:
|
|
668
|
+
history.push({ role: "user", content: resultBlocks });
|
|
525
669
|
|
|
526
670
|
if (debug) {
|
|
527
671
|
const turnDurationMs = Date.now() - turnStart;
|
|
528
|
-
rlog.debug(
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
672
|
+
rlog.debug(
|
|
673
|
+
{
|
|
674
|
+
turnDurationMs,
|
|
675
|
+
providerDurationMs,
|
|
676
|
+
toolCount: toolUseBlocks.length,
|
|
677
|
+
turn: toolUseTurns,
|
|
678
|
+
},
|
|
679
|
+
"Turn complete",
|
|
680
|
+
);
|
|
534
681
|
}
|
|
535
682
|
|
|
536
683
|
// Invoke checkpoint callback after tool results are in history
|
|
@@ -540,7 +687,7 @@ export class AgentLoop {
|
|
|
540
687
|
toolCount: toolUseBlocks.length,
|
|
541
688
|
hasToolUse: true,
|
|
542
689
|
});
|
|
543
|
-
if (decision ===
|
|
690
|
+
if (decision === "yield") {
|
|
544
691
|
break;
|
|
545
692
|
}
|
|
546
693
|
}
|
|
@@ -550,20 +697,25 @@ export class AgentLoop {
|
|
|
550
697
|
// Anthropic API (every tool_use must have a matching tool_result).
|
|
551
698
|
if (signal?.aborted) {
|
|
552
699
|
if (toolUseBlocks.length > 0) {
|
|
553
|
-
const cancelledBlocks: ContentBlock[] = toolUseBlocks.map(
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
700
|
+
const cancelledBlocks: ContentBlock[] = toolUseBlocks.map(
|
|
701
|
+
(toolUse) => ({
|
|
702
|
+
type: "tool_result" as const,
|
|
703
|
+
tool_use_id: toolUse.id,
|
|
704
|
+
content: "Cancelled by user",
|
|
705
|
+
is_error: true,
|
|
706
|
+
}),
|
|
707
|
+
);
|
|
708
|
+
history.push({ role: "user", content: cancelledBlocks });
|
|
560
709
|
}
|
|
561
710
|
break;
|
|
562
711
|
}
|
|
563
712
|
const err = error instanceof Error ? error : new Error(String(error));
|
|
564
|
-
rlog.error(
|
|
713
|
+
rlog.error(
|
|
714
|
+
{ err, turn: toolUseTurns, messageCount: history.length },
|
|
715
|
+
"Agent loop error during turn processing",
|
|
716
|
+
);
|
|
565
717
|
Sentry.captureException(err);
|
|
566
|
-
onEvent({ type:
|
|
718
|
+
onEvent({ type: "error", error: err });
|
|
567
719
|
break;
|
|
568
720
|
}
|
|
569
721
|
}
|
|
@@ -572,7 +724,10 @@ export class AgentLoop {
|
|
|
572
724
|
}
|
|
573
725
|
}
|
|
574
726
|
|
|
575
|
-
function summarizeMessage(msg: Message): {
|
|
727
|
+
function summarizeMessage(msg: Message): {
|
|
728
|
+
role: string;
|
|
729
|
+
blockTypes: string[];
|
|
730
|
+
} {
|
|
576
731
|
return {
|
|
577
732
|
role: msg.role,
|
|
578
733
|
blockTypes: msg.content.map((b) => b.type),
|
|
@@ -596,8 +751,8 @@ function stripOldImageBlocks(history: Message[]): Message[] {
|
|
|
596
751
|
let lastToolResultUserIdx = -1;
|
|
597
752
|
for (let i = history.length - 1; i >= 0; i--) {
|
|
598
753
|
if (
|
|
599
|
-
history[i].role ===
|
|
600
|
-
history[i].content.some((b) => b.type ===
|
|
754
|
+
history[i].role === "user" &&
|
|
755
|
+
history[i].content.some((b) => b.type === "tool_result")
|
|
601
756
|
) {
|
|
602
757
|
lastToolResultUserIdx = i;
|
|
603
758
|
break;
|
|
@@ -606,11 +761,15 @@ function stripOldImageBlocks(history: Message[]): Message[] {
|
|
|
606
761
|
|
|
607
762
|
return history.map((msg, idx) => {
|
|
608
763
|
// Keep the most recent tool-result user message intact (current turn)
|
|
609
|
-
if (idx === lastToolResultUserIdx || msg.role !==
|
|
764
|
+
if (idx === lastToolResultUserIdx || msg.role !== "user") return msg;
|
|
610
765
|
|
|
611
766
|
// Check if any tool_result blocks have image contentBlocks
|
|
612
767
|
const hasImages = msg.content.some(
|
|
613
|
-
(b) =>
|
|
768
|
+
(b) =>
|
|
769
|
+
b.type === "tool_result" &&
|
|
770
|
+
(b as ToolResultContent).contentBlocks?.some(
|
|
771
|
+
(cb) => cb.type === "image",
|
|
772
|
+
),
|
|
614
773
|
);
|
|
615
774
|
if (!hasImages) return msg;
|
|
616
775
|
|
|
@@ -618,13 +777,15 @@ function stripOldImageBlocks(history: Message[]): Message[] {
|
|
|
618
777
|
return {
|
|
619
778
|
...msg,
|
|
620
779
|
content: msg.content.map((b) => {
|
|
621
|
-
if (b.type !==
|
|
780
|
+
if (b.type !== "tool_result") return b;
|
|
622
781
|
const tr = b as ToolResultContent;
|
|
623
|
-
if (!tr.contentBlocks?.some(cb => cb.type ===
|
|
782
|
+
if (!tr.contentBlocks?.some((cb) => cb.type === "image")) return b;
|
|
624
783
|
return {
|
|
625
784
|
...tr,
|
|
626
785
|
contentBlocks: undefined,
|
|
627
|
-
content:
|
|
786
|
+
content:
|
|
787
|
+
(tr.content || "") +
|
|
788
|
+
"\n[Screenshot was captured and shown previously — image data removed to save context.]",
|
|
628
789
|
};
|
|
629
790
|
}),
|
|
630
791
|
};
|