@bluecopa/harness 1.0.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +212 -117
- package/dist/arc/index.d.ts +796 -0
- package/dist/arc/index.js +2863 -0
- package/dist/arc/index.js.map +1 -0
- package/dist/observability/otel.d.ts +36 -0
- package/dist/observability/otel.js +73 -0
- package/dist/observability/otel.js.map +1 -0
- package/dist/shared-types-DRxnerLT.d.ts +138 -0
- package/dist/skills/index.d.ts +67 -0
- package/dist/skills/index.js +282 -0
- package/dist/skills/index.js.map +1 -0
- package/package.json +26 -2
- package/AGENTS.md +0 -18
- package/docs/guides/observability.md +0 -32
- package/docs/guides/providers.md +0 -51
- package/docs/guides/skills.md +0 -25
- package/docs/security/skill-sandbox-threat-model.md +0 -20
- package/src/agent/create-agent.ts +0 -884
- package/src/agent/create-tools.ts +0 -33
- package/src/agent/step-executor.ts +0 -15
- package/src/agent/types.ts +0 -57
- package/src/context/llm-compaction-strategy.ts +0 -37
- package/src/context/prepare-step.ts +0 -65
- package/src/context/token-tracker.ts +0 -26
- package/src/extracted/manifest.json +0 -10
- package/src/extracted/prompts/compaction.md +0 -5
- package/src/extracted/prompts/system.md +0 -5
- package/src/extracted/tools.json +0 -82
- package/src/hooks/hook-runner.ts +0 -22
- package/src/hooks/tool-wrappers.ts +0 -64
- package/src/interfaces/compaction-strategy.ts +0 -18
- package/src/interfaces/hooks.ts +0 -24
- package/src/interfaces/sandbox-provider.ts +0 -29
- package/src/interfaces/session-store.ts +0 -48
- package/src/interfaces/tool-provider.ts +0 -70
- package/src/loop/bridge.ts +0 -363
- package/src/loop/context-store.ts +0 -207
- package/src/loop/lcm-tool-loop.ts +0 -163
- package/src/loop/vercel-agent-loop.ts +0 -279
- package/src/observability/context.ts +0 -17
- package/src/observability/metrics.ts +0 -27
- package/src/observability/otel.ts +0 -105
- package/src/observability/tracing.ts +0 -13
- package/src/optimization/agent-evaluator.ts +0 -40
- package/src/optimization/config-serializer.ts +0 -16
- package/src/optimization/optimization-runner.ts +0 -39
- package/src/optimization/trace-collector.ts +0 -33
- package/src/permissions/permission-manager.ts +0 -34
- package/src/providers/composite-tool-provider.ts +0 -72
- package/src/providers/control-plane-e2b-executor.ts +0 -218
- package/src/providers/e2b-tool-provider.ts +0 -68
- package/src/providers/local-tool-provider.ts +0 -190
- package/src/providers/skill-sandbox-provider.ts +0 -46
- package/src/sessions/file-session-store.ts +0 -61
- package/src/sessions/in-memory-session-store.ts +0 -39
- package/src/sessions/session-manager.ts +0 -44
- package/src/skills/skill-loader.ts +0 -52
- package/src/skills/skill-manager.ts +0 -175
- package/src/skills/skill-router.ts +0 -99
- package/src/skills/skill-types.ts +0 -26
- package/src/subagents/subagent-manager.ts +0 -22
- package/src/subagents/task-tool.ts +0 -13
- package/tests/integration/agent-loop-basic.spec.ts +0 -56
- package/tests/integration/agent-skill-default-from-sandbox.spec.ts +0 -66
- package/tests/integration/concurrency-single-turn.spec.ts +0 -35
- package/tests/integration/otel-metrics-emission.spec.ts +0 -62
- package/tests/integration/otel-trace-propagation.spec.ts +0 -48
- package/tests/integration/parity-benchmark.spec.ts +0 -45
- package/tests/integration/provider-local-smoke.spec.ts +0 -63
- package/tests/integration/session-resume.spec.ts +0 -30
- package/tests/integration/skill-install-rollback.spec.ts +0 -64
- package/tests/integration/skill-sandbox-file-blob.spec.ts +0 -54
- package/tests/integration/skills-progressive-disclosure.spec.ts +0 -61
- package/tests/integration/streaming-compaction-boundary.spec.ts +0 -43
- package/tests/integration/structured-messages-agent.spec.ts +0 -265
- package/tests/integration/subagent-isolation.spec.ts +0 -24
- package/tests/security/skill-sandbox-isolation.spec.ts +0 -51
- package/tests/unit/create-tools-schema-parity.spec.ts +0 -22
- package/tests/unit/extracted-manifest.spec.ts +0 -41
- package/tests/unit/interfaces-contract.spec.ts +0 -101
- package/tests/unit/structured-messages.spec.ts +0 -176
- package/tests/unit/token-tracker.spec.ts +0 -22
- package/tsconfig.json +0 -14
- package/vitest.config.ts +0 -7
|
@@ -1,884 +0,0 @@
|
|
|
1
|
-
import { randomUUID } from 'node:crypto';
|
|
2
|
-
import type { BatchOp, BatchResult, ToolProvider, ToolResult } from '../interfaces/tool-provider';
|
|
3
|
-
import type { SandboxProvider } from '../interfaces/sandbox-provider';
|
|
4
|
-
import { recordAgentError, recordAgentStep, recordToolCallDuration } from '../observability/metrics';
|
|
5
|
-
import { traceStep } from '../observability/tracing';
|
|
6
|
-
import type { HarnessTelemetry } from '../observability/otel';
|
|
7
|
-
import { HookRunner } from '../hooks/hook-runner';
|
|
8
|
-
import { PermissionManager } from '../permissions/permission-manager';
|
|
9
|
-
import { VercelAgentLoop } from '../loop/vercel-agent-loop';
|
|
10
|
-
import { SkillManager } from '../skills/skill-manager';
|
|
11
|
-
import { SkillRouter } from '../skills/skill-router';
|
|
12
|
-
import type { SkillSummary } from '../skills/skill-types';
|
|
13
|
-
import { SingleFlightStepExecutor } from './step-executor';
|
|
14
|
-
import type { AgentAction, AgentLoop, AgentMessage, AgentRunResult, AgentStreamEvent, ToolCallAction, ToolBatchAction, ToolCallInfo, ToolResultInfo } from './types';
|
|
15
|
-
export type { AgentAction, AgentLoop, AgentMessage, AgentRunResult, AgentStreamEvent, ToolCallAction, ToolBatchAction, ToolCallInfo, ToolResultInfo };
|
|
16
|
-
export { HookRunner } from '../hooks/hook-runner';
|
|
17
|
-
export { PermissionManager } from '../permissions/permission-manager';
|
|
18
|
-
export type { PermissionMode, PermissionResolver, PermissionRequest } from '../permissions/permission-manager';
|
|
19
|
-
export type { HookCallback, HookContext, HookDecision, HookEventName } from '../interfaces/hooks';
|
|
20
|
-
|
|
21
|
-
/**
 * Host-supplied configuration and callbacks consumed by the agent.
 *
 * Only `toolProvider` is required; every other member is optional.
 */
export interface AgentRuntime {
  // --- Tool execution -------------------------------------------------------

  /** Provider used to dispatch the built-in tools (Bash, Read, Write, Edit, Glob, Grep, WebFetch, WebSearch). */
  toolProvider: ToolProvider;
  /**
   * Custom tool executor, consulted before the built-in dispatch.
   * Returning null falls through to the built-in tools.
   * When hookRunner/permissionManager are set on the runtime they wrap this callback as well,
   * so no manual wiring is needed.
   */
  executeToolAction?: (action: ToolCallAction) => Promise<ToolResult | null>;
  /** Runner for PreToolUse/PostToolUse lifecycle hooks wrapped around tool execution. */
  hookRunner?: HookRunner;
  /** Tool-level access control, checked before a tool is executed. */
  permissionManager?: PermissionManager;

  // --- Skills ---------------------------------------------------------------

  /** Sandbox provider; used to construct a SkillManager when `skillManager` is not supplied. */
  sandboxProvider?: SandboxProvider;
  /** Pre-built skill manager; takes precedence over one derived from `sandboxProvider`. */
  skillManager?: SkillManager;
  /** Location of the skill index; falls back to the SKILL_INDEX_PATH environment variable. */
  skillIndexPath?: string;

  // --- Agent loop -----------------------------------------------------------

  /** Loop that chooses the next action; takes precedence over `nextAction`. */
  loop?: AgentLoop;
  /** Minimal loop alternative: decides the next action from the message history. */
  nextAction?(messages: AgentMessage[]): Promise<AgentAction>;
  /** Upper bound on loop iterations per run (50 when omitted). */
  maxSteps?: number;

  // --- User interaction -----------------------------------------------------

  /** Backs the AskUser tool; the tool reports itself unavailable when this is missing. */
  askUser?(question: string, options?: string[]): Promise<string>;
  /** Backs the TellUser tool; the tool reports itself unavailable when this is missing. */
  tellUser?(message: string): Promise<void>;
  /** Backs the DownloadRawFile tool; the resolved string is returned as the tool output. */
  downloadRawFile?(path: string): Promise<string>;

  // --- Observability --------------------------------------------------------

  /** Telemetry handle passed to the tracing and metrics helpers. */
  telemetry?: HarnessTelemetry;
}
|
|
41
|
-
|
|
42
|
-
/**
 * Run a tool execution through the configured lifecycle:
 * PreToolUse hook -> PermissionManager check -> execute -> PostToolUse hook,
 * recording the tool-call duration along the way.
 *
 * When neither a hook runner nor a permission manager is configured the
 * executor is invoked directly; no span or duration is recorded here in that case.
 *
 * @param action  Tool call being executed; its name and args are handed to hooks and permission checks.
 * @param execute Thunk that performs the actual tool work.
 * @param runtime Source of `hookRunner`, `permissionManager` and `telemetry`.
 * @returns The executor's result, or a failed ToolResult when the pre-hook or the
 *          permission manager denies the call.
 * @throws Re-throws whatever `execute` (or a hook) throws. A throwing executor is
 *         still recorded as a failed tool call before the error propagates.
 */
async function guardedExecute(
  action: ToolCallAction,
  execute: () => Promise<ToolResult>,
  runtime: AgentRuntime,
): Promise<ToolResult> {
  const { hookRunner, permissionManager, telemetry } = runtime;
  if (!hookRunner && !permissionManager) {
    return execute();
  }

  return traceStep(telemetry, 'tool.call', { tool: action.name }, async () => {
    const started = Date.now();

    // Single place where the duration metric is emitted for a finished call.
    const finish = (result: ToolResult): ToolResult => {
      recordToolCallDuration(telemetry, action.name, Date.now() - started, result.success);
      return result;
    };
    const deny = (reason: string): ToolResult => finish({ success: false, output: '', error: reason });

    // PreToolUse hook
    if (hookRunner) {
      const pre = await hookRunner.run({ event: 'PreToolUse', toolName: action.name, input: action.args });
      if (!pre.allow) {
        return deny(pre.reason ?? 'blocked by pre-hook');
      }
    }

    // Permission check
    if (permissionManager) {
      const permission = await permissionManager.check({ toolName: action.name, input: action.args });
      if (!permission.allow) {
        return deny(permission.reason ?? 'blocked by permission manager');
      }
    }

    let result: ToolResult;
    try {
      result = await execute();
    } catch (error) {
      // Without this the metric for a throwing tool would be lost entirely.
      recordToolCallDuration(telemetry, action.name, Date.now() - started, false);
      throw error;
    }

    // PostToolUse hook (its return value is intentionally not inspected)
    if (hookRunner) {
      await hookRunner.run({ event: 'PostToolUse', toolName: action.name, input: action.args, output: result });
    }

    return finish(result);
  });
}
|
|
90
|
-
|
|
91
|
-
/**
 * Execute one tool call: the runtime's custom executor gets the first chance,
 * and a null (falsy) answer from it falls through to the built-in dispatch.
 * The whole attempt is wrapped by guardedExecute.
 *
 * @param provider Tool provider used by the built-in dispatch.
 * @param action   Tool call to execute.
 * @param runtime  Runtime holding the optional custom executor and guards.
 * @returns The result from the custom executor or from the built-in tool.
 */
async function executeTool(provider: ToolProvider, action: ToolCallAction, runtime: AgentRuntime): Promise<ToolResult> {
  const dispatch = async (): Promise<ToolResult> => {
    const custom = runtime.executeToolAction ? await runtime.executeToolAction(action) : null;
    return custom ? custom : executeBuiltinTool(provider, action, runtime);
  };

  return guardedExecute(action, dispatch, runtime);
}
|
|
101
|
-
|
|
102
|
-
/**
 * Dispatch a tool call to the matching built-in implementation.
 *
 * File, shell and search tools are forwarded to `provider`; AskUser, TellUser and
 * DownloadRawFile are forwarded to the optional callbacks on `runtime`. Optional
 * capabilities that are not configured, and unrecognised tool names, produce a
 * failed ToolResult rather than an exception.
 *
 * @param provider Tool provider backing the built-in tools.
 * @param action   Tool call (name + loosely typed args) to execute.
 * @param runtime  Runtime supplying the user-interaction callbacks.
 * @returns The tool's result.
 */
async function executeBuiltinTool(provider: ToolProvider, action: ToolCallAction, runtime: AgentRuntime): Promise<ToolResult> {
  const { args } = action;

  switch (action.name) {
    case 'Bash':
      return provider.bash(String(args.command ?? ''), {
        cwd: args.cwd as string | undefined,
        timeout: args.timeout as number | undefined
      });

    case 'Read':
      return provider.readFile(String(args.path ?? ''));

    case 'Write':
      return provider.writeFile(String(args.path ?? ''), String(args.content ?? ''));

    case 'Edit': {
      const target = String(args.path ?? '');
      const before = String(args.old_text ?? '');
      const after = String(args.new_text ?? '');
      return provider.editFile(target, before, after);
    }

    case 'Glob':
      return provider.glob(String(args.pattern ?? ''));

    case 'Grep':
      return provider.grep(String(args.pattern ?? ''), args.path as string | undefined);

    case 'WebFetch': {
      if (!provider.webFetch) {
        return { success: false, output: '', error: 'WebFetch unavailable: provider.webFetch not configured' };
      }
      return provider.webFetch({
        url: String(args.url ?? ''),
        selector: args.selector as string | undefined,
        maxContentLength: args.maxContentLength as number | undefined,
        headers: args.headers as Record<string, string> | undefined
      });
    }

    case 'WebSearch': {
      if (!provider.webSearch) {
        return { success: false, output: '', error: 'WebSearch unavailable: provider.webSearch not configured' };
      }
      return provider.webSearch(String(args.query ?? ''));
    }

    case 'AskUser': {
      if (!runtime.askUser) {
        return { success: false, output: '', error: 'AskUser unavailable: runtime.askUser not configured' };
      }
      // Choices are stringified one by one; anything that is not an array means "no choices".
      const choices = Array.isArray(args.options)
        ? args.options.map((item) => String(item))
        : undefined;
      const answer = await runtime.askUser(String(args.question ?? ''), choices);
      return { success: true, output: answer };
    }

    case 'TellUser': {
      if (!runtime.tellUser) {
        return { success: false, output: '', error: 'TellUser unavailable: runtime.tellUser not configured' };
      }
      await runtime.tellUser(String(args.message ?? ''));
      return { success: true, output: 'ok' };
    }

    case 'DownloadRawFile': {
      if (!runtime.downloadRawFile) {
        return { success: false, output: '', error: 'DownloadRawFile unavailable: runtime.downloadRawFile not configured' };
      }
      const localPath = await runtime.downloadRawFile(String(args.path ?? ''));
      return { success: true, output: localPath };
    }

    default:
      return { success: false, output: '', error: `Unknown tool: ${action.name}` };
  }
}
|
|
184
|
-
|
|
185
|
-
/**
 * Check that a built-in tool call carries the arguments it needs.
 *
 * Identifying arguments (command, path, pattern, url, query, question, message)
 * must be non-blank strings. Payload arguments are treated differently because
 * empty or whitespace-only text is meaningful there:
 *  - `Write.content` may be empty (creating or truncating a file);
 *  - `Edit.new_text` may be empty (deleting the matched text);
 *  - `Edit.old_text` must contain at least one character but may be whitespace-only.
 *
 * Tool names not listed here are not validated.
 *
 * @param action Tool call to validate.
 * @returns A human-readable error message, or null when the call is acceptable.
 */
function validateToolAction(action: ToolCallAction): string | null {
  const { name, args } = action;

  // Non-blank string: rejects non-strings, '' and whitespace-only values.
  const requireNonEmpty = (value: unknown, field: string): string | null =>
    typeof value === 'string' && value.trim().length > 0 ? null : `${name} requires non-empty ${field}`;

  // At least one character, whitespace allowed (text that is matched verbatim).
  const requireNonEmptyVerbatim = (value: unknown, field: string): string | null =>
    typeof value === 'string' && value.length > 0 ? null : `${name} requires non-empty ${field}`;

  // Any string, including the empty one; still catches omitted or mistyped arguments.
  const requireString = (value: unknown, field: string): string | null =>
    typeof value === 'string' ? null : `${name} requires ${field} to be a string`;

  switch (name) {
    case 'Bash':
      return requireNonEmpty(args.command, 'command');
    case 'Read':
    case 'DownloadRawFile':
      return requireNonEmpty(args.path, 'path');
    case 'Write':
      return requireNonEmpty(args.path, 'path') ?? requireString(args.content, 'content');
    case 'Edit':
      return (
        requireNonEmpty(args.path, 'path') ??
        requireNonEmptyVerbatim(args.old_text, 'old_text') ??
        requireString(args.new_text, 'new_text')
      );
    case 'Glob':
    case 'Grep':
      return requireNonEmpty(args.pattern, 'pattern');
    case 'WebFetch':
      return requireNonEmpty(args.url, 'url');
    case 'WebSearch':
      return requireNonEmpty(args.query, 'query');
    case 'AskUser':
      return requireNonEmpty(args.question, 'question');
    case 'TellUser':
      return requireNonEmpty(args.message, 'message');
    default:
      return null;
  }
}
|
|
214
|
-
|
|
215
|
-
/**
 * Reduce a ToolResult to the plain `{ success, output, error? }` shape.
 * The `error` key is included only when the result carries a non-nullish error;
 * any other fields on the result are dropped.
 */
function toStreamResult(r: ToolResult): { success: boolean; output: string; error?: string } {
  const { success, output, error } = r;
  return error != null ? { success, output, error } : { success, output };
}
|
|
220
|
-
|
|
221
|
-
/**
 * Build the display string stored in the `content` field of a tool-result message.
 *
 * Failures are rendered as `ERROR: <reason>`. A successful Write is summarised as
 * `ok`, and a Bash command is truncated to 100 characters and JSON-quoted.
 */
function formatToolResultContent(call: ToolCallAction, result: ToolResult): string {
  const body = result.success ? result.output : `ERROR: ${result.error ?? 'unknown failure'}`;
  const { name, args } = call;

  if (name === 'Write') {
    const status = result.success ? 'ok' : body;
    return `Write(${args.path}): ${status}`;
  }
  if (name === 'Read' || name === 'Edit') {
    return `${name}(${args.path}): ${body}`;
  }
  if (name === 'Bash') {
    const shortCommand = String(args.command ?? '').slice(0, 100);
    return `Bash(${JSON.stringify(shortCommand)}): ${body}`;
  }
  return `${name}: ${body}`;
}
|
|
239
|
-
|
|
240
|
-
/**
 * Build the display string for an assistant message that requests tool calls:
 * one `- Tool: summary` line per call, wrapped in `[Tool calls: ...]`.
 *
 * Write shows the path and content length instead of the content, Bash shows the
 * first 200 characters of the command, and tools without a dedicated format show
 * their JSON-encoded args, clipped to 200 characters with an ellipsis.
 */
function formatToolCallContent(calls: ToolCallAction[]): string {
  const describe = (call: ToolCallAction): string => {
    const { name, args } = call;

    if (name === 'Write') {
      const target = String(args.path ?? '');
      const size = String(args.content ?? '').length;
      return `- Write: path="${target}" (${size} chars)`;
    }
    if (name === 'Bash') {
      return `- Bash: ${JSON.stringify(String(args.command ?? '').slice(0, 200))}`;
    }
    if (name === 'Read' || name === 'Edit') {
      return `- ${name}: path="${args.path}"`;
    }
    if (name === 'Glob') {
      return `- Glob: pattern="${args.pattern}"`;
    }
    if (name === 'Grep') {
      return `- Grep: pattern="${args.pattern}" path="${args.path ?? '.'}"`;
    }

    const encoded = JSON.stringify(args);
    const clipped = encoded.length > 200 ? encoded.slice(0, 200) + '…' : encoded;
    return `- ${name}: ${clipped}`;
  };

  const listing = calls.map((call) => describe(call)).join('\n');
  return `[Tool calls:\n${listing}]`;
}
|
|
269
|
-
|
|
270
|
-
/** Built-in tool names that executeBatch routes through the provider's batch endpoint. */
const SANDBOX_TOOLS: ReadonlySet<string> = new Set<string>([
  'Bash',
  'Read',
  'Write',
  'Edit',
  'Glob',
  'Grep',
]);

/** True when `name` is one of the sandbox tools (exact, case-sensitive match). */
function isSandboxTool(name: string): boolean {
  const listed = SANDBOX_TOOLS.has(name);
  return listed;
}
|
|
275
|
-
|
|
276
|
-
/**
 * Translate a validated tool call into the batch operation(s) that implement it.
 *
 * - Bash  -> `exec` with a 60 s default timeout and an optional `cwd`.
 * - Write -> `write_file`; Read -> `read_file`.
 * - Edit  -> only the `read_file` half; the replacement and the follow-up
 *            `write_file` are produced by the caller once the content is known.
 * - Glob / Grep -> `exec` of `find` / `grep -R -n`, rooted at `/` unless Grep is
 *            given a path, with output capped at 200 lines.
 *
 * Any other tool name yields no operations.
 */
function toolCallToBatchOps(call: ToolCallAction): BatchOp[] {
  const { name, args } = call;
  // Makes a value safe inside single quotes: each ' becomes '\''.
  const escapeSingleQuotes = (raw: string): string => raw.replace(/'/g, "'\\''");

  if (name === 'Bash') {
    const exec: BatchOp = {
      op: 'exec' as const,
      command: String(args.command ?? ''),
      timeoutMs: (args.timeout as number | undefined) ?? 60_000,
    };
    if (args.cwd) (exec as { cwd?: string }).cwd = String(args.cwd);
    return [exec];
  }

  if (name === 'Write') {
    const path = String(args.path ?? '');
    const content = String(args.content ?? '');
    return [{ op: 'write_file' as const, path, content }];
  }

  if (name === 'Read' || name === 'Edit') {
    // For Edit this is just the read step of read -> replace -> write.
    return [{ op: 'read_file' as const, path: String(args.path ?? '') }];
  }

  if (name === 'Glob') {
    const pattern = escapeSingleQuotes(String(args.pattern ?? ''));
    return [{
      op: 'exec' as const,
      command: `find / -type f -name '${pattern}' 2>/dev/null | head -n 200`,
    }];
  }

  if (name === 'Grep') {
    const pattern = escapeSingleQuotes(String(args.pattern ?? ''));
    const root = escapeSingleQuotes(String(args.path ?? '/') || '/');
    return [{
      op: 'exec' as const,
      command: `grep -R -n -- '${pattern}' '${root}' 2>/dev/null | head -n 200`,
    }];
  }

  return [];
}
|
|
329
|
-
|
|
330
|
-
/**
 * Convert one batch result into a ToolResult.
 *
 * A result flagged unsuccessful is passed through with its error. An `exec`
 * result succeeds only on exit code 0; otherwise stderr (or `exit code N` when
 * stderr is empty) becomes the error. `read_file` yields the file content and
 * anything else (write_file) yields `ok`.
 */
function batchResultToToolResult(result: BatchResult): ToolResult {
  if (!result.success) {
    const { error } = result as { error: string };
    return { success: false, output: '', error };
  }

  switch (result.op) {
    case 'exec': {
      const exitedCleanly = result.exitCode === 0;
      const failureReason = result.stderr || `exit code ${result.exitCode}`;
      return {
        success: exitedCleanly,
        output: result.stdout ?? '',
        error: exitedCleanly ? undefined : failureReason,
      };
    }
    case 'read_file':
      return { success: true, output: result.content };
    default:
      // write_file
      return { success: true, output: 'ok' };
  }
}
|
|
349
|
-
|
|
350
|
-
/**
 * Execute a group of tool calls and return their results in input order.
 *
 * Strategy:
 *  - No `provider.batch`, or no sandbox tools in the group: run every call
 *    through executeTool in parallel.
 *  - Hooks, a permission manager or a custom executor are configured: sandbox
 *    calls run one after another through executeTool so that those guards are
 *    applied. The raw batch endpoint would bypass them.
 *  - Otherwise: sandbox calls are sent to `provider.batch` in one request. Edit
 *    is done in two passes (read in the first batch, replacement computed here,
 *    write in a second batch).
 *  - Non-sandbox calls always run through executeTool in parallel, after the
 *    sandbox calls.
 *
 * Individual failures never reject the returned promise; they are reported as
 * failed ToolResults. If the first batch request itself throws, the whole group
 * is retried through the parallel path.
 *
 * Known limitation of the two-pass Edit: all Edit writes happen after every
 * first-pass op, so a Read, Write or Bash of the same file placed after an Edit
 * in the same group still observes the pre-edit content.
 *
 * @param calls    Validated tool calls to execute.
 * @param provider Tool provider; `batch` is optional.
 * @param runtime  Runtime carrying guards and callbacks for executeTool.
 * @returns One ToolResult per entry of `calls`, at the same index.
 */
async function executeBatch(
  calls: ToolCallAction[],
  provider: ToolProvider,
  runtime: AgentRuntime,
): Promise<ToolResult[]> {
  const toFailure = (error: unknown): ToolResult => ({
    success: false,
    output: '',
    error: error instanceof Error ? error.message : String(error),
  });

  // executeTool with exceptions folded into a failed result.
  const runGuarded = async (call: ToolCallAction): Promise<ToolResult> => {
    try {
      return await executeTool(provider, call, runtime);
    } catch (error) {
      return toFailure(error);
    }
  };
  const runAllInParallel = (): Promise<ToolResult[]> => Promise.all(calls.map((call) => runGuarded(call)));

  // If batch is not available, fall back to parallel execution.
  if (!provider.batch) {
    return runAllInParallel();
  }

  // Partition into sandbox ops and non-sandbox ops, remembering original positions.
  const sandboxCalls: { index: number; call: ToolCallAction }[] = [];
  const nonSandboxCalls: { index: number; call: ToolCallAction }[] = [];
  calls.forEach((call, index) => {
    (isSandboxTool(call.name) ? sandboxCalls : nonSandboxCalls).push({ index, call });
  });

  // Nothing to batch: just run everything in parallel.
  if (sandboxCalls.length === 0) {
    return runAllInParallel();
  }

  const allResults: ToolResult[] = new Array(calls.length);

  const guardsConfigured = Boolean(
    runtime.hookRunner || runtime.permissionManager || runtime.executeToolAction,
  );

  if (guardsConfigured) {
    // Sequential, like the batch endpoint, but through the guarded path so hooks,
    // permission checks and the custom executor see every sandbox call.
    for (const { index, call } of sandboxCalls) {
      allResults[index] = await runGuarded(call);
    }
  } else {
    // Build BatchOp[] from sandbox calls, tracking which original call each op maps to.
    const batchOps: BatchOp[] = [];
    const opMapping: { callsIndex: number; isEditRead: boolean }[] = [];
    for (const { index, call } of sandboxCalls) {
      for (const op of toolCallToBatchOps(call)) {
        opMapping.push({ callsIndex: index, isEditRead: call.name === 'Edit' });
        batchOps.push(op);
      }
    }

    // First batch pass (includes Edit reads).
    let batchResults: BatchResult[];
    try {
      batchResults = await provider.batch(batchOps);
    } catch {
      // If the batch fails entirely, fall back to parallel execution.
      return runAllInParallel();
    }

    const editWrites: BatchOp[] = [];
    const editWriteMapping: { callsIndex: number }[] = [];
    // Latest content produced by an earlier Edit of the same path in this group,
    // so that consecutive Edits build on each other instead of overwriting.
    const pendingContent = new Map<string, string>();

    // Tolerate a provider that returns fewer or more results than ops.
    const answered = Math.min(batchResults.length, opMapping.length);
    for (let i = 0; i < answered; i++) {
      const mapping = opMapping[i]!;
      const result = batchResults[i]!;

      if (!mapping.isEditRead || !result.success) {
        allResults[mapping.callsIndex] = batchResultToToolResult(result);
        continue;
      }

      const call = calls[mapping.callsIndex]!;
      const path = String(call.args.path ?? '');
      const content = pendingContent.get(path) ?? (result as { content: string }).content;
      const oldText = String(call.args.old_text ?? '');
      if (!content.includes(oldText)) {
        allResults[mapping.callsIndex] = {
          success: false,
          output: '',
          error: 'old text not found',
          metadata: { path },
        };
        continue;
      }

      const newText = String(call.args.new_text ?? '');
      // A replacer function keeps `$&`, `$$`, `$1` etc. in newText literal.
      const newContent = content.replace(oldText, () => newText);
      pendingContent.set(path, newContent);
      editWrites.push({ op: 'write_file' as const, path, content: newContent });
      editWriteMapping.push({ callsIndex: mapping.callsIndex });
    }

    // Second batch pass for Edit writes (if any).
    if (editWrites.length > 0) {
      try {
        const writeResults = await provider.batch(editWrites);
        const written = Math.min(writeResults.length, editWriteMapping.length);
        for (let i = 0; i < written; i++) {
          allResults[editWriteMapping[i]!.callsIndex] = batchResultToToolResult(writeResults[i]!);
        }
      } catch (error) {
        for (const { callsIndex } of editWriteMapping) {
          allResults[callsIndex] = toFailure(error);
        }
      }
    }
  }

  // Run non-sandbox ops in parallel.
  if (nonSandboxCalls.length > 0) {
    const localResults = await Promise.all(nonSandboxCalls.map(({ call }) => runGuarded(call)));
    nonSandboxCalls.forEach(({ index }, i) => {
      allResults[index] = localResults[i]!;
    });
  }

  // Callers index the results positionally; never hand back a hole.
  for (let i = 0; i < allResults.length; i++) {
    if (allResults[i] === undefined) {
      allResults[i] = { success: false, output: '', error: 'no result returned for tool call' };
    }
  }

  return allResults;
}
|
|
502
|
-
|
|
503
|
-
export function createAgent(runtime: AgentRuntime) {
|
|
504
|
-
const executor = new SingleFlightStepExecutor();
|
|
505
|
-
const skillManager =
|
|
506
|
-
runtime.skillManager ??
|
|
507
|
-
(runtime.sandboxProvider ? new SkillManager(runtime.sandboxProvider, runtime.telemetry) : undefined);
|
|
508
|
-
const skillIndexPath = runtime.skillIndexPath ?? process.env.SKILL_INDEX_PATH;
|
|
509
|
-
let skillSummariesPromise: Promise<SkillSummary[]> | null = null;
|
|
510
|
-
const skillRouter = new SkillRouter();
|
|
511
|
-
|
|
512
|
-
const loop: AgentLoop =
|
|
513
|
-
runtime.loop ??
|
|
514
|
-
(runtime.nextAction
|
|
515
|
-
? { nextAction: runtime.nextAction }
|
|
516
|
-
: new VercelAgentLoop());
|
|
517
|
-
|
|
518
|
-
async function resolveSkillContext(prompt: string): Promise<string> {
|
|
519
|
-
if (!skillManager || !skillIndexPath) return '';
|
|
520
|
-
|
|
521
|
-
try {
|
|
522
|
-
skillSummariesPromise ??= skillManager.discover(skillIndexPath);
|
|
523
|
-
const summaries = await skillSummariesPromise;
|
|
524
|
-
if (summaries.length === 0) return '';
|
|
525
|
-
|
|
526
|
-
const matched = await skillRouter.selectSkill(prompt, summaries);
|
|
527
|
-
if (!matched) return '';
|
|
528
|
-
|
|
529
|
-
const invoked = await skillManager.invoke(matched.name);
|
|
530
|
-
const exec = invoked.execution;
|
|
531
|
-
const execSummary = exec
|
|
532
|
-
? `Skill execution: attempted=${exec.attempted} success=${exec.success} commandsRun=${exec.commandsRun ?? 0}`
|
|
533
|
-
: 'Skill execution: n/a';
|
|
534
|
-
return `Skill selected: ${matched.name}\n${execSummary}\n${invoked.instructions}`;
|
|
535
|
-
} catch {
|
|
536
|
-
return '';
|
|
537
|
-
}
|
|
538
|
-
}
|
|
539
|
-
|
|
540
|
-
return {
|
|
541
|
-
async run(prompt: string, options?: { history?: AgentMessage[] }): Promise<AgentRunResult> {
|
|
542
|
-
return traceStep(runtime.telemetry, 'agent.run', { component: 'agent' }, async () => {
|
|
543
|
-
const skillContext = await resolveSkillContext(prompt);
|
|
544
|
-
const effectivePrompt = skillContext
|
|
545
|
-
? `${prompt}\n\nSkill guidance:\n${skillContext}`
|
|
546
|
-
: prompt;
|
|
547
|
-
const history = options?.history ?? [];
|
|
548
|
-
const messages: AgentMessage[] = [...history, { role: 'user', content: effectivePrompt }];
|
|
549
|
-
const maxSteps = runtime.maxSteps ?? 50;
|
|
550
|
-
const maxConsecutiveInvalid = 3;
|
|
551
|
-
let consecutiveInvalid = 0;
|
|
552
|
-
|
|
553
|
-
for (let step = 1; step <= maxSteps; step += 1) {
|
|
554
|
-
recordAgentStep(runtime.telemetry);
|
|
555
|
-
|
|
556
|
-
const action = await traceStep(
|
|
557
|
-
runtime.telemetry,
|
|
558
|
-
'agent.step',
|
|
559
|
-
{ step },
|
|
560
|
-
() => executor.run(() => loop.nextAction(messages))
|
|
561
|
-
);
|
|
562
|
-
|
|
563
|
-
if (action.type === 'final') {
|
|
564
|
-
messages.push({ role: 'assistant', content: action.content });
|
|
565
|
-
return {
|
|
566
|
-
messages,
|
|
567
|
-
output: action.content,
|
|
568
|
-
steps: step
|
|
569
|
-
};
|
|
570
|
-
}
|
|
571
|
-
|
|
572
|
-
if (action.type === 'tool_batch') {
|
|
573
|
-
// Validate each call individually; partition into valid and invalid
|
|
574
|
-
const validCalls: ToolCallAction[] = [];
|
|
575
|
-
const invalidMessages: string[] = [];
|
|
576
|
-
for (const call of action.calls) {
|
|
577
|
-
const err = validateToolAction(call);
|
|
578
|
-
if (err) {
|
|
579
|
-
invalidMessages.push(`${call.name}: ERROR: ${err}`);
|
|
580
|
-
} else {
|
|
581
|
-
validCalls.push(call);
|
|
582
|
-
}
|
|
583
|
-
}
|
|
584
|
-
|
|
585
|
-
// Record assistant message with structured toolCalls
|
|
586
|
-
const allCalls = action.calls;
|
|
587
|
-
messages.push({
|
|
588
|
-
role: 'assistant',
|
|
589
|
-
content: formatToolCallContent(allCalls),
|
|
590
|
-
toolCalls: allCalls.map(c => ({
|
|
591
|
-
toolCallId: c.toolCallId ?? randomUUID(),
|
|
592
|
-
toolName: c.name,
|
|
593
|
-
args: c.args,
|
|
594
|
-
})),
|
|
595
|
-
});
|
|
596
|
-
|
|
597
|
-
// Execute valid calls via batch (sequential sandbox ops) or parallel fallback
|
|
598
|
-
if (validCalls.length > 0) {
|
|
599
|
-
const results = await executeBatch(validCalls, runtime.toolProvider, runtime);
|
|
600
|
-
for (let i = 0; i < validCalls.length; i++) {
|
|
601
|
-
const call = validCalls[i]!;
|
|
602
|
-
const r = results[i]!;
|
|
603
|
-
if (!r.success) {
|
|
604
|
-
recordAgentError(runtime.telemetry);
|
|
605
|
-
}
|
|
606
|
-
const resultText = r.success ? r.output : `ERROR: ${r.error ?? 'unknown failure'}`;
|
|
607
|
-
messages.push({
|
|
608
|
-
role: 'tool',
|
|
609
|
-
content: formatToolResultContent(call, r),
|
|
610
|
-
toolResults: [{
|
|
611
|
-
toolCallId: call.toolCallId ?? '',
|
|
612
|
-
toolName: call.name,
|
|
613
|
-
result: resultText,
|
|
614
|
-
isError: !r.success,
|
|
615
|
-
}],
|
|
616
|
-
});
|
|
617
|
-
}
|
|
618
|
-
}
|
|
619
|
-
|
|
620
|
-
// Append messages for invalid calls so the LLM sees the errors
|
|
621
|
-
for (const msg of invalidMessages) {
|
|
622
|
-
messages.push({ role: 'tool', content: msg });
|
|
623
|
-
}
|
|
624
|
-
|
|
625
|
-
consecutiveInvalid = invalidMessages.length > 0 && validCalls.length === 0
|
|
626
|
-
? consecutiveInvalid + 1
|
|
627
|
-
: 0;
|
|
628
|
-
|
|
629
|
-
if (consecutiveInvalid >= maxConsecutiveInvalid) {
|
|
630
|
-
recordAgentError(runtime.telemetry);
|
|
631
|
-
const msg = `ERROR: ${maxConsecutiveInvalid} consecutive invalid actions. Stopping.`;
|
|
632
|
-
return { messages, output: msg, steps: step };
|
|
633
|
-
}
|
|
634
|
-
|
|
635
|
-
continue;
|
|
636
|
-
}
|
|
637
|
-
|
|
638
|
-
// Single tool call — record assistant message with structured toolCalls
|
|
639
|
-
const singleCallId = action.toolCallId ?? randomUUID();
|
|
640
|
-
messages.push({
|
|
641
|
-
role: 'assistant',
|
|
642
|
-
content: formatToolCallContent([action]),
|
|
643
|
-
toolCalls: [{
|
|
644
|
-
toolCallId: singleCallId,
|
|
645
|
-
toolName: action.name,
|
|
646
|
-
args: action.args,
|
|
647
|
-
}],
|
|
648
|
-
});
|
|
649
|
-
|
|
650
|
-
const validationError = validateToolAction(action);
|
|
651
|
-
if (validationError) {
|
|
652
|
-
consecutiveInvalid += 1;
|
|
653
|
-
if (consecutiveInvalid >= maxConsecutiveInvalid) {
|
|
654
|
-
recordAgentError(runtime.telemetry);
|
|
655
|
-
const msg = `ERROR: ${maxConsecutiveInvalid} consecutive invalid actions (last: ${validationError}). Stopping.`;
|
|
656
|
-
messages.push({ role: 'tool', content: `${action.name}: ${msg}` });
|
|
657
|
-
return { messages, output: msg, steps: step };
|
|
658
|
-
}
|
|
659
|
-
} else {
|
|
660
|
-
consecutiveInvalid = 0;
|
|
661
|
-
}
|
|
662
|
-
const result = validationError
|
|
663
|
-
? ({ success: false, output: '', error: validationError } as ToolResult)
|
|
664
|
-
: await executor.run(async () => {
|
|
665
|
-
try {
|
|
666
|
-
return await executeTool(runtime.toolProvider, action, runtime);
|
|
667
|
-
} catch (error) {
|
|
668
|
-
return {
|
|
669
|
-
success: false,
|
|
670
|
-
output: '',
|
|
671
|
-
error: error instanceof Error ? error.message : String(error)
|
|
672
|
-
};
|
|
673
|
-
}
|
|
674
|
-
});
|
|
675
|
-
if (!result.success) {
|
|
676
|
-
recordAgentError(runtime.telemetry);
|
|
677
|
-
}
|
|
678
|
-
const singleResultText = result.success ? result.output : `ERROR: ${result.error ?? 'unknown failure'}`;
|
|
679
|
-
messages.push({
|
|
680
|
-
role: 'tool',
|
|
681
|
-
content: formatToolResultContent(action, result),
|
|
682
|
-
toolResults: [{
|
|
683
|
-
toolCallId: singleCallId,
|
|
684
|
-
toolName: action.name,
|
|
685
|
-
result: singleResultText,
|
|
686
|
-
isError: !result.success,
|
|
687
|
-
}],
|
|
688
|
-
});
|
|
689
|
-
}
|
|
690
|
-
|
|
691
|
-
recordAgentError(runtime.telemetry);
|
|
692
|
-
return {
|
|
693
|
-
messages,
|
|
694
|
-
output: 'ERROR: max steps exceeded',
|
|
695
|
-
steps: maxSteps
|
|
696
|
-
};
|
|
697
|
-
});
|
|
698
|
-
},
|
|
699
|
-
|
|
700
|
-
/**
 * Runs the agent loop for `prompt` and yields progress as stream events.
 *
 * Per step the generator yields `step_start`, then either streamed
 * `text_delta` / `tool_start` events (when `loop.streamAction` exists) or
 * synthetic `tool_start` events built from `loop.nextAction()`, followed by
 * one `tool_end` per executed call and a closing `step_end`. The run ends
 * with a single `done` event carrying the final text, or
 * `'ERROR: max steps exceeded'` once `runtime.maxSteps` (default 50) steps
 * have been used.
 *
 * Every tool call is given exactly one id (the model-supplied `toolCallId`,
 * or a generated UUID when it is missing). That same id is written to the
 * assistant message's `toolCalls` entry and to the matching tool-result
 * message, so call and result can always be correlated in the history.
 *
 * @param prompt  User prompt; skill guidance from `resolveSkillContext` is
 *                appended to it when any is returned.
 * @param options Optional prior conversation (`history`) that is placed
 *                before the new user message.
 */
async *stream(prompt: string, options?: { history?: AgentMessage[] }): AsyncGenerator<AgentStreamEvent> {
  const REPL_MARKER = '##REPL##\n';
  const skillContext = await resolveSkillContext(prompt);
  const effectivePrompt = skillContext
    ? `${prompt}\n\nSkill guidance:\n${skillContext}`
    : prompt;
  const history = options?.history ?? [];
  const messages: AgentMessage[] = [...history, { role: 'user', content: effectivePrompt }];
  const maxSteps = runtime.maxSteps ?? 50;

  // Builds the history entry for one executed tool call. Failures are
  // recorded with an `ERROR: ` prefix and `isError: true`.
  const toToolMessage = (call: ToolCallAction, toolCallId: string, r: ToolResult): AgentMessage => ({
    role: 'tool',
    content: formatToolResultContent(call, r),
    toolResults: [{
      toolCallId,
      toolName: call.name,
      result: r.success ? r.output : `ERROR: ${r.error ?? 'unknown failure'}`,
      isError: !r.success,
    }],
  });

  for (let step = 1; step <= maxSteps; step++) {
    yield { type: 'step_start', step };

    if (loop.streamAction) {
      // Streaming path: forward text deltas and tool starts as they arrive,
      // collecting the tool calls so they can be executed after the stream ends.
      const pendingTools: ToolCallAction[] = [];
      let finalText = '';
      for await (const event of loop.streamAction(messages)) {
        if (event.type === 'text_delta') {
          finalText += event.text;
          yield event;
        }
        if (event.type === 'tool_start') {
          pendingTools.push({
            type: 'tool',
            name: event.name,
            args: event.args,
            ...(event.toolCallId != null ? { toolCallId: event.toolCallId } : {}),
          });
          yield event;
        }
      }

      // No tool calls requested: the accumulated text is the final answer.
      if (pendingTools.length === 0) {
        messages.push({ role: 'assistant', content: finalText });
        yield { type: 'step_end', step };
        yield { type: 'done', output: finalText, steps: step };
        return;
      }

      // Resolve each call's id once so the assistant message and the tool
      // results below agree even when the model did not supply an id.
      const pendingIds = pendingTools.map(t => t.toolCallId ?? randomUUID());

      // Record the assistant message with structured toolCalls.
      messages.push({
        role: 'assistant',
        content: finalText || formatToolCallContent(pendingTools),
        toolCalls: pendingTools.map((t, i) => ({
          toolCallId: pendingIds[i]!,
          toolName: t.name,
          args: t.args,
        })),
      });

      // REPL fallback: if any Bash command starts with ##REPL##, fall back to
      // non-streaming nextAction() which routes through LCMToolLoop REPL handling.
      const hasRepl = pendingTools.some(
        t => t.name === 'Bash' && String(t.args.command ?? '').startsWith(REPL_MARKER)
      );
      if (hasRepl) {
        const action = await loop.nextAction(messages);
        if (action.type === 'final') {
          yield { type: 'text_delta', text: action.content };
          messages.push({ role: 'assistant', content: action.content });
          yield { type: 'step_end', step };
          yield { type: 'done', output: action.content, steps: step };
          return;
        }

        // Non-final REPL result: run each returned call one at a time and
        // keep looping. A thrown error is reported as a failed tool_end
        // rather than aborting the stream.
        const replCalls: ToolCallAction[] = action.type === 'tool_batch' ? action.calls : [action];
        for (const call of replCalls) {
          const callId = call.toolCallId ?? randomUUID();
          yield { type: 'tool_start', name: call.name, args: call.args, toolCallId: callId };
          try {
            const r = await executeTool(runtime.toolProvider, call, runtime);
            yield { type: 'tool_end', name: call.name, result: toStreamResult(r) };
            messages.push(toToolMessage(call, callId, r));
          } catch (error) {
            const errMsg = error instanceof Error ? error.message : String(error);
            yield { type: 'tool_end', name: call.name, result: { success: false, output: '', error: errMsg } };
            messages.push({
              role: 'tool',
              content: `${call.name}: ERROR: ${errMsg}`,
              toolResults: [{ toolCallId: callId, toolName: call.name, result: errMsg, isError: true }],
            });
          }
        }
        yield { type: 'step_end', step };
        continue;
      }

      // Execute the collected tools through executeBatch; results are
      // matched to calls by index.
      const results = await executeBatch(pendingTools, runtime.toolProvider, runtime);
      for (let i = 0; i < pendingTools.length; i++) {
        const call = pendingTools[i]!;
        const r = results[i]!;
        yield { type: 'tool_end', name: call.name, result: toStreamResult(r) };
        messages.push(toToolMessage(call, pendingIds[i]!, r));
      }
    } else {
      // Fallback: wrap nextAction() in synthetic events.
      const action = await loop.nextAction(messages);

      if (action.type === 'final') {
        messages.push({ role: 'assistant', content: action.content });
        yield { type: 'step_end', step };
        yield { type: 'done', output: action.content, steps: step };
        return;
      }

      const calls: ToolCallAction[] = action.type === 'tool_batch' ? action.calls : [action];
      // One id per call, shared by the assistant message, the tool_start
      // event and the tool-result message.
      const callIds = calls.map(c => c.toolCallId ?? randomUUID());

      // Record assistant message with structured toolCalls.
      messages.push({
        role: 'assistant',
        content: formatToolCallContent(calls),
        toolCalls: calls.map((c, i) => ({
          toolCallId: callIds[i]!,
          toolName: c.name,
          args: c.args,
        })),
      });
      for (let i = 0; i < calls.length; i++) {
        const call = calls[i]!;
        yield { type: 'tool_start', name: call.name, args: call.args, toolCallId: callIds[i]! };
      }
      const results = await executeBatch(calls, runtime.toolProvider, runtime);
      for (let i = 0; i < calls.length; i++) {
        const call = calls[i]!;
        const r = results[i]!;
        yield { type: 'tool_end', name: call.name, result: toStreamResult(r) };
        messages.push(toToolMessage(call, callIds[i]!, r));
      }
    }

    yield { type: 'step_end', step };
  }

  // Step budget exhausted without a final answer.
  yield { type: 'done', output: 'ERROR: max steps exceeded', steps: maxSteps };
}
|
|
883
|
-
};
|
|
884
|
-
}
|