@bluecopa/harness 1.0.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. package/README.md +212 -117
  2. package/dist/arc/index.d.ts +796 -0
  3. package/dist/arc/index.js +2863 -0
  4. package/dist/arc/index.js.map +1 -0
  5. package/dist/observability/otel.d.ts +36 -0
  6. package/dist/observability/otel.js +73 -0
  7. package/dist/observability/otel.js.map +1 -0
  8. package/dist/shared-types-DRxnerLT.d.ts +138 -0
  9. package/dist/skills/index.d.ts +67 -0
  10. package/dist/skills/index.js +282 -0
  11. package/dist/skills/index.js.map +1 -0
  12. package/package.json +26 -2
  13. package/AGENTS.md +0 -18
  14. package/docs/guides/observability.md +0 -32
  15. package/docs/guides/providers.md +0 -51
  16. package/docs/guides/skills.md +0 -25
  17. package/docs/security/skill-sandbox-threat-model.md +0 -20
  18. package/src/agent/create-agent.ts +0 -884
  19. package/src/agent/create-tools.ts +0 -33
  20. package/src/agent/step-executor.ts +0 -15
  21. package/src/agent/types.ts +0 -57
  22. package/src/context/llm-compaction-strategy.ts +0 -37
  23. package/src/context/prepare-step.ts +0 -65
  24. package/src/context/token-tracker.ts +0 -26
  25. package/src/extracted/manifest.json +0 -10
  26. package/src/extracted/prompts/compaction.md +0 -5
  27. package/src/extracted/prompts/system.md +0 -5
  28. package/src/extracted/tools.json +0 -82
  29. package/src/hooks/hook-runner.ts +0 -22
  30. package/src/hooks/tool-wrappers.ts +0 -64
  31. package/src/interfaces/compaction-strategy.ts +0 -18
  32. package/src/interfaces/hooks.ts +0 -24
  33. package/src/interfaces/sandbox-provider.ts +0 -29
  34. package/src/interfaces/session-store.ts +0 -48
  35. package/src/interfaces/tool-provider.ts +0 -70
  36. package/src/loop/bridge.ts +0 -363
  37. package/src/loop/context-store.ts +0 -207
  38. package/src/loop/lcm-tool-loop.ts +0 -163
  39. package/src/loop/vercel-agent-loop.ts +0 -279
  40. package/src/observability/context.ts +0 -17
  41. package/src/observability/metrics.ts +0 -27
  42. package/src/observability/otel.ts +0 -105
  43. package/src/observability/tracing.ts +0 -13
  44. package/src/optimization/agent-evaluator.ts +0 -40
  45. package/src/optimization/config-serializer.ts +0 -16
  46. package/src/optimization/optimization-runner.ts +0 -39
  47. package/src/optimization/trace-collector.ts +0 -33
  48. package/src/permissions/permission-manager.ts +0 -34
  49. package/src/providers/composite-tool-provider.ts +0 -72
  50. package/src/providers/control-plane-e2b-executor.ts +0 -218
  51. package/src/providers/e2b-tool-provider.ts +0 -68
  52. package/src/providers/local-tool-provider.ts +0 -190
  53. package/src/providers/skill-sandbox-provider.ts +0 -46
  54. package/src/sessions/file-session-store.ts +0 -61
  55. package/src/sessions/in-memory-session-store.ts +0 -39
  56. package/src/sessions/session-manager.ts +0 -44
  57. package/src/skills/skill-loader.ts +0 -52
  58. package/src/skills/skill-manager.ts +0 -175
  59. package/src/skills/skill-router.ts +0 -99
  60. package/src/skills/skill-types.ts +0 -26
  61. package/src/subagents/subagent-manager.ts +0 -22
  62. package/src/subagents/task-tool.ts +0 -13
  63. package/tests/integration/agent-loop-basic.spec.ts +0 -56
  64. package/tests/integration/agent-skill-default-from-sandbox.spec.ts +0 -66
  65. package/tests/integration/concurrency-single-turn.spec.ts +0 -35
  66. package/tests/integration/otel-metrics-emission.spec.ts +0 -62
  67. package/tests/integration/otel-trace-propagation.spec.ts +0 -48
  68. package/tests/integration/parity-benchmark.spec.ts +0 -45
  69. package/tests/integration/provider-local-smoke.spec.ts +0 -63
  70. package/tests/integration/session-resume.spec.ts +0 -30
  71. package/tests/integration/skill-install-rollback.spec.ts +0 -64
  72. package/tests/integration/skill-sandbox-file-blob.spec.ts +0 -54
  73. package/tests/integration/skills-progressive-disclosure.spec.ts +0 -61
  74. package/tests/integration/streaming-compaction-boundary.spec.ts +0 -43
  75. package/tests/integration/structured-messages-agent.spec.ts +0 -265
  76. package/tests/integration/subagent-isolation.spec.ts +0 -24
  77. package/tests/security/skill-sandbox-isolation.spec.ts +0 -51
  78. package/tests/unit/create-tools-schema-parity.spec.ts +0 -22
  79. package/tests/unit/extracted-manifest.spec.ts +0 -41
  80. package/tests/unit/interfaces-contract.spec.ts +0 -101
  81. package/tests/unit/structured-messages.spec.ts +0 -176
  82. package/tests/unit/token-tracker.spec.ts +0 -22
  83. package/tsconfig.json +0 -14
  84. package/vitest.config.ts +0 -7
@@ -1,884 +0,0 @@
1
- import { randomUUID } from 'node:crypto';
2
- import type { BatchOp, BatchResult, ToolProvider, ToolResult } from '../interfaces/tool-provider';
3
- import type { SandboxProvider } from '../interfaces/sandbox-provider';
4
- import { recordAgentError, recordAgentStep, recordToolCallDuration } from '../observability/metrics';
5
- import { traceStep } from '../observability/tracing';
6
- import type { HarnessTelemetry } from '../observability/otel';
7
- import { HookRunner } from '../hooks/hook-runner';
8
- import { PermissionManager } from '../permissions/permission-manager';
9
- import { VercelAgentLoop } from '../loop/vercel-agent-loop';
10
- import { SkillManager } from '../skills/skill-manager';
11
- import { SkillRouter } from '../skills/skill-router';
12
- import type { SkillSummary } from '../skills/skill-types';
13
- import { SingleFlightStepExecutor } from './step-executor';
14
- import type { AgentAction, AgentLoop, AgentMessage, AgentRunResult, AgentStreamEvent, ToolCallAction, ToolBatchAction, ToolCallInfo, ToolResultInfo } from './types';
15
- export type { AgentAction, AgentLoop, AgentMessage, AgentRunResult, AgentStreamEvent, ToolCallAction, ToolBatchAction, ToolCallInfo, ToolResultInfo };
16
- export { HookRunner } from '../hooks/hook-runner';
17
- export { PermissionManager } from '../permissions/permission-manager';
18
- export type { PermissionMode, PermissionResolver, PermissionRequest } from '../permissions/permission-manager';
19
- export type { HookCallback, HookContext, HookDecision, HookEventName } from '../interfaces/hooks';
20
-
21
/**
 * Host-supplied configuration for an agent: how tools are executed, how the
 * next action is chosen, how the user is reached, and which optional
 * lifecycle services (hooks, permissions, telemetry, skills) are active.
 */
export interface AgentRuntime {
  // --- Tool execution -----------------------------------------------------

  /** Provider that carries out the built-in tools (Bash, Read, Write, Edit, Glob, Grep, WebFetch, WebSearch). */
  toolProvider: ToolProvider;

  /**
   * Custom tool executor, consulted before the built-in dispatch.
   * Returning null lets the call fall through to the built-in tools.
   * When hookRunner / permissionManager are set on the runtime they wrap this
   * callback automatically; no manual wiring is required.
   */
  executeToolAction?: (action: ToolCallAction) => Promise<ToolResult | null>;

  /** Runs PreToolUse / PostToolUse lifecycle hooks around tool calls, custom ones included. */
  hookRunner?: HookRunner;

  /** Tool-level access control, checked before a tool call (custom ones included) is executed. */
  permissionManager?: PermissionManager;

  // --- Decision loop ------------------------------------------------------

  /** Loop used to obtain the next action; takes precedence over `nextAction`. */
  loop?: AgentLoop;

  /** Shorthand for supplying only the next-action function instead of a full loop. */
  nextAction?(messages: AgentMessage[]): Promise<AgentAction>;

  /** Upper bound on loop iterations per run; createAgent falls back to 50 when omitted. */
  maxSteps?: number;

  // --- User interaction (each backs the built-in tool of the same name) ----

  /** Backs the AskUser tool; the tool reports itself unavailable when this is missing. */
  askUser?(question: string, options?: string[]): Promise<string>;

  /** Backs the TellUser tool; the tool reports itself unavailable when this is missing. */
  tellUser?(message: string): Promise<void>;

  /** Backs the DownloadRawFile tool; the resolved string is returned as the tool output. */
  downloadRawFile?(path: string): Promise<string>;

  // --- Skills -------------------------------------------------------------

  /** Used to construct a SkillManager when `skillManager` is not supplied. */
  sandboxProvider?: SandboxProvider;

  /** Explicit skill manager; overrides the one derived from `sandboxProvider`. */
  skillManager?: SkillManager;

  /** Location of the skill index; createAgent falls back to the SKILL_INDEX_PATH environment variable. */
  skillIndexPath?: string;

  // --- Observability ------------------------------------------------------

  /** Telemetry sink handed to the tracing and metrics helpers. */
  telemetry?: HarnessTelemetry;
}
41
-
42
/**
 * Wraps a tool execution in the configured lifecycle: PreToolUse hook,
 * permission check, the execution itself, PostToolUse hook, and a duration
 * measurement.
 *
 * When the runtime carries neither a hook runner nor a permission manager the
 * executor is invoked directly: no trace span is opened and no duration is
 * recorded on that path.
 *
 * @param action  Tool call being executed; its name and args are forwarded to the hooks and the permission check.
 * @param execute Callback that performs the actual tool work.
 * @param runtime Supplies hookRunner, permissionManager and telemetry.
 * @returns The executor's result, or a failed ToolResult when the pre-hook or the permission check denies the call.
 */
async function guardedExecute(
  action: ToolCallAction,
  execute: () => Promise<ToolResult>,
  runtime: AgentRuntime,
): Promise<ToolResult> {
  const hooks = runtime.hookRunner;
  const permissions = runtime.permissionManager;
  const telemetry = runtime.telemetry;

  const nothingToGuard = !hooks && !permissions;
  if (nothingToGuard) {
    return execute();
  }

  return traceStep(telemetry, 'tool.call', { tool: action.name }, async () => {
    const startedAt = Date.now();

    // Shared exit for both denial paths: record a failed call, then report why.
    const deny = (reason: string): ToolResult => {
      const denied = { success: false, output: '', error: reason };
      recordToolCallDuration(telemetry, action.name, Date.now() - startedAt, false);
      return denied;
    };

    if (hooks) {
      const preDecision = await hooks.run({ event: 'PreToolUse', toolName: action.name, input: action.args });
      if (!preDecision.allow) {
        return deny(preDecision.reason ?? 'blocked by pre-hook');
      }
    }

    if (permissions) {
      const verdict = await permissions.check({ toolName: action.name, input: action.args });
      if (!verdict.allow) {
        return deny(verdict.reason ?? 'blocked by permission manager');
      }
    }

    const outcome = await execute();

    // The post-hook is informational here: its decision is not inspected.
    if (hooks) {
      await hooks.run({ event: 'PostToolUse', toolName: action.name, input: action.args, output: outcome });
    }

    recordToolCallDuration(telemetry, action.name, Date.now() - startedAt, outcome.success);
    return outcome;
  });
}
90
-
91
/**
 * Executes one tool call inside the guarded lifecycle.
 *
 * The runtime's custom `executeToolAction` (when present) is tried first; a
 * null/falsy answer means "not handled" and the built-in dispatch takes over.
 */
async function executeTool(provider: ToolProvider, action: ToolCallAction, runtime: AgentRuntime): Promise<ToolResult> {
  const dispatch = async (): Promise<ToolResult> => {
    const handled = runtime.executeToolAction ? await runtime.executeToolAction(action) : null;
    return handled ? handled : executeBuiltinTool(provider, action, runtime);
  };

  return guardedExecute(action, dispatch, runtime);
}
101
-
102
/**
 * Dispatches a tool call to the matching built-in implementation.
 *
 * File, shell and web tools are forwarded to `provider`; AskUser, TellUser and
 * DownloadRawFile are forwarded to the optional runtime callbacks. Optional
 * capabilities that are not configured, and tool names that are not
 * recognised, produce a failed ToolResult rather than throwing.
 *
 * @param provider Tool provider that performs file, shell and web operations.
 * @param action   Tool call to execute; arguments are coerced with String() and default to ''.
 * @param runtime  Supplies the askUser / tellUser / downloadRawFile callbacks.
 */
async function executeBuiltinTool(provider: ToolProvider, action: ToolCallAction, runtime: AgentRuntime): Promise<ToolResult> {
  const args = action.args;
  const asText = (value: unknown): string => String(value ?? '');
  const failure = (message: string): ToolResult => ({ success: false, output: '', error: message });

  switch (action.name) {
    case 'Bash':
      return provider.bash(asText(args.command), {
        cwd: args.cwd as string | undefined,
        timeout: args.timeout as number | undefined
      });

    case 'Read':
      return provider.readFile(asText(args.path));

    case 'Write':
      return provider.writeFile(asText(args.path), asText(args.content));

    case 'Edit':
      return provider.editFile(asText(args.path), asText(args.old_text), asText(args.new_text));

    case 'Glob':
      return provider.glob(asText(args.pattern));

    case 'Grep':
      return provider.grep(asText(args.pattern), args.path as string | undefined);

    case 'WebFetch': {
      if (!provider.webFetch) {
        return failure('WebFetch unavailable: provider.webFetch not configured');
      }
      return provider.webFetch({
        url: asText(args.url),
        selector: args.selector as string | undefined,
        maxContentLength: args.maxContentLength as number | undefined,
        headers: args.headers as Record<string, string> | undefined
      });
    }

    case 'WebSearch': {
      if (!provider.webSearch) {
        return failure('WebSearch unavailable: provider.webSearch not configured');
      }
      return provider.webSearch(asText(args.query));
    }

    case 'AskUser': {
      if (!runtime.askUser) {
        return failure('AskUser unavailable: runtime.askUser not configured');
      }
      // Options are only forwarded when the model supplied an array.
      const choices = Array.isArray(args.options)
        ? args.options.map((item) => String(item))
        : undefined;
      const reply = await runtime.askUser(asText(args.question), choices);
      return { success: true, output: reply };
    }

    case 'TellUser': {
      if (!runtime.tellUser) {
        return failure('TellUser unavailable: runtime.tellUser not configured');
      }
      await runtime.tellUser(asText(args.message));
      return { success: true, output: 'ok' };
    }

    case 'DownloadRawFile': {
      if (!runtime.downloadRawFile) {
        return failure('DownloadRawFile unavailable: runtime.downloadRawFile not configured');
      }
      const downloadedTo = await runtime.downloadRawFile(asText(args.path));
      return { success: true, output: downloadedTo };
    }

    default:
      return failure(`Unknown tool: ${action.name}`);
  }
}
184
-
185
/**
 * Checks that a built-in tool call carries the arguments its tool needs.
 *
 * Three levels of strictness are applied, depending on what the argument means:
 *  - identifiers, commands, patterns, questions: must be a string with
 *    non-whitespace content;
 *  - `Edit.old_text`: must be a string of length >= 1, but may consist solely
 *    of whitespace (replacing tabs or blank lines is a legitimate edit);
 *  - `Write.content` / `Edit.new_text`: must be a string, and may be empty
 *    (creating an empty file, or deleting the matched text).
 *
 * Tool names not listed here are not validated.
 *
 * @param action Tool call to validate.
 * @returns A human-readable error message, or null when the call is acceptable.
 */
function validateToolAction(action: ToolCallAction): string | null {
  const args = action.args;

  // String with at least one non-whitespace character.
  const requireNonBlank = (value: unknown, field: string): string | null =>
    typeof value === 'string' && value.trim().length > 0
      ? null
      : `${action.name} requires non-empty ${field}`;

  // String of length >= 1; whitespace-only is accepted because it can be real file content.
  const requireNonEmpty = (value: unknown, field: string): string | null =>
    typeof value === 'string' && value.length > 0
      ? null
      : `${action.name} requires non-empty ${field}`;

  // Any string, including ''; only a missing or non-string value is rejected.
  const requireString = (value: unknown, field: string): string | null =>
    typeof value === 'string' ? null : `${action.name} requires string ${field}`;

  switch (action.name) {
    case 'Bash':
      return requireNonBlank(args.command, 'command');
    case 'Read':
    case 'DownloadRawFile':
      return requireNonBlank(args.path, 'path');
    case 'Write':
      return requireNonBlank(args.path, 'path') ?? requireString(args.content, 'content');
    case 'Edit':
      return (
        requireNonBlank(args.path, 'path') ??
        // An empty old_text would match at index 0 of any file, so it stays rejected.
        requireNonEmpty(args.old_text, 'old_text') ??
        requireString(args.new_text, 'new_text')
      );
    case 'Glob':
    case 'Grep':
      return requireNonBlank(args.pattern, 'pattern');
    case 'WebFetch':
      return requireNonBlank(args.url, 'url');
    case 'WebSearch':
      return requireNonBlank(args.query, 'query');
    case 'AskUser':
      return requireNonBlank(args.question, 'question');
    case 'TellUser':
      return requireNonBlank(args.message, 'message');
    default:
      return null;
  }
}
214
-
215
/**
 * Reduces a ToolResult to the fields exposed on stream events.
 * The `error` key is only present when the source carries a non-null error.
 */
function toStreamResult(r: ToolResult): { success: boolean; output: string; error?: string } {
  const { success, output, error } = r;
  return error != null ? { success, output, error } : { success, output };
}
220
-
221
/**
 * Builds the human-readable `content` string for a tool result message.
 *
 * Failures render as `ERROR: <reason>`. A successful Write is summarised as
 * `ok` instead of echoing its output, and a Bash command is truncated to its
 * first 100 characters in the label.
 */
function formatToolResultContent(call: ToolCallAction, result: ToolResult): string {
  const body = result.success ? result.output : `ERROR: ${result.error ?? 'unknown failure'}`;
  const { name, args } = call;

  if (name === 'Write') {
    return `Write(${args.path}): ${result.success ? 'ok' : body}`;
  }
  if (name === 'Read' || name === 'Edit') {
    return `${name}(${args.path}): ${body}`;
  }
  if (name === 'Bash') {
    const preview = String(args.command ?? '').slice(0, 100);
    return `Bash(${JSON.stringify(preview)}): ${body}`;
  }
  return `${name}: ${body}`;
}
239
-
240
/**
 * Builds the human-readable `content` string summarising an assistant's tool calls.
 *
 * Each call becomes one `- Name: …` bullet. Write shows the path and content
 * length, Bash shows at most 200 characters of the command, and unrecognised
 * tools show their JSON-encoded args cut to 200 characters plus an ellipsis.
 */
function formatToolCallContent(calls: ToolCallAction[]): string {
  const describe = (call: ToolCallAction): string => {
    const { name, args } = call;

    if (name === 'Write') {
      const target = String(args.path ?? '');
      const size = String(args.content ?? '').length;
      return `- Write: path="${target}" (${size} chars)`;
    }
    if (name === 'Bash') {
      const preview = String(args.command ?? '').slice(0, 200);
      return `- Bash: ${JSON.stringify(preview)}`;
    }
    if (name === 'Read' || name === 'Edit') {
      return `- ${name}: path="${args.path}"`;
    }
    if (name === 'Glob') {
      return `- Glob: pattern="${args.pattern}"`;
    }
    if (name === 'Grep') {
      return `- Grep: pattern="${args.pattern}" path="${args.path ?? '.'}"`;
    }

    const encoded = JSON.stringify(args);
    const shown = encoded.length > 200 ? encoded.slice(0, 200) + '…' : encoded;
    return `- ${name}: ${shown}`;
  };

  const bullets: string[] = [];
  for (const call of calls) {
    bullets.push(describe(call));
  }
  return `[Tool calls:\n${bullets.join('\n')}]`;
}
269
-
270
/** Tool names that executeBatch routes through the provider's batch endpoint. */
const SANDBOX_TOOLS = new Set<string>([
  'Bash',
  'Read',
  'Write',
  'Edit',
  'Glob',
  'Grep',
]);

/** Reports whether `name` is one of the sandbox tool names (case-sensitive). */
function isSandboxTool(name: string): boolean {
  const listed = SANDBOX_TOOLS.has(name);
  return listed;
}
275
-
276
/**
 * Translates a validated sandbox tool call into the batch operation(s) that
 * implement it.
 *
 *  - Bash  → `exec` with a 60 000 ms default timeout and an optional cwd.
 *  - Write → `write_file`.
 *  - Read  → `read_file`.
 *  - Edit  → a `read_file` only; the caller applies the replacement to the
 *            returned content and issues the `write_file` in a later pass.
 *  - Glob  → `exec` of `find /` matching file names, capped at 200 lines.
 *  - Grep  → `exec` of a recursive `grep -n`, capped at 200 lines; the search
 *            root defaults to `/`.
 *
 * Any other tool name yields an empty list.
 */
function toolCallToBatchOps(call: ToolCallAction): BatchOp[] {
  const args = call.args;
  // Close the single-quoted shell string, emit an escaped quote, reopen it.
  const shellEscape = (raw: string): string => raw.replace(/'/g, "'\\''");

  if (call.name === 'Bash') {
    const execOp: BatchOp = {
      op: 'exec' as const,
      command: String(args.command ?? ''),
      timeoutMs: (args.timeout as number | undefined) ?? 60_000,
    };
    if (args.cwd) {
      (execOp as { cwd?: string }).cwd = String(args.cwd);
    }
    return [execOp];
  }

  if (call.name === 'Write') {
    const writeOp = {
      op: 'write_file' as const,
      path: String(args.path ?? ''),
      content: String(args.content ?? ''),
    };
    return [writeOp];
  }

  if (call.name === 'Read' || call.name === 'Edit') {
    // For Edit this read is only the first half of the operation.
    const readOp = {
      op: 'read_file' as const,
      path: String(args.path ?? ''),
    };
    return [readOp];
  }

  if (call.name === 'Glob') {
    const namePattern = shellEscape(String(args.pattern ?? ''));
    return [{
      op: 'exec' as const,
      command: `find / -type f -name '${namePattern}' 2>/dev/null | head -n 200`,
    }];
  }

  if (call.name === 'Grep') {
    const needle = shellEscape(String(args.pattern ?? ''));
    const root = shellEscape(String(args.path ?? '/') || '/');
    return [{
      op: 'exec' as const,
      command: `grep -R -n -- '${needle}' '${root}' 2>/dev/null | head -n 200`,
    }];
  }

  return [];
}
329
-
330
/**
 * Converts one batch operation result into the ToolResult shape.
 *
 *  - A failed operation carries its error through with empty output.
 *  - `exec` succeeds only on exit code 0; otherwise the error is stderr, or
 *    `exit code N` when stderr is empty.
 *  - `read_file` yields the file content as output.
 *  - Anything else (i.e. `write_file`) yields the literal output `ok`.
 */
function batchResultToToolResult(result: BatchResult): ToolResult {
  if (!result.success) {
    const failure = result as { error: string };
    return { success: false, output: '', error: failure.error };
  }

  switch (result.op) {
    case 'exec': {
      const exitedCleanly = result.exitCode === 0;
      return {
        success: exitedCleanly,
        output: result.stdout ?? '',
        error: exitedCleanly ? undefined : result.stderr || `exit code ${result.exitCode}`,
      };
    }
    case 'read_file':
      return { success: true, output: result.content };
    default:
      // write_file
      return { success: true, output: 'ok' };
  }
}
349
-
350
/** Converts a thrown value into the failed ToolResult shape used on the batch path. */
function batchErrorResult(error: unknown): ToolResult {
  return { success: false, output: '', error: error instanceof Error ? error.message : String(error) };
}

/** Runs one call through executeTool; a throw becomes a failed ToolResult instead of propagating. */
async function executeToolNoThrow(call: ToolCallAction, provider: ToolProvider, runtime: AgentRuntime): Promise<ToolResult> {
  try {
    return await executeTool(provider, call, runtime);
  } catch (error) {
    return batchErrorResult(error);
  }
}

/** Runs every call through executeTool concurrently; the result array is index-aligned with `calls`. */
function executeEachInParallel(calls: ToolCallAction[], provider: ToolProvider, runtime: AgentRuntime): Promise<ToolResult[]> {
  return Promise.all(calls.map((call) => executeToolNoThrow(call, provider, runtime)));
}

/**
 * Executes a group of tool calls and returns their results in input order.
 *
 * Strategy:
 *  1. No `provider.batch`, or no sandbox tools in the group: every call runs
 *     through executeTool concurrently.
 *  2. A hook runner or permission manager is configured: sandbox calls run
 *     through executeTool one at a time, in input order. The batch endpoint is
 *     skipped on purpose, because ops sent through it never pass
 *     guardedExecute and would bypass PreToolUse/PostToolUse hooks and
 *     permission checks.
 *  3. Otherwise: sandbox calls are translated to batch ops and sent in a
 *     single `provider.batch` call. Edit is a two-pass operation: the first
 *     batch reads the file, the replacement is applied locally, and a second
 *     batch writes the result. If the first batch call throws, the whole
 *     group is retried through path 1.
 *  In paths 2 and 3 the non-sandbox calls then run through executeTool concurrently.
 *
 * Every slot of the returned array is populated: a call the provider returned
 * no result for is reported as a failed ToolResult.
 *
 * @param calls    Validated tool calls to execute.
 * @param provider Tool provider; `batch` is optional.
 * @param runtime  Agent runtime (hooks, permissions, custom executor, callbacks).
 * @returns One ToolResult per input call, index-aligned with `calls`.
 */
async function executeBatch(
  calls: ToolCallAction[],
  provider: ToolProvider,
  runtime: AgentRuntime,
): Promise<ToolResult[]> {
  if (!provider.batch) {
    return executeEachInParallel(calls, provider, runtime);
  }

  // Partition into sandbox and non-sandbox calls, remembering original positions.
  const sandboxCalls: { index: number; call: ToolCallAction }[] = [];
  const nonSandboxCalls: { index: number; call: ToolCallAction }[] = [];
  calls.forEach((call, index) => {
    (isSandboxTool(call.name) ? sandboxCalls : nonSandboxCalls).push({ index, call });
  });

  if (sandboxCalls.length === 0) {
    return executeEachInParallel(calls, provider, runtime);
  }

  const allResults: ToolResult[] = new Array(calls.length);

  // Hooks and permission checks are applied per call by guardedExecute, which
  // the batch endpoint does not go through. Honour them by executing sandbox
  // calls individually, sequentially so the in-order semantics of the batch
  // endpoint are kept.
  // NOTE(review): runtime.executeToolAction is likewise not consulted for ops
  // sent through provider.batch — confirm whether that is intended.
  const requiresPerCallLifecycle = Boolean(runtime.hookRunner || runtime.permissionManager);

  if (requiresPerCallLifecycle) {
    for (const { index, call } of sandboxCalls) {
      allResults[index] = await executeToolNoThrow(call, provider, runtime);
    }
  } else {
    // Build the op list, tracking which original call each op belongs to.
    const batchOps: BatchOp[] = [];
    const opMapping: { callsIndex: number; isEditRead: boolean }[] = [];
    for (const { index, call } of sandboxCalls) {
      for (const op of toolCallToBatchOps(call)) {
        opMapping.push({ callsIndex: index, isEditRead: call.name === 'Edit' });
        batchOps.push(op);
      }
    }

    // First pass (includes the read half of every Edit).
    let batchResults: BatchResult[];
    try {
      batchResults = await provider.batch(batchOps);
    } catch {
      // The batch endpoint failed as a whole: fall back to individual execution.
      return executeEachInParallel(calls, provider, runtime);
    }

    const editWrites: BatchOp[] = [];
    const editWriteMapping: { callsIndex: number }[] = [];

    // Bound by both lengths so a provider returning extra results cannot index past opMapping.
    const pairedCount = Math.min(batchResults.length, opMapping.length);
    for (let i = 0; i < pairedCount; i++) {
      const mapping = opMapping[i]!;
      const result = batchResults[i]!;

      if (!mapping.isEditRead || !result.success) {
        allResults[mapping.callsIndex] = batchResultToToolResult(result);
        continue;
      }

      const call = calls[mapping.callsIndex]!;
      const path = String(call.args.path ?? '');
      const content = (result as { content: string }).content;
      const oldText = String(call.args.old_text ?? '');
      if (!content.includes(oldText)) {
        allResults[mapping.callsIndex] = {
          success: false,
          output: '',
          error: 'old text not found',
          metadata: { path },
        };
        continue;
      }

      const newText = String(call.args.new_text ?? '');
      // A replacer function inserts newText verbatim; a plain string would have
      // `$&`, `$1`, `$$`, … interpreted as replacement patterns.
      const newContent = content.replace(oldText, () => newText);
      editWrites.push({ op: 'write_file' as const, path, content: newContent });
      editWriteMapping.push({ callsIndex: mapping.callsIndex });
    }

    // Second pass: write back the edited files.
    if (editWrites.length > 0) {
      try {
        const writeResults = await provider.batch(editWrites);
        const writtenCount = Math.min(writeResults.length, editWriteMapping.length);
        for (let i = 0; i < writtenCount; i++) {
          allResults[editWriteMapping[i]!.callsIndex] = batchResultToToolResult(writeResults[i]!);
        }
      } catch (error) {
        const failed = batchErrorResult(error);
        for (const { callsIndex } of editWriteMapping) {
          allResults[callsIndex] = { ...failed };
        }
      }
    }
  }

  // Non-sandbox calls always go through executeTool, concurrently.
  if (nonSandboxCalls.length > 0) {
    const localResults = await executeEachInParallel(
      nonSandboxCalls.map(({ call }) => call),
      provider,
      runtime,
    );
    nonSandboxCalls.forEach(({ index }, i) => {
      allResults[index] = localResults[i]!;
    });
  }

  // Guarantee a result per call even if the provider returned fewer results than ops.
  for (let i = 0; i < allResults.length; i++) {
    if (allResults[i] === undefined) {
      allResults[i] = { success: false, output: '', error: 'batch returned no result for this call' };
    }
  }

  return allResults;
}
502
-
503
- export function createAgent(runtime: AgentRuntime) {
504
- const executor = new SingleFlightStepExecutor();
505
- const skillManager =
506
- runtime.skillManager ??
507
- (runtime.sandboxProvider ? new SkillManager(runtime.sandboxProvider, runtime.telemetry) : undefined);
508
- const skillIndexPath = runtime.skillIndexPath ?? process.env.SKILL_INDEX_PATH;
509
- let skillSummariesPromise: Promise<SkillSummary[]> | null = null;
510
- const skillRouter = new SkillRouter();
511
-
512
- const loop: AgentLoop =
513
- runtime.loop ??
514
- (runtime.nextAction
515
- ? { nextAction: runtime.nextAction }
516
- : new VercelAgentLoop());
517
-
518
- async function resolveSkillContext(prompt: string): Promise<string> {
519
- if (!skillManager || !skillIndexPath) return '';
520
-
521
- try {
522
- skillSummariesPromise ??= skillManager.discover(skillIndexPath);
523
- const summaries = await skillSummariesPromise;
524
- if (summaries.length === 0) return '';
525
-
526
- const matched = await skillRouter.selectSkill(prompt, summaries);
527
- if (!matched) return '';
528
-
529
- const invoked = await skillManager.invoke(matched.name);
530
- const exec = invoked.execution;
531
- const execSummary = exec
532
- ? `Skill execution: attempted=${exec.attempted} success=${exec.success} commandsRun=${exec.commandsRun ?? 0}`
533
- : 'Skill execution: n/a';
534
- return `Skill selected: ${matched.name}\n${execSummary}\n${invoked.instructions}`;
535
- } catch {
536
- return '';
537
- }
538
- }
539
-
540
- return {
541
- async run(prompt: string, options?: { history?: AgentMessage[] }): Promise<AgentRunResult> {
542
- return traceStep(runtime.telemetry, 'agent.run', { component: 'agent' }, async () => {
543
- const skillContext = await resolveSkillContext(prompt);
544
- const effectivePrompt = skillContext
545
- ? `${prompt}\n\nSkill guidance:\n${skillContext}`
546
- : prompt;
547
- const history = options?.history ?? [];
548
- const messages: AgentMessage[] = [...history, { role: 'user', content: effectivePrompt }];
549
- const maxSteps = runtime.maxSteps ?? 50;
550
- const maxConsecutiveInvalid = 3;
551
- let consecutiveInvalid = 0;
552
-
553
- for (let step = 1; step <= maxSteps; step += 1) {
554
- recordAgentStep(runtime.telemetry);
555
-
556
- const action = await traceStep(
557
- runtime.telemetry,
558
- 'agent.step',
559
- { step },
560
- () => executor.run(() => loop.nextAction(messages))
561
- );
562
-
563
- if (action.type === 'final') {
564
- messages.push({ role: 'assistant', content: action.content });
565
- return {
566
- messages,
567
- output: action.content,
568
- steps: step
569
- };
570
- }
571
-
572
- if (action.type === 'tool_batch') {
573
- // Validate each call individually; partition into valid and invalid
574
- const validCalls: ToolCallAction[] = [];
575
- const invalidMessages: string[] = [];
576
- for (const call of action.calls) {
577
- const err = validateToolAction(call);
578
- if (err) {
579
- invalidMessages.push(`${call.name}: ERROR: ${err}`);
580
- } else {
581
- validCalls.push(call);
582
- }
583
- }
584
-
585
- // Record assistant message with structured toolCalls
586
- const allCalls = action.calls;
587
- messages.push({
588
- role: 'assistant',
589
- content: formatToolCallContent(allCalls),
590
- toolCalls: allCalls.map(c => ({
591
- toolCallId: c.toolCallId ?? randomUUID(),
592
- toolName: c.name,
593
- args: c.args,
594
- })),
595
- });
596
-
597
- // Execute valid calls via batch (sequential sandbox ops) or parallel fallback
598
- if (validCalls.length > 0) {
599
- const results = await executeBatch(validCalls, runtime.toolProvider, runtime);
600
- for (let i = 0; i < validCalls.length; i++) {
601
- const call = validCalls[i]!;
602
- const r = results[i]!;
603
- if (!r.success) {
604
- recordAgentError(runtime.telemetry);
605
- }
606
- const resultText = r.success ? r.output : `ERROR: ${r.error ?? 'unknown failure'}`;
607
- messages.push({
608
- role: 'tool',
609
- content: formatToolResultContent(call, r),
610
- toolResults: [{
611
- toolCallId: call.toolCallId ?? '',
612
- toolName: call.name,
613
- result: resultText,
614
- isError: !r.success,
615
- }],
616
- });
617
- }
618
- }
619
-
620
- // Append messages for invalid calls so the LLM sees the errors
621
- for (const msg of invalidMessages) {
622
- messages.push({ role: 'tool', content: msg });
623
- }
624
-
625
- consecutiveInvalid = invalidMessages.length > 0 && validCalls.length === 0
626
- ? consecutiveInvalid + 1
627
- : 0;
628
-
629
- if (consecutiveInvalid >= maxConsecutiveInvalid) {
630
- recordAgentError(runtime.telemetry);
631
- const msg = `ERROR: ${maxConsecutiveInvalid} consecutive invalid actions. Stopping.`;
632
- return { messages, output: msg, steps: step };
633
- }
634
-
635
- continue;
636
- }
637
-
638
- // Single tool call — record assistant message with structured toolCalls
639
- const singleCallId = action.toolCallId ?? randomUUID();
640
- messages.push({
641
- role: 'assistant',
642
- content: formatToolCallContent([action]),
643
- toolCalls: [{
644
- toolCallId: singleCallId,
645
- toolName: action.name,
646
- args: action.args,
647
- }],
648
- });
649
-
650
- const validationError = validateToolAction(action);
651
- if (validationError) {
652
- consecutiveInvalid += 1;
653
- if (consecutiveInvalid >= maxConsecutiveInvalid) {
654
- recordAgentError(runtime.telemetry);
655
- const msg = `ERROR: ${maxConsecutiveInvalid} consecutive invalid actions (last: ${validationError}). Stopping.`;
656
- messages.push({ role: 'tool', content: `${action.name}: ${msg}` });
657
- return { messages, output: msg, steps: step };
658
- }
659
- } else {
660
- consecutiveInvalid = 0;
661
- }
662
- const result = validationError
663
- ? ({ success: false, output: '', error: validationError } as ToolResult)
664
- : await executor.run(async () => {
665
- try {
666
- return await executeTool(runtime.toolProvider, action, runtime);
667
- } catch (error) {
668
- return {
669
- success: false,
670
- output: '',
671
- error: error instanceof Error ? error.message : String(error)
672
- };
673
- }
674
- });
675
- if (!result.success) {
676
- recordAgentError(runtime.telemetry);
677
- }
678
- const singleResultText = result.success ? result.output : `ERROR: ${result.error ?? 'unknown failure'}`;
679
- messages.push({
680
- role: 'tool',
681
- content: formatToolResultContent(action, result),
682
- toolResults: [{
683
- toolCallId: singleCallId,
684
- toolName: action.name,
685
- result: singleResultText,
686
- isError: !result.success,
687
- }],
688
- });
689
- }
690
-
691
- recordAgentError(runtime.telemetry);
692
- return {
693
- messages,
694
- output: 'ERROR: max steps exceeded',
695
- steps: maxSteps
696
- };
697
- });
698
- },
699
-
700
/**
 * Runs the agent loop and yields progress as a stream of events.
 *
 * Each iteration yields `step_start`, then either streams the model output
 * through `loop.streamAction` (when the loop provides it) or falls back to
 * a single `loop.nextAction` call wrapped in synthetic events. Tool calls
 * are executed and their results appended to the conversation before the
 * next step. The generator finishes with a `done` event carrying either
 * the final assistant text or `'ERROR: max steps exceeded'`.
 *
 * @param prompt - User prompt; skill guidance from `resolveSkillContext`
 *   is appended to it when any is returned.
 * @param options - Optional settings; `history` is prepended to the
 *   conversation ahead of the new user message.
 * @returns An async generator of `AgentStreamEvent` values.
 */
async *stream(prompt: string, options?: { history?: AgentMessage[] }): AsyncGenerator<AgentStreamEvent> {
  const REPL_MARKER = '##REPL##\n';
  const skillContext = await resolveSkillContext(prompt);
  const effectivePrompt = skillContext
    ? `${prompt}\n\nSkill guidance:\n${skillContext}`
    : prompt;
  const history = options?.history ?? [];
  const messages: AgentMessage[] = [...history, { role: 'user', content: effectivePrompt }];
  const maxSteps = runtime.maxSteps ?? 50;

  // Text stored in a tool-result entry: the output on success, an ERROR line otherwise.
  const resultText = (r: ToolResult): string =>
    r.success ? r.output : `ERROR: ${r.error ?? 'unknown failure'}`;

  // Resolve one ID per call exactly once, so the assistant `toolCalls` entry and
  // the matching `toolResults` entry always carry the same `toolCallId`.
  // (Previously a call without an ID got a random UUID on the assistant side
  // and an empty string on the result side, so the two could not be paired.)
  const resolveCallIds = (calls: ToolCallAction[]): string[] =>
    calls.map(c => c.toolCallId ?? randomUUID());

  // Append the assistant message announcing a set of tool calls.
  const pushAssistantToolCalls = (content: string, calls: ToolCallAction[], ids: string[]): void => {
    messages.push({
      role: 'assistant',
      content,
      toolCalls: calls.map((c, i) => ({
        toolCallId: ids[i]!,
        toolName: c.name,
        args: c.args,
      })),
    });
  };

  // Append the tool message recording the outcome of one call.
  const pushToolResult = (call: ToolCallAction, callId: string, r: ToolResult): void => {
    messages.push({
      role: 'tool',
      content: formatToolResultContent(call, r),
      toolResults: [{
        toolCallId: callId,
        toolName: call.name,
        result: resultText(r),
        isError: !r.success,
      }],
    });
  };

  for (let step = 1; step <= maxSteps; step++) {
    yield { type: 'step_start', step };

    if (loop.streamAction) {
      // Streaming path: forward text deltas and collect the requested tool calls.
      const pendingTools: ToolCallAction[] = [];
      let finalText = '';
      for await (const event of loop.streamAction(messages)) {
        if (event.type === 'text_delta') {
          finalText += event.text;
          yield event;
        }
        if (event.type === 'tool_start') {
          pendingTools.push({
            type: 'tool',
            name: event.name,
            args: event.args,
            ...(event.toolCallId != null ? { toolCallId: event.toolCallId } : {}),
          });
          yield event;
        }
      }

      // No tool calls means the streamed text is the final response.
      if (pendingTools.length === 0) {
        messages.push({ role: 'assistant', content: finalText });
        yield { type: 'step_end', step };
        yield { type: 'done', output: finalText, steps: step };
        return;
      }

      // Record the assistant message with structured toolCalls.
      const pendingIds = resolveCallIds(pendingTools);
      pushAssistantToolCalls(finalText || formatToolCallContent(pendingTools), pendingTools, pendingIds);

      // REPL fallback: if any Bash command starts with ##REPL##, fall back to
      // non-streaming nextAction() which routes through LCMToolLoop REPL handling.
      const hasRepl = pendingTools.some(
        t => t.name === 'Bash' && String(t.args.command ?? '').startsWith(REPL_MARKER)
      );
      if (hasRepl) {
        // NOTE(review): the assistant message recorded above lists the streamed
        // calls, while the results recorded below belong to whatever nextAction()
        // returns. Confirm this pairing is what the REPL handling expects.
        const action = await loop.nextAction(messages);
        if (action.type === 'final') {
          yield { type: 'text_delta', text: action.content };
          messages.push({ role: 'assistant', content: action.content });
          yield { type: 'step_end', step };
          yield { type: 'done', output: action.content, steps: step };
          return;
        }

        // Non-final REPL result: execute the tool(s) one at a time and continue the loop.
        const replCalls: ToolCallAction[] = action.type === 'tool_batch' ? action.calls : [action];
        for (const call of replCalls) {
          const callId = call.toolCallId ?? randomUUID();
          yield { type: 'tool_start', name: call.name, args: call.args, toolCallId: callId };
          try {
            const r = await executeTool(runtime.toolProvider, call, runtime);
            yield { type: 'tool_end', name: call.name, result: toStreamResult(r) };
            pushToolResult(call, callId, r);
          } catch (error) {
            // A thrown error is reported as a failed tool result instead of aborting the stream.
            const errMsg = error instanceof Error ? error.message : String(error);
            yield { type: 'tool_end', name: call.name, result: { success: false, output: '', error: errMsg } };
            messages.push({
              role: 'tool',
              content: `${call.name}: ERROR: ${errMsg}`,
              toolResults: [{ toolCallId: callId, toolName: call.name, result: errMsg, isError: true }],
            });
          }
        }
        yield { type: 'step_end', step };
        continue;
      }

      // Execute tools via batch (sequential sandbox ops) or parallel fallback.
      const results = await executeBatch(pendingTools, runtime.toolProvider, runtime);
      for (let i = 0; i < pendingTools.length; i++) {
        const call = pendingTools[i]!;
        const r = results[i]!;
        yield { type: 'tool_end', name: call.name, result: toStreamResult(r) };
        pushToolResult(call, pendingIds[i]!, r);
      }
    } else {
      // Fallback: wrap nextAction() in synthetic events.
      const action = await loop.nextAction(messages);

      if (action.type === 'final') {
        messages.push({ role: 'assistant', content: action.content });
        yield { type: 'step_end', step };
        yield { type: 'done', output: action.content, steps: step };
        return;
      }

      const calls: ToolCallAction[] = action.type === 'tool_batch' ? action.calls : [action];
      // Record assistant message with structured toolCalls.
      const callIds = resolveCallIds(calls);
      pushAssistantToolCalls(formatToolCallContent(calls), calls, callIds);
      for (const call of calls) {
        yield { type: 'tool_start', name: call.name, args: call.args, ...(call.toolCallId != null ? { toolCallId: call.toolCallId } : {}) };
      }
      const results = await executeBatch(calls, runtime.toolProvider, runtime);
      for (let i = 0; i < calls.length; i++) {
        const call = calls[i]!;
        const r = results[i]!;
        yield { type: 'tool_end', name: call.name, result: toStreamResult(r) };
        pushToolResult(call, callIds[i]!, r);
      }
    }

    yield { type: 'step_end', step };
  }

  // The step budget ran out without a final response.
  yield { type: 'done', output: 'ERROR: max steps exceeded', steps: maxSteps };
}
883
- };
884
- }