ak-claude 0.0.5 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/code-agent.js +434 -111
- package/index.cjs +407 -79
- package/package.json +1 -1
- package/types.d.ts +69 -6
package/code-agent.js
CHANGED
|
@@ -1,15 +1,14 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* @fileoverview CodeAgent class — AI agent
|
|
3
|
-
*
|
|
4
|
-
*
|
|
5
|
-
* (read files, write files, run commands) in a single script.
|
|
2
|
+
* @fileoverview CodeAgent class — AI agent with multiple code-oriented tools.
|
|
3
|
+
* Provides write_code, execute_code, write_and_run_code, fix_code, run_bash,
|
|
4
|
+
* and (optionally) use_skill tools for autonomous coding tasks.
|
|
6
5
|
*/
|
|
7
6
|
|
|
8
7
|
import BaseClaude from './base.js';
|
|
9
8
|
import log from './logger.js';
|
|
10
9
|
import { execFile } from 'node:child_process';
|
|
11
10
|
import { writeFile, unlink, readdir, readFile, mkdir } from 'node:fs/promises';
|
|
12
|
-
import { join, sep } from 'node:path';
|
|
11
|
+
import { join, sep, basename } from 'node:path';
|
|
13
12
|
import { randomUUID } from 'node:crypto';
|
|
14
13
|
|
|
15
14
|
/**
|
|
@@ -22,30 +21,9 @@ const MAX_OUTPUT_CHARS = 50_000;
|
|
|
22
21
|
// Line budget for the generated file tree. NOTE(review): the consumer is outside this view —
// presumably applied while gathering codebase context; confirm before changing.
const MAX_FILE_TREE_LINES = 500;
|
|
23
22
|
// Directory names kept in a Set for O(1) membership checks.
// NOTE(review): presumably skipped while walking the working directory — the walker is outside this view.
const IGNORE_DIRS = new Set([
  'node_modules',
  '.git',
  'dist',
  'coverage',
  '.next',
  'build',
  '__pycache__'
]);
|
|
24
23
|
|
|
25
|
-
/**
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
* During init, gathers codebase context (file tree + key files) and injects it
|
|
29
|
-
* into the system prompt. The model uses the `execute_code` tool to run scripts
|
|
30
|
-
* in a Node.js child process that inherits the parent's environment variables.
|
|
31
|
-
*
|
|
32
|
-
* @example
|
|
33
|
-
* ```javascript
|
|
34
|
-
* import { CodeAgent } from 'ak-claude';
|
|
35
|
-
*
|
|
36
|
-
* const agent = new CodeAgent({
|
|
37
|
-
* workingDirectory: '/path/to/my/project',
|
|
38
|
-
* onCodeExecution: (code, output) => {
|
|
39
|
-
* console.log('Executed:', code.slice(0, 100));
|
|
40
|
-
* console.log('Output:', output.stdout);
|
|
41
|
-
* }
|
|
42
|
-
* });
|
|
43
|
-
*
|
|
44
|
-
* const result = await agent.chat('List all TODO comments in the codebase');
|
|
45
|
-
* console.log(result.text);
|
|
46
|
-
* console.log(`Ran ${result.codeExecutions.length} scripts`);
|
|
47
|
-
* ```
|
|
48
|
-
*/
|
|
24
|
+
/** Tools that execute code/commands and can fail */
|
|
25
|
+
// fix_code is deliberately absent: it only executes when called with execute=true,
// so callers test for that case separately.
const EXECUTING_TOOLS = new Set([
  'execute_code',
  'write_and_run_code',
  'run_bash'
]);
|
|
26
|
+
|
|
49
27
|
class CodeAgent extends BaseClaude {
|
|
50
28
|
/**
|
|
51
29
|
* @param {CodeAgentOptions} [options={}]
|
|
@@ -68,6 +46,8 @@ class CodeAgent extends BaseClaude {
|
|
|
68
46
|
this.keepArtifacts = options.keepArtifacts ?? false;
|
|
69
47
|
this.comments = options.comments ?? false;
|
|
70
48
|
this.codeMaxRetries = options.maxRetries ?? 3;
|
|
49
|
+
this.skills = options.skills || [];
|
|
50
|
+
this.envOverview = options.envOverview || '';
|
|
71
51
|
|
|
72
52
|
// ── Internal state ──
|
|
73
53
|
this._codebaseContext = null;
|
|
@@ -76,34 +56,112 @@ class CodeAgent extends BaseClaude {
|
|
|
76
56
|
this._activeProcess = null;
|
|
77
57
|
this._userSystemPrompt = options.systemPrompt || '';
|
|
78
58
|
this._allExecutions = [];
|
|
59
|
+
this._skillRegistry = new Map();
|
|
60
|
+
|
|
61
|
+
// ── Tools (built after skill loading; placeholder until init) ──
|
|
62
|
+
this._tools = this._buildToolDefinitions();
|
|
63
|
+
|
|
64
|
+
log.debug(`CodeAgent created for directory: ${this.workingDirectory}`);
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
// ── Tool Definitions ─────────────────────────────────────────────────────
|
|
79
68
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
69
|
+
/**
|
|
70
|
+
* Build tool definitions in Claude format.
|
|
71
|
+
* use_skill is only included when skills are registered.
|
|
72
|
+
* @private
|
|
73
|
+
* @returns {Array<{name: string, description: string, input_schema: Object}>}
|
|
74
|
+
*/
|
|
75
|
+
_buildToolDefinitions() {
|
|
76
|
+
/** @type {Array<{name: string, description: string, input_schema: Object}>} */
|
|
77
|
+
const tools = [
|
|
78
|
+
{
|
|
79
|
+
name: 'write_code',
|
|
80
|
+
description: 'Output code without executing it. Use this when you want to show, propose, or present code to the user without running it.',
|
|
81
|
+
input_schema: {
|
|
82
|
+
type: 'object',
|
|
83
|
+
properties: {
|
|
84
|
+
code: { type: 'string', description: 'The code to output.' },
|
|
85
|
+
purpose: { type: 'string', description: 'A short 2-4 word slug describing the code (e.g., "api-client", "data-parser").' },
|
|
86
|
+
language: { type: 'string', description: 'Programming language of the code (default: "javascript").' }
|
|
90
87
|
},
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
88
|
+
required: ['code']
|
|
89
|
+
}
|
|
90
|
+
},
|
|
91
|
+
{
|
|
92
|
+
name: 'execute_code',
|
|
93
|
+
description: 'Execute a given piece of JavaScript code in a Node.js child process. Use this when you already have code to run — e.g., running code from a previous write_code call, re-running a snippet, or executing code the user provided. Use console.log() for output.',
|
|
94
|
+
input_schema: {
|
|
95
|
+
type: 'object',
|
|
96
|
+
properties: {
|
|
97
|
+
code: { type: 'string', description: 'JavaScript code to execute. Use console.log() for output. Use import syntax (ES modules).' },
|
|
98
|
+
purpose: { type: 'string', description: 'A short 2-4 word slug describing what this script does (e.g., "read-config", "parse-logs").' }
|
|
99
|
+
},
|
|
100
|
+
required: ['code']
|
|
101
|
+
}
|
|
102
|
+
},
|
|
103
|
+
{
|
|
104
|
+
name: 'write_and_run_code',
|
|
105
|
+
description: 'Write a fresh solution from scratch and execute it in one step. Use this when you need to figure out the code AND run it — the autonomous, end-to-end tool for solving problems with code.',
|
|
106
|
+
input_schema: {
|
|
107
|
+
type: 'object',
|
|
108
|
+
properties: {
|
|
109
|
+
code: { type: 'string', description: 'JavaScript code to write and execute. Use console.log() for output. Use import syntax (ES modules).' },
|
|
110
|
+
purpose: { type: 'string', description: 'A short 2-4 word slug describing what this script does (e.g., "fetch-api-data", "generate-report").' }
|
|
111
|
+
},
|
|
112
|
+
required: ['code']
|
|
113
|
+
}
|
|
114
|
+
},
|
|
115
|
+
{
|
|
116
|
+
name: 'fix_code',
|
|
117
|
+
description: 'Fix broken code. Provide the original and fixed versions with an explanation. Optionally execute the fix to verify it works.',
|
|
118
|
+
input_schema: {
|
|
119
|
+
type: 'object',
|
|
120
|
+
properties: {
|
|
121
|
+
original_code: { type: 'string', description: 'The original broken code.' },
|
|
122
|
+
fixed_code: { type: 'string', description: 'The corrected code.' },
|
|
123
|
+
explanation: { type: 'string', description: 'Brief explanation of what was wrong and how it was fixed.' },
|
|
124
|
+
execute: { type: 'boolean', description: 'If true, execute the fixed code to verify it works (default: false).' }
|
|
125
|
+
},
|
|
126
|
+
required: ['original_code', 'fixed_code']
|
|
127
|
+
}
|
|
128
|
+
},
|
|
129
|
+
{
|
|
130
|
+
name: 'run_bash',
|
|
131
|
+
description: 'Execute a shell command in the working directory. Use this for file operations, git commands, installing packages, or any shell task. Prefer this over execute_code for simple shell operations.',
|
|
132
|
+
input_schema: {
|
|
133
|
+
type: 'object',
|
|
134
|
+
properties: {
|
|
135
|
+
command: { type: 'string', description: 'The shell command to execute.' },
|
|
136
|
+
purpose: { type: 'string', description: 'A short 2-4 word slug describing the command (e.g., "list-files", "install-deps").' }
|
|
137
|
+
},
|
|
138
|
+
required: ['command']
|
|
139
|
+
}
|
|
97
140
|
}
|
|
98
|
-
|
|
141
|
+
];
|
|
142
|
+
|
|
143
|
+
// Conditionally add use_skill
|
|
144
|
+
if (this._skillRegistry && this._skillRegistry.size > 0) {
|
|
145
|
+
tools.push({
|
|
146
|
+
name: 'use_skill',
|
|
147
|
+
description: `Load a skill by name to get instructions, templates, or patterns. Available skills: ${[...this._skillRegistry.keys()].join(', ')}`,
|
|
148
|
+
input_schema: {
|
|
149
|
+
type: 'object',
|
|
150
|
+
properties: {
|
|
151
|
+
skill_name: { type: 'string', description: 'The name of the skill to load.' }
|
|
152
|
+
},
|
|
153
|
+
required: ['skill_name']
|
|
154
|
+
}
|
|
155
|
+
});
|
|
156
|
+
}
|
|
99
157
|
|
|
100
|
-
|
|
158
|
+
return tools;
|
|
101
159
|
}
|
|
102
160
|
|
|
103
161
|
// ── Init ─────────────────────────────────────────────────────────────────
|
|
104
162
|
|
|
105
163
|
/**
|
|
106
|
-
* Initialize the agent: gather codebase context and build system prompt.
|
|
164
|
+
* Initialize the agent: load skills, gather codebase context, and build system prompt.
|
|
107
165
|
* @param {boolean} [force=false]
|
|
108
166
|
*/
|
|
109
167
|
async init(force = false) {
|
|
@@ -111,6 +169,14 @@ class CodeAgent extends BaseClaude {
|
|
|
111
169
|
|
|
112
170
|
await this._ensureClient();
|
|
113
171
|
|
|
172
|
+
// Load skills
|
|
173
|
+
if (this.skills.length > 0 && (this._skillRegistry.size === 0 || force)) {
|
|
174
|
+
await this._loadSkills();
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
// Rebuild tools (use_skill may now be included)
|
|
178
|
+
this._tools = this._buildToolDefinitions();
|
|
179
|
+
|
|
114
180
|
// Gather codebase context
|
|
115
181
|
if (!this._contextGathered || force) {
|
|
116
182
|
await this._gatherCodebaseContext();
|
|
@@ -122,6 +188,30 @@ class CodeAgent extends BaseClaude {
|
|
|
122
188
|
await super.init(force);
|
|
123
189
|
}
|
|
124
190
|
|
|
191
|
+
// ── Skill Loading ────────────────────────────────────────────────────────
|
|
192
|
+
|
|
193
|
+
/**
|
|
194
|
+
* Load skill files into the skill registry.
|
|
195
|
+
* @private
|
|
196
|
+
*/
|
|
197
|
+
async _loadSkills() {
|
|
198
|
+
this._skillRegistry.clear();
|
|
199
|
+
|
|
200
|
+
for (const filePath of this.skills) {
|
|
201
|
+
try {
|
|
202
|
+
const content = await readFile(filePath, 'utf-8');
|
|
203
|
+
// Extract name from YAML frontmatter if present
|
|
204
|
+
let name = basename(filePath).replace(/\.md$/i, '');
|
|
205
|
+
const fmMatch = content.match(/^---\s*\n[\s\S]*?^name:\s*(.+)$/m);
|
|
206
|
+
if (fmMatch) name = fmMatch[1].trim();
|
|
207
|
+
this._skillRegistry.set(name, { name, content, path: filePath });
|
|
208
|
+
log.debug(`Loaded skill: ${name} from ${filePath}`);
|
|
209
|
+
} catch (e) {
|
|
210
|
+
log.warn(`skills: could not load "${filePath}": ${e.message}`);
|
|
211
|
+
}
|
|
212
|
+
}
|
|
213
|
+
}
|
|
214
|
+
|
|
125
215
|
// ── Context Gathering ────────────────────────────────────────────────────
|
|
126
216
|
|
|
127
217
|
/**
|
|
@@ -240,9 +330,35 @@ class CodeAgent extends BaseClaude {
|
|
|
240
330
|
|
|
241
331
|
let prompt = `You are a coding agent working in ${this.workingDirectory}.
|
|
242
332
|
|
|
243
|
-
##
|
|
244
|
-
|
|
245
|
-
|
|
333
|
+
## Available Tools
|
|
334
|
+
|
|
335
|
+
### write_code
|
|
336
|
+
Output code without executing it. Use when showing, proposing, or presenting code to the user.
|
|
337
|
+
|
|
338
|
+
### execute_code
|
|
339
|
+
Run a given piece of JavaScript code. Use when you already have code to run — e.g., from a previous write_code call, re-running a snippet, or executing user-provided code.
|
|
340
|
+
|
|
341
|
+
### write_and_run_code
|
|
342
|
+
Write a fresh solution from scratch and execute it in one step. The autonomous, end-to-end tool for solving problems with code.
|
|
343
|
+
|
|
344
|
+
### fix_code
|
|
345
|
+
Fix broken code by providing original and fixed versions. Set execute=true to verify the fix works.
|
|
346
|
+
|
|
347
|
+
### run_bash
|
|
348
|
+
Run shell commands directly (e.g., ls, grep, curl, git, npm, cat). Prefer this over execute_code for simple shell operations.`;
|
|
349
|
+
|
|
350
|
+
if (this._skillRegistry.size > 0) {
|
|
351
|
+
prompt += `
|
|
352
|
+
|
|
353
|
+
### use_skill
|
|
354
|
+
Load a skill by name to get detailed instructions and templates. Available skills: ${[...this._skillRegistry.keys()].join(', ')}`;
|
|
355
|
+
}
|
|
356
|
+
|
|
357
|
+
prompt += `
|
|
358
|
+
|
|
359
|
+
## Code Execution Rules
|
|
360
|
+
These rules apply when using execute_code, write_and_run_code, or fix_code (with execute=true):
|
|
361
|
+
- Always provide a short descriptive \`purpose\` parameter (2-4 word slug like "read-config")
|
|
246
362
|
- Your code runs in a Node.js child process with access to all built-in modules
|
|
247
363
|
- IMPORTANT: Your code runs as an ES module (.mjs). Use import syntax, NOT require():
|
|
248
364
|
- import fs from 'fs';
|
|
@@ -250,9 +366,7 @@ class CodeAgent extends BaseClaude {
|
|
|
250
366
|
- import { execSync } from 'child_process';
|
|
251
367
|
- Use console.log() to produce output — that's how results are returned to you
|
|
252
368
|
- Write efficient scripts that do multiple things per execution when possible
|
|
253
|
-
- For parallel async operations, use Promise.all()
|
|
254
|
-
const [a, b] = await Promise.all([fetchA(), fetchB()]);
|
|
255
|
-
- Read files with fs.readFileSync() when you need to understand their contents
|
|
369
|
+
- For parallel async operations, use Promise.all()
|
|
256
370
|
- Handle errors in your scripts with try/catch so you get useful error messages
|
|
257
371
|
- Top-level await is supported
|
|
258
372
|
- The working directory is: ${this.workingDirectory}`;
|
|
@@ -282,6 +396,10 @@ class CodeAgent extends BaseClaude {
|
|
|
282
396
|
prompt += `\n\n## Additional Instructions\n${this._userSystemPrompt}`;
|
|
283
397
|
}
|
|
284
398
|
|
|
399
|
+
if (this.envOverview) {
|
|
400
|
+
prompt += `\n\n## Environment Overview\n${this.envOverview}`;
|
|
401
|
+
}
|
|
402
|
+
|
|
285
403
|
return prompt;
|
|
286
404
|
}
|
|
287
405
|
|
|
@@ -298,14 +416,14 @@ class CodeAgent extends BaseClaude {
|
|
|
298
416
|
/**
|
|
299
417
|
* @private
|
|
300
418
|
*/
|
|
301
|
-
async _executeCode(code, purpose) {
|
|
419
|
+
async _executeCode(code, purpose, toolName) {
|
|
302
420
|
if (this._stopped) {
|
|
303
421
|
return { stdout: '', stderr: 'Agent was stopped', exitCode: -1 };
|
|
304
422
|
}
|
|
305
423
|
|
|
306
424
|
if (this.onBeforeExecution) {
|
|
307
425
|
try {
|
|
308
|
-
const allowed = await this.onBeforeExecution(code);
|
|
426
|
+
const allowed = await this.onBeforeExecution(code, toolName || 'execute_code');
|
|
309
427
|
if (allowed === false) {
|
|
310
428
|
return { stdout: '', stderr: 'Execution denied by onBeforeExecution callback', exitCode: -1, denied: true };
|
|
311
429
|
}
|
|
@@ -356,7 +474,8 @@ class CodeAgent extends BaseClaude {
|
|
|
356
474
|
|
|
357
475
|
this._allExecutions.push({
|
|
358
476
|
code, purpose: purpose || null, output: result.stdout, stderr: result.stderr,
|
|
359
|
-
exitCode: result.exitCode, filePath: this.keepArtifacts ? tempFile : null
|
|
477
|
+
exitCode: result.exitCode, filePath: this.keepArtifacts ? tempFile : null,
|
|
478
|
+
tool: toolName || 'execute_code'
|
|
360
479
|
});
|
|
361
480
|
|
|
362
481
|
if (this.onCodeExecution) {
|
|
@@ -373,6 +492,73 @@ class CodeAgent extends BaseClaude {
|
|
|
373
492
|
}
|
|
374
493
|
}
|
|
375
494
|
|
|
495
|
+
// ── Bash Execution ───────────────────────────────────────────────────────
|
|
496
|
+
|
|
497
|
+
/**
|
|
498
|
+
* Execute a bash command in the working directory.
|
|
499
|
+
* @private
|
|
500
|
+
*/
|
|
501
|
+
async _executeBash(command, purpose) {
|
|
502
|
+
if (this._stopped) {
|
|
503
|
+
return { stdout: '', stderr: 'Agent was stopped', exitCode: -1 };
|
|
504
|
+
}
|
|
505
|
+
|
|
506
|
+
if (this.onBeforeExecution) {
|
|
507
|
+
try {
|
|
508
|
+
const allowed = await this.onBeforeExecution(command, 'run_bash');
|
|
509
|
+
if (allowed === false) {
|
|
510
|
+
return { stdout: '', stderr: 'Execution denied by onBeforeExecution callback', exitCode: -1, denied: true };
|
|
511
|
+
}
|
|
512
|
+
} catch (e) {
|
|
513
|
+
log.warn(`onBeforeExecution callback error: ${e.message}`);
|
|
514
|
+
}
|
|
515
|
+
}
|
|
516
|
+
|
|
517
|
+
const result = await new Promise((resolve) => {
|
|
518
|
+
const child = execFile('bash', ['-c', command], {
|
|
519
|
+
cwd: this.workingDirectory,
|
|
520
|
+
timeout: this.timeout,
|
|
521
|
+
env: process.env,
|
|
522
|
+
maxBuffer: 10 * 1024 * 1024
|
|
523
|
+
}, (err, stdout, stderr) => {
|
|
524
|
+
this._activeProcess = null;
|
|
525
|
+
if (err) {
|
|
526
|
+
resolve({
|
|
527
|
+
stdout: err.stdout || stdout || '',
|
|
528
|
+
stderr: (err.stderr || stderr || '') + (err.killed ? '\n[EXECUTION TIMED OUT]' : ''),
|
|
529
|
+
exitCode: err.code || 1
|
|
530
|
+
});
|
|
531
|
+
} else {
|
|
532
|
+
resolve({ stdout: stdout || '', stderr: stderr || '', exitCode: 0 });
|
|
533
|
+
}
|
|
534
|
+
});
|
|
535
|
+
this._activeProcess = child;
|
|
536
|
+
});
|
|
537
|
+
|
|
538
|
+
const totalLen = result.stdout.length + result.stderr.length;
|
|
539
|
+
if (totalLen > MAX_OUTPUT_CHARS) {
|
|
540
|
+
const half = Math.floor(MAX_OUTPUT_CHARS / 2);
|
|
541
|
+
if (result.stdout.length > half) {
|
|
542
|
+
result.stdout = result.stdout.slice(0, half) + '\n...[OUTPUT TRUNCATED]';
|
|
543
|
+
}
|
|
544
|
+
if (result.stderr.length > half) {
|
|
545
|
+
result.stderr = result.stderr.slice(0, half) + '\n...[STDERR TRUNCATED]';
|
|
546
|
+
}
|
|
547
|
+
}
|
|
548
|
+
|
|
549
|
+
this._allExecutions.push({
|
|
550
|
+
code: command, purpose: purpose || null, output: result.stdout, stderr: result.stderr,
|
|
551
|
+
exitCode: result.exitCode, filePath: null, tool: 'run_bash'
|
|
552
|
+
});
|
|
553
|
+
|
|
554
|
+
if (this.onCodeExecution) {
|
|
555
|
+
try { this.onCodeExecution(command, result); }
|
|
556
|
+
catch (e) { log.warn(`onCodeExecution callback error: ${e.message}`); }
|
|
557
|
+
}
|
|
558
|
+
|
|
559
|
+
return result;
|
|
560
|
+
}
|
|
561
|
+
|
|
376
562
|
/**
|
|
377
563
|
* @private
|
|
378
564
|
*/
|
|
@@ -384,11 +570,102 @@ class CodeAgent extends BaseClaude {
|
|
|
384
570
|
return output || '(no output)';
|
|
385
571
|
}
|
|
386
572
|
|
|
573
|
+
// ── Tool Call Dispatch ───────────────────────────────────────────────────
|
|
574
|
+
|
|
575
|
+
/**
|
|
576
|
+
* Handle a tool call by name, dispatching to the appropriate handler.
|
|
577
|
+
* @private
|
|
578
|
+
* @param {string} name - Tool name
|
|
579
|
+
* @param {Object} input - Tool arguments
|
|
580
|
+
* @returns {Promise<{output: string, type: string, data: Object}>}
|
|
581
|
+
*/
|
|
582
|
+
async _handleToolCall(name, input) {
|
|
583
|
+
switch (name) {
|
|
584
|
+
case 'execute_code':
|
|
585
|
+
case 'write_and_run_code': {
|
|
586
|
+
const result = await this._executeCode(input.code || '', input.purpose, name);
|
|
587
|
+
return {
|
|
588
|
+
output: this._formatOutput(result),
|
|
589
|
+
type: 'code_execution',
|
|
590
|
+
data: {
|
|
591
|
+
tool: name, code: input.code || '', purpose: input.purpose,
|
|
592
|
+
stdout: result.stdout, stderr: result.stderr, exitCode: result.exitCode,
|
|
593
|
+
denied: result.denied
|
|
594
|
+
}
|
|
595
|
+
};
|
|
596
|
+
}
|
|
597
|
+
case 'write_code': {
|
|
598
|
+
return {
|
|
599
|
+
output: 'Code written successfully.',
|
|
600
|
+
type: 'write',
|
|
601
|
+
data: {
|
|
602
|
+
tool: 'write_code', code: input.code || '',
|
|
603
|
+
purpose: input.purpose, language: input.language || 'javascript'
|
|
604
|
+
}
|
|
605
|
+
};
|
|
606
|
+
}
|
|
607
|
+
case 'fix_code': {
|
|
608
|
+
let execResult = null;
|
|
609
|
+
if (input.execute) {
|
|
610
|
+
execResult = await this._executeCode(input.fixed_code || '', 'fix', 'fix_code');
|
|
611
|
+
}
|
|
612
|
+
return {
|
|
613
|
+
output: input.execute ? this._formatOutput(execResult) : 'Fix recorded.',
|
|
614
|
+
type: 'fix',
|
|
615
|
+
data: {
|
|
616
|
+
tool: 'fix_code',
|
|
617
|
+
originalCode: input.original_code || '',
|
|
618
|
+
fixedCode: input.fixed_code || '',
|
|
619
|
+
explanation: input.explanation,
|
|
620
|
+
executed: !!input.execute,
|
|
621
|
+
stdout: execResult?.stdout, stderr: execResult?.stderr,
|
|
622
|
+
exitCode: execResult?.exitCode, denied: execResult?.denied
|
|
623
|
+
}
|
|
624
|
+
};
|
|
625
|
+
}
|
|
626
|
+
case 'run_bash': {
|
|
627
|
+
const result = await this._executeBash(input.command || '', input.purpose);
|
|
628
|
+
return {
|
|
629
|
+
output: this._formatOutput(result),
|
|
630
|
+
type: 'bash',
|
|
631
|
+
data: {
|
|
632
|
+
tool: 'run_bash', command: input.command || '', purpose: input.purpose,
|
|
633
|
+
stdout: result.stdout, stderr: result.stderr, exitCode: result.exitCode,
|
|
634
|
+
denied: result.denied
|
|
635
|
+
}
|
|
636
|
+
};
|
|
637
|
+
}
|
|
638
|
+
case 'use_skill': {
|
|
639
|
+
const skillName = input.skill_name || '';
|
|
640
|
+
const skill = this._skillRegistry.get(skillName);
|
|
641
|
+
if (!skill) {
|
|
642
|
+
const available = [...this._skillRegistry.keys()].join(', ');
|
|
643
|
+
return {
|
|
644
|
+
output: `Skill "${skillName}" not found. Available skills: ${available || '(none)'}`,
|
|
645
|
+
type: 'skill',
|
|
646
|
+
data: { tool: 'use_skill', skillName, found: false }
|
|
647
|
+
};
|
|
648
|
+
}
|
|
649
|
+
return {
|
|
650
|
+
output: skill.content,
|
|
651
|
+
type: 'skill',
|
|
652
|
+
data: { tool: 'use_skill', skillName: skill.name, content: skill.content, found: true }
|
|
653
|
+
};
|
|
654
|
+
}
|
|
655
|
+
default:
|
|
656
|
+
return {
|
|
657
|
+
output: `Unknown tool: ${name}`,
|
|
658
|
+
type: 'unknown',
|
|
659
|
+
data: { tool: name }
|
|
660
|
+
};
|
|
661
|
+
}
|
|
662
|
+
}
|
|
663
|
+
|
|
387
664
|
// ── Non-Streaming Chat ───────────────────────────────────────────────────
|
|
388
665
|
|
|
389
666
|
/**
|
|
390
667
|
* Send a message and get a complete response (non-streaming).
|
|
391
|
-
* Automatically handles the
|
|
668
|
+
* Automatically handles the multi-tool execution loop.
|
|
392
669
|
*
|
|
393
670
|
* @param {string} message - The user's message
|
|
394
671
|
* @param {Object} [opts={}] - Per-message options
|
|
@@ -398,7 +675,7 @@ class CodeAgent extends BaseClaude {
|
|
|
398
675
|
if (!this._initialized) await this.init();
|
|
399
676
|
this._stopped = false;
|
|
400
677
|
|
|
401
|
-
const
|
|
678
|
+
const toolCalls = [];
|
|
402
679
|
let consecutiveFailures = 0;
|
|
403
680
|
|
|
404
681
|
let response = await this._sendMessage(message, { tools: this._tools });
|
|
@@ -414,34 +691,29 @@ class CodeAgent extends BaseClaude {
|
|
|
414
691
|
for (const block of toolUseBlocks) {
|
|
415
692
|
if (this._stopped) break;
|
|
416
693
|
|
|
417
|
-
const
|
|
418
|
-
const purpose = block.input?.purpose;
|
|
419
|
-
const result = await this._executeCode(code, purpose);
|
|
694
|
+
const { output, type, data } = await this._handleToolCall(block.name, block.input || {});
|
|
420
695
|
|
|
421
|
-
|
|
422
|
-
code,
|
|
423
|
-
purpose: this._slugify(purpose),
|
|
424
|
-
output: result.stdout,
|
|
425
|
-
stderr: result.stderr,
|
|
426
|
-
exitCode: result.exitCode
|
|
427
|
-
});
|
|
696
|
+
toolCalls.push(data);
|
|
428
697
|
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
698
|
+
// Track consecutive failures for executing tools
|
|
699
|
+
const isExecutingTool = EXECUTING_TOOLS.has(block.name) || (block.name === 'fix_code' && block.input?.execute);
|
|
700
|
+
if (isExecutingTool) {
|
|
701
|
+
if (data.exitCode !== 0 && !data.denied) {
|
|
702
|
+
consecutiveFailures++;
|
|
703
|
+
} else {
|
|
704
|
+
consecutiveFailures = 0;
|
|
705
|
+
}
|
|
433
706
|
}
|
|
434
707
|
|
|
435
|
-
let
|
|
436
|
-
|
|
708
|
+
let toolOutput = output;
|
|
437
709
|
if (consecutiveFailures >= this.codeMaxRetries) {
|
|
438
|
-
|
|
710
|
+
toolOutput += `\n\n[RETRY LIMIT REACHED] You have failed ${this.codeMaxRetries} consecutive attempts. STOP trying to execute code. Instead, respond with: 1) What you were trying to do, 2) The errors you encountered, 3) Questions for the user about how to resolve it.`;
|
|
439
711
|
}
|
|
440
712
|
|
|
441
713
|
toolResults.push({
|
|
442
714
|
type: 'tool_result',
|
|
443
715
|
tool_use_id: block.id,
|
|
444
|
-
content:
|
|
716
|
+
content: toolOutput
|
|
445
717
|
});
|
|
446
718
|
}
|
|
447
719
|
|
|
@@ -459,9 +731,21 @@ class CodeAgent extends BaseClaude {
|
|
|
459
731
|
attempts: 1
|
|
460
732
|
};
|
|
461
733
|
|
|
734
|
+
// Build backward-compat codeExecutions (only execute_code + write_and_run_code)
|
|
735
|
+
const codeExecutions = toolCalls
|
|
736
|
+
.filter(tc => tc.tool === 'execute_code' || tc.tool === 'write_and_run_code' || (tc.tool === 'fix_code' && tc.executed))
|
|
737
|
+
.map(tc => ({
|
|
738
|
+
code: tc.code || tc.fixedCode,
|
|
739
|
+
purpose: this._slugify(tc.purpose),
|
|
740
|
+
output: tc.stdout || '',
|
|
741
|
+
stderr: tc.stderr || '',
|
|
742
|
+
exitCode: tc.exitCode ?? 0
|
|
743
|
+
}));
|
|
744
|
+
|
|
462
745
|
return {
|
|
463
746
|
text: this._extractText(response),
|
|
464
747
|
codeExecutions,
|
|
748
|
+
toolCalls,
|
|
465
749
|
usage: this.getLastUsage()
|
|
466
750
|
};
|
|
467
751
|
}
|
|
@@ -473,8 +757,12 @@ class CodeAgent extends BaseClaude {
|
|
|
473
757
|
*
|
|
474
758
|
* Event types:
|
|
475
759
|
* - `text` — A chunk of the agent's text response
|
|
476
|
-
* - `code` — The agent is about to execute code
|
|
477
|
-
* - `output` — Code finished executing
|
|
760
|
+
* - `code` — The agent is about to execute code (execute_code or write_and_run_code)
|
|
761
|
+
* - `output` — Code/bash finished executing
|
|
762
|
+
* - `write` — The agent wrote code without executing (write_code)
|
|
763
|
+
* - `fix` — The agent fixed code (fix_code)
|
|
764
|
+
* - `bash` — The agent is about to run a bash command
|
|
765
|
+
* - `skill` — The agent loaded a skill
|
|
478
766
|
* - `done` — The agent finished
|
|
479
767
|
*
|
|
480
768
|
* @param {string} message - The user's message
|
|
@@ -485,7 +773,7 @@ class CodeAgent extends BaseClaude {
|
|
|
485
773
|
if (!this._initialized) await this.init();
|
|
486
774
|
this._stopped = false;
|
|
487
775
|
|
|
488
|
-
const
|
|
776
|
+
const toolCalls = [];
|
|
489
777
|
let fullText = '';
|
|
490
778
|
let consecutiveFailures = 0;
|
|
491
779
|
|
|
@@ -512,53 +800,77 @@ class CodeAgent extends BaseClaude {
|
|
|
512
800
|
this._captureMetadata(finalMessage);
|
|
513
801
|
|
|
514
802
|
if (finalMessage.stop_reason !== 'tool_use' || toolUseBlocks.length === 0) {
|
|
515
|
-
|
|
803
|
+
const codeExecutions = toolCalls
|
|
804
|
+
.filter(tc => tc.tool === 'execute_code' || tc.tool === 'write_and_run_code' || (tc.tool === 'fix_code' && tc.executed))
|
|
805
|
+
.map(tc => ({
|
|
806
|
+
code: tc.code || tc.fixedCode,
|
|
807
|
+
purpose: this._slugify(tc.purpose),
|
|
808
|
+
output: tc.stdout || '',
|
|
809
|
+
stderr: tc.stderr || '',
|
|
810
|
+
exitCode: tc.exitCode ?? 0
|
|
811
|
+
}));
|
|
812
|
+
yield { type: 'done', fullText, codeExecutions, toolCalls, usage: this.getLastUsage() };
|
|
516
813
|
return;
|
|
517
814
|
}
|
|
518
815
|
|
|
519
|
-
//
|
|
816
|
+
// Handle tool calls
|
|
520
817
|
const toolResults = [];
|
|
521
818
|
for (const block of toolUseBlocks) {
|
|
522
819
|
if (this._stopped) break;
|
|
523
820
|
|
|
524
|
-
const
|
|
525
|
-
const
|
|
526
|
-
|
|
821
|
+
const toolName = block.name;
|
|
822
|
+
const toolInput = block.input || {};
|
|
823
|
+
|
|
824
|
+
// Emit pre-execution events
|
|
825
|
+
if (toolName === 'write_code') {
|
|
826
|
+
yield { type: 'write', code: toolInput.code, purpose: toolInput.purpose, language: toolInput.language || 'javascript' };
|
|
827
|
+
} else if (toolName === 'fix_code') {
|
|
828
|
+
yield { type: 'fix', originalCode: toolInput.original_code, fixedCode: toolInput.fixed_code, explanation: toolInput.explanation };
|
|
829
|
+
} else if (toolName === 'run_bash') {
|
|
830
|
+
yield { type: 'bash', command: toolInput.command };
|
|
831
|
+
} else if (toolName === 'execute_code' || toolName === 'write_and_run_code') {
|
|
832
|
+
yield { type: 'code', code: toolInput.code };
|
|
833
|
+
}
|
|
527
834
|
|
|
528
|
-
const
|
|
835
|
+
const { output, type, data } = await this._handleToolCall(toolName, toolInput);
|
|
529
836
|
|
|
530
|
-
|
|
531
|
-
code,
|
|
532
|
-
purpose: this._slugify(purpose),
|
|
533
|
-
output: result.stdout,
|
|
534
|
-
stderr: result.stderr,
|
|
535
|
-
exitCode: result.exitCode
|
|
536
|
-
});
|
|
837
|
+
toolCalls.push(data);
|
|
537
838
|
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
839
|
+
// Emit post-execution output events
|
|
840
|
+
if (data.stdout !== undefined || data.stderr !== undefined) {
|
|
841
|
+
yield {
|
|
842
|
+
type: 'output',
|
|
843
|
+
code: data.code || data.command || data.fixedCode,
|
|
844
|
+
stdout: data.stdout || '',
|
|
845
|
+
stderr: data.stderr || '',
|
|
846
|
+
exitCode: data.exitCode ?? 0
|
|
847
|
+
};
|
|
848
|
+
}
|
|
545
849
|
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
consecutiveFailures = 0;
|
|
850
|
+
// Emit skill event
|
|
851
|
+
if (toolName === 'use_skill') {
|
|
852
|
+
yield { type: 'skill', skillName: data.skillName, content: data.content, found: data.found };
|
|
550
853
|
}
|
|
551
854
|
|
|
552
|
-
|
|
855
|
+
// Track consecutive failures
|
|
856
|
+
const isExecutingTool = EXECUTING_TOOLS.has(toolName) || (toolName === 'fix_code' && toolInput.execute);
|
|
857
|
+
if (isExecutingTool) {
|
|
858
|
+
if (data.exitCode !== 0 && !data.denied) {
|
|
859
|
+
consecutiveFailures++;
|
|
860
|
+
} else {
|
|
861
|
+
consecutiveFailures = 0;
|
|
862
|
+
}
|
|
863
|
+
}
|
|
553
864
|
|
|
865
|
+
let toolOutput = output;
|
|
554
866
|
if (consecutiveFailures >= this.codeMaxRetries) {
|
|
555
|
-
|
|
867
|
+
toolOutput += `\n\n[RETRY LIMIT REACHED] You have failed ${this.codeMaxRetries} consecutive attempts. STOP trying to execute code. Instead, respond with: 1) What you were trying to do, 2) The errors you encountered, 3) Questions for the user about how to resolve it.`;
|
|
556
868
|
}
|
|
557
869
|
|
|
558
870
|
toolResults.push({
|
|
559
871
|
type: 'tool_result',
|
|
560
872
|
tool_use_id: block.id,
|
|
561
|
-
content:
|
|
873
|
+
content: toolOutput
|
|
562
874
|
});
|
|
563
875
|
}
|
|
564
876
|
|
|
@@ -573,21 +885,32 @@ class CodeAgent extends BaseClaude {
|
|
|
573
885
|
if (this._stopped) warning = 'Agent was stopped';
|
|
574
886
|
else if (consecutiveFailures >= this.codeMaxRetries) warning = 'Retry limit reached';
|
|
575
887
|
|
|
576
|
-
|
|
888
|
+
const codeExecutions = toolCalls
|
|
889
|
+
.filter(tc => tc.tool === 'execute_code' || tc.tool === 'write_and_run_code' || (tc.tool === 'fix_code' && tc.executed))
|
|
890
|
+
.map(tc => ({
|
|
891
|
+
code: tc.code || tc.fixedCode,
|
|
892
|
+
purpose: this._slugify(tc.purpose),
|
|
893
|
+
output: tc.stdout || '',
|
|
894
|
+
stderr: tc.stderr || '',
|
|
895
|
+
exitCode: tc.exitCode ?? 0
|
|
896
|
+
}));
|
|
897
|
+
|
|
898
|
+
yield { type: 'done', fullText, codeExecutions, toolCalls, usage: this.getLastUsage(), warning };
|
|
577
899
|
}
|
|
578
900
|
|
|
579
901
|
// ── Dump ─────────────────────────────────────────────────────────────────
|
|
580
902
|
|
|
581
903
|
/**
|
|
582
|
-
* Returns all code scripts the agent has
|
|
583
|
-
* @returns {Array<{fileName: string, script: string}>}
|
|
904
|
+
* Returns all code scripts and bash commands the agent has executed.
|
|
905
|
+
* @returns {Array<{fileName: string, purpose: string|null, script: string, filePath: string|null, tool: string}>}
|
|
584
906
|
*/
|
|
585
907
|
dump() {
|
|
586
908
|
return this._allExecutions.map((exec, i) => ({
|
|
587
909
|
fileName: exec.purpose ? `agent-${exec.purpose}.mjs` : `script-${i + 1}.mjs`,
|
|
588
910
|
purpose: exec.purpose || null,
|
|
589
911
|
script: exec.code,
|
|
590
|
-
filePath: exec.filePath || null
|
|
912
|
+
filePath: exec.filePath || null,
|
|
913
|
+
tool: exec.tool || 'execute_code'
|
|
591
914
|
}));
|
|
592
915
|
}
|
|
593
916
|
|