ak-claude 0.0.5 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. package/code-agent.js +434 -111
  2. package/index.cjs +407 -79
  3. package/package.json +1 -1
  4. package/types.d.ts +69 -6
package/code-agent.js CHANGED
@@ -1,15 +1,14 @@
1
1
  /**
2
- * @fileoverview CodeAgent class — AI agent that writes and executes code.
3
- * Instead of traditional tool-calling with many round-trips, the model gets
4
- * a single `execute_code` tool and writes JavaScript that can do everything
5
- * (read files, write files, run commands) in a single script.
2
+ * @fileoverview CodeAgent class — AI agent with multiple code-oriented tools.
3
+ * Provides write_code, execute_code, write_and_run_code, fix_code, run_bash,
4
+ * and (optionally) use_skill tools for autonomous coding tasks.
6
5
  */
7
6
 
8
7
  import BaseClaude from './base.js';
9
8
  import log from './logger.js';
10
9
  import { execFile } from 'node:child_process';
11
10
  import { writeFile, unlink, readdir, readFile, mkdir } from 'node:fs/promises';
12
- import { join, sep } from 'node:path';
11
+ import { join, sep, basename } from 'node:path';
13
12
  import { randomUUID } from 'node:crypto';
14
13
 
15
14
  /**
@@ -22,30 +21,9 @@ const MAX_OUTPUT_CHARS = 50_000;
22
21
  const MAX_FILE_TREE_LINES = 500;
23
22
  const IGNORE_DIRS = new Set(['node_modules', '.git', 'dist', 'coverage', '.next', 'build', '__pycache__']);
24
23
 
25
- /**
26
- * AI agent that writes and executes JavaScript code autonomously.
27
- *
28
- * During init, gathers codebase context (file tree + key files) and injects it
29
- * into the system prompt. The model uses the `execute_code` tool to run scripts
30
- * in a Node.js child process that inherits the parent's environment variables.
31
- *
32
- * @example
33
- * ```javascript
34
- * import { CodeAgent } from 'ak-claude';
35
- *
36
- * const agent = new CodeAgent({
37
- * workingDirectory: '/path/to/my/project',
38
- * onCodeExecution: (code, output) => {
39
- * console.log('Executed:', code.slice(0, 100));
40
- * console.log('Output:', output.stdout);
41
- * }
42
- * });
43
- *
44
- * const result = await agent.chat('List all TODO comments in the codebase');
45
- * console.log(result.text);
46
- * console.log(`Ran ${result.codeExecutions.length} scripts`);
47
- * ```
48
- */
24
+ /** Tools that execute code/commands and can fail */
25
+ const EXECUTING_TOOLS = new Set(['execute_code', 'write_and_run_code', 'run_bash']);
26
+
49
27
  class CodeAgent extends BaseClaude {
50
28
  /**
51
29
  * @param {CodeAgentOptions} [options={}]
@@ -68,6 +46,8 @@ class CodeAgent extends BaseClaude {
68
46
  this.keepArtifacts = options.keepArtifacts ?? false;
69
47
  this.comments = options.comments ?? false;
70
48
  this.codeMaxRetries = options.maxRetries ?? 3;
49
+ this.skills = options.skills || [];
50
+ this.envOverview = options.envOverview || '';
71
51
 
72
52
  // ── Internal state ──
73
53
  this._codebaseContext = null;
@@ -76,34 +56,112 @@ class CodeAgent extends BaseClaude {
76
56
  this._activeProcess = null;
77
57
  this._userSystemPrompt = options.systemPrompt || '';
78
58
  this._allExecutions = [];
59
+ this._skillRegistry = new Map();
60
+
61
+ // ── Tools (built after skill loading; placeholder until init) ──
62
+ this._tools = this._buildToolDefinitions();
63
+
64
+ log.debug(`CodeAgent created for directory: ${this.workingDirectory}`);
65
+ }
66
+
67
+ // ── Tool Definitions ─────────────────────────────────────────────────────
79
68
 
80
- // ── Single tool: execute_code (Claude format) ──
81
- this._tools = [{
82
- name: 'execute_code',
83
- description: 'Execute JavaScript code in a Node.js child process. The code has access to all Node.js built-in modules (fs, path, child_process, http, etc.). Use console.log() to produce output that will be returned to you. The code runs in the working directory with the same environment variables as the parent process.',
84
- input_schema: {
85
- type: 'object',
86
- properties: {
87
- code: {
88
- type: 'string',
89
- description: 'JavaScript code to execute. Use console.log() for output. You can import any built-in Node.js module.'
69
+ /**
70
+ * Build tool definitions in Claude format.
71
+ * use_skill is only included when skills are registered.
72
+ * @private
73
+ * @returns {Array<{name: string, description: string, input_schema: Object}>}
74
+ */
75
+ _buildToolDefinitions() {
76
+ /** @type {Array<{name: string, description: string, input_schema: Object}>} */
77
+ const tools = [
78
+ {
79
+ name: 'write_code',
80
+ description: 'Output code without executing it. Use this when you want to show, propose, or present code to the user without running it.',
81
+ input_schema: {
82
+ type: 'object',
83
+ properties: {
84
+ code: { type: 'string', description: 'The code to output.' },
85
+ purpose: { type: 'string', description: 'A short 2-4 word slug describing the code (e.g., "api-client", "data-parser").' },
86
+ language: { type: 'string', description: 'Programming language of the code (default: "javascript").' }
90
87
  },
91
- purpose: {
92
- type: 'string',
93
- description: 'A short 2-4 word slug describing what this script does (e.g., "read-config", "parse-logs", "fetch-api-data"). Used for naming the script file.'
94
- }
95
- },
96
- required: ['code']
88
+ required: ['code']
89
+ }
90
+ },
91
+ {
92
+ name: 'execute_code',
93
+ description: 'Execute a given piece of JavaScript code in a Node.js child process. Use this when you already have code to run — e.g., running code from a previous write_code call, re-running a snippet, or executing code the user provided. Use console.log() for output.',
94
+ input_schema: {
95
+ type: 'object',
96
+ properties: {
97
+ code: { type: 'string', description: 'JavaScript code to execute. Use console.log() for output. Use import syntax (ES modules).' },
98
+ purpose: { type: 'string', description: 'A short 2-4 word slug describing what this script does (e.g., "read-config", "parse-logs").' }
99
+ },
100
+ required: ['code']
101
+ }
102
+ },
103
+ {
104
+ name: 'write_and_run_code',
105
+ description: 'Write a fresh solution from scratch and execute it in one step. Use this when you need to figure out the code AND run it — the autonomous, end-to-end tool for solving problems with code.',
106
+ input_schema: {
107
+ type: 'object',
108
+ properties: {
109
+ code: { type: 'string', description: 'JavaScript code to write and execute. Use console.log() for output. Use import syntax (ES modules).' },
110
+ purpose: { type: 'string', description: 'A short 2-4 word slug describing what this script does (e.g., "fetch-api-data", "generate-report").' }
111
+ },
112
+ required: ['code']
113
+ }
114
+ },
115
+ {
116
+ name: 'fix_code',
117
+ description: 'Fix broken code. Provide the original and fixed versions with an explanation. Optionally execute the fix to verify it works.',
118
+ input_schema: {
119
+ type: 'object',
120
+ properties: {
121
+ original_code: { type: 'string', description: 'The original broken code.' },
122
+ fixed_code: { type: 'string', description: 'The corrected code.' },
123
+ explanation: { type: 'string', description: 'Brief explanation of what was wrong and how it was fixed.' },
124
+ execute: { type: 'boolean', description: 'If true, execute the fixed code to verify it works (default: false).' }
125
+ },
126
+ required: ['original_code', 'fixed_code']
127
+ }
128
+ },
129
+ {
130
+ name: 'run_bash',
131
+ description: 'Execute a shell command in the working directory. Use this for file operations, git commands, installing packages, or any shell task. Prefer this over execute_code for simple shell operations.',
132
+ input_schema: {
133
+ type: 'object',
134
+ properties: {
135
+ command: { type: 'string', description: 'The shell command to execute.' },
136
+ purpose: { type: 'string', description: 'A short 2-4 word slug describing the command (e.g., "list-files", "install-deps").' }
137
+ },
138
+ required: ['command']
139
+ }
97
140
  }
98
- }];
141
+ ];
142
+
143
+ // Conditionally add use_skill
144
+ if (this._skillRegistry && this._skillRegistry.size > 0) {
145
+ tools.push({
146
+ name: 'use_skill',
147
+ description: `Load a skill by name to get instructions, templates, or patterns. Available skills: ${[...this._skillRegistry.keys()].join(', ')}`,
148
+ input_schema: {
149
+ type: 'object',
150
+ properties: {
151
+ skill_name: { type: 'string', description: 'The name of the skill to load.' }
152
+ },
153
+ required: ['skill_name']
154
+ }
155
+ });
156
+ }
99
157
 
100
- log.debug(`CodeAgent created for directory: ${this.workingDirectory}`);
158
+ return tools;
101
159
  }
102
160
 
103
161
  // ── Init ─────────────────────────────────────────────────────────────────
104
162
 
105
163
  /**
106
- * Initialize the agent: gather codebase context and build system prompt.
164
+ * Initialize the agent: load skills, gather codebase context, and build system prompt.
107
165
  * @param {boolean} [force=false]
108
166
  */
109
167
  async init(force = false) {
@@ -111,6 +169,14 @@ class CodeAgent extends BaseClaude {
111
169
 
112
170
  await this._ensureClient();
113
171
 
172
+ // Load skills
173
+ if (this.skills.length > 0 && (this._skillRegistry.size === 0 || force)) {
174
+ await this._loadSkills();
175
+ }
176
+
177
+ // Rebuild tools (use_skill may now be included)
178
+ this._tools = this._buildToolDefinitions();
179
+
114
180
  // Gather codebase context
115
181
  if (!this._contextGathered || force) {
116
182
  await this._gatherCodebaseContext();
@@ -122,6 +188,30 @@ class CodeAgent extends BaseClaude {
122
188
  await super.init(force);
123
189
  }
124
190
 
191
+ // ── Skill Loading ────────────────────────────────────────────────────────
192
+
193
+ /**
194
+ * Load skill files into the skill registry.
195
+ * @private
196
+ */
197
+ async _loadSkills() {
198
+ this._skillRegistry.clear();
199
+
200
+ for (const filePath of this.skills) {
201
+ try {
202
+ const content = await readFile(filePath, 'utf-8');
203
+ // Extract name from YAML frontmatter if present
204
+ let name = basename(filePath).replace(/\.md$/i, '');
205
+ const fmMatch = content.match(/^---\s*\n[\s\S]*?^name:\s*(.+)$/m);
206
+ if (fmMatch) name = fmMatch[1].trim();
207
+ this._skillRegistry.set(name, { name, content, path: filePath });
208
+ log.debug(`Loaded skill: ${name} from ${filePath}`);
209
+ } catch (e) {
210
+ log.warn(`skills: could not load "${filePath}": ${e.message}`);
211
+ }
212
+ }
213
+ }
214
+
125
215
  // ── Context Gathering ────────────────────────────────────────────────────
126
216
 
127
217
  /**
@@ -240,9 +330,35 @@ class CodeAgent extends BaseClaude {
240
330
 
241
331
  let prompt = `You are a coding agent working in ${this.workingDirectory}.
242
332
 
243
- ## Instructions
244
- - Use the execute_code tool to accomplish tasks by writing JavaScript code
245
- - Always provide a short descriptive \`purpose\` parameter (2-4 word slug like "read-config") when calling execute_code
333
+ ## Available Tools
334
+
335
+ ### write_code
336
+ Output code without executing it. Use when showing, proposing, or presenting code to the user.
337
+
338
+ ### execute_code
339
+ Run a given piece of JavaScript code. Use when you already have code to run — e.g., from a previous write_code call, re-running a snippet, or executing user-provided code.
340
+
341
+ ### write_and_run_code
342
+ Write a fresh solution from scratch and execute it in one step. The autonomous, end-to-end tool for solving problems with code.
343
+
344
+ ### fix_code
345
+ Fix broken code by providing original and fixed versions. Set execute=true to verify the fix works.
346
+
347
+ ### run_bash
348
+ Run shell commands directly (e.g., ls, grep, curl, git, npm, cat). Prefer this over execute_code for simple shell operations.`;
349
+
350
+ if (this._skillRegistry.size > 0) {
351
+ prompt += `
352
+
353
+ ### use_skill
354
+ Load a skill by name to get detailed instructions and templates. Available skills: ${[...this._skillRegistry.keys()].join(', ')}`;
355
+ }
356
+
357
+ prompt += `
358
+
359
+ ## Code Execution Rules
360
+ These rules apply when using execute_code, write_and_run_code, or fix_code (with execute=true):
361
+ - Always provide a short descriptive \`purpose\` parameter (2-4 word slug like "read-config")
246
362
  - Your code runs in a Node.js child process with access to all built-in modules
247
363
  - IMPORTANT: Your code runs as an ES module (.mjs). Use import syntax, NOT require():
248
364
  - import fs from 'fs';
@@ -250,9 +366,7 @@ class CodeAgent extends BaseClaude {
250
366
  - import { execSync } from 'child_process';
251
367
  - Use console.log() to produce output — that's how results are returned to you
252
368
  - Write efficient scripts that do multiple things per execution when possible
253
- - For parallel async operations, use Promise.all():
254
- const [a, b] = await Promise.all([fetchA(), fetchB()]);
255
- - Read files with fs.readFileSync() when you need to understand their contents
369
+ - For parallel async operations, use Promise.all()
256
370
  - Handle errors in your scripts with try/catch so you get useful error messages
257
371
  - Top-level await is supported
258
372
  - The working directory is: ${this.workingDirectory}`;
@@ -282,6 +396,10 @@ class CodeAgent extends BaseClaude {
282
396
  prompt += `\n\n## Additional Instructions\n${this._userSystemPrompt}`;
283
397
  }
284
398
 
399
+ if (this.envOverview) {
400
+ prompt += `\n\n## Environment Overview\n${this.envOverview}`;
401
+ }
402
+
285
403
  return prompt;
286
404
  }
287
405
 
@@ -298,14 +416,14 @@ class CodeAgent extends BaseClaude {
298
416
  /**
299
417
  * @private
300
418
  */
301
- async _executeCode(code, purpose) {
419
+ async _executeCode(code, purpose, toolName) {
302
420
  if (this._stopped) {
303
421
  return { stdout: '', stderr: 'Agent was stopped', exitCode: -1 };
304
422
  }
305
423
 
306
424
  if (this.onBeforeExecution) {
307
425
  try {
308
- const allowed = await this.onBeforeExecution(code);
426
+ const allowed = await this.onBeforeExecution(code, toolName || 'execute_code');
309
427
  if (allowed === false) {
310
428
  return { stdout: '', stderr: 'Execution denied by onBeforeExecution callback', exitCode: -1, denied: true };
311
429
  }
@@ -356,7 +474,8 @@ class CodeAgent extends BaseClaude {
356
474
 
357
475
  this._allExecutions.push({
358
476
  code, purpose: purpose || null, output: result.stdout, stderr: result.stderr,
359
- exitCode: result.exitCode, filePath: this.keepArtifacts ? tempFile : null
477
+ exitCode: result.exitCode, filePath: this.keepArtifacts ? tempFile : null,
478
+ tool: toolName || 'execute_code'
360
479
  });
361
480
 
362
481
  if (this.onCodeExecution) {
@@ -373,6 +492,73 @@ class CodeAgent extends BaseClaude {
373
492
  }
374
493
  }
375
494
 
495
+ // ── Bash Execution ───────────────────────────────────────────────────────
496
+
497
+ /**
498
+ * Execute a bash command in the working directory.
499
+ * @private
500
+ */
501
+ async _executeBash(command, purpose) {
502
+ if (this._stopped) {
503
+ return { stdout: '', stderr: 'Agent was stopped', exitCode: -1 };
504
+ }
505
+
506
+ if (this.onBeforeExecution) {
507
+ try {
508
+ const allowed = await this.onBeforeExecution(command, 'run_bash');
509
+ if (allowed === false) {
510
+ return { stdout: '', stderr: 'Execution denied by onBeforeExecution callback', exitCode: -1, denied: true };
511
+ }
512
+ } catch (e) {
513
+ log.warn(`onBeforeExecution callback error: ${e.message}`);
514
+ }
515
+ }
516
+
517
+ const result = await new Promise((resolve) => {
518
+ const child = execFile('bash', ['-c', command], {
519
+ cwd: this.workingDirectory,
520
+ timeout: this.timeout,
521
+ env: process.env,
522
+ maxBuffer: 10 * 1024 * 1024
523
+ }, (err, stdout, stderr) => {
524
+ this._activeProcess = null;
525
+ if (err) {
526
+ resolve({
527
+ stdout: err.stdout || stdout || '',
528
+ stderr: (err.stderr || stderr || '') + (err.killed ? '\n[EXECUTION TIMED OUT]' : ''),
529
+ exitCode: err.code || 1
530
+ });
531
+ } else {
532
+ resolve({ stdout: stdout || '', stderr: stderr || '', exitCode: 0 });
533
+ }
534
+ });
535
+ this._activeProcess = child;
536
+ });
537
+
538
+ const totalLen = result.stdout.length + result.stderr.length;
539
+ if (totalLen > MAX_OUTPUT_CHARS) {
540
+ const half = Math.floor(MAX_OUTPUT_CHARS / 2);
541
+ if (result.stdout.length > half) {
542
+ result.stdout = result.stdout.slice(0, half) + '\n...[OUTPUT TRUNCATED]';
543
+ }
544
+ if (result.stderr.length > half) {
545
+ result.stderr = result.stderr.slice(0, half) + '\n...[STDERR TRUNCATED]';
546
+ }
547
+ }
548
+
549
+ this._allExecutions.push({
550
+ code: command, purpose: purpose || null, output: result.stdout, stderr: result.stderr,
551
+ exitCode: result.exitCode, filePath: null, tool: 'run_bash'
552
+ });
553
+
554
+ if (this.onCodeExecution) {
555
+ try { this.onCodeExecution(command, result); }
556
+ catch (e) { log.warn(`onCodeExecution callback error: ${e.message}`); }
557
+ }
558
+
559
+ return result;
560
+ }
561
+
376
562
  /**
377
563
  * @private
378
564
  */
@@ -384,11 +570,102 @@ class CodeAgent extends BaseClaude {
384
570
  return output || '(no output)';
385
571
  }
386
572
 
573
+ // ── Tool Call Dispatch ───────────────────────────────────────────────────
574
+
575
+ /**
576
+ * Handle a tool call by name, dispatching to the appropriate handler.
577
+ * @private
578
+ * @param {string} name - Tool name
579
+ * @param {Object} input - Tool arguments
580
+ * @returns {Promise<{output: string, type: string, data: Object}>}
581
+ */
582
+ async _handleToolCall(name, input) {
583
+ switch (name) {
584
+ case 'execute_code':
585
+ case 'write_and_run_code': {
586
+ const result = await this._executeCode(input.code || '', input.purpose, name);
587
+ return {
588
+ output: this._formatOutput(result),
589
+ type: 'code_execution',
590
+ data: {
591
+ tool: name, code: input.code || '', purpose: input.purpose,
592
+ stdout: result.stdout, stderr: result.stderr, exitCode: result.exitCode,
593
+ denied: result.denied
594
+ }
595
+ };
596
+ }
597
+ case 'write_code': {
598
+ return {
599
+ output: 'Code written successfully.',
600
+ type: 'write',
601
+ data: {
602
+ tool: 'write_code', code: input.code || '',
603
+ purpose: input.purpose, language: input.language || 'javascript'
604
+ }
605
+ };
606
+ }
607
+ case 'fix_code': {
608
+ let execResult = null;
609
+ if (input.execute) {
610
+ execResult = await this._executeCode(input.fixed_code || '', 'fix', 'fix_code');
611
+ }
612
+ return {
613
+ output: input.execute ? this._formatOutput(execResult) : 'Fix recorded.',
614
+ type: 'fix',
615
+ data: {
616
+ tool: 'fix_code',
617
+ originalCode: input.original_code || '',
618
+ fixedCode: input.fixed_code || '',
619
+ explanation: input.explanation,
620
+ executed: !!input.execute,
621
+ stdout: execResult?.stdout, stderr: execResult?.stderr,
622
+ exitCode: execResult?.exitCode, denied: execResult?.denied
623
+ }
624
+ };
625
+ }
626
+ case 'run_bash': {
627
+ const result = await this._executeBash(input.command || '', input.purpose);
628
+ return {
629
+ output: this._formatOutput(result),
630
+ type: 'bash',
631
+ data: {
632
+ tool: 'run_bash', command: input.command || '', purpose: input.purpose,
633
+ stdout: result.stdout, stderr: result.stderr, exitCode: result.exitCode,
634
+ denied: result.denied
635
+ }
636
+ };
637
+ }
638
+ case 'use_skill': {
639
+ const skillName = input.skill_name || '';
640
+ const skill = this._skillRegistry.get(skillName);
641
+ if (!skill) {
642
+ const available = [...this._skillRegistry.keys()].join(', ');
643
+ return {
644
+ output: `Skill "${skillName}" not found. Available skills: ${available || '(none)'}`,
645
+ type: 'skill',
646
+ data: { tool: 'use_skill', skillName, found: false }
647
+ };
648
+ }
649
+ return {
650
+ output: skill.content,
651
+ type: 'skill',
652
+ data: { tool: 'use_skill', skillName: skill.name, content: skill.content, found: true }
653
+ };
654
+ }
655
+ default:
656
+ return {
657
+ output: `Unknown tool: ${name}`,
658
+ type: 'unknown',
659
+ data: { tool: name }
660
+ };
661
+ }
662
+ }
663
+
387
664
  // ── Non-Streaming Chat ───────────────────────────────────────────────────
388
665
 
389
666
  /**
390
667
  * Send a message and get a complete response (non-streaming).
391
- * Automatically handles the code execution loop.
668
+ * Automatically handles the multi-tool execution loop.
392
669
  *
393
670
  * @param {string} message - The user's message
394
671
  * @param {Object} [opts={}] - Per-message options
@@ -398,7 +675,7 @@ class CodeAgent extends BaseClaude {
398
675
  if (!this._initialized) await this.init();
399
676
  this._stopped = false;
400
677
 
401
- const codeExecutions = [];
678
+ const toolCalls = [];
402
679
  let consecutiveFailures = 0;
403
680
 
404
681
  let response = await this._sendMessage(message, { tools: this._tools });
@@ -414,34 +691,29 @@ class CodeAgent extends BaseClaude {
414
691
  for (const block of toolUseBlocks) {
415
692
  if (this._stopped) break;
416
693
 
417
- const code = block.input?.code || '';
418
- const purpose = block.input?.purpose;
419
- const result = await this._executeCode(code, purpose);
694
+ const { output, type, data } = await this._handleToolCall(block.name, block.input || {});
420
695
 
421
- codeExecutions.push({
422
- code,
423
- purpose: this._slugify(purpose),
424
- output: result.stdout,
425
- stderr: result.stderr,
426
- exitCode: result.exitCode
427
- });
696
+ toolCalls.push(data);
428
697
 
429
- if (result.exitCode !== 0 && !result.denied) {
430
- consecutiveFailures++;
431
- } else {
432
- consecutiveFailures = 0;
698
+ // Track consecutive failures for executing tools
699
+ const isExecutingTool = EXECUTING_TOOLS.has(block.name) || (block.name === 'fix_code' && block.input?.execute);
700
+ if (isExecutingTool) {
701
+ if (data.exitCode !== 0 && !data.denied) {
702
+ consecutiveFailures++;
703
+ } else {
704
+ consecutiveFailures = 0;
705
+ }
433
706
  }
434
707
 
435
- let output = this._formatOutput(result);
436
-
708
+ let toolOutput = output;
437
709
  if (consecutiveFailures >= this.codeMaxRetries) {
438
- output += `\n\n[RETRY LIMIT REACHED] You have failed ${this.codeMaxRetries} consecutive attempts. STOP trying to execute code. Instead, respond with: 1) What you were trying to do, 2) The errors you encountered, 3) Questions for the user about how to resolve it.`;
710
+ toolOutput += `\n\n[RETRY LIMIT REACHED] You have failed ${this.codeMaxRetries} consecutive attempts. STOP trying to execute code. Instead, respond with: 1) What you were trying to do, 2) The errors you encountered, 3) Questions for the user about how to resolve it.`;
439
711
  }
440
712
 
441
713
  toolResults.push({
442
714
  type: 'tool_result',
443
715
  tool_use_id: block.id,
444
- content: output
716
+ content: toolOutput
445
717
  });
446
718
  }
447
719
 
@@ -459,9 +731,21 @@ class CodeAgent extends BaseClaude {
459
731
  attempts: 1
460
732
  };
461
733
 
734
+ // Build backward-compat codeExecutions (only execute_code + write_and_run_code)
735
+ const codeExecutions = toolCalls
736
+ .filter(tc => tc.tool === 'execute_code' || tc.tool === 'write_and_run_code' || (tc.tool === 'fix_code' && tc.executed))
737
+ .map(tc => ({
738
+ code: tc.code || tc.fixedCode,
739
+ purpose: this._slugify(tc.purpose),
740
+ output: tc.stdout || '',
741
+ stderr: tc.stderr || '',
742
+ exitCode: tc.exitCode ?? 0
743
+ }));
744
+
462
745
  return {
463
746
  text: this._extractText(response),
464
747
  codeExecutions,
748
+ toolCalls,
465
749
  usage: this.getLastUsage()
466
750
  };
467
751
  }
@@ -473,8 +757,12 @@ class CodeAgent extends BaseClaude {
473
757
  *
474
758
  * Event types:
475
759
  * - `text` — A chunk of the agent's text response
476
- * - `code` — The agent is about to execute code
477
- * - `output` — Code finished executing
760
+ * - `code` — The agent is about to execute code (execute_code or write_and_run_code)
761
+ * - `output` — Code/bash finished executing
762
+ * - `write` — The agent wrote code without executing (write_code)
763
+ * - `fix` — The agent fixed code (fix_code)
764
+ * - `bash` — The agent is about to run a bash command
765
+ * - `skill` — The agent loaded a skill
478
766
  * - `done` — The agent finished
479
767
  *
480
768
  * @param {string} message - The user's message
@@ -485,7 +773,7 @@ class CodeAgent extends BaseClaude {
485
773
  if (!this._initialized) await this.init();
486
774
  this._stopped = false;
487
775
 
488
- const codeExecutions = [];
776
+ const toolCalls = [];
489
777
  let fullText = '';
490
778
  let consecutiveFailures = 0;
491
779
 
@@ -512,53 +800,77 @@ class CodeAgent extends BaseClaude {
512
800
  this._captureMetadata(finalMessage);
513
801
 
514
802
  if (finalMessage.stop_reason !== 'tool_use' || toolUseBlocks.length === 0) {
515
- yield { type: 'done', fullText, codeExecutions, usage: this.getLastUsage() };
803
+ const codeExecutions = toolCalls
804
+ .filter(tc => tc.tool === 'execute_code' || tc.tool === 'write_and_run_code' || (tc.tool === 'fix_code' && tc.executed))
805
+ .map(tc => ({
806
+ code: tc.code || tc.fixedCode,
807
+ purpose: this._slugify(tc.purpose),
808
+ output: tc.stdout || '',
809
+ stderr: tc.stderr || '',
810
+ exitCode: tc.exitCode ?? 0
811
+ }));
812
+ yield { type: 'done', fullText, codeExecutions, toolCalls, usage: this.getLastUsage() };
516
813
  return;
517
814
  }
518
815
 
519
- // Execute code
816
+ // Handle tool calls
520
817
  const toolResults = [];
521
818
  for (const block of toolUseBlocks) {
522
819
  if (this._stopped) break;
523
820
 
524
- const code = block.input?.code || '';
525
- const purpose = block.input?.purpose;
526
- yield { type: 'code', code };
821
+ const toolName = block.name;
822
+ const toolInput = block.input || {};
823
+
824
+ // Emit pre-execution events
825
+ if (toolName === 'write_code') {
826
+ yield { type: 'write', code: toolInput.code, purpose: toolInput.purpose, language: toolInput.language || 'javascript' };
827
+ } else if (toolName === 'fix_code') {
828
+ yield { type: 'fix', originalCode: toolInput.original_code, fixedCode: toolInput.fixed_code, explanation: toolInput.explanation };
829
+ } else if (toolName === 'run_bash') {
830
+ yield { type: 'bash', command: toolInput.command };
831
+ } else if (toolName === 'execute_code' || toolName === 'write_and_run_code') {
832
+ yield { type: 'code', code: toolInput.code };
833
+ }
527
834
 
528
- const result = await this._executeCode(code, purpose);
835
+ const { output, type, data } = await this._handleToolCall(toolName, toolInput);
529
836
 
530
- codeExecutions.push({
531
- code,
532
- purpose: this._slugify(purpose),
533
- output: result.stdout,
534
- stderr: result.stderr,
535
- exitCode: result.exitCode
536
- });
837
+ toolCalls.push(data);
537
838
 
538
- yield {
539
- type: 'output',
540
- code,
541
- stdout: result.stdout,
542
- stderr: result.stderr,
543
- exitCode: result.exitCode
544
- };
839
+ // Emit post-execution output events
840
+ if (data.stdout !== undefined || data.stderr !== undefined) {
841
+ yield {
842
+ type: 'output',
843
+ code: data.code || data.command || data.fixedCode,
844
+ stdout: data.stdout || '',
845
+ stderr: data.stderr || '',
846
+ exitCode: data.exitCode ?? 0
847
+ };
848
+ }
545
849
 
546
- if (result.exitCode !== 0 && !result.denied) {
547
- consecutiveFailures++;
548
- } else {
549
- consecutiveFailures = 0;
850
+ // Emit skill event
851
+ if (toolName === 'use_skill') {
852
+ yield { type: 'skill', skillName: data.skillName, content: data.content, found: data.found };
550
853
  }
551
854
 
552
- let output = this._formatOutput(result);
855
+ // Track consecutive failures
856
+ const isExecutingTool = EXECUTING_TOOLS.has(toolName) || (toolName === 'fix_code' && toolInput.execute);
857
+ if (isExecutingTool) {
858
+ if (data.exitCode !== 0 && !data.denied) {
859
+ consecutiveFailures++;
860
+ } else {
861
+ consecutiveFailures = 0;
862
+ }
863
+ }
553
864
 
865
+ let toolOutput = output;
554
866
  if (consecutiveFailures >= this.codeMaxRetries) {
555
- output += `\n\n[RETRY LIMIT REACHED] You have failed ${this.codeMaxRetries} consecutive attempts. STOP trying to execute code. Instead, respond with: 1) What you were trying to do, 2) The errors you encountered, 3) Questions for the user about how to resolve it.`;
867
+ toolOutput += `\n\n[RETRY LIMIT REACHED] You have failed ${this.codeMaxRetries} consecutive attempts. STOP trying to execute code. Instead, respond with: 1) What you were trying to do, 2) The errors you encountered, 3) Questions for the user about how to resolve it.`;
556
868
  }
557
869
 
558
870
  toolResults.push({
559
871
  type: 'tool_result',
560
872
  tool_use_id: block.id,
561
- content: output
873
+ content: toolOutput
562
874
  });
563
875
  }
564
876
 
@@ -573,21 +885,32 @@ class CodeAgent extends BaseClaude {
573
885
  if (this._stopped) warning = 'Agent was stopped';
574
886
  else if (consecutiveFailures >= this.codeMaxRetries) warning = 'Retry limit reached';
575
887
 
576
- yield { type: 'done', fullText, codeExecutions, usage: this.getLastUsage(), warning };
888
+ const codeExecutions = toolCalls
889
+ .filter(tc => tc.tool === 'execute_code' || tc.tool === 'write_and_run_code' || (tc.tool === 'fix_code' && tc.executed))
890
+ .map(tc => ({
891
+ code: tc.code || tc.fixedCode,
892
+ purpose: this._slugify(tc.purpose),
893
+ output: tc.stdout || '',
894
+ stderr: tc.stderr || '',
895
+ exitCode: tc.exitCode ?? 0
896
+ }));
897
+
898
+ yield { type: 'done', fullText, codeExecutions, toolCalls, usage: this.getLastUsage(), warning };
577
899
  }
578
900
 
579
901
  // ── Dump ─────────────────────────────────────────────────────────────────
580
902
 
581
903
  /**
582
- * Returns all code scripts the agent has written.
583
- * @returns {Array<{fileName: string, script: string}>}
904
+ * Returns all code scripts and bash commands the agent has executed.
905
+ * @returns {Array<{fileName: string, purpose: string|null, script: string, filePath: string|null, tool: string}>}
584
906
  */
585
907
  dump() {
586
908
  return this._allExecutions.map((exec, i) => ({
587
909
  fileName: exec.purpose ? `agent-${exec.purpose}.mjs` : `script-${i + 1}.mjs`,
588
910
  purpose: exec.purpose || null,
589
911
  script: exec.code,
590
- filePath: exec.filePath || null
912
+ filePath: exec.filePath || null,
913
+ tool: exec.tool || 'execute_code'
591
914
  }));
592
915
  }
593
916