ak-gemini 2.0.7 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/code-agent.js CHANGED
@@ -1,19 +1,14 @@
1
1
  /**
2
- * @fileoverview CodeAgent class — AI agent that writes and executes code.
3
- * Instead of traditional tool-calling with many round-trips, the model gets
4
- * a single `execute_code` tool and writes JavaScript that can do everything
5
- * (read files, write files, run commands) in a single script. Output feeds
6
- * back, and the model decides what to do next.
7
- *
8
- * Inspired by the "code mode" philosophy: LLMs are better at writing code
9
- * to call APIs than at calling APIs directly via tool-calling.
2
+ * @fileoverview CodeAgent class — AI agent with multiple code-oriented tools.
3
+ * Provides write_code, execute_code, write_and_run_code, fix_code, run_bash,
4
+ * and (optionally) use_skill tools for autonomous coding tasks.
10
5
  */
11
6
 
12
7
  import BaseGemini from './base.js';
13
8
  import log from './logger.js';
14
9
  import { execFile } from 'node:child_process';
15
10
  import { writeFile, unlink, readdir, readFile, mkdir } from 'node:fs/promises';
16
- import { join, sep } from 'node:path';
11
+ import { join, sep, basename } from 'node:path';
17
12
  import { randomUUID } from 'node:crypto';
18
13
 
19
14
  /**
@@ -26,30 +21,9 @@ const MAX_OUTPUT_CHARS = 50_000;
26
21
  const MAX_FILE_TREE_LINES = 500;
27
22
  const IGNORE_DIRS = new Set(['node_modules', '.git', 'dist', 'coverage', '.next', 'build', '__pycache__']);
28
23
 
29
- /**
30
- * AI agent that writes and executes JavaScript code autonomously. ... what could possibly go wrong, right?
31
- *
32
- * During init, gathers codebase context (file tree + key files) and injects it
33
- * into the system prompt. The model uses the `execute_code` tool to run scripts
34
- * in a Node.js child process that inherits the parent's environment variables.
35
- *
36
- * @example
37
- * ```javascript
38
- * import { CodeAgent } from 'ak-gemini';
39
- *
40
- * const agent = new CodeAgent({
41
- * workingDirectory: '/path/to/my/project',
42
- * onCodeExecution: (code, output) => {
43
- * console.log('Executed:', code.slice(0, 100));
44
- * console.log('Output:', output.stdout);
45
- * }
46
- * });
47
- *
48
- * const result = await agent.chat('List all TODO comments in the codebase');
49
- * console.log(result.text);
50
- * console.log(`Ran ${result.codeExecutions.length} scripts`);
51
- * ```
52
- */
24
+ /** Tools that execute code/commands and can fail */
25
+ const EXECUTING_TOOLS = new Set(['execute_code', 'write_and_run_code', 'run_bash']);
26
+
53
27
  class CodeAgent extends BaseGemini {
54
28
  /**
55
29
  * @param {CodeAgentOptions} [options={}]
@@ -72,6 +46,8 @@ class CodeAgent extends BaseGemini {
72
46
  this.keepArtifacts = options.keepArtifacts ?? false;
73
47
  this.comments = options.comments ?? false;
74
48
  this.maxRetries = options.maxRetries ?? 3;
49
+ this.skills = options.skills || [];
50
+ this.envOverview = options.envOverview || '';
75
51
 
76
52
  // ── Internal state ──
77
53
  this._codebaseContext = null;
@@ -80,65 +56,169 @@ class CodeAgent extends BaseGemini {
80
56
  this._activeProcess = null;
81
57
  this._userSystemPrompt = options.systemPrompt || '';
82
58
  this._allExecutions = [];
59
+ this._skillRegistry = new Map();
60
+
61
+ // ── Tools (built after skill loading; placeholder until init) ──
62
+ this.chatConfig.tools = [this._buildToolDefinitions()];
63
+ this.chatConfig.toolConfig = { functionCallingConfig: { mode: 'AUTO' } };
83
64
 
84
- // ── Single tool: execute_code ──
85
- this.chatConfig.tools = [{
86
- functionDeclarations: [{
65
+ log.debug(`CodeAgent created for directory: ${this.workingDirectory}`);
66
+ }
67
+
68
+ // ── Tool Definitions ─────────────────────────────────────────────────────
69
+
70
+ /**
71
+ * Build tool definitions in Gemini format.
72
+ * use_skill is only included when skills are registered.
73
+ * @private
74
+ * @returns {{ functionDeclarations: Array<Object> }}
75
+ */
76
+ _buildToolDefinitions() {
77
+ /** @type {Array<Object>} */
78
+ const declarations = [
79
+ {
80
+ name: 'write_code',
81
+ description: 'Output code without executing it. Use this when you want to show, propose, or present code to the user without running it.',
82
+ parametersJsonSchema: {
83
+ type: 'object',
84
+ properties: {
85
+ code: { type: 'string', description: 'The code to output.' },
86
+ purpose: { type: 'string', description: 'A short 2-4 word slug describing the code (e.g., "api-client", "data-parser").' },
87
+ language: { type: 'string', description: 'Programming language of the code (default: "javascript").' }
88
+ },
89
+ required: ['code']
90
+ }
91
+ },
92
+ {
87
93
  name: 'execute_code',
88
- description: 'Execute JavaScript code in a Node.js child process. The code has access to all Node.js built-in modules (fs, path, child_process, http, etc.). Use console.log() to produce output that will be returned to you. The code runs in the working directory with the same environment variables as the parent process.',
94
+ description: 'Execute a given piece of JavaScript code in a Node.js child process. Use this when you already have code to run e.g., running code from a previous write_code call, re-running a snippet, or executing code the user provided. Use console.log() for output.',
89
95
  parametersJsonSchema: {
90
96
  type: 'object',
91
97
  properties: {
92
- code: {
93
- type: 'string',
94
- description: 'JavaScript code to execute. Use console.log() for output. You can import any built-in Node.js module.'
95
- },
96
- purpose: {
97
- type: 'string',
98
- description: 'A short 2-4 word slug describing what this script does (e.g., "read-config", "parse-logs", "fetch-api-data"). Used for naming the script file.'
99
- }
98
+ code: { type: 'string', description: 'JavaScript code to execute. Use console.log() for output. Use import syntax (ES modules).' },
99
+ purpose: { type: 'string', description: 'A short 2-4 word slug describing what this script does (e.g., "read-config", "parse-logs").' }
100
100
  },
101
101
  required: ['code']
102
102
  }
103
- }]
104
- }];
105
- this.chatConfig.toolConfig = { functionCallingConfig: { mode: 'AUTO' } };
103
+ },
104
+ {
105
+ name: 'write_and_run_code',
106
+ description: 'Write a fresh solution from scratch and execute it in one step. Use this when you need to figure out the code AND run it — the autonomous, end-to-end tool for solving problems with code.',
107
+ parametersJsonSchema: {
108
+ type: 'object',
109
+ properties: {
110
+ code: { type: 'string', description: 'JavaScript code to write and execute. Use console.log() for output. Use import syntax (ES modules).' },
111
+ purpose: { type: 'string', description: 'A short 2-4 word slug describing what this script does (e.g., "fetch-api-data", "generate-report").' }
112
+ },
113
+ required: ['code']
114
+ }
115
+ },
116
+ {
117
+ name: 'fix_code',
118
+ description: 'Fix broken code. Provide the original and fixed versions with an explanation. Optionally execute the fix to verify it works.',
119
+ parametersJsonSchema: {
120
+ type: 'object',
121
+ properties: {
122
+ original_code: { type: 'string', description: 'The original broken code.' },
123
+ fixed_code: { type: 'string', description: 'The corrected code.' },
124
+ explanation: { type: 'string', description: 'Brief explanation of what was wrong and how it was fixed.' },
125
+ execute: { type: 'boolean', description: 'If true, execute the fixed code to verify it works (default: false).' }
126
+ },
127
+ required: ['original_code', 'fixed_code']
128
+ }
129
+ },
130
+ {
131
+ name: 'run_bash',
132
+ description: 'Execute a shell command in the working directory. Use this for file operations, git commands, installing packages, or any shell task. Prefer this over execute_code for simple shell operations.',
133
+ parametersJsonSchema: {
134
+ type: 'object',
135
+ properties: {
136
+ command: { type: 'string', description: 'The shell command to execute.' },
137
+ purpose: { type: 'string', description: 'A short 2-4 word slug describing the command (e.g., "list-files", "install-deps").' }
138
+ },
139
+ required: ['command']
140
+ }
141
+ }
142
+ ];
106
143
 
107
- log.debug(`CodeAgent created for directory: ${this.workingDirectory}`);
144
+ // Conditionally add use_skill
145
+ if (this._skillRegistry && this._skillRegistry.size > 0) {
146
+ declarations.push({
147
+ name: 'use_skill',
148
+ description: `Load a skill by name to get instructions, templates, or patterns. Available skills: ${[...this._skillRegistry.keys()].join(', ')}`,
149
+ parametersJsonSchema: {
150
+ type: 'object',
151
+ properties: {
152
+ skill_name: { type: 'string', description: 'The name of the skill to load.' }
153
+ },
154
+ required: ['skill_name']
155
+ }
156
+ });
157
+ }
158
+
159
+ return { functionDeclarations: declarations };
108
160
  }
109
161
 
110
162
  // ── Init ─────────────────────────────────────────────────────────────────
111
163
 
112
164
  /**
113
- * Initialize the agent: gather codebase context, build system prompt,
114
- * and create the chat session.
165
+ * Initialize the agent: load skills, gather codebase context, and build system prompt.
115
166
  * @param {boolean} [force=false]
116
167
  */
117
168
  async init(force = false) {
118
169
  if (this.chatSession && !force) return;
119
170
 
171
+ // Load skills
172
+ if (this.skills.length > 0 && (this._skillRegistry.size === 0 || force)) {
173
+ await this._loadSkills();
174
+ }
175
+
176
+ // Rebuild tools (use_skill may now be included)
177
+ this.chatConfig.tools = [this._buildToolDefinitions()];
178
+
120
179
  // Gather codebase context
121
180
  if (!this._contextGathered || force) {
122
181
  await this._gatherCodebaseContext();
123
182
  }
124
183
 
125
184
  // Build augmented system prompt
126
- const systemPrompt = this._buildSystemPrompt();
127
- this.chatConfig.systemInstruction = systemPrompt;
185
+ this.chatConfig.systemInstruction = this._buildSystemPrompt();
128
186
 
129
187
  await super.init(force);
130
188
  }
131
189
 
190
+ // ── Skill Loading ────────────────────────────────────────────────────────
191
+
192
+ /**
193
+ * Load skill files into the skill registry.
194
+ * @private
195
+ */
196
+ async _loadSkills() {
197
+ this._skillRegistry.clear();
198
+
199
+ for (const filePath of this.skills) {
200
+ try {
201
+ const content = await readFile(filePath, 'utf-8');
202
+ // Extract name from YAML frontmatter if present
203
+ let name = basename(filePath).replace(/\.md$/i, '');
204
+ const fmMatch = content.match(/^---\s*\n[\s\S]*?^name:\s*(.+)$/m);
205
+ if (fmMatch) name = fmMatch[1].trim();
206
+ this._skillRegistry.set(name, { name, content, path: filePath });
207
+ log.debug(`Loaded skill: ${name} from ${filePath}`);
208
+ } catch (e) {
209
+ log.warn(`skills: could not load "${filePath}": ${e.message}`);
210
+ }
211
+ }
212
+ }
213
+
132
214
  // ── Context Gathering ────────────────────────────────────────────────────
133
215
 
134
216
  /**
135
- * Gather file tree and key file contents from the working directory.
136
217
  * @private
137
218
  */
138
219
  async _gatherCodebaseContext() {
139
220
  let fileTree = '';
140
221
 
141
- // Get file tree
142
222
  try {
143
223
  fileTree = await this._getFileTreeGit();
144
224
  } catch {
@@ -146,14 +226,12 @@ class CodeAgent extends BaseGemini {
146
226
  fileTree = await this._getFileTreeReaddir(this.workingDirectory, 0, 3);
147
227
  }
148
228
 
149
- // Truncate file tree
150
229
  const lines = fileTree.split('\n');
151
230
  if (lines.length > MAX_FILE_TREE_LINES) {
152
231
  const truncated = lines.slice(0, MAX_FILE_TREE_LINES).join('\n');
153
232
  fileTree = `${truncated}\n... (${lines.length - MAX_FILE_TREE_LINES} more files)`;
154
233
  }
155
234
 
156
- // Extract npm package names (lightweight — just the keys)
157
235
  let npmPackages = [];
158
236
  try {
159
237
  const pkgPath = join(this.workingDirectory, 'package.json');
@@ -164,7 +242,6 @@ class CodeAgent extends BaseGemini {
164
242
  ];
165
243
  } catch { /* no package.json */ }
166
244
 
167
- // Resolve and read important files
168
245
  const importantFileContents = [];
169
246
  if (this.importantFiles.length > 0) {
170
247
  const fileTreeLines = fileTree.split('\n').map(l => l.trim()).filter(Boolean);
@@ -189,19 +266,12 @@ class CodeAgent extends BaseGemini {
189
266
  }
190
267
 
191
268
  /**
192
- * Resolve an importantFiles entry against the file tree.
193
- * Supports exact matches and partial (basename/suffix) matches.
194
269
  * @private
195
- * @param {string} filename
196
- * @param {string[]} fileTreeLines
197
- * @returns {string|null}
198
270
  */
199
271
  _resolveImportantFile(filename, fileTreeLines) {
200
- // Exact match
201
272
  const exact = fileTreeLines.find(line => line === filename);
202
273
  if (exact) return exact;
203
274
 
204
- // Partial match — filename matches end of path
205
275
  const partial = fileTreeLines.find(line =>
206
276
  line.endsWith('/' + filename) || line.endsWith(sep + filename)
207
277
  );
@@ -209,9 +279,7 @@ class CodeAgent extends BaseGemini {
209
279
  }
210
280
 
211
281
  /**
212
- * Get file tree using git ls-files.
213
282
  * @private
214
- * @returns {Promise<string>}
215
283
  */
216
284
  async _getFileTreeGit() {
217
285
  return new Promise((resolve, reject) => {
@@ -227,12 +295,7 @@ class CodeAgent extends BaseGemini {
227
295
  }
228
296
 
229
297
  /**
230
- * Fallback file tree via recursive readdir.
231
298
  * @private
232
- * @param {string} dir
233
- * @param {number} depth
234
- * @param {number} maxDepth
235
- * @returns {Promise<string>}
236
299
  */
237
300
  async _getFileTreeReaddir(dir, depth, maxDepth) {
238
301
  if (depth >= maxDepth) return '';
@@ -253,24 +316,48 @@ class CodeAgent extends BaseGemini {
253
316
  }
254
317
  }
255
318
  } catch {
256
- // Permission errors, etc. — skip
319
+ // Permission errors, etc.
257
320
  }
258
321
  return entries.join('\n');
259
322
  }
260
323
 
261
324
  /**
262
- * Build the full system prompt with codebase context.
263
325
  * @private
264
- * @returns {string}
265
326
  */
266
327
  _buildSystemPrompt() {
267
328
  const { fileTree, npmPackages, importantFileContents } = this._codebaseContext || { fileTree: '', npmPackages: [], importantFileContents: [] };
268
329
 
269
330
  let prompt = `You are a coding agent working in ${this.workingDirectory}.
270
331
 
271
- ## Instructions
272
- - Use the execute_code tool to accomplish tasks by writing JavaScript code
273
- - Always provide a short descriptive \`purpose\` parameter (2-4 word slug like "read-config") when calling execute_code
332
+ ## Available Tools
333
+
334
+ ### write_code
335
+ Output code without executing it. Use when showing, proposing, or presenting code to the user.
336
+
337
+ ### execute_code
338
+ Run a given piece of JavaScript code. Use when you already have code to run — e.g., from a previous write_code call, re-running a snippet, or executing user-provided code.
339
+
340
+ ### write_and_run_code
341
+ Write a fresh solution from scratch and execute it in one step. The autonomous, end-to-end tool for solving problems with code.
342
+
343
+ ### fix_code
344
+ Fix broken code by providing original and fixed versions. Set execute=true to verify the fix works.
345
+
346
+ ### run_bash
347
+ Run shell commands directly (e.g., ls, grep, curl, git, npm, cat). Prefer this over execute_code for simple shell operations.`;
348
+
349
+ if (this._skillRegistry.size > 0) {
350
+ prompt += `
351
+
352
+ ### use_skill
353
+ Load a skill by name to get detailed instructions and templates. Available skills: ${[...this._skillRegistry.keys()].join(', ')}`;
354
+ }
355
+
356
+ prompt += `
357
+
358
+ ## Code Execution Rules
359
+ These rules apply when using execute_code, write_and_run_code, or fix_code (with execute=true):
360
+ - Always provide a short descriptive \`purpose\` parameter (2-4 word slug like "read-config")
274
361
  - Your code runs in a Node.js child process with access to all built-in modules
275
362
  - IMPORTANT: Your code runs as an ES module (.mjs). Use import syntax, NOT require():
276
363
  - import fs from 'fs';
@@ -278,9 +365,7 @@ class CodeAgent extends BaseGemini {
278
365
  - import { execSync } from 'child_process';
279
366
  - Use console.log() to produce output — that's how results are returned to you
280
367
  - Write efficient scripts that do multiple things per execution when possible
281
- - For parallel async operations, use Promise.all():
282
- const [a, b] = await Promise.all([fetchA(), fetchB()]);
283
- - Read files with fs.readFileSync() when you need to understand their contents
368
+ - For parallel async operations, use Promise.all()
284
369
  - Handle errors in your scripts with try/catch so you get useful error messages
285
370
  - Top-level await is supported
286
371
  - The working directory is: ${this.workingDirectory}`;
@@ -310,16 +395,17 @@ class CodeAgent extends BaseGemini {
310
395
  prompt += `\n\n## Additional Instructions\n${this._userSystemPrompt}`;
311
396
  }
312
397
 
398
+ if (this.envOverview) {
399
+ prompt += `\n\n## Environment Overview\n${this.envOverview}`;
400
+ }
401
+
313
402
  return prompt;
314
403
  }
315
404
 
316
405
  // ── Code Execution ───────────────────────────────────────────────────────
317
406
 
318
407
  /**
319
- * Generate a sanitized slug from a purpose string.
320
408
  * @private
321
- * @param {string} [purpose]
322
- * @returns {string}
323
409
  */
324
410
  _slugify(purpose) {
325
411
  if (!purpose) return randomUUID().slice(0, 8);
@@ -327,22 +413,16 @@ class CodeAgent extends BaseGemini {
327
413
  }
328
414
 
329
415
  /**
330
- * Execute a JavaScript code string in a child process.
331
416
  * @private
332
- * @param {string} code - JavaScript code to execute
333
- * @param {string} [purpose] - Short description for file naming
334
- * @returns {Promise<{stdout: string, stderr: string, exitCode: number, denied?: boolean}>}
335
417
  */
336
- async _executeCode(code, purpose) {
337
- // Check if stopped
418
+ async _executeCode(code, purpose, toolName) {
338
419
  if (this._stopped) {
339
420
  return { stdout: '', stderr: 'Agent was stopped', exitCode: -1 };
340
421
  }
341
422
 
342
- // Check onBeforeExecution gate
343
423
  if (this.onBeforeExecution) {
344
424
  try {
345
- const allowed = await this.onBeforeExecution(code);
425
+ const allowed = await this.onBeforeExecution(code, toolName || 'execute_code');
346
426
  if (allowed === false) {
347
427
  return { stdout: '', stderr: 'Execution denied by onBeforeExecution callback', exitCode: -1, denied: true };
348
428
  }
@@ -351,17 +431,14 @@ class CodeAgent extends BaseGemini {
351
431
  }
352
432
  }
353
433
 
354
- // Ensure writeDir exists
355
434
  await mkdir(this.writeDir, { recursive: true });
356
435
 
357
436
  const slug = this._slugify(purpose);
358
437
  const tempFile = join(this.writeDir, `agent-${slug}-${Date.now()}.mjs`);
359
438
 
360
439
  try {
361
- // Write code to temp file
362
440
  await writeFile(tempFile, code, 'utf-8');
363
441
 
364
- // Execute in child process
365
442
  const result = await new Promise((resolve) => {
366
443
  const child = execFile('node', [tempFile], {
367
444
  cwd: this.workingDirectory,
@@ -383,7 +460,6 @@ class CodeAgent extends BaseGemini {
383
460
  this._activeProcess = child;
384
461
  });
385
462
 
386
- // Truncate output
387
463
  const totalLen = result.stdout.length + result.stderr.length;
388
464
  if (totalLen > MAX_OUTPUT_CHARS) {
389
465
  const half = Math.floor(MAX_OUTPUT_CHARS / 2);
@@ -395,13 +471,12 @@ class CodeAgent extends BaseGemini {
395
471
  }
396
472
  }
397
473
 
398
- // Track execution
399
474
  this._allExecutions.push({
400
475
  code, purpose: purpose || null, output: result.stdout, stderr: result.stderr,
401
- exitCode: result.exitCode, filePath: this.keepArtifacts ? tempFile : null
476
+ exitCode: result.exitCode, filePath: this.keepArtifacts ? tempFile : null,
477
+ tool: toolName || 'execute_code'
402
478
  });
403
479
 
404
- // Fire notification callback
405
480
  if (this.onCodeExecution) {
406
481
  try { this.onCodeExecution(code, result); }
407
482
  catch (e) { log.warn(`onCodeExecution callback error: ${e.message}`); }
@@ -409,7 +484,6 @@ class CodeAgent extends BaseGemini {
409
484
 
410
485
  return result;
411
486
  } finally {
412
- // Cleanup temp file (unless keeping artifacts)
413
487
  if (!this.keepArtifacts) {
414
488
  try { await unlink(tempFile); }
415
489
  catch { /* file may already be gone */ }
@@ -417,11 +491,75 @@ class CodeAgent extends BaseGemini {
417
491
  }
418
492
  }
419
493
 
494
+ // ── Bash Execution ───────────────────────────────────────────────────────
495
+
496
+ /**
497
+ * Execute a bash command in the working directory.
498
+ * @private
499
+ */
500
+ async _executeBash(command, purpose) {
501
+ if (this._stopped) {
502
+ return { stdout: '', stderr: 'Agent was stopped', exitCode: -1 };
503
+ }
504
+
505
+ if (this.onBeforeExecution) {
506
+ try {
507
+ const allowed = await this.onBeforeExecution(command, 'run_bash');
508
+ if (allowed === false) {
509
+ return { stdout: '', stderr: 'Execution denied by onBeforeExecution callback', exitCode: -1, denied: true };
510
+ }
511
+ } catch (e) {
512
+ log.warn(`onBeforeExecution callback error: ${e.message}`);
513
+ }
514
+ }
515
+
516
+ const result = await new Promise((resolve) => {
517
+ const child = execFile('bash', ['-c', command], {
518
+ cwd: this.workingDirectory,
519
+ timeout: this.timeout,
520
+ env: process.env,
521
+ maxBuffer: 10 * 1024 * 1024
522
+ }, (err, stdout, stderr) => {
523
+ this._activeProcess = null;
524
+ if (err) {
525
+ resolve({
526
+ stdout: err.stdout || stdout || '',
527
+ stderr: (err.stderr || stderr || '') + (err.killed ? '\n[EXECUTION TIMED OUT]' : ''),
528
+ exitCode: err.code || 1
529
+ });
530
+ } else {
531
+ resolve({ stdout: stdout || '', stderr: stderr || '', exitCode: 0 });
532
+ }
533
+ });
534
+ this._activeProcess = child;
535
+ });
536
+
537
+ const totalLen = result.stdout.length + result.stderr.length;
538
+ if (totalLen > MAX_OUTPUT_CHARS) {
539
+ const half = Math.floor(MAX_OUTPUT_CHARS / 2);
540
+ if (result.stdout.length > half) {
541
+ result.stdout = result.stdout.slice(0, half) + '\n...[OUTPUT TRUNCATED]';
542
+ }
543
+ if (result.stderr.length > half) {
544
+ result.stderr = result.stderr.slice(0, half) + '\n...[STDERR TRUNCATED]';
545
+ }
546
+ }
547
+
548
+ this._allExecutions.push({
549
+ code: command, purpose: purpose || null, output: result.stdout, stderr: result.stderr,
550
+ exitCode: result.exitCode, filePath: null, tool: 'run_bash'
551
+ });
552
+
553
+ if (this.onCodeExecution) {
554
+ try { this.onCodeExecution(command, result); }
555
+ catch (e) { log.warn(`onCodeExecution callback error: ${e.message}`); }
556
+ }
557
+
558
+ return result;
559
+ }
560
+
420
561
  /**
421
- * Format execution result as a string for the model.
422
562
  * @private
423
- * @param {{stdout: string, stderr: string, exitCode: number}} result
424
- * @returns {string}
425
563
  */
426
564
  _formatOutput(result) {
427
565
  let output = '';
@@ -431,22 +569,113 @@ class CodeAgent extends BaseGemini {
431
569
  return output || '(no output)';
432
570
  }
433
571
 
572
+ // ── Tool Call Dispatch ───────────────────────────────────────────────────
573
+
574
+ /**
575
+ * Handle a tool call by name, dispatching to the appropriate handler.
576
+ * @private
577
+ * @param {string} name - Tool name
578
+ * @param {Object} input - Tool arguments
579
+ * @returns {Promise<{output: string, type: string, data: Object}>}
580
+ */
581
+ async _handleToolCall(name, input) {
582
+ switch (name) {
583
+ case 'execute_code':
584
+ case 'write_and_run_code': {
585
+ const result = await this._executeCode(input.code || '', input.purpose, name);
586
+ return {
587
+ output: this._formatOutput(result),
588
+ type: 'code_execution',
589
+ data: {
590
+ tool: name, code: input.code || '', purpose: input.purpose,
591
+ stdout: result.stdout, stderr: result.stderr, exitCode: result.exitCode,
592
+ denied: result.denied
593
+ }
594
+ };
595
+ }
596
+ case 'write_code': {
597
+ return {
598
+ output: 'Code written successfully.',
599
+ type: 'write',
600
+ data: {
601
+ tool: 'write_code', code: input.code || '',
602
+ purpose: input.purpose, language: input.language || 'javascript'
603
+ }
604
+ };
605
+ }
606
+ case 'fix_code': {
607
+ let execResult = null;
608
+ if (input.execute) {
609
+ execResult = await this._executeCode(input.fixed_code || '', 'fix', 'fix_code');
610
+ }
611
+ return {
612
+ output: input.execute ? this._formatOutput(execResult) : 'Fix recorded.',
613
+ type: 'fix',
614
+ data: {
615
+ tool: 'fix_code',
616
+ originalCode: input.original_code || '',
617
+ fixedCode: input.fixed_code || '',
618
+ explanation: input.explanation,
619
+ executed: !!input.execute,
620
+ stdout: execResult?.stdout, stderr: execResult?.stderr,
621
+ exitCode: execResult?.exitCode, denied: execResult?.denied
622
+ }
623
+ };
624
+ }
625
+ case 'run_bash': {
626
+ const result = await this._executeBash(input.command || '', input.purpose);
627
+ return {
628
+ output: this._formatOutput(result),
629
+ type: 'bash',
630
+ data: {
631
+ tool: 'run_bash', command: input.command || '', purpose: input.purpose,
632
+ stdout: result.stdout, stderr: result.stderr, exitCode: result.exitCode,
633
+ denied: result.denied
634
+ }
635
+ };
636
+ }
637
+ case 'use_skill': {
638
+ const skillName = input.skill_name || '';
639
+ const skill = this._skillRegistry.get(skillName);
640
+ if (!skill) {
641
+ const available = [...this._skillRegistry.keys()].join(', ');
642
+ return {
643
+ output: `Skill "${skillName}" not found. Available skills: ${available || '(none)'}`,
644
+ type: 'skill',
645
+ data: { tool: 'use_skill', skillName, found: false }
646
+ };
647
+ }
648
+ return {
649
+ output: skill.content,
650
+ type: 'skill',
651
+ data: { tool: 'use_skill', skillName: skill.name, content: skill.content, found: true }
652
+ };
653
+ }
654
+ default:
655
+ return {
656
+ output: `Unknown tool: ${name}`,
657
+ type: 'unknown',
658
+ data: { tool: name }
659
+ };
660
+ }
661
+ }
662
+
434
663
  // ── Non-Streaming Chat ───────────────────────────────────────────────────
435
664
 
436
665
  /**
437
666
  * Send a message and get a complete response (non-streaming).
438
- * Automatically handles the code execution loop.
667
+ * Automatically handles the multi-tool execution loop.
439
668
  *
440
669
  * @param {string} message - The user's message
441
670
  * @param {Object} [opts={}] - Per-message options
442
671
  * @param {Record<string, string>} [opts.labels] - Per-message billing labels
443
- * @returns {Promise<CodeAgentResponse>} Response with text, codeExecutions, and usage
672
+ * @returns {Promise<CodeAgentResponse>}
444
673
  */
445
674
  async chat(message, opts = {}) {
446
675
  if (!this.chatSession) await this.init();
447
676
  this._stopped = false;
448
677
 
449
- const codeExecutions = [];
678
+ const toolCalls = [];
450
679
  let consecutiveFailures = 0;
451
680
 
452
681
  let response = await this._withRetry(() => this.chatSession.sendMessage({ message }));
@@ -461,34 +690,29 @@ class CodeAgent extends BaseGemini {
461
690
  for (const call of functionCalls) {
462
691
  if (this._stopped) break;
463
692
 
464
- const code = call.args?.code || '';
465
- const purpose = call.args?.purpose;
466
- const result = await this._executeCode(code, purpose);
693
+ const { output, type, data } = await this._handleToolCall(call.name, call.args || {});
467
694
 
468
- codeExecutions.push({
469
- code,
470
- purpose: this._slugify(purpose),
471
- output: result.stdout,
472
- stderr: result.stderr,
473
- exitCode: result.exitCode
474
- });
695
+ toolCalls.push(data);
475
696
 
476
- if (result.exitCode !== 0 && !result.denied) {
477
- consecutiveFailures++;
478
- } else {
479
- consecutiveFailures = 0;
697
+ // Track consecutive failures for executing tools
698
+ const isExecutingTool = EXECUTING_TOOLS.has(call.name) || (call.name === 'fix_code' && call.args?.execute);
699
+ if (isExecutingTool) {
700
+ if (data.exitCode !== 0 && !data.denied) {
701
+ consecutiveFailures++;
702
+ } else {
703
+ consecutiveFailures = 0;
704
+ }
480
705
  }
481
706
 
482
- let output = this._formatOutput(result);
483
-
707
+ let toolOutput = output;
484
708
  if (consecutiveFailures >= this.maxRetries) {
485
- output += `\n\n[RETRY LIMIT REACHED] You have failed ${this.maxRetries} consecutive attempts. STOP trying to execute code. Instead, respond with: 1) What you were trying to do, 2) The errors you encountered, 3) Questions for the user about how to resolve it.`;
709
+ toolOutput += `\n\n[RETRY LIMIT REACHED] You have failed ${this.maxRetries} consecutive attempts. STOP trying to execute code. Instead, respond with: 1) What you were trying to do, 2) The errors you encountered, 3) Questions for the user about how to resolve it.`;
486
710
  }
487
711
 
488
712
  results.push({
489
713
  id: call.id,
490
714
  name: call.name,
491
- result: output
715
+ result: toolOutput
492
716
  });
493
717
  }
494
718
 
@@ -517,9 +741,21 @@ class CodeAgent extends BaseGemini {
517
741
  attempts: 1
518
742
  };
519
743
 
744
+ // Build backward-compat codeExecutions (only execute_code + write_and_run_code + fix_code with execute)
745
+ const codeExecutions = toolCalls
746
+ .filter(tc => tc.tool === 'execute_code' || tc.tool === 'write_and_run_code' || (tc.tool === 'fix_code' && tc.executed))
747
+ .map(tc => ({
748
+ code: tc.code || tc.fixedCode,
749
+ purpose: this._slugify(tc.purpose),
750
+ output: tc.stdout || '',
751
+ stderr: tc.stderr || '',
752
+ exitCode: tc.exitCode ?? 0
753
+ }));
754
+
520
755
  return {
521
756
  text: response.text || '',
522
757
  codeExecutions,
758
+ toolCalls,
523
759
  usage: this.getLastUsage()
524
760
  };
525
761
  }
@@ -528,23 +764,26 @@ class CodeAgent extends BaseGemini {
528
764
 
529
765
  /**
530
766
  * Send a message and stream the response as events.
531
- * Automatically handles the code execution loop between streamed rounds.
532
767
  *
533
768
  * Event types:
534
769
  * - `text` — A chunk of the agent's text response
535
- * - `code` — The agent is about to execute code
536
- * - `output` — Code finished executing
770
+ * - `code` — The agent is about to execute code (execute_code or write_and_run_code)
771
+ * - `output` — Code/bash finished executing
772
+ * - `write` — The agent wrote code without executing (write_code)
773
+ * - `fix` — The agent fixed code (fix_code)
774
+ * - `bash` — The agent is about to run a bash command
775
+ * - `skill` — The agent loaded a skill
537
776
  * - `done` — The agent finished
538
777
  *
539
778
  * @param {string} message - The user's message
540
- * @param {Object} [opts={}] - Per-message options
779
+ * @param {Object} [opts={}]
541
780
  * @yields {CodeAgentStreamEvent}
542
781
  */
543
782
  async *stream(message, opts = {}) {
544
783
  if (!this.chatSession) await this.init();
545
784
  this._stopped = false;
546
785
 
547
- const codeExecutions = [];
786
+ const toolCalls = [];
548
787
  let fullText = '';
549
788
  let consecutiveFailures = 0;
550
789
 
@@ -568,58 +807,77 @@ class CodeAgent extends BaseGemini {
568
807
 
569
808
  // No function calls — we're done
570
809
  if (functionCalls.length === 0) {
571
- yield {
572
- type: 'done',
573
- fullText,
574
- codeExecutions,
575
- usage: this.getLastUsage()
576
- };
810
+ const codeExecutions = toolCalls
811
+ .filter(tc => tc.tool === 'execute_code' || tc.tool === 'write_and_run_code' || (tc.tool === 'fix_code' && tc.executed))
812
+ .map(tc => ({
813
+ code: tc.code || tc.fixedCode,
814
+ purpose: this._slugify(tc.purpose),
815
+ output: tc.stdout || '',
816
+ stderr: tc.stderr || '',
817
+ exitCode: tc.exitCode ?? 0
818
+ }));
819
+ yield { type: 'done', fullText, codeExecutions, toolCalls, usage: this.getLastUsage() };
577
820
  return;
578
821
  }
579
822
 
580
- // Execute code sequentially so we can yield events
823
+ // Handle tool calls
581
824
  const results = [];
582
825
  for (const call of functionCalls) {
583
826
  if (this._stopped) break;
584
827
 
585
- const code = call.args?.code || '';
586
- const purpose = call.args?.purpose;
587
- yield { type: 'code', code };
828
+ const toolName = call.name;
829
+ const toolInput = call.args || {};
830
+
831
+ // Emit pre-execution events
832
+ if (toolName === 'write_code') {
833
+ yield { type: 'write', code: toolInput.code, purpose: toolInput.purpose, language: toolInput.language || 'javascript' };
834
+ } else if (toolName === 'fix_code') {
835
+ yield { type: 'fix', originalCode: toolInput.original_code, fixedCode: toolInput.fixed_code, explanation: toolInput.explanation };
836
+ } else if (toolName === 'run_bash') {
837
+ yield { type: 'bash', command: toolInput.command };
838
+ } else if (toolName === 'execute_code' || toolName === 'write_and_run_code') {
839
+ yield { type: 'code', code: toolInput.code };
840
+ }
588
841
 
589
- const result = await this._executeCode(code, purpose);
842
+ const { output, type, data } = await this._handleToolCall(toolName, toolInput);
590
843
 
591
- codeExecutions.push({
592
- code,
593
- purpose: this._slugify(purpose),
594
- output: result.stdout,
595
- stderr: result.stderr,
596
- exitCode: result.exitCode
597
- });
844
+ toolCalls.push(data);
598
845
 
599
- yield {
600
- type: 'output',
601
- code,
602
- stdout: result.stdout,
603
- stderr: result.stderr,
604
- exitCode: result.exitCode
605
- };
846
+ // Emit post-execution output events
847
+ if (data.stdout !== undefined || data.stderr !== undefined) {
848
+ yield {
849
+ type: 'output',
850
+ code: data.code || data.command || data.fixedCode,
851
+ stdout: data.stdout || '',
852
+ stderr: data.stderr || '',
853
+ exitCode: data.exitCode ?? 0
854
+ };
855
+ }
606
856
 
607
- if (result.exitCode !== 0 && !result.denied) {
608
- consecutiveFailures++;
609
- } else {
610
- consecutiveFailures = 0;
857
+ // Emit skill event
858
+ if (toolName === 'use_skill') {
859
+ yield { type: 'skill', skillName: data.skillName, content: data.content, found: data.found };
611
860
  }
612
861
 
613
- let output = this._formatOutput(result);
862
+ // Track consecutive failures
863
+ const isExecutingTool = EXECUTING_TOOLS.has(toolName) || (toolName === 'fix_code' && toolInput.execute);
864
+ if (isExecutingTool) {
865
+ if (data.exitCode !== 0 && !data.denied) {
866
+ consecutiveFailures++;
867
+ } else {
868
+ consecutiveFailures = 0;
869
+ }
870
+ }
614
871
 
872
+ let toolOutput = output;
615
873
  if (consecutiveFailures >= this.maxRetries) {
616
- output += `\n\n[RETRY LIMIT REACHED] You have failed ${this.maxRetries} consecutive attempts. STOP trying to execute code. Instead, respond with: 1) What you were trying to do, 2) The errors you encountered, 3) Questions for the user about how to resolve it.`;
874
+ toolOutput += `\n\n[RETRY LIMIT REACHED] You have failed ${this.maxRetries} consecutive attempts. STOP trying to execute code. Instead, respond with: 1) What you were trying to do, 2) The errors you encountered, 3) Questions for the user about how to resolve it.`;
617
875
  }
618
876
 
619
877
  results.push({
620
878
  id: call.id,
621
879
  name: call.name,
622
- result: output
880
+ result: toolOutput
623
881
  });
624
882
  }
625
883
 
@@ -644,35 +902,39 @@ class CodeAgent extends BaseGemini {
644
902
  if (this._stopped) warning = 'Agent was stopped';
645
903
  else if (consecutiveFailures >= this.maxRetries) warning = 'Retry limit reached';
646
904
 
647
- yield {
648
- type: 'done',
649
- fullText,
650
- codeExecutions,
651
- usage: this.getLastUsage(),
652
- warning
653
- };
905
+ const codeExecutions = toolCalls
906
+ .filter(tc => tc.tool === 'execute_code' || tc.tool === 'write_and_run_code' || (tc.tool === 'fix_code' && tc.executed))
907
+ .map(tc => ({
908
+ code: tc.code || tc.fixedCode,
909
+ purpose: this._slugify(tc.purpose),
910
+ output: tc.stdout || '',
911
+ stderr: tc.stderr || '',
912
+ exitCode: tc.exitCode ?? 0
913
+ }));
914
+
915
+ yield { type: 'done', fullText, codeExecutions, toolCalls, usage: this.getLastUsage(), warning };
654
916
  }
655
917
 
656
918
  // ── Dump ─────────────────────────────────────────────────────────────────
657
919
 
658
920
  /**
659
- * Returns all code scripts the agent has written across all chat/stream calls.
660
- * @returns {Array<{fileName: string, script: string}>}
921
+ * Returns all code scripts and bash commands the agent has executed.
922
+ * @returns {Array<{fileName: string, purpose: string|null, script: string, filePath: string|null, tool: string}>}
661
923
  */
662
924
  dump() {
663
925
  return this._allExecutions.map((exec, i) => ({
664
926
  fileName: exec.purpose ? `agent-${exec.purpose}.mjs` : `script-${i + 1}.mjs`,
665
927
  purpose: exec.purpose || null,
666
928
  script: exec.code,
667
- filePath: exec.filePath || null
929
+ filePath: exec.filePath || null,
930
+ tool: exec.tool || 'execute_code'
668
931
  }));
669
932
  }
670
933
 
671
934
  // ── Stop ─────────────────────────────────────────────────────────────────
672
935
 
673
936
  /**
674
- * Stop the agent before the next code execution.
675
- * If a child process is currently running, it will be killed.
937
+ * Stop the agent. Kills any running child process.
676
938
  */
677
939
  stop() {
678
940
  this._stopped = true;