ak-gemini 1.2.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/code-agent.js ADDED
@@ -0,0 +1,563 @@
1
+ /**
2
+ * @fileoverview CodeAgent class — AI agent that writes and executes code.
3
+ * Instead of traditional tool-calling with many round-trips, the model gets
4
+ * a single `execute_code` tool and writes JavaScript that can do everything
5
+ * (read files, write files, run commands) in a single script. Output feeds
6
+ * back, and the model decides what to do next.
7
+ *
8
+ * Inspired by the "code mode" philosophy: LLMs are better at writing code
9
+ * to call APIs than at calling APIs directly via tool-calling.
10
+ */
11
+
12
+ import BaseGemini from './base.js';
13
+ import log from './logger.js';
14
+ import { execFile } from 'node:child_process';
15
+ import { writeFile, unlink, readdir, readFile } from 'node:fs/promises';
16
+ import { join } from 'node:path';
17
+ import { randomUUID } from 'node:crypto';
18
+
19
+ /**
20
+ * @typedef {import('./types').CodeAgentOptions} CodeAgentOptions
21
+ * @typedef {import('./types').CodeAgentResponse} CodeAgentResponse
22
+ * @typedef {import('./types').CodeAgentStreamEvent} CodeAgentStreamEvent
23
+ */
24
+
25
// Combined stdout + stderr length above which _executeCode truncates each stream to half this budget.
+ const MAX_OUTPUT_CHARS = 50_000;
26
// Maximum number of file-tree lines kept in the codebase context; the rest are summarized as "... (N more files)".
+ const MAX_FILE_TREE_LINES = 500;
27
// Entry names skipped by the readdir-based file-tree fallback.
+ const IGNORE_DIRS = new Set(['node_modules', '.git', 'dist', 'coverage', '.next', 'build', '__pycache__']);
28
+
29
+ /**
30
+ * AI agent that writes and executes JavaScript code autonomously.
31
+ *
32
+ * During init, gathers codebase context (file tree + key files) and injects it
33
+ * into the system prompt. The model uses the `execute_code` tool to run scripts
34
+ * in a Node.js child process that inherits the parent's environment variables.
35
+ *
36
+ * @example
37
+ * ```javascript
38
+ * import { CodeAgent } from 'ak-gemini';
39
+ *
40
+ * const agent = new CodeAgent({
41
+ * workingDirectory: '/path/to/my/project',
42
+ * onCodeExecution: (code, output) => {
43
+ * console.log('Executed:', code.slice(0, 100));
44
+ * console.log('Output:', output.stdout);
45
+ * }
46
+ * });
47
+ *
48
+ * const result = await agent.chat('List all TODO comments in the codebase');
49
+ * console.log(result.text);
50
+ * console.log(`Ran ${result.codeExecutions.length} scripts`);
51
+ * ```
52
+ */
53
class CodeAgent extends BaseGemini {
	/**
	 * Create a code-writing agent.
	 *
	 * @param {CodeAgentOptions} [options={}] - Agent configuration. Fields read here:
	 *   - `workingDirectory` — directory scripts run in (default: `process.cwd()`)
	 *   - `maxRounds` — maximum tool-execution rounds per message (default: 10; `0` disables execution)
	 *   - `timeout` — per-script timeout in ms (default: 30000; `0` means no timeout, per `execFile`)
	 *   - `onBeforeExecution` — async gate called with the code; returning `false` denies execution
	 *   - `onCodeExecution` — notification callback called with `(code, result)` after each run
	 *   - `systemPrompt` — extra instructions appended to the generated system prompt
	 *   The full object is also forwarded to the BaseGemini constructor.
	 */
	constructor(options = {}) {
		// Always hand the base class an explicit systemPrompt.
		// NOTE(review): presumably this suppresses a default prompt in BaseGemini — confirm.
		if (options.systemPrompt === undefined) {
			options = { ...options, systemPrompt: '' };
		}

		super(options);

		// ── Agent config ──
		this.workingDirectory = options.workingDirectory || process.cwd();
		// `??` rather than `||` so an explicit 0 is honored instead of being replaced by the default.
		this.maxRounds = options.maxRounds ?? 10;
		this.timeout = options.timeout ?? 30_000;
		this.onBeforeExecution = options.onBeforeExecution || null;
		this.onCodeExecution = options.onCodeExecution || null;

		// ── Internal state ──
		this._codebaseContext = null;
		this._contextGathered = false;
		this._stopped = false;
		this._activeProcess = null;
		this._userSystemPrompt = options.systemPrompt || '';
		this._allExecutions = [];

		// ── Single tool: execute_code ──
		this.chatConfig.tools = [{
			functionDeclarations: [{
				name: 'execute_code',
				description: 'Execute JavaScript code in a Node.js child process. The code has access to all Node.js built-in modules (fs, path, child_process, http, etc.). Use console.log() to produce output that will be returned to you. The code runs in the working directory with the same environment variables as the parent process.',
				parametersJsonSchema: {
					type: 'object',
					properties: {
						code: {
							type: 'string',
							description: 'JavaScript code to execute. Use console.log() for output. You can import any built-in Node.js module.'
						}
					},
					required: ['code']
				}
			}]
		}];
		this.chatConfig.toolConfig = { functionCallingConfig: { mode: 'AUTO' } };

		log.debug(`CodeAgent created for directory: ${this.workingDirectory}`);
	}

	// ── Init ─────────────────────────────────────────────────────────────────

	/**
	 * Initialize the agent: gather codebase context (once, or again when forced),
	 * install the generated system prompt on `chatConfig`, then delegate to the
	 * base class `init`.
	 *
	 * @param {boolean} [force=false] - Re-gather context and re-initialize even if a chat session exists.
	 * @returns {Promise<void>}
	 */
	async init(force = false) {
		if (this.chatSession && !force) return;

		if (!this._contextGathered || force) {
			await this._gatherCodebaseContext();
		}

		this.chatConfig.systemInstruction = this._buildSystemPrompt();

		await super.init(force);
	}

	// ── Context Gathering ────────────────────────────────────────────────────

	/**
	 * Collect the file tree and the npm dependency names of the working directory
	 * and store them on `this._codebaseContext`.
	 *
	 * The tree comes from `git ls-files`; if that fails for any reason the method
	 * falls back to a depth-limited `readdir` walk.
	 *
	 * @private
	 * @returns {Promise<void>}
	 */
	async _gatherCodebaseContext() {
		let fileTree = '';

		try {
			fileTree = await this._getFileTreeGit();
		} catch {
			log.debug('git ls-files failed, falling back to readdir');
			fileTree = await this._getFileTreeReaddir(this.workingDirectory, 0, 3);
		}

		// Cap the tree so a huge repository does not blow up the system prompt.
		const lines = fileTree.split('\n');
		if (lines.length > MAX_FILE_TREE_LINES) {
			const truncated = lines.slice(0, MAX_FILE_TREE_LINES).join('\n');
			fileTree = `${truncated}\n... (${lines.length - MAX_FILE_TREE_LINES} more files)`;
		}

		// Only the dependency names are recorded, not versions or file contents.
		let npmPackages = [];
		try {
			const pkgPath = join(this.workingDirectory, 'package.json');
			const pkg = JSON.parse(await readFile(pkgPath, 'utf-8'));
			npmPackages = [
				...Object.keys(pkg.dependencies || {}),
				...Object.keys(pkg.devDependencies || {})
			];
		} catch { /* missing or unparsable package.json — the package list stays empty */ }

		this._codebaseContext = { fileTree, npmPackages };
		this._contextGathered = true;
	}

	/**
	 * Get the file tree by running `git ls-files` in the working directory
	 * (5 s timeout, 5 MiB output buffer).
	 *
	 * @private
	 * @returns {Promise<string>} Trimmed stdout of `git ls-files`; rejects with the `execFile` error on failure.
	 */
	async _getFileTreeGit() {
		return new Promise((resolve, reject) => {
			execFile('git', ['ls-files'], {
				cwd: this.workingDirectory,
				timeout: 5000,
				maxBuffer: 5 * 1024 * 1024
			}, (err, stdout) => {
				if (err) return reject(err);
				resolve(stdout.trim());
			});
		});
	}

	/**
	 * Fallback file tree via a recursive, depth-limited `readdir` walk.
	 *
	 * Entries named in `IGNORE_DIRS` are skipped, as are dot-directories at the
	 * top level. Directories are listed with a trailing `/` followed by their
	 * contents. Errors while reading a directory (e.g. permissions) are
	 * swallowed and whatever was collected so far is returned.
	 *
	 * @private
	 * @param {string} dir - Directory to list.
	 * @param {number} depth - Current recursion depth (0 for the root call).
	 * @param {number} maxDepth - Depth at which recursion stops (exclusive).
	 * @param {string} [prefix=''] - Path of `dir` relative to the root of the walk,
	 *   ending in `/` when non-empty. Used internally by the recursion.
	 * @returns {Promise<string>} Newline-separated paths relative to the root of the walk.
	 */
	async _getFileTreeReaddir(dir, depth, maxDepth, prefix = '') {
		if (depth >= maxDepth) return '';
		const entries = [];
		try {
			const items = await readdir(dir, { withFileTypes: true });
			for (const item of items) {
				if (IGNORE_DIRS.has(item.name)) continue;
				if (item.name.startsWith('.') && depth === 0 && item.isDirectory()) continue;

				// Build the relative path from the accumulated prefix instead of
				// string-stripping the absolute path: that broke when
				// workingDirectory had a trailing slash or a different separator.
				const relativePath = `${prefix}${item.name}`;
				if (item.isFile()) {
					entries.push(relativePath);
				} else if (item.isDirectory()) {
					entries.push(`${relativePath}/`);
					const subEntries = await this._getFileTreeReaddir(
						join(dir, item.name),
						depth + 1,
						maxDepth,
						`${relativePath}/`
					);
					if (subEntries) entries.push(subEntries);
				}
			}
		} catch {
			// Permission errors, etc. — skip
		}
		return entries.join('\n');
	}

	/**
	 * Build the full system prompt: fixed instructions, then (when available)
	 * the file tree, the npm package list, and the caller's own system prompt.
	 *
	 * @private
	 * @returns {string}
	 */
	_buildSystemPrompt() {
		const { fileTree, npmPackages } = this._codebaseContext || { fileTree: '', npmPackages: [] };

		let prompt = `You are a coding agent working in ${this.workingDirectory}.

## Instructions
- Use the execute_code tool to accomplish tasks by writing JavaScript code
- Your code runs in a Node.js child process with access to all built-in modules
- IMPORTANT: Your code runs as an ES module (.mjs). Use import syntax, NOT require():
  - import fs from 'fs';
  - import path from 'path';
  - import { execSync } from 'child_process';
- Use console.log() to produce output — that's how results are returned to you
- Write efficient scripts that do multiple things per execution when possible
- For parallel async operations, use Promise.all():
  const [a, b] = await Promise.all([fetchA(), fetchB()]);
- Read files with fs.readFileSync() when you need to understand their contents
- Handle errors in your scripts with try/catch so you get useful error messages
- Top-level await is supported
- The working directory is: ${this.workingDirectory}`;

		if (fileTree) {
			prompt += `\n\n## File Tree\n\`\`\`\n${fileTree}\n\`\`\``;
		}

		if (npmPackages.length > 0) {
			prompt += `\n\n## Available Packages\nThese npm packages are installed and can be imported: ${npmPackages.join(', ')}`;
		}

		if (this._userSystemPrompt) {
			prompt += `\n\n## Additional Instructions\n${this._userSystemPrompt}`;
		}

		return prompt;
	}

	// ── Code Execution ───────────────────────────────────────────────────────

	/**
	 * Execute a JavaScript code string in a child process.
	 *
	 * The code is written to a uniquely named `.mjs` temp file inside the
	 * working directory, run with `node`, and the temp file is removed
	 * afterwards regardless of outcome. Successful and failed runs are recorded
	 * in `_allExecutions` and reported to `onCodeExecution`; stopped and denied
	 * requests are not.
	 *
	 * @private
	 * @param {string} code - JavaScript code to execute
	 * @returns {Promise<{stdout: string, stderr: string, exitCode: number, denied?: boolean}>}
	 *   `exitCode` is 0 on success, the process exit code on failure (1 when no
	 *   numeric code is available), and -1 when the run was stopped or denied.
	 */
	async _executeCode(code) {
		if (this._stopped) {
			return { stdout: '', stderr: 'Agent was stopped', exitCode: -1 };
		}

		// Approval gate. It fails closed: if the callback itself throws, the code
		// is NOT run, because an error in the gate is not an approval.
		if (this.onBeforeExecution) {
			let allowed;
			try {
				allowed = await this.onBeforeExecution(code);
			} catch (e) {
				const reason = e?.message ?? String(e);
				log.warn(`onBeforeExecution callback error: ${reason}`);
				return {
					stdout: '',
					stderr: `Execution denied: onBeforeExecution callback failed (${reason})`,
					exitCode: -1,
					denied: true
				};
			}
			if (allowed === false) {
				return { stdout: '', stderr: 'Execution denied by onBeforeExecution callback', exitCode: -1, denied: true };
			}
		}

		const tempFile = join(this.workingDirectory, `.code-agent-tmp-${randomUUID()}.mjs`);

		try {
			await writeFile(tempFile, code, 'utf-8');

			// The promise never rejects: failures are folded into the result object
			// so the model can see stderr and the exit code.
			const result = await new Promise((resolve) => {
				const child = execFile('node', [tempFile], {
					cwd: this.workingDirectory,
					timeout: this.timeout,
					env: process.env,
					maxBuffer: 10 * 1024 * 1024
				}, (err, stdout, stderr) => {
					this._activeProcess = null;
					if (!err) {
						resolve({ stdout: stdout || '', stderr: stderr || '', exitCode: 0 });
						return;
					}

					let note = '';
					if (err.killed) {
						// `killed` is set both for execFile's own timeout kill and for
						// stop()'s SIGTERM; tell the two apart for the model.
						note = this._stopped ? '\n[EXECUTION STOPPED]' : '\n[EXECUTION TIMED OUT]';
					}
					if (typeof err.code === 'string') {
						// Non-exit failures (e.g. ENOENT, max-buffer overflow) carry a
						// string code; surface it in stderr instead of as the exit code.
						note += `\n[${err.code}]`;
					}

					resolve({
						stdout: err.stdout || stdout || '',
						stderr: (err.stderr || stderr || '') + note,
						exitCode: typeof err.code === 'number' ? err.code : 1
					});
				});
				// Remembered so stop() can kill a running script.
				this._activeProcess = child;
			});

			// Truncate only when the combined output is over budget; each stream
			// is then capped at half the budget.
			const totalLen = result.stdout.length + result.stderr.length;
			if (totalLen > MAX_OUTPUT_CHARS) {
				const half = Math.floor(MAX_OUTPUT_CHARS / 2);
				if (result.stdout.length > half) {
					result.stdout = result.stdout.slice(0, half) + '\n...[OUTPUT TRUNCATED]';
				}
				if (result.stderr.length > half) {
					result.stderr = result.stderr.slice(0, half) + '\n...[STDERR TRUNCATED]';
				}
			}

			this._allExecutions.push({ code, output: result.stdout, stderr: result.stderr, exitCode: result.exitCode });

			// Notification only: an error in the callback must not fail the run.
			if (this.onCodeExecution) {
				try { this.onCodeExecution(code, result); }
				catch (e) { log.warn(`onCodeExecution callback error: ${e.message}`); }
			}

			return result;
		} finally {
			try { await unlink(tempFile); }
			catch { /* file may already be gone */ }
		}
	}

	/**
	 * Format an execution result as a single string for the model: stdout,
	 * then `[STDERR]: …` if any, then `[EXIT CODE]: …` when non-zero.
	 *
	 * @private
	 * @param {{stdout: string, stderr: string, exitCode: number}} result
	 * @returns {string} The formatted text, or `'(no output)'` when there is nothing to report.
	 */
	_formatOutput(result) {
		let output = '';
		if (result.stdout) output += result.stdout;
		if (result.stderr) output += (output ? '\n' : '') + `[STDERR]: ${result.stderr}`;
		if (result.exitCode !== 0) output += (output ? '\n' : '') + `[EXIT CODE]: ${result.exitCode}`;
		return output || '(no output)';
	}

	/**
	 * Convert collected tool results into the `functionResponse` parts sent
	 * back to the model.
	 *
	 * @private
	 * @param {Array<{id: string, name: string, result: string}>} results
	 * @returns {Array<{functionResponse: {id: string, name: string, response: {output: string}}}>}
	 */
	_toFunctionResponses(results) {
		return results.map((r) => ({
			functionResponse: {
				id: r.id,
				name: r.name,
				response: { output: r.result }
			}
		}));
	}

	// ── Non-Streaming Chat ───────────────────────────────────────────────────

	/**
	 * Send a message and get a complete response (non-streaming).
	 * Runs the code-execution loop: while the model replies with function
	 * calls (up to `maxRounds` rounds), execute each one and send the outputs
	 * back. The last response received is the one returned.
	 *
	 * @param {string} message - The user's message
	 * @param {Object} [opts={}] - Per-message options. Currently not read by this method.
	 * @param {Record<string, string>} [opts.labels] - Per-message billing labels (accepted but not forwarded here)
	 * @returns {Promise<CodeAgentResponse>} Response with text, codeExecutions, and usage
	 */
	async chat(message, opts = {}) {
		if (!this.chatSession) await this.init();
		this._stopped = false;

		const codeExecutions = [];

		let response = await this.chatSession.sendMessage({ message });

		for (let round = 0; round < this.maxRounds; round++) {
			if (this._stopped) break;

			const functionCalls = response.functionCalls;
			if (!functionCalls || functionCalls.length === 0) break;

			const results = [];
			for (const call of functionCalls) {
				if (this._stopped) break;

				const code = call.args?.code || '';
				const result = await this._executeCode(code);

				codeExecutions.push({
					code,
					output: result.stdout,
					stderr: result.stderr,
					exitCode: result.exitCode
				});

				results.push({
					id: call.id,
					name: call.name,
					result: this._formatOutput(result)
				});
			}

			// A stop during execution ends the loop without replying to the model.
			if (this._stopped) break;

			response = await this.chatSession.sendMessage({
				message: this._toFunctionResponses(results)
			});
		}

		this._captureMetadata(response);

		// Usage reflects the last response only.
		this._cumulativeUsage = {
			promptTokens: this.lastResponseMetadata.promptTokens,
			responseTokens: this.lastResponseMetadata.responseTokens,
			totalTokens: this.lastResponseMetadata.totalTokens,
			attempts: 1
		};

		return {
			text: response.text || '',
			codeExecutions,
			usage: this.getLastUsage()
		};
	}

	// ── Streaming ────────────────────────────────────────────────────────────

	/**
	 * Send a message and stream the response as events.
	 * Runs the code-execution loop between streamed rounds. Every stream
	 * obtained from the model is consumed, including the reply to the last
	 * permitted tool round, so the final text is never dropped.
	 *
	 * Event types:
	 * - `text` — A chunk of the agent's text response
	 * - `code` — The agent is about to execute code
	 * - `output` — Code finished executing
	 * - `done` — The agent finished; carries `warning` when it was stopped or
	 *   when the model still requested code after `maxRounds` rounds
	 *
	 * @param {string} message - The user's message
	 * @param {Object} [opts={}] - Per-message options. Currently not read by this method.
	 * @yields {CodeAgentStreamEvent}
	 */
	async *stream(message, opts = {}) {
		if (!this.chatSession) await this.init();
		this._stopped = false;

		const codeExecutions = [];
		let fullText = '';

		let streamResponse = await this.chatSession.sendMessageStream({ message });

		for (let round = 0; ; round++) {
			if (this._stopped) break;

			const functionCalls = [];

			// Consume the stream
			for await (const chunk of streamResponse) {
				if (chunk.functionCalls) {
					functionCalls.push(...chunk.functionCalls);
				} else if (chunk.candidates?.[0]?.content?.parts?.[0]?.text) {
					const text = chunk.candidates[0].content.parts[0].text;
					fullText += text;
					yield { type: 'text', text };
				}
			}

			// No function calls — we're done
			if (functionCalls.length === 0) {
				yield {
					type: 'done',
					fullText,
					codeExecutions,
					usage: this.getLastUsage()
				};
				return;
			}

			// Round budget spent: the calls in this reply are left unexecuted,
			// mirroring chat(), and the warning below is emitted.
			if (round >= this.maxRounds) break;

			// Execute code sequentially so we can yield events
			const results = [];
			for (const call of functionCalls) {
				if (this._stopped) break;

				const code = call.args?.code || '';
				yield { type: 'code', code };

				const result = await this._executeCode(code);

				codeExecutions.push({
					code,
					output: result.stdout,
					stderr: result.stderr,
					exitCode: result.exitCode
				});

				yield {
					type: 'output',
					code,
					stdout: result.stdout,
					stderr: result.stderr,
					exitCode: result.exitCode
				};

				results.push({
					id: call.id,
					name: call.name,
					result: this._formatOutput(result)
				});
			}

			if (this._stopped) break;

			// Send function responses back and get next stream
			streamResponse = await this.chatSession.sendMessageStream({
				message: this._toFunctionResponses(results)
			});
		}

		// Max rounds reached or stopped
		yield {
			type: 'done',
			fullText,
			codeExecutions,
			usage: this.getLastUsage(),
			warning: this._stopped ? 'Agent was stopped' : 'Max tool rounds reached'
		};
	}

	// ── Dump ─────────────────────────────────────────────────────────────────

	/**
	 * Returns every script recorded in `_allExecutions` (across all chat/stream
	 * calls on this instance), numbered in execution order.
	 *
	 * @returns {Array<{fileName: string, script: string}>}
	 */
	dump() {
		return this._allExecutions.map((exec, i) => ({
			fileName: `script-${i + 1}.mjs`,
			script: exec.code
		}));
	}

	// ── Stop ─────────────────────────────────────────────────────────────────

	/**
	 * Stop the agent before the next code execution.
	 * If a child process is currently running, it is sent SIGTERM.
	 * The flag is cleared again at the start of the next `chat()`/`stream()` call.
	 */
	stop() {
		this._stopped = true;
		if (this._activeProcess) {
			try { this._activeProcess.kill('SIGTERM'); }
			catch { /* process may already be gone */ }
		}
		log.info('CodeAgent stopped');
	}
}
562
+
563
+ export default CodeAgent;