ak-gemini 2.0.0 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/rag-agent.js ADDED
@@ -0,0 +1,340 @@
1
+ /**
2
+ * @fileoverview RagAgent class — AI agent for document & data Q&A.
3
+ *
4
+ * NOTE: This is not true RAG (no vector embeddings, chunking, or similarity
5
+ * search). It uses long-context injection — all content is placed directly
6
+ * into the model's context window. Named "RagAgent" because it serves the
7
+ * same purpose in spirit: grounding AI responses in user-provided data.
8
+ *
9
+ * Supports three input types:
10
+ * - remoteFiles: uploaded via Google Files API (PDFs, images, audio, video)
11
+ * - localFiles: read from disk as text (md, json, csv, yaml, txt)
12
+ * - localData: in-memory objects serialized as JSON
13
+ */
14
+
15
+ import { resolve, basename, extname } from 'node:path';
16
+ import { readFile } from 'node:fs/promises';
17
+ import BaseGemini from './base.js';
18
+ import log from './logger.js';
19
+
20
+ /** @type {Record<string, string>} */
21
+ const MIME_TYPES = {
22
+ // Text
23
+ '.txt': 'text/plain', '.md': 'text/plain', '.csv': 'text/csv',
24
+ '.html': 'text/html', '.htm': 'text/html', '.xml': 'text/xml',
25
+ '.json': 'application/json', '.js': 'text/javascript', '.mjs': 'text/javascript',
26
+ '.ts': 'text/plain', '.css': 'text/css', '.yaml': 'text/plain', '.yml': 'text/plain',
27
+ '.py': 'text/x-python', '.rb': 'text/plain', '.sh': 'text/plain',
28
+ // Documents
29
+ '.pdf': 'application/pdf',
30
+ '.doc': 'application/msword',
31
+ '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
32
+ // Images
33
+ '.png': 'image/png', '.jpg': 'image/jpeg', '.jpeg': 'image/jpeg',
34
+ '.gif': 'image/gif', '.webp': 'image/webp', '.svg': 'image/svg+xml',
35
+ // Audio
36
+ '.mp3': 'audio/mpeg', '.wav': 'audio/wav', '.ogg': 'audio/ogg',
37
+ '.flac': 'audio/flac', '.aac': 'audio/aac',
38
+ // Video
39
+ '.mp4': 'video/mp4', '.webm': 'video/webm', '.avi': 'video/x-msvideo',
40
+ '.mov': 'video/quicktime', '.mkv': 'video/x-matroska',
41
+ };
42
+
43
+ /**
44
+ * @typedef {import('./types').RagAgentOptions} RagAgentOptions
45
+ * @typedef {import('./types').RagResponse} RagResponse
46
+ * @typedef {import('./types').RagStreamEvent} RagStreamEvent
47
+ * @typedef {import('./types').LocalDataEntry} LocalDataEntry
48
+ */
49
+
50
/** System prompt applied by RagAgent when the caller leaves `systemPrompt` undefined. */
const DEFAULT_SYSTEM_PROMPT = [
  'You are a helpful AI assistant. Answer questions based on the provided documents and data.',
  'When referencing information, mention which document or data source it comes from.',
].join(' ');

/** Pause between two consecutive file-state polls, in milliseconds. */
const FILE_POLL_INTERVAL_MS = 2 * 1000;

/** Upper bound on how long an uploaded file is polled before giving up, in milliseconds. */
const FILE_POLL_TIMEOUT_MS = 60 * 1000;
56
+
57
/**
 * AI agent that answers questions grounded in user-provided documents and data.
 * Supports three input types:
 * - `remoteFiles` — uploaded via Google Files API (PDFs, images, audio, video)
 * - `localFiles` — read from disk as text (md, json, csv, yaml, txt)
 * - `localData` — in-memory objects serialized as JSON
 *
 * All sources are injected into the chat history as one seeded user turn.
 * Every (re)initialization uploads each remote file and reads each local file
 * again, then replaces the seeded chat session.
 *
 * @example
 * ```javascript
 * import { RagAgent } from 'ak-gemini';
 *
 * const agent = new RagAgent({
 *   remoteFiles: ['./report.pdf', './diagram.png'],
 *   localFiles: ['./docs/api.md', './config.yaml'],
 *   localData: [
 *     { name: 'users', data: [{ id: 1, name: 'Alice' }] },
 *   ],
 * });
 *
 * const result = await agent.chat('What does the API doc say about auth?');
 * console.log(result.text);
 *
 * // Streaming
 * for await (const event of agent.stream('Summarize the report')) {
 *   if (event.type === 'text') process.stdout.write(event.text);
 * }
 * ```
 */
class RagAgent extends BaseGemini {
  /**
   * @param {RagAgentOptions} [options={}]
   */
  constructor(options = {}) {
    if (options.systemPrompt === undefined) {
      options = { ...options, systemPrompt: DEFAULT_SYSTEM_PROMPT };
    }

    super(options);

    // Copy the source lists: the add*() methods push onto these arrays and
    // must never mutate arrays owned by the caller.
    this.remoteFiles = [...(options.remoteFiles || [])];
    this.localFiles = [...(options.localFiles || [])];
    this.localData = [...(options.localData || [])];
    this._uploadedRemoteFiles = [];
    this._localFileContents = [];
    this._initialized = false;

    const total = this.remoteFiles.length + this.localFiles.length + this.localData.length;
    log.debug(`RagAgent created with ${total} context sources`);
  }

  // ── Initialization ───────────────────────────────────────────────────────

  /**
   * Uploads remote files, reads local files, and seeds all context into the chat.
   *
   * The uploaded-file and local-file records are only committed to the
   * instance once every step has succeeded, so a failed (re)initialization
   * leaves the previously loaded context metadata untouched.
   *
   * @param {boolean} [force=false] - Re-run even if already initialized
   * @returns {Promise<void>}
   */
  async init(force = false) {
    if (this._initialized && !force) return;

    // 1 + 2. Load everything into locals first; nothing on `this` changes
    // until all uploads and reads have succeeded.
    const uploadedFiles = await this._uploadRemoteFiles();
    const fileContents = await this._readLocalFiles();

    // 3. Set system instruction and create chat session
    this.chatConfig.systemInstruction = /** @type {string} */ (this.systemPrompt);
    await super.init(force);

    // 4. Build unified context parts and seed into chat history
    const parts = this._buildContextParts(uploadedFiles, fileContents);

    if (parts.length > 0) {
      parts.push({ text: 'Here are the documents and data to analyze.' });

      const history = [
        { role: 'user', parts },
        { role: 'model', parts: [{ text: 'I have reviewed all the provided documents and data. I am ready to answer your questions about them.' }] }
      ];

      this.chatSession = this._createChatSession(history);
    }

    // 5. Commit the freshly loaded context
    this._uploadedRemoteFiles = uploadedFiles;
    this._localFileContents = fileContents;
    this._initialized = true;
    log.debug(`RagAgent initialized with ${this._uploadedRemoteFiles.length} remote files, ${this._localFileContents.length} local files, ${this.localData.length} data entries`);
  }

  // ── Non-Streaming Chat ───────────────────────────────────────────────────

  /**
   * Send a message and get a complete response grounded in the loaded context.
   *
   * @param {string} message - The user's question
   * @param {Object} [opts={}] - Per-message options (accepted for signature
   *   parity; this method does not currently read them)
   * @param {Record<string, string>} [opts.labels] - Per-message billing labels
   * @returns {Promise<RagResponse>}
   */
  async chat(message, opts = {}) {
    if (!this._initialized) await this.init();

    const response = await this.chatSession.sendMessage({ message });
    this._recordTurnUsage(response);

    return {
      text: response.text || '',
      usage: this.getLastUsage()
    };
  }

  // ── Streaming ────────────────────────────────────────────────────────────

  /**
   * Send a message and stream the response as events.
   *
   * Emits one `text` event per chunk that carries text, followed by a single
   * `done` event holding the accumulated text and the usage for this turn.
   *
   * @param {string} message - The user's question
   * @param {Object} [opts={}] - Per-message options (accepted for signature
   *   parity; this method does not currently read them)
   * @yields {RagStreamEvent}
   */
  async *stream(message, opts = {}) {
    if (!this._initialized) await this.init();

    let fullText = '';
    let usageChunk = null;
    const streamResponse = await this.chatSession.sendMessageStream({ message });

    for await (const chunk of streamResponse) {
      // Remember the latest chunk that reports token usage so the `done`
      // event describes this turn rather than a previous one.
      if (chunk.usageMetadata) usageChunk = chunk;

      // Join every text part of the first candidate, not just parts[0].
      // Thought parts are skipped so the streamed text matches what
      // `response.text` returns in chat().
      let text = '';
      for (const part of chunk.candidates?.[0]?.content?.parts ?? []) {
        if (typeof part.text === 'string' && part.thought !== true) {
          text += part.text;
        }
      }

      if (text) {
        fullText += text;
        yield { type: 'text', text };
      }
    }

    if (usageChunk) this._recordTurnUsage(usageChunk);

    yield {
      type: 'done',
      fullText,
      usage: this.getLastUsage()
    };
  }

  // ── Context Management ──────────────────────────────────────────────────

  /**
   * Add remote files (uploaded via Files API). Triggers reinitialize.
   * If reinitialization fails, the added paths are removed again.
   * @param {string[]} paths
   * @returns {Promise<void>}
   */
  async addRemoteFiles(paths) {
    await this._appendSources(this.remoteFiles, paths);
  }

  /**
   * Add local text files (read from disk). Triggers reinitialize.
   * If reinitialization fails, the added paths are removed again.
   * @param {string[]} paths
   * @returns {Promise<void>}
   */
  async addLocalFiles(paths) {
    await this._appendSources(this.localFiles, paths);
  }

  /**
   * Add in-memory data entries. Triggers reinitialize.
   * If reinitialization fails, the added entries are removed again.
   * @param {LocalDataEntry[]} entries
   * @returns {Promise<void>}
   */
  async addLocalData(entries) {
    await this._appendSources(this.localData, entries);
  }

  /**
   * Returns metadata about all context sources.
   *
   * `localFiles[].size` is the length of the decoded text (string length),
   * not the on-disk byte count.
   *
   * @returns {{ remoteFiles: Array<Object>, localFiles: Array<Object>, localData: Array<Object> }}
   */
  getContext() {
    return {
      remoteFiles: this._uploadedRemoteFiles.map(f => ({
        name: f.name,
        displayName: f.displayName,
        mimeType: f.mimeType,
        sizeBytes: f.sizeBytes,
        uri: f.uri,
        originalPath: f.originalPath
      })),
      localFiles: this._localFileContents.map(lf => ({
        name: lf.name,
        path: lf.path,
        size: lf.content.length
      })),
      localData: this.localData.map(ld => ({
        name: ld.name,
        type: typeof ld.data === 'object' && ld.data !== null
          ? (Array.isArray(ld.data) ? 'array' : 'object')
          : typeof ld.data
      }))
    };
  }

  // ── Private Helpers ──────────────────────────────────────────────────────

  /**
   * Uploads every path in `this.remoteFiles` through the Files API, one at a
   * time, and waits for each upload to become ACTIVE.
   * @returns {Promise<Array<Object>>} Upload records, each with `originalPath` added
   * @private
   */
  async _uploadRemoteFiles() {
    const uploadedFiles = [];

    for (const filePath of this.remoteFiles) {
      const resolvedPath = resolve(filePath);
      log.debug(`Uploading remote file: ${resolvedPath}`);

      const ext = extname(resolvedPath).toLowerCase();
      const mimeType = MIME_TYPES[ext] || 'application/octet-stream';

      const uploaded = await this.genAIClient.files.upload({
        file: resolvedPath,
        config: { displayName: basename(resolvedPath), mimeType }
      });

      await this._waitForFileActive(uploaded);

      uploadedFiles.push({ ...uploaded, originalPath: resolvedPath });
      log.debug(`File uploaded: ${uploaded.displayName} (${uploaded.mimeType})`);
    }

    return uploadedFiles;
  }

  /**
   * Reads every path in `this.localFiles` from disk as UTF-8 text.
   * @returns {Promise<Array<{ name: string, content: string, path: string }>>}
   * @private
   */
  async _readLocalFiles() {
    const fileContents = [];

    for (const filePath of this.localFiles) {
      const resolvedPath = resolve(filePath);
      log.debug(`Reading local file: ${resolvedPath}`);

      const content = await readFile(resolvedPath, 'utf-8');
      fileContents.push({
        name: basename(resolvedPath),
        content,
        path: resolvedPath
      });

      log.debug(`Local file read: ${basename(resolvedPath)} (${content.length} chars)`);
    }

    return fileContents;
  }

  /**
   * Builds the content parts for the seeded user turn: remote file
   * references first, then local file text, then serialized local data.
   * @param {Array<Object>} uploadedFiles - Records returned by `_uploadRemoteFiles`
   * @param {Array<{ name: string, content: string }>} fileContents - Records returned by `_readLocalFiles`
   * @returns {Array<Object>}
   * @private
   */
  _buildContextParts(uploadedFiles, fileContents) {
    /** @type {Array<Object>} */
    const parts = [];

    // Remote file references
    for (const f of uploadedFiles) {
      parts.push({ fileData: { fileUri: f.uri, mimeType: f.mimeType } });
    }

    // Local file contents
    for (const lf of fileContents) {
      parts.push({ text: `--- File: ${lf.name} ---\n${lf.content}` });
    }

    // Local data entries: strings pass through verbatim, anything else is
    // pretty-printed JSON.
    for (const ld of this.localData) {
      const serialized = typeof ld.data === 'string' ? ld.data : JSON.stringify(ld.data, null, 2);
      parts.push({ text: `--- Data: ${ld.name} ---\n${serialized}` });
    }

    return parts;
  }

  /**
   * Captures response metadata and resets the per-turn usage counters from it.
   * Shared by chat() and stream() so both report usage the same way.
   * @param {Object} response - A full response or a streamed chunk
   * @returns {void}
   * @private
   */
  _recordTurnUsage(response) {
    this._captureMetadata(response);

    this._cumulativeUsage = {
      promptTokens: this.lastResponseMetadata.promptTokens,
      responseTokens: this.lastResponseMetadata.responseTokens,
      totalTokens: this.lastResponseMetadata.totalTokens,
      attempts: 1
    };
  }

  /**
   * Appends items to one of the source lists and reinitializes. When the
   * reinitialization throws, the appended items are removed again so a single
   * bad source cannot make every later reinitialization fail.
   * @param {Array<*>} target - `this.remoteFiles`, `this.localFiles` or `this.localData`
   * @param {Array<*>|*} additions - Items to append; a lone item is wrapped in an array
   * @returns {Promise<void>}
   * @private
   */
  async _appendSources(target, additions) {
    // Wrapping guards against spreading a bare string into single characters.
    const items = Array.isArray(additions) ? additions : [additions];
    const insertedAt = target.length;
    target.push(...items);

    try {
      await this.init(true);
    } catch (err) {
      target.splice(insertedAt, items.length);
      throw err;
    }
  }

  /**
   * Polls until an uploaded file reaches ACTIVE state.
   * Throws if the file reports FAILED or if FILE_POLL_TIMEOUT_MS elapses first.
   * @param {Object} file - The uploaded file object
   * @returns {Promise<void>}
   * @private
   */
  async _waitForFileActive(file) {
    if (file.state === 'ACTIVE') return;

    const start = Date.now();
    while (Date.now() - start < FILE_POLL_TIMEOUT_MS) {
      const updated = await this.genAIClient.files.get({ name: file.name });
      if (updated.state === 'ACTIVE') return;
      if (updated.state === 'FAILED') {
        throw new Error(`File processing failed: ${file.displayName || file.name}`);
      }
      await new Promise(r => setTimeout(r, FILE_POLL_INTERVAL_MS));
    }
    throw new Error(`File processing timed out after ${FILE_POLL_TIMEOUT_MS / 1000}s: ${file.displayName || file.name}`);
  }
}
339
+
340
+ export default RagAgent;
package/tool-agent.js CHANGED
@@ -79,6 +79,7 @@ class ToolAgent extends BaseGemini {
79
79
  this.maxToolRounds = options.maxToolRounds || 10;
80
80
  this.onToolCall = options.onToolCall || null;
81
81
  this.onBeforeExecution = options.onBeforeExecution || null;
82
+ this.writeDir = options.writeDir || null;
82
83
  this._stopped = false;
83
84
 
84
85
  // ── Apply tools to chat config ──
package/types.d.ts CHANGED
@@ -181,6 +181,24 @@ export interface ToolAgentOptions extends BaseGeminiOptions {
181
181
  onToolCall?: (toolName: string, args: Record<string, any>) => void;
182
182
  /** Async callback before tool execution; return false to deny */
183
183
  onBeforeExecution?: (toolName: string, args: Record<string, any>) => Promise<boolean>;
184
+ /** Directory for tool-written files (pass-through for toolExecutor use) */
185
+ writeDir?: string;
186
+ }
187
+
188
/** One named in-memory value handed to a RagAgent as context. */
export interface LocalDataEntry {
  /** Label the model sees for this entry (for example "users" or "config"). */
  name: string;
  /** The value itself; anything that can be serialized as JSON. */
  data: any;
}
194
+
195
/** Constructor options for RagAgent, on top of the shared BaseGemini options. */
export interface RagAgentOptions extends BaseGeminiOptions {
  /** File paths to upload through the Google Files API (PDFs, images, audio, video). */
  remoteFiles?: string[];
  /** Paths of local text files to read from disk (md, json, csv, yaml, txt). */
  localFiles?: string[];
  /** In-memory data entries to include as context. */
  localData?: LocalDataEntry[];
}
185
203
 
186
204
  export interface CodeAgentOptions extends BaseGeminiOptions {
@@ -194,11 +212,23 @@ export interface CodeAgentOptions extends BaseGeminiOptions {
194
212
  onBeforeExecution?: (code: string) => Promise<boolean>;
195
213
  /** Notification callback after code execution */
196
214
  onCodeExecution?: (code: string, output: { stdout: string; stderr: string; exitCode: number }) => void;
215
+ /** Files whose contents are included in the system prompt for project context */
216
+ importantFiles?: string[];
217
+ /** Directory for writing script files (default: '{workingDirectory}/tmp') */
218
+ writeDir?: string;
219
+ /** Keep script files on disk after execution (default: false) */
220
+ keepArtifacts?: boolean;
221
+ /** Instruct model to write JSDoc comments in generated code (default: false) */
222
+ comments?: boolean;
223
+ /** Max consecutive failed executions before stopping (default: 3) */
224
+ maxRetries?: number;
197
225
  }
198
226
 
199
227
  export interface CodeExecution {
200
228
  /** The JavaScript code that was executed */
201
229
  code: string;
230
+ /** Short slug describing the script's purpose */
231
+ purpose?: string;
202
232
  /** stdout from the execution */
203
233
  output: string;
204
234
  /** stderr from the execution */
@@ -276,6 +306,23 @@ export interface MessageResponse {
276
306
  usage: UsageData | null;
277
307
  }
278
308
 
309
/** Result of a non-streaming RagAgent chat call. */
export interface RagResponse {
  /** Text of the model's reply. */
  text: string;
  /** Token usage for the call, or null when none is available. */
  usage: UsageData | null;
}
315
+
316
/**
 * Event yielded while streaming a RagAgent reply: `text` events carry chunks,
 * and a `done` event carries the accumulated text and usage.
 */
export interface RagStreamEvent {
  /** Discriminator: 'text' for a chunk, 'done' for the closing event. */
  type: 'text' | 'done';
  /** Present on 'text' events: the text chunk. */
  text?: string;
  /** Present on 'done' events: the complete accumulated text. */
  fullText?: string;
  /** Present on 'done' events: token usage. */
  usage?: UsageData | null;
}
325
+
279
326
  export interface AgentResponse {
280
327
  /** The agent's final text response */
281
328
  text: string;
@@ -385,6 +432,8 @@ export declare class ToolAgent extends BaseGemini {
385
432
  maxToolRounds: number;
386
433
  onToolCall: ((toolName: string, args: Record<string, any>) => void) | null;
387
434
  onBeforeExecution: ((toolName: string, args: Record<string, any>) => Promise<boolean>) | null;
435
+ /** Directory for tool-written files (pass-through for toolExecutor use) */
436
+ writeDir: string | null;
388
437
 
389
438
  chat(message: string, opts?: { labels?: Record<string, string> }): Promise<AgentResponse>;
390
439
  stream(message: string, opts?: { labels?: Record<string, string> }): AsyncGenerator<AgentStreamEvent, void, unknown>;
@@ -392,6 +441,33 @@ export declare class ToolAgent extends BaseGemini {
392
441
  stop(): void;
393
442
  }
394
443
 
444
/** Agent that answers questions grounded in uploaded files, local text files and in-memory data. */
export declare class RagAgent extends BaseGemini {
  constructor(options?: RagAgentOptions);

  /** File paths uploaded through the Google Files API. */
  remoteFiles: string[];
  /** Paths of local text files read from disk. */
  localFiles: string[];
  /** In-memory data entries. */
  localData: LocalDataEntry[];

  /** Loads every context source and seeds the chat; pass `force` to run again. */
  init(force?: boolean): Promise<void>;
  /** Sends a message and resolves with the complete reply. */
  chat(message: string, opts?: { labels?: Record<string, string> }): Promise<RagResponse>;
  /** Sends a message and yields the reply as a sequence of events. */
  stream(message: string, opts?: { labels?: Record<string, string> }): AsyncGenerator<RagStreamEvent, void, unknown>;
  /** Adds remote files uploaded via the Files API (triggers reinitialize). */
  addRemoteFiles(paths: string[]): Promise<void>;
  /** Adds local text files read from disk (triggers reinitialize). */
  addLocalFiles(paths: string[]): Promise<void>;
  /** Adds in-memory data entries (triggers reinitialize). */
  addLocalData(entries: LocalDataEntry[]): Promise<void>;
  /** Returns metadata describing every loaded context source. */
  getContext(): {
    remoteFiles: Array<{ name: string; displayName: string; mimeType: string; sizeBytes: string; uri: string; originalPath: string }>;
    localFiles: Array<{ name: string; path: string; size: number }>;
    localData: Array<{ name: string; type: string }>;
  };
}
470
+
395
471
  export declare class CodeAgent extends BaseGemini {
396
472
  constructor(options?: CodeAgentOptions);
397
473
 
@@ -400,12 +476,22 @@ export declare class CodeAgent extends BaseGemini {
400
476
  timeout: number;
401
477
  onBeforeExecution: ((code: string) => Promise<boolean>) | null;
402
478
  onCodeExecution: ((code: string, output: { stdout: string; stderr: string; exitCode: number }) => void) | null;
479
+ /** Files whose contents are included in the system prompt */
480
+ importantFiles: string[];
481
+ /** Directory for writing script files */
482
+ writeDir: string;
483
+ /** Keep script files on disk after execution */
484
+ keepArtifacts: boolean;
485
+ /** Whether the model writes comments in generated code */
486
+ comments: boolean;
487
+ /** Max consecutive failed executions before stopping */
488
+ maxRetries: number;
403
489
 
404
490
  init(force?: boolean): Promise<void>;
405
491
  chat(message: string, opts?: { labels?: Record<string, string> }): Promise<CodeAgentResponse>;
406
492
  stream(message: string, opts?: { labels?: Record<string, string> }): AsyncGenerator<CodeAgentStreamEvent, void, unknown>;
407
493
  /** Returns all code scripts written across all chat/stream calls. */
408
- dump(): Array<{ fileName: string; script: string }>;
494
+ dump(): Array<{ fileName: string; purpose: string | null; script: string; filePath: string | null }>;
409
495
  /** Stop the agent before the next code execution. Kills any running child process. */
410
496
  stop(): void;
411
497
  }
@@ -421,6 +507,7 @@ declare const _default: {
421
507
  Message: typeof Message;
422
508
  ToolAgent: typeof ToolAgent;
423
509
  CodeAgent: typeof CodeAgent;
510
+ RagAgent: typeof RagAgent;
424
511
  };
425
512
 
426
513
  export default _default;