ak-gemini 1.1.12 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (6)
  1. package/agent.js +481 -0
  2. package/index.cjs +508 -34
  3. package/index.js +19 -12
  4. package/package.json +12 -10
  5. package/tools.js +134 -0
  6. package/types.d.ts +63 -0
package/agent.js ADDED
@@ -0,0 +1,481 @@
1
+ /**
2
+ * @fileoverview AIAgent class for chat-based agent interactions with built-in tools.
3
+ * Supports streaming and non-streaming conversations with HTTP and markdown tools.
4
+ */
5
+
6
+ import dotenv from 'dotenv';
7
+ dotenv.config();
8
+ const { NODE_ENV = "unknown", LOG_LEVEL = "" } = process.env;
9
+
10
+ import { GoogleGenAI, HarmCategory, HarmBlockThreshold, ThinkingLevel } from '@google/genai';
11
+ import log from './logger.js';
12
+ import { BUILT_IN_DECLARATIONS, executeBuiltInTool } from './tools.js';
13
+
14
// Safety settings applied to every chat session: relax the two categories most
// likely to block legitimate agent output (e.g. security research summaries).
const DEFAULT_SAFETY_SETTINGS = [
  { category: HarmCategory.HARM_CATEGORY_HARASSMENT, threshold: HarmBlockThreshold.BLOCK_NONE },
  { category: HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT, threshold: HarmBlockThreshold.BLOCK_NONE }
];

// Thinking is disabled by default (zero-token budget); callers opt in via
// options.thinkingConfig.
const DEFAULT_THINKING_CONFIG = {
  thinkingBudget: 0
};

// Model-name patterns that accept a thinkingConfig. Anything else gets a
// warning and the thinkingConfig is ignored (see _configureThinking).
const THINKING_SUPPORTED_MODELS = [
  /^gemini-3-flash(-preview)?$/,
  /^gemini-3-pro(-preview|-image-preview)?$/,
  /^gemini-2\.5-pro/,
  /^gemini-2\.5-flash(-preview)?$/,
  /^gemini-2\.5-flash-lite(-preview)?$/,
  /^gemini-2\.0-flash$/
];
31
+
32
/**
 * @typedef {import('./types').AIAgentOptions} AIAgentOptions
 * @typedef {import('./types').AgentResponse} AgentResponse
 * @typedef {import('./types').AgentStreamEvent} AgentStreamEvent
 * @typedef {import('./types').UsageData} UsageData
 */

/**
 * Chat-based AI agent with built-in tools for HTTP requests and markdown generation.
 *
 * Unlike AITransformer (which is optimized for few-shot JSON transformations), AIAgent
 * is designed for interactive, multi-turn conversations where the agent can take actions
 * on the user's behalf — like fetching data from APIs, posting to endpoints, and
 * generating structured markdown reports.
 *
 * Built-in tools:
 * - **http_get** — Fetch any URL (APIs, web pages, etc.)
 * - **http_post** — POST JSON to any endpoint
 * - **write_markdown** — Generate markdown documents (reports, summaries, findings)
 *
 * The agent automatically manages the tool-use loop: when the model decides to call
 * a tool, the agent executes it, sends the result back, and continues until the model
 * produces a final text response (or maxToolRounds is reached).
 *
 * @example
 * ```javascript
 * import { AIAgent } from 'ak-gemini';
 *
 * const agent = new AIAgent({
 *   systemPrompt: 'You are a research assistant...',
 *   onMarkdown: (filename, content) => fs.writeFileSync(filename, content)
 * });
 *
 * // Non-streaming
 * const res = await agent.chat('Fetch https://api.example.com/data and summarize it');
 * console.log(res.text);          // Agent's response
 * console.log(res.toolCalls);     // [{name, args, result}, ...]
 * console.log(res.markdownFiles); // [{filename, content}, ...]
 *
 * // Streaming
 * for await (const event of agent.stream('Write a report on...')) {
 *   if (event.type === 'text') process.stdout.write(event.text);
 *   if (event.type === 'tool_call') console.log(`Calling: ${event.toolName}`);
 *   if (event.type === 'done') console.log('\nDone!');
 * }
 * ```
 */
class AIAgent {
  /**
   * Create a new AIAgent instance.
   * @param {AIAgentOptions} [options={}] - Configuration options (see AIAgentOptions in types.d.ts)
   * @throws {Error} If no API key is available (non-Vertex) or no project is set (Vertex).
   */
  constructor(options = {}) {
    this.modelName = options.modelName || 'gemini-2.5-flash';
    this.systemPrompt = options.systemPrompt || 'You are a helpful AI assistant.';
    // ?? (not ||) so an explicit 0 is honored instead of silently replaced:
    // maxToolRounds: 0 means "never run tools", httpTimeout: 0 means "no timeout cap".
    this.maxToolRounds = options.maxToolRounds ?? 10;
    this.httpTimeout = options.httpTimeout ?? 30000;
    this.maxRetries = options.maxRetries ?? 3;
    this.onToolCall = options.onToolCall || null;
    this.onMarkdown = options.onMarkdown || null;
    this.labels = options.labels || {};

    // Auth - same as AITransformer
    this.vertexai = options.vertexai || false;
    this.project = options.project || process.env.GOOGLE_CLOUD_PROJECT || null;
    this.location = options.location || process.env.GOOGLE_CLOUD_LOCATION || undefined;
    this.googleAuthOptions = options.googleAuthOptions || null;
    // ?? is exactly the original "!== undefined && !== null" ternary.
    this.apiKey = options.apiKey ?? process.env.GEMINI_API_KEY;

    if (!this.vertexai && !this.apiKey) {
      throw new Error("Missing Gemini API key. Provide via options.apiKey or GEMINI_API_KEY env var. For Vertex AI, set vertexai: true with project and location.");
    }
    if (this.vertexai && !this.project) {
      throw new Error("Vertex AI requires a project ID. Provide via options.project or GOOGLE_CLOUD_PROJECT env var.");
    }

    // Log level
    this._configureLogLevel(options.logLevel);

    // Build chat config. systemInstruction is set AFTER the spread so the
    // systemPrompt option always wins over chatConfig.systemInstruction.
    this.chatConfig = {
      temperature: 0.7,
      topP: 0.95,
      topK: 64,
      safetySettings: DEFAULT_SAFETY_SETTINGS,
      maxOutputTokens: 50_000,
      ...options.chatConfig,
      systemInstruction: this.systemPrompt
    };
    // Guard against chatConfig carrying an explicit `maxOutputTokens: undefined`,
    // which the spread above would otherwise let clobber the default.
    this.chatConfig.maxOutputTokens ??= 50_000;

    // Thinking config
    this._configureThinking(options.thinkingConfig);

    // Tools config
    this.chatConfig.tools = [{ functionDeclarations: BUILT_IN_DECLARATIONS }];
    this.chatConfig.toolConfig = { functionCallingConfig: { mode: 'AUTO' } };

    // State
    this.genAIClient = null;
    this.chatSession = null;
    this.lastResponseMetadata = null;
    this._markdownFiles = [];

    log.debug(`AIAgent created with model: ${this.modelName}`);
  }

  /**
   * Initialize the agent — creates the GenAI client and chat session.
   * Called automatically by chat() and stream() if not called explicitly.
   * Idempotent — safe to call multiple times.
   * @returns {Promise<void>}
   * @throws {Error} If the connectivity check (models.list) fails.
   */
  async init() {
    if (this.chatSession) return;

    const clientOptions = this.vertexai
      ? {
          vertexai: true,
          project: this.project,
          ...(this.location && { location: this.location }),
          ...(this.googleAuthOptions && { googleAuthOptions: this.googleAuthOptions })
        }
      : { apiKey: this.apiKey };

    this.genAIClient = new GoogleGenAI(clientOptions);
    this.chatSession = this._createChatSession();

    // Cheap connectivity/auth smoke test before the first real call.
    try {
      await this.genAIClient.models.list();
      log.debug("AIAgent: Gemini API connection successful.");
    } catch (e) {
      throw new Error(`AIAgent initialization failed: ${e.message}`);
    }

    log.debug("AIAgent: Chat session initialized.");
  }

  /**
   * Send a message and get a complete response (non-streaming).
   * Automatically handles the tool-use loop — if the model requests tool calls,
   * they are executed and results sent back until the model produces a final response.
   *
   * @param {string} message - The user's message
   * @returns {Promise<AgentResponse>} Response with text, toolCalls, markdownFiles, and usage
   * @example
   * const res = await agent.chat('Fetch https://api.example.com/users');
   * console.log(res.text);      // Agent's summary
   * console.log(res.toolCalls); // [{name: 'http_get', args: {...}, result: {...}}]
   */
  async chat(message) {
    if (!this.chatSession) await this.init();

    this._markdownFiles = [];
    const allToolCalls = [];

    let response = await this.chatSession.sendMessage({ message });

    for (let round = 0; round < this.maxToolRounds; round++) {
      const functionCalls = response.functionCalls;
      if (!functionCalls || functionCalls.length === 0) break;

      // Execute all tool calls in parallel; Promise.all preserves call order.
      const executed = await Promise.all(
        functionCalls.map(async (call) => ({ call, result: await this._executeCall(call) }))
      );

      // Record bookkeeping sequentially so ordering is deterministic —
      // pushing from inside the parallel callbacks raced on completion order.
      for (const { call, result } of executed) {
        allToolCalls.push({ name: call.name, args: call.args, result });
        if (call.name === 'write_markdown' && call.args) {
          this._markdownFiles.push({
            filename: /** @type {string} */ (call.args.filename),
            content: /** @type {string} */ (call.args.content)
          });
        }
      }

      // Send all function responses back to the model
      response = await this.chatSession.sendMessage({
        message: executed.map(({ call, result }) => ({
          functionResponse: {
            id: call.id,
            name: call.name,
            response: { output: result }
          }
        }))
      });
    }

    // Capture metadata
    this._captureMetadata(response);

    return {
      text: response.text || '',
      toolCalls: allToolCalls,
      markdownFiles: [...this._markdownFiles],
      usage: this.getLastUsage()
    };
  }

  /**
   * Send a message and stream the response as events.
   * Automatically handles the tool-use loop between streamed rounds.
   *
   * Event types:
   * - `text` — A chunk of the agent's text response (yield as it arrives)
   * - `tool_call` — The agent is about to call a tool (includes toolName and args)
   * - `tool_result` — A tool finished executing (includes toolName and result)
   * - `markdown` — A markdown document was generated (includes filename and content)
   * - `done` — The agent finished (includes fullText, markdownFiles, usage)
   *
   * @param {string} message - The user's message
   * @yields {AgentStreamEvent}
   * @example
   * for await (const event of agent.stream('Analyze this API...')) {
   *   if (event.type === 'text') process.stdout.write(event.text);
   *   if (event.type === 'tool_call') console.log(`Calling: ${event.toolName}`);
   *   if (event.type === 'done') console.log(`\nTokens: ${event.usage?.totalTokens}`);
   * }
   */
  async *stream(message) {
    if (!this.chatSession) await this.init();

    this._markdownFiles = [];
    const allToolCalls = [];
    let fullText = '';

    let streamResponse = await this.chatSession.sendMessageStream({ message });

    for (let round = 0; round < this.maxToolRounds; round++) {
      const functionCalls = [];

      // Consume the stream. Check for function calls first — accessing .text
      // when functionCall parts exist triggers a warning from the SDK.
      for await (const chunk of streamResponse) {
        if (chunk.functionCalls) {
          functionCalls.push(...chunk.functionCalls);
        } else if (chunk.candidates?.[0]?.content?.parts?.[0]?.text) {
          const text = chunk.candidates[0].content.parts[0].text;
          fullText += text;
          yield { type: 'text', text };
        }
      }

      // No tool calls — we're done. (Streaming doesn't give us a final
      // response object easily, so usage metadata may be limited here.)
      if (functionCalls.length === 0) {
        yield {
          type: 'done',
          fullText,
          markdownFiles: [...this._markdownFiles],
          usage: this.getLastUsage()
        };
        return;
      }

      // Execute tools sequentially so we can yield events between them.
      const toolResults = [];
      for (const call of functionCalls) {
        yield { type: 'tool_call', toolName: call.name, args: call.args };

        const result = await this._executeCall(call);

        allToolCalls.push({ name: call.name, args: call.args, result });
        yield { type: 'tool_result', toolName: call.name, result };

        if (call.name === 'write_markdown' && call.args) {
          const mdFilename = /** @type {string} */ (call.args.filename);
          const mdContent = /** @type {string} */ (call.args.content);
          this._markdownFiles.push({ filename: mdFilename, content: mdContent });
          yield { type: 'markdown', filename: mdFilename, content: mdContent };
        }

        toolResults.push({ id: call.id, name: call.name, result });
      }

      // Send function responses back and get next stream
      streamResponse = await this.chatSession.sendMessageStream({
        message: toolResults.map(r => ({
          functionResponse: {
            id: r.id,
            name: r.name,
            response: { output: r.result }
          }
        }))
      });
    }

    // Max rounds reached
    yield {
      type: 'done',
      fullText,
      markdownFiles: [...this._markdownFiles],
      usage: this.getLastUsage(),
      warning: 'Max tool rounds reached'
    };
  }

  /**
   * Clear conversation history while preserving tools and system prompt.
   * Useful for starting a new user session without re-initializing the agent.
   * Safe to call before init() — it just resets local state in that case
   * (previously this threw an opaque TypeError on the null client).
   * @returns {Promise<void>}
   */
  async clearHistory() {
    this._markdownFiles = [];
    this.lastResponseMetadata = null;
    if (!this.genAIClient) return; // never initialized — nothing to clear
    this.chatSession = this._createChatSession();
    log.debug("AIAgent: Conversation history cleared.");
  }

  /**
   * Get conversation history.
   * @param {boolean} [curated=false] - Pass true for the SDK's curated view.
   * @returns {any[]} Empty array if no session exists yet.
   */
  getHistory(curated = false) {
    if (!this.chatSession) return [];
    return this.chatSession.getHistory(curated);
  }

  /**
   * Get structured usage data from the last API call.
   * Returns null if no API call has been made yet.
   * @returns {UsageData|null} Usage data with promptTokens, responseTokens, totalTokens, etc.
   */
  getLastUsage() {
    if (!this.lastResponseMetadata) return null;
    const m = this.lastResponseMetadata;
    return {
      promptTokens: m.promptTokens,
      responseTokens: m.responseTokens,
      totalTokens: m.totalTokens,
      attempts: 1,
      modelVersion: m.modelVersion,
      requestedModel: this.modelName,
      timestamp: m.timestamp
    };
  }

  // --- Private helpers ---

  /**
   * Create a fresh chat session from the current config (empty history).
   * Shared by init() and clearHistory().
   * @returns {object} The SDK chat session.
   * @private
   */
  _createChatSession() {
    return this.genAIClient.chats.create({
      model: this.modelName,
      config: {
        ...this.chatConfig,
        // Labels are Vertex-only; only attach when non-empty.
        ...(this.vertexai && Object.keys(this.labels).length > 0 && { labels: this.labels })
      },
      history: []
    });
  }

  /**
   * Run one built-in tool call, converting failures into an { error } payload
   * the model can read instead of throwing. Shared by chat() and stream().
   * @param {{id?: string, name: string, args: object}} call - Function call from the model.
   * @returns {Promise<object>} Tool result, or { error: message } on failure.
   * @private
   */
  async _executeCall(call) {
    try {
      return await executeBuiltInTool(call.name, call.args, {
        httpTimeout: this.httpTimeout,
        onToolCall: this.onToolCall,
        onMarkdown: this.onMarkdown
      });
    } catch (err) {
      log.warn(`Tool ${call.name} failed: ${err.message}`);
      return { error: err.message };
    }
  }

  /**
   * Capture response metadata (model version, token counts) from an API response.
   * @param {import('@google/genai').GenerateContentResponse} response
   * @private
   */
  _captureMetadata(response) {
    this.lastResponseMetadata = {
      modelVersion: response.modelVersion || null,
      requestedModel: this.modelName,
      promptTokens: response.usageMetadata?.promptTokenCount || 0,
      responseTokens: response.usageMetadata?.candidatesTokenCount || 0,
      totalTokens: response.usageMetadata?.totalTokenCount || 0,
      timestamp: Date.now()
    };
  }

  /**
   * Resolve the log level: explicit option > LOG_LEVEL env > NODE_ENV heuristic.
   * @param {string} [logLevel] - Explicit level, or 'none' to silence.
   * @private
   */
  _configureLogLevel(logLevel) {
    if (logLevel) {
      log.level = logLevel === 'none' ? 'silent' : logLevel;
    } else if (LOG_LEVEL) {
      log.level = LOG_LEVEL;
    } else if (NODE_ENV === 'dev') {
      log.level = 'debug';
    } else if (NODE_ENV === 'test') {
      log.level = 'warn';
    } else if (NODE_ENV.startsWith('prod')) {
      log.level = 'error';
    } else {
      log.level = 'info';
    }
  }

  /**
   * Apply the thinking config when the model supports it.
   * undefined leaves the config untouched; null removes any existing one;
   * an object is merged over DEFAULT_THINKING_CONFIG (thinkingLevel, when
   * present, supersedes the default thinkingBudget).
   * @param {object|null} [thinkingConfig]
   * @private
   */
  _configureThinking(thinkingConfig) {
    if (thinkingConfig === undefined) return;

    if (thinkingConfig === null) {
      delete this.chatConfig.thinkingConfig;
      return;
    }

    // Only test the model name once we know a config was actually requested.
    const modelSupportsThinking = THINKING_SUPPORTED_MODELS.some(p => p.test(this.modelName));
    if (!modelSupportsThinking) {
      log.warn(`Model ${this.modelName} does not support thinking features. Ignoring thinkingConfig.`);
      return;
    }

    const config = { ...DEFAULT_THINKING_CONFIG, ...thinkingConfig };
    if (thinkingConfig.thinkingLevel !== undefined) {
      delete config.thinkingBudget;
    }
    this.chatConfig.thinkingConfig = config;
    log.debug(`Thinking config applied: ${JSON.stringify(config)}`);
  }
}
480
// Default export: consumers do `import AIAgent from 'ak-gemini/agent.js'`.
export default AIAgent;