ak-gemini 2.0.3 → 2.0.5

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/GUIDE.md CHANGED
@@ -696,6 +696,129 @@ console.log('Best match:', documents[scores[0].index]);
696
696
  - Classification — compare against known category embeddings
697
697
  - Recommendation — find items similar to user preferences
698
698
 
699
+ ### Combining Embedding with Other Classes
700
+
701
+ Embeddings become powerful when paired with other ak-gemini classes. Here are three common patterns:
702
+
703
+ #### Embedding + RagAgent: Smart Document Selection
704
+
705
+ Instead of loading all documents into a RagAgent (which costs tokens), use embeddings to find the most relevant ones first:
706
+
707
+ ```javascript
708
+ import { Embedding, RagAgent } from 'ak-gemini';
709
+ import fs from 'fs/promises';
710
+
711
+ // Step 1: Build an index of your document library
712
+ const docEmbedder = new Embedding({ taskType: 'RETRIEVAL_DOCUMENT' });
713
+ const docPaths = ['./docs/auth.md', './docs/billing.md', './docs/api.md', './docs/deployment.md', './docs/faq.md'];
714
+ const docTexts = await Promise.all(docPaths.map(f => fs.readFile(f, 'utf-8')));
715
+ const docVectors = await docEmbedder.embedBatch(docTexts);
716
+
717
+ // Step 2: At query time, find relevant documents via embedding similarity
718
+ const queryEmbedder = new Embedding({ taskType: 'RETRIEVAL_QUERY' });
719
+ const query = 'How do I rotate my API keys?';
720
+ const queryVector = await queryEmbedder.embed(query);
721
+
722
+ const ranked = docVectors
723
+ .map((v, i) => ({ path: docPaths[i], score: docEmbedder.similarity(queryVector.values, v.values) }))
724
+ .sort((a, b) => b.score - a.score);
725
+
726
+ // Step 3: Feed only the top matches to RagAgent for grounded Q&A
727
+ const topDocs = ranked.slice(0, 2).map(r => r.path);
728
+ console.log('Selected docs:', topDocs, 'Scores:', ranked.slice(0, 2).map(r => r.score.toFixed(3)));
729
+
730
+ const rag = new RagAgent({
731
+ localFiles: topDocs,
732
+ systemPrompt: 'Answer based only on the provided documents. Cite which document your answer comes from.',
733
+ });
734
+ const answer = await rag.chat(query);
735
+ console.log(answer.text);
736
+ ```
737
+
738
+ **Why this matters**: If you have 50 documents but only 2-3 are relevant to any given query, embedding-based selection saves significant token costs vs. loading everything into the RagAgent context.
739
+
740
+ #### Embedding + ToolAgent: Semantic Search Tool
741
+
742
+ Give a ToolAgent an embedding-powered search tool so it can look up relevant information on demand:
743
+
744
+ ```javascript
745
+ import { Embedding, ToolAgent } from 'ak-gemini';
746
+
747
+ // Pre-compute embeddings for your knowledge base
748
+ const embedder = new Embedding({ taskType: 'RETRIEVAL_DOCUMENT' });
749
+ const knowledgeBase = [
750
+ { id: 1, title: 'Password Policy', content: 'Passwords must be at least 12 characters...' },
751
+ { id: 2, title: 'SSO Setup', content: 'To configure SAML SSO, navigate to...' },
752
+ { id: 3, title: 'API Rate Limits', content: 'Free tier: 100 req/min. Pro tier: 1000 req/min...' },
753
+ ];
754
+ const kbVectors = await embedder.embedBatch(knowledgeBase.map(k => k.content));
755
+
756
+ // Create a ToolAgent with a search tool
757
+ const queryEmbedder = new Embedding({ taskType: 'RETRIEVAL_QUERY' });
758
+
759
+ const agent = new ToolAgent({
760
+ systemPrompt: 'You are a support agent. Use the search tool to find relevant articles before answering.',
761
+ tools: [{
762
+ name: 'search_knowledge_base',
763
+ description: 'Search the knowledge base for articles relevant to a query',
764
+ parametersJsonSchema: {
765
+ type: 'object',
766
+ properties: { query: { type: 'string', description: 'Search query' } },
767
+ required: ['query']
768
+ }
769
+ }],
770
+ toolExecutor: async (toolName, args) => {
771
+ const qv = await queryEmbedder.embed(args.query);
772
+ const results = kbVectors
773
+ .map((v, i) => ({ ...knowledgeBase[i], score: embedder.similarity(qv.values, v.values) }))
774
+ .sort((a, b) => b.score - a.score)
775
+ .slice(0, 3);
776
+ return results;
777
+ }
778
+ });
779
+
780
+ const result = await agent.chat('A customer is asking how to set up single sign-on');
781
+ console.log(result.text); // Agent searched KB and found the SSO article
782
+ ```
783
+
784
+ #### Embedding + Transformer: Dynamic Few-Shot Selection
785
+
786
+ When you have many examples but only a few are relevant to each input, use embeddings to pick the best few-shot examples dynamically:
787
+
788
+ ```javascript
789
+ import { Embedding, Transformer } from 'ak-gemini';
790
+
791
+ // Your full library of transformation examples
792
+ const allExamples = [
793
+ { INPUT: { type: 'blog', title: 'AI News' }, OUTPUT: { slug: 'ai-news', category: 'tech', priority: 'high' } },
794
+ { INPUT: { type: 'recipe', title: 'Pasta' }, OUTPUT: { slug: 'pasta', category: 'food', priority: 'low' } },
795
+ { INPUT: { type: 'tutorial', title: 'React Hooks' }, OUTPUT: { slug: 'react-hooks', category: 'tech', priority: 'medium' } },
796
+ { INPUT: { type: 'review', title: 'New iPhone' }, OUTPUT: { slug: 'new-iphone', category: 'tech', priority: 'medium' } },
797
+ // ... hundreds more
798
+ ];
799
+
800
+ // Embed all example inputs
801
+ const embedder = new Embedding({ taskType: 'SEMANTIC_SIMILARITY' });
802
+ const exampleVectors = await embedder.embedBatch(allExamples.map(e => JSON.stringify(e.INPUT)));
803
+
804
+ // For each new input, find the 3 most similar examples and seed the Transformer with just those
805
+ async function transformWithBestExamples(input) {
806
+ const inputVector = await embedder.embed(JSON.stringify(input));
807
+ const bestExamples = exampleVectors
808
+ .map((v, i) => ({ example: allExamples[i], score: embedder.similarity(inputVector.values, v.values) }))
809
+ .sort((a, b) => b.score - a.score)
810
+ .slice(0, 3)
811
+ .map(r => r.example);
812
+
813
+ const t = new Transformer({ sourceKey: 'INPUT', targetKey: 'OUTPUT' });
814
+ await t.seed(bestExamples);
815
+ return await t.send(input);
816
+ }
817
+
818
+ const result = await transformWithBestExamples({ type: 'article', title: 'GPT-5 Released' });
819
+ // Seeded with the most relevant tech/blog examples → better output
820
+ ```
821
+
699
822
  ---
700
823
 
701
824
  ## Google Search Grounding
package/README.md CHANGED
@@ -197,6 +197,31 @@ const results = await embedder.embedBatch(['Hello', 'World']);
197
197
  const score = embedder.similarity(results[0].values, results[1].values);
198
198
  ```
199
199
 
200
+ ### Embedding + RagAgent — Semantic Search Pipeline
201
+
202
+ Use embeddings to find relevant documents, then feed only the best matches to a RagAgent for grounded Q&A:
203
+
204
+ ```javascript
205
+ const embedder = new Embedding({ taskType: 'RETRIEVAL_DOCUMENT' });
206
+ const queryEmbedder = new Embedding({ taskType: 'RETRIEVAL_QUERY' });
207
+
208
+ // Index your documents
209
+ const docs = ['./docs/auth.md', './docs/billing.md', './docs/api.md', './docs/faq.md'];
210
+ const docTexts = await Promise.all(docs.map(f => fs.readFile(f, 'utf-8')));
211
+ const docVectors = await embedder.embedBatch(docTexts);
212
+
213
+ // Find the most relevant docs for a query
214
+ const query = 'How do I reset my API key?';
215
+ const queryVector = await queryEmbedder.embed(query);
216
+ const ranked = docVectors
217
+ .map((v, i) => ({ file: docs[i], score: embedder.similarity(queryVector.values, v.values) }))
218
+ .sort((a, b) => b.score - a.score);
219
+
220
+ // Feed only the top 2 matches to RagAgent
221
+ const rag = new RagAgent({ localFiles: ranked.slice(0, 2).map(r => r.file) });
222
+ const answer = await rag.chat(query);
223
+ ```
224
+
200
225
  ---
201
226
 
202
227
  ## Stopping Agents
@@ -289,6 +314,29 @@ const sources = result.usage?.groundingMetadata?.groundingChunks;
289
314
 
290
315
  **Warning**: Google Search grounding costs approximately $35 per 1,000 queries.
291
316
 
317
+ ### Rate Limit Handling (429)
318
+
319
+ All classes automatically retry on 429 `RESOURCE_EXHAUSTED` errors with exponential backoff. This is separate from Transformer's validation retry logic (`maxRetries`).
320
+
321
+ ```javascript
322
+ // Defaults: 5 retries, 1000ms initial delay (doubles each attempt + jitter)
323
+ const chat = new Chat({ systemPrompt: 'Hello' });
324
+
325
+ // Customize
326
+ const transformer = new Transformer({
327
+ resourceExhaustedRetries: 10, // more retries for high-throughput pipelines
328
+ resourceExhaustedDelay: 2000 // start with 2s backoff
329
+ });
330
+
331
+ // Disable entirely
332
+ const msg = new Message({ resourceExhaustedRetries: 0 });
333
+ ```
334
+
335
+ When a 429 is encountered, retries are logged at `WARN` level:
336
+ ```
337
+ WARN: Rate limited (429). Retrying in 1234ms (attempt 1/5)...
338
+ ```
339
+
292
340
  ### Context Caching
293
341
 
294
342
  Reduce costs by caching repeated system prompts, documents, or tool definitions.
@@ -342,6 +390,8 @@ All classes accept `BaseGeminiOptions`:
342
390
  | `enableGrounding` | boolean | `false` | Enable Google Search grounding |
343
391
  | `groundingConfig` | object | — | Grounding config (excludeDomains, timeRangeFilter) |
344
392
  | `cachedContent` | string | — | Cached content resource name |
393
+ | `resourceExhaustedRetries` | number | `5` | Max retry attempts for 429 rate-limit errors |
394
+ | `resourceExhaustedDelay` | number | `1000` | Initial backoff delay (ms) for 429 retries |
345
395
 
346
396
  ### Transformer-Specific
347
397
 
package/base.js CHANGED
@@ -94,6 +94,10 @@ class BaseGemini {
94
94
  throw new Error("Vertex AI requires a project ID. Provide via options.project or GOOGLE_CLOUD_PROJECT env var.");
95
95
  }
96
96
 
97
+ // ── Rate Limit Retry ──
98
+ this.resourceExhaustedRetries = options.resourceExhaustedRetries ?? 5;
99
+ this.resourceExhaustedDelay = options.resourceExhaustedDelay ?? 1000;
100
+
97
101
  // ── Logging ──
98
102
  this._configureLogLevel(options.logLevel);
99
103
 
@@ -419,10 +423,10 @@ class BaseGemini {
419
423
 
420
424
  contents.push({ parts: [{ text: nextMessage }] });
421
425
 
422
- const resp = await this.genAIClient.models.countTokens({
426
+ const resp = await this._withRetry(() => this.genAIClient.models.countTokens({
423
427
  model: this.modelName,
424
428
  contents,
425
- });
429
+ }));
426
430
 
427
431
  return { inputTokens: resp.totalTokens };
428
432
  }
@@ -472,10 +476,10 @@ class BaseGemini {
472
476
  const sysInstruction = config.systemInstruction !== undefined ? config.systemInstruction : this.systemPrompt;
473
477
  if (sysInstruction) cacheConfig.systemInstruction = sysInstruction;
474
478
 
475
- const cached = await this.genAIClient.caches.create({
479
+ const cached = await this._withRetry(() => this.genAIClient.caches.create({
476
480
  model: config.model || this.modelName,
477
481
  config: cacheConfig
478
- });
482
+ }));
479
483
 
480
484
  log.debug(`Cache created: ${cached.name}`);
481
485
  return cached;
@@ -487,7 +491,7 @@ class BaseGemini {
487
491
  * @returns {Promise<Object>} The cached content resource
488
492
  */
489
493
  async getCache(cacheName) {
490
- return await this.genAIClient.caches.get({ name: cacheName });
494
+ return await this._withRetry(() => this.genAIClient.caches.get({ name: cacheName }));
491
495
  }
492
496
 
493
497
  /**
@@ -495,7 +499,7 @@ class BaseGemini {
495
499
  * @returns {Promise<Object>} Pager of cached content resources
496
500
  */
497
501
  async listCaches() {
498
- const pager = await this.genAIClient.caches.list();
502
+ const pager = await this._withRetry(() => this.genAIClient.caches.list());
499
503
  const results = [];
500
504
  for await (const cache of pager) {
501
505
  results.push(cache);
@@ -512,13 +516,13 @@ class BaseGemini {
512
516
  * @returns {Promise<Object>} The updated cache resource
513
517
  */
514
518
  async updateCache(cacheName, config = {}) {
515
- return await this.genAIClient.caches.update({
519
+ return await this._withRetry(() => this.genAIClient.caches.update({
516
520
  name: cacheName,
517
521
  config: {
518
522
  ...(config.ttl && { ttl: config.ttl }),
519
523
  ...(config.expireTime && { expireTime: config.expireTime })
520
524
  }
521
- });
525
+ }));
522
526
  }
523
527
 
524
528
  /**
@@ -528,7 +532,7 @@ class BaseGemini {
528
532
  * @returns {Promise<void>}
529
533
  */
530
534
  async deleteCache(cacheName) {
531
- await this.genAIClient.caches.delete({ name: cacheName });
535
+ await this._withRetry(() => this.genAIClient.caches.delete({ name: cacheName }));
532
536
  log.debug(`Cache deleted: ${cacheName}`);
533
537
  if (this.cachedContent === cacheName) {
534
538
  this.cachedContent = null;
@@ -551,6 +555,44 @@ class BaseGemini {
551
555
  log.debug(`Using cache: ${cacheName}`);
552
556
  }
553
557
 
558
+ // ── Rate Limit Retry ────────────────────────────────────────────────────
559
+
560
+ /**
561
+ * Detects whether an error is a 429 / RESOURCE_EXHAUSTED rate-limit error.
562
+ * @param {Error} error
563
+ * @returns {boolean}
564
+ * @private
565
+ */
566
+ _is429Error(error) {
567
+ const /** @type {any} */ e = error;
568
+ if (e.status === 429 || e.code === 429 || e.httpStatusCode === 429) return true;
569
+ const msg = e.message || '';
570
+ return msg.includes('429') || msg.includes('RESOURCE_EXHAUSTED');
571
+ }
572
+
573
+ /**
574
+ * Wraps an async function with automatic retry on 429 (RESOURCE_EXHAUSTED) errors.
575
+ * Uses exponential backoff with jitter. Non-429 errors are rethrown immediately.
576
+ * @param {() => Promise<T>} fn - The async function to execute
577
+ * @returns {Promise<T>}
578
+ * @template T
579
+ * @protected
580
+ */
581
+ async _withRetry(fn) {
582
+ const maxAttempts = this.resourceExhaustedRetries;
583
+ for (let attempt = 0; attempt <= maxAttempts; attempt++) {
584
+ try {
585
+ return await fn();
586
+ } catch (error) {
587
+ if (!this._is429Error(error) || attempt >= maxAttempts) throw error;
588
+ const jitter = Math.random() * 500;
589
+ const delay = this.resourceExhaustedDelay * Math.pow(2, attempt) + jitter;
590
+ log.warn(`Rate limited (429). Retrying in ${Math.round(delay)}ms (attempt ${attempt + 1}/${maxAttempts})...`);
591
+ await new Promise(r => setTimeout(r, delay));
592
+ }
593
+ }
594
+ }
595
+
554
596
  // ── Private Helpers ──────────────────────────────────────────────────────
555
597
 
556
598
  /**
package/chat.js CHANGED
@@ -65,7 +65,7 @@ class Chat extends BaseGemini {
65
65
  sendParams.config = { labels: mergedLabels };
66
66
  }
67
67
 
68
- const result = await this.chatSession.sendMessage(sendParams);
68
+ const result = await this._withRetry(() => this.chatSession.sendMessage(sendParams));
69
69
 
70
70
  this._captureMetadata(result);
71
71
 
package/code-agent.js CHANGED
@@ -449,7 +449,7 @@ class CodeAgent extends BaseGemini {
449
449
  const codeExecutions = [];
450
450
  let consecutiveFailures = 0;
451
451
 
452
- let response = await this.chatSession.sendMessage({ message });
452
+ let response = await this._withRetry(() => this.chatSession.sendMessage({ message }));
453
453
 
454
454
  for (let round = 0; round < this.maxRounds; round++) {
455
455
  if (this._stopped) break;
@@ -495,7 +495,7 @@ class CodeAgent extends BaseGemini {
495
495
  if (this._stopped) break;
496
496
 
497
497
  // Send function responses back to the model
498
- response = await this.chatSession.sendMessage({
498
+ response = await this._withRetry(() => this.chatSession.sendMessage({
499
499
  message: results.map(r => ({
500
500
  functionResponse: {
501
501
  id: r.id,
@@ -503,7 +503,7 @@ class CodeAgent extends BaseGemini {
503
503
  response: { output: r.result }
504
504
  }
505
505
  }))
506
- });
506
+ }));
507
507
 
508
508
  if (consecutiveFailures >= this.maxRetries) break;
509
509
  }
@@ -548,7 +548,7 @@ class CodeAgent extends BaseGemini {
548
548
  let fullText = '';
549
549
  let consecutiveFailures = 0;
550
550
 
551
- let streamResponse = await this.chatSession.sendMessageStream({ message });
551
+ let streamResponse = await this._withRetry(() => this.chatSession.sendMessageStream({ message }));
552
552
 
553
553
  for (let round = 0; round < this.maxRounds; round++) {
554
554
  if (this._stopped) break;
@@ -626,7 +626,7 @@ class CodeAgent extends BaseGemini {
626
626
  if (this._stopped) break;
627
627
 
628
628
  // Send function responses back and get next stream
629
- streamResponse = await this.chatSession.sendMessageStream({
629
+ streamResponse = await this._withRetry(() => this.chatSession.sendMessageStream({
630
630
  message: results.map(r => ({
631
631
  functionResponse: {
632
632
  id: r.id,
@@ -634,7 +634,7 @@ class CodeAgent extends BaseGemini {
634
634
  response: { output: r.result }
635
635
  }
636
636
  }))
637
- });
637
+ }));
638
638
 
639
639
  if (consecutiveFailures >= this.maxRetries) break;
640
640
  }
package/embedding.js ADDED
@@ -0,0 +1,181 @@
1
+ /**
2
+ * @fileoverview Embedding class — Generate vector embeddings via Google's embedding models.
3
+ *
4
+ * Extends BaseGemini for auth/client reuse but overrides init() to skip chat session
5
+ * creation (embeddings don't use chat). Follows the Message class pattern.
6
+ *
7
+ * @example
8
+ * ```javascript
9
+ * import { Embedding } from 'ak-gemini';
10
+ *
11
+ * const embedder = new Embedding({ apiKey: 'your-key' });
12
+ * const result = await embedder.embed('Hello world');
13
+ * console.log(result.values); // [0.012, -0.034, ...]
14
+ * ```
15
+ */
16
+
17
+ import BaseGemini from './base.js';
18
+ import log from './logger.js';
19
+
20
+ export default class Embedding extends BaseGemini {
21
+
22
+ /**
23
+ * @param {import('./types.d.ts').EmbeddingOptions} [options={}]
24
+ */
25
+ constructor(options = {}) {
26
+ // Embeddings use a different model family — default to gemini-embedding-001
27
+ if (options.modelName === undefined) {
28
+ options = { ...options, modelName: 'gemini-embedding-001' };
29
+ }
30
+
31
+ // No system prompt for embeddings
32
+ if (options.systemPrompt === undefined) {
33
+ options = { ...options, systemPrompt: null };
34
+ }
35
+
36
+ super(options);
37
+
38
+ this.taskType = options.taskType || null;
39
+ this.title = options.title || null;
40
+ this.outputDimensionality = options.outputDimensionality || null;
41
+ this.autoTruncate = options.autoTruncate ?? true;
42
+
43
+ log.debug(`Embedding created with model: ${this.modelName}`);
44
+ }
45
+
46
+ /**
47
+ * Initialize the Embedding client.
48
+ * Override: validates API connection only, NO chat session (stateless).
49
+ * @param {boolean} [force=false]
50
+ * @returns {Promise<void>}
51
+ */
52
+ async init(force = false) {
53
+ if (this._initialized && !force) return;
54
+
55
+ log.debug(`Initializing ${this.constructor.name} with model: ${this.modelName}...`);
56
+
57
+ try {
58
+ await this.genAIClient.models.list();
59
+ log.debug(`${this.constructor.name}: API connection successful.`);
60
+ } catch (e) {
61
+ throw new Error(`${this.constructor.name} initialization failed: ${e.message}`);
62
+ }
63
+
64
+ this._initialized = true;
65
+ log.debug(`${this.constructor.name}: Initialized (stateless mode).`);
66
+ }
67
+
68
+ /**
69
+ * Builds the config object for embedContent calls.
70
+ * @param {Object} [overrides={}] - Per-call config overrides
71
+ * @returns {Object} The config object
72
+ * @private
73
+ */
74
+ _buildConfig(overrides = {}) {
75
+ const config = {};
76
+ const taskType = overrides.taskType || this.taskType;
77
+ const title = overrides.title || this.title;
78
+ const dims = overrides.outputDimensionality || this.outputDimensionality;
79
+
80
+ if (taskType) config.taskType = taskType;
81
+ if (title) config.title = title;
82
+ if (dims) config.outputDimensionality = dims;
83
+
84
+ return config;
85
+ }
86
+
87
+ /**
88
+ * Embed a single text string.
89
+ * @param {string} text - The text to embed
90
+ * @param {Object} [config={}] - Per-call config overrides
91
+ * @param {string} [config.taskType] - Override task type
92
+ * @param {string} [config.title] - Override title
93
+ * @param {number} [config.outputDimensionality] - Override dimensions
94
+
95
+ * @returns {Promise<import('./types.d.ts').EmbeddingResult>} The embedding result
96
+ */
97
+ async embed(text, config = {}) {
98
+ if (!this._initialized) await this.init();
99
+
100
+ const result = await this._withRetry(() => this.genAIClient.models.embedContent({
101
+ model: this.modelName,
102
+ contents: text,
103
+ config: this._buildConfig(config)
104
+ }));
105
+
106
+ return result.embeddings[0];
107
+ }
108
+
109
+ /**
110
+ * Embed multiple text strings in a single API call.
111
+ * @param {string[]} texts - Array of texts to embed
112
+ * @param {Object} [config={}] - Per-call config overrides
113
+ * @param {string} [config.taskType] - Override task type
114
+ * @param {string} [config.title] - Override title
115
+ * @param {number} [config.outputDimensionality] - Override dimensions
116
+
117
+ * @returns {Promise<import('./types.d.ts').EmbeddingResult[]>} Array of embedding results
118
+ */
119
+ async embedBatch(texts, config = {}) {
120
+ if (!this._initialized) await this.init();
121
+
122
+ const result = await this._withRetry(() => this.genAIClient.models.embedContent({
123
+ model: this.modelName,
124
+ contents: texts,
125
+ config: this._buildConfig(config)
126
+ }));
127
+
128
+ return result.embeddings;
129
+ }
130
+
131
+ /**
132
+ * Compute cosine similarity between two embedding vectors.
133
+ * Pure math — no API call.
134
+ * @param {number[]} a - First embedding vector
135
+ * @param {number[]} b - Second embedding vector
136
+ * @returns {number} Cosine similarity between -1 and 1
137
+ */
138
+ similarity(a, b) {
139
+ if (!a || !b || a.length !== b.length) {
140
+ throw new Error('Vectors must be non-null and have the same length');
141
+ }
142
+
143
+ let dot = 0;
144
+ let magA = 0;
145
+ let magB = 0;
146
+
147
+ for (let i = 0; i < a.length; i++) {
148
+ dot += a[i] * b[i];
149
+ magA += a[i] * a[i];
150
+ magB += b[i] * b[i];
151
+ }
152
+
153
+ const magnitude = Math.sqrt(magA) * Math.sqrt(magB);
154
+ if (magnitude === 0) return 0;
155
+
156
+ return dot / magnitude;
157
+ }
158
+
159
+ // ── No-ops (embeddings don't use chat sessions) ──
160
+
161
+ /** @returns {any[]} Always returns empty array */
162
+ getHistory() { return []; }
163
+
164
+ /** No-op for Embedding */
165
+ async clearHistory() {}
166
+
167
+ /** No-op for Embedding */
168
+ async seed() {
169
+ log.warn('Embedding.seed() is a no-op — embeddings do not support few-shot examples.');
170
+ return [];
171
+ }
172
+
173
+ /**
174
+ * @param {any} _nextPayload
175
+ * @throws {Error} Embedding does not support token estimation
176
+ * @returns {Promise<{inputTokens: number}>}
177
+ */
178
+ async estimate(_nextPayload) {
179
+ throw new Error('Embedding does not support token estimation. Use embed() directly.');
180
+ }
181
+ }
package/index.cjs CHANGED
@@ -361,6 +361,8 @@ var BaseGemini = class {
361
361
  if (this.vertexai && !this.project) {
362
362
  throw new Error("Vertex AI requires a project ID. Provide via options.project or GOOGLE_CLOUD_PROJECT env var.");
363
363
  }
364
+ this.resourceExhaustedRetries = options.resourceExhaustedRetries ?? 5;
365
+ this.resourceExhaustedDelay = options.resourceExhaustedDelay ?? 1e3;
364
366
  this._configureLogLevel(options.logLevel);
365
367
  this.labels = options.labels || {};
366
368
  this.enableGrounding = options.enableGrounding || false;
@@ -619,10 +621,10 @@ ${contextText}
619
621
  }
620
622
  const nextMessage = typeof nextPayload === "string" ? nextPayload : JSON.stringify(nextPayload, null, 2);
621
623
  contents.push({ parts: [{ text: nextMessage }] });
622
- const resp = await this.genAIClient.models.countTokens({
624
+ const resp = await this._withRetry(() => this.genAIClient.models.countTokens({
623
625
  model: this.modelName,
624
626
  contents
625
- });
627
+ }));
626
628
  return { inputTokens: resp.totalTokens };
627
629
  }
628
630
  /**
@@ -664,10 +666,10 @@ ${contextText}
664
666
  if (config.toolConfig) cacheConfig.toolConfig = config.toolConfig;
665
667
  const sysInstruction = config.systemInstruction !== void 0 ? config.systemInstruction : this.systemPrompt;
666
668
  if (sysInstruction) cacheConfig.systemInstruction = sysInstruction;
667
- const cached = await this.genAIClient.caches.create({
669
+ const cached = await this._withRetry(() => this.genAIClient.caches.create({
668
670
  model: config.model || this.modelName,
669
671
  config: cacheConfig
670
- });
672
+ }));
671
673
  logger_default.debug(`Cache created: ${cached.name}`);
672
674
  return cached;
673
675
  }
@@ -677,14 +679,14 @@ ${contextText}
677
679
  * @returns {Promise<Object>} The cached content resource
678
680
  */
679
681
  async getCache(cacheName) {
680
- return await this.genAIClient.caches.get({ name: cacheName });
682
+ return await this._withRetry(() => this.genAIClient.caches.get({ name: cacheName }));
681
683
  }
682
684
  /**
683
685
  * Lists all cached content resources.
684
686
  * @returns {Promise<Object>} Pager of cached content resources
685
687
  */
686
688
  async listCaches() {
687
- const pager = await this.genAIClient.caches.list();
689
+ const pager = await this._withRetry(() => this.genAIClient.caches.list());
688
690
  const results = [];
689
691
  for await (const cache of pager) {
690
692
  results.push(cache);
@@ -700,13 +702,13 @@ ${contextText}
700
702
  * @returns {Promise<Object>} The updated cache resource
701
703
  */
702
704
  async updateCache(cacheName, config = {}) {
703
- return await this.genAIClient.caches.update({
705
+ return await this._withRetry(() => this.genAIClient.caches.update({
704
706
  name: cacheName,
705
707
  config: {
706
708
  ...config.ttl && { ttl: config.ttl },
707
709
  ...config.expireTime && { expireTime: config.expireTime }
708
710
  }
709
- });
711
+ }));
710
712
  }
711
713
  /**
712
714
  * Deletes a cached content resource.
@@ -715,7 +717,7 @@ ${contextText}
715
717
  * @returns {Promise<void>}
716
718
  */
717
719
  async deleteCache(cacheName) {
718
- await this.genAIClient.caches.delete({ name: cacheName });
720
+ await this._withRetry(() => this.genAIClient.caches.delete({ name: cacheName }));
719
721
  logger_default.debug(`Cache deleted: ${cacheName}`);
720
722
  if (this.cachedContent === cacheName) {
721
723
  this.cachedContent = null;
@@ -734,6 +736,41 @@ ${contextText}
734
736
  }
735
737
  logger_default.debug(`Using cache: ${cacheName}`);
736
738
  }
739
+ // ── Rate Limit Retry ────────────────────────────────────────────────────
740
+ /**
741
+ * Detects whether an error is a 429 / RESOURCE_EXHAUSTED rate-limit error.
742
+ * @param {Error} error
743
+ * @returns {boolean}
744
+ * @private
745
+ */
746
+ _is429Error(error) {
747
+ const e = error;
748
+ if (e.status === 429 || e.code === 429 || e.httpStatusCode === 429) return true;
749
+ const msg = e.message || "";
750
+ return msg.includes("429") || msg.includes("RESOURCE_EXHAUSTED");
751
+ }
752
+ /**
753
+ * Wraps an async function with automatic retry on 429 (RESOURCE_EXHAUSTED) errors.
754
+ * Uses exponential backoff with jitter. Non-429 errors are rethrown immediately.
755
+ * @param {() => Promise<T>} fn - The async function to execute
756
+ * @returns {Promise<T>}
757
+ * @template T
758
+ * @protected
759
+ */
760
+ async _withRetry(fn) {
761
+ const maxAttempts = this.resourceExhaustedRetries;
762
+ for (let attempt = 0; attempt <= maxAttempts; attempt++) {
763
+ try {
764
+ return await fn();
765
+ } catch (error) {
766
+ if (!this._is429Error(error) || attempt >= maxAttempts) throw error;
767
+ const jitter = Math.random() * 500;
768
+ const delay = this.resourceExhaustedDelay * Math.pow(2, attempt) + jitter;
769
+ logger_default.warn(`Rate limited (429). Retrying in ${Math.round(delay)}ms (attempt ${attempt + 1}/${maxAttempts})...`);
770
+ await new Promise((r) => setTimeout(r, delay));
771
+ }
772
+ }
773
+ }
737
774
  // ── Private Helpers ──────────────────────────────────────────────────────
738
775
  /**
739
776
  * Configures the log level based on options, env vars, or NODE_ENV.
@@ -965,7 +1002,7 @@ var Transformer = class extends base_default {
965
1002
  if (hasLabels) {
966
1003
  sendParams.config = { labels: mergedLabels };
967
1004
  }
968
- const result = await this.chatSession.sendMessage(sendParams);
1005
+ const result = await this._withRetry(() => this.chatSession.sendMessage(sendParams));
969
1006
  this._captureMetadata(result);
970
1007
  if (result.usageMetadata && logger_default.level !== "silent") {
971
1008
  logger_default.debug(`API response: model=${result.modelVersion || "unknown"}, tokens=${result.usageMetadata.totalTokenCount}`);
@@ -1009,7 +1046,7 @@ Respond with JSON only \u2013 no comments or explanations.
1009
1046
  `;
1010
1047
  let result;
1011
1048
  try {
1012
- result = await this.chatSession.sendMessage({ message: prompt });
1049
+ result = await this._withRetry(() => this.chatSession.sendMessage({ message: prompt }));
1013
1050
  this._captureMetadata(result);
1014
1051
  } catch (err) {
1015
1052
  throw new Error(`Gemini call failed while repairing payload: ${err.message}`);
@@ -1044,14 +1081,14 @@ Respond with JSON only \u2013 no comments or explanations.
1044
1081
  }
1045
1082
  contents.push({ role: "user", parts: [{ text: payloadStr }] });
1046
1083
  const mergedLabels = { ...this.labels, ...opts.labels || {} };
1047
- const result = await this.genAIClient.models.generateContent({
1084
+ const result = await this._withRetry(() => this.genAIClient.models.generateContent({
1048
1085
  model: this.modelName,
1049
1086
  contents,
1050
1087
  config: {
1051
1088
  ...this.chatConfig,
1052
1089
  ...this.vertexai && Object.keys(mergedLabels).length > 0 && { labels: mergedLabels }
1053
1090
  }
1054
- });
1091
+ }));
1055
1092
  this._captureMetadata(result);
1056
1093
  this._cumulativeUsage = {
1057
1094
  promptTokens: this.lastResponseMetadata.promptTokens,
@@ -1163,7 +1200,7 @@ var Chat = class extends base_default {
1163
1200
  if (hasLabels) {
1164
1201
  sendParams.config = { labels: mergedLabels };
1165
1202
  }
1166
- const result = await this.chatSession.sendMessage(sendParams);
1203
+ const result = await this._withRetry(() => this.chatSession.sendMessage(sendParams));
1167
1204
  this._captureMetadata(result);
1168
1205
  this._cumulativeUsage = {
1169
1206
  promptTokens: this.lastResponseMetadata.promptTokens,
@@ -1227,14 +1264,14 @@ var Message = class extends base_default {
1227
1264
  const payloadStr = typeof payload === "string" ? payload : JSON.stringify(payload, null, 2);
1228
1265
  const contents = [{ role: "user", parts: [{ text: payloadStr }] }];
1229
1266
  const mergedLabels = { ...this.labels, ...opts.labels || {} };
1230
- const result = await this.genAIClient.models.generateContent({
1267
+ const result = await this._withRetry(() => this.genAIClient.models.generateContent({
1231
1268
  model: this.modelName,
1232
1269
  contents,
1233
1270
  config: {
1234
1271
  ...this.chatConfig,
1235
1272
  ...this.vertexai && Object.keys(mergedLabels).length > 0 && { labels: mergedLabels }
1236
1273
  }
1237
- });
1274
+ }));
1238
1275
  this._captureMetadata(result);
1239
1276
  this._cumulativeUsage = {
1240
1277
  promptTokens: this.lastResponseMetadata.promptTokens,
@@ -1327,7 +1364,7 @@ var ToolAgent = class extends base_default {
1327
1364
  if (!this.chatSession) await this.init();
1328
1365
  this._stopped = false;
1329
1366
  const allToolCalls = [];
1330
- let response = await this.chatSession.sendMessage({ message });
1367
+ let response = await this._withRetry(() => this.chatSession.sendMessage({ message }));
1331
1368
  for (let round = 0; round < this.maxToolRounds; round++) {
1332
1369
  if (this._stopped) break;
1333
1370
  const functionCalls = response.functionCalls;
@@ -1364,7 +1401,7 @@ var ToolAgent = class extends base_default {
1364
1401
  return { id: call.id, name: call.name, result };
1365
1402
  })
1366
1403
  );
1367
- response = await this.chatSession.sendMessage({
1404
+ response = await this._withRetry(() => this.chatSession.sendMessage({
1368
1405
  message: toolResults.map((r) => ({
1369
1406
  functionResponse: {
1370
1407
  id: r.id,
@@ -1372,7 +1409,7 @@ var ToolAgent = class extends base_default {
1372
1409
  response: { output: r.result }
1373
1410
  }
1374
1411
  }))
1375
- });
1412
+ }));
1376
1413
  }
1377
1414
  this._captureMetadata(response);
1378
1415
  this._cumulativeUsage = {
@@ -1407,7 +1444,7 @@ var ToolAgent = class extends base_default {
1407
1444
  this._stopped = false;
1408
1445
  const allToolCalls = [];
1409
1446
  let fullText = "";
1410
- let streamResponse = await this.chatSession.sendMessageStream({ message });
1447
+ let streamResponse = await this._withRetry(() => this.chatSession.sendMessageStream({ message }));
1411
1448
  for (let round = 0; round < this.maxToolRounds; round++) {
1412
1449
  if (this._stopped) break;
1413
1450
  let roundText = "";
@@ -1465,7 +1502,7 @@ var ToolAgent = class extends base_default {
1465
1502
  yield { type: "tool_result", toolName: call.name, result };
1466
1503
  toolResults.push({ id: call.id, name: call.name, result });
1467
1504
  }
1468
- streamResponse = await this.chatSession.sendMessageStream({
1505
+ streamResponse = await this._withRetry(() => this.chatSession.sendMessageStream({
1469
1506
  message: toolResults.map((r) => ({
1470
1507
  functionResponse: {
1471
1508
  id: r.id,
@@ -1473,7 +1510,7 @@ var ToolAgent = class extends base_default {
1473
1510
  response: { output: r.result }
1474
1511
  }
1475
1512
  }))
1476
- });
1513
+ }));
1477
1514
  }
1478
1515
  yield {
1479
1516
  type: "done",
@@ -1866,7 +1903,7 @@ ${this._userSystemPrompt}`;
1866
1903
  this._stopped = false;
1867
1904
  const codeExecutions = [];
1868
1905
  let consecutiveFailures = 0;
1869
- let response = await this.chatSession.sendMessage({ message });
1906
+ let response = await this._withRetry(() => this.chatSession.sendMessage({ message }));
1870
1907
  for (let round = 0; round < this.maxRounds; round++) {
1871
1908
  if (this._stopped) break;
1872
1909
  const functionCalls = response.functionCalls;
@@ -1902,7 +1939,7 @@ ${this._userSystemPrompt}`;
1902
1939
  });
1903
1940
  }
1904
1941
  if (this._stopped) break;
1905
- response = await this.chatSession.sendMessage({
1942
+ response = await this._withRetry(() => this.chatSession.sendMessage({
1906
1943
  message: results.map((r) => ({
1907
1944
  functionResponse: {
1908
1945
  id: r.id,
@@ -1910,7 +1947,7 @@ ${this._userSystemPrompt}`;
1910
1947
  response: { output: r.result }
1911
1948
  }
1912
1949
  }))
1913
- });
1950
+ }));
1914
1951
  if (consecutiveFailures >= this.maxRetries) break;
1915
1952
  }
1916
1953
  this._captureMetadata(response);
@@ -1947,7 +1984,7 @@ ${this._userSystemPrompt}`;
1947
1984
  const codeExecutions = [];
1948
1985
  let fullText = "";
1949
1986
  let consecutiveFailures = 0;
1950
- let streamResponse = await this.chatSession.sendMessageStream({ message });
1987
+ let streamResponse = await this._withRetry(() => this.chatSession.sendMessageStream({ message }));
1951
1988
  for (let round = 0; round < this.maxRounds; round++) {
1952
1989
  if (this._stopped) break;
1953
1990
  const functionCalls = [];
@@ -2008,7 +2045,7 @@ ${this._userSystemPrompt}`;
2008
2045
  });
2009
2046
  }
2010
2047
  if (this._stopped) break;
2011
- streamResponse = await this.chatSession.sendMessageStream({
2048
+ streamResponse = await this._withRetry(() => this.chatSession.sendMessageStream({
2012
2049
  message: results.map((r) => ({
2013
2050
  functionResponse: {
2014
2051
  id: r.id,
@@ -2016,7 +2053,7 @@ ${this._userSystemPrompt}`;
2016
2053
  response: { output: r.result }
2017
2054
  }
2018
2055
  }))
2019
- });
2056
+ }));
2020
2057
  if (consecutiveFailures >= this.maxRetries) break;
2021
2058
  }
2022
2059
  let warning = "Max tool rounds reached";
@@ -2141,10 +2178,10 @@ var RagAgent = class extends base_default {
2141
2178
  logger_default.debug(`Uploading remote file: ${resolvedPath}`);
2142
2179
  const ext = (0, import_node_path2.extname)(resolvedPath).toLowerCase();
2143
2180
  const mimeType = MIME_TYPES[ext] || "application/octet-stream";
2144
- const uploaded = await this.genAIClient.files.upload({
2181
+ const uploaded = await this._withRetry(() => this.genAIClient.files.upload({
2145
2182
  file: resolvedPath,
2146
2183
  config: { displayName: (0, import_node_path2.basename)(resolvedPath), mimeType }
2147
- });
2184
+ }));
2148
2185
  await this._waitForFileActive(uploaded);
2149
2186
  this._uploadedRemoteFiles.push({
2150
2187
  ...uploaded,
@@ -2202,7 +2239,7 @@ ${serialized}` });
2202
2239
  */
2203
2240
  async chat(message, opts = {}) {
2204
2241
  if (!this._initialized) await this.init();
2205
- const response = await this.chatSession.sendMessage({ message });
2242
+ const response = await this._withRetry(() => this.chatSession.sendMessage({ message }));
2206
2243
  this._captureMetadata(response);
2207
2244
  this._cumulativeUsage = {
2208
2245
  promptTokens: this.lastResponseMetadata.promptTokens,
@@ -2226,7 +2263,7 @@ ${serialized}` });
2226
2263
  async *stream(message, opts = {}) {
2227
2264
  if (!this._initialized) await this.init();
2228
2265
  let fullText = "";
2229
- const streamResponse = await this.chatSession.sendMessageStream({ message });
2266
+ const streamResponse = await this._withRetry(() => this.chatSession.sendMessageStream({ message }));
2230
2267
  for await (const chunk of streamResponse) {
2231
2268
  if (chunk.candidates?.[0]?.content?.parts?.[0]?.text) {
2232
2269
  const text = chunk.candidates[0].content.parts[0].text;
@@ -2381,11 +2418,11 @@ var Embedding = class extends base_default {
2381
2418
  */
2382
2419
  async embed(text, config = {}) {
2383
2420
  if (!this._initialized) await this.init();
2384
- const result = await this.genAIClient.models.embedContent({
2421
+ const result = await this._withRetry(() => this.genAIClient.models.embedContent({
2385
2422
  model: this.modelName,
2386
2423
  contents: text,
2387
2424
  config: this._buildConfig(config)
2388
- });
2425
+ }));
2389
2426
  return result.embeddings[0];
2390
2427
  }
2391
2428
  /**
@@ -2400,11 +2437,11 @@ var Embedding = class extends base_default {
2400
2437
  */
2401
2438
  async embedBatch(texts, config = {}) {
2402
2439
  if (!this._initialized) await this.init();
2403
- const result = await this.genAIClient.models.embedContent({
2440
+ const result = await this._withRetry(() => this.genAIClient.models.embedContent({
2404
2441
  model: this.modelName,
2405
2442
  contents: texts,
2406
2443
  config: this._buildConfig(config)
2407
- });
2444
+ }));
2408
2445
  return result.embeddings;
2409
2446
  }
2410
2447
  /**
package/message.js CHANGED
@@ -102,14 +102,14 @@ class Message extends BaseGemini {
102
102
 
103
103
  const mergedLabels = { ...this.labels, ...(opts.labels || {}) };
104
104
 
105
- const result = await this.genAIClient.models.generateContent({
105
+ const result = await this._withRetry(() => this.genAIClient.models.generateContent({
106
106
  model: this.modelName,
107
107
  contents: contents,
108
108
  config: {
109
109
  ...this.chatConfig,
110
110
  ...(this.vertexai && Object.keys(mergedLabels).length > 0 && { labels: mergedLabels })
111
111
  }
112
- });
112
+ }));
113
113
 
114
114
  this._captureMetadata(result);
115
115
 
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "ak-gemini",
3
3
  "author": "ak@mixpanel.com",
4
4
  "description": "AK's Generative AI Helper for doing... everything",
5
- "version": "2.0.3",
5
+ "version": "2.0.5",
6
6
  "main": "index.js",
7
7
  "files": [
8
8
  "index.js",
@@ -14,6 +14,7 @@
14
14
  "tool-agent.js",
15
15
  "code-agent.js",
16
16
  "rag-agent.js",
17
+ "embedding.js",
17
18
  "json-helpers.js",
18
19
  "types.d.ts",
19
20
  "logger.js",
package/rag-agent.js CHANGED
@@ -123,10 +123,10 @@ class RagAgent extends BaseGemini {
123
123
  const ext = extname(resolvedPath).toLowerCase();
124
124
  const mimeType = MIME_TYPES[ext] || 'application/octet-stream';
125
125
 
126
- const uploaded = await this.genAIClient.files.upload({
126
+ const uploaded = await this._withRetry(() => this.genAIClient.files.upload({
127
127
  file: resolvedPath,
128
128
  config: { displayName: basename(resolvedPath), mimeType }
129
- });
129
+ }));
130
130
 
131
131
  await this._waitForFileActive(uploaded);
132
132
 
@@ -206,7 +206,7 @@ class RagAgent extends BaseGemini {
206
206
  async chat(message, opts = {}) {
207
207
  if (!this._initialized) await this.init();
208
208
 
209
- const response = await this.chatSession.sendMessage({ message });
209
+ const response = await this._withRetry(() => this.chatSession.sendMessage({ message }));
210
210
 
211
211
  this._captureMetadata(response);
212
212
 
@@ -236,7 +236,7 @@ class RagAgent extends BaseGemini {
236
236
  if (!this._initialized) await this.init();
237
237
 
238
238
  let fullText = '';
239
- const streamResponse = await this.chatSession.sendMessageStream({ message });
239
+ const streamResponse = await this._withRetry(() => this.chatSession.sendMessageStream({ message }));
240
240
 
241
241
  for await (const chunk of streamResponse) {
242
242
  if (chunk.candidates?.[0]?.content?.parts?.[0]?.text) {
package/tool-agent.js CHANGED
@@ -108,7 +108,7 @@ class ToolAgent extends BaseGemini {
108
108
 
109
109
  const allToolCalls = [];
110
110
 
111
- let response = await this.chatSession.sendMessage({ message });
111
+ let response = await this._withRetry(() => this.chatSession.sendMessage({ message }));
112
112
 
113
113
  for (let round = 0; round < this.maxToolRounds; round++) {
114
114
  if (this._stopped) break;
@@ -153,7 +153,7 @@ class ToolAgent extends BaseGemini {
153
153
  );
154
154
 
155
155
  // Send function responses back to the model
156
- response = await this.chatSession.sendMessage({
156
+ response = await this._withRetry(() => this.chatSession.sendMessage({
157
157
  message: toolResults.map(r => ({
158
158
  functionResponse: {
159
159
  id: r.id,
@@ -161,7 +161,7 @@ class ToolAgent extends BaseGemini {
161
161
  response: { output: r.result }
162
162
  }
163
163
  }))
164
- });
164
+ }));
165
165
  }
166
166
 
167
167
  this._captureMetadata(response);
@@ -204,7 +204,7 @@ class ToolAgent extends BaseGemini {
204
204
  const allToolCalls = [];
205
205
  let fullText = '';
206
206
 
207
- let streamResponse = await this.chatSession.sendMessageStream({ message });
207
+ let streamResponse = await this._withRetry(() => this.chatSession.sendMessageStream({ message }));
208
208
 
209
209
  for (let round = 0; round < this.maxToolRounds; round++) {
210
210
  if (this._stopped) break;
@@ -277,7 +277,7 @@ class ToolAgent extends BaseGemini {
277
277
  }
278
278
 
279
279
  // Send function responses back and get next stream
280
- streamResponse = await this.chatSession.sendMessageStream({
280
+ streamResponse = await this._withRetry(() => this.chatSession.sendMessageStream({
281
281
  message: toolResults.map(r => ({
282
282
  functionResponse: {
283
283
  id: r.id,
@@ -285,7 +285,7 @@ class ToolAgent extends BaseGemini {
285
285
  response: { output: r.result }
286
286
  }
287
287
  }))
288
- });
288
+ }));
289
289
  }
290
290
 
291
291
  // Max rounds reached or stopped
package/transformer.js CHANGED
@@ -274,7 +274,7 @@ class Transformer extends BaseGemini {
274
274
  sendParams.config = { labels: mergedLabels };
275
275
  }
276
276
 
277
- const result = await this.chatSession.sendMessage(sendParams);
277
+ const result = await this._withRetry(() => this.chatSession.sendMessage(sendParams));
278
278
 
279
279
  this._captureMetadata(result);
280
280
 
@@ -327,7 +327,7 @@ Respond with JSON only – no comments or explanations.
327
327
 
328
328
  let result;
329
329
  try {
330
- result = await this.chatSession.sendMessage({ message: prompt });
330
+ result = await this._withRetry(() => this.chatSession.sendMessage({ message: prompt }));
331
331
  this._captureMetadata(result);
332
332
  } catch (err) {
333
333
  throw new Error(`Gemini call failed while repairing payload: ${err.message}`);
@@ -374,14 +374,14 @@ Respond with JSON only – no comments or explanations.
374
374
 
375
375
  const mergedLabels = { ...this.labels, ...(opts.labels || {}) };
376
376
 
377
- const result = await this.genAIClient.models.generateContent({
377
+ const result = await this._withRetry(() => this.genAIClient.models.generateContent({
378
378
  model: this.modelName,
379
379
  contents: contents,
380
380
  config: {
381
381
  ...this.chatConfig,
382
382
  ...(this.vertexai && Object.keys(mergedLabels).length > 0 && { labels: mergedLabels })
383
383
  }
384
- });
384
+ }));
385
385
 
386
386
  this._captureMetadata(result);
387
387
 
package/types.d.ts CHANGED
@@ -169,6 +169,11 @@ export interface BaseGeminiOptions {
169
169
 
170
170
  /** Cached content resource name to use for this session */
171
171
  cachedContent?: string;
172
+
173
+ /** Max retry attempts for 429 RESOURCE_EXHAUSTED errors (default: 5) */
174
+ resourceExhaustedRetries?: number;
175
+ /** Initial backoff delay in ms for 429 retries, doubles each attempt (default: 1000) */
176
+ resourceExhaustedDelay?: number;
172
177
  }
173
178
 
174
179
  export interface TransformerOptions extends BaseGeminiOptions {