ak-gemini 2.0.3 → 2.0.5

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/GUIDE.md CHANGED
@@ -696,6 +696,129 @@ console.log('Best match:', documents[scores[0].index]);
696
696
  - Classification — compare against known category embeddings
697
697
  - Recommendation — find items similar to user preferences
698
698
 
699
+ ### Combining Embedding with Other Classes
700
+
701
+ Embeddings become powerful when paired with other ak-gemini classes. Here are three common patterns:
702
+
703
+ #### Embedding + RagAgent: Smart Document Selection
704
+
705
+ Instead of loading all documents into a RagAgent (which costs tokens), use embeddings to find the most relevant ones first:
706
+
707
+ ```javascript
708
+ import { Embedding, RagAgent } from 'ak-gemini';
709
+ import fs from 'fs/promises';
710
+
711
+ // Step 1: Build an index of your document library
712
+ const docEmbedder = new Embedding({ taskType: 'RETRIEVAL_DOCUMENT' });
713
+ const docPaths = ['./docs/auth.md', './docs/billing.md', './docs/api.md', './docs/deployment.md', './docs/faq.md'];
714
+ const docTexts = await Promise.all(docPaths.map(f => fs.readFile(f, 'utf-8')));
715
+ const docVectors = await docEmbedder.embedBatch(docTexts);
716
+
717
+ // Step 2: At query time, find relevant documents via embedding similarity
718
+ const queryEmbedder = new Embedding({ taskType: 'RETRIEVAL_QUERY' });
719
+ const query = 'How do I rotate my API keys?';
720
+ const queryVector = await queryEmbedder.embed(query);
721
+
722
+ const ranked = docVectors
723
+ .map((v, i) => ({ path: docPaths[i], score: docEmbedder.similarity(queryVector.values, v.values) }))
724
+ .sort((a, b) => b.score - a.score);
725
+
726
+ // Step 3: Feed only the top matches to RagAgent for grounded Q&A
727
+ const topDocs = ranked.slice(0, 2).map(r => r.path);
728
+ console.log('Selected docs:', topDocs, 'Scores:', ranked.slice(0, 2).map(r => r.score.toFixed(3)));
729
+
730
+ const rag = new RagAgent({
731
+ localFiles: topDocs,
732
+ systemPrompt: 'Answer based only on the provided documents. Cite which document your answer comes from.',
733
+ });
734
+ const answer = await rag.chat(query);
735
+ console.log(answer.text);
736
+ ```
737
+
738
+ **Why this matters**: If you have 50 documents but only 2-3 are relevant to any given query, embedding-based selection saves significant token costs vs. loading everything into the RagAgent context.
739
+
740
+ #### Embedding + ToolAgent: Semantic Search Tool
741
+
742
+ Give a ToolAgent an embedding-powered search tool so it can look up relevant information on demand:
743
+
744
+ ```javascript
745
+ import { Embedding, ToolAgent } from 'ak-gemini';
746
+
747
+ // Pre-compute embeddings for your knowledge base
748
+ const embedder = new Embedding({ taskType: 'RETRIEVAL_DOCUMENT' });
749
+ const knowledgeBase = [
750
+ { id: 1, title: 'Password Policy', content: 'Passwords must be at least 12 characters...' },
751
+ { id: 2, title: 'SSO Setup', content: 'To configure SAML SSO, navigate to...' },
752
+ { id: 3, title: 'API Rate Limits', content: 'Free tier: 100 req/min. Pro tier: 1000 req/min...' },
753
+ ];
754
+ const kbVectors = await embedder.embedBatch(knowledgeBase.map(k => k.content));
755
+
756
+ // Create a ToolAgent with a search tool
757
+ const queryEmbedder = new Embedding({ taskType: 'RETRIEVAL_QUERY' });
758
+
759
+ const agent = new ToolAgent({
760
+ systemPrompt: 'You are a support agent. Use the search tool to find relevant articles before answering.',
761
+ tools: [{
762
+ name: 'search_knowledge_base',
763
+ description: 'Search the knowledge base for articles relevant to a query',
764
+ parametersJsonSchema: {
765
+ type: 'object',
766
+ properties: { query: { type: 'string', description: 'Search query' } },
767
+ required: ['query']
768
+ }
769
+ }],
770
+ toolExecutor: async (toolName, args) => {
771
+ const qv = await queryEmbedder.embed(args.query);
772
+ const results = kbVectors
773
+ .map((v, i) => ({ ...knowledgeBase[i], score: embedder.similarity(qv.values, v.values) }))
774
+ .sort((a, b) => b.score - a.score)
775
+ .slice(0, 3);
776
+ return results;
777
+ }
778
+ });
779
+
780
+ const result = await agent.chat('A customer is asking how to set up single sign-on');
781
+ console.log(result.text); // Agent searched KB and found the SSO article
782
+ ```
783
+
784
+ #### Embedding + Transformer: Dynamic Few-Shot Selection
785
+
786
+ When you have many examples but only a few are relevant to each input, use embeddings to pick the best few-shot examples dynamically:
787
+
788
+ ```javascript
789
+ import { Embedding, Transformer } from 'ak-gemini';
790
+
791
+ // Your full library of transformation examples
792
+ const allExamples = [
793
+ { INPUT: { type: 'blog', title: 'AI News' }, OUTPUT: { slug: 'ai-news', category: 'tech', priority: 'high' } },
794
+ { INPUT: { type: 'recipe', title: 'Pasta' }, OUTPUT: { slug: 'pasta', category: 'food', priority: 'low' } },
795
+ { INPUT: { type: 'tutorial', title: 'React Hooks' }, OUTPUT: { slug: 'react-hooks', category: 'tech', priority: 'medium' } },
796
+ { INPUT: { type: 'review', title: 'New iPhone' }, OUTPUT: { slug: 'new-iphone', category: 'tech', priority: 'medium' } },
797
+ // ... hundreds more
798
+ ];
799
+
800
+ // Embed all example inputs
801
+ const embedder = new Embedding({ taskType: 'SEMANTIC_SIMILARITY' });
802
+ const exampleVectors = await embedder.embedBatch(allExamples.map(e => JSON.stringify(e.INPUT)));
803
+
804
+ // For each new input, find the 3 most similar examples and seed the Transformer with just those
805
+ async function transformWithBestExamples(input) {
806
+ const inputVector = await embedder.embed(JSON.stringify(input));
807
+ const bestExamples = exampleVectors
808
+ .map((v, i) => ({ example: allExamples[i], score: embedder.similarity(inputVector.values, v.values) }))
809
+ .sort((a, b) => b.score - a.score)
810
+ .slice(0, 3)
811
+ .map(r => r.example);
812
+
813
+ const t = new Transformer({ sourceKey: 'INPUT', targetKey: 'OUTPUT' });
814
+ await t.seed(bestExamples);
815
+ return await t.send(input);
816
+ }
817
+
818
+ const result = await transformWithBestExamples({ type: 'article', title: 'GPT-5 Released' });
819
+ // Seeded with the most relevant tech/blog examples → better output
820
+ ```
821
+
699
822
  ---
700
823
 
701
824
  ## Google Search Grounding
package/README.md CHANGED
@@ -197,6 +197,31 @@ const results = await embedder.embedBatch(['Hello', 'World']);
197
197
  const score = embedder.similarity(results[0].values, results[1].values);
198
198
  ```
199
199
 
200
+ ### Embedding + RagAgent — Semantic Search Pipeline
201
+
202
+ Use embeddings to find relevant documents, then feed only the best matches to a RagAgent for grounded Q&A:
203
+
204
+ ```javascript
205
+ const embedder = new Embedding({ taskType: 'RETRIEVAL_DOCUMENT' });
206
+ const queryEmbedder = new Embedding({ taskType: 'RETRIEVAL_QUERY' });
207
+
208
+ // Index your documents
209
+ const docs = ['./docs/auth.md', './docs/billing.md', './docs/api.md', './docs/faq.md'];
210
+ const docTexts = await Promise.all(docs.map(f => fs.readFile(f, 'utf-8')));
211
+ const docVectors = await embedder.embedBatch(docTexts);
212
+
213
+ // Find the most relevant docs for a query
214
+ const query = 'How do I reset my API key?';
215
+ const queryVector = await queryEmbedder.embed(query);
216
+ const ranked = docVectors
217
+ .map((v, i) => ({ file: docs[i], score: embedder.similarity(queryVector.values, v.values) }))
218
+ .sort((a, b) => b.score - a.score);
219
+
220
+ // Feed only the top 2 matches to RagAgent
221
+ const rag = new RagAgent({ localFiles: ranked.slice(0, 2).map(r => r.file) });
222
+ const answer = await rag.chat(query);
223
+ ```
224
+
200
225
  ---
201
226
 
202
227
  ## Stopping Agents
@@ -289,6 +314,29 @@ const sources = result.usage?.groundingMetadata?.groundingChunks;
289
314
 
290
315
  **Warning**: Google Search grounding costs approximately $35 per 1,000 queries.
291
316
 
317
+ ### Rate Limit Handling (429)
318
+
319
+ All classes automatically retry on 429 `RESOURCE_EXHAUSTED` errors with exponential backoff. This is separate from Transformer's validation retry logic (`maxRetries`).
320
+
321
+ ```javascript
322
+ // Defaults: 5 retries, 1000ms initial delay (doubles each attempt + jitter)
323
+ const chat = new Chat({ systemPrompt: 'Hello' });
324
+
325
+ // Customize
326
+ const transformer = new Transformer({
327
+ resourceExhaustedRetries: 10, // more retries for high-throughput pipelines
328
+ resourceExhaustedDelay: 2000 // start with 2s backoff
329
+ });
330
+
331
+ // Disable entirely
332
+ const msg = new Message({ resourceExhaustedRetries: 0 });
333
+ ```
334
+
335
+ When a 429 is encountered, retries are logged at `WARN` level:
336
+ ```
337
+ WARN: Rate limited (429). Retrying in 1234ms (attempt 1/5)...
338
+ ```
339
+
292
340
  ### Context Caching
293
341
 
294
342
  Reduce costs by caching repeated system prompts, documents, or tool definitions.
@@ -342,6 +390,8 @@ All classes accept `BaseGeminiOptions`:
342
390
  | `enableGrounding` | boolean | `false` | Enable Google Search grounding |
343
391
  | `groundingConfig` | object | — | Grounding config (excludeDomains, timeRangeFilter) |
344
392
  | `cachedContent` | string | — | Cached content resource name |
393
+ | `resourceExhaustedRetries` | number | `5` | Max retry attempts for 429 rate-limit errors |
394
+ | `resourceExhaustedDelay` | number | `1000` | Initial backoff delay (ms) for 429 retries |
345
395
 
346
396
  ### Transformer-Specific
347
397
 
package/base.js CHANGED
@@ -94,6 +94,10 @@ class BaseGemini {
94
94
  throw new Error("Vertex AI requires a project ID. Provide via options.project or GOOGLE_CLOUD_PROJECT env var.");
95
95
  }
96
96
 
97
+ // ── Rate Limit Retry ──
98
+ this.resourceExhaustedRetries = options.resourceExhaustedRetries ?? 5;
99
+ this.resourceExhaustedDelay = options.resourceExhaustedDelay ?? 1000;
100
+
97
101
  // ── Logging ──
98
102
  this._configureLogLevel(options.logLevel);
99
103
 
@@ -419,10 +423,10 @@ class BaseGemini {
419
423
 
420
424
  contents.push({ parts: [{ text: nextMessage }] });
421
425
 
422
- const resp = await this.genAIClient.models.countTokens({
426
+ const resp = await this._withRetry(() => this.genAIClient.models.countTokens({
423
427
  model: this.modelName,
424
428
  contents,
425
- });
429
+ }));
426
430
 
427
431
  return { inputTokens: resp.totalTokens };
428
432
  }
@@ -472,10 +476,10 @@ class BaseGemini {
472
476
  const sysInstruction = config.systemInstruction !== undefined ? config.systemInstruction : this.systemPrompt;
473
477
  if (sysInstruction) cacheConfig.systemInstruction = sysInstruction;
474
478
 
475
- const cached = await this.genAIClient.caches.create({
479
+ const cached = await this._withRetry(() => this.genAIClient.caches.create({
476
480
  model: config.model || this.modelName,
477
481
  config: cacheConfig
478
- });
482
+ }));
479
483
 
480
484
  log.debug(`Cache created: ${cached.name}`);
481
485
  return cached;
@@ -487,7 +491,7 @@ class BaseGemini {
487
491
  * @returns {Promise<Object>} The cached content resource
488
492
  */
489
493
  async getCache(cacheName) {
490
- return await this.genAIClient.caches.get({ name: cacheName });
494
+ return await this._withRetry(() => this.genAIClient.caches.get({ name: cacheName }));
491
495
  }
492
496
 
493
497
  /**
@@ -495,7 +499,7 @@ class BaseGemini {
495
499
  * @returns {Promise<Object>} Pager of cached content resources
496
500
  */
497
501
  async listCaches() {
498
- const pager = await this.genAIClient.caches.list();
502
+ const pager = await this._withRetry(() => this.genAIClient.caches.list());
499
503
  const results = [];
500
504
  for await (const cache of pager) {
501
505
  results.push(cache);
@@ -512,13 +516,13 @@ class BaseGemini {
512
516
  * @returns {Promise<Object>} The updated cache resource
513
517
  */
514
518
  async updateCache(cacheName, config = {}) {
515
- return await this.genAIClient.caches.update({
519
+ return await this._withRetry(() => this.genAIClient.caches.update({
516
520
  name: cacheName,
517
521
  config: {
518
522
  ...(config.ttl && { ttl: config.ttl }),
519
523
  ...(config.expireTime && { expireTime: config.expireTime })
520
524
  }
521
- });
525
+ }));
522
526
  }
523
527
 
524
528
  /**
@@ -528,7 +532,7 @@ class BaseGemini {
528
532
  * @returns {Promise<void>}
529
533
  */
530
534
  async deleteCache(cacheName) {
531
- await this.genAIClient.caches.delete({ name: cacheName });
535
+ await this._withRetry(() => this.genAIClient.caches.delete({ name: cacheName }));
532
536
  log.debug(`Cache deleted: ${cacheName}`);
533
537
  if (this.cachedContent === cacheName) {
534
538
  this.cachedContent = null;
@@ -551,6 +555,44 @@ class BaseGemini {
551
555
  log.debug(`Using cache: ${cacheName}`);
552
556
  }
553
557
 
558
+ // ── Rate Limit Retry ────────────────────────────────────────────────────
559
+
560
+ /**
561
+ * Detects whether an error is a 429 / RESOURCE_EXHAUSTED rate-limit error.
562
+ * @param {Error} error
563
+ * @returns {boolean}
564
+ * @private
565
+ */
566
+ _is429Error(error) {
567
+ const /** @type {any} */ e = error;
568
+ if (e.status === 429 || e.code === 429 || e.httpStatusCode === 429) return true;
569
+ const msg = e.message || '';
570
+ return msg.includes('429') || msg.includes('RESOURCE_EXHAUSTED');
571
+ }
572
+
573
+ /**
574
+ * Wraps an async function with automatic retry on 429 (RESOURCE_EXHAUSTED) errors.
575
+ * Uses exponential backoff with jitter. Non-429 errors are rethrown immediately.
576
+ * @param {() => Promise<T>} fn - The async function to execute
577
+ * @returns {Promise<T>}
578
+ * @template T
579
+ * @protected
580
+ */
581
+ async _withRetry(fn) {
582
+ const maxAttempts = this.resourceExhaustedRetries;
583
+ for (let attempt = 0; attempt <= maxAttempts; attempt++) {
584
+ try {
585
+ return await fn();
586
+ } catch (error) {
587
+ if (!this._is429Error(error) || attempt >= maxAttempts) throw error;
588
+ const jitter = Math.random() * 500;
589
+ const delay = this.resourceExhaustedDelay * Math.pow(2, attempt) + jitter;
590
+ log.warn(`Rate limited (429). Retrying in ${Math.round(delay)}ms (attempt ${attempt + 1}/${maxAttempts})...`);
591
+ await new Promise(r => setTimeout(r, delay));
592
+ }
593
+ }
594
+ }
595
+
554
596
  // ── Private Helpers ──────────────────────────────────────────────────────
555
597
 
556
598
  /**
package/chat.js CHANGED
@@ -65,7 +65,7 @@ class Chat extends BaseGemini {
65
65
  sendParams.config = { labels: mergedLabels };
66
66
  }
67
67
 
68
- const result = await this.chatSession.sendMessage(sendParams);
68
+ const result = await this._withRetry(() => this.chatSession.sendMessage(sendParams));
69
69
 
70
70
  this._captureMetadata(result);
71
71
 
package/code-agent.js CHANGED
@@ -449,7 +449,7 @@ class CodeAgent extends BaseGemini {
449
449
  const codeExecutions = [];
450
450
  let consecutiveFailures = 0;
451
451
 
452
- let response = await this.chatSession.sendMessage({ message });
452
+ let response = await this._withRetry(() => this.chatSession.sendMessage({ message }));
453
453
 
454
454
  for (let round = 0; round < this.maxRounds; round++) {
455
455
  if (this._stopped) break;
@@ -495,7 +495,7 @@ class CodeAgent extends BaseGemini {
495
495
  if (this._stopped) break;
496
496
 
497
497
  // Send function responses back to the model
498
- response = await this.chatSession.sendMessage({
498
+ response = await this._withRetry(() => this.chatSession.sendMessage({
499
499
  message: results.map(r => ({
500
500
  functionResponse: {
501
501
  id: r.id,
@@ -503,7 +503,7 @@ class CodeAgent extends BaseGemini {
503
503
  response: { output: r.result }
504
504
  }
505
505
  }))
506
- });
506
+ }));
507
507
 
508
508
  if (consecutiveFailures >= this.maxRetries) break;
509
509
  }
@@ -548,7 +548,7 @@ class CodeAgent extends BaseGemini {
548
548
  let fullText = '';
549
549
  let consecutiveFailures = 0;
550
550
 
551
- let streamResponse = await this.chatSession.sendMessageStream({ message });
551
+ let streamResponse = await this._withRetry(() => this.chatSession.sendMessageStream({ message }));
552
552
 
553
553
  for (let round = 0; round < this.maxRounds; round++) {
554
554
  if (this._stopped) break;
@@ -626,7 +626,7 @@ class CodeAgent extends BaseGemini {
626
626
  if (this._stopped) break;
627
627
 
628
628
  // Send function responses back and get next stream
629
- streamResponse = await this.chatSession.sendMessageStream({
629
+ streamResponse = await this._withRetry(() => this.chatSession.sendMessageStream({
630
630
  message: results.map(r => ({
631
631
  functionResponse: {
632
632
  id: r.id,
@@ -634,7 +634,7 @@ class CodeAgent extends BaseGemini {
634
634
  response: { output: r.result }
635
635
  }
636
636
  }))
637
- });
637
+ }));
638
638
 
639
639
  if (consecutiveFailures >= this.maxRetries) break;
640
640
  }
package/embedding.js ADDED
@@ -0,0 +1,181 @@
1
+ /**
2
+ * @fileoverview Embedding class — Generate vector embeddings via Google's embedding models.
3
+ *
4
+ * Extends BaseGemini for auth/client reuse but overrides init() to skip chat session
5
+ * creation (embeddings don't use chat). Follows the Message class pattern.
6
+ *
7
+ * @example
8
+ * ```javascript
9
+ * import { Embedding } from 'ak-gemini';
10
+ *
11
+ * const embedder = new Embedding({ apiKey: 'your-key' });
12
+ * const result = await embedder.embed('Hello world');
13
+ * console.log(result.values); // [0.012, -0.034, ...]
14
+ * ```
15
+ */
16
+
17
+ import BaseGemini from './base.js';
18
+ import log from './logger.js';
19
+
20
+ export default class Embedding extends BaseGemini {
21
+
22
+ /**
23
+ * @param {import('./types.d.ts').EmbeddingOptions} [options={}]
24
+ */
25
+ constructor(options = {}) {
26
+ // Embeddings use a different model family — default to gemini-embedding-001
27
+ if (options.modelName === undefined) {
28
+ options = { ...options, modelName: 'gemini-embedding-001' };
29
+ }
30
+
31
+ // No system prompt for embeddings
32
+ if (options.systemPrompt === undefined) {
33
+ options = { ...options, systemPrompt: null };
34
+ }
35
+
36
+ super(options);
37
+
38
+ this.taskType = options.taskType || null;
39
+ this.title = options.title || null;
40
+ this.outputDimensionality = options.outputDimensionality || null;
41
+ this.autoTruncate = options.autoTruncate ?? true;
42
+
43
+ log.debug(`Embedding created with model: ${this.modelName}`);
44
+ }
45
+
46
+ /**
47
+ * Initialize the Embedding client.
48
+ * Override: validates API connection only, NO chat session (stateless).
49
+ * @param {boolean} [force=false]
50
+ * @returns {Promise<void>}
51
+ */
52
+ async init(force = false) {
53
+ if (this._initialized && !force) return;
54
+
55
+ log.debug(`Initializing ${this.constructor.name} with model: ${this.modelName}...`);
56
+
57
+ try {
58
+ await this.genAIClient.models.list();
59
+ log.debug(`${this.constructor.name}: API connection successful.`);
60
+ } catch (e) {
61
+ throw new Error(`${this.constructor.name} initialization failed: ${e.message}`);
62
+ }
63
+
64
+ this._initialized = true;
65
+ log.debug(`${this.constructor.name}: Initialized (stateless mode).`);
66
+ }
67
+
68
+ /**
69
+ * Builds the config object for embedContent calls.
70
+ * @param {Object} [overrides={}] - Per-call config overrides
71
+ * @returns {Object} The config object
72
+ * @private
73
+ */
74
+ _buildConfig(overrides = {}) {
75
+ const config = {};
76
+ const taskType = overrides.taskType || this.taskType;
77
+ const title = overrides.title || this.title;
78
+ const dims = overrides.outputDimensionality || this.outputDimensionality;
79
+
80
+ if (taskType) config.taskType = taskType;
81
+ if (title) config.title = title;
82
+ if (dims) config.outputDimensionality = dims;
83
+
84
+ return config;
85
+ }
86
+
87
+ /**
88
+ * Embed a single text string.
89
+ * @param {string} text - The text to embed
90
+ * @param {Object} [config={}] - Per-call config overrides
91
+ * @param {string} [config.taskType] - Override task type
92
+ * @param {string} [config.title] - Override title
93
+ * @param {number} [config.outputDimensionality] - Override dimensions
94
+
95
+ * @returns {Promise<import('./types.d.ts').EmbeddingResult>} The embedding result
96
+ */
97
+ async embed(text, config = {}) {
98
+ if (!this._initialized) await this.init();
99
+
100
+ const result = await this._withRetry(() => this.genAIClient.models.embedContent({
101
+ model: this.modelName,
102
+ contents: text,
103
+ config: this._buildConfig(config)
104
+ }));
105
+
106
+ return result.embeddings[0];
107
+ }
108
+
109
+ /**
110
+ * Embed multiple text strings in a single API call.
111
+ * @param {string[]} texts - Array of texts to embed
112
+ * @param {Object} [config={}] - Per-call config overrides
113
+ * @param {string} [config.taskType] - Override task type
114
+ * @param {string} [config.title] - Override title
115
+ * @param {number} [config.outputDimensionality] - Override dimensions
116
+
117
+ * @returns {Promise<import('./types.d.ts').EmbeddingResult[]>} Array of embedding results
118
+ */
119
+ async embedBatch(texts, config = {}) {
120
+ if (!this._initialized) await this.init();
121
+
122
+ const result = await this._withRetry(() => this.genAIClient.models.embedContent({
123
+ model: this.modelName,
124
+ contents: texts,
125
+ config: this._buildConfig(config)
126
+ }));
127
+
128
+ return result.embeddings;
129
+ }
130
+
131
+ /**
132
+ * Compute cosine similarity between two embedding vectors.
133
+ * Pure math — no API call.
134
+ * @param {number[]} a - First embedding vector
135
+ * @param {number[]} b - Second embedding vector
136
+ * @returns {number} Cosine similarity between -1 and 1
137
+ */
138
+ similarity(a, b) {
139
+ if (!a || !b || a.length !== b.length) {
140
+ throw new Error('Vectors must be non-null and have the same length');
141
+ }
142
+
143
+ let dot = 0;
144
+ let magA = 0;
145
+ let magB = 0;
146
+
147
+ for (let i = 0; i < a.length; i++) {
148
+ dot += a[i] * b[i];
149
+ magA += a[i] * a[i];
150
+ magB += b[i] * b[i];
151
+ }
152
+
153
+ const magnitude = Math.sqrt(magA) * Math.sqrt(magB);
154
+ if (magnitude === 0) return 0;
155
+
156
+ return dot / magnitude;
157
+ }
158
+
159
+ // ── No-ops (embeddings don't use chat sessions) ──
160
+
161
+ /** @returns {any[]} Always returns empty array */
162
+ getHistory() { return []; }
163
+
164
+ /** No-op for Embedding */
165
+ async clearHistory() {}
166
+
167
+ /** No-op for Embedding */
168
+ async seed() {
169
+ log.warn('Embedding.seed() is a no-op — embeddings do not support few-shot examples.');
170
+ return [];
171
+ }
172
+
173
+ /**
174
+ * @param {any} _nextPayload
175
+ * @throws {Error} Embedding does not support token estimation
176
+ * @returns {Promise<{inputTokens: number}>}
177
+ */
178
+ async estimate(_nextPayload) {
179
+ throw new Error('Embedding does not support token estimation. Use embed() directly.');
180
+ }
181
+ }
package/index.cjs CHANGED
@@ -361,6 +361,8 @@ var BaseGemini = class {
361
361
  if (this.vertexai && !this.project) {
362
362
  throw new Error("Vertex AI requires a project ID. Provide via options.project or GOOGLE_CLOUD_PROJECT env var.");
363
363
  }
364
+ this.resourceExhaustedRetries = options.resourceExhaustedRetries ?? 5;
365
+ this.resourceExhaustedDelay = options.resourceExhaustedDelay ?? 1e3;
364
366
  this._configureLogLevel(options.logLevel);
365
367
  this.labels = options.labels || {};
366
368
  this.enableGrounding = options.enableGrounding || false;
@@ -619,10 +621,10 @@ ${contextText}
619
621
  }
620
622
  const nextMessage = typeof nextPayload === "string" ? nextPayload : JSON.stringify(nextPayload, null, 2);
621
623
  contents.push({ parts: [{ text: nextMessage }] });
622
- const resp = await this.genAIClient.models.countTokens({
624
+ const resp = await this._withRetry(() => this.genAIClient.models.countTokens({
623
625
  model: this.modelName,
624
626
  contents
625
- });
627
+ }));
626
628
  return { inputTokens: resp.totalTokens };
627
629
  }
628
630
  /**
@@ -664,10 +666,10 @@ ${contextText}
664
666
  if (config.toolConfig) cacheConfig.toolConfig = config.toolConfig;
665
667
  const sysInstruction = config.systemInstruction !== void 0 ? config.systemInstruction : this.systemPrompt;
666
668
  if (sysInstruction) cacheConfig.systemInstruction = sysInstruction;
667
- const cached = await this.genAIClient.caches.create({
669
+ const cached = await this._withRetry(() => this.genAIClient.caches.create({
668
670
  model: config.model || this.modelName,
669
671
  config: cacheConfig
670
- });
672
+ }));
671
673
  logger_default.debug(`Cache created: ${cached.name}`);
672
674
  return cached;
673
675
  }
@@ -677,14 +679,14 @@ ${contextText}
677
679
  * @returns {Promise<Object>} The cached content resource
678
680
  */
679
681
  async getCache(cacheName) {
680
- return await this.genAIClient.caches.get({ name: cacheName });
682
+ return await this._withRetry(() => this.genAIClient.caches.get({ name: cacheName }));
681
683
  }
682
684
  /**
683
685
  * Lists all cached content resources.
684
686
  * @returns {Promise<Object>} Pager of cached content resources
685
687
  */
686
688
  async listCaches() {
687
- const pager = await this.genAIClient.caches.list();
689
+ const pager = await this._withRetry(() => this.genAIClient.caches.list());
688
690
  const results = [];
689
691
  for await (const cache of pager) {
690
692
  results.push(cache);
@@ -700,13 +702,13 @@ ${contextText}
700
702
  * @returns {Promise<Object>} The updated cache resource
701
703
  */
702
704
  async updateCache(cacheName, config = {}) {
703
- return await this.genAIClient.caches.update({
705
+ return await this._withRetry(() => this.genAIClient.caches.update({
704
706
  name: cacheName,
705
707
  config: {
706
708
  ...config.ttl && { ttl: config.ttl },
707
709
  ...config.expireTime && { expireTime: config.expireTime }
708
710
  }
709
- });
711
+ }));
710
712
  }
711
713
  /**
712
714
  * Deletes a cached content resource.
@@ -715,7 +717,7 @@ ${contextText}
715
717
  * @returns {Promise<void>}
716
718
  */
717
719
  async deleteCache(cacheName) {
718
- await this.genAIClient.caches.delete({ name: cacheName });
720
+ await this._withRetry(() => this.genAIClient.caches.delete({ name: cacheName }));
719
721
  logger_default.debug(`Cache deleted: ${cacheName}`);
720
722
  if (this.cachedContent === cacheName) {
721
723
  this.cachedContent = null;
@@ -734,6 +736,41 @@ ${contextText}
734
736
  }
735
737
  logger_default.debug(`Using cache: ${cacheName}`);
736
738
  }
739
+ // ── Rate Limit Retry ────────────────────────────────────────────────────
740
+ /**
741
+ * Detects whether an error is a 429 / RESOURCE_EXHAUSTED rate-limit error.
742
+ * @param {Error} error
743
+ * @returns {boolean}
744
+ * @private
745
+ */
746
+ _is429Error(error) {
747
+ const e = error;
748
+ if (e.status === 429 || e.code === 429 || e.httpStatusCode === 429) return true;
749
+ const msg = e.message || "";
750
+ return msg.includes("429") || msg.includes("RESOURCE_EXHAUSTED");
751
+ }
752
+ /**
753
+ * Wraps an async function with automatic retry on 429 (RESOURCE_EXHAUSTED) errors.
754
+ * Uses exponential backoff with jitter. Non-429 errors are rethrown immediately.
755
+ * @param {() => Promise<T>} fn - The async function to execute
756
+ * @returns {Promise<T>}
757
+ * @template T
758
+ * @protected
759
+ */
760
+ async _withRetry(fn) {
761
+ const maxAttempts = this.resourceExhaustedRetries;
762
+ for (let attempt = 0; attempt <= maxAttempts; attempt++) {
763
+ try {
764
+ return await fn();
765
+ } catch (error) {
766
+ if (!this._is429Error(error) || attempt >= maxAttempts) throw error;
767
+ const jitter = Math.random() * 500;
768
+ const delay = this.resourceExhaustedDelay * Math.pow(2, attempt) + jitter;
769
+ logger_default.warn(`Rate limited (429). Retrying in ${Math.round(delay)}ms (attempt ${attempt + 1}/${maxAttempts})...`);
770
+ await new Promise((r) => setTimeout(r, delay));
771
+ }
772
+ }
773
+ }
737
774
  // ── Private Helpers ──────────────────────────────────────────────────────
738
775
  /**
739
776
  * Configures the log level based on options, env vars, or NODE_ENV.
@@ -965,7 +1002,7 @@ var Transformer = class extends base_default {
965
1002
  if (hasLabels) {
966
1003
  sendParams.config = { labels: mergedLabels };
967
1004
  }
968
- const result = await this.chatSession.sendMessage(sendParams);
1005
+ const result = await this._withRetry(() => this.chatSession.sendMessage(sendParams));
969
1006
  this._captureMetadata(result);
970
1007
  if (result.usageMetadata && logger_default.level !== "silent") {
971
1008
  logger_default.debug(`API response: model=${result.modelVersion || "unknown"}, tokens=${result.usageMetadata.totalTokenCount}`);
@@ -1009,7 +1046,7 @@ Respond with JSON only \u2013 no comments or explanations.
1009
1046
  `;
1010
1047
  let result;
1011
1048
  try {
1012
- result = await this.chatSession.sendMessage({ message: prompt });
1049
+ result = await this._withRetry(() => this.chatSession.sendMessage({ message: prompt }));
1013
1050
  this._captureMetadata(result);
1014
1051
  } catch (err) {
1015
1052
  throw new Error(`Gemini call failed while repairing payload: ${err.message}`);
@@ -1044,14 +1081,14 @@ Respond with JSON only \u2013 no comments or explanations.
1044
1081
  }
1045
1082
  contents.push({ role: "user", parts: [{ text: payloadStr }] });
1046
1083
  const mergedLabels = { ...this.labels, ...opts.labels || {} };
1047
- const result = await this.genAIClient.models.generateContent({
1084
+ const result = await this._withRetry(() => this.genAIClient.models.generateContent({
1048
1085
  model: this.modelName,
1049
1086
  contents,
1050
1087
  config: {
1051
1088
  ...this.chatConfig,
1052
1089
  ...this.vertexai && Object.keys(mergedLabels).length > 0 && { labels: mergedLabels }
1053
1090
  }
1054
- });
1091
+ }));
1055
1092
  this._captureMetadata(result);
1056
1093
  this._cumulativeUsage = {
1057
1094
  promptTokens: this.lastResponseMetadata.promptTokens,
@@ -1163,7 +1200,7 @@ var Chat = class extends base_default {
1163
1200
  if (hasLabels) {
1164
1201
  sendParams.config = { labels: mergedLabels };
1165
1202
  }
1166
- const result = await this.chatSession.sendMessage(sendParams);
1203
+ const result = await this._withRetry(() => this.chatSession.sendMessage(sendParams));
1167
1204
  this._captureMetadata(result);
1168
1205
  this._cumulativeUsage = {
1169
1206
  promptTokens: this.lastResponseMetadata.promptTokens,
@@ -1227,14 +1264,14 @@ var Message = class extends base_default {
1227
1264
  const payloadStr = typeof payload === "string" ? payload : JSON.stringify(payload, null, 2);
1228
1265
  const contents = [{ role: "user", parts: [{ text: payloadStr }] }];
1229
1266
  const mergedLabels = { ...this.labels, ...opts.labels || {} };
1230
- const result = await this.genAIClient.models.generateContent({
1267
+ const result = await this._withRetry(() => this.genAIClient.models.generateContent({
1231
1268
  model: this.modelName,
1232
1269
  contents,
1233
1270
  config: {
1234
1271
  ...this.chatConfig,
1235
1272
  ...this.vertexai && Object.keys(mergedLabels).length > 0 && { labels: mergedLabels }
1236
1273
  }
1237
- });
1274
+ }));
1238
1275
  this._captureMetadata(result);
1239
1276
  this._cumulativeUsage = {
1240
1277
  promptTokens: this.lastResponseMetadata.promptTokens,
@@ -1327,7 +1364,7 @@ var ToolAgent = class extends base_default {
1327
1364
  if (!this.chatSession) await this.init();
1328
1365
  this._stopped = false;
1329
1366
  const allToolCalls = [];
1330
- let response = await this.chatSession.sendMessage({ message });
1367
+ let response = await this._withRetry(() => this.chatSession.sendMessage({ message }));
1331
1368
  for (let round = 0; round < this.maxToolRounds; round++) {
1332
1369
  if (this._stopped) break;
1333
1370
  const functionCalls = response.functionCalls;
@@ -1364,7 +1401,7 @@ var ToolAgent = class extends base_default {
1364
1401
  return { id: call.id, name: call.name, result };
1365
1402
  })
1366
1403
  );
1367
- response = await this.chatSession.sendMessage({
1404
+ response = await this._withRetry(() => this.chatSession.sendMessage({
1368
1405
  message: toolResults.map((r) => ({
1369
1406
  functionResponse: {
1370
1407
  id: r.id,
@@ -1372,7 +1409,7 @@ var ToolAgent = class extends base_default {
1372
1409
  response: { output: r.result }
1373
1410
  }
1374
1411
  }))
1375
- });
1412
+ }));
1376
1413
  }
1377
1414
  this._captureMetadata(response);
1378
1415
  this._cumulativeUsage = {
@@ -1407,7 +1444,7 @@ var ToolAgent = class extends base_default {
1407
1444
  this._stopped = false;
1408
1445
  const allToolCalls = [];
1409
1446
  let fullText = "";
1410
- let streamResponse = await this.chatSession.sendMessageStream({ message });
1447
+ let streamResponse = await this._withRetry(() => this.chatSession.sendMessageStream({ message }));
1411
1448
  for (let round = 0; round < this.maxToolRounds; round++) {
1412
1449
  if (this._stopped) break;
1413
1450
  let roundText = "";
@@ -1465,7 +1502,7 @@ var ToolAgent = class extends base_default {
1465
1502
  yield { type: "tool_result", toolName: call.name, result };
1466
1503
  toolResults.push({ id: call.id, name: call.name, result });
1467
1504
  }
1468
- streamResponse = await this.chatSession.sendMessageStream({
1505
+ streamResponse = await this._withRetry(() => this.chatSession.sendMessageStream({
1469
1506
  message: toolResults.map((r) => ({
1470
1507
  functionResponse: {
1471
1508
  id: r.id,
@@ -1473,7 +1510,7 @@ var ToolAgent = class extends base_default {
1473
1510
  response: { output: r.result }
1474
1511
  }
1475
1512
  }))
1476
- });
1513
+ }));
1477
1514
  }
1478
1515
  yield {
1479
1516
  type: "done",
@@ -1866,7 +1903,7 @@ ${this._userSystemPrompt}`;
1866
1903
  this._stopped = false;
1867
1904
  const codeExecutions = [];
1868
1905
  let consecutiveFailures = 0;
1869
- let response = await this.chatSession.sendMessage({ message });
1906
+ let response = await this._withRetry(() => this.chatSession.sendMessage({ message }));
1870
1907
  for (let round = 0; round < this.maxRounds; round++) {
1871
1908
  if (this._stopped) break;
1872
1909
  const functionCalls = response.functionCalls;
@@ -1902,7 +1939,7 @@ ${this._userSystemPrompt}`;
1902
1939
  });
1903
1940
  }
1904
1941
  if (this._stopped) break;
1905
- response = await this.chatSession.sendMessage({
1942
+ response = await this._withRetry(() => this.chatSession.sendMessage({
1906
1943
  message: results.map((r) => ({
1907
1944
  functionResponse: {
1908
1945
  id: r.id,
@@ -1910,7 +1947,7 @@ ${this._userSystemPrompt}`;
1910
1947
  response: { output: r.result }
1911
1948
  }
1912
1949
  }))
1913
- });
1950
+ }));
1914
1951
  if (consecutiveFailures >= this.maxRetries) break;
1915
1952
  }
1916
1953
  this._captureMetadata(response);
@@ -1947,7 +1984,7 @@ ${this._userSystemPrompt}`;
1947
1984
  const codeExecutions = [];
1948
1985
  let fullText = "";
1949
1986
  let consecutiveFailures = 0;
1950
- let streamResponse = await this.chatSession.sendMessageStream({ message });
1987
+ let streamResponse = await this._withRetry(() => this.chatSession.sendMessageStream({ message }));
1951
1988
  for (let round = 0; round < this.maxRounds; round++) {
1952
1989
  if (this._stopped) break;
1953
1990
  const functionCalls = [];
@@ -2008,7 +2045,7 @@ ${this._userSystemPrompt}`;
2008
2045
  });
2009
2046
  }
2010
2047
  if (this._stopped) break;
2011
- streamResponse = await this.chatSession.sendMessageStream({
2048
+ streamResponse = await this._withRetry(() => this.chatSession.sendMessageStream({
2012
2049
  message: results.map((r) => ({
2013
2050
  functionResponse: {
2014
2051
  id: r.id,
@@ -2016,7 +2053,7 @@ ${this._userSystemPrompt}`;
2016
2053
  response: { output: r.result }
2017
2054
  }
2018
2055
  }))
2019
- });
2056
+ }));
2020
2057
  if (consecutiveFailures >= this.maxRetries) break;
2021
2058
  }
2022
2059
  let warning = "Max tool rounds reached";
@@ -2141,10 +2178,10 @@ var RagAgent = class extends base_default {
2141
2178
  logger_default.debug(`Uploading remote file: ${resolvedPath}`);
2142
2179
  const ext = (0, import_node_path2.extname)(resolvedPath).toLowerCase();
2143
2180
  const mimeType = MIME_TYPES[ext] || "application/octet-stream";
2144
- const uploaded = await this.genAIClient.files.upload({
2181
+ const uploaded = await this._withRetry(() => this.genAIClient.files.upload({
2145
2182
  file: resolvedPath,
2146
2183
  config: { displayName: (0, import_node_path2.basename)(resolvedPath), mimeType }
2147
- });
2184
+ }));
2148
2185
  await this._waitForFileActive(uploaded);
2149
2186
  this._uploadedRemoteFiles.push({
2150
2187
  ...uploaded,
@@ -2202,7 +2239,7 @@ ${serialized}` });
2202
2239
  */
2203
2240
  async chat(message, opts = {}) {
2204
2241
  if (!this._initialized) await this.init();
2205
- const response = await this.chatSession.sendMessage({ message });
2242
+ const response = await this._withRetry(() => this.chatSession.sendMessage({ message }));
2206
2243
  this._captureMetadata(response);
2207
2244
  this._cumulativeUsage = {
2208
2245
  promptTokens: this.lastResponseMetadata.promptTokens,
@@ -2226,7 +2263,7 @@ ${serialized}` });
2226
2263
  async *stream(message, opts = {}) {
2227
2264
  if (!this._initialized) await this.init();
2228
2265
  let fullText = "";
2229
- const streamResponse = await this.chatSession.sendMessageStream({ message });
2266
+ const streamResponse = await this._withRetry(() => this.chatSession.sendMessageStream({ message }));
2230
2267
  for await (const chunk of streamResponse) {
2231
2268
  if (chunk.candidates?.[0]?.content?.parts?.[0]?.text) {
2232
2269
  const text = chunk.candidates[0].content.parts[0].text;
@@ -2381,11 +2418,11 @@ var Embedding = class extends base_default {
2381
2418
  */
2382
2419
  async embed(text, config = {}) {
2383
2420
  if (!this._initialized) await this.init();
2384
- const result = await this.genAIClient.models.embedContent({
2421
+ const result = await this._withRetry(() => this.genAIClient.models.embedContent({
2385
2422
  model: this.modelName,
2386
2423
  contents: text,
2387
2424
  config: this._buildConfig(config)
2388
- });
2425
+ }));
2389
2426
  return result.embeddings[0];
2390
2427
  }
2391
2428
  /**
@@ -2400,11 +2437,11 @@ var Embedding = class extends base_default {
2400
2437
  */
2401
2438
  async embedBatch(texts, config = {}) {
2402
2439
  if (!this._initialized) await this.init();
2403
- const result = await this.genAIClient.models.embedContent({
2440
+ const result = await this._withRetry(() => this.genAIClient.models.embedContent({
2404
2441
  model: this.modelName,
2405
2442
  contents: texts,
2406
2443
  config: this._buildConfig(config)
2407
- });
2444
+ }));
2408
2445
  return result.embeddings;
2409
2446
  }
2410
2447
  /**
package/message.js CHANGED
@@ -102,14 +102,14 @@ class Message extends BaseGemini {
102
102
 
103
103
  const mergedLabels = { ...this.labels, ...(opts.labels || {}) };
104
104
 
105
- const result = await this.genAIClient.models.generateContent({
105
+ const result = await this._withRetry(() => this.genAIClient.models.generateContent({
106
106
  model: this.modelName,
107
107
  contents: contents,
108
108
  config: {
109
109
  ...this.chatConfig,
110
110
  ...(this.vertexai && Object.keys(mergedLabels).length > 0 && { labels: mergedLabels })
111
111
  }
112
- });
112
+ }));
113
113
 
114
114
  this._captureMetadata(result);
115
115
 
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "ak-gemini",
3
3
  "author": "ak@mixpanel.com",
4
4
  "description": "AK's Generative AI Helper for doing... everything",
5
- "version": "2.0.3",
5
+ "version": "2.0.5",
6
6
  "main": "index.js",
7
7
  "files": [
8
8
  "index.js",
@@ -14,6 +14,7 @@
14
14
  "tool-agent.js",
15
15
  "code-agent.js",
16
16
  "rag-agent.js",
17
+ "embedding.js",
17
18
  "json-helpers.js",
18
19
  "types.d.ts",
19
20
  "logger.js",
package/rag-agent.js CHANGED
@@ -123,10 +123,10 @@ class RagAgent extends BaseGemini {
123
123
  const ext = extname(resolvedPath).toLowerCase();
124
124
  const mimeType = MIME_TYPES[ext] || 'application/octet-stream';
125
125
 
126
- const uploaded = await this.genAIClient.files.upload({
126
+ const uploaded = await this._withRetry(() => this.genAIClient.files.upload({
127
127
  file: resolvedPath,
128
128
  config: { displayName: basename(resolvedPath), mimeType }
129
- });
129
+ }));
130
130
 
131
131
  await this._waitForFileActive(uploaded);
132
132
 
@@ -206,7 +206,7 @@ class RagAgent extends BaseGemini {
206
206
  async chat(message, opts = {}) {
207
207
  if (!this._initialized) await this.init();
208
208
 
209
- const response = await this.chatSession.sendMessage({ message });
209
+ const response = await this._withRetry(() => this.chatSession.sendMessage({ message }));
210
210
 
211
211
  this._captureMetadata(response);
212
212
 
@@ -236,7 +236,7 @@ class RagAgent extends BaseGemini {
236
236
  if (!this._initialized) await this.init();
237
237
 
238
238
  let fullText = '';
239
- const streamResponse = await this.chatSession.sendMessageStream({ message });
239
+ const streamResponse = await this._withRetry(() => this.chatSession.sendMessageStream({ message }));
240
240
 
241
241
  for await (const chunk of streamResponse) {
242
242
  if (chunk.candidates?.[0]?.content?.parts?.[0]?.text) {
package/tool-agent.js CHANGED
@@ -108,7 +108,7 @@ class ToolAgent extends BaseGemini {
108
108
 
109
109
  const allToolCalls = [];
110
110
 
111
- let response = await this.chatSession.sendMessage({ message });
111
+ let response = await this._withRetry(() => this.chatSession.sendMessage({ message }));
112
112
 
113
113
  for (let round = 0; round < this.maxToolRounds; round++) {
114
114
  if (this._stopped) break;
@@ -153,7 +153,7 @@ class ToolAgent extends BaseGemini {
153
153
  );
154
154
 
155
155
  // Send function responses back to the model
156
- response = await this.chatSession.sendMessage({
156
+ response = await this._withRetry(() => this.chatSession.sendMessage({
157
157
  message: toolResults.map(r => ({
158
158
  functionResponse: {
159
159
  id: r.id,
@@ -161,7 +161,7 @@ class ToolAgent extends BaseGemini {
161
161
  response: { output: r.result }
162
162
  }
163
163
  }))
164
- });
164
+ }));
165
165
  }
166
166
 
167
167
  this._captureMetadata(response);
@@ -204,7 +204,7 @@ class ToolAgent extends BaseGemini {
204
204
  const allToolCalls = [];
205
205
  let fullText = '';
206
206
 
207
- let streamResponse = await this.chatSession.sendMessageStream({ message });
207
+ let streamResponse = await this._withRetry(() => this.chatSession.sendMessageStream({ message }));
208
208
 
209
209
  for (let round = 0; round < this.maxToolRounds; round++) {
210
210
  if (this._stopped) break;
@@ -277,7 +277,7 @@ class ToolAgent extends BaseGemini {
277
277
  }
278
278
 
279
279
  // Send function responses back and get next stream
280
- streamResponse = await this.chatSession.sendMessageStream({
280
+ streamResponse = await this._withRetry(() => this.chatSession.sendMessageStream({
281
281
  message: toolResults.map(r => ({
282
282
  functionResponse: {
283
283
  id: r.id,
@@ -285,7 +285,7 @@ class ToolAgent extends BaseGemini {
285
285
  response: { output: r.result }
286
286
  }
287
287
  }))
288
- });
288
+ }));
289
289
  }
290
290
 
291
291
  // Max rounds reached or stopped
package/transformer.js CHANGED
@@ -274,7 +274,7 @@ class Transformer extends BaseGemini {
274
274
  sendParams.config = { labels: mergedLabels };
275
275
  }
276
276
 
277
- const result = await this.chatSession.sendMessage(sendParams);
277
+ const result = await this._withRetry(() => this.chatSession.sendMessage(sendParams));
278
278
 
279
279
  this._captureMetadata(result);
280
280
 
@@ -327,7 +327,7 @@ Respond with JSON only – no comments or explanations.
327
327
 
328
328
  let result;
329
329
  try {
330
- result = await this.chatSession.sendMessage({ message: prompt });
330
+ result = await this._withRetry(() => this.chatSession.sendMessage({ message: prompt }));
331
331
  this._captureMetadata(result);
332
332
  } catch (err) {
333
333
  throw new Error(`Gemini call failed while repairing payload: ${err.message}`);
@@ -374,14 +374,14 @@ Respond with JSON only – no comments or explanations.
374
374
 
375
375
  const mergedLabels = { ...this.labels, ...(opts.labels || {}) };
376
376
 
377
- const result = await this.genAIClient.models.generateContent({
377
+ const result = await this._withRetry(() => this.genAIClient.models.generateContent({
378
378
  model: this.modelName,
379
379
  contents: contents,
380
380
  config: {
381
381
  ...this.chatConfig,
382
382
  ...(this.vertexai && Object.keys(mergedLabels).length > 0 && { labels: mergedLabels })
383
383
  }
384
- });
384
+ }));
385
385
 
386
386
  this._captureMetadata(result);
387
387
 
package/types.d.ts CHANGED
@@ -169,6 +169,11 @@ export interface BaseGeminiOptions {
169
169
 
170
170
  /** Cached content resource name to use for this session */
171
171
  cachedContent?: string;
172
+
173
+ /** Max retry attempts for 429 RESOURCE_EXHAUSTED errors (default: 5) */
174
+ resourceExhaustedRetries?: number;
175
+ /** Initial backoff delay in ms for 429 retries, doubles each attempt (default: 1000) */
176
+ resourceExhaustedDelay?: number;
172
177
  }
173
178
 
174
179
  export interface TransformerOptions extends BaseGeminiOptions {