ak-gemini 2.2.1 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/base.js CHANGED
@@ -25,26 +25,47 @@ const DEFAULT_THINKING_CONFIG = {
25
25
 
26
26
  const DEFAULT_MAX_OUTPUT_TOKENS = 50_000;
27
27
 
28
- /** Models that support thinking features */
28
+ /** Models that support thinking features. Image / live / tts variants intentionally excluded. */
29
29
  const THINKING_SUPPORTED_MODELS = [
30
- /^gemini-3-flash(-preview)?$/,
31
- /^gemini-3-pro(-preview|-image-preview)?$/,
30
+ /^gemini-3(\.\d+)?-pro(-preview)?$/,
31
+ /^gemini-3(\.\d+)?-flash(-preview)?$/,
32
+ /^gemini-3(\.\d+)?-flash-lite(-preview)?$/,
32
33
  /^gemini-2\.5-pro/,
33
34
  /^gemini-2\.5-flash(-preview)?$/,
34
35
  /^gemini-2\.5-flash-lite(-preview)?$/,
35
36
  /^gemini-2\.0-flash$/
36
37
  ];
37
38
 
38
- /** Model pricing per million tokens (as of Dec 2025) */
39
+ /**
40
+ * Model pricing per million tokens (Paid Tier Standard, base rate, as of May 2026).
41
+ * Source: https://ai.google.dev/gemini-api/docs/pricing
42
+ *
43
+ * NOTES:
44
+ * - Pro models use tiered pricing (≤200k vs >200k context). Listed rate is ≤200k base tier.
45
+ * - Image-output tokens on Nano Banana models bill at $60/M (3.1 Flash Image) or $120/M (3 Pro Image).
46
+ * Only text-input/text-output rates are modelled here; image-output cost is NOT included in estimateCost().
47
+ * - Audio input is more expensive on most models — listed rate covers text/image/video input.
48
+ */
39
49
  const MODEL_PRICING = {
40
- 'gemini-2.5-flash': { input: 0.15, output: 0.60 },
41
- 'gemini-2.5-flash-lite': { input: 0.02, output: 0.10 },
42
- 'gemini-2.5-pro': { input: 2.50, output: 10.00 },
43
- 'gemini-3-pro': { input: 2.00, output: 12.00 },
44
- 'gemini-3-pro-preview': { input: 2.00, output: 12.00 },
50
+ // Gemini 3.x stable
51
+ 'gemini-3.5-flash': { input: 1.50, output: 9.00 },
52
+ 'gemini-3.1-flash-lite': { input: 0.25, output: 1.50 },
53
+ // Gemini 3.x preview
54
+ 'gemini-3.1-pro-preview': { input: 2.00, output: 12.00 }, // ≤200k tier
55
+ 'gemini-3-flash-preview': { input: 0.50, output: 3.00 },
56
+ 'gemini-3.1-flash-lite-preview': { input: 0.25, output: 1.50 },
57
+ 'gemini-3.1-flash-image-preview': { input: 0.50, output: 3.00 }, // text-only; image-output is $60/M
58
+ 'gemini-3-pro-image-preview': { input: 2.00, output: 12.00 }, // text-only; image-output is $120/M
59
+ // Gemini 2.5 stable
60
+ 'gemini-2.5-flash': { input: 0.30, output: 2.50 },
61
+ 'gemini-2.5-flash-lite': { input: 0.10, output: 0.40 },
62
+ 'gemini-2.5-pro': { input: 1.25, output: 10.00 }, // ≤200k tier
63
+ 'gemini-2.5-flash-image': { input: 0.30, output: 0 }, // image-output is ~$0.039/image (1290 tokens)
64
+ // Deprecated but kept for back-compat (shut down June 2026)
45
65
  'gemini-2.0-flash': { input: 0.10, output: 0.40 },
46
66
  'gemini-2.0-flash-lite': { input: 0.02, output: 0.10 },
47
- 'gemini-embedding-001': { input: 0.006, output: 0 }
67
+ // Embeddings
68
+ 'gemini-embedding-001': { input: 0.15, output: 0 }
48
69
  };
49
70
 
50
71
  export { DEFAULT_SAFETY_SETTINGS, DEFAULT_THINKING_CONFIG, THINKING_SUPPORTED_MODELS, MODEL_PRICING, DEFAULT_MAX_OUTPUT_TOKENS };
@@ -70,7 +91,7 @@ class BaseGemini {
70
91
  */
71
92
  constructor(options = {}) {
72
93
  // ── Model ──
73
- this.modelName = options.modelName || 'gemini-2.5-flash';
94
+ this.modelName = options.modelName || 'gemini-3-flash-preview';
74
95
 
75
96
  // ── System Prompt ──
76
97
  // Subclasses set their own default if options.systemPrompt is undefined
@@ -114,6 +135,14 @@ class BaseGemini {
114
135
  // ── Caching ──
115
136
  this.cachedContent = options.cachedContent || null;
116
137
 
138
+ // ── Service Tier (Gemini API / Vertex AI 2026+) ──
139
+ // Allowed values: 'STANDARD' | 'FLEX' | 'PRIORITY' — a cost vs. latency trade-off.
140
+ this.serviceTier = options.serviceTier || null;
141
+
142
+ // ── Server-Side Tool Invocation Visibility (1.46.0+) ──
143
+ // When grounding is on, surface the server's tool calls (e.g. Google Search) in the response.
144
+ this.includeServerSideToolInvocations = options.includeServerSideToolInvocations ?? false;
145
+
117
146
  // ── Chat Config ──
118
147
  this.chatConfig = {
119
148
  temperature: 0.7,
@@ -123,6 +152,9 @@ class BaseGemini {
123
152
  ...options.chatConfig
124
153
  };
125
154
 
155
+ if (this.serviceTier) this.chatConfig['serviceTier'] = this.serviceTier;
156
+ if (this.includeServerSideToolInvocations) this.chatConfig['includeServerSideToolInvocations'] = true;
157
+
126
158
  // Apply systemPrompt to chatConfig
127
159
  if (this.systemPrompt) {
128
160
  this.chatConfig.systemInstruction = this.systemPrompt;
@@ -365,6 +397,7 @@ class BaseGemini {
365
397
  * @protected
366
398
  */
367
399
  _captureMetadata(response) {
400
+ const modelStatus = response?.modelStatus || null;
368
401
  this.lastResponseMetadata = {
369
402
  modelVersion: response.modelVersion || null,
370
403
  requestedModel: this.modelName,
@@ -372,8 +405,13 @@ class BaseGemini {
372
405
  responseTokens: response.usageMetadata?.candidatesTokenCount || 0,
373
406
  totalTokens: response.usageMetadata?.totalTokenCount || 0,
374
407
  timestamp: Date.now(),
375
- groundingMetadata: response.candidates?.[0]?.groundingMetadata || null
408
+ groundingMetadata: response.candidates?.[0]?.groundingMetadata || null,
409
+ modelStatus
376
410
  };
411
+ if (modelStatus === 'DEPRECATED' && !this._deprecationWarned) {
412
+ log.warn(`Model "${this.modelName}" is marked DEPRECATED by Google. Plan migration.`);
413
+ this._deprecationWarned = true;
414
+ }
377
415
  }
378
416
 
379
417
  /**
@@ -396,7 +434,8 @@ class BaseGemini {
396
434
  modelVersion: meta.modelVersion,
397
435
  requestedModel: meta.requestedModel,
398
436
  timestamp: meta.timestamp,
399
- groundingMetadata: meta.groundingMetadata || null
437
+ groundingMetadata: meta.groundingMetadata || null,
438
+ modelStatus: meta.modelStatus || null
400
439
  };
401
440
  }
402
441
 
@@ -0,0 +1,186 @@
1
+ /**
2
+ * @fileoverview ImageGenerator — Generate images via Gemini's Nano Banana models.
3
+ *
4
+ * Extends BaseGemini for auth/client reuse but overrides init() to skip chat session
5
+ * creation (image gen is stateless). Mirrors the Embedding class pattern.
6
+ *
7
+ * @example
8
+ * ```javascript
9
+ * import { ImageGenerator } from 'ak-gemini';
10
+ * import { writeFileSync } from 'node:fs';
11
+ *
12
+ * const gen = new ImageGenerator({ apiKey: 'your-key' });
13
+ * const result = await gen.generate('A cat astronaut on the moon');
14
+ * writeFileSync('cat.png', Buffer.from(result.images[0].data, 'base64'));
15
+ * ```
16
+ */
17
+
18
+ import BaseGemini from './base.js';
19
+ import log from './logger.js';
20
+ import { writeFileSync } from 'node:fs';
21
+
22
+ const DEFAULT_IMAGE_MODEL = 'gemini-3.1-flash-image-preview';
23
+
24
+ export default class ImageGenerator extends BaseGemini {
25
+
26
+ /**
27
+ * @param {import('./types.d.ts').ImageGeneratorOptions} [options={}]
28
+ */
29
+ constructor(options = {}) {
30
+ if (options.modelName === undefined) {
31
+ options = { ...options, modelName: DEFAULT_IMAGE_MODEL };
32
+ }
33
+ if (options.systemPrompt === undefined) {
34
+ options = { ...options, systemPrompt: null };
35
+ }
36
+ super(options);
37
+
38
+ this.aspectRatio = options.aspectRatio || null;
39
+ this.imageSize = options.imageSize || null;
40
+ this.personGeneration = options.personGeneration || null;
41
+ this.includeText = options.includeText ?? false;
42
+
43
+ log.debug(`ImageGenerator created with model: ${this.modelName}`);
44
+ }
45
+
46
+ /**
47
+ * Validate API connection only; no chat session (stateless).
48
+ * @param {boolean} [force=false]
49
+ */
50
+ async init(force = false) {
51
+ if (this._initialized && !force) return;
52
+
53
+ log.debug(`Initializing ${this.constructor.name} with model: ${this.modelName}...`);
54
+
55
+ try {
56
+ await this.genAIClient.models.list();
57
+ log.debug(`${this.constructor.name}: API connection successful.`);
58
+ } catch (e) {
59
+ throw new Error(`${this.constructor.name} initialization failed: ${e.message}`);
60
+ }
61
+
62
+ this._initialized = true;
63
+ }
64
+
65
+ /**
66
+ * Build a FRESH config — Gemini image models reject safetySettings/temp/topK/topP/thinkingConfig.
67
+ * Do NOT spread this.chatConfig.
68
+ * @private
69
+ */
70
+ _buildConfig(overrides = {}) {
71
+ const includeText = overrides.includeText ?? this.includeText;
72
+ const config = { responseModalities: includeText ? ['IMAGE', 'TEXT'] : ['IMAGE'] };
73
+
74
+ const imageConfig = {};
75
+ const aspectRatio = overrides.aspectRatio || this.aspectRatio;
76
+ const imageSize = overrides.imageSize || this.imageSize;
77
+ const personGeneration = overrides.personGeneration || this.personGeneration;
78
+ if (aspectRatio) imageConfig.aspectRatio = aspectRatio;
79
+ if (imageSize) imageConfig.imageSize = imageSize;
80
+ if (personGeneration) imageConfig.personGeneration = personGeneration;
81
+ if (Object.keys(imageConfig).length > 0) config.imageConfig = imageConfig;
82
+
83
+ return config;
84
+ }
85
+
86
+ /**
87
+ * Generate one or more images from a text prompt.
88
+ * Optionally accepts `inputImages` for image editing / multi-image composition.
89
+ *
90
+ * @param {string} prompt
91
+ * @param {import('./types.d.ts').ImageGenerateOptions} [opts={}]
92
+ * @returns {Promise<import('./types.d.ts').ImageGenerationResult>}
93
+ */
94
+ async generate(prompt, opts = {}) {
95
+ if (!this._initialized) await this.init();
96
+
97
+ /** @type {any[]} */
98
+ const parts = [{ text: prompt }];
99
+ if (Array.isArray(opts.inputImages)) {
100
+ for (const img of opts.inputImages) {
101
+ parts.push({ inlineData: { data: img.data, mimeType: img.mimeType } });
102
+ }
103
+ }
104
+
105
+ const result = await this._withRetry(() => this.genAIClient.models.generateContent({
106
+ model: this.modelName,
107
+ contents: [{ role: 'user', parts }],
108
+ config: this._buildConfig(opts)
109
+ }));
110
+
111
+ this._captureMetadata(result);
112
+ this._cumulativeUsage = {
113
+ promptTokens: this.lastResponseMetadata.promptTokens,
114
+ responseTokens: this.lastResponseMetadata.responseTokens,
115
+ totalTokens: this.lastResponseMetadata.totalTokens,
116
+ attempts: 1
117
+ };
118
+
119
+ const images = [];
120
+ let text = '';
121
+ const responseParts = result.candidates?.[0]?.content?.parts || [];
122
+ for (const part of responseParts) {
123
+ if (part.inlineData?.data) {
124
+ images.push({
125
+ data: part.inlineData.data,
126
+ mimeType: part.inlineData.mimeType || 'image/png'
127
+ });
128
+ } else if (part.text) {
129
+ text += part.text;
130
+ }
131
+ }
132
+
133
+ if (images.length === 0) {
134
+ log.warn('ImageGenerator: no images returned. Check prompt or safety filters.');
135
+ }
136
+
137
+ return { images, text: text || null, usage: this.getLastUsage() };
138
+ }
139
+
140
+ /**
141
+ * Convenience: write one or all images to disk.
142
+ * If multiple images, suffixes with `_N` before extension.
143
+ * @param {import('./types.d.ts').ImageGenerationResult} result
144
+ * @param {string} filePath
145
+ * @returns {string[]} Written file paths
146
+ */
147
+ save(result, filePath) {
148
+ if (!result?.images?.length) {
149
+ log.warn('ImageGenerator.save(): no images to save.');
150
+ return [];
151
+ }
152
+ const paths = [];
153
+ const dot = filePath.lastIndexOf('.');
154
+ const base = dot >= 0 ? filePath.slice(0, dot) : filePath;
155
+ const ext = dot >= 0 ? filePath.slice(dot) : '.png';
156
+ result.images.forEach((img, i) => {
157
+ const out = result.images.length === 1 ? filePath : `${base}_${i}${ext}`;
158
+ writeFileSync(out, Buffer.from(img.data, 'base64'));
159
+ paths.push(out);
160
+ });
161
+ return paths;
162
+ }
163
+
164
+ // ── No-ops (image gen is stateless) ──
165
+
166
+ /** @returns {any[]} Always returns empty array */
167
+ getHistory() { return []; }
168
+
169
+ /** No-op for ImageGenerator */
170
+ async clearHistory() {}
171
+
172
+ /** No-op for ImageGenerator */
173
+ async seed() {
174
+ log.warn('ImageGenerator.seed() is a no-op — image generation does not support few-shot.');
175
+ return [];
176
+ }
177
+
178
+ /**
179
+ * @param {any} _nextPayload
180
+ * @throws {Error} ImageGenerator does not support token estimation
181
+ * @returns {Promise<{ inputTokens: number }>}
182
+ */
183
+ async estimate(_nextPayload) {
184
+ throw new Error('ImageGenerator does not support token estimation. Use generate() directly.');
185
+ }
186
+ }
package/index.cjs CHANGED
@@ -35,6 +35,7 @@ __export(index_exports, {
35
35
  Embedding: () => Embedding,
36
36
  HarmBlockThreshold: () => import_genai2.HarmBlockThreshold,
37
37
  HarmCategory: () => import_genai2.HarmCategory,
38
+ ImageGenerator: () => ImageGenerator,
38
39
  Message: () => message_default,
39
40
  RagAgent: () => rag_agent_default,
40
41
  ThinkingLevel: () => import_genai2.ThinkingLevel,
@@ -322,29 +323,46 @@ var DEFAULT_THINKING_CONFIG = {
322
323
  };
323
324
  var DEFAULT_MAX_OUTPUT_TOKENS = 5e4;
324
325
  var THINKING_SUPPORTED_MODELS = [
325
- /^gemini-3-flash(-preview)?$/,
326
- /^gemini-3-pro(-preview|-image-preview)?$/,
326
+ /^gemini-3(\.\d+)?-pro(-preview)?$/,
327
+ /^gemini-3(\.\d+)?-flash(-preview)?$/,
328
+ /^gemini-3(\.\d+)?-flash-lite(-preview)?$/,
327
329
  /^gemini-2\.5-pro/,
328
330
  /^gemini-2\.5-flash(-preview)?$/,
329
331
  /^gemini-2\.5-flash-lite(-preview)?$/,
330
332
  /^gemini-2\.0-flash$/
331
333
  ];
332
334
  var MODEL_PRICING = {
333
- "gemini-2.5-flash": { input: 0.15, output: 0.6 },
334
- "gemini-2.5-flash-lite": { input: 0.02, output: 0.1 },
335
- "gemini-2.5-pro": { input: 2.5, output: 10 },
336
- "gemini-3-pro": { input: 2, output: 12 },
337
- "gemini-3-pro-preview": { input: 2, output: 12 },
335
+ // Gemini 3.x stable
336
+ "gemini-3.5-flash": { input: 1.5, output: 9 },
337
+ "gemini-3.1-flash-lite": { input: 0.25, output: 1.5 },
338
+ // Gemini 3.x preview
339
+ "gemini-3.1-pro-preview": { input: 2, output: 12 },
340
+ // ≤200k tier
341
+ "gemini-3-flash-preview": { input: 0.5, output: 3 },
342
+ "gemini-3.1-flash-lite-preview": { input: 0.25, output: 1.5 },
343
+ "gemini-3.1-flash-image-preview": { input: 0.5, output: 3 },
344
+ // text-only; image-output is $60/M
345
+ "gemini-3-pro-image-preview": { input: 2, output: 12 },
346
+ // text-only; image-output is $120/M
347
+ // Gemini 2.5 stable
348
+ "gemini-2.5-flash": { input: 0.3, output: 2.5 },
349
+ "gemini-2.5-flash-lite": { input: 0.1, output: 0.4 },
350
+ "gemini-2.5-pro": { input: 1.25, output: 10 },
351
+ // ≤200k tier
352
+ "gemini-2.5-flash-image": { input: 0.3, output: 0 },
353
+ // image-output is ~$0.039/image (1290 tokens)
354
+ // Deprecated but kept for back-compat (shut down June 2026)
338
355
  "gemini-2.0-flash": { input: 0.1, output: 0.4 },
339
356
  "gemini-2.0-flash-lite": { input: 0.02, output: 0.1 },
340
- "gemini-embedding-001": { input: 6e-3, output: 0 }
357
+ // Embeddings
358
+ "gemini-embedding-001": { input: 0.15, output: 0 }
341
359
  };
342
360
  var BaseGemini = class {
343
361
  /**
344
362
  * @param {BaseGeminiOptions} [options={}]
345
363
  */
346
364
  constructor(options = {}) {
347
- this.modelName = options.modelName || "gemini-2.5-flash";
365
+ this.modelName = options.modelName || "gemini-3-flash-preview";
348
366
  if (options.systemPrompt !== void 0) {
349
367
  this.systemPrompt = options.systemPrompt;
350
368
  } else {
@@ -369,6 +387,8 @@ var BaseGemini = class {
369
387
  this.enableGrounding = options.enableGrounding || false;
370
388
  this.groundingConfig = options.groundingConfig || {};
371
389
  this.cachedContent = options.cachedContent || null;
390
+ this.serviceTier = options.serviceTier || null;
391
+ this.includeServerSideToolInvocations = options.includeServerSideToolInvocations ?? false;
372
392
  this.chatConfig = {
373
393
  temperature: 0.7,
374
394
  topP: 0.95,
@@ -376,6 +396,8 @@ var BaseGemini = class {
376
396
  safetySettings: DEFAULT_SAFETY_SETTINGS,
377
397
  ...options.chatConfig
378
398
  };
399
+ if (this.serviceTier) this.chatConfig["serviceTier"] = this.serviceTier;
400
+ if (this.includeServerSideToolInvocations) this.chatConfig["includeServerSideToolInvocations"] = true;
379
401
  if (this.systemPrompt) {
380
402
  this.chatConfig.systemInstruction = this.systemPrompt;
381
403
  } else if (this.systemPrompt === null && options.systemPrompt === void 0) {
@@ -573,6 +595,7 @@ ${contextText}
573
595
  * @protected
574
596
  */
575
597
  _captureMetadata(response) {
598
+ const modelStatus = response?.modelStatus || null;
576
599
  this.lastResponseMetadata = {
577
600
  modelVersion: response.modelVersion || null,
578
601
  requestedModel: this.modelName,
@@ -580,8 +603,13 @@ ${contextText}
580
603
  responseTokens: response.usageMetadata?.candidatesTokenCount || 0,
581
604
  totalTokens: response.usageMetadata?.totalTokenCount || 0,
582
605
  timestamp: Date.now(),
583
- groundingMetadata: response.candidates?.[0]?.groundingMetadata || null
606
+ groundingMetadata: response.candidates?.[0]?.groundingMetadata || null,
607
+ modelStatus
584
608
  };
609
+ if (modelStatus === "DEPRECATED" && !this._deprecationWarned) {
610
+ logger_default.warn(`Model "${this.modelName}" is marked DEPRECATED by Google. Plan migration.`);
611
+ this._deprecationWarned = true;
612
+ }
585
613
  }
586
614
  /**
587
615
  * Returns structured usage data from the last API call for billing verification.
@@ -601,7 +629,8 @@ ${contextText}
601
629
  modelVersion: meta.modelVersion,
602
630
  requestedModel: meta.requestedModel,
603
631
  timestamp: meta.timestamp,
604
- groundingMetadata: meta.groundingMetadata || null
632
+ groundingMetadata: meta.groundingMetadata || null,
633
+ modelStatus: meta.modelStatus || null
605
634
  };
606
635
  }
607
636
  // ── Token Estimation ─────────────────────────────────────────────────────
@@ -3035,9 +3064,155 @@ var Embedding = class extends base_default {
3035
3064
  }
3036
3065
  };
3037
3066
 
3067
+ // image-generator.js
3068
+ var import_node_fs = require("node:fs");
3069
+ var DEFAULT_IMAGE_MODEL = "gemini-3.1-flash-image-preview";
3070
+ var ImageGenerator = class extends base_default {
3071
+ /**
3072
+ * @param {import('./types.d.ts').ImageGeneratorOptions} [options={}]
3073
+ */
3074
+ constructor(options = {}) {
3075
+ if (options.modelName === void 0) {
3076
+ options = { ...options, modelName: DEFAULT_IMAGE_MODEL };
3077
+ }
3078
+ if (options.systemPrompt === void 0) {
3079
+ options = { ...options, systemPrompt: null };
3080
+ }
3081
+ super(options);
3082
+ this.aspectRatio = options.aspectRatio || null;
3083
+ this.imageSize = options.imageSize || null;
3084
+ this.personGeneration = options.personGeneration || null;
3085
+ this.includeText = options.includeText ?? false;
3086
+ logger_default.debug(`ImageGenerator created with model: ${this.modelName}`);
3087
+ }
3088
+ /**
3089
+ * Validate API connection only; no chat session (stateless).
3090
+ * @param {boolean} [force=false]
3091
+ */
3092
+ async init(force = false) {
3093
+ if (this._initialized && !force) return;
3094
+ logger_default.debug(`Initializing ${this.constructor.name} with model: ${this.modelName}...`);
3095
+ try {
3096
+ await this.genAIClient.models.list();
3097
+ logger_default.debug(`${this.constructor.name}: API connection successful.`);
3098
+ } catch (e) {
3099
+ throw new Error(`${this.constructor.name} initialization failed: ${e.message}`);
3100
+ }
3101
+ this._initialized = true;
3102
+ }
3103
+ /**
3104
+ * Build a FRESH config — Gemini image models reject safetySettings/temp/topK/topP/thinkingConfig.
3105
+ * Do NOT spread this.chatConfig.
3106
+ * @private
3107
+ */
3108
+ _buildConfig(overrides = {}) {
3109
+ const includeText = overrides.includeText ?? this.includeText;
3110
+ const config = { responseModalities: includeText ? ["IMAGE", "TEXT"] : ["IMAGE"] };
3111
+ const imageConfig = {};
3112
+ const aspectRatio = overrides.aspectRatio || this.aspectRatio;
3113
+ const imageSize = overrides.imageSize || this.imageSize;
3114
+ const personGeneration = overrides.personGeneration || this.personGeneration;
3115
+ if (aspectRatio) imageConfig.aspectRatio = aspectRatio;
3116
+ if (imageSize) imageConfig.imageSize = imageSize;
3117
+ if (personGeneration) imageConfig.personGeneration = personGeneration;
3118
+ if (Object.keys(imageConfig).length > 0) config.imageConfig = imageConfig;
3119
+ return config;
3120
+ }
3121
+ /**
3122
+ * Generate one or more images from a text prompt.
3123
+ * Optionally accepts `inputImages` for image editing / multi-image composition.
3124
+ *
3125
+ * @param {string} prompt
3126
+ * @param {import('./types.d.ts').ImageGenerateOptions} [opts={}]
3127
+ * @returns {Promise<import('./types.d.ts').ImageGenerationResult>}
3128
+ */
3129
+ async generate(prompt, opts = {}) {
3130
+ if (!this._initialized) await this.init();
3131
+ const parts = [{ text: prompt }];
3132
+ if (Array.isArray(opts.inputImages)) {
3133
+ for (const img of opts.inputImages) {
3134
+ parts.push({ inlineData: { data: img.data, mimeType: img.mimeType } });
3135
+ }
3136
+ }
3137
+ const result = await this._withRetry(() => this.genAIClient.models.generateContent({
3138
+ model: this.modelName,
3139
+ contents: [{ role: "user", parts }],
3140
+ config: this._buildConfig(opts)
3141
+ }));
3142
+ this._captureMetadata(result);
3143
+ this._cumulativeUsage = {
3144
+ promptTokens: this.lastResponseMetadata.promptTokens,
3145
+ responseTokens: this.lastResponseMetadata.responseTokens,
3146
+ totalTokens: this.lastResponseMetadata.totalTokens,
3147
+ attempts: 1
3148
+ };
3149
+ const images = [];
3150
+ let text = "";
3151
+ const responseParts = result.candidates?.[0]?.content?.parts || [];
3152
+ for (const part of responseParts) {
3153
+ if (part.inlineData?.data) {
3154
+ images.push({
3155
+ data: part.inlineData.data,
3156
+ mimeType: part.inlineData.mimeType || "image/png"
3157
+ });
3158
+ } else if (part.text) {
3159
+ text += part.text;
3160
+ }
3161
+ }
3162
+ if (images.length === 0) {
3163
+ logger_default.warn("ImageGenerator: no images returned. Check prompt or safety filters.");
3164
+ }
3165
+ return { images, text: text || null, usage: this.getLastUsage() };
3166
+ }
3167
+ /**
3168
+ * Convenience: write one or all images to disk.
3169
+ * If multiple images, suffixes with `_N` before extension.
3170
+ * @param {import('./types.d.ts').ImageGenerationResult} result
3171
+ * @param {string} filePath
3172
+ * @returns {string[]} Written file paths
3173
+ */
3174
+ save(result, filePath) {
3175
+ if (!result?.images?.length) {
3176
+ logger_default.warn("ImageGenerator.save(): no images to save.");
3177
+ return [];
3178
+ }
3179
+ const paths = [];
3180
+ const dot = filePath.lastIndexOf(".");
3181
+ const base = dot >= 0 ? filePath.slice(0, dot) : filePath;
3182
+ const ext = dot >= 0 ? filePath.slice(dot) : ".png";
3183
+ result.images.forEach((img, i) => {
3184
+ const out = result.images.length === 1 ? filePath : `${base}_${i}${ext}`;
3185
+ (0, import_node_fs.writeFileSync)(out, Buffer.from(img.data, "base64"));
3186
+ paths.push(out);
3187
+ });
3188
+ return paths;
3189
+ }
3190
+ // ── No-ops (image gen is stateless) ──
3191
+ /** @returns {any[]} Always returns empty array */
3192
+ getHistory() {
3193
+ return [];
3194
+ }
3195
+ /** No-op for ImageGenerator */
3196
+ async clearHistory() {
3197
+ }
3198
+ /** No-op for ImageGenerator */
3199
+ async seed() {
3200
+ logger_default.warn("ImageGenerator.seed() is a no-op \u2014 image generation does not support few-shot.");
3201
+ return [];
3202
+ }
3203
+ /**
3204
+ * @param {any} _nextPayload
3205
+ * @throws {Error} ImageGenerator does not support token estimation
3206
+ * @returns {Promise<{ inputTokens: number }>}
3207
+ */
3208
+ async estimate(_nextPayload) {
3209
+ throw new Error("ImageGenerator does not support token estimation. Use generate() directly.");
3210
+ }
3211
+ };
3212
+
3038
3213
  // index.js
3039
3214
  var import_genai2 = require("@google/genai");
3040
- var index_default = { Transformer: transformer_default, Chat: chat_default, Message: message_default, ToolAgent: tool_agent_default, CodeAgent: code_agent_default, RagAgent: rag_agent_default, Embedding };
3215
+ var index_default = { Transformer: transformer_default, Chat: chat_default, Message: message_default, ToolAgent: tool_agent_default, CodeAgent: code_agent_default, RagAgent: rag_agent_default, Embedding, ImageGenerator };
3041
3216
  // Annotate the CommonJS export names for ESM import in node:
3042
3217
  0 && (module.exports = {
3043
3218
  BaseGemini,
@@ -3046,6 +3221,7 @@ var index_default = { Transformer: transformer_default, Chat: chat_default, Mess
3046
3221
  Embedding,
3047
3222
  HarmBlockThreshold,
3048
3223
  HarmCategory,
3224
+ ImageGenerator,
3049
3225
  Message,
3050
3226
  RagAgent,
3051
3227
  ThinkingLevel,
package/index.js CHANGED
@@ -27,6 +27,7 @@ export { default as ToolAgent } from './tool-agent.js';
27
27
  export { default as CodeAgent } from './code-agent.js';
28
28
  export { default as RagAgent } from './rag-agent.js';
29
29
  export { default as Embedding } from './embedding.js';
30
+ export { default as ImageGenerator } from './image-generator.js';
30
31
  export { default as BaseGemini } from './base.js';
31
32
  export { default as log } from './logger.js';
32
33
  export { ThinkingLevel, HarmCategory, HarmBlockThreshold } from '@google/genai';
@@ -41,5 +42,6 @@ import ToolAgent from './tool-agent.js';
41
42
  import CodeAgent from './code-agent.js';
42
43
  import RagAgent from './rag-agent.js';
43
44
  import Embedding from './embedding.js';
45
+ import ImageGenerator from './image-generator.js';
44
46
 
45
- export default { Transformer, Chat, Message, ToolAgent, CodeAgent, RagAgent, Embedding };
47
+ export default { Transformer, Chat, Message, ToolAgent, CodeAgent, RagAgent, Embedding, ImageGenerator };
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "ak-gemini",
3
3
  "author": "ak@mixpanel.com",
4
4
  "description": "AK's Generative AI Helper for doing... everything",
5
- "version": "2.2.1",
5
+ "version": "2.3.0",
6
6
  "main": "index.js",
7
7
  "files": [
8
8
  "index.js",
@@ -15,6 +15,7 @@
15
15
  "code-agent.js",
16
16
  "rag-agent.js",
17
17
  "embedding.js",
18
+ "image-generator.js",
18
19
  "json-helpers.js",
19
20
  "types.d.ts",
20
21
  "logger.js",
@@ -64,7 +65,7 @@
64
65
  ],
65
66
  "license": "ISC",
66
67
  "dependencies": {
67
- "@google/genai": "^1.44.0",
68
+ "@google/genai": "^2.6.0",
68
69
  "dotenv": "^17.3.1",
69
70
  "pino": "^10.3.1",
70
71
  "pino-pretty": "^13.1.3"
package/types.d.ts CHANGED
@@ -68,12 +68,14 @@ export interface UsageData {
68
68
  totalTokens: number;
69
69
  /** Number of attempts (1 = first try success, 2+ = retries needed) */
70
70
  attempts: number;
71
- /** Actual model that responded (e.g., 'gemini-2.5-flash-001') */
71
+ /** Actual model that responded (e.g., 'gemini-3-flash-preview-001') */
72
72
  modelVersion: string | null;
73
- /** Model you requested (e.g., 'gemini-2.5-flash') */
73
+ /** Model you requested (e.g., 'gemini-3-flash-preview') */
74
74
  requestedModel: string;
75
75
  timestamp: number;
76
76
  groundingMetadata?: GroundingMetadata | null;
77
+ /** Model lifecycle status from Google (e.g., 'DEPRECATED'). Surfaced from @google/genai 1.47+. */
78
+ modelStatus?: string | null;
77
79
  }
78
80
 
79
81
  export interface TransformationExample {
@@ -130,11 +132,12 @@ export interface CachedContentInfo {
130
132
 
131
133
  export type AsyncValidatorFunction = (payload: Record<string, unknown>) => Promise<unknown>;
132
134
  export type LogLevel = 'trace' | 'debug' | 'info' | 'warn' | 'error' | 'fatal' | 'none';
135
+ export type ServiceTier = 'STANDARD' | 'FLEX' | 'PRIORITY';
133
136
 
134
137
  // ── Constructor Options ──────────────────────────────────────────────────────
135
138
 
136
139
  export interface BaseGeminiOptions {
137
- /** Gemini model to use (default: 'gemini-2.5-flash') */
140
+ /** Gemini model to use (default: 'gemini-3-flash-preview') */
138
141
  modelName?: string;
139
142
  /** System prompt for the model (null or false to disable) */
140
143
  systemPrompt?: string | null | false;
@@ -177,6 +180,12 @@ export interface BaseGeminiOptions {
177
180
 
178
181
  /** Run models.list() health check during init() (default: false) */
179
182
  healthCheck?: boolean;
183
+
184
+ /** Service tier for generateContent (STANDARD | FLEX | PRIORITY). @google/genai 1.47+ */
185
+ serviceTier?: ServiceTier;
186
+
187
+ /** Surface server-side tool invocations (e.g. Google Search) in the response. @google/genai 1.46+ */
188
+ includeServerSideToolInvocations?: boolean;
180
189
  }
181
190
 
182
191
  export interface TransformerOptions extends BaseGeminiOptions {
@@ -254,6 +263,48 @@ export interface EmbeddingResult {
254
263
  statistics?: { tokenCount?: number; truncated?: boolean };
255
264
  }
256
265
 
266
+ // ── ImageGenerator ───────────────────────────────────────────────────────────
267
+
268
+ export type ImageAspectRatio = '1:1' | '2:3' | '3:2' | '3:4' | '4:3' | '9:16' | '16:9' | '21:9';
269
+ export type ImageSize = '1K' | '2K' | '4K';
270
+ export type PersonGeneration = 'ALLOW_ALL' | 'ALLOW_ADULT' | 'ALLOW_NONE';
271
+
272
+ export interface ImageGeneratorOptions extends BaseGeminiOptions {
273
+ /** Default aspect ratio for generated images */
274
+ aspectRatio?: ImageAspectRatio;
275
+ /** Default output resolution (1K/2K/4K) */
276
+ imageSize?: ImageSize;
277
+ /** Default people-generation policy */
278
+ personGeneration?: PersonGeneration;
279
+ /** Include text output alongside images (default: false) */
280
+ includeText?: boolean;
281
+ }
282
+
283
+ export interface ImageGenerateOptions {
284
+ aspectRatio?: ImageAspectRatio;
285
+ imageSize?: ImageSize;
286
+ personGeneration?: PersonGeneration;
287
+ includeText?: boolean;
288
+ /** Reference images for editing / multi-image composition (base64) */
289
+ inputImages?: Array<{ data: string; mimeType: string }>;
290
+ }
291
+
292
+ export interface GeneratedImage {
293
+ /** Base64-encoded image data */
294
+ data: string;
295
+ /** MIME type (e.g. "image/png") */
296
+ mimeType: string;
297
+ }
298
+
299
+ export interface ImageGenerationResult {
300
+ /** One or more generated images */
301
+ images: GeneratedImage[];
302
+ /** Optional text response (only when includeText: true) */
303
+ text: string | null;
304
+ /** Token usage */
305
+ usage: UsageData | null;
306
+ }
307
+
257
308
  /** Tool declaration in @google/genai FunctionDeclaration format */
258
309
  export interface ToolDeclaration {
259
310
  name: string;
@@ -531,6 +582,8 @@ export declare class BaseGemini {
531
582
  enableGrounding: boolean;
532
583
  groundingConfig: Record<string, any>;
533
584
  cachedContent: string | null;
585
+ serviceTier: ServiceTier | null;
586
+ includeServerSideToolInvocations: boolean;
534
587
 
535
588
  init(force?: boolean): Promise<void>;
536
589
  seed(examples?: TransformationExample[], opts?: SeedOptions): Promise<any[]>;
@@ -677,6 +730,21 @@ export declare class Embedding extends BaseGemini {
677
730
  similarity(a: number[], b: number[]): number;
678
731
  }
679
732
 
733
+ export declare class ImageGenerator extends BaseGemini {
734
+ constructor(options?: ImageGeneratorOptions);
735
+
736
+ aspectRatio: ImageAspectRatio | null;
737
+ imageSize: ImageSize | null;
738
+ personGeneration: PersonGeneration | null;
739
+ includeText: boolean;
740
+
741
+ init(force?: boolean): Promise<void>;
742
+ /** Generate one or more images from a text prompt. Supports optional reference images for editing. */
743
+ generate(prompt: string, opts?: ImageGenerateOptions): Promise<ImageGenerationResult>;
744
+ /** Write generated images to disk (suffixes `_N` before extension if >1 image) */
745
+ save(result: ImageGenerationResult, filePath: string): string[];
746
+ }
747
+
680
748
  // ── Module Exports ───────────────────────────────────────────────────────────
681
749
 
682
750
  export declare function extractJSON(text: string): any;
@@ -690,6 +758,7 @@ declare const _default: {
690
758
  CodeAgent: typeof CodeAgent;
691
759
  RagAgent: typeof RagAgent;
692
760
  Embedding: typeof Embedding;
761
+ ImageGenerator: typeof ImageGenerator;
693
762
  };
694
763
 
695
764
  export default _default;