@smythos/sre 1.5.66 → 1.5.67

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,13 @@
+import { IAgent as Agent } from '@sre/types/Agent.types';
+import { Trigger } from './Trigger.class';
+export declare class GmailTrigger extends Trigger {
+    init(): void;
+    process(input: any, config: any, agent: Agent): Promise<{
+        Payload: {};
+        Result: any;
+        _temp_result: any;
+        _error: any;
+        _in_progress: boolean;
+        _debug: string;
+    }>;
+}
@@ -0,0 +1,3 @@
+import { Component } from '../Component.class';
+export declare class Trigger extends Component {
+}
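
Note: the new Trigger base declaration is empty; a concrete trigger only needs init() and process(), as the GmailTrigger signature above shows. A minimal sketch of a custom trigger (the WebhookTrigger name and its return values are hypothetical, not part of the package):

    import { IAgent as Agent } from '@sre/types/Agent.types';
    import { Trigger } from './Trigger.class';

    export class WebhookTrigger extends Trigger {
        init(): void {
            // subscribe to the external event source here
        }

        async process(input: any, config: any, agent: Agent) {
            // mirror the result shape declared on GmailTrigger.process
            return {
                Payload: {},
                Result: input,
                _temp_result: input,
                _error: undefined,
                _in_progress: false,
                _debug: `processed by agent ${agent?.id}`,
            };
        }
    }
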
@@ -1,9 +1,10 @@
 import EventEmitter from 'events';
 import { UsageMetadata } from '@google/generative-ai';
-import { TLLMMessageBlock, ToolData, TLLMToolResultMessageBlock, APIKeySource, ILLMRequestFuncParams, TLLMChatResponse, TGoogleAIRequestBody, TLLMPreparedParams } from '@sre/types/LLM.types';
+import { TLLMMessageBlock, ToolData, TLLMToolResultMessageBlock, APIKeySource, ILLMRequestFuncParams, TLLMChatResponse, TGoogleAIRequestBody, ILLMRequestContext, TLLMPreparedParams } from '@sre/types/LLM.types';
 import { LLMConnector } from '../LLMConnector';
 type UsageMetadataWithThoughtsToken = UsageMetadata & {
-    thoughtsTokenCount: number;
+    thoughtsTokenCount?: number;
+    cost?: number;
 };
 export declare class GoogleAIConnector extends LLMConnector {
     name: string;
@@ -32,6 +33,18 @@ export declare class GoogleAIConnector extends LLMConnector {
         teamId: string;
         tier: string;
     };
+    /**
+     * Extract text and image tokens from Google AI usage metadata
+     */
+    private extractTokenCounts;
+    protected reportImageUsage({ usage, context, numberOfImages, }: {
+        usage: {
+            cost?: number;
+            usageMetadata?: UsageMetadataWithThoughtsToken;
+        };
+        context: ILLMRequestContext;
+        numberOfImages?: number;
+    }): void;
     formatToolsConfig({ toolDefinitions, toolChoice }: {
         toolDefinitions: any;
         toolChoice?: string;
@@ -51,6 +64,7 @@ export declare class GoogleAIConnector extends LLMConnector {
     private prepareMessagesWithTools;
     private prepareMessagesWithTextQuery;
     private prepareBodyForImageGenRequest;
+    private prepareImageEditBody;
     private sanitizeFunctionName;
     private uploadFile;
     private getValidFiles;
@@ -193,11 +193,29 @@ export type TLLMModel = {
     params?: TLLMParams;
     /**
      * Specifies the API interface type to use for this model
-     * Examples: 'chat.completions', 'responses'
-     * This determines which OpenAI API endpoint and interface implementation to use
+     * This determines which API endpoint and interface implementation to use
      */
-    interface?: 'chat.completions' | 'responses';
+    interface?: LLMInterface;
+    /**
+     * Indicates whether this model supports image editing functionality
+     * Only applicable for image generation models
+     */
+    supportsEditing?: boolean;
 };
+/**
+ * Enum for different LLM API interfaces
+ * Each interface represents a different API endpoint or interaction pattern
+ */
+export declare enum LLMInterface {
+    /** OpenAI-style chat completions API */
+    ChatCompletions = "chat.completions",
+    /** OpenAI-style responses API */
+    Responses = "responses",
+    /** Google AI generateContent API (for text and multimodal) */
+    GenerateContent = "generateContent",
+    /** Google AI generateImages API (for traditional Imagen models) */
+    GenerateImages = "generateImages"
+}
 export declare const BuiltinLLMProviders: {
     readonly Echo: "Echo";
     readonly OpenAI: "OpenAI";
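
Note: interface is no longer limited to the two OpenAI string literals; it now accepts any LLMInterface member, and supportsEditing flags image models that can edit. A sketch of how a model entry could use the new fields (the entry itself is hypothetical and other TLLMModel fields are omitted):

    import { LLMInterface, TLLMModel } from '@sre/types/LLM.types';

    // Hypothetical entry: a Gemini image model routed through generateContent,
    // marked as supporting image editing.
    const geminiImageModel: Partial<TLLMModel> = {
        interface: LLMInterface.GenerateContent,
        supportsEditing: true,
    };
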
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
     "name": "@smythos/sre",
-    "version": "1.5.66",
+    "version": "1.5.67",
     "description": "Smyth Runtime Environment",
     "author": "Alaa-eddine KADDOURI",
     "license": "MIT",
@@ -44,12 +44,6 @@ const IMAGE_GEN_COST_MAP = {
     },
 };
 
-// Imagen 4 cost map - fixed cost per image
-const IMAGEN_4_COST_MAP = {
-    'imagen-4': 0.04, // Standard Imagen 4
-    'imagen-4-ultra': 0.06, // Imagen 4 Ultra
-};
-
 export class ImageGenerator extends Component {
     protected configSchema = Joi.object({
         model: Joi.string().max(100).required(),
@@ -344,11 +338,6 @@ const imageGenerator = {
 
        const files: any[] = parseFiles(input, config);
 
-       // Imagen models only support image generation, not image editing
-       if (files.length > 0) {
-           throw new Error('Google AI Image Generation Error: Image editing is not supported. Imagen models only support image generation.');
-       }
-
        let args: GenerateImageConfig & {
            aspectRatio?: string;
            numberOfImages?: number;
@@ -360,29 +349,21 @@
            personGeneration: config?.data?.personGeneration || 'allow_adult',
        };
 
-       const response = await llmInference.imageGenRequest({ query: prompt, params: { ...args, agentId: agent.id } });
-
-       // Calculate fixed cost for Imagen 4
-       const modelName = model.replace(BUILT_IN_MODEL_PREFIX, '');
-       const cost = IMAGEN_4_COST_MAP[modelName];
-
-       if (cost && cost > 0) {
-           // Multiply by number of images generated
-           const numberOfImages = args.numberOfImages || 1;
-           const totalCost = cost * numberOfImages;
-
-           // Report fixed cost usage
-           imageGenerator.reportUsage(
-               { cost: totalCost },
-               {
-                   modelEntryName: model,
-                   keySource: model.startsWith(BUILT_IN_MODEL_PREFIX) ? APIKeySource.Smyth : APIKeySource.User,
-                   agentId: agent.id,
-                   teamId: agent.teamId,
-               }
-           );
+       let response;
+
+       // Check if files are provided for image editing
+       if (files.length > 0) {
+           const validFiles = files.filter((file) => imageGenerator.isValidImageFile('GoogleAI', file.mimetype));
+           if (validFiles.length === 0) {
+               throw new Error('Supported image file types are: ' + SUPPORTED_MIME_TYPES_MAP.GoogleAI?.image?.join(', '));
+           }
+           response = await llmInference.imageEditRequest({ query: prompt, files: validFiles, params: { ...args, agentId: agent.id } });
+       } else {
+           response = await llmInference.imageGenRequest({ query: prompt, params: { ...args, agentId: agent.id } });
        }
 
+       // Usage reporting is now handled in the GoogleAI connector
+
        let output = response?.data?.[0]?.b64_json;
 
        if (output) {
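
Note: the component now routes on the presence of input files: attached files turn the prompt into an edit instruction sent to imageEditRequest, otherwise the old imageGenRequest path runs unchanged. A rough sketch of the two call shapes (prompt text and agent id are illustrative stand-ins):

    // No files attached: plain generation.
    const generated = await llmInference.imageGenRequest({
        query: 'a lighthouse at dusk',
        params: { numberOfImages: 1, aspectRatio: '1:1', agentId: 'agent-123' },
    });

    // Files attached: the prompt becomes an edit instruction.
    const edited = await llmInference.imageEditRequest({
        query: 'replace the sky with a starry night',
        files: validFiles, // image files that passed the mimetype check above
        params: { agentId: 'agent-123' },
    });
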
@@ -51,7 +51,7 @@ export class MCPClient extends Component {
        }
 
        // TODO [Forhad]: Need to check and validate input prompt token
-       const { client } = await this.connectMCP(mcpUrl);
+       const { client } = await this.connectMCP(mcpUrl, logger);
 
        const toolsData = await client.listTools();
        const conv = new Conversation(
@@ -105,17 +105,17 @@
            return { _error: `Error on running MCP Client!\n${error?.message || JSON.stringify(error)}`, _debug: logger.output };
        }
    }
-   private async connectMCP(mcpUrl: string) {
+   private async connectMCP(mcpUrl: string, logger: any) {
        const client = new Client({ name: 'auto-client', version: '1.0.0' });
 
        // 1) Try Streamable HTTP first
        try {
            const st = new StreamableHTTPClientTransport(new URL(mcpUrl));
            await client.connect(st);
-           console.debug('Connected to MCP using Streamable HTTP');
+           logger.debug('Connected to MCP using Streamable HTTP');
            return { client, transport: 'streamable' as const };
        } catch (e: any) {
-           console.debug('Failed to connect to MCP using Streamable HTTP, falling back to SSE');
+           logger.debug('Failed to connect to MCP using Streamable HTTP, falling back to SSE');
            // 2) If clearly unsupported, fall back to SSE
            const msg = String(e?.message || e);
            const isUnsupported = /404|405|ENOTFOUND|ECONNREFUSED|CORS/i.test(msg);
@@ -281,6 +281,8 @@ export class Conversation extends EventEmitter {
        let _content = '';
        const reqMethods = this._reqMethods;
        const toolsConfig = this._toolsConfig;
+       //deduplicate tools
+       toolsConfig.tools = toolsConfig.tools.filter((tool, index, self) => self.findIndex((t) => t.name === tool.name) === index);
        const endpoints = this._endpoints;
        const baseUrl = this._baseUrl;
        const message_id = 'msg_' + randomUUID();
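
Note: the added filter keeps only the first tool registered under each name. A standalone illustration of that semantics (tool names are made up):

    type Tool = { name: string };

    const tools: Tool[] = [{ name: 'search' }, { name: 'fetch' }, { name: 'search' }];

    // Keep the first occurrence of each name, as the Conversation patch does.
    const deduped = tools.filter((tool, index, self) => self.findIndex((t) => t.name === tool.name) === index);

    console.log(deduped.map((t) => t.name)); // ['search', 'fetch']
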
@@ -27,6 +27,7 @@ import {
    TGoogleAIRequestBody,
    ILLMRequestContext,
    TLLMPreparedParams,
+   LLMInterface,
 } from '@sre/types/LLM.types';
 import { LLMHelper } from '@sre/LLMManager/LLM.helper';
 
@@ -59,7 +60,15 @@ const VALID_MIME_TYPES = [
 ];
 
 // will be removed after updating the SDK
-type UsageMetadataWithThoughtsToken = UsageMetadata & { thoughtsTokenCount: number };
+type UsageMetadataWithThoughtsToken = UsageMetadata & { thoughtsTokenCount?: number; cost?: number };
+
+const IMAGE_GEN_FIXED_PRICING = {
+    'imagen-3.0-generate-001': 0.04, // Fixed cost per image
+    'imagen-4.0-generate-001': 0.04, // Fixed cost per image
+    'imagen-4': 0.04, // Standard Imagen 4
+    'imagen-4-ultra': 0.06, // Imagen 4 Ultra
+    'gemini-2.5-flash-image': 0.039,
+};
 
 export class GoogleAIConnector extends LLMConnector {
     public name = 'LLM:GoogleAI';
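
Note: IMAGE_GEN_FIXED_PRICING is a flat per-image price; the connector multiplies it by the image count when reporting usage (see reportImageUsage below). A quick check of the arithmetic:

    // 3 images on 'imagen-4-ultra' at a fixed 0.06 per image.
    const pricePerImage = 0.06;
    const numberOfImages = 3;

    const totalCost = pricePerImage * numberOfImages; // 0.18 (USD) for the batch
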
@@ -200,31 +209,90 @@ export class GoogleAIConnector extends LLMConnector {
         }
     }
     // #region Image Generation, will be moved to a different subsystem/service
+
     protected async imageGenRequest({ body, context }: ILLMRequestFuncParams): Promise<any> {
-        try {
-            const apiKey = (context.credentials as BasicCredentials)?.apiKey;
-            if (!apiKey) throw new Error('Please provide an API key for Google AI');
+        const apiKey = (context.credentials as BasicCredentials)?.apiKey;
+        if (!apiKey) throw new Error('Please provide an API key for Google AI');
 
-            const model = body.model || 'imagen-3.0-generate-001';
+        const model = body.model || 'imagen-3.0-generate-001';
+        const modelName = context.modelEntryName.replace(BUILT_IN_MODEL_PREFIX, '');
 
-            // Use Imagen models via GoogleGenAI
-            const ai = new GoogleGenAI({ apiKey });
+        // Use traditional Imagen models
+        const config = {
+            numberOfImages: body.n || 1,
+            aspectRatio: body.aspect_ratio || body.size || '1:1',
+            personGeneration: body.person_generation || 'allow_adult',
+        };
 
-            // Prepare the configuration for image generation
-            const config = {
-                numberOfImages: body.n || 1,
-                aspectRatio: body.aspect_ratio || body.size || '1:1',
-                personGeneration: body.person_generation || 'allow_adult',
-            };
+        const ai = new GoogleGenAI({ apiKey });
 
-            // Generate images using the SDK
-            const response = await ai.models.generateImages({
+        // Default to GenerateImages interface if not specified
+        const modelInterface = context.modelInfo?.interface || LLMInterface.GenerateImages;
+
+        let response: any;
+
+        if (modelInterface === LLMInterface.GenerateContent) {
+            // Use Gemini image generation API
+            response = await ai.models.generateContent({
+                model,
+                contents: body.prompt,
+            });
+
+            // Extract image data from Gemini response format
+            const imageData: any[] = [];
+            if (response.candidates?.[0]?.content?.parts) {
+                for (const part of response.candidates[0].content.parts) {
+                    if (part.inlineData?.data) {
+                        imageData.push({
+                            url: `data:image/png;base64,${part.inlineData.data}`,
+                            b64_json: part.inlineData.data,
+                            revised_prompt: body.prompt,
+                        });
+                    }
+                }
+            }
+
+            // Report input tokens and image cost pricing based on the official pricing page:
+            // https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-flash-image-preview
+            const usageMetadata = response?.usageMetadata as UsageMetadataWithThoughtsToken;
+
+            this.reportImageUsage({
+                usage: {
+                    cost: IMAGE_GEN_FIXED_PRICING[modelName],
+                    usageMetadata,
+                },
+                context,
+            });
+
+            if (imageData.length === 0) {
+                throw new Error(
+                    'Please enter a valid prompt — for example: "Create a picture of a nano banana dish in a fancy restaurant with a Gemini theme."'
+                );
+            }
+
+            return {
+                created: Math.floor(Date.now() / 1000),
+                data: imageData,
+            };
+        } else if (modelInterface === LLMInterface.GenerateImages) {
+            response = await ai.models.generateImages({
                 model,
                 prompt: body.prompt,
                 config,
             });
 
-            // Transform the response to match OpenAI format for compatibility
+            // Report input tokens and image cost pricing based on the official pricing page:
+            // https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-flash-image-preview
+            const usageMetadata = response?.usageMetadata as UsageMetadataWithThoughtsToken;
+            this.reportImageUsage({
+                usage: {
+                    cost: IMAGE_GEN_FIXED_PRICING[modelName],
+                    usageMetadata,
+                },
+                numberOfImages: config.numberOfImages,
+                context,
+            });
+
             return {
                 created: Math.floor(Date.now() / 1000),
                 data:
@@ -234,13 +302,59 @@ export class GoogleAIConnector extends LLMConnector {
                        revised_prompt: body.prompt,
                    })) || [],
            };
-        } catch (error: any) {
-            throw error;
+        } else {
+            throw new Error(`Unsupported interface: ${modelInterface}`);
         }
     }
 
     protected async imageEditRequest({ body, context }: ILLMRequestFuncParams): Promise<any> {
-        throw new Error('Image editing is not supported for Google AI. Imagen models only support image generation.');
+        const apiKey = (context.credentials as BasicCredentials)?.apiKey;
+        if (!apiKey) throw new Error('Please provide an API key for Google AI');
+
+        // A model supports image editing if it implements the `generateContent` interface.
+        const supportsEditing = context.modelInfo?.interface === LLMInterface.GenerateContent;
+        if (!supportsEditing) {
+            throw new Error(`Image editing is not supported for model: ${body.model}. This model only supports image generation.`);
+        }
+
+        const ai = new GoogleGenAI({ apiKey });
+        const modelName = context.modelEntryName.replace(BUILT_IN_MODEL_PREFIX, '');
+
+        // Use the prepared body which already contains processed files and contents
+        const response = await ai.models.generateContent({
+            model: body.model,
+            contents: body.contents,
+        });
+
+        // Extract image data from Gemini response format
+        const imageData: any[] = [];
+        if (response.candidates?.[0]?.content?.parts) {
+            for (const part of response.candidates[0].content.parts) {
+                if (part.inlineData?.data) {
+                    imageData.push({
+                        url: `data:image/png;base64,${part.inlineData.data}`,
+                        b64_json: part.inlineData.data,
+                        revised_prompt: body._metadata?.prompt || body.prompt,
+                    });
+                }
+            }
+        }
+
+        // Report pricing for input tokens and image costs
+        const usageMetadata = response?.usageMetadata as UsageMetadataWithThoughtsToken;
+
+        this.reportImageUsage({
+            usage: {
+                cost: IMAGE_GEN_FIXED_PRICING[modelName],
+                usageMetadata,
+            },
+            context,
+        });
+
+        return {
+            created: Math.floor(Date.now() / 1000),
+            data: imageData,
+        };
     }
 
     protected async reqBodyAdapter(params: TLLMPreparedParams): Promise<TGoogleAIRequestBody> {
@@ -248,7 +362,13 @@ export class GoogleAIConnector extends LLMConnector {
 
        // Check if this is an image generation request based on capabilities
        if (params?.capabilities?.imageGeneration) {
-           return this.prepareBodyForImageGenRequest(params) as any;
+           // Determine if this is image editing (has files) or generation
+           const hasFiles = params?.files?.length > 0;
+           if (hasFiles) {
+               return this.prepareImageEditBody(params) as any;
+           } else {
+               return this.prepareBodyForImageGenRequest(params) as any;
+           }
        }
 
        const messages = await this.prepareMessages(params);
@@ -291,9 +411,9 @@ export class GoogleAIConnector extends LLMConnector {
        usage: UsageMetadataWithThoughtsToken,
        metadata: { modelEntryName: string; keySource: APIKeySource; agentId: string; teamId: string }
    ) {
-       const modelEntryName = metadata.modelEntryName;
+       // SmythOS (built-in) models have a prefix, so we need to remove it to get the model name
+       const modelName = metadata.modelEntryName.replace(BUILT_IN_MODEL_PREFIX, '');
        let tier = '';
-
        const tierThresholds = {
            'gemini-1.5-pro': 128_000,
            'gemini-2.5-pro': 200_000,
@@ -304,24 +424,21 @@ export class GoogleAIConnector extends LLMConnector {
        const audioInputTokens = usage?.['promptTokensDetails']?.find((detail) => detail.modality === 'AUDIO')?.tokenCount || 0;
 
        // Find matching model and set tier based on threshold
-       const modelWithTier = Object.keys(tierThresholds).find((model) => modelEntryName.includes(model));
+       const modelWithTier = Object.keys(tierThresholds).find((model) => modelName.includes(model));
        if (modelWithTier) {
            tier = textInputTokens < tierThresholds[modelWithTier] ? 'tier1' : 'tier2';
        }
 
        // #endregion
 
-       // SmythOS (built-in) models have a prefix, so we need to remove it to get the model name
-       const modelName = metadata.modelEntryName.replace(BUILT_IN_MODEL_PREFIX, '');
-
        const usageData = {
            sourceId: `llm:${modelName}`,
            input_tokens: textInputTokens,
-           output_tokens: usage.candidatesTokenCount,
+           output_tokens: usage?.candidatesTokenCount || 0,
            input_tokens_audio: audioInputTokens,
-           input_tokens_cache_read: usage.cachedContentTokenCount || 0,
+           input_tokens_cache_read: usage?.cachedContentTokenCount || 0,
            input_tokens_cache_write: 0,
-           reasoning_tokens: usage.thoughtsTokenCount,
+           reasoning_tokens: usage?.thoughtsTokenCount,
            keySource: metadata.keySource,
            agentId: metadata.agentId,
            teamId: metadata.teamId,
@@ -332,6 +449,49 @@ export class GoogleAIConnector extends LLMConnector {
        return usageData;
    }
 
+   /**
+    * Extract text and image tokens from Google AI usage metadata
+    */
+   private extractTokenCounts(usage: UsageMetadataWithThoughtsToken): { textTokens: number; imageTokens: number } {
+       const textTokens = usage?.['promptTokensDetails']?.find((detail) => detail.modality === 'TEXT')?.tokenCount || 0;
+       const imageTokens = usage?.['promptTokensDetails']?.find((detail) => detail.modality === 'IMAGE')?.tokenCount || 0;
+
+       return { textTokens, imageTokens };
+   }
+
+   protected reportImageUsage({
+       usage,
+       context,
+       numberOfImages = 1,
+   }: {
+       usage: { cost?: number; usageMetadata?: UsageMetadataWithThoughtsToken };
+       context: ILLMRequestContext;
+       numberOfImages?: number;
+   }) {
+       // Extract text and image tokens from rawUsage if available
+       let input_tokens_txt = 0;
+       let input_tokens_img = 0;
+
+       if (usage.usageMetadata) {
+           const { textTokens, imageTokens } = this.extractTokenCounts(usage.usageMetadata);
+           input_tokens_txt = textTokens;
+           input_tokens_img = imageTokens;
+       }
+
+       const imageUsageData = {
+           sourceId: `api:imagegen.smyth`,
+           keySource: context.isUserKey ? APIKeySource.User : APIKeySource.Smyth,
+
+           cost: usage.cost * numberOfImages,
+           input_tokens_txt,
+           input_tokens_img,
+
+           agentId: context.agentId,
+           teamId: context.teamId,
+       };
+       SystemEvents.emit('USAGE:API', imageUsageData);
+   }
+
    public formatToolsConfig({ toolDefinitions, toolChoice = 'auto' }) {
        const tools = toolDefinitions.map((tool) => {
            const { name, description, properties, requiredFields } = tool;
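
Note: reportImageUsage splits prompt tokens by modality and emits a single USAGE:API event. For a 3-image 'imagen-4' batch (0.04 per image) whose metadata reports 20 TEXT prompt tokens, the payload would look roughly like this (agent and team ids are hypothetical):

    const exampleUsageEvent = {
        sourceId: 'api:imagegen.smyth',
        keySource: 'Smyth', // stands in for APIKeySource.Smyth, since context.isUserKey is false here
        cost: 0.04 * 3, // 0.12
        input_tokens_txt: 20,
        input_tokens_img: 0,
        agentId: 'agent-123',
        teamId: 'team-456',
    };
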
@@ -640,6 +800,65 @@ export class GoogleAIConnector extends LLMConnector {
        };
    }
 
+   private async prepareImageEditBody(params: TLLMPreparedParams): Promise<any> {
+       const model = params.model || 'gemini-2.5-flash-image-preview';
+
+       // Construct edit prompt with image and instructions
+       let editPrompt = params.prompt || 'Edit this image';
+       if ((params as any).instruction) {
+           editPrompt += `. ${(params as any).instruction}`;
+       }
+
+       // For image editing, we need to include the original image in the contents
+       const contents: any[] = [];
+       const files: BinaryInput[] = params?.files || [];
+
+       if (files.length > 0) {
+           // Get only valid image files for editing
+           const validImageFiles = this.getValidFiles(files, 'image');
+
+           if (validImageFiles.length === 0) {
+               throw new Error('No valid image files found for editing. Please provide at least one image file.');
+           }
+
+           // Process each image file
+           for (const file of validImageFiles) {
+               try {
+                   // Read the file data as base64
+                   const bufferData = await file.getBuffer();
+                   const base64Image = Buffer.from(bufferData).toString('base64');
+
+                   contents.push({
+                       inlineData: {
+                           mimeType: file.mimetype,
+                           data: base64Image,
+                       },
+                   });
+               } catch (error) {
+                   throw new Error(`Failed to process image file: ${error.message}`);
+               }
+           }
+       } else {
+           throw new Error('No image provided for editing. Please include an image file.');
+       }
+
+       // Add the edit instruction
+       contents.push({ text: editPrompt });
+
+       // Return the complete request body that can be used directly in imageEditRequest
+       return {
+           model,
+           contents,
+           // Additional metadata for usage reporting
+           _metadata: {
+               prompt: editPrompt,
+               numberOfImages: (params as any).n || 1,
+               aspectRatio: (params as any).aspect_ratio || (params as any).size || '1:1',
+               personGeneration: (params as any).person_generation || 'allow_adult',
+           },
+       };
+   }
+
    // Add this helper method to sanitize function names
    private sanitizeFunctionName(name: string): string {
        // Check if name is undefined or null
@@ -228,12 +228,33 @@ export type TLLMModel = {
    params?: TLLMParams;
    /**
     * Specifies the API interface type to use for this model
-    * Examples: 'chat.completions', 'responses'
-    * This determines which OpenAI API endpoint and interface implementation to use
+    * This determines which API endpoint and interface implementation to use
     */
-   interface?: 'chat.completions' | 'responses';
+   interface?: LLMInterface;
+
+   /**
+    * Indicates whether this model supports image editing functionality
+    * Only applicable for image generation models
+    */
+   supportsEditing?: boolean;
 };
 
+// #region [ LLM Interface Types ] ================================================
+/**
+ * Enum for different LLM API interfaces
+ * Each interface represents a different API endpoint or interaction pattern
+ */
+export enum LLMInterface {
+    /** OpenAI-style chat completions API */
+    ChatCompletions = 'chat.completions',
+    /** OpenAI-style responses API */
+    Responses = 'responses',
+    /** Google AI generateContent API (for text and multimodal) */
+    GenerateContent = 'generateContent',
+    /** Google AI generateImages API (for traditional Imagen models) */
+    GenerateImages = 'generateImages',
+}
+
 // #region [ Handle extendable LLM Providers ] ================================================
 export const BuiltinLLMProviders = {
    Echo: 'Echo',