@smythos/sre 1.5.66 → 1.5.67
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +9 -9
- package/dist/index.js.map +1 -1
- package/dist/types/Components/Triggers/GmailTrigger.class.d.ts +13 -0
- package/dist/types/Components/Triggers/Trigger.class.d.ts +3 -0
- package/dist/types/subsystems/LLMManager/LLM.service/connectors/GoogleAI.class.d.ts +16 -2
- package/dist/types/types/LLM.types.d.ts +21 -3
- package/package.json +1 -1
- package/src/Components/ImageGenerator.class.ts +13 -32
- package/src/Components/MCPClient.class.ts +4 -4
- package/src/helpers/Conversation.helper.ts +2 -0
- package/src/subsystems/LLMManager/LLM.service/connectors/GoogleAI.class.ts +248 -29
- package/src/types/LLM.types.ts +24 -3
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { IAgent as Agent } from '@sre/types/Agent.types';
|
|
2
|
+
import { Trigger } from './Trigger.class';
|
|
3
|
+
export declare class GmailTrigger extends Trigger {
|
|
4
|
+
init(): void;
|
|
5
|
+
process(input: any, config: any, agent: Agent): Promise<{
|
|
6
|
+
Payload: {};
|
|
7
|
+
Result: any;
|
|
8
|
+
_temp_result: any;
|
|
9
|
+
_error: any;
|
|
10
|
+
_in_progress: boolean;
|
|
11
|
+
_debug: string;
|
|
12
|
+
}>;
|
|
13
|
+
}
|
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
import EventEmitter from 'events';
|
|
2
2
|
import { UsageMetadata } from '@google/generative-ai';
|
|
3
|
-
import { TLLMMessageBlock, ToolData, TLLMToolResultMessageBlock, APIKeySource, ILLMRequestFuncParams, TLLMChatResponse, TGoogleAIRequestBody, TLLMPreparedParams } from '@sre/types/LLM.types';
|
|
3
|
+
import { TLLMMessageBlock, ToolData, TLLMToolResultMessageBlock, APIKeySource, ILLMRequestFuncParams, TLLMChatResponse, TGoogleAIRequestBody, ILLMRequestContext, TLLMPreparedParams } from '@sre/types/LLM.types';
|
|
4
4
|
import { LLMConnector } from '../LLMConnector';
|
|
5
5
|
type UsageMetadataWithThoughtsToken = UsageMetadata & {
|
|
6
|
-
thoughtsTokenCount
|
|
6
|
+
thoughtsTokenCount?: number;
|
|
7
|
+
cost?: number;
|
|
7
8
|
};
|
|
8
9
|
export declare class GoogleAIConnector extends LLMConnector {
|
|
9
10
|
name: string;
|
|
@@ -32,6 +33,18 @@ export declare class GoogleAIConnector extends LLMConnector {
|
|
|
32
33
|
teamId: string;
|
|
33
34
|
tier: string;
|
|
34
35
|
};
|
|
36
|
+
/**
|
|
37
|
+
* Extract text and image tokens from Google AI usage metadata
|
|
38
|
+
*/
|
|
39
|
+
private extractTokenCounts;
|
|
40
|
+
protected reportImageUsage({ usage, context, numberOfImages, }: {
|
|
41
|
+
usage: {
|
|
42
|
+
cost?: number;
|
|
43
|
+
usageMetadata?: UsageMetadataWithThoughtsToken;
|
|
44
|
+
};
|
|
45
|
+
context: ILLMRequestContext;
|
|
46
|
+
numberOfImages?: number;
|
|
47
|
+
}): void;
|
|
35
48
|
formatToolsConfig({ toolDefinitions, toolChoice }: {
|
|
36
49
|
toolDefinitions: any;
|
|
37
50
|
toolChoice?: string;
|
|
@@ -51,6 +64,7 @@ export declare class GoogleAIConnector extends LLMConnector {
|
|
|
51
64
|
private prepareMessagesWithTools;
|
|
52
65
|
private prepareMessagesWithTextQuery;
|
|
53
66
|
private prepareBodyForImageGenRequest;
|
|
67
|
+
private prepareImageEditBody;
|
|
54
68
|
private sanitizeFunctionName;
|
|
55
69
|
private uploadFile;
|
|
56
70
|
private getValidFiles;
|
|
@@ -193,11 +193,29 @@ export type TLLMModel = {
|
|
|
193
193
|
params?: TLLMParams;
|
|
194
194
|
/**
|
|
195
195
|
* Specifies the API interface type to use for this model
|
|
196
|
-
*
|
|
197
|
-
* This determines which OpenAI API endpoint and interface implementation to use
|
|
196
|
+
* This determines which API endpoint and interface implementation to use
|
|
198
197
|
*/
|
|
199
|
-
interface?:
|
|
198
|
+
interface?: LLMInterface;
|
|
199
|
+
/**
|
|
200
|
+
* Indicates whether this model supports image editing functionality
|
|
201
|
+
* Only applicable for image generation models
|
|
202
|
+
*/
|
|
203
|
+
supportsEditing?: boolean;
|
|
200
204
|
};
|
|
205
|
+
/**
|
|
206
|
+
* Enum for different LLM API interfaces
|
|
207
|
+
* Each interface represents a different API endpoint or interaction pattern
|
|
208
|
+
*/
|
|
209
|
+
export declare enum LLMInterface {
|
|
210
|
+
/** OpenAI-style chat completions API */
|
|
211
|
+
ChatCompletions = "chat.completions",
|
|
212
|
+
/** OpenAI-style responses API */
|
|
213
|
+
Responses = "responses",
|
|
214
|
+
/** Google AI generateContent API (for text and multimodal) */
|
|
215
|
+
GenerateContent = "generateContent",
|
|
216
|
+
/** Google AI generateImages API (for traditional Imagen models) */
|
|
217
|
+
GenerateImages = "generateImages"
|
|
218
|
+
}
|
|
201
219
|
export declare const BuiltinLLMProviders: {
|
|
202
220
|
readonly Echo: "Echo";
|
|
203
221
|
readonly OpenAI: "OpenAI";
|
package/package.json
CHANGED
|
@@ -44,12 +44,6 @@ const IMAGE_GEN_COST_MAP = {
|
|
|
44
44
|
},
|
|
45
45
|
};
|
|
46
46
|
|
|
47
|
-
// Imagen 4 cost map - fixed cost per image
|
|
48
|
-
const IMAGEN_4_COST_MAP = {
|
|
49
|
-
'imagen-4': 0.04, // Standard Imagen 4
|
|
50
|
-
'imagen-4-ultra': 0.06, // Imagen 4 Ultra
|
|
51
|
-
};
|
|
52
|
-
|
|
53
47
|
export class ImageGenerator extends Component {
|
|
54
48
|
protected configSchema = Joi.object({
|
|
55
49
|
model: Joi.string().max(100).required(),
|
|
@@ -344,11 +338,6 @@ const imageGenerator = {
|
|
|
344
338
|
|
|
345
339
|
const files: any[] = parseFiles(input, config);
|
|
346
340
|
|
|
347
|
-
// Imagen models only support image generation, not image editing
|
|
348
|
-
if (files.length > 0) {
|
|
349
|
-
throw new Error('Google AI Image Generation Error: Image editing is not supported. Imagen models only support image generation.');
|
|
350
|
-
}
|
|
351
|
-
|
|
352
341
|
let args: GenerateImageConfig & {
|
|
353
342
|
aspectRatio?: string;
|
|
354
343
|
numberOfImages?: number;
|
|
@@ -360,29 +349,21 @@ const imageGenerator = {
|
|
|
360
349
|
personGeneration: config?.data?.personGeneration || 'allow_adult',
|
|
361
350
|
};
|
|
362
351
|
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
//
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
// Report fixed cost usage
|
|
375
|
-
imageGenerator.reportUsage(
|
|
376
|
-
{ cost: totalCost },
|
|
377
|
-
{
|
|
378
|
-
modelEntryName: model,
|
|
379
|
-
keySource: model.startsWith(BUILT_IN_MODEL_PREFIX) ? APIKeySource.Smyth : APIKeySource.User,
|
|
380
|
-
agentId: agent.id,
|
|
381
|
-
teamId: agent.teamId,
|
|
382
|
-
}
|
|
383
|
-
);
|
|
352
|
+
let response;
|
|
353
|
+
|
|
354
|
+
// Check if files are provided for image editing
|
|
355
|
+
if (files.length > 0) {
|
|
356
|
+
const validFiles = files.filter((file) => imageGenerator.isValidImageFile('GoogleAI', file.mimetype));
|
|
357
|
+
if (validFiles.length === 0) {
|
|
358
|
+
throw new Error('Supported image file types are: ' + SUPPORTED_MIME_TYPES_MAP.GoogleAI?.image?.join(', '));
|
|
359
|
+
}
|
|
360
|
+
response = await llmInference.imageEditRequest({ query: prompt, files: validFiles, params: { ...args, agentId: agent.id } });
|
|
361
|
+
} else {
|
|
362
|
+
response = await llmInference.imageGenRequest({ query: prompt, params: { ...args, agentId: agent.id } });
|
|
384
363
|
}
|
|
385
364
|
|
|
365
|
+
// Usage reporting is now handled in the GoogleAI connector
|
|
366
|
+
|
|
386
367
|
let output = response?.data?.[0]?.b64_json;
|
|
387
368
|
|
|
388
369
|
if (output) {
|
|
@@ -51,7 +51,7 @@ export class MCPClient extends Component {
|
|
|
51
51
|
}
|
|
52
52
|
|
|
53
53
|
// TODO [Forhad]: Need to check and validate input prompt token
|
|
54
|
-
const { client } = await this.connectMCP(mcpUrl);
|
|
54
|
+
const { client } = await this.connectMCP(mcpUrl, logger);
|
|
55
55
|
|
|
56
56
|
const toolsData = await client.listTools();
|
|
57
57
|
const conv = new Conversation(
|
|
@@ -105,17 +105,17 @@ export class MCPClient extends Component {
|
|
|
105
105
|
return { _error: `Error on running MCP Client!\n${error?.message || JSON.stringify(error)}`, _debug: logger.output };
|
|
106
106
|
}
|
|
107
107
|
}
|
|
108
|
-
private async connectMCP(mcpUrl: string) {
|
|
108
|
+
private async connectMCP(mcpUrl: string, logger: any) {
|
|
109
109
|
const client = new Client({ name: 'auto-client', version: '1.0.0' });
|
|
110
110
|
|
|
111
111
|
// 1) Try Streamable HTTP first
|
|
112
112
|
try {
|
|
113
113
|
const st = new StreamableHTTPClientTransport(new URL(mcpUrl));
|
|
114
114
|
await client.connect(st);
|
|
115
|
-
|
|
115
|
+
logger.debug('Connected to MCP using Streamable HTTP');
|
|
116
116
|
return { client, transport: 'streamable' as const };
|
|
117
117
|
} catch (e: any) {
|
|
118
|
-
|
|
118
|
+
logger.debug('Failed to connect to MCP using Streamable HTTP, falling back to SSE');
|
|
119
119
|
// 2) If clearly unsupported, fall back to SSE
|
|
120
120
|
const msg = String(e?.message || e);
|
|
121
121
|
const isUnsupported = /404|405|ENOTFOUND|ECONNREFUSED|CORS/i.test(msg);
|
|
@@ -281,6 +281,8 @@ export class Conversation extends EventEmitter {
|
|
|
281
281
|
let _content = '';
|
|
282
282
|
const reqMethods = this._reqMethods;
|
|
283
283
|
const toolsConfig = this._toolsConfig;
|
|
284
|
+
//deduplicate tools
|
|
285
|
+
toolsConfig.tools = toolsConfig.tools.filter((tool, index, self) => self.findIndex((t) => t.name === tool.name) === index);
|
|
284
286
|
const endpoints = this._endpoints;
|
|
285
287
|
const baseUrl = this._baseUrl;
|
|
286
288
|
const message_id = 'msg_' + randomUUID();
|
|
@@ -27,6 +27,7 @@ import {
|
|
|
27
27
|
TGoogleAIRequestBody,
|
|
28
28
|
ILLMRequestContext,
|
|
29
29
|
TLLMPreparedParams,
|
|
30
|
+
LLMInterface,
|
|
30
31
|
} from '@sre/types/LLM.types';
|
|
31
32
|
import { LLMHelper } from '@sre/LLMManager/LLM.helper';
|
|
32
33
|
|
|
@@ -59,7 +60,15 @@ const VALID_MIME_TYPES = [
|
|
|
59
60
|
];
|
|
60
61
|
|
|
61
62
|
// will be removed after updating the SDK
|
|
62
|
-
type UsageMetadataWithThoughtsToken = UsageMetadata & { thoughtsTokenCount
|
|
63
|
+
type UsageMetadataWithThoughtsToken = UsageMetadata & { thoughtsTokenCount?: number; cost?: number };
|
|
64
|
+
|
|
65
|
+
const IMAGE_GEN_FIXED_PRICING = {
|
|
66
|
+
'imagen-3.0-generate-001': 0.04, // Fixed cost per image
|
|
67
|
+
'imagen-4.0-generate-001': 0.04, // Fixed cost per image
|
|
68
|
+
'imagen-4': 0.04, // Standard Imagen 4
|
|
69
|
+
'imagen-4-ultra': 0.06, // Imagen 4 Ultra
|
|
70
|
+
'gemini-2.5-flash-image': 0.039,
|
|
71
|
+
};
|
|
63
72
|
|
|
64
73
|
export class GoogleAIConnector extends LLMConnector {
|
|
65
74
|
public name = 'LLM:GoogleAI';
|
|
@@ -200,31 +209,90 @@ export class GoogleAIConnector extends LLMConnector {
|
|
|
200
209
|
}
|
|
201
210
|
}
|
|
202
211
|
// #region Image Generation, will be moved to a different subsystem/service
|
|
212
|
+
|
|
203
213
|
protected async imageGenRequest({ body, context }: ILLMRequestFuncParams): Promise<any> {
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
if (!apiKey) throw new Error('Please provide an API key for Google AI');
|
|
214
|
+
const apiKey = (context.credentials as BasicCredentials)?.apiKey;
|
|
215
|
+
if (!apiKey) throw new Error('Please provide an API key for Google AI');
|
|
207
216
|
|
|
208
|
-
|
|
217
|
+
const model = body.model || 'imagen-3.0-generate-001';
|
|
218
|
+
const modelName = context.modelEntryName.replace(BUILT_IN_MODEL_PREFIX, '');
|
|
209
219
|
|
|
210
|
-
|
|
211
|
-
|
|
220
|
+
// Use traditional Imagen models
|
|
221
|
+
const config = {
|
|
222
|
+
numberOfImages: body.n || 1,
|
|
223
|
+
aspectRatio: body.aspect_ratio || body.size || '1:1',
|
|
224
|
+
personGeneration: body.person_generation || 'allow_adult',
|
|
225
|
+
};
|
|
212
226
|
|
|
213
|
-
|
|
214
|
-
const config = {
|
|
215
|
-
numberOfImages: body.n || 1,
|
|
216
|
-
aspectRatio: body.aspect_ratio || body.size || '1:1',
|
|
217
|
-
personGeneration: body.person_generation || 'allow_adult',
|
|
218
|
-
};
|
|
227
|
+
const ai = new GoogleGenAI({ apiKey });
|
|
219
228
|
|
|
220
|
-
|
|
221
|
-
|
|
229
|
+
// Default to GenerateImages interface if not specified
|
|
230
|
+
const modelInterface = context.modelInfo?.interface || LLMInterface.GenerateImages;
|
|
231
|
+
|
|
232
|
+
let response: any;
|
|
233
|
+
|
|
234
|
+
if (modelInterface === LLMInterface.GenerateContent) {
|
|
235
|
+
// Use Gemini image generation API
|
|
236
|
+
response = await ai.models.generateContent({
|
|
237
|
+
model,
|
|
238
|
+
contents: body.prompt,
|
|
239
|
+
});
|
|
240
|
+
|
|
241
|
+
// Extract image data from Gemini response format
|
|
242
|
+
const imageData: any[] = [];
|
|
243
|
+
if (response.candidates?.[0]?.content?.parts) {
|
|
244
|
+
for (const part of response.candidates[0].content.parts) {
|
|
245
|
+
if (part.inlineData?.data) {
|
|
246
|
+
imageData.push({
|
|
247
|
+
url: `data:image/png;base64,${part.inlineData.data}`,
|
|
248
|
+
b64_json: part.inlineData.data,
|
|
249
|
+
revised_prompt: body.prompt,
|
|
250
|
+
});
|
|
251
|
+
}
|
|
252
|
+
}
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
// Report input tokens and image cost pricing based on the official pricing page:
|
|
256
|
+
// https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-flash-image-preview
|
|
257
|
+
const usageMetadata = response?.usageMetadata as UsageMetadataWithThoughtsToken;
|
|
258
|
+
|
|
259
|
+
this.reportImageUsage({
|
|
260
|
+
usage: {
|
|
261
|
+
cost: IMAGE_GEN_FIXED_PRICING[modelName],
|
|
262
|
+
usageMetadata,
|
|
263
|
+
},
|
|
264
|
+
context,
|
|
265
|
+
});
|
|
266
|
+
|
|
267
|
+
if (imageData.length === 0) {
|
|
268
|
+
throw new Error(
|
|
269
|
+
'Please enter a valid prompt — for example: "Create a picture of a nano banana dish in a fancy restaurant with a Gemini theme."'
|
|
270
|
+
);
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
return {
|
|
274
|
+
created: Math.floor(Date.now() / 1000),
|
|
275
|
+
data: imageData,
|
|
276
|
+
};
|
|
277
|
+
} else if (modelInterface === LLMInterface.GenerateImages) {
|
|
278
|
+
response = await ai.models.generateImages({
|
|
222
279
|
model,
|
|
223
280
|
prompt: body.prompt,
|
|
224
281
|
config,
|
|
225
282
|
});
|
|
226
283
|
|
|
227
|
-
//
|
|
284
|
+
// Report input tokens and image cost pricing based on the official pricing page:
|
|
285
|
+
// https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-flash-image-preview
|
|
286
|
+
const usageMetadata = response?.usageMetadata as UsageMetadataWithThoughtsToken;
|
|
287
|
+
this.reportImageUsage({
|
|
288
|
+
usage: {
|
|
289
|
+
cost: IMAGE_GEN_FIXED_PRICING[modelName],
|
|
290
|
+
usageMetadata,
|
|
291
|
+
},
|
|
292
|
+
numberOfImages: config.numberOfImages,
|
|
293
|
+
context,
|
|
294
|
+
});
|
|
295
|
+
|
|
228
296
|
return {
|
|
229
297
|
created: Math.floor(Date.now() / 1000),
|
|
230
298
|
data:
|
|
@@ -234,13 +302,59 @@ export class GoogleAIConnector extends LLMConnector {
|
|
|
234
302
|
revised_prompt: body.prompt,
|
|
235
303
|
})) || [],
|
|
236
304
|
};
|
|
237
|
-
}
|
|
238
|
-
throw
|
|
305
|
+
} else {
|
|
306
|
+
throw new Error(`Unsupported interface: ${modelInterface}`);
|
|
239
307
|
}
|
|
240
308
|
}
|
|
241
309
|
|
|
242
310
|
protected async imageEditRequest({ body, context }: ILLMRequestFuncParams): Promise<any> {
|
|
243
|
-
|
|
311
|
+
const apiKey = (context.credentials as BasicCredentials)?.apiKey;
|
|
312
|
+
if (!apiKey) throw new Error('Please provide an API key for Google AI');
|
|
313
|
+
|
|
314
|
+
// A model supports image editing if it implements the `generateContent` interface.
|
|
315
|
+
const supportsEditing = context.modelInfo?.interface === LLMInterface.GenerateContent;
|
|
316
|
+
if (!supportsEditing) {
|
|
317
|
+
throw new Error(`Image editing is not supported for model: ${body.model}. This model only supports image generation.`);
|
|
318
|
+
}
|
|
319
|
+
|
|
320
|
+
const ai = new GoogleGenAI({ apiKey });
|
|
321
|
+
const modelName = context.modelEntryName.replace(BUILT_IN_MODEL_PREFIX, '');
|
|
322
|
+
|
|
323
|
+
// Use the prepared body which already contains processed files and contents
|
|
324
|
+
const response = await ai.models.generateContent({
|
|
325
|
+
model: body.model,
|
|
326
|
+
contents: body.contents,
|
|
327
|
+
});
|
|
328
|
+
|
|
329
|
+
// Extract image data from Gemini response format
|
|
330
|
+
const imageData: any[] = [];
|
|
331
|
+
if (response.candidates?.[0]?.content?.parts) {
|
|
332
|
+
for (const part of response.candidates[0].content.parts) {
|
|
333
|
+
if (part.inlineData?.data) {
|
|
334
|
+
imageData.push({
|
|
335
|
+
url: `data:image/png;base64,${part.inlineData.data}`,
|
|
336
|
+
b64_json: part.inlineData.data,
|
|
337
|
+
revised_prompt: body._metadata?.prompt || body.prompt,
|
|
338
|
+
});
|
|
339
|
+
}
|
|
340
|
+
}
|
|
341
|
+
}
|
|
342
|
+
|
|
343
|
+
// Report pricing for input tokens and image costs
|
|
344
|
+
const usageMetadata = response?.usageMetadata as UsageMetadataWithThoughtsToken;
|
|
345
|
+
|
|
346
|
+
this.reportImageUsage({
|
|
347
|
+
usage: {
|
|
348
|
+
cost: IMAGE_GEN_FIXED_PRICING[modelName],
|
|
349
|
+
usageMetadata,
|
|
350
|
+
},
|
|
351
|
+
context,
|
|
352
|
+
});
|
|
353
|
+
|
|
354
|
+
return {
|
|
355
|
+
created: Math.floor(Date.now() / 1000),
|
|
356
|
+
data: imageData,
|
|
357
|
+
};
|
|
244
358
|
}
|
|
245
359
|
|
|
246
360
|
protected async reqBodyAdapter(params: TLLMPreparedParams): Promise<TGoogleAIRequestBody> {
|
|
@@ -248,7 +362,13 @@ export class GoogleAIConnector extends LLMConnector {
|
|
|
248
362
|
|
|
249
363
|
// Check if this is an image generation request based on capabilities
|
|
250
364
|
if (params?.capabilities?.imageGeneration) {
|
|
251
|
-
|
|
365
|
+
// Determine if this is image editing (has files) or generation
|
|
366
|
+
const hasFiles = params?.files?.length > 0;
|
|
367
|
+
if (hasFiles) {
|
|
368
|
+
return this.prepareImageEditBody(params) as any;
|
|
369
|
+
} else {
|
|
370
|
+
return this.prepareBodyForImageGenRequest(params) as any;
|
|
371
|
+
}
|
|
252
372
|
}
|
|
253
373
|
|
|
254
374
|
const messages = await this.prepareMessages(params);
|
|
@@ -291,9 +411,9 @@ export class GoogleAIConnector extends LLMConnector {
|
|
|
291
411
|
usage: UsageMetadataWithThoughtsToken,
|
|
292
412
|
metadata: { modelEntryName: string; keySource: APIKeySource; agentId: string; teamId: string }
|
|
293
413
|
) {
|
|
294
|
-
|
|
414
|
+
// SmythOS (built-in) models have a prefix, so we need to remove it to get the model name
|
|
415
|
+
const modelName = metadata.modelEntryName.replace(BUILT_IN_MODEL_PREFIX, '');
|
|
295
416
|
let tier = '';
|
|
296
|
-
|
|
297
417
|
const tierThresholds = {
|
|
298
418
|
'gemini-1.5-pro': 128_000,
|
|
299
419
|
'gemini-2.5-pro': 200_000,
|
|
@@ -304,24 +424,21 @@ export class GoogleAIConnector extends LLMConnector {
|
|
|
304
424
|
const audioInputTokens = usage?.['promptTokensDetails']?.find((detail) => detail.modality === 'AUDIO')?.tokenCount || 0;
|
|
305
425
|
|
|
306
426
|
// Find matching model and set tier based on threshold
|
|
307
|
-
const modelWithTier = Object.keys(tierThresholds).find((model) =>
|
|
427
|
+
const modelWithTier = Object.keys(tierThresholds).find((model) => modelName.includes(model));
|
|
308
428
|
if (modelWithTier) {
|
|
309
429
|
tier = textInputTokens < tierThresholds[modelWithTier] ? 'tier1' : 'tier2';
|
|
310
430
|
}
|
|
311
431
|
|
|
312
432
|
// #endregion
|
|
313
433
|
|
|
314
|
-
// SmythOS (built-in) models have a prefix, so we need to remove it to get the model name
|
|
315
|
-
const modelName = metadata.modelEntryName.replace(BUILT_IN_MODEL_PREFIX, '');
|
|
316
|
-
|
|
317
434
|
const usageData = {
|
|
318
435
|
sourceId: `llm:${modelName}`,
|
|
319
436
|
input_tokens: textInputTokens,
|
|
320
|
-
output_tokens: usage
|
|
437
|
+
output_tokens: usage?.candidatesTokenCount || 0,
|
|
321
438
|
input_tokens_audio: audioInputTokens,
|
|
322
|
-
input_tokens_cache_read: usage
|
|
439
|
+
input_tokens_cache_read: usage?.cachedContentTokenCount || 0,
|
|
323
440
|
input_tokens_cache_write: 0,
|
|
324
|
-
reasoning_tokens: usage
|
|
441
|
+
reasoning_tokens: usage?.thoughtsTokenCount,
|
|
325
442
|
keySource: metadata.keySource,
|
|
326
443
|
agentId: metadata.agentId,
|
|
327
444
|
teamId: metadata.teamId,
|
|
@@ -332,6 +449,49 @@ export class GoogleAIConnector extends LLMConnector {
|
|
|
332
449
|
return usageData;
|
|
333
450
|
}
|
|
334
451
|
|
|
452
|
+
/**
|
|
453
|
+
* Extract text and image tokens from Google AI usage metadata
|
|
454
|
+
*/
|
|
455
|
+
private extractTokenCounts(usage: UsageMetadataWithThoughtsToken): { textTokens: number; imageTokens: number } {
|
|
456
|
+
const textTokens = usage?.['promptTokensDetails']?.find((detail) => detail.modality === 'TEXT')?.tokenCount || 0;
|
|
457
|
+
const imageTokens = usage?.['promptTokensDetails']?.find((detail) => detail.modality === 'IMAGE')?.tokenCount || 0;
|
|
458
|
+
|
|
459
|
+
return { textTokens, imageTokens };
|
|
460
|
+
}
|
|
461
|
+
|
|
462
|
+
protected reportImageUsage({
|
|
463
|
+
usage,
|
|
464
|
+
context,
|
|
465
|
+
numberOfImages = 1,
|
|
466
|
+
}: {
|
|
467
|
+
usage: { cost?: number; usageMetadata?: UsageMetadataWithThoughtsToken };
|
|
468
|
+
context: ILLMRequestContext;
|
|
469
|
+
numberOfImages?: number;
|
|
470
|
+
}) {
|
|
471
|
+
// Extract text and image tokens from rawUsage if available
|
|
472
|
+
let input_tokens_txt = 0;
|
|
473
|
+
let input_tokens_img = 0;
|
|
474
|
+
|
|
475
|
+
if (usage.usageMetadata) {
|
|
476
|
+
const { textTokens, imageTokens } = this.extractTokenCounts(usage.usageMetadata);
|
|
477
|
+
input_tokens_txt = textTokens;
|
|
478
|
+
input_tokens_img = imageTokens;
|
|
479
|
+
}
|
|
480
|
+
|
|
481
|
+
const imageUsageData = {
|
|
482
|
+
sourceId: `api:imagegen.smyth`,
|
|
483
|
+
keySource: context.isUserKey ? APIKeySource.User : APIKeySource.Smyth,
|
|
484
|
+
|
|
485
|
+
cost: usage.cost * numberOfImages,
|
|
486
|
+
input_tokens_txt,
|
|
487
|
+
input_tokens_img,
|
|
488
|
+
|
|
489
|
+
agentId: context.agentId,
|
|
490
|
+
teamId: context.teamId,
|
|
491
|
+
};
|
|
492
|
+
SystemEvents.emit('USAGE:API', imageUsageData);
|
|
493
|
+
}
|
|
494
|
+
|
|
335
495
|
public formatToolsConfig({ toolDefinitions, toolChoice = 'auto' }) {
|
|
336
496
|
const tools = toolDefinitions.map((tool) => {
|
|
337
497
|
const { name, description, properties, requiredFields } = tool;
|
|
@@ -640,6 +800,65 @@ export class GoogleAIConnector extends LLMConnector {
|
|
|
640
800
|
};
|
|
641
801
|
}
|
|
642
802
|
|
|
803
|
+
private async prepareImageEditBody(params: TLLMPreparedParams): Promise<any> {
|
|
804
|
+
const model = params.model || 'gemini-2.5-flash-image-preview';
|
|
805
|
+
|
|
806
|
+
// Construct edit prompt with image and instructions
|
|
807
|
+
let editPrompt = params.prompt || 'Edit this image';
|
|
808
|
+
if ((params as any).instruction) {
|
|
809
|
+
editPrompt += `. ${(params as any).instruction}`;
|
|
810
|
+
}
|
|
811
|
+
|
|
812
|
+
// For image editing, we need to include the original image in the contents
|
|
813
|
+
const contents: any[] = [];
|
|
814
|
+
const files: BinaryInput[] = params?.files || [];
|
|
815
|
+
|
|
816
|
+
if (files.length > 0) {
|
|
817
|
+
// Get only valid image files for editing
|
|
818
|
+
const validImageFiles = this.getValidFiles(files, 'image');
|
|
819
|
+
|
|
820
|
+
if (validImageFiles.length === 0) {
|
|
821
|
+
throw new Error('No valid image files found for editing. Please provide at least one image file.');
|
|
822
|
+
}
|
|
823
|
+
|
|
824
|
+
// Process each image file
|
|
825
|
+
for (const file of validImageFiles) {
|
|
826
|
+
try {
|
|
827
|
+
// Read the file data as base64
|
|
828
|
+
const bufferData = await file.getBuffer();
|
|
829
|
+
const base64Image = Buffer.from(bufferData).toString('base64');
|
|
830
|
+
|
|
831
|
+
contents.push({
|
|
832
|
+
inlineData: {
|
|
833
|
+
mimeType: file.mimetype,
|
|
834
|
+
data: base64Image,
|
|
835
|
+
},
|
|
836
|
+
});
|
|
837
|
+
} catch (error) {
|
|
838
|
+
throw new Error(`Failed to process image file: ${error.message}`);
|
|
839
|
+
}
|
|
840
|
+
}
|
|
841
|
+
} else {
|
|
842
|
+
throw new Error('No image provided for editing. Please include an image file.');
|
|
843
|
+
}
|
|
844
|
+
|
|
845
|
+
// Add the edit instruction
|
|
846
|
+
contents.push({ text: editPrompt });
|
|
847
|
+
|
|
848
|
+
// Return the complete request body that can be used directly in imageEditRequest
|
|
849
|
+
return {
|
|
850
|
+
model,
|
|
851
|
+
contents,
|
|
852
|
+
// Additional metadata for usage reporting
|
|
853
|
+
_metadata: {
|
|
854
|
+
prompt: editPrompt,
|
|
855
|
+
numberOfImages: (params as any).n || 1,
|
|
856
|
+
aspectRatio: (params as any).aspect_ratio || (params as any).size || '1:1',
|
|
857
|
+
personGeneration: (params as any).person_generation || 'allow_adult',
|
|
858
|
+
},
|
|
859
|
+
};
|
|
860
|
+
}
|
|
861
|
+
|
|
643
862
|
// Add this helper method to sanitize function names
|
|
644
863
|
private sanitizeFunctionName(name: string): string {
|
|
645
864
|
// Check if name is undefined or null
|
package/src/types/LLM.types.ts
CHANGED
|
@@ -228,12 +228,33 @@ export type TLLMModel = {
|
|
|
228
228
|
params?: TLLMParams;
|
|
229
229
|
/**
|
|
230
230
|
* Specifies the API interface type to use for this model
|
|
231
|
-
*
|
|
232
|
-
* This determines which OpenAI API endpoint and interface implementation to use
|
|
231
|
+
* This determines which API endpoint and interface implementation to use
|
|
233
232
|
*/
|
|
234
|
-
interface?:
|
|
233
|
+
interface?: LLMInterface;
|
|
234
|
+
|
|
235
|
+
/**
|
|
236
|
+
* Indicates whether this model supports image editing functionality
|
|
237
|
+
* Only applicable for image generation models
|
|
238
|
+
*/
|
|
239
|
+
supportsEditing?: boolean;
|
|
235
240
|
};
|
|
236
241
|
|
|
242
|
+
// #region [ LLM Interface Types ] ================================================
|
|
243
|
+
/**
|
|
244
|
+
* Enum for different LLM API interfaces
|
|
245
|
+
* Each interface represents a different API endpoint or interaction pattern
|
|
246
|
+
*/
|
|
247
|
+
export enum LLMInterface {
|
|
248
|
+
/** OpenAI-style chat completions API */
|
|
249
|
+
ChatCompletions = 'chat.completions',
|
|
250
|
+
/** OpenAI-style responses API */
|
|
251
|
+
Responses = 'responses',
|
|
252
|
+
/** Google AI generateContent API (for text and multimodal) */
|
|
253
|
+
GenerateContent = 'generateContent',
|
|
254
|
+
/** Google AI generateImages API (for traditional Imagen models) */
|
|
255
|
+
GenerateImages = 'generateImages',
|
|
256
|
+
}
|
|
257
|
+
|
|
237
258
|
// #region [ Handle extendable LLM Providers ] ================================================
|
|
238
259
|
export const BuiltinLLMProviders = {
|
|
239
260
|
Echo: 'Echo',
|