@smythos/sre 1.6.1 → 1.6.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (234) hide show
  1. package/CHANGELOG +111 -111
  2. package/LICENSE +18 -18
  3. package/README.md +135 -135
  4. package/dist/index.js +2 -2
  5. package/dist/index.js.map +1 -1
  6. package/dist/types/subsystems/LLMManager/ModelsProvider.service/connectors/SmythModelsProvider.class.d.ts +39 -0
  7. package/package.json +1 -1
  8. package/src/Components/APICall/APICall.class.ts +161 -161
  9. package/src/Components/APICall/AccessTokenManager.ts +166 -166
  10. package/src/Components/APICall/ArrayBufferResponse.helper.ts +58 -58
  11. package/src/Components/APICall/OAuth.helper.ts +447 -447
  12. package/src/Components/APICall/mimeTypeCategories.ts +46 -46
  13. package/src/Components/APICall/parseData.ts +167 -167
  14. package/src/Components/APICall/parseHeaders.ts +41 -41
  15. package/src/Components/APICall/parseProxy.ts +68 -68
  16. package/src/Components/APICall/parseUrl.ts +91 -91
  17. package/src/Components/APIEndpoint.class.ts +234 -234
  18. package/src/Components/APIOutput.class.ts +58 -58
  19. package/src/Components/AgentPlugin.class.ts +102 -102
  20. package/src/Components/Async.class.ts +155 -155
  21. package/src/Components/Await.class.ts +90 -90
  22. package/src/Components/Classifier.class.ts +158 -158
  23. package/src/Components/Component.class.ts +147 -147
  24. package/src/Components/ComponentHost.class.ts +38 -38
  25. package/src/Components/DataSourceCleaner.class.ts +92 -92
  26. package/src/Components/DataSourceIndexer.class.ts +181 -181
  27. package/src/Components/DataSourceLookup.class.ts +161 -161
  28. package/src/Components/ECMASandbox.class.ts +72 -72
  29. package/src/Components/FEncDec.class.ts +29 -29
  30. package/src/Components/FHash.class.ts +33 -33
  31. package/src/Components/FSign.class.ts +80 -80
  32. package/src/Components/FSleep.class.ts +25 -25
  33. package/src/Components/FTimestamp.class.ts +66 -66
  34. package/src/Components/FileStore.class.ts +78 -78
  35. package/src/Components/ForEach.class.ts +97 -97
  36. package/src/Components/GPTPlugin.class.ts +70 -70
  37. package/src/Components/GenAILLM.class.ts +586 -586
  38. package/src/Components/HuggingFace.class.ts +313 -313
  39. package/src/Components/Image/imageSettings.config.ts +70 -70
  40. package/src/Components/ImageGenerator.class.ts +483 -483
  41. package/src/Components/JSONFilter.class.ts +54 -54
  42. package/src/Components/LLMAssistant.class.ts +213 -213
  43. package/src/Components/LogicAND.class.ts +28 -28
  44. package/src/Components/LogicAtLeast.class.ts +85 -85
  45. package/src/Components/LogicAtMost.class.ts +86 -86
  46. package/src/Components/LogicOR.class.ts +29 -29
  47. package/src/Components/LogicXOR.class.ts +34 -34
  48. package/src/Components/MCPClient.class.ts +137 -137
  49. package/src/Components/MemoryDeleteKeyVal.class.ts +70 -70
  50. package/src/Components/MemoryReadKeyVal.class.ts +67 -67
  51. package/src/Components/MemoryWriteKeyVal.class.ts +62 -62
  52. package/src/Components/MemoryWriteObject.class.ts +97 -97
  53. package/src/Components/MultimodalLLM.class.ts +128 -128
  54. package/src/Components/OpenAPI.class.ts +72 -72
  55. package/src/Components/PromptGenerator.class.ts +122 -122
  56. package/src/Components/ScrapflyWebScrape.class.ts +183 -183
  57. package/src/Components/ServerlessCode.class.ts +123 -123
  58. package/src/Components/TavilyWebSearch.class.ts +103 -103
  59. package/src/Components/VisionLLM.class.ts +104 -104
  60. package/src/Components/ZapierAction.class.ts +127 -127
  61. package/src/Components/index.ts +97 -97
  62. package/src/Core/AgentProcess.helper.ts +240 -240
  63. package/src/Core/Connector.class.ts +123 -123
  64. package/src/Core/ConnectorsService.ts +197 -197
  65. package/src/Core/DummyConnector.ts +49 -49
  66. package/src/Core/HookService.ts +105 -105
  67. package/src/Core/SmythRuntime.class.ts +241 -241
  68. package/src/Core/SystemEvents.ts +16 -16
  69. package/src/Core/boot.ts +56 -56
  70. package/src/config.ts +15 -15
  71. package/src/constants.ts +126 -126
  72. package/src/data/hugging-face.params.json +579 -579
  73. package/src/helpers/AWSLambdaCode.helper.ts +624 -599
  74. package/src/helpers/BinaryInput.helper.ts +331 -331
  75. package/src/helpers/Conversation.helper.ts +1157 -1157
  76. package/src/helpers/ECMASandbox.helper.ts +64 -64
  77. package/src/helpers/JsonContent.helper.ts +97 -97
  78. package/src/helpers/LocalCache.helper.ts +97 -97
  79. package/src/helpers/Log.helper.ts +274 -274
  80. package/src/helpers/OpenApiParser.helper.ts +150 -150
  81. package/src/helpers/S3Cache.helper.ts +147 -147
  82. package/src/helpers/SmythURI.helper.ts +5 -5
  83. package/src/helpers/Sysconfig.helper.ts +95 -95
  84. package/src/helpers/TemplateString.helper.ts +243 -243
  85. package/src/helpers/TypeChecker.helper.ts +329 -329
  86. package/src/index.ts +198 -198
  87. package/src/index.ts.bak +198 -198
  88. package/src/subsystems/AgentManager/Agent.class.ts +1114 -1114
  89. package/src/subsystems/AgentManager/Agent.helper.ts +3 -3
  90. package/src/subsystems/AgentManager/AgentData.service/AgentDataConnector.ts +230 -230
  91. package/src/subsystems/AgentManager/AgentData.service/connectors/CLIAgentDataConnector.class.ts +66 -66
  92. package/src/subsystems/AgentManager/AgentData.service/connectors/LocalAgentDataConnector.class.ts +145 -145
  93. package/src/subsystems/AgentManager/AgentData.service/connectors/NullAgentData.class.ts +39 -39
  94. package/src/subsystems/AgentManager/AgentData.service/index.ts +18 -18
  95. package/src/subsystems/AgentManager/AgentLogger.class.ts +301 -301
  96. package/src/subsystems/AgentManager/AgentRequest.class.ts +51 -51
  97. package/src/subsystems/AgentManager/AgentRuntime.class.ts +557 -557
  98. package/src/subsystems/AgentManager/AgentSSE.class.ts +101 -101
  99. package/src/subsystems/AgentManager/AgentSettings.class.ts +52 -52
  100. package/src/subsystems/AgentManager/Component.service/ComponentConnector.ts +32 -32
  101. package/src/subsystems/AgentManager/Component.service/connectors/LocalComponentConnector.class.ts +60 -60
  102. package/src/subsystems/AgentManager/Component.service/index.ts +11 -11
  103. package/src/subsystems/AgentManager/EmbodimentSettings.class.ts +47 -47
  104. package/src/subsystems/AgentManager/ForkedAgent.class.ts +154 -154
  105. package/src/subsystems/AgentManager/OSResourceMonitor.ts +77 -77
  106. package/src/subsystems/ComputeManager/Code.service/CodeConnector.ts +98 -98
  107. package/src/subsystems/ComputeManager/Code.service/connectors/AWSLambdaCode.class.ts +171 -172
  108. package/src/subsystems/ComputeManager/Code.service/connectors/ECMASandbox.class.ts +131 -131
  109. package/src/subsystems/ComputeManager/Code.service/index.ts +13 -13
  110. package/src/subsystems/IO/CLI.service/CLIConnector.ts +47 -47
  111. package/src/subsystems/IO/CLI.service/index.ts +9 -9
  112. package/src/subsystems/IO/Log.service/LogConnector.ts +32 -32
  113. package/src/subsystems/IO/Log.service/connectors/ConsoleLog.class.ts +28 -28
  114. package/src/subsystems/IO/Log.service/index.ts +13 -13
  115. package/src/subsystems/IO/NKV.service/NKVConnector.ts +43 -43
  116. package/src/subsystems/IO/NKV.service/connectors/NKVLocalStorage.class.ts +234 -234
  117. package/src/subsystems/IO/NKV.service/connectors/NKVRAM.class.ts +204 -204
  118. package/src/subsystems/IO/NKV.service/connectors/NKVRedis.class.ts +182 -182
  119. package/src/subsystems/IO/NKV.service/index.ts +14 -14
  120. package/src/subsystems/IO/Router.service/RouterConnector.ts +21 -21
  121. package/src/subsystems/IO/Router.service/connectors/ExpressRouter.class.ts +48 -48
  122. package/src/subsystems/IO/Router.service/connectors/NullRouter.class.ts +40 -40
  123. package/src/subsystems/IO/Router.service/index.ts +11 -11
  124. package/src/subsystems/IO/Storage.service/SmythFS.class.ts +488 -488
  125. package/src/subsystems/IO/Storage.service/StorageConnector.ts +66 -66
  126. package/src/subsystems/IO/Storage.service/connectors/LocalStorage.class.ts +327 -327
  127. package/src/subsystems/IO/Storage.service/connectors/S3Storage.class.ts +482 -482
  128. package/src/subsystems/IO/Storage.service/index.ts +13 -13
  129. package/src/subsystems/IO/VectorDB.service/VectorDBConnector.ts +108 -108
  130. package/src/subsystems/IO/VectorDB.service/connectors/MilvusVectorDB.class.ts +465 -465
  131. package/src/subsystems/IO/VectorDB.service/connectors/PineconeVectorDB.class.ts +387 -387
  132. package/src/subsystems/IO/VectorDB.service/connectors/RAMVecrtorDB.class.ts +408 -408
  133. package/src/subsystems/IO/VectorDB.service/embed/BaseEmbedding.ts +107 -107
  134. package/src/subsystems/IO/VectorDB.service/embed/GoogleEmbedding.ts +118 -118
  135. package/src/subsystems/IO/VectorDB.service/embed/OpenAIEmbedding.ts +109 -109
  136. package/src/subsystems/IO/VectorDB.service/embed/index.ts +26 -26
  137. package/src/subsystems/IO/VectorDB.service/index.ts +14 -14
  138. package/src/subsystems/LLMManager/LLM.helper.ts +251 -251
  139. package/src/subsystems/LLMManager/LLM.inference.ts +345 -345
  140. package/src/subsystems/LLMManager/LLM.service/LLMConnector.ts +492 -492
  141. package/src/subsystems/LLMManager/LLM.service/LLMCredentials.helper.ts +171 -171
  142. package/src/subsystems/LLMManager/LLM.service/connectors/Anthropic.class.ts +666 -666
  143. package/src/subsystems/LLMManager/LLM.service/connectors/Bedrock.class.ts +407 -407
  144. package/src/subsystems/LLMManager/LLM.service/connectors/Echo.class.ts +92 -92
  145. package/src/subsystems/LLMManager/LLM.service/connectors/GoogleAI.class.ts +983 -983
  146. package/src/subsystems/LLMManager/LLM.service/connectors/Groq.class.ts +319 -319
  147. package/src/subsystems/LLMManager/LLM.service/connectors/Ollama.class.ts +361 -361
  148. package/src/subsystems/LLMManager/LLM.service/connectors/Perplexity.class.ts +257 -257
  149. package/src/subsystems/LLMManager/LLM.service/connectors/VertexAI.class.ts +430 -430
  150. package/src/subsystems/LLMManager/LLM.service/connectors/openai/OpenAIConnector.class.ts +503 -503
  151. package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ChatCompletionsApiInterface.ts +524 -524
  152. package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/OpenAIApiInterface.ts +100 -100
  153. package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/OpenAIApiInterfaceFactory.ts +81 -81
  154. package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ResponsesApiInterface.ts +1145 -1145
  155. package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/constants.ts +13 -13
  156. package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/index.ts +4 -4
  157. package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/utils.ts +11 -11
  158. package/src/subsystems/LLMManager/LLM.service/connectors/openai/types.ts +32 -32
  159. package/src/subsystems/LLMManager/LLM.service/connectors/xAI.class.ts +478 -478
  160. package/src/subsystems/LLMManager/LLM.service/index.ts +47 -47
  161. package/src/subsystems/LLMManager/ModelsProvider.service/ModelsProviderConnector.ts +303 -303
  162. package/src/subsystems/LLMManager/ModelsProvider.service/connectors/JSONModelsProvider.class.ts +271 -271
  163. package/src/subsystems/LLMManager/ModelsProvider.service/index.ts +11 -11
  164. package/src/subsystems/LLMManager/custom-models.ts +854 -854
  165. package/src/subsystems/LLMManager/models.ts +2540 -2540
  166. package/src/subsystems/LLMManager/paramMappings.ts +69 -69
  167. package/src/subsystems/MemoryManager/Cache.service/CacheConnector.ts +86 -86
  168. package/src/subsystems/MemoryManager/Cache.service/connectors/LocalStorageCache.class.ts +297 -297
  169. package/src/subsystems/MemoryManager/Cache.service/connectors/RAMCache.class.ts +214 -214
  170. package/src/subsystems/MemoryManager/Cache.service/connectors/RedisCache.class.ts +252 -252
  171. package/src/subsystems/MemoryManager/Cache.service/connectors/S3Cache.class.ts +373 -373
  172. package/src/subsystems/MemoryManager/Cache.service/index.ts +15 -15
  173. package/src/subsystems/MemoryManager/LLMCache.ts +72 -72
  174. package/src/subsystems/MemoryManager/LLMContext.ts +124 -124
  175. package/src/subsystems/MemoryManager/LLMMemory.service/LLMMemoryConnector.ts +26 -26
  176. package/src/subsystems/MemoryManager/RuntimeContext.ts +277 -277
  177. package/src/subsystems/Security/AccessControl/ACL.class.ts +208 -208
  178. package/src/subsystems/Security/AccessControl/AccessCandidate.class.ts +82 -82
  179. package/src/subsystems/Security/AccessControl/AccessRequest.class.ts +52 -52
  180. package/src/subsystems/Security/Account.service/AccountConnector.ts +44 -44
  181. package/src/subsystems/Security/Account.service/connectors/DummyAccount.class.ts +130 -130
  182. package/src/subsystems/Security/Account.service/connectors/JSONFileAccount.class.ts +170 -170
  183. package/src/subsystems/Security/Account.service/connectors/MySQLAccount.class.ts +76 -76
  184. package/src/subsystems/Security/Account.service/index.ts +14 -14
  185. package/src/subsystems/Security/Credentials.helper.ts +62 -62
  186. package/src/subsystems/Security/ManagedVault.service/ManagedVaultConnector.ts +38 -38
  187. package/src/subsystems/Security/ManagedVault.service/connectors/NullManagedVault.class.ts +53 -53
  188. package/src/subsystems/Security/ManagedVault.service/connectors/SecretManagerManagedVault.ts +154 -154
  189. package/src/subsystems/Security/ManagedVault.service/index.ts +12 -12
  190. package/src/subsystems/Security/SecureConnector.class.ts +110 -110
  191. package/src/subsystems/Security/Vault.service/Vault.helper.ts +30 -30
  192. package/src/subsystems/Security/Vault.service/VaultConnector.ts +29 -29
  193. package/src/subsystems/Security/Vault.service/connectors/HashicorpVault.class.ts +46 -46
  194. package/src/subsystems/Security/Vault.service/connectors/JSONFileVault.class.ts +221 -221
  195. package/src/subsystems/Security/Vault.service/connectors/NullVault.class.ts +54 -54
  196. package/src/subsystems/Security/Vault.service/connectors/SecretsManager.class.ts +140 -140
  197. package/src/subsystems/Security/Vault.service/index.ts +12 -12
  198. package/src/types/ACL.types.ts +104 -104
  199. package/src/types/AWS.types.ts +10 -10
  200. package/src/types/Agent.types.ts +61 -61
  201. package/src/types/AgentLogger.types.ts +17 -17
  202. package/src/types/Cache.types.ts +1 -1
  203. package/src/types/Common.types.ts +2 -2
  204. package/src/types/LLM.types.ts +520 -520
  205. package/src/types/Redis.types.ts +8 -8
  206. package/src/types/SRE.types.ts +64 -64
  207. package/src/types/Security.types.ts +14 -14
  208. package/src/types/Storage.types.ts +5 -5
  209. package/src/types/VectorDB.types.ts +86 -86
  210. package/src/utils/base64.utils.ts +275 -275
  211. package/src/utils/cli.utils.ts +68 -68
  212. package/src/utils/data.utils.ts +322 -322
  213. package/src/utils/date-time.utils.ts +22 -22
  214. package/src/utils/general.utils.ts +238 -238
  215. package/src/utils/index.ts +12 -12
  216. package/src/utils/lazy-client.ts +261 -261
  217. package/src/utils/numbers.utils.ts +13 -13
  218. package/src/utils/oauth.utils.ts +35 -35
  219. package/src/utils/string.utils.ts +414 -414
  220. package/src/utils/url.utils.ts +19 -19
  221. package/src/utils/validation.utils.ts +74 -74
  222. package/dist/bundle-analysis-lazy.html +0 -4949
  223. package/dist/bundle-analysis.html +0 -4949
  224. package/dist/types/Components/Triggers/GmailTrigger.class.d.ts +0 -13
  225. package/dist/types/Components/Triggers/Trigger.class.d.ts +0 -3
  226. package/dist/types/helpers/AIPerformanceAnalyzer.helper.d.ts +0 -45
  227. package/dist/types/helpers/AIPerformanceCollector.helper.d.ts +0 -111
  228. package/dist/types/subsystems/IO/Storage.service/connectors/AzureBlobStorage.class.d.ts +0 -211
  229. package/dist/types/subsystems/IO/VectorDB.service/connectors/WeaviateVectorDB.class.d.ts +0 -187
  230. package/dist/types/subsystems/PerformanceManager/Performance.service/PerformanceConnector.d.ts +0 -102
  231. package/dist/types/subsystems/PerformanceManager/Performance.service/connectors/LocalPerformanceConnector.class.d.ts +0 -100
  232. package/dist/types/subsystems/PerformanceManager/Performance.service/index.d.ts +0 -22
  233. package/dist/types/types/Performance.types.d.ts +0 -468
  234. package/dist/types/utils/package-manager.utils.d.ts +0 -26
@@ -1,983 +1,983 @@
1
- import os from 'os';
2
- import path from 'path';
3
- import EventEmitter from 'events';
4
- import fs from 'fs';
5
-
6
- import { GoogleGenerativeAI, ModelParams, GenerationConfig, GenerateContentRequest, UsageMetadata, FunctionCallingMode } from '@google/generative-ai';
7
- import { GoogleAIFileManager, FileState } from '@google/generative-ai/server';
8
- import { GoogleGenAI } from '@google/genai';
9
-
10
- import { JSON_RESPONSE_INSTRUCTION, BUILT_IN_MODEL_PREFIX } from '@sre/constants';
11
- import { BinaryInput } from '@sre/helpers/BinaryInput.helper';
12
- import { AccessCandidate } from '@sre/Security/AccessControl/AccessCandidate.class';
13
- import { uid } from '@sre/utils';
14
-
15
- import { processWithConcurrencyLimit } from '@sre/utils';
16
-
17
- import {
18
- TLLMMessageBlock,
19
- ToolData,
20
- TLLMMessageRole,
21
- TLLMToolResultMessageBlock,
22
- APIKeySource,
23
- TLLMEvent,
24
- BasicCredentials,
25
- ILLMRequestFuncParams,
26
- TLLMChatResponse,
27
- TGoogleAIRequestBody,
28
- ILLMRequestContext,
29
- TLLMPreparedParams,
30
- LLMInterface,
31
- } from '@sre/types/LLM.types';
32
- import { LLMHelper } from '@sre/LLMManager/LLM.helper';
33
-
34
- import { SystemEvents } from '@sre/Core/SystemEvents';
35
- import { SUPPORTED_MIME_TYPES_MAP } from '@sre/constants';
36
- import { Logger } from '@sre/helpers/Log.helper';
37
-
38
- import { LLMConnector } from '../LLMConnector';
39
-
40
- const logger = Logger('GoogleAIConnector');
41
-
42
/**
 * Model ids treated as supporting system instructions (per the constant's name).
 * Each id is listed exactly once; the list is only ever queried for membership.
 */
const MODELS_SUPPORT_SYSTEM_INSTRUCTION = [
    'gemini-1.5-pro-exp-0801',
    'gemini-1.5-pro-latest',
    'gemini-1.5-pro',
    'gemini-1.5-pro-001',
    'gemini-1.5-flash-latest',
    'gemini-1.5-flash-001',
    'gemini-1.5-flash',
];
// JSON response mode is gated on the very same list (same array instance, not a copy).
const MODELS_SUPPORT_JSON_RESPONSE = MODELS_SUPPORT_SYSTEM_INSTRUCTION;
53
-
54
// Flat list of every file MIME type accepted for Google AI's Gemini models,
// concatenated in category order: image, audio, video, document.
const VALID_MIME_TYPES = (['image', 'audio', 'video', 'document'] as const).flatMap((category) => [
    ...SUPPORTED_MIME_TYPES_MAP.GoogleAI[category],
]);
61
-
62
// Temporary widening of the SDK's UsageMetadata with the extra fields this connector reads;
// will be removed after updating the SDK.
type UsageMetadataWithThoughtsToken = UsageMetadata & {
    thoughtsTokenCount?: number;
    cost?: number;
};
64
-
65
/**
 * Fixed per-image cost, keyed by model name.
 *
 * Typed as a read-only string-keyed map because callers index it with an arbitrary
 * model name; a lookup for a model that is not listed yields `undefined`.
 */
const IMAGE_GEN_FIXED_PRICING: Readonly<Record<string, number | undefined>> = {
    'imagen-3.0-generate-001': 0.04, // Fixed cost per image
    'imagen-4.0-generate-001': 0.04, // Fixed cost per image
    'imagen-4': 0.04, // Standard Imagen 4
    'imagen-4-ultra': 0.06, // Imagen 4 Ultra
    'gemini-2.5-flash-image': 0.039,
};
72
-
73
- export class GoogleAIConnector extends LLMConnector {
74
- public name = 'LLM:GoogleAI';
75
-
76
- private validMimeTypes = {
77
- all: VALID_MIME_TYPES,
78
- image: SUPPORTED_MIME_TYPES_MAP.GoogleAI.image,
79
- };
80
-
81
/**
 * Creates a Google Generative AI SDK client from the API key carried in the request context.
 *
 * @param params - Request context whose `credentials` are read as basic (API key) credentials.
 * @returns A new `GoogleGenerativeAI` client bound to that key.
 * @throws Error when the credentials are missing or hold no API key.
 */
private async getClient(params: ILLMRequestContext): Promise<GoogleGenerativeAI> {
    const credentials = params.credentials as BasicCredentials;
    const apiKey = credentials?.apiKey;

    if (apiKey) {
        return new GoogleGenerativeAI(apiKey);
    }

    throw new Error('Please provide an API key for Google AI');
}
88
-
89
/**
 * Performs a single (non-streaming) `generateContent` call and normalizes the result.
 *
 * `body.messages` is detached from `body` (the property is deleted on the passed object)
 * and used as the prompt; the remaining body is handed to `getGenerativeModel`.
 *
 * @returns The response text, the lower-cased finish reason (`'stop'` when none is reported),
 *          any function calls found on the first candidate, and the raw usage metadata.
 * @throws Re-throws any failure after logging it.
 */
protected async request({ acRequest, body, context }: ILLMRequestFuncParams): Promise<TLLMChatResponse> {
    try {
        logger.debug(`request ${this.name}`, acRequest.candidate);
        const prompt = body.messages;
        delete body.messages;

        const genAI = await this.getClient(context);
        const $model = genAI.getGenerativeModel(body);

        const result = await $model.generateContent(prompt);

        const response = await result.response;
        const content = response.text();
        // `candidates` is optional on the SDK response; guard it so a candidate-less
        // response falls back to 'stop' instead of raising a TypeError.
        const firstCandidate = response.candidates?.[0];
        const finishReason = firstCandidate?.finishReason || 'stop';
        const usage = response?.usageMetadata as UsageMetadataWithThoughtsToken;
        this.reportUsage(usage, {
            modelEntryName: context.modelEntryName,
            keySource: context.isUserKey ? APIKeySource.User : APIKeySource.Smyth,
            agentId: context.agentId,
            teamId: context.teamId,
        });

        const toolCalls = firstCandidate?.content?.parts?.filter((part) => part.functionCall);

        let toolsData: ToolData[] = [];
        let useTool = false;

        if (toolCalls && toolCalls.length > 0) {
            // Ids are positional: the response parts carry no call id of their own here.
            toolsData = toolCalls.map((toolCall, index) => ({
                index,
                id: `tool-${index}`,
                type: 'function',
                name: toolCall.functionCall.name,
                arguments: JSON.stringify(toolCall.functionCall.args),
                role: TLLMMessageRole.Assistant,
            }));
            useTool = true;
        }

        return {
            content,
            finishReason: finishReason.toLowerCase(),
            useTool,
            toolsData,
            message: { content, role: 'assistant' },
            usage,
        };
    } catch (error: any) {
        logger.error(`request ${this.name}`, error, acRequest.candidate);
        throw error;
    }
}
141
-
142
/**
 * Starts a streaming `generateContentStream` call and returns an emitter that relays it.
 *
 * Events emitted on the returned emitter:
 * - `'content'` with each chunk's text,
 * - `TLLMEvent.ToolInfo` with the function calls found in a chunk,
 * - `'end'` with the last collected tool data, 100 ms after the stream is exhausted,
 * - `'error'` when reading the stream fails after the emitter has been returned.
 *
 * `body.messages` is detached from `body` (the property is deleted on the passed object)
 * and used as the prompt.
 *
 * @throws Re-throws (after logging) a failure to open the stream.
 */
protected async streamRequest({ acRequest, body, context }: ILLMRequestFuncParams): Promise<EventEmitter> {
    logger.debug(`streamRequest ${this.name}`, acRequest.candidate);
    const emitter = new EventEmitter();

    const prompt = body.messages;
    delete body.messages;

    const genAI = await this.getClient(context);
    const $model = genAI.getGenerativeModel(body);

    try {
        const result = await $model.generateContentStream(prompt);

        let toolsData: ToolData[] = [];
        let usage: UsageMetadataWithThoughtsToken;

        // Process the stream asynchronously, as we need to return the emitter immediately.
        void (async () => {
            try {
                for await (const chunk of result.stream) {
                    const chunkText = chunk.text();
                    emitter.emit('content', chunkText);

                    // `candidates` is optional on a chunk; guard the whole access path.
                    const parts = chunk.candidates?.[0]?.content?.parts;
                    if (parts) {
                        const toolCalls = parts.filter((part) => part.functionCall);
                        if (toolCalls.length > 0) {
                            toolsData = toolCalls.map((toolCall, index) => ({
                                index,
                                id: `tool-${index}`,
                                type: 'function',
                                name: toolCall.functionCall.name,
                                arguments: JSON.stringify(toolCall.functionCall.args),
                                role: TLLMMessageRole.Assistant,
                            }));
                            emitter.emit(TLLMEvent.ToolInfo, toolsData);
                        }
                    }

                    // the same usage is sent on each emit. IMPORTANT: google does not send usage for each chunk but
                    // rather just sends the same usage for the entire request.
                    // notice that the output tokens are only sent in the last chunk usage metadata.
                    // so we will just update a var to hold the latest usage and report it when the stream ends.
                    // e.g emit1: { input_tokens: 500, output_tokens: undefined } -> same input_tokens
                    // e.g emit2: { input_tokens: 500, output_tokens: undefined } -> same input_tokens
                    // e.g emit3: { input_tokens: 500, output_tokens: 10 } -> same input_tokens, new output_tokens in the last chunk
                    if (chunk?.usageMetadata) {
                        usage = chunk.usageMetadata as UsageMetadataWithThoughtsToken;
                    }
                }

                if (usage) {
                    this.reportUsage(usage, {
                        modelEntryName: context.modelEntryName,
                        keySource: context.isUserKey ? APIKeySource.User : APIKeySource.Smyth,
                        agentId: context.agentId,
                        teamId: context.teamId,
                    });
                }

                setTimeout(() => {
                    emitter.emit('end', toolsData);
                }, 100);
            } catch (error: any) {
                // The outer try/catch cannot see failures that happen after the emitter was
                // returned, so log them here and surface them to the consumer instead of
                // leaving an unhandled promise rejection.
                logger.error(`streamRequest ${this.name}`, error, acRequest.candidate);
                emitter.emit('error', error);
            }
        })();

        return emitter;
    } catch (error: any) {
        logger.error(`streamRequest ${this.name}`, error, acRequest.candidate);
        throw error;
    }
}
211
- // #region Image Generation, will be moved to a different subsystem/service
212
-
213
/**
 * Generates images through the `@google/genai` client, using the API selected by the
 * model's declared interface (`GenerateImages` when none is declared).
 *
 * - `GenerateContent`: sends `body.prompt` as the contents and collects every inline-data
 *   part of the first candidate as an image.
 * - `GenerateImages`: sends the prompt together with image count, aspect ratio and
 *   person-generation settings taken from `body` (with defaults).
 *
 * Usage is reported with the fixed per-image price looked up by model entry name.
 *
 * @returns `{ created, data }` where each `data` item carries `url` (a base64 data URL),
 *          `b64_json` and `revised_prompt` (the original prompt).
 * @throws Error when no API key is available, when the `GenerateContent` call yields no
 *         image, or when the model interface is not supported.
 */
protected async imageGenRequest({ body, context }: ILLMRequestFuncParams): Promise<any> {
    const apiKey = (context.credentials as BasicCredentials)?.apiKey;
    if (!apiKey) throw new Error('Please provide an API key for Google AI');

    const model = body.model || 'imagen-3.0-generate-001';
    const modelName = context.modelEntryName.replace(BUILT_IN_MODEL_PREFIX, '');

    // Use traditional Imagen models
    const config = {
        numberOfImages: body.n || 1,
        aspectRatio: body.aspect_ratio || body.size || '1:1',
        personGeneration: body.person_generation || 'allow_adult',
    };

    const ai = new GoogleGenAI({ apiKey });

    // Default to GenerateImages interface if not specified
    const modelInterface = context.modelInfo?.interface || LLMInterface.GenerateImages;

    let response: any;

    if (modelInterface === LLMInterface.GenerateContent) {
        // Use Gemini image generation API
        response = await ai.models.generateContent({
            model,
            contents: body.prompt,
        });

        // Extract image data from Gemini response format
        const imageData: any[] = [];
        if (response.candidates?.[0]?.content?.parts) {
            for (const part of response.candidates[0].content.parts) {
                if (part.inlineData?.data) {
                    imageData.push({
                        // Label the data URL with the MIME type returned alongside the bytes;
                        // fall back to PNG only when the part does not declare one.
                        url: `data:${part.inlineData.mimeType || 'image/png'};base64,${part.inlineData.data}`,
                        b64_json: part.inlineData.data,
                        revised_prompt: body.prompt,
                    });
                }
            }
        }

        // Report input tokens and image cost pricing based on the official pricing page:
        // https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-flash-image-preview
        const usageMetadata = response?.usageMetadata as UsageMetadataWithThoughtsToken;

        this.reportImageUsage({
            usage: {
                cost: IMAGE_GEN_FIXED_PRICING[modelName],
                usageMetadata,
            },
            context,
        });

        if (imageData.length === 0) {
            throw new Error(
                'Please enter a valid prompt — for example: "Create a picture of a nano banana dish in a fancy restaurant with a Gemini theme."'
            );
        }

        return {
            created: Math.floor(Date.now() / 1000),
            data: imageData,
        };
    } else if (modelInterface === LLMInterface.GenerateImages) {
        response = await ai.models.generateImages({
            model,
            prompt: body.prompt,
            config,
        });

        // Report input tokens and image cost pricing based on the official pricing page:
        // https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-flash-image-preview
        const usageMetadata = response?.usageMetadata as UsageMetadataWithThoughtsToken;
        this.reportImageUsage({
            usage: {
                cost: IMAGE_GEN_FIXED_PRICING[modelName],
                usageMetadata,
            },
            numberOfImages: config.numberOfImages,
            context,
        });

        return {
            created: Math.floor(Date.now() / 1000),
            data:
                response.generatedImages?.map((generatedImage: any) => {
                    // An entry may come back without an `image`; read it defensively so one
                    // missing image does not fail the whole request with a TypeError.
                    const imageBytes = generatedImage.image?.imageBytes;
                    const mimeType = generatedImage.image?.mimeType || 'image/png';
                    return {
                        url: imageBytes ? `data:${mimeType};base64,${imageBytes}` : undefined,
                        b64_json: imageBytes,
                        revised_prompt: body.prompt,
                    };
                }) || [],
        };
    } else {
        throw new Error(`Unsupported interface: ${modelInterface}`);
    }
}
309
-
310
/**
 * Edits images by sending the already-prepared `body.contents` to `generateContent`.
 *
 * Only models whose declared interface is `GenerateContent` are accepted. Every inline-data
 * part of the first candidate is returned as an image, and usage is reported with the fixed
 * per-image price looked up by model entry name.
 *
 * @returns `{ created, data }` where each `data` item carries `url` (a base64 data URL),
 *          `b64_json` and `revised_prompt`.
 * @throws Error when no API key is available or the model does not support editing.
 */
protected async imageEditRequest({ body, context }: ILLMRequestFuncParams): Promise<any> {
    const apiKey = (context.credentials as BasicCredentials)?.apiKey;
    if (!apiKey) throw new Error('Please provide an API key for Google AI');

    // A model supports image editing if it implements the `generateContent` interface.
    const supportsEditing = context.modelInfo?.interface === LLMInterface.GenerateContent;
    if (!supportsEditing) {
        throw new Error(`Image editing is not supported for model: ${body.model}. This model only supports image generation.`);
    }

    const ai = new GoogleGenAI({ apiKey });
    const modelName = context.modelEntryName.replace(BUILT_IN_MODEL_PREFIX, '');

    // Use the prepared body which already contains processed files and contents
    const response = await ai.models.generateContent({
        model: body.model,
        contents: body.contents,
    });

    // Extract image data from Gemini response format
    const imageData: any[] = [];
    if (response.candidates?.[0]?.content?.parts) {
        for (const part of response.candidates[0].content.parts) {
            if (part.inlineData?.data) {
                imageData.push({
                    // Label the data URL with the MIME type returned alongside the bytes;
                    // fall back to PNG only when the part does not declare one.
                    url: `data:${part.inlineData.mimeType || 'image/png'};base64,${part.inlineData.data}`,
                    b64_json: part.inlineData.data,
                    revised_prompt: body._metadata?.prompt || body.prompt,
                });
            }
        }
    }

    // Report pricing for input tokens and image costs
    const usageMetadata = response?.usageMetadata as UsageMetadataWithThoughtsToken;

    this.reportImageUsage({
        usage: {
            cost: IMAGE_GEN_FIXED_PRICING[modelName],
            usageMetadata,
        },
        context,
    });

    return {
        created: Math.floor(Date.now() / 1000),
        data: imageData,
    };
}
359
-
360
/**
 * Translates prepared, provider-agnostic parameters into a Google AI request body.
 *
 * Requests flagged with the image-generation capability are delegated to the image body
 * builders (the edit builder when files are attached, the generation builder otherwise).
 * All other requests get `model` and `messages`, plus a system instruction and a generation
 * config when the parameters call for them.
 */
protected async reqBodyAdapter(params: TLLMPreparedParams): Promise<TGoogleAIRequestBody> {
    const model = params?.model;

    // Image requests take a separate path: files present means edit, otherwise generate.
    if (params?.capabilities?.imageGeneration) {
        const isEditRequest = params?.files?.length > 0;
        return (isEditRequest ? this.prepareImageEditBody(params) : this.prepareBodyForImageGenRequest(params)) as any;
    }

    const messages = await this.prepareMessages(params);

    const body: ModelParams & { messages: string | TLLMMessageBlock[] | GenerateContentRequest } = {
        model: model as string,
        messages,
    };

    let systemInstruction = '';
    let responseMimeType = '';

    const wantsJson = (params?.responseFormat || '') === 'json';
    if (wantsJson) {
        // The textual instruction is always added; the native JSON MIME type only for listed models.
        systemInstruction += JSON_RESPONSE_INSTRUCTION;
        if (MODELS_SUPPORT_JSON_RESPONSE.includes(model as string)) {
            responseMimeType = 'application/json';
        }
    }

    // Copy over only the sampling settings that were actually provided.
    const config: GenerationConfig = {};
    const { maxTokens, temperature, topP, topK, stopSequences } = params;

    if (maxTokens !== undefined) {
        config.maxOutputTokens = maxTokens;
    }
    if (temperature !== undefined) {
        config.temperature = temperature;
    }
    if (topP !== undefined) {
        config.topP = topP;
    }
    if (topK !== undefined) {
        config.topK = topK;
    }
    if (stopSequences?.length) {
        config.stopSequences = stopSequences;
    }
    if (responseMimeType) {
        config.responseMimeType = responseMimeType;
    }

    if (systemInstruction) {
        body.systemInstruction = systemInstruction;
    }

    const hasGenerationSettings = Object.keys(config).length > 0;
    if (hasGenerationSettings) {
        body.generationConfig = config;
    }

    return body;
}
409
-
410
/**
 * Converts Google AI usage metadata into a usage record, emits it as a `USAGE:LLM`
 * system event and returns it.
 *
 * Text input tokens come from the TEXT entry of the prompt token details, falling back to
 * the overall prompt token count. For model families with a tier threshold, the record is
 * tagged `tier1` below the threshold and `tier2` otherwise; other models get an empty tier.
 */
protected reportUsage(
    usage: UsageMetadataWithThoughtsToken,
    metadata: { modelEntryName: string; keySource: APIKeySource; agentId: string; teamId: string }
) {
    // SmythOS (built-in) models have a prefix, so we need to remove it to get the model name
    const modelName = metadata.modelEntryName.replace(BUILT_IN_MODEL_PREFIX, '');

    const promptDetails = usage?.['promptTokensDetails'];
    const tokensFor = (modality: string) => promptDetails?.find((entry) => entry.modality === modality)?.tokenCount;

    const textInputTokens = tokensFor('TEXT') || usage?.promptTokenCount || 0;
    const audioInputTokens = tokensFor('AUDIO') || 0;

    // Input-token thresholds per model family; the first family contained in the model name wins.
    const tierThresholds = {
        'gemini-1.5-pro': 128_000,
        'gemini-2.5-pro': 200_000,
    };
    const matchedFamily = Object.entries(tierThresholds).find(([family]) => modelName.includes(family));

    let tier = '';
    if (matchedFamily) {
        const [, threshold] = matchedFamily;
        tier = textInputTokens < threshold ? 'tier1' : 'tier2';
    }

    // #endregion

    const usageData = {
        sourceId: `llm:${modelName}`,
        input_tokens: textInputTokens,
        output_tokens: usage?.candidatesTokenCount || 0,
        input_tokens_audio: audioInputTokens,
        input_tokens_cache_read: usage?.cachedContentTokenCount || 0,
        input_tokens_cache_write: 0,
        reasoning_tokens: usage?.thoughtsTokenCount,
        keySource: metadata.keySource,
        agentId: metadata.agentId,
        teamId: metadata.teamId,
        tier,
    };
    SystemEvents.emit('USAGE:LLM', usageData);

    return usageData;
}
451
-
452
/**
 * Reads the TEXT and IMAGE prompt-token counts from Google AI usage metadata.
 *
 * @param usage - Usage metadata; a missing object or missing breakdown yields zeros.
 * @returns `textTokens` and `imageTokens`, each defaulting to 0.
 */
private extractTokenCounts(usage: UsageMetadataWithThoughtsToken): { textTokens: number; imageTokens: number } {
    const details = usage?.['promptTokensDetails'];
    const countFor = (modality: string): number => details?.find((detail) => detail.modality === modality)?.tokenCount || 0;

    return {
        textTokens: countFor('TEXT'),
        imageTokens: countFor('IMAGE'),
    };
}
461
-
462
/**
 * Emits a `USAGE:API` system event describing an image generation/edit request.
 *
 * @param usage - Per-image `cost` (optional) and the raw `usageMetadata` (optional).
 * @param context - Request context supplying `isUserKey`, `agentId` and `teamId`.
 * @param numberOfImages - Multiplier applied to the per-image cost; defaults to 1.
 */
protected reportImageUsage({
    usage,
    context,
    numberOfImages = 1,
}: {
    usage: { cost?: number; usageMetadata?: UsageMetadataWithThoughtsToken };
    context: ILLMRequestContext;
    numberOfImages?: number;
}) {
    // Token counts stay at zero when the provider returned no usage metadata.
    const { textTokens, imageTokens } = usage.usageMetadata
        ? this.extractTokenCounts(usage.usageMetadata)
        : { textTokens: 0, imageTokens: 0 };

    const imageUsageData = {
        sourceId: `api:imagegen.smyth`,
        keySource: context.isUserKey ? APIKeySource.User : APIKeySource.Smyth,

        // `cost` is optional; treat a missing price as 0 so the record never carries NaN.
        cost: (usage.cost ?? 0) * numberOfImages,
        input_tokens_txt: textTokens,
        input_tokens_img: imageTokens,

        agentId: context.agentId,
        teamId: context.teamId,
    };
    SystemEvents.emit('USAGE:API', imageUsageData);
}
494
-
495
/**
 * Converts generic tool definitions into the `{ tools, toolChoice }` structure used by this connector.
 *
 * @param toolDefinitions - Items exposing `name`, `description`, `properties` and `requiredFields`.
 * @param toolChoice - Value placed under `toolChoice.type`; defaults to 'auto'.
 * @returns One `functionDeclarations` wrapper per tool, plus the tool choice.
 */
public formatToolsConfig({ toolDefinitions, toolChoice = 'auto' }) {
    const toDeclaration = (tool) => {
        // An OBJECT schema must not have empty properties, so substitute a placeholder field.
        const hasProperties = tool.properties && Object.keys(tool.properties).length > 0;

        return {
            functionDeclarations: [
                {
                    // Function names are sanitized before being sent.
                    name: this.sanitizeFunctionName(tool.name),
                    description: tool.description || '',
                    parameters: {
                        type: 'OBJECT',
                        properties: hasProperties ? tool.properties : { dummy: { type: 'string' } },
                        required: tool.requiredFields || [],
                    },
                },
            ],
        };
    };

    return {
        tools: toolDefinitions.map((tool) => toDeclaration(tool)),
        toolChoice: { type: toolChoice },
    };
}
527
-
528
/**
 * Builds the message blocks that carry a tool-calling turn and its results.
 *
 * The optional `messageBlock` is converted into a block whose `parts` hold its text/content and
 * its function calls; every entry of `toolsData` becomes a user-role `functionResponse` block.
 *
 * @param messageBlock - The message that requested the tool calls (may be falsy).
 * @param toolsData - Executed tools; `name` and `result` are read from each entry.
 * @returns The converted message block (if any) followed by one block per tool result.
 */
public transformToolMessageBlocks({
    messageBlock,
    toolsData,
}: {
    messageBlock: TLLMMessageBlock;
    toolsData: ToolData[];
}): TLLMToolResultMessageBlock[] {
    const messageBlocks: TLLMToolResultMessageBlock[] = [];

    if (messageBlock) {
        const content = [];
        if (typeof messageBlock.content === 'string') {
            content.push({ text: messageBlock.content });
        } else if (Array.isArray(messageBlock.content)) {
            content.push(...messageBlock.content);
        }

        if (messageBlock.parts) {
            const functionCalls = messageBlock.parts.filter((part) => part.functionCall);
            content.push(
                ...functionCalls.map((call) => {
                    const { name, args } = call.functionCall;
                    return {
                        functionCall: {
                            name,
                            // Args may arrive as a JSON string or as an already-parsed object;
                            // JSON.parse on an object would throw, so only parse strings.
                            args: typeof args === 'string' ? JSON.parse(args) : args,
                        },
                    };
                })
            );
        }

        messageBlocks.push({
            role: messageBlock.role,
            parts: content,
        });
    }

    const transformedToolsData = toolsData.map(
        (toolData): TLLMToolResultMessageBlock => ({
            role: TLLMMessageRole.User,
            parts: [
                {
                    functionResponse: {
                        name: toolData.name,
                        response: {
                            name: toolData.name,
                            // Non-string results are serialized to JSON.
                            content: typeof toolData.result === 'string' ? toolData.result : JSON.stringify(toolData.result),
                        },
                    },
                },
            ],
        })
    );

    return [...messageBlocks, ...transformedToolsData];
}
584
-
585
/**
 * Normalizes messages into the role/parts shape expected by Google AI.
 *
 * Duplicate user messages are removed via `LLMHelper.removeDuplicateUserMessages`, roles are
 * mapped (assistant and system -> model, user kept, anything else -> user), and every message is
 * reduced to a single text part. Empty text is replaced by '...' because empty text causes an error.
 *
 * @param messages - Messages to normalize.
 * @returns Shallow copies with `role` and `parts` rewritten and `content` removed.
 */
public getConsistentMessages(messages: TLLMMessageBlock[]): TLLMMessageBlock[] {
    const PLACEHOLDER = '...';
    const joinText = (blocks: any[]) => blocks.map((block) => block?.text || PLACEHOLDER).join(' ');

    return LLMHelper.removeDuplicateUserMessages(messages).map((source) => {
        const normalized = { ...source };

        // Map roles to valid Google AI roles.
        if (normalized.role === TLLMMessageRole.Assistant || normalized.role === TLLMMessageRole.System) {
            normalized.role = TLLMMessageRole.Model;
        } else if (normalized.role !== TLLMMessageRole.User) {
            // Unknown roles fall back to user.
            normalized.role = TLLMMessageRole.User;
        }

        // `parts` takes precedence over array content, which takes precedence over plain content.
        let text = '';
        if (normalized?.parts) {
            text = joinText(normalized.parts);
        } else if (Array.isArray(normalized?.content)) {
            text = joinText(normalized.content);
        } else if (normalized?.content) {
            text = (normalized.content as string) || PLACEHOLDER;
        }

        normalized.parts = [{ text: text || PLACEHOLDER }];

        // Remove content to avoid an error.
        delete normalized.content;

        return normalized;
    });
}
622
-
623
/**
 * Chooses the message-preparation strategy for a request.
 *
 * Attached files take priority, then tool definitions; otherwise the request is treated as a
 * plain text query.
 *
 * @param params - Prepared request parameters.
 * @returns Whatever the selected `prepareMessagesWith*` helper produces.
 */
private async prepareMessages(params: TLLMPreparedParams): Promise<string | TLLMMessageBlock[] | GenerateContentRequest> {
    const attachedFiles: BinaryInput[] = params?.files || [];

    if (attachedFiles.length > 0) {
        return await this.prepareMessagesWithFiles(params);
    }

    if (params?.toolsConfig?.tools?.length > 0) {
        return await this.prepareMessagesWithTools(params);
    }

    return await this.prepareMessagesWithTextQuery(params);
}
638
-
639
/**
 * Uploads the request's files and builds the prompt parts that reference them.
 *
 * Steps: upload every input through `BinaryInput.upload`, keep the files with a supported MIME
 * type, upload those to Google AI, then combine the resulting file references with the text of
 * the last message.
 *
 * @param params - Prepared request parameters (`files`, `messages`, `credentials`, `agentId`, `model`).
 * @returns The prompt parts (typed as `string` for the caller, but actually an array).
 * @throws Error when more than one file is sent together with a video, when no file has a
 *         supported type, or when no file could be uploaded to Google AI.
 */
private async prepareMessagesWithFiles(params: TLLMPreparedParams): Promise<string> {
    const model = params.model;

    let messages: string | TLLMMessageBlock[] = params?.messages || '';
    // NOTE(review): never populated in this method, so only a trailing newline is appended below — confirm intent.
    const systemInstruction = '';
    const files: BinaryInput[] = params?.files || [];

    // #region Upload files
    const promises = [];
    const _files = [];

    for (const image of files) {
        const binaryInput = BinaryInput.from(image);
        promises.push(binaryInput.upload(AccessCandidate.agent(params.agentId)));

        _files.push(binaryInput);
    }

    await Promise.all(promises);
    // #endregion Upload files

    // If the user provides a mix of valid and invalid files, only the valid ones are processed.
    const validFiles = this.getValidFiles(_files, 'all');

    const hasVideo = validFiles.some((file) => file?.mimetype?.includes('video'));

    // GoogleAI only supports one video file at a time
    if (hasVideo && validFiles.length > 1) {
        throw new Error('Only one video file is supported at a time.');
    }

    // A failed upload resolves to null so that the remaining files can still be used.
    const fileUploadingTasks = validFiles.map((file) => async () => {
        try {
            const uploadedFile = await this.uploadFile({
                file,
                apiKey: (params.credentials as BasicCredentials).apiKey,
                agentId: params.agentId,
            });

            return { url: uploadedFile.url, mimetype: file.mimetype };
        } catch {
            return null;
        }
    });

    const uploadResults = await processWithConcurrencyLimit(fileUploadingTasks);

    // Drop failed uploads before use: a null entry would otherwise make `getFileData`
    // dereference null, and an all-failed batch would bypass the emptiness check below.
    const uploadedFiles = (uploadResults || []).filter((uploaded) => uploaded != null) as { url: string; mimetype: string }[];

    // Throw when there are no successfully uploaded files.
    if (uploadedFiles.length === 0) {
        throw new Error(`There is an issue during upload file in Google AI Server!`);
    }

    const fileData = this.getFileData(uploadedFiles);

    // NOTE: `pop()` removes the last message from the caller's array.
    const userMessage: TLLMMessageBlock = Array.isArray(messages) ? messages.pop() : { role: TLLMMessageRole.User, content: '' };
    let prompt = userMessage?.content || '';

    // If the model does not support system instruction, add it to the prompt.
    if (!MODELS_SUPPORT_SYSTEM_INSTRUCTION.includes(model as string)) {
        prompt = `${prompt}\n${systemInstruction}`;
    }

    // A single file is sent before a `{ text }` part; multiple files follow the raw prompt.
    messages = fileData.length === 1 ? ([...fileData, { text: prompt }] as any) : ([prompt, ...fileData] as any);

    return messages as string;
}
707
-
708
/**
 * Builds a `GenerateContentRequest` for a request that carries tool definitions.
 *
 * A string system message is moved into `systemInstruction`; the remaining messages become
 * `contents`. The tool choice is mapped to a function-calling mode:
 * 'auto' -> AUTO, 'required' -> ANY, 'none' -> NONE, `{ type: 'function' }` -> ANY, else AUTO.
 *
 * @param params - Prepared request parameters (`messages`, `toolsConfig`).
 * @returns The request with contents and, when present, system instruction, tools and tool config.
 */
private async prepareMessagesWithTools(params: TLLMPreparedParams): Promise<GenerateContentRequest> {
    const messages = params?.messages || [];

    let conversation: TLLMMessageBlock[] = messages;
    let systemInstruction = '';

    if (LLMHelper.hasSystemMessage(messages)) {
        const { systemMessage, otherMessages } = LLMHelper.separateSystemMessages(messages);
        const systemContent = (systemMessage as TLLMMessageBlock)?.content;

        // Only plain-string system content is forwarded.
        if (typeof systemContent === 'string') {
            systemInstruction = systemContent;
        }
        conversation = otherMessages;
    }

    const toolsPrompt: GenerateContentRequest = {
        contents: conversation as any,
    };

    if (systemInstruction) {
        toolsPrompt.systemInstruction = systemInstruction;
    }

    const toolsConfig = params?.toolsConfig;
    if (toolsConfig?.tools) {
        toolsPrompt.tools = toolsConfig.tools as any;
    }

    const toolChoice = toolsConfig?.tool_choice;
    if (toolChoice) {
        // AUTO covers both the explicit 'auto' value and any unrecognized value.
        let mode: FunctionCallingMode = FunctionCallingMode.AUTO;

        if (toolChoice === 'required') {
            mode = FunctionCallingMode.ANY;
        } else if (toolChoice === 'none') {
            mode = FunctionCallingMode.NONE;
        } else if (typeof toolChoice === 'object' && toolChoice.type === 'function') {
            // OpenAI-style named tool choice: force a function call (any function).
            mode = FunctionCallingMode.ANY;
        }

        toolsPrompt.toolConfig = {
            functionCallingConfig: { mode },
        };
    }

    return toolsPrompt;
}
757
-
758
/**
 * Flattens a plain text conversation into a single prompt string.
 *
 * The first text part of every non-system message is joined with newlines. For models outside
 * `MODELS_SUPPORT_SYSTEM_INSTRUCTION`, the system message (plus the JSON response instruction
 * when `responseFormat` is 'json') is appended to the prompt.
 *
 * @param params - Prepared request parameters (`messages`, `model`, `responseFormat`).
 * @returns The flattened prompt.
 */
private async prepareMessagesWithTextQuery(params: TLLMPreparedParams): Promise<string> {
    const model = params.model;
    let systemInstruction = '';
    let prompt = '';

    const { systemMessage, otherMessages } = LLMHelper.separateSystemMessages(params?.messages as TLLMMessageBlock[]);

    if ('content' in systemMessage) {
        systemInstruction = systemMessage.content as string;
    }

    // The response MIME type is decided in `reqBodyAdapter`; only the instruction text is needed here.
    if (params?.responseFormat === 'json') {
        systemInstruction += JSON_RESPONSE_INSTRUCTION;
    }

    if (otherMessages?.length > 0) {
        // Concatenate the messages into the prompt; only the first part's text of each is used.
        prompt += otherMessages.map((message) => message?.parts?.[0]?.text || '').join('\n');
    }

    // If the model does not support system instruction, add it to the prompt.
    // NOTE(review): for models that do support it, the system message is not returned from here — confirm it is applied elsewhere.
    if (!MODELS_SUPPORT_SYSTEM_INSTRUCTION.includes(model as string)) {
        prompt = `${prompt}\n${systemInstruction}`;
    }

    return prompt;
}
793
-
794
/**
 * Builds the request body for a pure image-generation call.
 *
 * @param params - Prepared request parameters; `aspectRatio` and `personGeneration` are read
 *                 as untyped extras.
 * @returns An object with `prompt`, `model`, `aspectRatio` and `personGeneration`.
 */
private async prepareBodyForImageGenRequest(params: TLLMPreparedParams): Promise<any> {
    const extras = params as any;
    const { prompt, model } = params;

    return {
        prompt,
        model,
        aspectRatio: extras.aspectRatio,
        personGeneration: extras.personGeneration,
    };
}
802
-
803
/**
 * Builds the request body for an image-edit call.
 *
 * Every valid image file is inlined as base64 `inlineData`, followed by a text part holding the
 * edit prompt (the prompt, or 'Edit this image', plus the optional `instruction`).
 *
 * @param params - Prepared request parameters (`model`, `prompt`, `files` and untyped extras).
 * @returns `{ model, contents, _metadata }`; `_metadata` is kept for usage reporting.
 * @throws Error when no file is supplied, none is a valid image, or a file cannot be read.
 */
private async prepareImageEditBody(params: TLLMPreparedParams): Promise<any> {
    const extras = params as any;
    const model = params.model || 'gemini-2.5-flash-image-preview';

    // Compose the edit prompt from the prompt and the optional instruction.
    const basePrompt = params.prompt || 'Edit this image';
    const editPrompt = extras.instruction ? `${basePrompt}. ${extras.instruction}` : basePrompt;

    const files: BinaryInput[] = params?.files || [];
    if (files.length === 0) {
        throw new Error('No image provided for editing. Please include an image file.');
    }

    // Only image files are usable for editing.
    const validImageFiles = this.getValidFiles(files, 'image');
    if (validImageFiles.length === 0) {
        throw new Error('No valid image files found for editing. Please provide at least one image file.');
    }

    // The original image(s) go first in the contents, each inlined as base64.
    const contents: any[] = [];
    for (const file of validImageFiles) {
        try {
            const bufferData = await file.getBuffer();

            contents.push({
                inlineData: {
                    mimeType: file.mimetype,
                    data: Buffer.from(bufferData).toString('base64'),
                },
            });
        } catch (error) {
            throw new Error(`Failed to process image file: ${error.message}`);
        }
    }

    // The edit instruction comes last.
    contents.push({ text: editPrompt });

    return {
        model,
        contents,
        // Additional metadata for usage reporting
        _metadata: {
            prompt: editPrompt,
            numberOfImages: extras.n || 1,
            aspectRatio: extras.aspect_ratio || extras.size || '1:1',
            personGeneration: extras.person_generation || 'allow_adult',
        },
    };
}
861
-
862
/**
 * Produces a function name made only of letters, digits, underscore, dot and dash, starting with
 * a letter or underscore, and at most 64 characters long.
 *
 * @param name - Raw tool/function name; may be null or undefined.
 * @returns The sanitized name, or '_unnamed_function' when nothing usable remains.
 */
private sanitizeFunctionName(name: string): string {
    const fallbackName = '_unnamed_function';

    if (name == null) {
        return fallbackName;
    }

    // Remove any characters that are not alphanumeric, underscore, dot, or dash.
    let sanitized = name.replace(/[^a-zA-Z0-9_.-]/g, '');

    // Check for emptiness BEFORE prefixing: prefixing first turned '' into '_' and made this
    // fallback unreachable.
    if (sanitized === '') {
        return fallbackName;
    }

    // Ensure the name starts with a letter or underscore.
    if (!/^[a-zA-Z_]/.test(sanitized)) {
        sanitized = '_' + sanitized;
    }

    // Truncate to 64 characters if longer.
    return sanitized.slice(0, 64);
}
887
-
888
/**
 * Uploads a file to the Google AI file manager and waits until processing has finished.
 *
 * The file's data is written to a temporary file, uploaded from there, and the upload is polled
 * every 10 seconds while its state is PROCESSING.
 *
 * @param file - Input whose data and `mimetype` are uploaded.
 * @param apiKey - Google AI API key.
 * @param agentId - Agent on whose behalf the file data is read.
 * @returns The URI of the uploaded file (empty string when the response carries none).
 * @throws Error prefixed with 'Error uploading file for Google AI: ' on any failure.
 */
private async uploadFile({ file, apiKey, agentId }: { file: BinaryInput; apiKey: string; agentId: string }): Promise<{ url: string }> {
    // Tracked outside the try block so the cleanup in `finally` can always reach it.
    let tempFilePath = '';

    try {
        if (!apiKey || !file?.mimetype) {
            throw new Error('Missing required parameters to save file for Google AI!');
        }

        // Stage the data in the OS temp directory under a unique name.
        const fileName = uid();
        tempFilePath = path.join(os.tmpdir(), fileName);

        const bufferData = await file.readData(AccessCandidate.agent(agentId));

        // Write buffer data to temp file
        await fs.promises.writeFile(tempFilePath, new Uint8Array(bufferData));

        // Upload the file to the Google File Manager
        const fileManager = new GoogleAIFileManager(apiKey);

        const uploadResponse = await fileManager.uploadFile(tempFilePath, {
            mimeType: file.mimetype,
            displayName: fileName,
        });

        const name = uploadResponse.file.name;

        // Poll getFile() every 10 seconds until the file leaves the PROCESSING state.
        let uploadedFile = await fileManager.getFile(name);
        while (uploadedFile.state === FileState.PROCESSING) {
            process.stdout.write('.');
            await new Promise((resolve) => setTimeout(resolve, 10_000));
            uploadedFile = await fileManager.getFile(name);
        }

        if (uploadedFile.state === FileState.FAILED) {
            throw new Error('File processing failed.');
        }

        return {
            url: uploadResponse.file.uri || '',
        };
    } catch (error) {
        throw new Error(`Error uploading file for Google AI: ${error.message}`);
    } finally {
        // Always remove the temp file, including on failure, so failed uploads do not leak files.
        // Cleanup is best-effort: the file may not exist if the write itself failed.
        if (tempFilePath) {
            await fs.promises.unlink(tempFilePath).catch(() => undefined);
        }
    }
}
938
-
939
/**
 * Keeps only the files whose MIME type is allowed for the given category.
 *
 * @param files - Candidate files.
 * @param type - Which allow-list of `validMimeTypes` to check against.
 * @returns The files with an allowed MIME type, in their original order.
 * @throws Error listing the accepted types when no file qualifies.
 */
private getValidFiles(files: BinaryInput[], type: 'image' | 'all') {
    const validSources = files.filter((file) => this.validMimeTypes[type].includes(file?.mimetype));

    if (validSources.length === 0) {
        throw new Error(`Unsupported file(s). Please make sure your file is one of the following types: ${this.validMimeTypes[type].join(', ')}`);
    }

    return validSources;
}
954
-
955
/**
 * Converts uploaded-file descriptors into `fileData` parts.
 *
 * @param files - Entries with the uploaded file's `url` and `mimetype`.
 * @returns One `{ fileData: { mimeType, fileUri } }` object per entry, in the same order.
 */
private getFileData(
    files: {
        url: string;
        mimetype: string;
    }[]
): {
    fileData: {
        mimeType: string;
        fileUri: string;
    };
}[] {
    return files.map(({ mimetype, url }) => ({
        fileData: {
            mimeType: mimetype,
            fileUri: url,
        },
    }));
}
983
- }
1
+ import os from 'os';
2
+ import path from 'path';
3
+ import EventEmitter from 'events';
4
+ import fs from 'fs';
5
+
6
+ import { GoogleGenerativeAI, ModelParams, GenerationConfig, GenerateContentRequest, UsageMetadata, FunctionCallingMode } from '@google/generative-ai';
7
+ import { GoogleAIFileManager, FileState } from '@google/generative-ai/server';
8
+ import { GoogleGenAI } from '@google/genai';
9
+
10
+ import { JSON_RESPONSE_INSTRUCTION, BUILT_IN_MODEL_PREFIX } from '@sre/constants';
11
+ import { BinaryInput } from '@sre/helpers/BinaryInput.helper';
12
+ import { AccessCandidate } from '@sre/Security/AccessControl/AccessCandidate.class';
13
+ import { uid } from '@sre/utils';
14
+
15
+ import { processWithConcurrencyLimit } from '@sre/utils';
16
+
17
+ import {
18
+ TLLMMessageBlock,
19
+ ToolData,
20
+ TLLMMessageRole,
21
+ TLLMToolResultMessageBlock,
22
+ APIKeySource,
23
+ TLLMEvent,
24
+ BasicCredentials,
25
+ ILLMRequestFuncParams,
26
+ TLLMChatResponse,
27
+ TGoogleAIRequestBody,
28
+ ILLMRequestContext,
29
+ TLLMPreparedParams,
30
+ LLMInterface,
31
+ } from '@sre/types/LLM.types';
32
+ import { LLMHelper } from '@sre/LLMManager/LLM.helper';
33
+
34
+ import { SystemEvents } from '@sre/Core/SystemEvents';
35
+ import { SUPPORTED_MIME_TYPES_MAP } from '@sre/constants';
36
+ import { Logger } from '@sre/helpers/Log.helper';
37
+
38
+ import { LLMConnector } from '../LLMConnector';
39
+
40
const logger = Logger('GoogleAIConnector');

// Model ids for which the system instruction is NOT folded into the prompt text.
const MODELS_SUPPORT_SYSTEM_INSTRUCTION = [
    'gemini-1.5-pro-exp-0801',
    'gemini-1.5-pro-latest',
    'gemini-1.5-pro',
    'gemini-1.5-pro-001',
    'gemini-1.5-flash-latest',
    'gemini-1.5-flash-001',
    'gemini-1.5-flash',
];
// Model ids for which `responseMimeType` is set to 'application/json' (same list, same array instance).
const MODELS_SUPPORT_JSON_RESPONSE = MODELS_SUPPORT_SYSTEM_INSTRUCTION;

// Supported file MIME types for Google AI's Gemini models
const VALID_MIME_TYPES = [
    ...SUPPORTED_MIME_TYPES_MAP.GoogleAI.image,
    ...SUPPORTED_MIME_TYPES_MAP.GoogleAI.audio,
    ...SUPPORTED_MIME_TYPES_MAP.GoogleAI.video,
    ...SUPPORTED_MIME_TYPES_MAP.GoogleAI.document,
];

// will be removed after updating the SDK
type UsageMetadataWithThoughtsToken = UsageMetadata & { thoughtsTokenCount?: number; cost?: number };

// Fixed cost per generated image, keyed by model name (currency presumably USD — TODO confirm).
const IMAGE_GEN_FIXED_PRICING = {
    'imagen-3.0-generate-001': 0.04, // Fixed cost per image
    'imagen-4.0-generate-001': 0.04, // Fixed cost per image
    'imagen-4': 0.04, // Standard Imagen 4
    'imagen-4-ultra': 0.06, // Imagen 4 Ultra
    'gemini-2.5-flash-image': 0.039,
};
72
+
73
+ export class GoogleAIConnector extends LLMConnector {
74
+ public name = 'LLM:GoogleAI';
75
+
76
+ private validMimeTypes = {
77
+ all: VALID_MIME_TYPES,
78
+ image: SUPPORTED_MIME_TYPES_MAP.GoogleAI.image,
79
+ };
80
+
81
/**
 * Creates a Google Generative AI client from the API key found in the request context.
 *
 * @param params - Request context; `credentials.apiKey` is read from it.
 * @returns A new `GoogleGenerativeAI` client.
 * @throws Error when no API key is available.
 */
private async getClient(params: ILLMRequestContext): Promise<GoogleGenerativeAI> {
    const credentials = params.credentials as BasicCredentials;
    const apiKey = credentials?.apiKey;

    if (apiKey) {
        return new GoogleGenerativeAI(apiKey);
    }

    throw new Error('Please provide an API key for Google AI');
}
88
+
89
/**
 * Sends a non-streaming generate-content request and maps the response to a chat response.
 *
 * `body.messages` is used as the prompt and removed from `body`; the rest of `body` configures
 * the generative model. Usage is reported through `reportUsage`.
 *
 * @param acRequest - Access request; its candidate is attached to log entries.
 * @param body - Prepared request body (mutated: `messages` is deleted).
 * @param context - Request context (credentials, model entry name, agent/team ids).
 * @returns Content, lower-cased finish reason, tool-call data and usage.
 * @throws Re-throws any error after logging it.
 */
protected async request({ acRequest, body, context }: ILLMRequestFuncParams): Promise<TLLMChatResponse> {
    try {
        logger.debug(`request ${this.name}`, acRequest.candidate);
        const prompt = body.messages;
        delete body.messages;

        const genAI = await this.getClient(context);
        const $model = genAI.getGenerativeModel(body);

        const result = await $model.generateContent(prompt);

        const response = await result.response;
        const content = response.text();

        // Guard the candidate lookup: a response without candidates must not raise a TypeError here.
        const candidate = response.candidates?.[0];
        const finishReason = candidate?.finishReason || 'stop';
        const usage = response?.usageMetadata as UsageMetadataWithThoughtsToken;
        this.reportUsage(usage, {
            modelEntryName: context.modelEntryName,
            keySource: context.isUserKey ? APIKeySource.User : APIKeySource.Smyth,
            agentId: context.agentId,
            teamId: context.teamId,
        });

        const toolCalls = candidate?.content?.parts?.filter((part) => part.functionCall);

        let toolsData: ToolData[] = [];
        let useTool = false;

        if (toolCalls && toolCalls.length > 0) {
            // Tool-call ids are synthesized from the position of the call.
            toolsData = toolCalls.map((toolCall, index) => ({
                index,
                id: `tool-${index}`,
                type: 'function',
                name: toolCall.functionCall.name,
                arguments: JSON.stringify(toolCall.functionCall.args),
                role: TLLMMessageRole.Assistant,
            }));
            useTool = true;
        }

        return {
            content,
            finishReason: finishReason.toLowerCase(),
            useTool,
            toolsData,
            message: { content, role: 'assistant' },
            usage,
        };
    } catch (error: any) {
        logger.error(`request ${this.name}`, error, acRequest.candidate);
        throw error;
    }
}
141
+
142
/**
 * Sends a streaming generate-content request and exposes the stream through an event emitter.
 *
 * Events: 'content' (text of each chunk), `TLLMEvent.ToolInfo` (tool calls found in a chunk),
 * 'end' (tool data, emitted 100 ms after the stream finishes) and 'error' (a failure while the
 * stream is being consumed, emitted only when an 'error' listener is registered).
 *
 * @param acRequest - Access request; its candidate is attached to log entries.
 * @param body - Prepared request body (mutated: `messages` is deleted).
 * @param context - Request context (credentials, model entry name, agent/team ids).
 * @returns The emitter, returned as soon as the stream has been opened.
 * @throws Re-throws errors raised while opening the stream, after logging them.
 */
protected async streamRequest({ acRequest, body, context }: ILLMRequestFuncParams): Promise<EventEmitter> {
    logger.debug(`streamRequest ${this.name}`, acRequest.candidate);
    const emitter = new EventEmitter();

    const prompt = body.messages;
    delete body.messages;

    const genAI = await this.getClient(context);
    const $model = genAI.getGenerativeModel(body);

    try {
        const result = await $model.generateContentStream(prompt);

        let toolsData: ToolData[] = [];
        let usage: UsageMetadataWithThoughtsToken;

        // The stream is consumed in the background because the emitter must be returned immediately.
        const consumeStream = async () => {
            for await (const chunk of result.stream) {
                emitter.emit('content', chunk.text());

                // Guard the candidate lookup: a chunk may carry no candidates.
                const parts = chunk.candidates?.[0]?.content?.parts;
                if (parts) {
                    const toolCalls = parts.filter((part) => part.functionCall);
                    if (toolCalls.length > 0) {
                        toolsData = toolCalls.map((toolCall, index) => ({
                            index,
                            id: `tool-${index}`,
                            type: 'function',
                            name: toolCall.functionCall.name,
                            arguments: JSON.stringify(toolCall.functionCall.args),
                            role: TLLMMessageRole.Assistant,
                        }));
                        emitter.emit(TLLMEvent.ToolInfo, toolsData);
                    }
                }

                // Google sends usage for the whole request rather than per chunk, and the output
                // tokens only appear in the last chunk, e.g.:
                //   chunk 1: { input_tokens: 500, output_tokens: undefined }
                //   chunk 2: { input_tokens: 500, output_tokens: undefined }
                //   chunk 3: { input_tokens: 500, output_tokens: 10 }
                // So keep the latest usage and report it once when the stream ends.
                if (chunk?.usageMetadata) {
                    usage = chunk.usageMetadata as UsageMetadataWithThoughtsToken;
                }
            }

            if (usage) {
                this.reportUsage(usage, {
                    modelEntryName: context.modelEntryName,
                    keySource: context.isUserKey ? APIKeySource.User : APIKeySource.Smyth,
                    agentId: context.agentId,
                    teamId: context.teamId,
                });
            }

            setTimeout(() => {
                emitter.emit('end', toolsData);
            }, 100);
        };

        // Without this handler a mid-stream failure becomes an unhandled promise rejection and
        // the consumer is never told. The 'error' event is emitted only when someone listens,
        // because an unhandled 'error' event on an EventEmitter throws.
        // NOTE(review): no 'end' event follows an error — confirm consumers do not wait on it.
        consumeStream().catch((error: any) => {
            logger.error(`streamRequest ${this.name}`, error, acRequest.candidate);
            if (emitter.listenerCount('error') > 0) {
                emitter.emit('error', error);
            }
        });

        return emitter;
    } catch (error: any) {
        logger.error(`streamRequest ${this.name}`, error, acRequest.candidate);
        throw error;
    }
}
211
+ // #region Image Generation, will be moved to a different subsystem/service
212
+
213
/**
 * Generates images through either the `generateContent` (Gemini) or the `generateImages`
 * (Imagen) interface, chosen from `context.modelInfo.interface` (default: `GenerateImages`).
 *
 * Usage is reported through `reportImageUsage` using the fixed per-image price of the model.
 *
 * @param body - Request body (`model`, `prompt`, `n`, `aspect_ratio`/`size`, `person_generation`).
 * @param context - Request context (credentials, model entry name, model info).
 * @returns `{ created, data }` where each data item has `url`, `b64_json` and `revised_prompt`.
 * @throws Error when the API key is missing, the interface is unsupported, or the
 *         `generateContent` response contains no image.
 */
protected async imageGenRequest({ body, context }: ILLMRequestFuncParams): Promise<any> {
    const apiKey = (context.credentials as BasicCredentials)?.apiKey;
    if (!apiKey) throw new Error('Please provide an API key for Google AI');

    const model = body.model || 'imagen-3.0-generate-001';
    const modelName = context.modelEntryName.replace(BUILT_IN_MODEL_PREFIX, '');

    // Only used by the `generateImages` interface.
    const config = {
        numberOfImages: body.n || 1,
        aspectRatio: body.aspect_ratio || body.size || '1:1',
        personGeneration: body.person_generation || 'allow_adult',
    };

    const ai = new GoogleGenAI({ apiKey });

    // Default to GenerateImages interface if not specified
    const modelInterface = context.modelInfo?.interface || LLMInterface.GenerateImages;

    // Build the data URL from the MIME type returned with the image; fall back to PNG when absent.
    const toDataUrl = (base64: string, mimeType?: string) => `data:${mimeType || 'image/png'};base64,${base64}`;

    if (modelInterface === LLMInterface.GenerateContent) {
        // Use Gemini image generation API
        const response: any = await ai.models.generateContent({
            model,
            contents: body.prompt,
        });

        // Collect every inline image part of the first candidate.
        const imageData: any[] = [];
        for (const part of response.candidates?.[0]?.content?.parts || []) {
            if (part.inlineData?.data) {
                imageData.push({
                    url: toDataUrl(part.inlineData.data, part.inlineData.mimeType),
                    b64_json: part.inlineData.data,
                    revised_prompt: body.prompt,
                });
            }
        }

        // Report input tokens and image cost pricing based on the official pricing page:
        // https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-flash-image-preview
        this.reportImageUsage({
            usage: {
                cost: IMAGE_GEN_FIXED_PRICING[modelName],
                usageMetadata: response?.usageMetadata as UsageMetadataWithThoughtsToken,
            },
            context,
        });

        if (imageData.length === 0) {
            throw new Error(
                'Please enter a valid prompt — for example: "Create a picture of a nano banana dish in a fancy restaurant with a Gemini theme."'
            );
        }

        return {
            created: Math.floor(Date.now() / 1000),
            data: imageData,
        };
    }

    if (modelInterface === LLMInterface.GenerateImages) {
        const response: any = await ai.models.generateImages({
            model,
            prompt: body.prompt,
            config,
        });

        // Report input tokens and image cost pricing based on the official pricing page:
        // https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-flash-image-preview
        this.reportImageUsage({
            usage: {
                cost: IMAGE_GEN_FIXED_PRICING[modelName],
                usageMetadata: response?.usageMetadata as UsageMetadataWithThoughtsToken,
            },
            numberOfImages: config.numberOfImages,
            context,
        });

        return {
            created: Math.floor(Date.now() / 1000),
            data:
                response.generatedImages?.map((generatedImage: any) => {
                    // Guard `image`: an entry without image data yields undefined fields, not a TypeError.
                    const imageBytes = generatedImage.image?.imageBytes;
                    return {
                        url: imageBytes ? toDataUrl(imageBytes, generatedImage.image?.mimeType) : undefined,
                        b64_json: imageBytes,
                        revised_prompt: body.prompt,
                    };
                }) || [],
        };
    }

    throw new Error(`Unsupported interface: ${modelInterface}`);
}
309
+
310
/**
 * Edits images by sending the prepared `contents` (inline images plus instruction) to the
 * `generateContent` interface.
 *
 * Usage is reported through `reportImageUsage` using the fixed per-image price of the model.
 *
 * @param body - Prepared body (`model`, `contents`, optional `_metadata.prompt` / `prompt`).
 * @param context - Request context (credentials, model entry name, model info).
 * @returns `{ created, data }` where each data item has `url`, `b64_json` and `revised_prompt`.
 * @throws Error when the API key is missing or the model does not use the `generateContent` interface.
 */
protected async imageEditRequest({ body, context }: ILLMRequestFuncParams): Promise<any> {
    const apiKey = (context.credentials as BasicCredentials)?.apiKey;
    if (!apiKey) throw new Error('Please provide an API key for Google AI');

    // A model supports image editing if it implements the `generateContent` interface.
    const supportsEditing = context.modelInfo?.interface === LLMInterface.GenerateContent;
    if (!supportsEditing) {
        throw new Error(`Image editing is not supported for model: ${body.model}. This model only supports image generation.`);
    }

    const ai = new GoogleGenAI({ apiKey });
    const modelName = context.modelEntryName.replace(BUILT_IN_MODEL_PREFIX, '');

    // Use the prepared body which already contains processed files and contents
    const response = await ai.models.generateContent({
        model: body.model,
        contents: body.contents,
    });

    // Collect every inline image part of the first candidate.
    const imageData: any[] = [];
    for (const part of response.candidates?.[0]?.content?.parts || []) {
        if (part.inlineData?.data) {
            imageData.push({
                // Use the MIME type returned with the image; fall back to PNG when absent.
                url: `data:${part.inlineData.mimeType || 'image/png'};base64,${part.inlineData.data}`,
                b64_json: part.inlineData.data,
                revised_prompt: body._metadata?.prompt || body.prompt,
            });
        }
    }

    // Report pricing for input tokens and image costs
    this.reportImageUsage({
        usage: {
            cost: IMAGE_GEN_FIXED_PRICING[modelName],
            usageMetadata: response?.usageMetadata as UsageMetadataWithThoughtsToken,
        },
        context,
    });

    return {
        created: Math.floor(Date.now() / 1000),
        data: imageData,
    };
}
359
+
360
/**
 * Translates prepared request parameters into the body used by this connector.
 *
 * Image-capable requests are delegated to the image helpers (edit when files are attached,
 * generation otherwise). All other requests get `model`, prepared `messages`, an optional JSON
 * system instruction and a `generationConfig` holding only the options that were supplied.
 *
 * @param params - Prepared request parameters.
 * @returns The request body.
 */
protected async reqBodyAdapter(params: TLLMPreparedParams): Promise<TGoogleAIRequestBody> {
    const model = params?.model;

    // Image requests are recognized by capability; attached files mean "edit", none means "generate".
    if (params?.capabilities?.imageGeneration) {
        const isEdit = params?.files?.length > 0;
        return (isEdit ? this.prepareImageEditBody(params) : this.prepareBodyForImageGenRequest(params)) as any;
    }

    const messages = await this.prepareMessages(params);

    const body: ModelParams & { messages: string | TLLMMessageBlock[] | GenerateContentRequest } = {
        model: model as string,
        messages,
    };

    // JSON output: always add the instruction; set the response MIME type only for listed models.
    const wantsJson = (params?.responseFormat || '') === 'json';
    const systemInstruction = wantsJson ? JSON_RESPONSE_INSTRUCTION : '';
    const responseMimeType = wantsJson && MODELS_SUPPORT_JSON_RESPONSE.includes(model as string) ? 'application/json' : '';

    // Copy over only the generation options that were actually provided.
    const config: GenerationConfig = {};
    const { maxTokens, temperature, topP, topK, stopSequences } = params;

    if (maxTokens !== undefined) config.maxOutputTokens = maxTokens;
    if (temperature !== undefined) config.temperature = temperature;
    if (topP !== undefined) config.topP = topP;
    if (topK !== undefined) config.topK = topK;
    if (stopSequences?.length) config.stopSequences = stopSequences;
    if (responseMimeType) config.responseMimeType = responseMimeType;

    if (systemInstruction) {
        body.systemInstruction = systemInstruction;
    }
    if (Object.keys(config).length > 0) {
        body.generationConfig = config;
    }

    return body;
}
409
+
410
/**
 * Builds the usage record for a text request, emits it on `USAGE:LLM` and returns it.
 *
 * Text input tokens come from the `TEXT` entry of `promptTokensDetails`, falling back to
 * `promptTokenCount`. For models listed in the threshold table, `tier` is `tier1` when the
 * text input token count is below the model's threshold and `tier2` otherwise; for all
 * other models it stays an empty string.
 */
protected reportUsage(
    usage: UsageMetadataWithThoughtsToken,
    metadata: { modelEntryName: string; keySource: APIKeySource; agentId: string; teamId: string }
) {
    // SmythOS (built-in) models are prefixed; drop the prefix to get the bare model name.
    const modelName = metadata.modelEntryName.replace(BUILT_IN_MODEL_PREFIX, '');

    const tierThresholds = {
        'gemini-1.5-pro': 128_000,
        'gemini-2.5-pro': 200_000,
    };

    const promptDetails = usage?.['promptTokensDetails'];
    const tokensFor = (modality: string) => promptDetails?.find((detail) => detail.modality === modality)?.tokenCount;

    const textInputTokens = tokensFor('TEXT') || usage?.promptTokenCount || 0;
    const audioInputTokens = tokensFor('AUDIO') || 0;

    // The first threshold key contained in the model name decides the tier.
    const tieredModel = Object.keys(tierThresholds).find((model) => modelName.includes(model));
    const tier = tieredModel ? (textInputTokens < tierThresholds[tieredModel] ? 'tier1' : 'tier2') : '';

    const usageData = {
        sourceId: `llm:${modelName}`,
        input_tokens: textInputTokens,
        output_tokens: usage?.candidatesTokenCount || 0,
        input_tokens_audio: audioInputTokens,
        input_tokens_cache_read: usage?.cachedContentTokenCount || 0,
        input_tokens_cache_write: 0,
        reasoning_tokens: usage?.thoughtsTokenCount,
        keySource: metadata.keySource,
        agentId: metadata.agentId,
        teamId: metadata.teamId,
        tier,
    };

    SystemEvents.emit('USAGE:LLM', usageData);

    return usageData;
}
451
+
452
/**
 * Reads the prompt token counts for the `TEXT` and `IMAGE` modalities from Google AI
 * usage metadata. A modality that is missing (or has a falsy count) is reported as 0.
 */
private extractTokenCounts(usage: UsageMetadataWithThoughtsToken): { textTokens: number; imageTokens: number } {
    const promptDetails = usage?.['promptTokensDetails'];
    const countFor = (modality: string): number => promptDetails?.find((detail) => detail.modality === modality)?.tokenCount || 0;

    return {
        textTokens: countFor('TEXT'),
        imageTokens: countFor('IMAGE'),
    };
}
461
+
462
/**
 * Emits a `USAGE:API` event describing the cost and input tokens of an image request.
 *
 * @param usage - `cost` is the per-image price (optional); `usageMetadata` is the raw
 *                usage block from which text/image prompt token counts are extracted.
 * @param context - Supplies `isUserKey`, `agentId` and `teamId` for attribution.
 * @param numberOfImages - Multiplier applied to `usage.cost`; defaults to 1.
 */
protected reportImageUsage({
    usage,
    context,
    numberOfImages = 1,
}: {
    usage: { cost?: number; usageMetadata?: UsageMetadataWithThoughtsToken };
    context: ILLMRequestContext;
    numberOfImages?: number;
}) {
    // Extract text and image tokens from the raw usage metadata when it is available
    let input_tokens_txt = 0;
    let input_tokens_img = 0;

    if (usage.usageMetadata) {
        const { textTokens, imageTokens } = this.extractTokenCounts(usage.usageMetadata);
        input_tokens_txt = textTokens;
        input_tokens_img = imageTokens;
    }

    // `cost` is optional (e.g. no fixed price is known for the model). Treat a missing
    // price as 0 so the event never carries NaN, which would serialize to `null`.
    const unitCost = usage.cost ?? 0;

    const imageUsageData = {
        sourceId: `api:imagegen.smyth`,
        keySource: context.isUserKey ? APIKeySource.User : APIKeySource.Smyth,

        cost: unitCost * numberOfImages,
        input_tokens_txt,
        input_tokens_img,

        agentId: context.agentId,
        teamId: context.teamId,
    };
    SystemEvents.emit('USAGE:API', imageUsageData);
}
494
+
495
/**
 * Converts generic tool definitions into the `tools` / `toolChoice` structure built here:
 * one `functionDeclarations` wrapper per tool, plus the requested tool choice type.
 */
public formatToolsConfig({ toolDefinitions, toolChoice = 'auto' }) {
    const toDeclaration = ({ name, description, properties, requiredFields }) => {
        // Function names must be valid, so they go through the sanitizer first.
        const safeName = this.sanitizeFunctionName(name);

        // An OBJECT schema needs at least one property; substitute a placeholder when none exist.
        const hasProperties = properties && Object.keys(properties).length > 0;
        const schemaProperties = hasProperties ? properties : { dummy: { type: 'string' } };

        return {
            functionDeclarations: [
                {
                    name: safeName,
                    description: description || '',
                    parameters: {
                        type: 'OBJECT',
                        properties: schemaProperties,
                        required: requiredFields || [],
                    },
                },
            ],
        };
    };

    return {
        tools: toolDefinitions.map((tool) => toDeclaration(tool)),
        toolChoice: { type: toolChoice },
    };
}
527
+
528
/**
 * Converts the model's tool-calling message and the executed tool results into message blocks.
 *
 * @param messageBlock - The message that requested the tool calls (may be falsy, in which
 *                       case only the tool results are returned).
 * @param toolsData - Executed tools; each becomes a user-role `functionResponse` block.
 * @returns The converted `messageBlock` (if any) followed by one block per tool result.
 * @throws SyntaxError when a function call's `args` is a string that is not valid JSON.
 */
public transformToolMessageBlocks({
    messageBlock,
    toolsData,
}: {
    messageBlock: TLLMMessageBlock;
    toolsData: ToolData[];
}): TLLMToolResultMessageBlock[] {
    const messageBlocks: TLLMToolResultMessageBlock[] = [];

    if (messageBlock) {
        const content = [];
        if (typeof messageBlock.content === 'string') {
            content.push({ text: messageBlock.content });
        } else if (Array.isArray(messageBlock.content)) {
            content.push(...messageBlock.content);
        }

        if (messageBlock.parts) {
            const functionCalls = messageBlock.parts.filter((part) => part.functionCall);
            if (functionCalls.length > 0) {
                content.push(
                    ...functionCalls.map((call) => {
                        // Args may arrive as a JSON string or as an already-parsed object.
                        // Parse only strings: JSON.parse on an object or undefined would throw.
                        const rawArgs: unknown = call.functionCall.args;
                        const args = typeof rawArgs === 'string' ? JSON.parse(rawArgs) : (rawArgs ?? {});

                        return {
                            functionCall: {
                                name: call.functionCall.name,
                                args,
                            },
                        };
                    })
                );
            }
        }

        messageBlocks.push({
            role: messageBlock.role,
            parts: content,
        });
    }

    // Tool results are sent back with the user role; non-string results are JSON-encoded.
    const transformedToolsData = toolsData.map(
        (toolData): TLLMToolResultMessageBlock => ({
            role: TLLMMessageRole.User,
            parts: [
                {
                    functionResponse: {
                        name: toolData.name,
                        response: {
                            name: toolData.name,
                            content: typeof toolData.result === 'string' ? toolData.result : JSON.stringify(toolData.result),
                        },
                    },
                },
            ],
        })
    );

    return [...messageBlocks, ...transformedToolsData];
}
584
+
585
/**
 * Normalizes messages: duplicate user messages are removed via `LLMHelper`, roles are
 * mapped to `model` / `user`, and each message's content is flattened into a single
 * text part. Empty text is replaced by '...' because empty text causes an error.
 */
public getConsistentMessages(messages: TLLMMessageBlock[]): TLLMMessageBlock[] {
    const PLACEHOLDER = '...';
    const joinTexts = (blocks: any[]): string => blocks.map((block) => block?.text || PLACEHOLDER).join(' ');

    const deduped = LLMHelper.removeDuplicateUserMessages(messages);

    return deduped.map((original) => {
        const normalized = { ...original };

        // Assistant and system messages become `model`; anything that is not `user` becomes `user`.
        const currentRole = normalized.role;
        if (currentRole === TLLMMessageRole.Assistant || currentRole === TLLMMessageRole.System) {
            normalized.role = TLLMMessageRole.Model;
        } else if (currentRole !== TLLMMessageRole.User) {
            normalized.role = TLLMMessageRole.User;
        }

        // Prefer existing parts, then array content, then plain content.
        let flattened = '';
        if (normalized?.parts) {
            flattened = joinTexts(normalized.parts);
        } else if (Array.isArray(normalized?.content)) {
            flattened = joinTexts(normalized.content);
        } else if (normalized?.content) {
            flattened = normalized.content as string;
        }

        normalized.parts = [{ text: flattened || PLACEHOLDER }];

        // `content` is dropped to avoid an error.
        delete normalized.content;

        return normalized;
    });
}
622
+
623
/**
 * Picks the message-preparation strategy for a request: files take priority,
 * then tools, and plain text queries are the fallback.
 */
private async prepareMessages(params: TLLMPreparedParams): Promise<string | TLLMMessageBlock[] | GenerateContentRequest> {
    const attachedFiles: BinaryInput[] = params?.files || [];

    if (attachedFiles.length > 0) {
        return await this.prepareMessagesWithFiles(params);
    }

    if (params?.toolsConfig?.tools?.length > 0) {
        return await this.prepareMessagesWithTools(params);
    }

    return await this.prepareMessagesWithTextQuery(params);
}
638
+
639
/**
 * Builds the prompt for a request that carries files: uploads each valid file and
 * combines the resulting file references with the text of the last message.
 *
 * @param params - Prepared params; reads `model`, `messages`, `files`, `agentId` and `credentials`.
 * @returns The prompt parts (file references plus prompt text); typed as `string` only
 *          because the value is cast on return.
 * @throws Error when more than one file is sent alongside a video, when no file has a
 *         supported type, or when none of the uploads succeeds.
 */
private async prepareMessagesWithFiles(params: TLLMPreparedParams): Promise<string> {
    const model = params.model;

    const messages: string | TLLMMessageBlock[] = params?.messages || '';
    // NOTE: never populated in this method, so the fallback below only appends a newline.
    const systemInstruction = '';
    const files: BinaryInput[] = params?.files || [];

    // #region Upload files
    const promises = [];
    const _files = [];

    for (const image of files) {
        const binaryInput = BinaryInput.from(image);
        promises.push(binaryInput.upload(AccessCandidate.agent(params.agentId)));

        _files.push(binaryInput);
    }

    await Promise.all(promises);
    // #endregion Upload files

    // If the user provides a mix of valid and invalid files, only the valid ones are processed
    const validFiles = this.getValidFiles(_files, 'all');

    const hasVideo = validFiles.some((file) => file?.mimetype?.includes('video'));

    // GoogleAI only supports one video file at a time
    if (hasVideo && validFiles.length > 1) {
        throw new Error('Only one video file is supported at a time.');
    }

    // Each task resolves to the uploaded file reference, or to `null` when the upload fails.
    const fileUploadingTasks = validFiles.map((file) => async () => {
        try {
            const uploadedFile = await this.uploadFile({
                file,
                apiKey: (params.credentials as BasicCredentials).apiKey,
                agentId: params.agentId,
            });

            return { url: uploadedFile.url, mimetype: file.mimetype };
        } catch {
            return null;
        }
    });

    const uploadResults = await processWithConcurrencyLimit(fileUploadingTasks);

    // Drop failed uploads (null) so the emptiness check reflects real successes and
    // nothing downstream dereferences a null entry.
    const uploadedFiles = (uploadResults || []).filter(Boolean) as { url: string; mimetype: string }[];

    // We throw an error when there are no valid uploaded files
    if (uploadedFiles.length === 0) {
        throw new Error(`There is an issue during upload file in Google AI Server!`);
    }

    const fileData = this.getFileData(uploadedFiles);

    // Read the last message without popping it, so the caller's `params.messages` stays intact.
    const userMessage: TLLMMessageBlock = Array.isArray(messages) ? messages[messages.length - 1] : { role: TLLMMessageRole.User, content: '' };
    let prompt = userMessage?.content || '';

    // If the model does not support system instruction, it is added to the prompt
    if (!MODELS_SUPPORT_SYSTEM_INSTRUCTION.includes(model as string)) {
        prompt = `${prompt}\n${systemInstruction}`;
    }

    // A single file goes before the text part; with several files the prompt comes first.
    const promptParts = fileData.length === 1 ? ([...fileData, { text: prompt }] as any) : ([prompt, ...fileData] as any);

    return promptParts as string;
}
707
+
708
/**
 * Builds a `GenerateContentRequest` for tool-enabled conversations: the system message
 * (when it is a string) becomes `systemInstruction`, the remaining messages become
 * `contents`, and the tool choice is translated into a function calling mode.
 */
private async prepareMessagesWithTools(params: TLLMPreparedParams): Promise<GenerateContentRequest> {
    const allMessages = params?.messages || [];

    let conversation: TLLMMessageBlock[];
    let systemText = '';

    if (LLMHelper.hasSystemMessage(allMessages)) {
        const split = LLMHelper.separateSystemMessages(allMessages);
        const systemContent = (split.systemMessage as TLLMMessageBlock)?.content;

        // Only plain-string system content is usable as the instruction.
        if (typeof systemContent === 'string') systemText = systemContent;
        conversation = split.otherMessages;
    } else {
        conversation = allMessages;
    }

    const request: GenerateContentRequest = {
        contents: conversation as any,
    };

    if (systemText) request.systemInstruction = systemText;

    const toolsConfig = params?.toolsConfig;
    if (toolsConfig?.tools) request.tools = toolsConfig.tools as any;

    const choice = toolsConfig?.tool_choice;
    if (choice) {
        // 'required' and OpenAI-style named choices force a function call (ANY),
        // 'none' disables calls, and everything else (including 'auto') stays on AUTO.
        let mode: FunctionCallingMode = FunctionCallingMode.AUTO;

        if (choice === 'required' || (typeof choice === 'object' && choice.type === 'function')) {
            mode = FunctionCallingMode.ANY;
        } else if (choice === 'none') {
            mode = FunctionCallingMode.NONE;
        }

        request.toolConfig = {
            functionCallingConfig: { mode },
        };
    }

    return request;
}
757
+
758
/**
 * Flattens a text-only conversation into a single prompt string.
 *
 * The first text part of every non-system message is joined with newlines. The system
 * instruction (system message content plus the JSON instruction in JSON mode) is appended
 * to the prompt only for models that are not listed as supporting system instructions.
 *
 * @param params - Prepared params; reads `model`, `messages` and `responseFormat`.
 * @returns The prompt string.
 */
private async prepareMessagesWithTextQuery(params: TLLMPreparedParams): Promise<string> {
    const model = params.model;
    let systemInstruction = '';
    let prompt = '';

    const { systemMessage, otherMessages } = LLMHelper.separateSystemMessages(params?.messages as TLLMMessageBlock[]);

    // Guard against a missing system message: the `in` operator throws on null/undefined.
    if (systemMessage && 'content' in systemMessage) {
        systemInstruction = systemMessage.content as string;
    }

    // The response MIME type is not needed to build the prompt text, so only the
    // textual JSON instruction is handled here.
    if (params?.responseFormat === 'json') {
        systemInstruction += JSON_RESPONSE_INSTRUCTION;
    }

    if (otherMessages?.length > 0) {
        // Concatenate the first text part of each message into the prompt
        prompt += otherMessages.map((message) => message?.parts?.[0]?.text || '').join('\n');
    }

    // If the model does not support system instruction, it is added to the prompt
    if (!MODELS_SUPPORT_SYSTEM_INSTRUCTION.includes(model as string)) {
        prompt = `${prompt}\n${systemInstruction}`;
    }

    return prompt;
}
793
+
794
/**
 * Builds the body for an image generation request by picking the prompt, model,
 * aspect ratio and person-generation setting off the prepared params.
 */
private async prepareBodyForImageGenRequest(params: TLLMPreparedParams): Promise<any> {
    const { prompt, model } = params;
    // These two options are not part of the declared params type, hence the cast.
    const { aspectRatio, personGeneration } = params as any;

    return { prompt, model, aspectRatio, personGeneration };
}
802
+
803
/**
 * Builds the request body for image editing: every valid image is inlined as base64,
 * followed by a text part holding the edit prompt. `_metadata` carries extra values
 * for the caller (prompt, image count, aspect ratio, person generation).
 */
private async prepareImageEditBody(params: TLLMPreparedParams): Promise<any> {
    const extras = params as any;
    const model = params.model || 'gemini-2.5-flash-image-preview';

    // The edit prompt is the base prompt, optionally extended by an explicit instruction.
    const basePrompt = params.prompt || 'Edit this image';
    const editPrompt = extras.instruction ? `${basePrompt}. ${extras.instruction}` : basePrompt;

    const files: BinaryInput[] = params?.files || [];
    if (files.length === 0) {
        throw new Error('No image provided for editing. Please include an image file.');
    }

    // Only valid image files are eligible for editing.
    const validImageFiles = this.getValidFiles(files, 'image');
    if (validImageFiles.length === 0) {
        throw new Error('No valid image files found for editing. Please provide at least one image file.');
    }

    // Reads one file and wraps its base64 payload as an inline data part.
    const toInlinePart = async (file: BinaryInput) => {
        try {
            const rawBuffer = await file.getBuffer();
            const encoded = Buffer.from(rawBuffer).toString('base64');

            return { inlineData: { mimeType: file.mimetype, data: encoded } };
        } catch (error) {
            throw new Error(`Failed to process image file: ${error.message}`);
        }
    };

    // Images first (processed one at a time, in order), then the instruction text.
    const contents: any[] = [];
    for (const file of validImageFiles) {
        contents.push(await toInlinePart(file));
    }
    contents.push({ text: editPrompt });

    return {
        model,
        contents,
        _metadata: {
            prompt: editPrompt,
            numberOfImages: extras.n || 1,
            aspectRatio: extras.aspect_ratio || extras.size || '1:1',
            personGeneration: extras.person_generation || 'allow_adult',
        },
    };
}
861
+
862
/**
 * Produces a function name made only of letters, digits, underscores, dots and dashes,
 * starting with a letter or underscore and at most 64 characters long.
 *
 * @param name - Raw tool name; may be null or undefined.
 * @returns The sanitized name, or `_unnamed_function` when `name` is null/undefined
 *          or contains no valid characters at all.
 */
private sanitizeFunctionName(name: string): string {
    const fallbackName = '_unnamed_function';

    // Check if name is undefined or null
    if (name == null) {
        return fallbackName;
    }

    // Remove any characters that are not alphanumeric, underscore, dot, or dash
    let sanitized = name.replace(/[^a-zA-Z0-9_.-]/g, '');

    // This check must run BEFORE the prefixing step below: prefixing an empty string
    // yields '_', which would make the fallback unreachable.
    if (sanitized === '') {
        return fallbackName;
    }

    // Ensure the name starts with a letter or underscore
    if (!/^[a-zA-Z_]/.test(sanitized)) {
        sanitized = '_' + sanitized;
    }

    // Truncate to 64 characters if longer
    return sanitized.slice(0, 64);
}
887
+
888
/**
 * Uploads a file through the Google AI file manager and waits until it leaves the
 * PROCESSING state.
 *
 * The file content is written to a temporary file first because the upload call takes a
 * file path. The temporary file is removed on every exit path, success or failure.
 *
 * @param file - Binary input to upload; its `mimetype` is required.
 * @param apiKey - Google AI API key.
 * @param agentId - Agent on whose behalf the file data is read.
 * @returns The URI of the uploaded file (empty string when the API returns none).
 * @throws Error (prefixed with "Error uploading file for Google AI:") on missing
 *         parameters, I/O or upload failures, or when file processing fails.
 */
private async uploadFile({ file, apiKey, agentId }: { file: BinaryInput; apiKey: string; agentId: string }): Promise<{ url: string }> {
    let tempFilePath: string | undefined;

    try {
        if (!apiKey || !file?.mimetype) {
            throw new Error('Missing required parameters to save file for Google AI!');
        }

        // Stage the content in the OS temp directory under a unique name
        const fileName = uid();
        tempFilePath = path.join(os.tmpdir(), fileName);

        const bufferData = await file.readData(AccessCandidate.agent(agentId));

        // Write buffer data to temp file
        await fs.promises.writeFile(tempFilePath, new Uint8Array(bufferData));

        // Upload the file to the Google File Manager
        const fileManager = new GoogleAIFileManager(apiKey);

        const uploadResponse = await fileManager.uploadFile(tempFilePath, {
            mimeType: file.mimetype,
            displayName: fileName,
        });

        const name = uploadResponse.file.name;

        // Poll getFile() on a set interval (10 seconds here) to check file state.
        let uploadedFile = await fileManager.getFile(name);
        while (uploadedFile.state === FileState.PROCESSING) {
            process.stdout.write('.');
            // Sleep for 10 seconds
            await new Promise((resolve) => setTimeout(resolve, 10_000));
            // Fetch the file from the API again
            uploadedFile = await fileManager.getFile(name);
        }

        if (uploadedFile.state === FileState.FAILED) {
            throw new Error('File processing failed.');
        }

        return {
            url: uploadResponse.file.uri || '',
        };
    } catch (error) {
        throw new Error(`Error uploading file for Google AI: ${error.message}`);
    } finally {
        // Best-effort cleanup on every path. The unlink error is ignored on purpose: the
        // file may never have been created, and a cleanup failure must not mask the
        // upload outcome.
        if (tempFilePath) {
            await fs.promises.unlink(tempFilePath).catch(() => undefined);
        }
    }
}
938
+
939
/**
 * Keeps only the files whose MIME type is listed in `this.validMimeTypes[type]`.
 * Throws when no file qualifies, listing the accepted types in the message.
 */
private getValidFiles(files: BinaryInput[], type: 'image' | 'all') {
    const accepted = files.filter((candidate) => this.validMimeTypes[type].includes(candidate?.mimetype));

    if (accepted.length > 0) {
        return accepted;
    }

    throw new Error(`Unsupported file(s). Please make sure your file is one of the following types: ${this.validMimeTypes[type].join(', ')}`);
}
954
+
955
/**
 * Maps uploaded file descriptors (`url`, `mimetype`) to `fileData` parts
 * (`fileUri`, `mimeType`), preserving their order.
 */
private getFileData(
    files: {
        url: string;
        mimetype: string;
    }[]
): {
    fileData: {
        mimeType: string;
        fileUri: string;
    };
}[] {
    const parts = [];

    for (const { url, mimetype } of files) {
        parts.push({ fileData: { mimeType: mimetype, fileUri: url } });
    }

    return parts;
}
983
+ }