@smythos/sre 1.6.0 → 1.6.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237) hide show
  1. package/CHANGELOG +111 -111
  2. package/LICENSE +18 -18
  3. package/README.md +135 -135
  4. package/dist/index.js +43 -43
  5. package/dist/index.js.map +1 -1
  6. package/dist/types/index.d.ts +1 -0
  7. package/dist/types/subsystems/LLMManager/LLM.service/connectors/Ollama.class.d.ts +45 -0
  8. package/dist/types/subsystems/LLMManager/ModelsProvider.service/connectors/SmythModelsProvider.class.d.ts +39 -0
  9. package/dist/types/types/LLM.types.d.ts +2 -0
  10. package/package.json +2 -1
  11. package/src/Components/APICall/APICall.class.ts +161 -161
  12. package/src/Components/APICall/AccessTokenManager.ts +166 -166
  13. package/src/Components/APICall/ArrayBufferResponse.helper.ts +58 -58
  14. package/src/Components/APICall/OAuth.helper.ts +447 -447
  15. package/src/Components/APICall/mimeTypeCategories.ts +46 -46
  16. package/src/Components/APICall/parseData.ts +167 -167
  17. package/src/Components/APICall/parseHeaders.ts +41 -41
  18. package/src/Components/APICall/parseProxy.ts +68 -68
  19. package/src/Components/APICall/parseUrl.ts +91 -91
  20. package/src/Components/APIEndpoint.class.ts +234 -234
  21. package/src/Components/APIOutput.class.ts +58 -58
  22. package/src/Components/AgentPlugin.class.ts +102 -102
  23. package/src/Components/Async.class.ts +155 -155
  24. package/src/Components/Await.class.ts +90 -90
  25. package/src/Components/Classifier.class.ts +158 -158
  26. package/src/Components/Component.class.ts +147 -147
  27. package/src/Components/ComponentHost.class.ts +38 -38
  28. package/src/Components/DataSourceCleaner.class.ts +92 -92
  29. package/src/Components/DataSourceIndexer.class.ts +181 -181
  30. package/src/Components/DataSourceLookup.class.ts +161 -161
  31. package/src/Components/ECMASandbox.class.ts +72 -72
  32. package/src/Components/FEncDec.class.ts +29 -29
  33. package/src/Components/FHash.class.ts +33 -33
  34. package/src/Components/FSign.class.ts +80 -80
  35. package/src/Components/FSleep.class.ts +25 -25
  36. package/src/Components/FTimestamp.class.ts +66 -66
  37. package/src/Components/FileStore.class.ts +78 -78
  38. package/src/Components/ForEach.class.ts +97 -97
  39. package/src/Components/GPTPlugin.class.ts +70 -70
  40. package/src/Components/GenAILLM.class.ts +586 -586
  41. package/src/Components/HuggingFace.class.ts +314 -314
  42. package/src/Components/Image/imageSettings.config.ts +70 -70
  43. package/src/Components/ImageGenerator.class.ts +483 -483
  44. package/src/Components/JSONFilter.class.ts +54 -54
  45. package/src/Components/LLMAssistant.class.ts +213 -213
  46. package/src/Components/LogicAND.class.ts +28 -28
  47. package/src/Components/LogicAtLeast.class.ts +85 -85
  48. package/src/Components/LogicAtMost.class.ts +86 -86
  49. package/src/Components/LogicOR.class.ts +29 -29
  50. package/src/Components/LogicXOR.class.ts +34 -34
  51. package/src/Components/MCPClient.class.ts +137 -137
  52. package/src/Components/MemoryDeleteKeyVal.class.ts +70 -70
  53. package/src/Components/MemoryReadKeyVal.class.ts +67 -67
  54. package/src/Components/MemoryWriteKeyVal.class.ts +62 -62
  55. package/src/Components/MemoryWriteObject.class.ts +97 -97
  56. package/src/Components/MultimodalLLM.class.ts +128 -128
  57. package/src/Components/OpenAPI.class.ts +72 -72
  58. package/src/Components/PromptGenerator.class.ts +122 -122
  59. package/src/Components/ScrapflyWebScrape.class.ts +183 -183
  60. package/src/Components/ServerlessCode.class.ts +123 -123
  61. package/src/Components/TavilyWebSearch.class.ts +103 -103
  62. package/src/Components/VisionLLM.class.ts +104 -104
  63. package/src/Components/ZapierAction.class.ts +127 -127
  64. package/src/Components/index.ts +97 -97
  65. package/src/Core/AgentProcess.helper.ts +240 -240
  66. package/src/Core/Connector.class.ts +123 -123
  67. package/src/Core/ConnectorsService.ts +197 -197
  68. package/src/Core/DummyConnector.ts +49 -49
  69. package/src/Core/HookService.ts +105 -105
  70. package/src/Core/SmythRuntime.class.ts +241 -241
  71. package/src/Core/SystemEvents.ts +16 -16
  72. package/src/Core/boot.ts +56 -56
  73. package/src/config.ts +15 -15
  74. package/src/constants.ts +126 -126
  75. package/src/data/hugging-face.params.json +579 -579
  76. package/src/helpers/AWSLambdaCode.helper.ts +624 -599
  77. package/src/helpers/BinaryInput.helper.ts +331 -331
  78. package/src/helpers/Conversation.helper.ts +1157 -1157
  79. package/src/helpers/ECMASandbox.helper.ts +64 -64
  80. package/src/helpers/JsonContent.helper.ts +97 -97
  81. package/src/helpers/LocalCache.helper.ts +97 -97
  82. package/src/helpers/Log.helper.ts +274 -274
  83. package/src/helpers/OpenApiParser.helper.ts +150 -150
  84. package/src/helpers/S3Cache.helper.ts +147 -147
  85. package/src/helpers/SmythURI.helper.ts +5 -5
  86. package/src/helpers/Sysconfig.helper.ts +95 -95
  87. package/src/helpers/TemplateString.helper.ts +243 -243
  88. package/src/helpers/TypeChecker.helper.ts +329 -329
  89. package/src/index.ts +4 -3
  90. package/src/index.ts.bak +4 -3
  91. package/src/subsystems/AgentManager/Agent.class.ts +1114 -1114
  92. package/src/subsystems/AgentManager/Agent.helper.ts +3 -3
  93. package/src/subsystems/AgentManager/AgentData.service/AgentDataConnector.ts +230 -230
  94. package/src/subsystems/AgentManager/AgentData.service/connectors/CLIAgentDataConnector.class.ts +66 -66
  95. package/src/subsystems/AgentManager/AgentData.service/connectors/LocalAgentDataConnector.class.ts +145 -145
  96. package/src/subsystems/AgentManager/AgentData.service/connectors/NullAgentData.class.ts +39 -39
  97. package/src/subsystems/AgentManager/AgentData.service/index.ts +18 -18
  98. package/src/subsystems/AgentManager/AgentLogger.class.ts +301 -301
  99. package/src/subsystems/AgentManager/AgentRequest.class.ts +51 -51
  100. package/src/subsystems/AgentManager/AgentRuntime.class.ts +557 -557
  101. package/src/subsystems/AgentManager/AgentSSE.class.ts +101 -101
  102. package/src/subsystems/AgentManager/AgentSettings.class.ts +52 -52
  103. package/src/subsystems/AgentManager/Component.service/ComponentConnector.ts +32 -32
  104. package/src/subsystems/AgentManager/Component.service/connectors/LocalComponentConnector.class.ts +60 -60
  105. package/src/subsystems/AgentManager/Component.service/index.ts +11 -11
  106. package/src/subsystems/AgentManager/EmbodimentSettings.class.ts +47 -47
  107. package/src/subsystems/AgentManager/ForkedAgent.class.ts +154 -154
  108. package/src/subsystems/AgentManager/OSResourceMonitor.ts +77 -77
  109. package/src/subsystems/ComputeManager/Code.service/CodeConnector.ts +98 -98
  110. package/src/subsystems/ComputeManager/Code.service/connectors/AWSLambdaCode.class.ts +171 -172
  111. package/src/subsystems/ComputeManager/Code.service/connectors/ECMASandbox.class.ts +131 -131
  112. package/src/subsystems/ComputeManager/Code.service/index.ts +13 -13
  113. package/src/subsystems/IO/CLI.service/CLIConnector.ts +47 -47
  114. package/src/subsystems/IO/CLI.service/index.ts +9 -9
  115. package/src/subsystems/IO/Log.service/LogConnector.ts +32 -32
  116. package/src/subsystems/IO/Log.service/connectors/ConsoleLog.class.ts +28 -28
  117. package/src/subsystems/IO/Log.service/index.ts +13 -13
  118. package/src/subsystems/IO/NKV.service/NKVConnector.ts +43 -43
  119. package/src/subsystems/IO/NKV.service/connectors/NKVLocalStorage.class.ts +234 -234
  120. package/src/subsystems/IO/NKV.service/connectors/NKVRAM.class.ts +204 -204
  121. package/src/subsystems/IO/NKV.service/connectors/NKVRedis.class.ts +182 -182
  122. package/src/subsystems/IO/NKV.service/index.ts +14 -14
  123. package/src/subsystems/IO/Router.service/RouterConnector.ts +21 -21
  124. package/src/subsystems/IO/Router.service/connectors/ExpressRouter.class.ts +48 -48
  125. package/src/subsystems/IO/Router.service/connectors/NullRouter.class.ts +40 -40
  126. package/src/subsystems/IO/Router.service/index.ts +11 -11
  127. package/src/subsystems/IO/Storage.service/SmythFS.class.ts +488 -488
  128. package/src/subsystems/IO/Storage.service/StorageConnector.ts +66 -66
  129. package/src/subsystems/IO/Storage.service/connectors/LocalStorage.class.ts +327 -327
  130. package/src/subsystems/IO/Storage.service/connectors/S3Storage.class.ts +482 -482
  131. package/src/subsystems/IO/Storage.service/index.ts +13 -13
  132. package/src/subsystems/IO/VectorDB.service/VectorDBConnector.ts +108 -108
  133. package/src/subsystems/IO/VectorDB.service/connectors/MilvusVectorDB.class.ts +465 -465
  134. package/src/subsystems/IO/VectorDB.service/connectors/PineconeVectorDB.class.ts +387 -387
  135. package/src/subsystems/IO/VectorDB.service/connectors/RAMVecrtorDB.class.ts +408 -408
  136. package/src/subsystems/IO/VectorDB.service/embed/BaseEmbedding.ts +107 -107
  137. package/src/subsystems/IO/VectorDB.service/embed/GoogleEmbedding.ts +118 -118
  138. package/src/subsystems/IO/VectorDB.service/embed/OpenAIEmbedding.ts +109 -109
  139. package/src/subsystems/IO/VectorDB.service/embed/index.ts +26 -26
  140. package/src/subsystems/IO/VectorDB.service/index.ts +14 -14
  141. package/src/subsystems/LLMManager/LLM.helper.ts +251 -251
  142. package/src/subsystems/LLMManager/LLM.inference.ts +345 -345
  143. package/src/subsystems/LLMManager/LLM.service/LLMConnector.ts +492 -492
  144. package/src/subsystems/LLMManager/LLM.service/LLMCredentials.helper.ts +171 -171
  145. package/src/subsystems/LLMManager/LLM.service/connectors/Anthropic.class.ts +666 -666
  146. package/src/subsystems/LLMManager/LLM.service/connectors/Bedrock.class.ts +407 -407
  147. package/src/subsystems/LLMManager/LLM.service/connectors/Echo.class.ts +92 -92
  148. package/src/subsystems/LLMManager/LLM.service/connectors/GoogleAI.class.ts +983 -983
  149. package/src/subsystems/LLMManager/LLM.service/connectors/Groq.class.ts +319 -319
  150. package/src/subsystems/LLMManager/LLM.service/connectors/Ollama.class.ts +362 -0
  151. package/src/subsystems/LLMManager/LLM.service/connectors/Perplexity.class.ts +257 -257
  152. package/src/subsystems/LLMManager/LLM.service/connectors/VertexAI.class.ts +430 -430
  153. package/src/subsystems/LLMManager/LLM.service/connectors/openai/OpenAIConnector.class.ts +503 -503
  154. package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ChatCompletionsApiInterface.ts +524 -524
  155. package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/OpenAIApiInterface.ts +100 -100
  156. package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/OpenAIApiInterfaceFactory.ts +81 -81
  157. package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ResponsesApiInterface.ts +1145 -1145
  158. package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/constants.ts +13 -13
  159. package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/index.ts +4 -4
  160. package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/utils.ts +11 -11
  161. package/src/subsystems/LLMManager/LLM.service/connectors/openai/types.ts +32 -32
  162. package/src/subsystems/LLMManager/LLM.service/connectors/xAI.class.ts +478 -478
  163. package/src/subsystems/LLMManager/LLM.service/index.ts +47 -44
  164. package/src/subsystems/LLMManager/ModelsProvider.service/ModelsProviderConnector.ts +303 -303
  165. package/src/subsystems/LLMManager/ModelsProvider.service/connectors/JSONModelsProvider.class.ts +271 -271
  166. package/src/subsystems/LLMManager/ModelsProvider.service/index.ts +11 -11
  167. package/src/subsystems/LLMManager/custom-models.ts +854 -854
  168. package/src/subsystems/LLMManager/models.ts +2540 -2540
  169. package/src/subsystems/LLMManager/paramMappings.ts +69 -69
  170. package/src/subsystems/MemoryManager/Cache.service/CacheConnector.ts +86 -86
  171. package/src/subsystems/MemoryManager/Cache.service/connectors/LocalStorageCache.class.ts +297 -297
  172. package/src/subsystems/MemoryManager/Cache.service/connectors/RAMCache.class.ts +214 -214
  173. package/src/subsystems/MemoryManager/Cache.service/connectors/RedisCache.class.ts +252 -252
  174. package/src/subsystems/MemoryManager/Cache.service/connectors/S3Cache.class.ts +373 -373
  175. package/src/subsystems/MemoryManager/Cache.service/index.ts +15 -15
  176. package/src/subsystems/MemoryManager/LLMCache.ts +72 -72
  177. package/src/subsystems/MemoryManager/LLMContext.ts +124 -124
  178. package/src/subsystems/MemoryManager/LLMMemory.service/LLMMemoryConnector.ts +26 -26
  179. package/src/subsystems/MemoryManager/RuntimeContext.ts +277 -277
  180. package/src/subsystems/Security/AccessControl/ACL.class.ts +208 -208
  181. package/src/subsystems/Security/AccessControl/AccessCandidate.class.ts +82 -82
  182. package/src/subsystems/Security/AccessControl/AccessRequest.class.ts +52 -52
  183. package/src/subsystems/Security/Account.service/AccountConnector.ts +44 -44
  184. package/src/subsystems/Security/Account.service/connectors/DummyAccount.class.ts +130 -130
  185. package/src/subsystems/Security/Account.service/connectors/JSONFileAccount.class.ts +170 -170
  186. package/src/subsystems/Security/Account.service/connectors/MySQLAccount.class.ts +76 -76
  187. package/src/subsystems/Security/Account.service/index.ts +14 -14
  188. package/src/subsystems/Security/Credentials.helper.ts +62 -62
  189. package/src/subsystems/Security/ManagedVault.service/ManagedVaultConnector.ts +38 -38
  190. package/src/subsystems/Security/ManagedVault.service/connectors/NullManagedVault.class.ts +53 -53
  191. package/src/subsystems/Security/ManagedVault.service/connectors/SecretManagerManagedVault.ts +154 -154
  192. package/src/subsystems/Security/ManagedVault.service/index.ts +12 -12
  193. package/src/subsystems/Security/SecureConnector.class.ts +110 -110
  194. package/src/subsystems/Security/Vault.service/Vault.helper.ts +30 -30
  195. package/src/subsystems/Security/Vault.service/VaultConnector.ts +29 -29
  196. package/src/subsystems/Security/Vault.service/connectors/HashicorpVault.class.ts +46 -46
  197. package/src/subsystems/Security/Vault.service/connectors/JSONFileVault.class.ts +221 -221
  198. package/src/subsystems/Security/Vault.service/connectors/NullVault.class.ts +54 -54
  199. package/src/subsystems/Security/Vault.service/connectors/SecretsManager.class.ts +140 -140
  200. package/src/subsystems/Security/Vault.service/index.ts +12 -12
  201. package/src/types/ACL.types.ts +104 -104
  202. package/src/types/AWS.types.ts +10 -10
  203. package/src/types/Agent.types.ts +61 -61
  204. package/src/types/AgentLogger.types.ts +17 -17
  205. package/src/types/Cache.types.ts +1 -1
  206. package/src/types/Common.types.ts +2 -2
  207. package/src/types/LLM.types.ts +520 -519
  208. package/src/types/Redis.types.ts +8 -8
  209. package/src/types/SRE.types.ts +64 -64
  210. package/src/types/Security.types.ts +14 -14
  211. package/src/types/Storage.types.ts +5 -5
  212. package/src/types/VectorDB.types.ts +86 -86
  213. package/src/utils/base64.utils.ts +275 -275
  214. package/src/utils/cli.utils.ts +68 -68
  215. package/src/utils/data.utils.ts +322 -322
  216. package/src/utils/date-time.utils.ts +22 -22
  217. package/src/utils/general.utils.ts +238 -238
  218. package/src/utils/index.ts +12 -12
  219. package/src/utils/lazy-client.ts +261 -261
  220. package/src/utils/numbers.utils.ts +13 -13
  221. package/src/utils/oauth.utils.ts +35 -35
  222. package/src/utils/string.utils.ts +414 -414
  223. package/src/utils/url.utils.ts +19 -19
  224. package/src/utils/validation.utils.ts +74 -74
  225. package/dist/bundle-analysis-lazy.html +0 -4949
  226. package/dist/bundle-analysis.html +0 -4949
  227. package/dist/types/Components/Triggers/GmailTrigger.class.d.ts +0 -13
  228. package/dist/types/Components/Triggers/Trigger.class.d.ts +0 -3
  229. package/dist/types/helpers/AIPerformanceAnalyzer.helper.d.ts +0 -45
  230. package/dist/types/helpers/AIPerformanceCollector.helper.d.ts +0 -111
  231. package/dist/types/subsystems/IO/Storage.service/connectors/AzureBlobStorage.class.d.ts +0 -211
  232. package/dist/types/subsystems/IO/VectorDB.service/connectors/WeaviateVectorDB.class.d.ts +0 -187
  233. package/dist/types/subsystems/PerformanceManager/Performance.service/PerformanceConnector.d.ts +0 -102
  234. package/dist/types/subsystems/PerformanceManager/Performance.service/connectors/LocalPerformanceConnector.class.d.ts +0 -100
  235. package/dist/types/subsystems/PerformanceManager/Performance.service/index.d.ts +0 -22
  236. package/dist/types/types/Performance.types.d.ts +0 -468
  237. package/dist/types/utils/package-manager.utils.d.ts +0 -26
@@ -1,983 +1,983 @@
1
- import os from 'os';
2
- import path from 'path';
3
- import EventEmitter from 'events';
4
- import fs from 'fs';
5
-
6
- import { GoogleGenerativeAI, ModelParams, GenerationConfig, GenerateContentRequest, UsageMetadata, FunctionCallingMode } from '@google/generative-ai';
7
- import { GoogleAIFileManager, FileState } from '@google/generative-ai/server';
8
- import { GoogleGenAI } from '@google/genai';
9
-
10
- import { JSON_RESPONSE_INSTRUCTION, BUILT_IN_MODEL_PREFIX } from '@sre/constants';
11
- import { BinaryInput } from '@sre/helpers/BinaryInput.helper';
12
- import { AccessCandidate } from '@sre/Security/AccessControl/AccessCandidate.class';
13
- import { uid } from '@sre/utils';
14
-
15
- import { processWithConcurrencyLimit } from '@sre/utils';
16
-
17
- import {
18
- TLLMMessageBlock,
19
- ToolData,
20
- TLLMMessageRole,
21
- TLLMToolResultMessageBlock,
22
- APIKeySource,
23
- TLLMEvent,
24
- BasicCredentials,
25
- ILLMRequestFuncParams,
26
- TLLMChatResponse,
27
- TGoogleAIRequestBody,
28
- ILLMRequestContext,
29
- TLLMPreparedParams,
30
- LLMInterface,
31
- } from '@sre/types/LLM.types';
32
- import { LLMHelper } from '@sre/LLMManager/LLM.helper';
33
-
34
- import { SystemEvents } from '@sre/Core/SystemEvents';
35
- import { SUPPORTED_MIME_TYPES_MAP } from '@sre/constants';
36
- import { Logger } from '@sre/helpers/Log.helper';
37
-
38
- import { LLMConnector } from '../LLMConnector';
39
-
40
- const logger = Logger('GoogleAIConnector');
41
-
42
// Model ids treated as accepting a system instruction. Each id appears exactly once
// (the list previously repeated 'gemini-1.5-pro-latest').
const MODELS_SUPPORT_SYSTEM_INSTRUCTION: readonly string[] = [
    'gemini-1.5-pro-exp-0801',
    'gemini-1.5-pro-latest',
    'gemini-1.5-pro',
    'gemini-1.5-pro-001',
    'gemini-1.5-flash-latest',
    'gemini-1.5-flash-001',
    'gemini-1.5-flash',
];
// The JSON response MIME type is enabled for exactly the same set of models.
const MODELS_SUPPORT_JSON_RESPONSE = MODELS_SUPPORT_SYSTEM_INSTRUCTION;
53
-
54
// Every file MIME type accepted for Google AI's Gemini models, flattened into one list
// in category order: image, audio, video, document.
const VALID_MIME_TYPES = (['image', 'audio', 'video', 'document'] as const).flatMap(
    (category) => SUPPORTED_MIME_TYPES_MAP.GoogleAI[category]
);
61
-
62
- // will be removed after updating the SDK
63
- type UsageMetadataWithThoughtsToken = UsageMetadata & { thoughtsTokenCount?: number; cost?: number };
64
-
65
// Fixed cost per generated image, keyed by model name.
// Declared as Record<string, number> because callers index it with an arbitrary model-name
// string; a model that is not listed yields `undefined`.
const IMAGE_GEN_FIXED_PRICING: Record<string, number> = {
    'imagen-3.0-generate-001': 0.04, // Fixed cost per image
    'imagen-4.0-generate-001': 0.04, // Fixed cost per image
    'imagen-4': 0.04, // Standard Imagen 4
    'imagen-4-ultra': 0.06, // Imagen 4 Ultra
    'gemini-2.5-flash-image': 0.039,
};
72
-
73
- export class GoogleAIConnector extends LLMConnector {
74
- public name = 'LLM:GoogleAI';
75
-
76
- private validMimeTypes = {
77
- all: VALID_MIME_TYPES,
78
- image: SUPPORTED_MIME_TYPES_MAP.GoogleAI.image,
79
- };
80
-
81
/**
 * Creates a Google Generative AI SDK client from the API key carried by the request context.
 *
 * @param params - Request context; its `credentials` are read for an `apiKey`.
 * @returns A new `GoogleGenerativeAI` instance constructed with that key.
 * @throws Error when the credentials do not contain an API key.
 */
private async getClient(params: ILLMRequestContext): Promise<GoogleGenerativeAI> {
    const credentials = params.credentials as BasicCredentials;
    const key = credentials?.apiKey;

    if (key) {
        return new GoogleGenerativeAI(key);
    }

    throw new Error('Please provide an API key for Google AI');
}
88
-
89
/**
 * Runs a single (non-streaming) generation against the Google AI model described by `body`.
 *
 * @param acRequest - Access request; its `candidate` is attached to log entries.
 * @param body - Model parameters plus `messages` (the prompt). `messages` is removed from
 *               `body` before the remainder is passed to `getGenerativeModel`.
 * @param context - Request context supplying credentials and the usage-reporting metadata.
 * @returns Text content, lower-cased finish reason, tool-call data and usage metadata.
 * @throws Rethrows any error raised while building the client or calling the API, after logging it.
 */
protected async request({ acRequest, body, context }: ILLMRequestFuncParams): Promise<TLLMChatResponse> {
    try {
        logger.debug(`request ${this.name}`, acRequest.candidate);
        // The prompt travels in `body.messages`; detach it so only model parameters remain.
        const prompt = body.messages;
        delete body.messages;

        const genAI = await this.getClient(context);
        const $model = genAI.getGenerativeModel(body);

        const result = await $model.generateContent(prompt);

        const response = await result.response;
        const content = response.text();
        // `candidates` is optional on the SDK response, so guard every first-candidate lookup
        // instead of dereferencing index 0 directly.
        const firstCandidate = response.candidates?.[0];
        const finishReason = firstCandidate?.finishReason || 'stop';
        const usage = response?.usageMetadata as UsageMetadataWithThoughtsToken;
        this.reportUsage(usage, {
            modelEntryName: context.modelEntryName,
            keySource: context.isUserKey ? APIKeySource.User : APIKeySource.Smyth,
            agentId: context.agentId,
            teamId: context.teamId,
        });

        // Parts that carry a function call are the model's tool invocations.
        const toolCalls = firstCandidate?.content?.parts?.filter((part) => part.functionCall);

        let toolsData: ToolData[] = [];
        let useTool = false;

        if (toolCalls && toolCalls.length > 0) {
            // Ids are synthesized from the call's position in the response.
            toolsData = toolCalls.map((toolCall, index) => ({
                index,
                id: `tool-${index}`,
                type: 'function',
                name: toolCall.functionCall.name,
                arguments: JSON.stringify(toolCall.functionCall.args),
                role: TLLMMessageRole.Assistant,
            }));
            useTool = true;
        }

        return {
            content,
            finishReason: finishReason.toLowerCase(),
            useTool,
            toolsData,
            message: { content, role: 'assistant' },
            usage,
        };
    } catch (error: any) {
        logger.error(`request ${this.name}`, error, acRequest.candidate);
        throw error;
    }
}
141
-
142
/**
 * Starts a streaming generation and returns an emitter that relays the stream.
 *
 * Events emitted on the returned emitter:
 *  - `'content'`          text of each chunk
 *  - `TLLMEvent.ToolInfo` tool-call data whenever a chunk contains function calls
 *  - `'error'`            the error, if reading the stream fails after the emitter was returned
 *  - `'end'`              the latest tool-call data, 100 ms after the stream is exhausted
 *
 * @param acRequest - Access request; its `candidate` is attached to log entries.
 * @param body - Model parameters plus `messages` (the prompt). `messages` is removed from
 *               `body` before the remainder is passed to `getGenerativeModel`.
 * @param context - Request context supplying credentials and the usage-reporting metadata.
 * @returns The emitter, returned as soon as the stream has been opened.
 * @throws Rethrows (after logging) an error raised while opening the stream.
 */
protected async streamRequest({ acRequest, body, context }: ILLMRequestFuncParams): Promise<EventEmitter> {
    logger.debug(`streamRequest ${this.name}`, acRequest.candidate);
    const emitter = new EventEmitter();

    // The prompt travels in `body.messages`; detach it so only model parameters remain.
    const prompt = body.messages;
    delete body.messages;

    const genAI = await this.getClient(context);
    const $model = genAI.getGenerativeModel(body);

    try {
        const result = await $model.generateContentStream(prompt);

        let toolsData: ToolData[] = [];
        let usage: UsageMetadataWithThoughtsToken;

        // Process the stream asynchronously, as we need to return the emitter immediately.
        void (async () => {
            try {
                for await (const chunk of result.stream) {
                    const chunkText = chunk.text();
                    emitter.emit('content', chunkText);

                    // `candidates` is optional on a chunk, so guard the first-candidate lookup.
                    const parts = chunk.candidates?.[0]?.content?.parts;
                    if (parts) {
                        const toolCalls = parts.filter((part) => part.functionCall);
                        if (toolCalls.length > 0) {
                            toolsData = toolCalls.map((toolCall, index) => ({
                                index,
                                id: `tool-${index}`,
                                type: 'function',
                                name: toolCall.functionCall.name,
                                arguments: JSON.stringify(toolCall.functionCall.args),
                                role: TLLMMessageRole.Assistant,
                            }));
                            emitter.emit(TLLMEvent.ToolInfo, toolsData);
                        }
                    }

                    // the same usage is sent on each emit. IMPORTANT: google does not send usage for each chunk but
                    // rather just sends the same usage for the entire request.
                    // notice that the output tokens are only sent in the last chunk usage metadata.
                    // so we will just update a var to hold the latest usage and report it when the stream ends.
                    // e.g emit1: { input_tokens: 500, output_tokens: undefined } -> same input_tokens
                    // e.g emit2: { input_tokens: 500, output_tokens: undefined } -> same input_tokens
                    // e.g emit3: { input_tokens: 500, output_tokens: 10 } -> same input_tokens, new output_tokens in the last chunk
                    if (chunk?.usageMetadata) {
                        usage = chunk.usageMetadata as UsageMetadataWithThoughtsToken;
                    }
                }

                if (usage) {
                    this.reportUsage(usage, {
                        modelEntryName: context.modelEntryName,
                        keySource: context.isUserKey ? APIKeySource.User : APIKeySource.Smyth,
                        agentId: context.agentId,
                        teamId: context.teamId,
                    });
                }

                setTimeout(() => {
                    emitter.emit('end', toolsData);
                }, 100);
            } catch (error: any) {
                // The emitter has already been handed to the caller, so a failure here cannot
                // propagate through the returned promise. Log it and surface it as an 'error'
                // event instead of leaving it as an unhandled rejection of this detached task.
                logger.error(`streamRequest ${this.name}`, error, acRequest.candidate);
                emitter.emit('error', error);
            }
        })();

        return emitter;
    } catch (error: any) {
        logger.error(`streamRequest ${this.name}`, error, acRequest.candidate);
        throw error;
    }
}
211
- // #region Image Generation, will be moved to a different subsystem/service
212
-
213
/**
 * Generates images through either the Gemini `generateContent` API or the Imagen
 * `generateImages` API, chosen by `context.modelInfo.interface` (defaults to `GenerateImages`).
 *
 * @param body - Request body; reads `model`, `prompt`, `n`, `aspect_ratio`, `size` and
 *               `person_generation`.
 * @param context - Request context supplying credentials, the model entry name (used to look
 *                  up the fixed per-image price) and the model interface.
 * @returns `{ created, data }` where each `data` item has `url` (a base64 data URL),
 *          `b64_json` and `revised_prompt` (the original prompt).
 * @throws Error when no API key is available, when the Gemini response contains no image,
 *         or when the model interface is neither `GenerateContent` nor `GenerateImages`.
 */
protected async imageGenRequest({ body, context }: ILLMRequestFuncParams): Promise<any> {
    const apiKey = (context.credentials as BasicCredentials)?.apiKey;
    if (!apiKey) throw new Error('Please provide an API key for Google AI');

    const model = body.model || 'imagen-3.0-generate-001';
    // Built-in model entries carry a prefix; strip it to obtain the pricing key.
    const modelName = context.modelEntryName.replace(BUILT_IN_MODEL_PREFIX, '');

    const ai = new GoogleGenAI({ apiKey });

    // Default to GenerateImages interface if not specified
    const modelInterface = context.modelInfo?.interface || LLMInterface.GenerateImages;

    if (modelInterface === LLMInterface.GenerateContent) {
        // Use Gemini image generation API
        const response: any = await ai.models.generateContent({
            model,
            contents: body.prompt,
        });

        // Extract image data from Gemini response format
        const imageData: any[] = [];
        for (const part of response.candidates?.[0]?.content?.parts ?? []) {
            if (part.inlineData?.data) {
                // Prefer the MIME type reported with the inline data; fall back to PNG when absent.
                const mimeType = part.inlineData.mimeType || 'image/png';
                imageData.push({
                    url: `data:${mimeType};base64,${part.inlineData.data}`,
                    b64_json: part.inlineData.data,
                    revised_prompt: body.prompt,
                });
            }
        }

        // Report input tokens and image cost pricing based on the official pricing page:
        // https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-flash-image-preview
        const usageMetadata = response?.usageMetadata as UsageMetadataWithThoughtsToken;

        this.reportImageUsage({
            usage: {
                cost: IMAGE_GEN_FIXED_PRICING[modelName],
                usageMetadata,
            },
            context,
        });

        if (imageData.length === 0) {
            throw new Error(
                'Please enter a valid prompt — for example: "Create a picture of a nano banana dish in a fancy restaurant with a Gemini theme."'
            );
        }

        return {
            created: Math.floor(Date.now() / 1000),
            data: imageData,
        };
    }

    if (modelInterface === LLMInterface.GenerateImages) {
        // Use traditional Imagen models; this config only applies to the generateImages API.
        const config = {
            numberOfImages: body.n || 1,
            aspectRatio: body.aspect_ratio || body.size || '1:1',
            personGeneration: body.person_generation || 'allow_adult',
        };

        const response: any = await ai.models.generateImages({
            model,
            prompt: body.prompt,
            config,
        });

        // Report input tokens and image cost pricing based on the official pricing page:
        // https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-flash-image-preview
        const usageMetadata = response?.usageMetadata as UsageMetadataWithThoughtsToken;
        this.reportImageUsage({
            usage: {
                cost: IMAGE_GEN_FIXED_PRICING[modelName],
                usageMetadata,
            },
            numberOfImages: config.numberOfImages,
            context,
        });

        return {
            created: Math.floor(Date.now() / 1000),
            data:
                response.generatedImages?.map((generatedImage: any) => {
                    // An entry may come back without an `image`; guard the lookup.
                    const imageBytes = generatedImage.image?.imageBytes;
                    const mimeType = generatedImage.image?.mimeType || 'image/png';
                    return {
                        url: imageBytes ? `data:${mimeType};base64,${imageBytes}` : undefined,
                        b64_json: imageBytes,
                        revised_prompt: body.prompt,
                    };
                }) || [],
        };
    }

    throw new Error(`Unsupported interface: ${modelInterface}`);
}
309
-
310
/**
 * Edits images through the Gemini `generateContent` API using the already prepared
 * `body.contents`.
 *
 * @param body - Prepared request body; reads `model`, `contents`, `_metadata.prompt` and `prompt`.
 * @param context - Request context supplying credentials, the model entry name (used to look
 *                  up the fixed per-image price) and the model interface.
 * @returns `{ created, data }` where each `data` item has `url` (a base64 data URL),
 *          `b64_json` and `revised_prompt`.
 * @throws Error when no API key is available or when the model's interface is not
 *         `GenerateContent`.
 */
protected async imageEditRequest({ body, context }: ILLMRequestFuncParams): Promise<any> {
    const apiKey = (context.credentials as BasicCredentials)?.apiKey;
    if (!apiKey) throw new Error('Please provide an API key for Google AI');

    // A model supports image editing if it implements the `generateContent` interface.
    const supportsEditing = context.modelInfo?.interface === LLMInterface.GenerateContent;
    if (!supportsEditing) {
        throw new Error(`Image editing is not supported for model: ${body.model}. This model only supports image generation.`);
    }

    const ai = new GoogleGenAI({ apiKey });
    // Built-in model entries carry a prefix; strip it to obtain the pricing key.
    const modelName = context.modelEntryName.replace(BUILT_IN_MODEL_PREFIX, '');

    // Use the prepared body which already contains processed files and contents
    const response = await ai.models.generateContent({
        model: body.model,
        contents: body.contents,
    });

    // Extract image data from Gemini response format
    const imageData: any[] = [];
    for (const part of response.candidates?.[0]?.content?.parts ?? []) {
        if (part.inlineData?.data) {
            // Prefer the MIME type reported with the inline data; fall back to PNG when absent.
            const mimeType = part.inlineData.mimeType || 'image/png';
            imageData.push({
                url: `data:${mimeType};base64,${part.inlineData.data}`,
                b64_json: part.inlineData.data,
                revised_prompt: body._metadata?.prompt || body.prompt,
            });
        }
    }

    // Report pricing for input tokens and image costs
    const usageMetadata = response?.usageMetadata as UsageMetadataWithThoughtsToken;

    this.reportImageUsage({
        usage: {
            cost: IMAGE_GEN_FIXED_PRICING[modelName],
            usageMetadata,
        },
        context,
    });

    return {
        created: Math.floor(Date.now() / 1000),
        data: imageData,
    };
}
359
-
360
/**
 * Translates prepared LLM parameters into the request body used by this connector.
 *
 * Image-capable models are routed to the image body builders (edit when files are attached,
 * generation otherwise). All other requests get a body with `model`, `messages`, and, when
 * applicable, `systemInstruction` and `generationConfig`.
 *
 * @param params - Prepared parameters (model, files, capabilities, sampling options, ...).
 * @returns The request body for the selected kind of request.
 */
protected async reqBodyAdapter(params: TLLMPreparedParams): Promise<TGoogleAIRequestBody> {
    const model = params?.model;

    // Image generation is detected through the model's capabilities.
    if (params?.capabilities?.imageGeneration) {
        // Attached files mean an edit; no files means a fresh generation.
        const isEdit = params?.files?.length > 0;
        return (isEdit ? this.prepareImageEditBody(params) : this.prepareBodyForImageGenRequest(params)) as any;
    }

    const messages = await this.prepareMessages(params);

    const body: ModelParams & { messages: string | TLLMMessageBlock[] | GenerateContentRequest } = {
        model: model as string,
        messages,
    };

    const wantsJson = (params?.responseFormat || '') === 'json';
    // JSON output is always requested through the system instruction; the JSON MIME type is
    // added only for models listed as supporting it.
    const systemInstruction = wantsJson ? JSON_RESPONSE_INSTRUCTION : '';
    const responseMimeType = wantsJson && MODELS_SUPPORT_JSON_RESPONSE.includes(model as string) ? 'application/json' : '';

    const generationConfig: GenerationConfig = {};

    if (params.maxTokens !== undefined) {
        generationConfig.maxOutputTokens = params.maxTokens;
    }
    if (params.temperature !== undefined) {
        generationConfig.temperature = params.temperature;
    }
    if (params.topP !== undefined) {
        generationConfig.topP = params.topP;
    }
    if (params.topK !== undefined) {
        generationConfig.topK = params.topK;
    }
    if (params.stopSequences?.length) {
        generationConfig.stopSequences = params.stopSequences;
    }
    if (responseMimeType) {
        generationConfig.responseMimeType = responseMimeType;
    }

    if (systemInstruction) {
        body.systemInstruction = systemInstruction;
    }
    // Only attach a generation config when at least one option was set.
    if (Object.keys(generationConfig).length > 0) {
        body.generationConfig = generationConfig;
    }

    return body;
}
409
-
410
/**
 * Converts Google AI usage metadata into a usage record, emits it on the `USAGE:LLM`
 * system event, and returns it.
 *
 * @param usage - Usage metadata from the API response (may be undefined; all counts then default to 0).
 * @param metadata - Model entry name, key source, agent id and team id to attach to the record.
 * @returns The usage record that was emitted.
 */
protected reportUsage(
    usage: UsageMetadataWithThoughtsToken,
    metadata: { modelEntryName: string; keySource: APIKeySource; agentId: string; teamId: string }
) {
    // SmythOS (built-in) models have a prefix, so we need to remove it to get the model name
    const modelName = metadata.modelEntryName.replace(BUILT_IN_MODEL_PREFIX, '');
    let tier = '';
    // Text-input token count at which a model family switches from 'tier1' to 'tier2'.
    const tierThresholds: Record<string, number> = {
        'gemini-1.5-pro': 128_000,
        'gemini-2.5-pro': 200_000,
    };

    // Prefer the per-modality breakdown; fall back to the overall prompt token count for text.
    const textInputTokens =
        usage?.['promptTokensDetails']?.find((detail) => detail.modality === 'TEXT')?.tokenCount || usage?.promptTokenCount || 0;
    const audioInputTokens = usage?.['promptTokensDetails']?.find((detail) => detail.modality === 'AUDIO')?.tokenCount || 0;

    // Find matching model and set tier based on threshold
    const modelWithTier = Object.keys(tierThresholds).find((model) => modelName.includes(model));
    if (modelWithTier) {
        tier = textInputTokens < tierThresholds[modelWithTier] ? 'tier1' : 'tier2';
    }

    // #endregion

    const usageData = {
        sourceId: `llm:${modelName}`,
        input_tokens: textInputTokens,
        output_tokens: usage?.candidatesTokenCount || 0,
        input_tokens_audio: audioInputTokens,
        input_tokens_cache_read: usage?.cachedContentTokenCount || 0,
        input_tokens_cache_write: 0,
        // Default to 0 like every other counter so the record never carries `undefined`.
        reasoning_tokens: usage?.thoughtsTokenCount || 0,
        keySource: metadata.keySource,
        agentId: metadata.agentId,
        teamId: metadata.teamId,
        tier,
    };
    SystemEvents.emit('USAGE:LLM', usageData);

    return usageData;
}
451
-
452
/**
 * Reads the TEXT and IMAGE prompt token counts from Google AI usage metadata.
 * A modality with no entry (or no metadata at all) is reported as 0.
 */
private extractTokenCounts(usage: UsageMetadataWithThoughtsToken): { textTokens: number; imageTokens: number } {
    const details = usage?.['promptTokensDetails'];
    const countFor = (modality: string): number => details?.find((detail) => detail.modality === modality)?.tokenCount || 0;

    return {
        textTokens: countFor('TEXT'),
        imageTokens: countFor('IMAGE'),
    };
}
461
-
462
/**
 * Emits a `USAGE:API` system event for an image generation/editing call.
 *
 * @param usage.cost Per-image cost; treated as 0 when it is missing so the
 *        emitted `cost` is never NaN.
 * @param usage.usageMetadata Optional usage metadata used to fill the text and image input token counts.
 * @param context Request context supplying the key source and agent/team identifiers.
 * @param numberOfImages Multiplier applied to the per-image cost (defaults to 1).
 */
protected reportImageUsage({
    usage,
    context,
    numberOfImages = 1,
}: {
    usage: { cost?: number; usageMetadata?: UsageMetadataWithThoughtsToken };
    context: ILLMRequestContext;
    numberOfImages?: number;
}) {
    // Token counts stay at 0 when the response carried no usage metadata.
    const { textTokens, imageTokens } = usage.usageMetadata
        ? this.extractTokenCounts(usage.usageMetadata)
        : { textTokens: 0, imageTokens: 0 };

    const imageUsageData = {
        sourceId: `api:imagegen.smyth`,
        keySource: context.isUserKey ? APIKeySource.User : APIKeySource.Smyth,

        // `usage.cost` is optional; multiplying `undefined` would report NaN.
        cost: (usage.cost ?? 0) * numberOfImages,
        input_tokens_txt: textTokens,
        input_tokens_img: imageTokens,

        agentId: context.agentId,
        teamId: context.teamId,
    };
    SystemEvents.emit('USAGE:API', imageUsageData);
}
494
-
495
/**
 * Converts generic tool definitions into the Google AI tools configuration.
 *
 * Each definition becomes its own `functionDeclarations` entry with a
 * sanitized name. An empty or missing `properties` object is replaced by a
 * single placeholder `dummy` string property.
 *
 * @returns `{ tools, toolChoice: { type } }`, with `toolChoice` defaulting to `'auto'`.
 */
public formatToolsConfig({ toolDefinitions, toolChoice = 'auto' }) {
    const tools = toolDefinitions.map(({ name, description, properties, requiredFields }) => {
        const hasProperties = properties && Object.keys(properties).length > 0;

        const declaration = {
            name: this.sanitizeFunctionName(name),
            description: description || '',
            parameters: {
                type: 'OBJECT',
                // Properties must be non-empty for the OBJECT type.
                properties: hasProperties ? properties : { dummy: { type: 'string' } },
                required: requiredFields || [],
            },
        };

        return { functionDeclarations: [declaration] };
    });

    return {
        tools,
        toolChoice: { type: toolChoice },
    };
}
527
-
528
/**
 * Produces the message blocks that follow a tool-call round: the original
 * message (when given) rewritten into `parts`, followed by one user message
 * per tool result carrying a `functionResponse`.
 *
 * @param messageBlock The message that requested the tool calls; may be falsy.
 * @param toolsData Executed tools with their results.
 * @returns The rewritten message (if any) followed by the tool-result messages.
 */
public transformToolMessageBlocks({
    messageBlock,
    toolsData,
}: {
    messageBlock: TLLMMessageBlock;
    toolsData: ToolData[];
}): TLLMToolResultMessageBlock[] {
    // Function-call args may arrive as a JSON string or as an already-parsed
    // object; JSON.parse on an object (or an empty string) would throw.
    const parseArgs = (args: unknown) => {
        if (typeof args !== 'string') return args;
        return args.trim() === '' ? {} : JSON.parse(args);
    };

    const messageBlocks: TLLMToolResultMessageBlock[] = [];

    if (messageBlock) {
        const content = [];
        if (typeof messageBlock.content === 'string') {
            content.push({ text: messageBlock.content });
        } else if (Array.isArray(messageBlock.content)) {
            content.push(...messageBlock.content);
        }

        if (messageBlock.parts) {
            const functionCalls = messageBlock.parts.filter((part) => part.functionCall);
            if (functionCalls.length > 0) {
                content.push(
                    ...functionCalls.map((call) => ({
                        functionCall: {
                            name: call.functionCall.name,
                            args: parseArgs(call.functionCall.args),
                        },
                    }))
                );
            }
        }

        messageBlocks.push({
            role: messageBlock.role,
            parts: content,
        });
    }

    const transformedToolsData = toolsData.map(
        (toolData): TLLMToolResultMessageBlock => ({
            role: TLLMMessageRole.User,
            parts: [
                {
                    functionResponse: {
                        name: toolData.name,
                        response: {
                            name: toolData.name,
                            // Non-string results are serialized so the response content is always a string.
                            content: typeof toolData.result === 'string' ? toolData.result : JSON.stringify(toolData.result),
                        },
                    },
                },
            ],
        })
    );

    return [...messageBlocks, ...transformedToolsData];
}
584
-
585
/**
 * Normalizes messages into the shape sent to Google AI: a role of `user` or
 * `model` and a single text part, with `content` removed.
 *
 * Duplicate user messages are removed first. Assistant, system and model
 * roles map to `model`; any other non-user role falls back to `user`.
 *
 * @param messages Messages in the generic message-block format.
 * @returns New message objects; the input objects are shallow-copied, not mutated.
 */
public getConsistentMessages(messages: TLLMMessageBlock[]): TLLMMessageBlock[] {
    const _messages = LLMHelper.removeDuplicateUserMessages(messages);

    return _messages.map((message) => {
        const _message = { ...message };
        let textContent = '';

        // Map roles to valid Google AI roles
        switch (_message.role) {
            case TLLMMessageRole.Assistant:
            case TLLMMessageRole.System:
            // A message that already carries the model role must keep it
            // rather than fall through to the user default.
            case TLLMMessageRole.Model:
                _message.role = TLLMMessageRole.Model;
                break;
            case TLLMMessageRole.User:
                // User role is already valid
                break;
            default:
                _message.role = TLLMMessageRole.User; // Default to user for unknown roles
        }

        // Empty text causes an error, so '...' is substituted wherever text is missing.
        if (_message?.parts) {
            textContent = _message.parts.map((textBlock) => textBlock?.text || '...').join(' ');
        } else if (Array.isArray(_message?.content)) {
            textContent = _message.content.map((textBlock) => textBlock?.text || '...').join(' ');
        } else if (_message?.content) {
            textContent = (_message.content as string) || '...';
        }

        _message.parts = [{ text: textContent || '...' }];

        delete _message.content; // Remove content to avoid error

        return _message;
    });
}
622
-
623
/**
 * Picks the message-preparation strategy for a request: files take
 * precedence, then tools, otherwise a plain text query.
 */
private async prepareMessages(params: TLLMPreparedParams): Promise<string | TLLMMessageBlock[] | GenerateContentRequest> {
    const attachedFiles: BinaryInput[] = params?.files || [];

    if (attachedFiles.length > 0) {
        return this.prepareMessagesWithFiles(params);
    }

    if (params?.toolsConfig?.tools?.length > 0) {
        return this.prepareMessagesWithTools(params);
    }

    return this.prepareMessagesWithTextQuery(params);
}
638
-
639
/**
 * Prepares the prompt for a request with attached files.
 *
 * Files are uploaded through `BinaryInput.upload`, filtered to supported MIME
 * types, uploaded to Google AI, and combined with the text of the last message.
 *
 * @param params Prepared LLM parameters; `files`, `messages`, `credentials` and `agentId` are read.
 * @returns The file parts plus prompt (typed as `string` for the caller, but an array at runtime).
 * @throws If a video is combined with other files, or no file could be uploaded to Google AI.
 */
private async prepareMessagesWithFiles(params: TLLMPreparedParams): Promise<string> {
    const model = params.model;

    const messages: string | TLLMMessageBlock[] = params?.messages || '';
    // NOTE(review): no system message is extracted in this path, so this stays empty.
    const systemInstruction = '';
    const files: BinaryInput[] = params?.files || [];

    // #region Upload files
    const uploads = [];
    const binaryInputs = files.map((file) => {
        const binaryInput = BinaryInput.from(file);
        uploads.push(binaryInput.upload(AccessCandidate.agent(params.agentId)));
        return binaryInput;
    });

    await Promise.all(uploads);
    // #endregion Upload files

    // With a mix of valid and invalid files, only the valid ones are processed.
    const validFiles = this.getValidFiles(binaryInputs, 'all');

    const hasVideo = validFiles.some((file) => file?.mimetype?.includes('video'));

    // GoogleAI only supports one video file at a time
    if (hasVideo && validFiles.length > 1) {
        throw new Error('Only one video file is supported at a time.');
    }

    const fileUploadingTasks = validFiles.map((file) => async () => {
        try {
            const uploadedFile = await this.uploadFile({
                file,
                apiKey: (params.credentials as BasicCredentials).apiKey,
                agentId: params.agentId,
            });

            return { url: uploadedFile.url, mimetype: file.mimetype };
        } catch {
            // A failed upload is reported as null and filtered out below.
            return null;
        }
    });

    const uploadResults = await processWithConcurrencyLimit(fileUploadingTasks);

    // Drop failed uploads so the emptiness check can fire and getFileData never sees null.
    const uploadedFiles = ((uploadResults || []) as { url: string; mimetype: string }[]).filter(Boolean);

    if (uploadedFiles.length === 0) {
        throw new Error(`There is an issue during upload file in Google AI Server!`);
    }

    const fileData = this.getFileData(uploadedFiles);

    // Read the last message without popping it, so the caller's `params.messages` is left intact.
    const userMessage: TLLMMessageBlock = Array.isArray(messages) ? messages[messages.length - 1] : { role: TLLMMessageRole.User, content: '' };
    let prompt = userMessage?.content || '';

    // If the model does not support system instruction, it is appended to the prompt.
    if (!MODELS_SUPPORT_SYSTEM_INSTRUCTION.includes(model as string)) {
        prompt = `${prompt}\n${systemInstruction}`;
    }

    // A single file goes before the prompt as a text part; several files follow the bare prompt.
    const parts = fileData.length === 1 ? ([...fileData, { text: prompt }] as any) : ([prompt, ...fileData] as any);

    return parts as string;
}
707
-
708
/**
 * Builds a `GenerateContentRequest` for a request that uses tools.
 *
 * A system message, when present, is separated from the conversation and its
 * string content becomes the system instruction. The configured tool choice is
 * mapped to a Google AI function-calling mode.
 */
private async prepareMessagesWithTools(params: TLLMPreparedParams): Promise<GenerateContentRequest> {
    const messages = params?.messages || [];

    let contents: TLLMMessageBlock[] = messages;
    let systemInstruction = '';

    if (LLMHelper.hasSystemMessage(messages)) {
        const { systemMessage, otherMessages } = LLMHelper.separateSystemMessages(messages);
        const systemContent = (systemMessage as TLLMMessageBlock)?.content;

        // Only plain-string system content is used as the instruction.
        systemInstruction = typeof systemContent === 'string' ? systemContent : '';
        contents = otherMessages;
    }

    const toolsPrompt: GenerateContentRequest = { contents: contents as any };

    if (systemInstruction) {
        toolsPrompt.systemInstruction = systemInstruction;
    }

    const toolsConfig = params?.toolsConfig;
    if (toolsConfig?.tools) {
        toolsPrompt.tools = toolsConfig.tools as any;
    }

    const toolChoice = toolsConfig?.tool_choice;
    if (toolChoice) {
        // AUTO covers 'auto' and any unrecognized value.
        let mode: FunctionCallingMode = FunctionCallingMode.AUTO;

        if (toolChoice === 'required') {
            mode = FunctionCallingMode.ANY;
        } else if (toolChoice === 'none') {
            mode = FunctionCallingMode.NONE;
        } else if (toolChoice !== 'auto' && typeof toolChoice === 'object' && toolChoice.type === 'function') {
            // OpenAI-style named tool choice: force a function call.
            mode = FunctionCallingMode.ANY;
        }

        toolsPrompt.toolConfig = {
            functionCallingConfig: { mode },
        };
    }

    return toolsPrompt;
}
757
-
758
/**
 * Flattens a text-only conversation into a single prompt string.
 *
 * The first text part of every non-system message is joined with newlines.
 * For models outside `MODELS_SUPPORT_SYSTEM_INSTRUCTION`, the system
 * instruction (system message content plus the JSON instruction when JSON
 * output is requested) is appended to the prompt.
 *
 * @param params Prepared LLM parameters; `model`, `messages` and `responseFormat` are read.
 * @returns The prompt string.
 */
private async prepareMessagesWithTextQuery(params: TLLMPreparedParams): Promise<string> {
    const model = params.model;
    let systemInstruction = '';
    let prompt = '';

    const { systemMessage, otherMessages } = LLMHelper.separateSystemMessages(params?.messages as TLLMMessageBlock[]);

    // The `in` operator throws on null/undefined/primitives, so check the shape first.
    if (systemMessage && typeof systemMessage === 'object' && 'content' in systemMessage) {
        systemInstruction = systemMessage.content as string;
    }

    const responseFormat = params?.responseFormat || '';

    if (responseFormat === 'json') {
        systemInstruction += JSON_RESPONSE_INSTRUCTION;
    }

    if (otherMessages?.length > 0) {
        // Only the first text part of each message contributes to the prompt.
        prompt += otherMessages.map((message) => message?.parts?.[0]?.text || '').join('\n');
    }

    // If the model does not support system instruction, it is appended to the prompt.
    if (!MODELS_SUPPORT_SYSTEM_INSTRUCTION.includes(model as string)) {
        prompt = `${prompt}\n${systemInstruction}`;
    }

    return prompt;
}
793
-
794
/**
 * Builds the body for an image generation request from the prompt, the model
 * and the optional `aspectRatio` / `personGeneration` settings on `params`.
 */
private async prepareBodyForImageGenRequest(params: TLLMPreparedParams): Promise<any> {
    const { prompt, model } = params;
    // These two settings are not declared on TLLMPreparedParams, hence the cast.
    const { aspectRatio, personGeneration } = params as any;

    return { prompt, model, aspectRatio, personGeneration };
}
802
-
803
/**
 * Builds the body for an image editing request: every valid image file is
 * inlined as base64, followed by a text part holding the edit prompt.
 *
 * @param params Prepared LLM parameters; `files`, `prompt`, `model` and a few untyped extras are read.
 * @returns `{ model, contents, _metadata }`, where `_metadata` is extra information for usage reporting.
 * @throws If no file is given, no file is a valid image, or a file cannot be read.
 */
private async prepareImageEditBody(params: TLLMPreparedParams): Promise<any> {
    const extras = params as any;
    const model = params.model || 'gemini-2.5-flash-image-preview';

    // The edit prompt is the base prompt plus an optional extra instruction.
    const basePrompt = params.prompt || 'Edit this image';
    const editPrompt = extras.instruction ? `${basePrompt}. ${extras.instruction}` : basePrompt;

    const files: BinaryInput[] = params?.files || [];
    if (!(files.length > 0)) {
        throw new Error('No image provided for editing. Please include an image file.');
    }

    // Only valid image files are used for editing.
    const validImageFiles = this.getValidFiles(files, 'image');
    if (validImageFiles.length === 0) {
        throw new Error('No valid image files found for editing. Please provide at least one image file.');
    }

    // The original images go first in the contents, as inline base64 data.
    const contents: any[] = [];
    for (const file of validImageFiles) {
        try {
            const bufferData = await file.getBuffer();
            contents.push({
                inlineData: {
                    mimeType: file.mimetype,
                    data: Buffer.from(bufferData).toString('base64'),
                },
            });
        } catch (error) {
            throw new Error(`Failed to process image file: ${error.message}`);
        }
    }

    // The edit instruction comes after the images.
    contents.push({ text: editPrompt });

    return {
        model,
        contents,
        // Additional metadata for usage reporting
        _metadata: {
            prompt: editPrompt,
            numberOfImages: extras.n || 1,
            aspectRatio: extras.aspect_ratio || extras.size || '1:1',
            personGeneration: extras.person_generation || 'allow_adult',
        },
    };
}
861
-
862
/**
 * Turns an arbitrary tool name into a valid function name.
 *
 * Characters other than letters, digits, `_`, `.` and `-` are removed; a
 * missing name, or one with nothing left after cleaning, becomes
 * `_unnamed_function`; a name not starting with a letter or underscore gets a
 * `_` prefix; the result is cut to 64 characters.
 *
 * @param name The raw tool name (may be null or undefined).
 * @returns The sanitized name.
 */
private sanitizeFunctionName(name: string): string {
    const FALLBACK_NAME = '_unnamed_function';

    if (name == null) {
        return FALLBACK_NAME;
    }

    // Remove any characters that are not alphanumeric, underscore, dot, or dash
    let sanitized = name.replace(/[^a-zA-Z0-9_.-]/g, '');

    // This check must come before the prefix step: prefixing first would turn
    // an empty result into '_' and the fallback would never be used.
    if (sanitized === '') {
        return FALLBACK_NAME;
    }

    // Ensure the name starts with a letter or underscore
    if (!/^[a-zA-Z_]/.test(sanitized)) {
        sanitized = '_' + sanitized;
    }

    // Truncate to 64 characters if longer
    return sanitized.slice(0, 64);
}
887
-
888
/**
 * Uploads a file to the Google AI File Manager and waits until it has been
 * processed.
 *
 * The file data is written to a temporary file, uploaded, and the file state
 * is polled every 10 seconds while it is `PROCESSING`. The temporary file is
 * removed whether or not the upload succeeds.
 *
 * @param file The file to upload; its `mimetype` is required.
 * @param apiKey Google AI API key.
 * @param agentId Agent used as the access candidate when reading the file data.
 * @returns The URI of the uploaded file (empty string if none is returned).
 * @throws An `Error` prefixed with "Error uploading file for Google AI:" on any failure.
 */
private async uploadFile({ file, apiKey, agentId }: { file: BinaryInput; apiKey: string; agentId: string }): Promise<{ url: string }> {
    let tempFilePath: string | undefined;

    try {
        if (!apiKey || !file?.mimetype) {
            throw new Error('Missing required parameters to save file for Google AI!');
        }

        // Stage the data under a unique name in the OS temp directory.
        const fileName = uid();
        tempFilePath = path.join(os.tmpdir(), fileName);

        const bufferData = await file.readData(AccessCandidate.agent(agentId));

        // Write buffer data to temp file
        await fs.promises.writeFile(tempFilePath, new Uint8Array(bufferData));

        // Upload the file to the Google File Manager
        const fileManager = new GoogleAIFileManager(apiKey);

        const uploadResponse = await fileManager.uploadFile(tempFilePath, {
            mimeType: file.mimetype,
            displayName: fileName,
        });

        const name = uploadResponse.file.name;

        // Poll getFile() on a set interval (10 seconds here) to check file state.
        let uploadedFile = await fileManager.getFile(name);
        while (uploadedFile.state === FileState.PROCESSING) {
            process.stdout.write('.');
            // Sleep for 10 seconds
            await new Promise((resolve) => setTimeout(resolve, 10_000));
            // Fetch the file from the API again
            uploadedFile = await fileManager.getFile(name);
        }

        if (uploadedFile.state === FileState.FAILED) {
            throw new Error('File processing failed.');
        }

        return {
            url: uploadResponse.file.uri || '',
        };
    } catch (error) {
        throw new Error(`Error uploading file for Google AI: ${error.message}`);
    } finally {
        // Clean up on every path. Cleanup is best-effort: a failed unlink (for
        // example when the file was never written) must not mask the real outcome.
        if (tempFilePath) {
            try {
                await fs.promises.unlink(tempFilePath);
            } catch {
                // ignore
            }
        }
    }
}
938
-
939
/**
 * Keeps only the files whose MIME type is supported for the given category.
 *
 * @param files Candidate files.
 * @param type `'image'` for image types only, `'all'` for every supported type.
 * @returns The supported files, in their original order.
 * @throws If none of the files is supported.
 */
private getValidFiles(files: BinaryInput[], type: 'image' | 'all') {
    const supported = files.filter((file) => this.validMimeTypes[type].includes(file?.mimetype));

    if (supported.length === 0) {
        throw new Error(`Unsupported file(s). Please make sure your file is one of the following types: ${this.validMimeTypes[type].join(', ')}`);
    }

    return supported;
}
954
-
955
/**
 * Maps uploaded file descriptors to `fileData` parts, renaming `mimetype` to
 * `mimeType` and `url` to `fileUri`.
 */
private getFileData(
    files: {
        url: string;
        mimetype: string;
    }[]
): {
    fileData: {
        mimeType: string;
        fileUri: string;
    };
}[] {
    const parts = [];

    for (const { mimetype, url } of files) {
        parts.push({ fileData: { mimeType: mimetype, fileUri: url } });
    }

    return parts;
}
983
- }
1
+ import os from 'os';
2
+ import path from 'path';
3
+ import EventEmitter from 'events';
4
+ import fs from 'fs';
5
+
6
+ import { GoogleGenerativeAI, ModelParams, GenerationConfig, GenerateContentRequest, UsageMetadata, FunctionCallingMode } from '@google/generative-ai';
7
+ import { GoogleAIFileManager, FileState } from '@google/generative-ai/server';
8
+ import { GoogleGenAI } from '@google/genai';
9
+
10
+ import { JSON_RESPONSE_INSTRUCTION, BUILT_IN_MODEL_PREFIX } from '@sre/constants';
11
+ import { BinaryInput } from '@sre/helpers/BinaryInput.helper';
12
+ import { AccessCandidate } from '@sre/Security/AccessControl/AccessCandidate.class';
13
+ import { uid } from '@sre/utils';
14
+
15
+ import { processWithConcurrencyLimit } from '@sre/utils';
16
+
17
+ import {
18
+ TLLMMessageBlock,
19
+ ToolData,
20
+ TLLMMessageRole,
21
+ TLLMToolResultMessageBlock,
22
+ APIKeySource,
23
+ TLLMEvent,
24
+ BasicCredentials,
25
+ ILLMRequestFuncParams,
26
+ TLLMChatResponse,
27
+ TGoogleAIRequestBody,
28
+ ILLMRequestContext,
29
+ TLLMPreparedParams,
30
+ LLMInterface,
31
+ } from '@sre/types/LLM.types';
32
+ import { LLMHelper } from '@sre/LLMManager/LLM.helper';
33
+
34
+ import { SystemEvents } from '@sre/Core/SystemEvents';
35
+ import { SUPPORTED_MIME_TYPES_MAP } from '@sre/constants';
36
+ import { Logger } from '@sre/helpers/Log.helper';
37
+
38
+ import { LLMConnector } from '../LLMConnector';
39
+
40
const logger = Logger('GoogleAIConnector');

// Models for which a dedicated system instruction is used; for every other
// model the system instruction is appended to the prompt instead.
const MODELS_SUPPORT_SYSTEM_INSTRUCTION = [
    'gemini-1.5-pro-exp-0801',
    'gemini-1.5-pro-latest',
    'gemini-1.5-pro',
    'gemini-1.5-pro-001',
    'gemini-1.5-flash-latest',
    'gemini-1.5-flash-001',
    'gemini-1.5-flash',
];
// The JSON response MIME type is requested for the same set of models.
const MODELS_SUPPORT_JSON_RESPONSE = MODELS_SUPPORT_SYSTEM_INSTRUCTION;

// Supported file MIME types for Google AI's Gemini models
const VALID_MIME_TYPES = [
    ...SUPPORTED_MIME_TYPES_MAP.GoogleAI.image,
    ...SUPPORTED_MIME_TYPES_MAP.GoogleAI.audio,
    ...SUPPORTED_MIME_TYPES_MAP.GoogleAI.video,
    ...SUPPORTED_MIME_TYPES_MAP.GoogleAI.document,
];

// will be removed after updating the SDK
type UsageMetadataWithThoughtsToken = UsageMetadata & { thoughtsTokenCount?: number; cost?: number };

// Fixed cost per generated image, keyed by model name.
const IMAGE_GEN_FIXED_PRICING = {
    'imagen-3.0-generate-001': 0.04, // Fixed cost per image
    'imagen-4.0-generate-001': 0.04, // Fixed cost per image
    'imagen-4': 0.04, // Standard Imagen 4
    'imagen-4-ultra': 0.06, // Imagen 4 Ultra
    'gemini-2.5-flash-image': 0.039,
};
72
+
73
+ export class GoogleAIConnector extends LLMConnector {
74
+ public name = 'LLM:GoogleAI';
75
+
76
+ private validMimeTypes = {
77
+ all: VALID_MIME_TYPES,
78
+ image: SUPPORTED_MIME_TYPES_MAP.GoogleAI.image,
79
+ };
80
+
81
/**
 * Creates a `GoogleGenerativeAI` client from the API key in the request context.
 *
 * @throws If the context credentials carry no API key.
 */
private async getClient(params: ILLMRequestContext): Promise<GoogleGenerativeAI> {
    const credentials = params.credentials as BasicCredentials;

    if (!credentials?.apiKey) {
        throw new Error('Please provide an API key for Google AI');
    }

    return new GoogleGenerativeAI(credentials.apiKey);
}
88
+
89
/**
 * Sends a non-streaming generation request to Google AI.
 *
 * `body.messages` is used as the prompt and removed from `body`; the rest of
 * `body` is passed to `getGenerativeModel`. Usage is reported, and any
 * function calls in the first candidate are returned as tool data.
 *
 * @returns Content, lower-cased finish reason, tool information and raw usage metadata.
 * @throws Re-throws any error after logging it.
 */
protected async request({ acRequest, body, context }: ILLMRequestFuncParams): Promise<TLLMChatResponse> {
    try {
        logger.debug(`request ${this.name}`, acRequest.candidate);
        const prompt = body.messages;
        delete body.messages;

        const genAI = await this.getClient(context);
        const $model = genAI.getGenerativeModel(body);

        const result = await $model.generateContent(prompt);

        const response = await result.response;
        const content = response.text();

        // `candidates` can be missing or empty, so the first candidate is read defensively.
        const firstCandidate = response.candidates?.[0];
        const finishReason = firstCandidate?.finishReason || 'stop';

        const usage = response?.usageMetadata as UsageMetadataWithThoughtsToken;
        this.reportUsage(usage, {
            modelEntryName: context.modelEntryName,
            keySource: context.isUserKey ? APIKeySource.User : APIKeySource.Smyth,
            agentId: context.agentId,
            teamId: context.teamId,
        });

        const toolCalls = firstCandidate?.content?.parts?.filter((part) => part.functionCall);

        let toolsData: ToolData[] = [];
        let useTool = false;

        if (toolCalls && toolCalls.length > 0) {
            // Tool ids are synthesized from the call's position in the response.
            toolsData = toolCalls.map((toolCall, index) => ({
                index,
                id: `tool-${index}`,
                type: 'function',
                name: toolCall.functionCall.name,
                arguments: JSON.stringify(toolCall.functionCall.args),
                role: TLLMMessageRole.Assistant,
            }));
            useTool = true;
        }

        return {
            content,
            finishReason: finishReason.toLowerCase(),
            useTool,
            toolsData,
            message: { content, role: 'assistant' },
            usage,
        };
    } catch (error: any) {
        logger.error(`request ${this.name}`, error, acRequest.candidate);
        throw error;
    }
}
141
+
142
/**
 * Sends a streaming generation request to Google AI and returns an emitter
 * that relays the stream.
 *
 * Events emitted: `'content'` for each chunk's text, `TLLMEvent.ToolInfo`
 * when a chunk carries function calls, and `'end'` (with the collected tool
 * data) 100 ms after the stream finishes. If consuming the stream fails, the
 * error is logged and forwarded as an `'error'` event when a listener exists.
 *
 * @returns The emitter, returned as soon as the stream has been opened.
 * @throws Re-throws errors raised while opening the stream, after logging them.
 */
protected async streamRequest({ acRequest, body, context }: ILLMRequestFuncParams): Promise<EventEmitter> {
    logger.debug(`streamRequest ${this.name}`, acRequest.candidate);
    const emitter = new EventEmitter();

    const prompt = body.messages;
    delete body.messages;

    const genAI = await this.getClient(context);
    const $model = genAI.getGenerativeModel(body);

    try {
        const result = await $model.generateContentStream(prompt);

        let toolsData: ToolData[] = [];
        let usage: UsageMetadataWithThoughtsToken;

        // The stream is consumed in the background because the emitter must be returned immediately.
        (async () => {
            try {
                for await (const chunk of result.stream) {
                    const chunkText = chunk.text();
                    emitter.emit('content', chunkText);

                    // `candidates` can be missing on a chunk, so it is read defensively.
                    const parts = chunk.candidates?.[0]?.content?.parts;
                    if (parts) {
                        const toolCalls = parts.filter((part) => part.functionCall);
                        if (toolCalls.length > 0) {
                            toolsData = toolCalls.map((toolCall, index) => ({
                                index,
                                id: `tool-${index}`,
                                type: 'function',
                                name: toolCall.functionCall.name,
                                arguments: JSON.stringify(toolCall.functionCall.args),
                                role: TLLMMessageRole.Assistant,
                            }));
                            emitter.emit(TLLMEvent.ToolInfo, toolsData);
                        }
                    }

                    // Google repeats the request-level usage on each chunk rather than
                    // sending per-chunk usage; output tokens only appear in the last
                    // chunk. Keep the latest value and report once when the stream ends.
                    // e.g emit1: { input_tokens: 500, output_tokens: undefined }
                    // e.g emit2: { input_tokens: 500, output_tokens: undefined }
                    // e.g emit3: { input_tokens: 500, output_tokens: 10 }
                    if (chunk?.usageMetadata) {
                        usage = chunk.usageMetadata as UsageMetadataWithThoughtsToken;
                    }
                }

                if (usage) {
                    this.reportUsage(usage, {
                        modelEntryName: context.modelEntryName,
                        keySource: context.isUserKey ? APIKeySource.User : APIKeySource.Smyth,
                        agentId: context.agentId,
                        teamId: context.teamId,
                    });
                }

                setTimeout(() => {
                    emitter.emit('end', toolsData);
                }, 100);
            } catch (error: any) {
                // Without this handler a mid-stream failure is an unhandled promise rejection.
                logger.error(`streamRequest ${this.name}`, error, acRequest.candidate);
                // EventEmitter throws when 'error' is emitted with no listener attached.
                if (emitter.listenerCount('error') > 0) {
                    emitter.emit('error', error);
                }
            }
        })();

        return emitter;
    } catch (error: any) {
        logger.error(`streamRequest ${this.name}`, error, acRequest.candidate);
        throw error;
    }
}
211
+ // #region Image Generation, will be moved to a different subsystem/service
212
+
213
/**
 * Generates images with Google AI.
 *
 * The model's interface selects the API: `GenerateContent` uses
 * `ai.models.generateContent`, `GenerateImages` (the default) uses
 * `ai.models.generateImages`. Usage is reported through `reportImageUsage`
 * with the fixed per-image price looked up by model name.
 *
 * Options are read under both snake_case (`aspect_ratio`, `size`,
 * `person_generation`) and camelCase (`aspectRatio`, `personGeneration`)
 * keys; the snake_case keys win when both are present.
 *
 * @returns `{ created, data }`, each entry holding a data URL, the base64 payload and the prompt.
 * @throws If the API key is missing, the interface is unsupported, or the
 *         `GenerateContent` path returned no image.
 */
protected async imageGenRequest({ body, context }: ILLMRequestFuncParams): Promise<any> {
    const apiKey = (context.credentials as BasicCredentials)?.apiKey;
    if (!apiKey) throw new Error('Please provide an API key for Google AI');

    const model = body.model || 'imagen-3.0-generate-001';
    const modelName = context.modelEntryName.replace(BUILT_IN_MODEL_PREFIX, '');

    // The prepared body carries camelCase keys, so both spellings are accepted.
    const config = {
        numberOfImages: body.n || 1,
        aspectRatio: body.aspect_ratio || body.size || body.aspectRatio || '1:1',
        personGeneration: body.person_generation || body.personGeneration || 'allow_adult',
    };

    const ai = new GoogleGenAI({ apiKey });

    // Default to GenerateImages interface if not specified
    const modelInterface = context.modelInfo?.interface || LLMInterface.GenerateImages;

    let response: any;

    if (modelInterface === LLMInterface.GenerateContent) {
        // Use Gemini image generation API
        response = await ai.models.generateContent({
            model,
            contents: body.prompt,
        });

        // Extract image data from Gemini response format
        const imageData: any[] = [];
        for (const part of response.candidates?.[0]?.content?.parts || []) {
            if (part.inlineData?.data) {
                // Use the MIME type reported with the data, falling back to PNG.
                const mimeType = part.inlineData.mimeType || 'image/png';
                imageData.push({
                    url: `data:${mimeType};base64,${part.inlineData.data}`,
                    b64_json: part.inlineData.data,
                    revised_prompt: body.prompt,
                });
            }
        }

        // Report input tokens and image cost pricing based on the official pricing page:
        // https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-flash-image-preview
        const usageMetadata = response?.usageMetadata as UsageMetadataWithThoughtsToken;

        this.reportImageUsage({
            usage: {
                cost: IMAGE_GEN_FIXED_PRICING[modelName],
                usageMetadata,
            },
            context,
        });

        if (imageData.length === 0) {
            throw new Error(
                'Please enter a valid prompt — for example: "Create a picture of a nano banana dish in a fancy restaurant with a Gemini theme."'
            );
        }

        return {
            created: Math.floor(Date.now() / 1000),
            data: imageData,
        };
    } else if (modelInterface === LLMInterface.GenerateImages) {
        // Use traditional Imagen models
        response = await ai.models.generateImages({
            model,
            prompt: body.prompt,
            config,
        });

        // Report input tokens and image cost pricing based on the official pricing page:
        // https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-flash-image-preview
        const usageMetadata = response?.usageMetadata as UsageMetadataWithThoughtsToken;
        this.reportImageUsage({
            usage: {
                cost: IMAGE_GEN_FIXED_PRICING[modelName],
                usageMetadata,
            },
            numberOfImages: config.numberOfImages,
            context,
        });

        return {
            created: Math.floor(Date.now() / 1000),
            data:
                response.generatedImages?.map((generatedImage: any) => {
                    // An entry may come back without an `image` object, so it is read defensively.
                    const imageBytes = generatedImage.image?.imageBytes;
                    return {
                        url: imageBytes ? `data:image/png;base64,${imageBytes}` : undefined,
                        b64_json: imageBytes,
                        revised_prompt: body.prompt,
                    };
                }) || [],
        };
    } else {
        throw new Error(`Unsupported interface: ${modelInterface}`);
    }
}
309
+
310
/**
 * Edits images with a Google AI model that implements the `GenerateContent`
 * interface.
 *
 * The prepared `body.contents` are sent as-is. Every inline image part in the
 * first candidate is returned, and usage is reported through
 * `reportImageUsage` with the fixed price looked up by model name.
 *
 * @returns `{ created, data }`, each entry holding a data URL, the base64 payload and the prompt.
 * @throws If the API key is missing or the model does not support editing.
 */
protected async imageEditRequest({ body, context }: ILLMRequestFuncParams): Promise<any> {
    const apiKey = (context.credentials as BasicCredentials)?.apiKey;
    if (!apiKey) throw new Error('Please provide an API key for Google AI');

    // A model supports image editing if it implements the `generateContent` interface.
    const supportsEditing = context.modelInfo?.interface === LLMInterface.GenerateContent;
    if (!supportsEditing) {
        throw new Error(`Image editing is not supported for model: ${body.model}. This model only supports image generation.`);
    }

    const ai = new GoogleGenAI({ apiKey });
    const modelName = context.modelEntryName.replace(BUILT_IN_MODEL_PREFIX, '');

    // Use the prepared body which already contains processed files and contents
    const response = await ai.models.generateContent({
        model: body.model,
        contents: body.contents,
    });

    // Extract image data from Gemini response format
    const imageData: any[] = [];
    for (const part of response.candidates?.[0]?.content?.parts || []) {
        if (part.inlineData?.data) {
            // Use the MIME type reported with the data, falling back to PNG.
            const mimeType = part.inlineData.mimeType || 'image/png';
            imageData.push({
                url: `data:${mimeType};base64,${part.inlineData.data}`,
                b64_json: part.inlineData.data,
                revised_prompt: body._metadata?.prompt || body.prompt,
            });
        }
    }

    // Report pricing for input tokens and image costs
    const usageMetadata = response?.usageMetadata as UsageMetadataWithThoughtsToken;

    this.reportImageUsage({
        usage: {
            cost: IMAGE_GEN_FIXED_PRICING[modelName],
            usageMetadata,
        },
        context,
    });

    return {
        created: Math.floor(Date.now() / 1000),
        data: imageData,
    };
}
359
+
360
/**
 * Converts the prepared LLM parameters into a Google AI request body.
 *
 * Image-capable requests are delegated to the image body builders (edit when files
 * are attached, generation otherwise). Every other request gets a body made of the
 * model, the prepared messages and, when applicable, a system instruction and a
 * generation config.
 *
 * @param params - Prepared parameters of the request.
 * @returns The request body for the selected request type.
 */
protected async reqBodyAdapter(params: TLLMPreparedParams): Promise<TGoogleAIRequestBody> {
    const model = params?.model;

    // Image requests: attached files mean "edit", no files mean "generate".
    if (params?.capabilities?.imageGeneration) {
        return params?.files?.length > 0
            ? (this.prepareImageEditBody(params) as any)
            : (this.prepareBodyForImageGenRequest(params) as any);
    }

    const body: ModelParams & { messages: string | TLLMMessageBlock[] | GenerateContentRequest } = {
        model: model as string,
        messages: await this.prepareMessages(params),
    };

    // A JSON response always adds the instruction text; the JSON MIME type is only
    // requested for models listed as supporting it.
    const wantsJson = (params?.responseFormat || '') === 'json';
    const systemInstruction = wantsJson ? '' + JSON_RESPONSE_INSTRUCTION : '';
    const responseMimeType = wantsJson && MODELS_SUPPORT_JSON_RESPONSE.includes(model as string) ? 'application/json' : '';

    const generationConfig: GenerationConfig = {};
    if (params.maxTokens !== undefined) generationConfig.maxOutputTokens = params.maxTokens;
    if (params.temperature !== undefined) generationConfig.temperature = params.temperature;
    if (params.topP !== undefined) generationConfig.topP = params.topP;
    if (params.topK !== undefined) generationConfig.topK = params.topK;
    if (params.stopSequences?.length) generationConfig.stopSequences = params.stopSequences;
    if (responseMimeType) generationConfig.responseMimeType = responseMimeType;

    if (systemInstruction) body.systemInstruction = systemInstruction;

    // Only attach the generation config when at least one option was set.
    const hasGenerationOptions = Object.keys(generationConfig).length > 0;
    if (hasGenerationOptions) body.generationConfig = generationConfig;

    return body;
}
409
+
410
/**
 * Builds the usage record of a completion and publishes it on the `USAGE:LLM` event.
 *
 * @param usage - Usage metadata returned with the model response.
 * @param metadata - Model entry name, key source and agent/team ids used for attribution.
 * @returns The usage record that was emitted.
 */
protected reportUsage(
    usage: UsageMetadataWithThoughtsToken,
    metadata: { modelEntryName: string; keySource: APIKeySource; agentId: string; teamId: string }
) {
    // Built-in (SmythOS) entries are prefixed; strip the prefix to get the bare model name.
    const modelName = metadata.modelEntryName.replace(BUILT_IN_MODEL_PREFIX, '');

    // Text-input token counts at which the listed model families switch pricing tier.
    const tierThresholds = {
        'gemini-1.5-pro': 128_000,
        'gemini-2.5-pro': 200_000,
    };

    const promptDetails = usage?.['promptTokensDetails'];
    const textDetail = promptDetails?.find((detail) => detail.modality === 'TEXT');
    const audioDetail = promptDetails?.find((detail) => detail.modality === 'AUDIO');

    // Without a per-modality breakdown, fall back to the overall prompt token count.
    const textInputTokens = textDetail?.tokenCount || usage?.promptTokenCount || 0;
    const audioInputTokens = audioDetail?.tokenCount || 0;

    // The first threshold key contained in the model name decides the tier.
    let tier = '';
    for (const tieredModel of Object.keys(tierThresholds)) {
        if (!modelName.includes(tieredModel)) continue;
        tier = textInputTokens < tierThresholds[tieredModel] ? 'tier1' : 'tier2';
        break;
    }

    const usageData = {
        sourceId: `llm:${modelName}`,
        input_tokens: textInputTokens,
        output_tokens: usage?.candidatesTokenCount || 0,
        input_tokens_audio: audioInputTokens,
        input_tokens_cache_read: usage?.cachedContentTokenCount || 0,
        input_tokens_cache_write: 0,
        reasoning_tokens: usage?.thoughtsTokenCount,
        keySource: metadata.keySource,
        agentId: metadata.agentId,
        teamId: metadata.teamId,
        tier,
    };
    SystemEvents.emit('USAGE:LLM', usageData);

    return usageData;
}
451
+
452
/**
 * Reads the prompt token counts of the TEXT and IMAGE modalities from the usage metadata.
 *
 * @param usage - Usage metadata returned with the model response.
 * @returns The token count of the first TEXT and first IMAGE entry; 0 when an entry
 *          (or its count) is missing.
 */
private extractTokenCounts(usage: UsageMetadataWithThoughtsToken): { textTokens: number; imageTokens: number } {
    const promptDetails = usage?.['promptTokensDetails'];
    const textEntry = promptDetails?.find((entry) => entry.modality === 'TEXT');
    const imageEntry = promptDetails?.find((entry) => entry.modality === 'IMAGE');

    return {
        textTokens: textEntry?.tokenCount || 0,
        imageTokens: imageEntry?.tokenCount || 0,
    };
}
461
+
462
/**
 * Publishes the usage of an image request on the `USAGE:API` event.
 *
 * @param usage - Optional fixed per-image `cost` and optional `usageMetadata` of the response.
 * @param context - Request context; supplies the key source and the agent/team ids.
 * @param numberOfImages - Multiplier applied to the per-image cost (defaults to 1).
 */
protected reportImageUsage({
    usage,
    context,
    numberOfImages = 1,
}: {
    usage: { cost?: number; usageMetadata?: UsageMetadataWithThoughtsToken };
    context: ILLMRequestContext;
    numberOfImages?: number;
}) {
    // Token counts are only available when the response carried usage metadata.
    let input_tokens_txt = 0;
    let input_tokens_img = 0;

    if (usage.usageMetadata) {
        const { textTokens, imageTokens } = this.extractTokenCounts(usage.usageMetadata);
        input_tokens_txt = textTokens;
        input_tokens_img = imageTokens;
    }

    // `cost` is optional (e.g. a model missing from the pricing table); treat a missing
    // value as 0 so the emitted record never contains NaN.
    const unitCost = usage.cost ?? 0;

    const imageUsageData = {
        sourceId: `api:imagegen.smyth`,
        keySource: context.isUserKey ? APIKeySource.User : APIKeySource.Smyth,

        cost: unitCost * numberOfImages,
        input_tokens_txt,
        input_tokens_img,

        agentId: context.agentId,
        teamId: context.teamId,
    };
    SystemEvents.emit('USAGE:API', imageUsageData);
}
494
+
495
/**
 * Converts generic tool definitions into the function-declaration structure used in
 * Google AI requests.
 *
 * @param toolDefinitions - Tools described by `name`, `description`, `properties` and `requiredFields`.
 * @param toolChoice - Tool choice to report back (defaults to `'auto'`).
 * @returns `{ tools, toolChoice }`, where each tool wraps a single function declaration.
 */
public formatToolsConfig({ toolDefinitions, toolChoice = 'auto' }) {
    const toFunctionDeclaration = (definition) => {
        const { name, description, properties, requiredFields } = definition;
        const hasProperties = properties && Object.keys(properties).length > 0;

        return {
            // Names are sanitized so they are accepted as function names.
            name: this.sanitizeFunctionName(name),
            description: description || '',
            parameters: {
                type: 'OBJECT',
                // An OBJECT schema needs at least one property, hence the placeholder.
                properties: hasProperties ? properties : { dummy: { type: 'string' } },
                required: requiredFields || [],
            },
        };
    };

    const tools = toolDefinitions.map((definition) => ({
        functionDeclarations: [toFunctionDeclaration(definition)],
    }));

    return {
        tools,
        toolChoice: {
            type: toolChoice,
        },
    };
}
527
+
528
/**
 * Builds the message blocks that feed tool results back to the model: the message
 * that requested the tool calls (when given) followed by one user message per tool
 * result, each carrying a `functionResponse` part.
 *
 * @param messageBlock - The message that contained the function calls; may be absent.
 * @param toolsData - Executed tools with their `name` and `result`.
 * @returns The message blocks in the order they should be appended to the conversation.
 * @throws SyntaxError when a function call carries string arguments that are not valid JSON.
 */
public transformToolMessageBlocks({
    messageBlock,
    toolsData,
}: {
    messageBlock: TLLMMessageBlock;
    toolsData: ToolData[];
}): TLLMToolResultMessageBlock[] {
    const messageBlocks: TLLMToolResultMessageBlock[] = [];

    if (messageBlock) {
        const content = [];
        if (typeof messageBlock.content === 'string') {
            content.push({ text: messageBlock.content });
        } else if (Array.isArray(messageBlock.content)) {
            content.push(...messageBlock.content);
        }

        if (messageBlock.parts) {
            const functionCalls = messageBlock.parts.filter((part) => part.functionCall);
            if (functionCalls.length > 0) {
                content.push(
                    ...functionCalls.map((call) => {
                        const rawArgs = call.functionCall.args;
                        return {
                            functionCall: {
                                name: call.functionCall.name,
                                // Arguments may arrive JSON-encoded or already as an object;
                                // only strings are parsed, since JSON.parse on an object throws.
                                args: typeof rawArgs === 'string' ? JSON.parse(rawArgs) : rawArgs,
                            },
                        };
                    })
                );
            }
        }

        messageBlocks.push({
            role: messageBlock.role,
            parts: content,
        });
    }

    const transformedToolsData = toolsData.map(
        (toolData): TLLMToolResultMessageBlock => ({
            role: TLLMMessageRole.User,
            parts: [
                {
                    functionResponse: {
                        name: toolData.name,
                        response: {
                            name: toolData.name,
                            // Non-string results are serialized so the content is always a string.
                            content: typeof toolData.result === 'string' ? toolData.result : JSON.stringify(toolData.result),
                        },
                    },
                },
            ],
        })
    );

    return [...messageBlocks, ...transformedToolsData];
}
584
+
585
/**
 * Normalizes messages into the `{ role, parts: [{ text }] }` shape: duplicate user
 * messages are removed, roles are mapped to `user`/`model`, and the text of each
 * message is flattened into a single text part.
 *
 * @param messages - Conversation messages in the generic format.
 * @returns New message objects (shallow copies) without a `content` field.
 */
public getConsistentMessages(messages: TLLMMessageBlock[]): TLLMMessageBlock[] {
    const deduplicated = LLMHelper.removeDuplicateUserMessages(messages);

    return deduplicated.map((source) => {
        const normalized = { ...source };

        // Assistant and system messages become `model`; anything that is not already
        // `user` falls back to `user`.
        if (normalized.role === TLLMMessageRole.Assistant || normalized.role === TLLMMessageRole.System) {
            normalized.role = TLLMMessageRole.Model;
        } else if (normalized.role !== TLLMMessageRole.User) {
            normalized.role = TLLMMessageRole.User;
        }

        // Empty text causes an error, so '...' stands in for every missing piece of text.
        let flattenedText = '';
        if (normalized?.parts) {
            flattenedText = normalized.parts.map((block) => block?.text || '...').join(' ');
        } else if (Array.isArray(normalized?.content)) {
            flattenedText = normalized.content.map((block) => block?.text || '...').join(' ');
        } else if (normalized?.content) {
            flattenedText = (normalized.content as string) || '...';
        }

        normalized.parts = [{ text: flattenedText || '...' }];

        // `content` must not be sent along with `parts`.
        delete normalized.content;

        return normalized;
    });
}
622
+
623
/**
 * Picks the message-preparation strategy for a request: attached files take
 * precedence, then configured tools, otherwise a plain text query.
 *
 * @param params - Prepared parameters of the request.
 * @returns The messages in the shape produced by the selected strategy.
 */
private async prepareMessages(params: TLLMPreparedParams): Promise<string | TLLMMessageBlock[] | GenerateContentRequest> {
    const attachedFiles: BinaryInput[] = params?.files || [];

    if (attachedFiles.length > 0) {
        return await this.prepareMessagesWithFiles(params);
    }

    if (params?.toolsConfig?.tools?.length > 0) {
        return await this.prepareMessagesWithTools(params);
    }

    return await this.prepareMessagesWithTextQuery(params);
}
638
+
639
/**
 * Prepares the prompt for a request with attached files: the files are uploaded,
 * filtered to supported types, pushed to the Google AI file service and combined
 * with the text of the last message.
 *
 * @param params - Prepared parameters; reads `model`, `messages`, `files`, `agentId` and `credentials`.
 * @returns The prompt parts (file references plus text). The declared type is `string`,
 *          but the value is an array cast through `any`.
 * @throws Error when no file has a supported type, when a video is combined with other
 *         files, or when none of the files could be uploaded to Google AI.
 */
private async prepareMessagesWithFiles(params: TLLMPreparedParams): Promise<string> {
    const model = params.model;

    let messages: string | TLLMMessageBlock[] = params?.messages || '';
    // Never populated in this method; kept so the prompt layout below stays unchanged.
    const systemInstruction = '';
    const files: BinaryInput[] = params?.files || [];

    // #region Upload files
    const promises = [];
    const _files = [];

    for (const image of files) {
        const binaryInput = BinaryInput.from(image);
        promises.push(binaryInput.upload(AccessCandidate.agent(params.agentId)));

        _files.push(binaryInput);
    }

    await Promise.all(promises);
    // #endregion Upload files

    // If the user provides a mix of valid and invalid files, only the valid ones are processed.
    const validFiles = this.getValidFiles(_files, 'all');

    const hasVideo = validFiles.some((file) => file?.mimetype?.includes('video'));

    // GoogleAI only supports one video file at a time
    if (hasVideo && validFiles.length > 1) {
        throw new Error('Only one video file is supported at a time.');
    }

    const fileUploadingTasks = validFiles.map((file) => async () => {
        try {
            const uploadedFile = await this.uploadFile({
                file,
                apiKey: (params.credentials as BasicCredentials).apiKey,
                agentId: params.agentId,
            });

            return { url: uploadedFile.url, mimetype: file.mimetype };
        } catch {
            // A failed upload is reported as null and dropped below.
            return null;
        }
    });

    const uploadResults = await processWithConcurrencyLimit(fileUploadingTasks);

    // Drop failed uploads (null) so they are neither dereferenced later nor counted
    // as successfully uploaded files.
    const uploadedFiles = (uploadResults || []).filter((result) => result != null) as { url: string; mimetype: string }[];

    if (uploadedFiles.length === 0) {
        throw new Error(`There is an issue during upload file in Google AI Server!`);
    }

    const fileData = this.getFileData(uploadedFiles);

    // Read the last message without removing it, so the caller's `params.messages` is not mutated.
    const userMessage: TLLMMessageBlock = Array.isArray(messages)
        ? messages[messages.length - 1]
        : { role: TLLMMessageRole.User, content: '' };
    let prompt = userMessage?.content || '';

    // If the model does not support system instructions, the instruction is appended to the prompt.
    if (!MODELS_SUPPORT_SYSTEM_INSTRUCTION.includes(model as string)) {
        prompt = `${prompt}\n${systemInstruction}`;
    }

    // A single file is sent as [file, { text }]; several files as [prompt, ...files].
    messages = fileData.length === 1 ? ([...fileData, { text: prompt }] as any) : ([prompt, ...fileData] as any);

    return messages as string;
}
707
+
708
/**
 * Builds a `GenerateContentRequest` for a tool-enabled conversation: the system
 * message (if any) becomes the system instruction, the remaining messages become
 * the contents, and the tool choice is mapped to a function-calling mode.
 *
 * @param params - Prepared parameters; reads `messages` and `toolsConfig`.
 * @returns The request with `contents` and, when available, `systemInstruction`,
 *          `tools` and `toolConfig`.
 */
private async prepareMessagesWithTools(params: TLLMPreparedParams): Promise<GenerateContentRequest> {
    const allMessages = params?.messages || [];

    let conversation: TLLMMessageBlock[];
    let systemInstruction = '';

    if (LLMHelper.hasSystemMessage(allMessages)) {
        const { systemMessage, otherMessages } = LLMHelper.separateSystemMessages(allMessages);
        const systemContent = (systemMessage as TLLMMessageBlock)?.content;
        // Only plain-string system content is used as the instruction.
        systemInstruction = typeof systemContent === 'string' ? systemContent : '';
        conversation = otherMessages;
    } else {
        conversation = allMessages;
    }

    const request: GenerateContentRequest = {
        contents: conversation as any,
    };

    if (systemInstruction) {
        request.systemInstruction = systemInstruction;
    }

    const toolsConfig = params?.toolsConfig;
    if (toolsConfig?.tools) request.tools = toolsConfig?.tools as any;

    const toolChoice = toolsConfig?.tool_choice;
    if (toolChoice) {
        // 'required' and OpenAI-style named function choices both force a function call (ANY);
        // 'none' disables calls; 'auto' and anything unrecognized stay on AUTO.
        let mode: FunctionCallingMode = FunctionCallingMode.AUTO;
        if (toolChoice === 'required') {
            mode = FunctionCallingMode.ANY;
        } else if (toolChoice === 'none') {
            mode = FunctionCallingMode.NONE;
        } else if (typeof toolChoice === 'object' && toolChoice.type === 'function') {
            mode = FunctionCallingMode.ANY;
        }

        request.toolConfig = {
            functionCallingConfig: { mode },
        };
    }

    return request;
}
757
+
758
/**
 * Flattens a plain text conversation into a single prompt string.
 *
 * The first text part of every non-system message is joined with newlines. For
 * models that are not listed as supporting system instructions, the system text
 * (plus the JSON instruction when a JSON response is requested) is appended.
 *
 * NOTE(review): for models that do support system instructions, the system text
 * gathered here is not part of the returned prompt — confirm it is supplied elsewhere.
 *
 * @param params - Prepared parameters; reads `model`, `messages` and `responseFormat`.
 * @returns The prompt string.
 */
private async prepareMessagesWithTextQuery(params: TLLMPreparedParams): Promise<string> {
    const model = params.model;
    let systemInstruction = '';
    let prompt = '';

    const { systemMessage, otherMessages } = LLMHelper.separateSystemMessages(params?.messages as TLLMMessageBlock[]);

    // The `in` operator throws on non-objects, so make sure a system message object exists first.
    if (systemMessage && typeof systemMessage === 'object' && 'content' in systemMessage) {
        systemInstruction = systemMessage.content as string;
    }

    if ((params?.responseFormat || '') === 'json') {
        systemInstruction += JSON_RESPONSE_INSTRUCTION;
    }

    if (otherMessages?.length > 0) {
        // Only the first text part of each message contributes to the prompt.
        prompt += otherMessages.map((message) => message?.parts?.[0]?.text || '').join('\n');
    }

    // If the model does not support system instructions, append the instruction to the prompt.
    if (!MODELS_SUPPORT_SYSTEM_INSTRUCTION.includes(model as string)) {
        prompt = `${prompt}\n${systemInstruction}`;
    }

    return prompt;
}
793
+
794
/**
 * Builds the body of an image generation request from the prepared parameters.
 *
 * @param params - Prepared parameters; `aspectRatio` and `personGeneration` are read
 *                 even though they are not part of the declared parameter type.
 * @returns An object with `prompt`, `model`, `aspectRatio` and `personGeneration`.
 */
private async prepareBodyForImageGenRequest(params: TLLMPreparedParams): Promise<any> {
    const { aspectRatio, personGeneration } = params as any;

    return {
        prompt: params.prompt,
        model: params.model,
        aspectRatio,
        personGeneration,
    };
}
802
+
803
/**
 * Builds the body of an image edit request: every valid image file is inlined as
 * base64, followed by a text part holding the edit instruction.
 *
 * @param params - Prepared parameters; reads `model`, `prompt`, `files` and several
 *                 optional, untyped options (`instruction`, `n`, `aspect_ratio`, `size`,
 *                 `person_generation`).
 * @returns `{ model, contents, _metadata }`, where `_metadata` carries the final prompt
 *          and the image options.
 * @throws Error when no image is provided, none of the files is a valid image, or a
 *         file cannot be read.
 */
private async prepareImageEditBody(params: TLLMPreparedParams): Promise<any> {
    const options = params as any;
    const model = params.model || 'gemini-2.5-flash-image-preview';

    // The optional instruction is appended to the prompt as a second sentence.
    const basePrompt = params.prompt || 'Edit this image';
    const editPrompt = options.instruction ? `${basePrompt}. ${options.instruction}` : basePrompt;

    const files: BinaryInput[] = params?.files || [];
    if (!(files.length > 0)) {
        throw new Error('No image provided for editing. Please include an image file.');
    }

    // Only valid image files take part in the edit.
    const validImageFiles = this.getValidFiles(files, 'image');
    if (validImageFiles.length === 0) {
        throw new Error('No valid image files found for editing. Please provide at least one image file.');
    }

    // The original image(s) come first, the instruction last.
    const contents: any[] = [];
    for (const imageFile of validImageFiles) {
        try {
            const rawBytes = await imageFile.getBuffer();

            contents.push({
                inlineData: {
                    mimeType: imageFile.mimetype,
                    data: Buffer.from(rawBytes).toString('base64'),
                },
            });
        } catch (error) {
            throw new Error(`Failed to process image file: ${error.message}`);
        }
    }
    contents.push({ text: editPrompt });

    return {
        model,
        contents,
        // Extra information for the request handler; not part of the content payload.
        _metadata: {
            prompt: editPrompt,
            numberOfImages: options.n || 1,
            aspectRatio: options.aspect_ratio || options.size || '1:1',
            personGeneration: options.person_generation || 'allow_adult',
        },
    };
}
861
+
862
/**
 * Turns an arbitrary tool name into a valid function name: only letters, digits,
 * `_`, `.` and `-` are kept, the name starts with a letter or underscore, and it is
 * at most 64 characters long.
 *
 * @param name - The original tool name; may be `null`/`undefined`.
 * @returns The sanitized name, or `'_unnamed_function'` when nothing usable remains.
 */
private sanitizeFunctionName(name: string): string {
    const fallbackName = '_unnamed_function';

    if (name == null) {
        return fallbackName;
    }

    // Remove any characters that are not alphanumeric, underscore, dot, or dash
    let sanitized = name.replace(/[^a-zA-Z0-9_.-]/g, '');

    // The empty check must come before the prefixing step below; otherwise an empty
    // result would turn into a bare '_' and the fallback would never be used.
    if (sanitized === '') {
        return fallbackName;
    }

    // Ensure the name starts with a letter or underscore
    if (!/^[a-zA-Z_]/.test(sanitized)) {
        sanitized = '_' + sanitized;
    }

    // Truncate to 64 characters if longer
    return sanitized.slice(0, 64);
}
887
+
888
/**
 * Uploads a file to the Google AI file service and waits until it has been processed.
 *
 * The data is first written to a temporary file, because the file manager uploads
 * from a path. The temporary file is removed whether or not the upload succeeds.
 *
 * @param file - The file to upload; its `mimetype` is required.
 * @param apiKey - Google AI API key.
 * @param agentId - Agent on whose behalf the file data is read.
 * @returns The URI of the uploaded file (empty string when the service returns none).
 * @throws Error (prefixed with "Error uploading file for Google AI") on missing
 *         parameters, upload failure or failed processing.
 */
private async uploadFile({ file, apiKey, agentId }: { file: BinaryInput; apiKey: string; agentId: string }): Promise<{ url: string }> {
    // Set once the temp path is known so the `finally` block knows what to clean up.
    let tempFilePath = '';

    try {
        if (!apiKey || !file?.mimetype) {
            throw new Error('Missing required parameters to save file for Google AI!');
        }

        // Write the file data to a uniquely named file in the OS temp directory.
        const fileName = uid();
        tempFilePath = path.join(os.tmpdir(), fileName);

        const bufferData = await file.readData(AccessCandidate.agent(agentId));
        await fs.promises.writeFile(tempFilePath, new Uint8Array(bufferData));

        // Upload the file to the Google File Manager
        const fileManager = new GoogleAIFileManager(apiKey);

        const uploadResponse = await fileManager.uploadFile(tempFilePath, {
            mimeType: file.mimetype,
            displayName: fileName,
        });

        const name = uploadResponse.file.name;

        // Poll getFile() every 10 seconds until the file leaves the PROCESSING state.
        let uploadedFile = await fileManager.getFile(name);
        while (uploadedFile.state === FileState.PROCESSING) {
            process.stdout.write('.');
            await new Promise((resolve) => setTimeout(resolve, 10_000));
            uploadedFile = await fileManager.getFile(name);
        }

        if (uploadedFile.state === FileState.FAILED) {
            throw new Error('File processing failed.');
        }

        return {
            url: uploadResponse.file.uri || '',
        };
    } catch (error) {
        throw new Error(`Error uploading file for Google AI: ${error.message}`);
    } finally {
        // Best-effort cleanup on every path; the file may not exist if writing failed.
        if (tempFilePath) {
            await fs.promises.unlink(tempFilePath).catch(() => undefined);
        }
    }
}
938
+
939
/**
 * Keeps the files whose MIME type is listed in `validMimeTypes` for the given category.
 *
 * @param files - Candidate files.
 * @param type - MIME type category to validate against (`'image'` or `'all'`).
 * @returns The files with a supported MIME type, in their original order.
 * @throws Error when none of the files has a supported MIME type.
 */
private getValidFiles(files: BinaryInput[], type: 'image' | 'all') {
    const validSources = files.filter((candidate) => this.validMimeTypes[type].includes(candidate?.mimetype));

    if (validSources.length > 0) {
        return validSources;
    }

    throw new Error(`Unsupported file(s). Please make sure your file is one of the following types: ${this.validMimeTypes[type].join(', ')}`);
}
954
+
955
/**
 * Wraps uploaded files into `fileData` parts.
 *
 * @param files - Uploaded files with their `url` and `mimetype`.
 * @returns One `{ fileData: { mimeType, fileUri } }` entry per file, in the same order.
 */
private getFileData(
    files: {
        url: string;
        mimetype: string;
    }[]
): {
    fileData: {
        mimeType: string;
        fileUri: string;
    };
}[] {
    return files.map(({ url, mimetype }) => ({
        fileData: {
            mimeType: mimetype,
            fileUri: url,
        },
    }));
}
983
+ }