@smythos/sre 1.6.8 → 1.6.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (239)
  1. package/CHANGELOG +111 -111
  2. package/LICENSE +18 -18
  3. package/README.md +135 -135
  4. package/dist/bundle-analysis-lazy.html +4949 -0
  5. package/dist/bundle-analysis.html +4949 -0
  6. package/dist/index.js +2 -2
  7. package/dist/index.js.map +1 -1
  8. package/dist/types/Components/Triggers/Gmail.trigger.d.ts +58 -0
  9. package/dist/types/Components/Triggers/GmailTrigger.class.d.ts +44 -0
  10. package/dist/types/Components/Triggers/Trigger.class.d.ts +21 -0
  11. package/dist/types/Components/Triggers/WhatsApp.trigger.d.ts +22 -0
  12. package/dist/types/helpers/AIPerformanceAnalyzer.helper.d.ts +45 -0
  13. package/dist/types/helpers/AIPerformanceCollector.helper.d.ts +111 -0
  14. package/dist/types/subsystems/IO/Storage.service/connectors/AzureBlobStorage.class.d.ts +211 -0
  15. package/dist/types/subsystems/IO/VectorDB.service/connectors/WeaviateVectorDB.class.d.ts +187 -0
  16. package/dist/types/subsystems/PerformanceManager/Performance.service/PerformanceConnector.d.ts +102 -0
  17. package/dist/types/subsystems/PerformanceManager/Performance.service/connectors/LocalPerformanceConnector.class.d.ts +100 -0
  18. package/dist/types/subsystems/PerformanceManager/Performance.service/index.d.ts +22 -0
  19. package/dist/types/subsystems/Security/Credentials/Credentials.class.d.ts +2 -0
  20. package/dist/types/subsystems/Security/Credentials/ManagedOAuth2Credentials.class.d.ts +18 -0
  21. package/dist/types/subsystems/Security/Credentials/OAuth2Credentials.class.d.ts +14 -0
  22. package/dist/types/types/Performance.types.d.ts +468 -0
  23. package/dist/types/utils/package-manager.utils.d.ts +26 -0
  24. package/package.json +1 -1
  25. package/src/Components/APICall/APICall.class.ts +161 -161
  26. package/src/Components/APICall/AccessTokenManager.ts +166 -166
  27. package/src/Components/APICall/ArrayBufferResponse.helper.ts +58 -58
  28. package/src/Components/APICall/OAuth.helper.ts +447 -447
  29. package/src/Components/APICall/mimeTypeCategories.ts +46 -46
  30. package/src/Components/APICall/parseData.ts +167 -167
  31. package/src/Components/APICall/parseHeaders.ts +41 -41
  32. package/src/Components/APICall/parseProxy.ts +68 -68
  33. package/src/Components/APICall/parseUrl.ts +91 -91
  34. package/src/Components/APIEndpoint.class.ts +234 -234
  35. package/src/Components/APIOutput.class.ts +58 -58
  36. package/src/Components/AgentPlugin.class.ts +102 -102
  37. package/src/Components/Async.class.ts +155 -155
  38. package/src/Components/Await.class.ts +90 -90
  39. package/src/Components/Classifier.class.ts +158 -158
  40. package/src/Components/Component.class.ts +147 -147
  41. package/src/Components/ComponentHost.class.ts +38 -38
  42. package/src/Components/DataSourceCleaner.class.ts +92 -92
  43. package/src/Components/DataSourceIndexer.class.ts +181 -181
  44. package/src/Components/DataSourceLookup.class.ts +161 -161
  45. package/src/Components/ECMASandbox.class.ts +72 -72
  46. package/src/Components/FEncDec.class.ts +29 -29
  47. package/src/Components/FHash.class.ts +33 -33
  48. package/src/Components/FSign.class.ts +80 -80
  49. package/src/Components/FSleep.class.ts +25 -25
  50. package/src/Components/FTimestamp.class.ts +66 -66
  51. package/src/Components/FileStore.class.ts +78 -78
  52. package/src/Components/ForEach.class.ts +97 -97
  53. package/src/Components/GPTPlugin.class.ts +70 -70
  54. package/src/Components/GenAILLM.class.ts +586 -586
  55. package/src/Components/HuggingFace.class.ts +313 -313
  56. package/src/Components/Image/imageSettings.config.ts +70 -70
  57. package/src/Components/ImageGenerator.class.ts +483 -483
  58. package/src/Components/JSONFilter.class.ts +54 -54
  59. package/src/Components/LLMAssistant.class.ts +213 -213
  60. package/src/Components/LogicAND.class.ts +28 -28
  61. package/src/Components/LogicAtLeast.class.ts +85 -85
  62. package/src/Components/LogicAtMost.class.ts +86 -86
  63. package/src/Components/LogicOR.class.ts +29 -29
  64. package/src/Components/LogicXOR.class.ts +34 -34
  65. package/src/Components/MCPClient.class.ts +137 -137
  66. package/src/Components/MemoryDeleteKeyVal.class.ts +70 -70
  67. package/src/Components/MemoryReadKeyVal.class.ts +67 -67
  68. package/src/Components/MemoryWriteKeyVal.class.ts +62 -62
  69. package/src/Components/MemoryWriteObject.class.ts +97 -97
  70. package/src/Components/MultimodalLLM.class.ts +128 -128
  71. package/src/Components/OpenAPI.class.ts +72 -72
  72. package/src/Components/PromptGenerator.class.ts +122 -122
  73. package/src/Components/ScrapflyWebScrape.class.ts +183 -183
  74. package/src/Components/ServerlessCode.class.ts +123 -123
  75. package/src/Components/TavilyWebSearch.class.ts +103 -103
  76. package/src/Components/VisionLLM.class.ts +104 -104
  77. package/src/Components/ZapierAction.class.ts +127 -127
  78. package/src/Components/index.ts +97 -97
  79. package/src/Core/AgentProcess.helper.ts +240 -240
  80. package/src/Core/Connector.class.ts +123 -123
  81. package/src/Core/ConnectorsService.ts +197 -197
  82. package/src/Core/DummyConnector.ts +49 -49
  83. package/src/Core/HookService.ts +105 -105
  84. package/src/Core/SmythRuntime.class.ts +241 -241
  85. package/src/Core/SystemEvents.ts +16 -16
  86. package/src/Core/boot.ts +56 -56
  87. package/src/config.ts +15 -15
  88. package/src/constants.ts +126 -126
  89. package/src/data/hugging-face.params.json +579 -579
  90. package/src/helpers/AWSLambdaCode.helper.ts +624 -624
  91. package/src/helpers/BinaryInput.helper.ts +331 -331
  92. package/src/helpers/Conversation.helper.ts +1157 -1157
  93. package/src/helpers/ECMASandbox.helper.ts +64 -64
  94. package/src/helpers/JsonContent.helper.ts +97 -97
  95. package/src/helpers/LocalCache.helper.ts +97 -97
  96. package/src/helpers/Log.helper.ts +274 -274
  97. package/src/helpers/OpenApiParser.helper.ts +150 -150
  98. package/src/helpers/S3Cache.helper.ts +147 -147
  99. package/src/helpers/SmythURI.helper.ts +5 -5
  100. package/src/helpers/Sysconfig.helper.ts +95 -95
  101. package/src/helpers/TemplateString.helper.ts +243 -243
  102. package/src/helpers/TypeChecker.helper.ts +329 -329
  103. package/src/index.ts +3 -3
  104. package/src/index.ts.bak +3 -3
  105. package/src/subsystems/AgentManager/Agent.class.ts +1114 -1114
  106. package/src/subsystems/AgentManager/Agent.helper.ts +3 -3
  107. package/src/subsystems/AgentManager/AgentData.service/AgentDataConnector.ts +230 -230
  108. package/src/subsystems/AgentManager/AgentData.service/connectors/CLIAgentDataConnector.class.ts +66 -66
  109. package/src/subsystems/AgentManager/AgentData.service/connectors/LocalAgentDataConnector.class.ts +145 -145
  110. package/src/subsystems/AgentManager/AgentData.service/connectors/NullAgentData.class.ts +39 -39
  111. package/src/subsystems/AgentManager/AgentData.service/index.ts +18 -18
  112. package/src/subsystems/AgentManager/AgentLogger.class.ts +301 -301
  113. package/src/subsystems/AgentManager/AgentRequest.class.ts +51 -51
  114. package/src/subsystems/AgentManager/AgentRuntime.class.ts +557 -557
  115. package/src/subsystems/AgentManager/AgentSSE.class.ts +101 -101
  116. package/src/subsystems/AgentManager/AgentSettings.class.ts +52 -52
  117. package/src/subsystems/AgentManager/Component.service/ComponentConnector.ts +32 -32
  118. package/src/subsystems/AgentManager/Component.service/connectors/LocalComponentConnector.class.ts +60 -60
  119. package/src/subsystems/AgentManager/Component.service/index.ts +11 -11
  120. package/src/subsystems/AgentManager/EmbodimentSettings.class.ts +47 -47
  121. package/src/subsystems/AgentManager/ForkedAgent.class.ts +154 -154
  122. package/src/subsystems/AgentManager/OSResourceMonitor.ts +77 -77
  123. package/src/subsystems/ComputeManager/Code.service/CodeConnector.ts +98 -98
  124. package/src/subsystems/ComputeManager/Code.service/connectors/AWSLambdaCode.class.ts +171 -171
  125. package/src/subsystems/ComputeManager/Code.service/connectors/ECMASandbox.class.ts +131 -131
  126. package/src/subsystems/ComputeManager/Code.service/index.ts +13 -13
  127. package/src/subsystems/IO/CLI.service/CLIConnector.ts +47 -47
  128. package/src/subsystems/IO/CLI.service/index.ts +9 -9
  129. package/src/subsystems/IO/Log.service/LogConnector.ts +32 -32
  130. package/src/subsystems/IO/Log.service/connectors/ConsoleLog.class.ts +28 -28
  131. package/src/subsystems/IO/Log.service/index.ts +13 -13
  132. package/src/subsystems/IO/NKV.service/NKVConnector.ts +43 -43
  133. package/src/subsystems/IO/NKV.service/connectors/NKVLocalStorage.class.ts +234 -234
  134. package/src/subsystems/IO/NKV.service/connectors/NKVRAM.class.ts +204 -204
  135. package/src/subsystems/IO/NKV.service/connectors/NKVRedis.class.ts +182 -182
  136. package/src/subsystems/IO/NKV.service/index.ts +14 -14
  137. package/src/subsystems/IO/Router.service/RouterConnector.ts +21 -21
  138. package/src/subsystems/IO/Router.service/connectors/ExpressRouter.class.ts +48 -48
  139. package/src/subsystems/IO/Router.service/connectors/NullRouter.class.ts +40 -40
  140. package/src/subsystems/IO/Router.service/index.ts +11 -11
  141. package/src/subsystems/IO/Storage.service/SmythFS.class.ts +488 -488
  142. package/src/subsystems/IO/Storage.service/StorageConnector.ts +66 -66
  143. package/src/subsystems/IO/Storage.service/connectors/LocalStorage.class.ts +327 -327
  144. package/src/subsystems/IO/Storage.service/connectors/S3Storage.class.ts +482 -482
  145. package/src/subsystems/IO/Storage.service/index.ts +13 -13
  146. package/src/subsystems/IO/VectorDB.service/VectorDBConnector.ts +108 -108
  147. package/src/subsystems/IO/VectorDB.service/connectors/MilvusVectorDB.class.ts +465 -465
  148. package/src/subsystems/IO/VectorDB.service/connectors/PineconeVectorDB.class.ts +387 -387
  149. package/src/subsystems/IO/VectorDB.service/connectors/RAMVecrtorDB.class.ts +408 -408
  150. package/src/subsystems/IO/VectorDB.service/embed/BaseEmbedding.ts +107 -107
  151. package/src/subsystems/IO/VectorDB.service/embed/GoogleEmbedding.ts +118 -118
  152. package/src/subsystems/IO/VectorDB.service/embed/OpenAIEmbedding.ts +109 -109
  153. package/src/subsystems/IO/VectorDB.service/embed/index.ts +26 -26
  154. package/src/subsystems/IO/VectorDB.service/index.ts +14 -14
  155. package/src/subsystems/LLMManager/LLM.helper.ts +251 -251
  156. package/src/subsystems/LLMManager/LLM.inference.ts +345 -345
  157. package/src/subsystems/LLMManager/LLM.service/LLMConnector.ts +492 -492
  158. package/src/subsystems/LLMManager/LLM.service/LLMCredentials.helper.ts +171 -171
  159. package/src/subsystems/LLMManager/LLM.service/connectors/Anthropic.class.ts +666 -666
  160. package/src/subsystems/LLMManager/LLM.service/connectors/Bedrock.class.ts +407 -407
  161. package/src/subsystems/LLMManager/LLM.service/connectors/Echo.class.ts +92 -92
  162. package/src/subsystems/LLMManager/LLM.service/connectors/GoogleAI.class.ts +983 -983
  163. package/src/subsystems/LLMManager/LLM.service/connectors/Groq.class.ts +319 -319
  164. package/src/subsystems/LLMManager/LLM.service/connectors/Ollama.class.ts +361 -361
  165. package/src/subsystems/LLMManager/LLM.service/connectors/Perplexity.class.ts +257 -257
  166. package/src/subsystems/LLMManager/LLM.service/connectors/VertexAI.class.ts +430 -430
  167. package/src/subsystems/LLMManager/LLM.service/connectors/openai/OpenAIConnector.class.ts +503 -503
  168. package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ChatCompletionsApiInterface.ts +524 -524
  169. package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/OpenAIApiInterface.ts +100 -100
  170. package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/OpenAIApiInterfaceFactory.ts +81 -81
  171. package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/ResponsesApiInterface.ts +1145 -1145
  172. package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/constants.ts +13 -13
  173. package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/index.ts +4 -4
  174. package/src/subsystems/LLMManager/LLM.service/connectors/openai/apiInterfaces/utils.ts +11 -11
  175. package/src/subsystems/LLMManager/LLM.service/connectors/openai/types.ts +32 -32
  176. package/src/subsystems/LLMManager/LLM.service/connectors/xAI.class.ts +478 -478
  177. package/src/subsystems/LLMManager/LLM.service/index.ts +47 -47
  178. package/src/subsystems/LLMManager/ModelsProvider.service/ModelsProviderConnector.ts +303 -303
  179. package/src/subsystems/LLMManager/ModelsProvider.service/connectors/JSONModelsProvider.class.ts +280 -271
  180. package/src/subsystems/LLMManager/ModelsProvider.service/index.ts +11 -11
  181. package/src/subsystems/LLMManager/custom-models.ts +854 -854
  182. package/src/subsystems/LLMManager/models.ts +2540 -2540
  183. package/src/subsystems/LLMManager/paramMappings.ts +69 -69
  184. package/src/subsystems/MemoryManager/Cache.service/CacheConnector.ts +86 -86
  185. package/src/subsystems/MemoryManager/Cache.service/connectors/LocalStorageCache.class.ts +297 -297
  186. package/src/subsystems/MemoryManager/Cache.service/connectors/RAMCache.class.ts +214 -214
  187. package/src/subsystems/MemoryManager/Cache.service/connectors/RedisCache.class.ts +252 -252
  188. package/src/subsystems/MemoryManager/Cache.service/connectors/S3Cache.class.ts +373 -373
  189. package/src/subsystems/MemoryManager/Cache.service/index.ts +15 -15
  190. package/src/subsystems/MemoryManager/LLMCache.ts +72 -72
  191. package/src/subsystems/MemoryManager/LLMContext.ts +124 -124
  192. package/src/subsystems/MemoryManager/LLMMemory.service/LLMMemoryConnector.ts +26 -26
  193. package/src/subsystems/MemoryManager/RuntimeContext.ts +277 -277
  194. package/src/subsystems/Security/AccessControl/ACL.class.ts +208 -208
  195. package/src/subsystems/Security/AccessControl/AccessCandidate.class.ts +82 -82
  196. package/src/subsystems/Security/AccessControl/AccessRequest.class.ts +52 -52
  197. package/src/subsystems/Security/Account.service/AccountConnector.ts +44 -44
  198. package/src/subsystems/Security/Account.service/connectors/DummyAccount.class.ts +130 -130
  199. package/src/subsystems/Security/Account.service/connectors/JSONFileAccount.class.ts +170 -170
  200. package/src/subsystems/Security/Account.service/connectors/MySQLAccount.class.ts +76 -76
  201. package/src/subsystems/Security/Account.service/index.ts +14 -14
  202. package/src/subsystems/Security/Credentials.helper.ts +62 -62
  203. package/src/subsystems/Security/ManagedVault.service/ManagedVaultConnector.ts +38 -38
  204. package/src/subsystems/Security/ManagedVault.service/connectors/NullManagedVault.class.ts +53 -53
  205. package/src/subsystems/Security/ManagedVault.service/connectors/SecretManagerManagedVault.ts +154 -154
  206. package/src/subsystems/Security/ManagedVault.service/index.ts +12 -12
  207. package/src/subsystems/Security/SecureConnector.class.ts +110 -110
  208. package/src/subsystems/Security/Vault.service/Vault.helper.ts +30 -30
  209. package/src/subsystems/Security/Vault.service/VaultConnector.ts +29 -29
  210. package/src/subsystems/Security/Vault.service/connectors/HashicorpVault.class.ts +46 -46
  211. package/src/subsystems/Security/Vault.service/connectors/JSONFileVault.class.ts +221 -221
  212. package/src/subsystems/Security/Vault.service/connectors/NullVault.class.ts +54 -54
  213. package/src/subsystems/Security/Vault.service/connectors/SecretsManager.class.ts +140 -140
  214. package/src/subsystems/Security/Vault.service/index.ts +12 -12
  215. package/src/types/ACL.types.ts +104 -104
  216. package/src/types/AWS.types.ts +10 -10
  217. package/src/types/Agent.types.ts +61 -61
  218. package/src/types/AgentLogger.types.ts +17 -17
  219. package/src/types/Cache.types.ts +1 -1
  220. package/src/types/Common.types.ts +2 -2
  221. package/src/types/LLM.types.ts +520 -520
  222. package/src/types/Redis.types.ts +8 -8
  223. package/src/types/SRE.types.ts +64 -64
  224. package/src/types/Security.types.ts +14 -14
  225. package/src/types/Storage.types.ts +5 -5
  226. package/src/types/VectorDB.types.ts +86 -86
  227. package/src/utils/base64.utils.ts +275 -275
  228. package/src/utils/cli.utils.ts +68 -68
  229. package/src/utils/data.utils.ts +322 -322
  230. package/src/utils/date-time.utils.ts +22 -22
  231. package/src/utils/general.utils.ts +238 -238
  232. package/src/utils/index.ts +12 -12
  233. package/src/utils/lazy-client.ts +261 -261
  234. package/src/utils/numbers.utils.ts +13 -13
  235. package/src/utils/oauth.utils.ts +35 -35
  236. package/src/utils/string.utils.ts +414 -414
  237. package/src/utils/url.utils.ts +19 -19
  238. package/src/utils/validation.utils.ts +74 -74
  239. package/dist/types/subsystems/LLMManager/ModelsProvider.service/connectors/SmythModelsProvider.class.d.ts +0 -39
@@ -1,983 +1,983 @@
1
- import os from 'os';
2
- import path from 'path';
3
- import EventEmitter from 'events';
4
- import fs from 'fs';
5
-
6
- import { GoogleGenerativeAI, ModelParams, GenerationConfig, GenerateContentRequest, UsageMetadata, FunctionCallingMode } from '@google/generative-ai';
7
- import { GoogleAIFileManager, FileState } from '@google/generative-ai/server';
8
- import { GoogleGenAI } from '@google/genai';
9
-
10
- import { JSON_RESPONSE_INSTRUCTION, BUILT_IN_MODEL_PREFIX } from '@sre/constants';
11
- import { BinaryInput } from '@sre/helpers/BinaryInput.helper';
12
- import { AccessCandidate } from '@sre/Security/AccessControl/AccessCandidate.class';
13
- import { uid } from '@sre/utils';
14
-
15
- import { processWithConcurrencyLimit } from '@sre/utils';
16
-
17
- import {
18
- TLLMMessageBlock,
19
- ToolData,
20
- TLLMMessageRole,
21
- TLLMToolResultMessageBlock,
22
- APIKeySource,
23
- TLLMEvent,
24
- BasicCredentials,
25
- ILLMRequestFuncParams,
26
- TLLMChatResponse,
27
- TGoogleAIRequestBody,
28
- ILLMRequestContext,
29
- TLLMPreparedParams,
30
- LLMInterface,
31
- } from '@sre/types/LLM.types';
32
- import { LLMHelper } from '@sre/LLMManager/LLM.helper';
33
-
34
- import { SystemEvents } from '@sre/Core/SystemEvents';
35
- import { SUPPORTED_MIME_TYPES_MAP } from '@sre/constants';
36
- import { Logger } from '@sre/helpers/Log.helper';
37
-
38
- import { LLMConnector } from '../LLMConnector';
39
-
40
- const logger = Logger('GoogleAIConnector');
41
-
42
- const MODELS_SUPPORT_SYSTEM_INSTRUCTION = [
43
- 'gemini-1.5-pro-exp-0801',
44
- 'gemini-1.5-pro-latest',
45
- 'gemini-1.5-pro-latest',
46
- 'gemini-1.5-pro',
47
- 'gemini-1.5-pro-001',
48
- 'gemini-1.5-flash-latest',
49
- 'gemini-1.5-flash-001',
50
- 'gemini-1.5-flash',
51
- ];
52
- const MODELS_SUPPORT_JSON_RESPONSE = MODELS_SUPPORT_SYSTEM_INSTRUCTION;
53
-
54
- // Supported file MIME types for Google AI's Gemini models
55
- const VALID_MIME_TYPES = [
56
- ...SUPPORTED_MIME_TYPES_MAP.GoogleAI.image,
57
- ...SUPPORTED_MIME_TYPES_MAP.GoogleAI.audio,
58
- ...SUPPORTED_MIME_TYPES_MAP.GoogleAI.video,
59
- ...SUPPORTED_MIME_TYPES_MAP.GoogleAI.document,
60
- ];
61
-
62
- // will be removed after updating the SDK
63
- type UsageMetadataWithThoughtsToken = UsageMetadata & { thoughtsTokenCount?: number; cost?: number };
64
-
65
- const IMAGE_GEN_FIXED_PRICING = {
66
- 'imagen-3.0-generate-001': 0.04, // Fixed cost per image
67
- 'imagen-4.0-generate-001': 0.04, // Fixed cost per image
68
- 'imagen-4': 0.04, // Standard Imagen 4
69
- 'imagen-4-ultra': 0.06, // Imagen 4 Ultra
70
- 'gemini-2.5-flash-image': 0.039,
71
- };
72
-
73
- export class GoogleAIConnector extends LLMConnector {
74
- public name = 'LLM:GoogleAI';
75
-
76
- private validMimeTypes = {
77
- all: VALID_MIME_TYPES,
78
- image: SUPPORTED_MIME_TYPES_MAP.GoogleAI.image,
79
- };
80
-
81
- private async getClient(params: ILLMRequestContext): Promise<GoogleGenerativeAI> {
82
- const apiKey = (params.credentials as BasicCredentials)?.apiKey;
83
-
84
- if (!apiKey) throw new Error('Please provide an API key for Google AI');
85
-
86
- return new GoogleGenerativeAI(apiKey);
87
- }
88
-
89
- protected async request({ acRequest, body, context }: ILLMRequestFuncParams): Promise<TLLMChatResponse> {
90
- try {
91
- logger.debug(`request ${this.name}`, acRequest.candidate);
92
- const prompt = body.messages;
93
- delete body.messages;
94
-
95
- const genAI = await this.getClient(context);
96
- const $model = genAI.getGenerativeModel(body);
97
-
98
- const result = await $model.generateContent(prompt);
99
-
100
- const response = await result.response;
101
- const content = response.text();
102
- const finishReason = response.candidates[0].finishReason || 'stop';
103
- const usage = response?.usageMetadata as UsageMetadataWithThoughtsToken;
104
- this.reportUsage(usage, {
105
- modelEntryName: context.modelEntryName,
106
- keySource: context.isUserKey ? APIKeySource.User : APIKeySource.Smyth,
107
- agentId: context.agentId,
108
- teamId: context.teamId,
109
- });
110
-
111
- const toolCalls = response.candidates[0]?.content?.parts?.filter((part) => part.functionCall);
112
-
113
- let toolsData: ToolData[] = [];
114
- let useTool = false;
115
-
116
- if (toolCalls && toolCalls.length > 0) {
117
- toolsData = toolCalls.map((toolCall, index) => ({
118
- index,
119
- id: `tool-${index}`,
120
- type: 'function',
121
- name: toolCall.functionCall.name,
122
- arguments: JSON.stringify(toolCall.functionCall.args),
123
- role: TLLMMessageRole.Assistant,
124
- }));
125
- useTool = true;
126
- }
127
-
128
- return {
129
- content,
130
- finishReason: finishReason.toLowerCase(),
131
- useTool,
132
- toolsData,
133
- message: { content, role: 'assistant' },
134
- usage,
135
- };
136
- } catch (error: any) {
137
- logger.error(`request ${this.name}`, error, acRequest.candidate);
138
- throw error;
139
- }
140
- }
141
-
142
- protected async streamRequest({ acRequest, body, context }: ILLMRequestFuncParams): Promise<EventEmitter> {
143
- logger.debug(`streamRequest ${this.name}`, acRequest.candidate);
144
- const emitter = new EventEmitter();
145
-
146
- const prompt = body.messages;
147
- delete body.messages;
148
-
149
- const genAI = await this.getClient(context);
150
- const $model = genAI.getGenerativeModel(body);
151
-
152
- try {
153
- const result = await $model.generateContentStream(prompt);
154
-
155
- let toolsData: ToolData[] = [];
156
- let usage: UsageMetadataWithThoughtsToken;
157
-
158
- // Process stream asynchronously while as we need to return emitter immediately
159
- (async () => {
160
- for await (const chunk of result.stream) {
161
- const chunkText = chunk.text();
162
- emitter.emit('content', chunkText);
163
-
164
- if (chunk.candidates[0]?.content?.parts) {
165
- const toolCalls = chunk.candidates[0].content.parts.filter((part) => part.functionCall);
166
- if (toolCalls.length > 0) {
167
- toolsData = toolCalls.map((toolCall, index) => ({
168
- index,
169
- id: `tool-${index}`,
170
- type: 'function',
171
- name: toolCall.functionCall.name,
172
- arguments: JSON.stringify(toolCall.functionCall.args),
173
- role: TLLMMessageRole.Assistant,
174
- }));
175
- emitter.emit(TLLMEvent.ToolInfo, toolsData);
176
- }
177
- }
178
-
179
- // the same usage is sent on each emit. IMPORTANT: google does not send usage for each chunk but
180
- // rather just sends the same usage for the entire request.
181
- // notice that the output tokens are only sent in the last chunk usage metadata.
182
- // so we will just update a var to hold the latest usage and report it when the stream ends.
183
- // e.g emit1: { input_tokens: 500, output_tokens: undefined } -> same input_tokens
184
- // e.g emit2: { input_tokens: 500, output_tokens: undefined } -> same input_tokens
185
- // e.g emit3: { input_tokens: 500, output_tokens: 10 } -> same input_tokens, new output_tokens in the last chunk
186
- if (chunk?.usageMetadata) {
187
- usage = chunk.usageMetadata as UsageMetadataWithThoughtsToken;
188
- }
189
- }
190
-
191
- if (usage) {
192
- this.reportUsage(usage, {
193
- modelEntryName: context.modelEntryName,
194
- keySource: context.isUserKey ? APIKeySource.User : APIKeySource.Smyth,
195
- agentId: context.agentId,
196
- teamId: context.teamId,
197
- });
198
- }
199
-
200
- setTimeout(() => {
201
- emitter.emit('end', toolsData);
202
- }, 100);
203
- })();
204
-
205
- return emitter;
206
- } catch (error: any) {
207
- logger.error(`streamRequest ${this.name}`, error, acRequest.candidate);
208
- throw error;
209
- }
210
- }
211
- // #region Image Generation, will be moved to a different subsystem/service
212
-
213
- protected async imageGenRequest({ body, context }: ILLMRequestFuncParams): Promise<any> {
214
- const apiKey = (context.credentials as BasicCredentials)?.apiKey;
215
- if (!apiKey) throw new Error('Please provide an API key for Google AI');
216
-
217
- const model = body.model || 'imagen-3.0-generate-001';
218
- const modelName = context.modelEntryName.replace(BUILT_IN_MODEL_PREFIX, '');
219
-
220
- // Use traditional Imagen models
221
- const config = {
222
- numberOfImages: body.n || 1,
223
- aspectRatio: body.aspect_ratio || body.size || '1:1',
224
- personGeneration: body.person_generation || 'allow_adult',
225
- };
226
-
227
- const ai = new GoogleGenAI({ apiKey });
228
-
229
- // Default to GenerateImages interface if not specified
230
- const modelInterface = context.modelInfo?.interface || LLMInterface.GenerateImages;
231
-
232
- let response: any;
233
-
234
- if (modelInterface === LLMInterface.GenerateContent) {
235
- // Use Gemini image generation API
236
- response = await ai.models.generateContent({
237
- model,
238
- contents: body.prompt,
239
- });
240
-
241
- // Extract image data from Gemini response format
242
- const imageData: any[] = [];
243
- if (response.candidates?.[0]?.content?.parts) {
244
- for (const part of response.candidates[0].content.parts) {
245
- if (part.inlineData?.data) {
246
- imageData.push({
247
- url: `data:image/png;base64,${part.inlineData.data}`,
248
- b64_json: part.inlineData.data,
249
- revised_prompt: body.prompt,
250
- });
251
- }
252
- }
253
- }
254
-
255
- // Report input tokens and image cost pricing based on the official pricing page:
256
- // https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-flash-image-preview
257
- const usageMetadata = response?.usageMetadata as UsageMetadataWithThoughtsToken;
258
-
259
- this.reportImageUsage({
260
- usage: {
261
- cost: IMAGE_GEN_FIXED_PRICING[modelName],
262
- usageMetadata,
263
- },
264
- context,
265
- });
266
-
267
- if (imageData.length === 0) {
268
- throw new Error(
269
- 'Please enter a valid prompt — for example: "Create a picture of a nano banana dish in a fancy restaurant with a Gemini theme."'
270
- );
271
- }
272
-
273
- return {
274
- created: Math.floor(Date.now() / 1000),
275
- data: imageData,
276
- };
277
- } else if (modelInterface === LLMInterface.GenerateImages) {
278
- response = await ai.models.generateImages({
279
- model,
280
- prompt: body.prompt,
281
- config,
282
- });
283
-
284
- // Report input tokens and image cost pricing based on the official pricing page:
285
- // https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-flash-image-preview
286
- const usageMetadata = response?.usageMetadata as UsageMetadataWithThoughtsToken;
287
- this.reportImageUsage({
288
- usage: {
289
- cost: IMAGE_GEN_FIXED_PRICING[modelName],
290
- usageMetadata,
291
- },
292
- numberOfImages: config.numberOfImages,
293
- context,
294
- });
295
-
296
- return {
297
- created: Math.floor(Date.now() / 1000),
298
- data:
299
- response.generatedImages?.map((generatedImage: any) => ({
300
- url: generatedImage.image.imageBytes ? `data:image/png;base64,${generatedImage.image.imageBytes}` : undefined,
301
- b64_json: generatedImage.image.imageBytes,
302
- revised_prompt: body.prompt,
303
- })) || [],
304
- };
305
- } else {
306
- throw new Error(`Unsupported interface: ${modelInterface}`);
307
- }
308
- }
309
-
310
- protected async imageEditRequest({ body, context }: ILLMRequestFuncParams): Promise<any> {
311
- const apiKey = (context.credentials as BasicCredentials)?.apiKey;
312
- if (!apiKey) throw new Error('Please provide an API key for Google AI');
313
-
314
- // A model supports image editing if it implements the `generateContent` interface.
315
- const supportsEditing = context.modelInfo?.interface === LLMInterface.GenerateContent;
316
- if (!supportsEditing) {
317
- throw new Error(`Image editing is not supported for model: ${body.model}. This model only supports image generation.`);
318
- }
319
-
320
- const ai = new GoogleGenAI({ apiKey });
321
- const modelName = context.modelEntryName.replace(BUILT_IN_MODEL_PREFIX, '');
322
-
323
- // Use the prepared body which already contains processed files and contents
324
- const response = await ai.models.generateContent({
325
- model: body.model,
326
- contents: body.contents,
327
- });
328
-
329
- // Extract image data from Gemini response format
330
- const imageData: any[] = [];
331
- if (response.candidates?.[0]?.content?.parts) {
332
- for (const part of response.candidates[0].content.parts) {
333
- if (part.inlineData?.data) {
334
- imageData.push({
335
- url: `data:image/png;base64,${part.inlineData.data}`,
336
- b64_json: part.inlineData.data,
337
- revised_prompt: body._metadata?.prompt || body.prompt,
338
- });
339
- }
340
- }
341
- }
342
-
343
- // Report pricing for input tokens and image costs
344
- const usageMetadata = response?.usageMetadata as UsageMetadataWithThoughtsToken;
345
-
346
- this.reportImageUsage({
347
- usage: {
348
- cost: IMAGE_GEN_FIXED_PRICING[modelName],
349
- usageMetadata,
350
- },
351
- context,
352
- });
353
-
354
- return {
355
- created: Math.floor(Date.now() / 1000),
356
- data: imageData,
357
- };
358
- }
359
-
360
- protected async reqBodyAdapter(params: TLLMPreparedParams): Promise<TGoogleAIRequestBody> {
361
- const model = params?.model;
362
-
363
- // Check if this is an image generation request based on capabilities
364
- if (params?.capabilities?.imageGeneration) {
365
- // Determine if this is image editing (has files) or generation
366
- const hasFiles = params?.files?.length > 0;
367
- if (hasFiles) {
368
- return this.prepareImageEditBody(params) as any;
369
- } else {
370
- return this.prepareBodyForImageGenRequest(params) as any;
371
- }
372
- }
373
-
374
- const messages = await this.prepareMessages(params);
375
-
376
- let body: ModelParams & { messages: string | TLLMMessageBlock[] | GenerateContentRequest } = {
377
- model: model as string,
378
- messages,
379
- };
380
-
381
- const responseFormat = params?.responseFormat || '';
382
- let responseMimeType = '';
383
- let systemInstruction = '';
384
-
385
- if (responseFormat === 'json') {
386
- systemInstruction += JSON_RESPONSE_INSTRUCTION;
387
-
388
- if (MODELS_SUPPORT_JSON_RESPONSE.includes(model as string)) {
389
- responseMimeType = 'application/json';
390
- }
391
- }
392
-
393
- const config: GenerationConfig = {};
394
-
395
- if (params.maxTokens !== undefined) config.maxOutputTokens = params.maxTokens;
396
- if (params.temperature !== undefined) config.temperature = params.temperature;
397
- if (params.topP !== undefined) config.topP = params.topP;
398
- if (params.topK !== undefined) config.topK = params.topK;
399
- if (params.stopSequences?.length) config.stopSequences = params.stopSequences;
400
- if (responseMimeType) config.responseMimeType = responseMimeType;
401
-
402
- if (systemInstruction) body.systemInstruction = systemInstruction;
403
- if (Object.keys(config).length > 0) {
404
- body.generationConfig = config;
405
- }
406
-
407
- return body;
408
- }
409
-
410
- protected reportUsage(
411
- usage: UsageMetadataWithThoughtsToken,
412
- metadata: { modelEntryName: string; keySource: APIKeySource; agentId: string; teamId: string }
413
- ) {
414
- // SmythOS (built-in) models have a prefix, so we need to remove it to get the model name
415
- const modelName = metadata.modelEntryName.replace(BUILT_IN_MODEL_PREFIX, '');
416
- let tier = '';
417
- const tierThresholds = {
418
- 'gemini-1.5-pro': 128_000,
419
- 'gemini-2.5-pro': 200_000,
420
- };
421
-
422
- const textInputTokens =
423
- usage?.['promptTokensDetails']?.find((detail) => detail.modality === 'TEXT')?.tokenCount || usage?.promptTokenCount || 0;
424
- const audioInputTokens = usage?.['promptTokensDetails']?.find((detail) => detail.modality === 'AUDIO')?.tokenCount || 0;
425
-
426
- // Find matching model and set tier based on threshold
427
- const modelWithTier = Object.keys(tierThresholds).find((model) => modelName.includes(model));
428
- if (modelWithTier) {
429
- tier = textInputTokens < tierThresholds[modelWithTier] ? 'tier1' : 'tier2';
430
- }
431
-
432
- // #endregion
433
-
434
- const usageData = {
435
- sourceId: `llm:${modelName}`,
436
- input_tokens: textInputTokens,
437
- output_tokens: usage?.candidatesTokenCount || 0,
438
- input_tokens_audio: audioInputTokens,
439
- input_tokens_cache_read: usage?.cachedContentTokenCount || 0,
440
- input_tokens_cache_write: 0,
441
- reasoning_tokens: usage?.thoughtsTokenCount,
442
- keySource: metadata.keySource,
443
- agentId: metadata.agentId,
444
- teamId: metadata.teamId,
445
- tier,
446
- };
447
- SystemEvents.emit('USAGE:LLM', usageData);
448
-
449
- return usageData;
450
- }
451
-
452
- /**
453
- * Extract text and image tokens from Google AI usage metadata
454
- */
455
- private extractTokenCounts(usage: UsageMetadataWithThoughtsToken): { textTokens: number; imageTokens: number } {
456
- const textTokens = usage?.['promptTokensDetails']?.find((detail) => detail.modality === 'TEXT')?.tokenCount || 0;
457
- const imageTokens = usage?.['promptTokensDetails']?.find((detail) => detail.modality === 'IMAGE')?.tokenCount || 0;
458
-
459
- return { textTokens, imageTokens };
460
- }
461
-
462
- protected reportImageUsage({
463
- usage,
464
- context,
465
- numberOfImages = 1,
466
- }: {
467
- usage: { cost?: number; usageMetadata?: UsageMetadataWithThoughtsToken };
468
- context: ILLMRequestContext;
469
- numberOfImages?: number;
470
- }) {
471
- // Extract text and image tokens from rawUsage if available
472
- let input_tokens_txt = 0;
473
- let input_tokens_img = 0;
474
-
475
- if (usage.usageMetadata) {
476
- const { textTokens, imageTokens } = this.extractTokenCounts(usage.usageMetadata);
477
- input_tokens_txt = textTokens;
478
- input_tokens_img = imageTokens;
479
- }
480
-
481
- const imageUsageData = {
482
- sourceId: `api:imagegen.smyth`,
483
- keySource: context.isUserKey ? APIKeySource.User : APIKeySource.Smyth,
484
-
485
- cost: usage.cost * numberOfImages,
486
- input_tokens_txt,
487
- input_tokens_img,
488
-
489
- agentId: context.agentId,
490
- teamId: context.teamId,
491
- };
492
- SystemEvents.emit('USAGE:API', imageUsageData);
493
- }
494
-
495
- public formatToolsConfig({ toolDefinitions, toolChoice = 'auto' }) {
496
- const tools = toolDefinitions.map((tool) => {
497
- const { name, description, properties, requiredFields } = tool;
498
-
499
- // Ensure the function name is valid
500
- const validName = this.sanitizeFunctionName(name);
501
-
502
- // Ensure properties are non-empty for OBJECT type
503
- const validProperties = properties && Object.keys(properties).length > 0 ? properties : { dummy: { type: 'string' } };
504
-
505
- return {
506
- functionDeclarations: [
507
- {
508
- name: validName,
509
- description: description || '',
510
- parameters: {
511
- type: 'OBJECT',
512
- properties: validProperties,
513
- required: requiredFields || [],
514
- },
515
- },
516
- ],
517
- };
518
- });
519
-
520
- return {
521
- tools,
522
- toolChoice: {
523
- type: toolChoice,
524
- },
525
- };
526
- }
527
-
528
- public transformToolMessageBlocks({
529
- messageBlock,
530
- toolsData,
531
- }: {
532
- messageBlock: TLLMMessageBlock;
533
- toolsData: ToolData[];
534
- }): TLLMToolResultMessageBlock[] {
535
- const messageBlocks: TLLMToolResultMessageBlock[] = [];
536
-
537
- if (messageBlock) {
538
- const content = [];
539
- if (typeof messageBlock.content === 'string') {
540
- content.push({ text: messageBlock.content });
541
- } else if (Array.isArray(messageBlock.content)) {
542
- content.push(...messageBlock.content);
543
- }
544
-
545
- if (messageBlock.parts) {
546
- const functionCalls = messageBlock.parts.filter((part) => part.functionCall);
547
- if (functionCalls.length > 0) {
548
- content.push(
549
- ...functionCalls.map((call) => ({
550
- functionCall: {
551
- name: call.functionCall.name,
552
- args: JSON.parse(call.functionCall.args),
553
- },
554
- }))
555
- );
556
- }
557
- }
558
-
559
- messageBlocks.push({
560
- role: messageBlock.role,
561
- parts: content,
562
- });
563
- }
564
-
565
- const transformedToolsData = toolsData.map(
566
- (toolData): TLLMToolResultMessageBlock => ({
567
- role: TLLMMessageRole.User,
568
- parts: [
569
- {
570
- functionResponse: {
571
- name: toolData.name,
572
- response: {
573
- name: toolData.name,
574
- content: typeof toolData.result === 'string' ? toolData.result : JSON.stringify(toolData.result),
575
- },
576
- },
577
- },
578
- ],
579
- })
580
- );
581
-
582
- return [...messageBlocks, ...transformedToolsData];
583
- }
584
-
585
- public getConsistentMessages(messages: TLLMMessageBlock[]): TLLMMessageBlock[] {
586
- const _messages = LLMHelper.removeDuplicateUserMessages(messages);
587
-
588
- return _messages.map((message) => {
589
- const _message = { ...message };
590
- let textContent = '';
591
-
592
- // Map roles to valid Google AI roles
593
- switch (_message.role) {
594
- case TLLMMessageRole.Assistant:
595
- case TLLMMessageRole.System:
596
- _message.role = TLLMMessageRole.Model;
597
- break;
598
- case TLLMMessageRole.User:
599
- // User role is already valid
600
- break;
601
- default:
602
- _message.role = TLLMMessageRole.User; // Default to user for unknown roles
603
- }
604
-
605
- // * empty text causes error that's why we added '...'
606
-
607
- if (_message?.parts) {
608
- textContent = _message.parts.map((textBlock) => textBlock?.text || '...').join(' ');
609
- } else if (Array.isArray(_message?.content)) {
610
- textContent = _message.content.map((textBlock) => textBlock?.text || '...').join(' ');
611
- } else if (_message?.content) {
612
- textContent = (_message.content as string) || '...';
613
- }
614
-
615
- _message.parts = [{ text: textContent || '...' }];
616
-
617
- delete _message.content; // Remove content to avoid error
618
-
619
- return _message;
620
- });
621
- }
622
-
623
- private async prepareMessages(params: TLLMPreparedParams): Promise<string | TLLMMessageBlock[] | GenerateContentRequest> {
624
- let messages: string | TLLMMessageBlock[] | GenerateContentRequest = params?.messages || '';
625
-
626
- const files: BinaryInput[] = params?.files || [];
627
-
628
- if (files.length > 0) {
629
- messages = await this.prepareMessagesWithFiles(params);
630
- } else if (params?.toolsConfig?.tools?.length > 0) {
631
- messages = await this.prepareMessagesWithTools(params);
632
- } else {
633
- messages = await this.prepareMessagesWithTextQuery(params);
634
- }
635
-
636
- return messages;
637
- }
638
-
639
- private async prepareMessagesWithFiles(params: TLLMPreparedParams): Promise<string> {
640
- const model = params.model;
641
-
642
- let messages: string | TLLMMessageBlock[] = params?.messages || '';
643
- let systemInstruction = '';
644
- const files: BinaryInput[] = params?.files || [];
645
-
646
- // #region Upload files
647
- const promises = [];
648
- const _files = [];
649
-
650
- for (let image of files) {
651
- const binaryInput = BinaryInput.from(image);
652
- promises.push(binaryInput.upload(AccessCandidate.agent(params.agentId)));
653
-
654
- _files.push(binaryInput);
655
- }
656
-
657
- await Promise.all(promises);
658
- // #endregion Upload files
659
-
660
- // If user provide mix of valid and invalid files, we will only process the valid files
661
- const validFiles = this.getValidFiles(_files, 'all');
662
-
663
- const hasVideo = validFiles.some((file) => file?.mimetype?.includes('video'));
664
-
665
- // GoogleAI only supports one video file at a time
666
- if (hasVideo && validFiles.length > 1) {
667
- throw new Error('Only one video file is supported at a time.');
668
- }
669
-
670
- const fileUploadingTasks = validFiles.map((file) => async () => {
671
- try {
672
- const uploadedFile = await this.uploadFile({
673
- file,
674
- apiKey: (params.credentials as BasicCredentials).apiKey,
675
- agentId: params.agentId,
676
- });
677
-
678
- return { url: uploadedFile.url, mimetype: file.mimetype };
679
- } catch {
680
- return null;
681
- }
682
- });
683
-
684
- const uploadedFiles = await processWithConcurrencyLimit(fileUploadingTasks);
685
-
686
- // We throw error when there are no valid uploaded files,
687
- if (uploadedFiles && uploadedFiles?.length === 0) {
688
- throw new Error(`There is an issue during upload file in Google AI Server!`);
689
- }
690
-
691
- const fileData = this.getFileData(uploadedFiles);
692
-
693
- const userMessage: TLLMMessageBlock = Array.isArray(messages) ? messages.pop() : { role: TLLMMessageRole.User, content: '' };
694
- let prompt = userMessage?.content || '';
695
-
696
- // if the the model does not support system instruction, we will add it to the prompt
697
- if (!MODELS_SUPPORT_SYSTEM_INSTRUCTION.includes(model as string)) {
698
- prompt = `${prompt}\n${systemInstruction}`;
699
- }
700
- //#endregion Separate system message and add JSON response instruction if needed
701
-
702
- // Adjust input structure handling for multiple image files to accommodate variations.
703
- messages = fileData.length === 1 ? ([...fileData, { text: prompt }] as any) : ([prompt, ...fileData] as any);
704
-
705
- return messages as string;
706
- }
707
-
708
- private async prepareMessagesWithTools(params: TLLMPreparedParams): Promise<GenerateContentRequest> {
709
- let formattedMessages: TLLMMessageBlock[];
710
- let systemInstruction = '';
711
-
712
- let messages = params?.messages || [];
713
-
714
- const hasSystemMessage = LLMHelper.hasSystemMessage(messages);
715
-
716
- if (hasSystemMessage) {
717
- const separateMessages = LLMHelper.separateSystemMessages(messages);
718
- const systemMessageContent = (separateMessages.systemMessage as TLLMMessageBlock)?.content;
719
- systemInstruction = typeof systemMessageContent === 'string' ? systemMessageContent : '';
720
- formattedMessages = separateMessages.otherMessages;
721
- } else {
722
- formattedMessages = messages;
723
- }
724
-
725
- const toolsPrompt: GenerateContentRequest = {
726
- contents: formattedMessages as any,
727
- };
728
-
729
- if (systemInstruction) {
730
- toolsPrompt.systemInstruction = systemInstruction;
731
- }
732
-
733
- if (params?.toolsConfig?.tools) toolsPrompt.tools = params?.toolsConfig?.tools as any;
734
- if (params?.toolsConfig?.tool_choice) {
735
- // Map tool choice to valid Google AI function calling modes
736
- let mode: FunctionCallingMode = FunctionCallingMode.AUTO; // default
737
- const toolChoice = params?.toolsConfig?.tool_choice;
738
-
739
- if (toolChoice === 'auto') {
740
- mode = FunctionCallingMode.AUTO;
741
- } else if (toolChoice === 'required') {
742
- mode = FunctionCallingMode.ANY;
743
- } else if (toolChoice === 'none') {
744
- mode = FunctionCallingMode.NONE;
745
- } else if (typeof toolChoice === 'object' && toolChoice.type === 'function') {
746
- // Handle OpenAI-style named tool choice - force any function call
747
- mode = FunctionCallingMode.ANY;
748
- }
749
-
750
- toolsPrompt.toolConfig = {
751
- functionCallingConfig: { mode },
752
- };
753
- }
754
-
755
- return toolsPrompt;
756
- }
757
-
758
- private async prepareMessagesWithTextQuery(params: TLLMPreparedParams): Promise<string> {
759
- const model = params.model;
760
- let systemInstruction = '';
761
- let prompt = '';
762
-
763
- const { systemMessage, otherMessages } = LLMHelper.separateSystemMessages(params?.messages as TLLMMessageBlock[]);
764
-
765
- if ('content' in systemMessage) {
766
- systemInstruction = systemMessage.content as string;
767
- }
768
-
769
- const responseFormat = params?.responseFormat || '';
770
- let responseMimeType = '';
771
-
772
- if (responseFormat === 'json') {
773
- systemInstruction += JSON_RESPONSE_INSTRUCTION;
774
-
775
- if (MODELS_SUPPORT_JSON_RESPONSE.includes(model as string)) {
776
- responseMimeType = 'application/json';
777
- }
778
- }
779
-
780
- if (otherMessages?.length > 0) {
781
- // Concatenate messages with prompt and remove messages from params as it's not supported
782
- prompt += otherMessages.map((message) => message?.parts?.[0]?.text || '').join('\n');
783
- }
784
-
785
- // if the the model does not support system instruction, we will add it to the prompt
786
- if (!MODELS_SUPPORT_SYSTEM_INSTRUCTION.includes(model as string)) {
787
- prompt = `${prompt}\n${systemInstruction}`;
788
- }
789
- //#endregion Separate system message and add JSON response instruction if needed
790
-
791
- return prompt;
792
- }
793
-
794
- private async prepareBodyForImageGenRequest(params: TLLMPreparedParams): Promise<any> {
795
- return {
796
- prompt: params.prompt,
797
- model: params.model,
798
- aspectRatio: (params as any).aspectRatio,
799
- personGeneration: (params as any).personGeneration,
800
- };
801
- }
802
-
803
- private async prepareImageEditBody(params: TLLMPreparedParams): Promise<any> {
804
- const model = params.model || 'gemini-2.5-flash-image-preview';
805
-
806
- // Construct edit prompt with image and instructions
807
- let editPrompt = params.prompt || 'Edit this image';
808
- if ((params as any).instruction) {
809
- editPrompt += `. ${(params as any).instruction}`;
810
- }
811
-
812
- // For image editing, we need to include the original image in the contents
813
- const contents: any[] = [];
814
- const files: BinaryInput[] = params?.files || [];
815
-
816
- if (files.length > 0) {
817
- // Get only valid image files for editing
818
- const validImageFiles = this.getValidFiles(files, 'image');
819
-
820
- if (validImageFiles.length === 0) {
821
- throw new Error('No valid image files found for editing. Please provide at least one image file.');
822
- }
823
-
824
- // Process each image file
825
- for (const file of validImageFiles) {
826
- try {
827
- // Read the file data as base64
828
- const bufferData = await file.getBuffer();
829
- const base64Image = Buffer.from(bufferData).toString('base64');
830
-
831
- contents.push({
832
- inlineData: {
833
- mimeType: file.mimetype,
834
- data: base64Image,
835
- },
836
- });
837
- } catch (error) {
838
- throw new Error(`Failed to process image file: ${error.message}`);
839
- }
840
- }
841
- } else {
842
- throw new Error('No image provided for editing. Please include an image file.');
843
- }
844
-
845
- // Add the edit instruction
846
- contents.push({ text: editPrompt });
847
-
848
- // Return the complete request body that can be used directly in imageEditRequest
849
- return {
850
- model,
851
- contents,
852
- // Additional metadata for usage reporting
853
- _metadata: {
854
- prompt: editPrompt,
855
- numberOfImages: (params as any).n || 1,
856
- aspectRatio: (params as any).aspect_ratio || (params as any).size || '1:1',
857
- personGeneration: (params as any).person_generation || 'allow_adult',
858
- },
859
- };
860
- }
861
-
862
- // Add this helper method to sanitize function names
863
- private sanitizeFunctionName(name: string): string {
864
- // Check if name is undefined or null
865
- if (name == null) {
866
- return '_unnamed_function';
867
- }
868
-
869
- // Remove any characters that are not alphanumeric, underscore, dot, or dash
870
- let sanitized = name.replace(/[^a-zA-Z0-9_.-]/g, '');
871
-
872
- // Ensure the name starts with a letter or underscore
873
- if (!/^[a-zA-Z_]/.test(sanitized)) {
874
- sanitized = '_' + sanitized;
875
- }
876
-
877
- // If sanitized is empty after removing invalid characters, use a default name
878
- if (sanitized === '') {
879
- sanitized = '_unnamed_function';
880
- }
881
-
882
- // Truncate to 64 characters if longer
883
- sanitized = sanitized.slice(0, 64);
884
-
885
- return sanitized;
886
- }
887
-
888
- private async uploadFile({ file, apiKey, agentId }: { file: BinaryInput; apiKey: string; agentId: string }): Promise<{ url: string }> {
889
- try {
890
- if (!apiKey || !file?.mimetype) {
891
- throw new Error('Missing required parameters to save file for Google AI!');
892
- }
893
-
894
- // Create a temporary directory
895
- const tempDir = os.tmpdir();
896
- const fileName = uid();
897
- const tempFilePath = path.join(tempDir, fileName);
898
-
899
- const bufferData = await file.readData(AccessCandidate.agent(agentId));
900
-
901
- // Write buffer data to temp file
902
- await fs.promises.writeFile(tempFilePath, new Uint8Array(bufferData));
903
-
904
- // Upload the file to the Google File Manager
905
- const fileManager = new GoogleAIFileManager(apiKey);
906
-
907
- const uploadResponse = await fileManager.uploadFile(tempFilePath, {
908
- mimeType: file.mimetype,
909
- displayName: fileName,
910
- });
911
-
912
- const name = uploadResponse.file.name;
913
-
914
- // Poll getFile() on a set interval (10 seconds here) to check file state.
915
- let uploadedFile = await fileManager.getFile(name);
916
- while (uploadedFile.state === FileState.PROCESSING) {
917
- process.stdout.write('.');
918
- // Sleep for 10 seconds
919
- await new Promise((resolve) => setTimeout(resolve, 10_000));
920
- // Fetch the file from the API again
921
- uploadedFile = await fileManager.getFile(name);
922
- }
923
-
924
- if (uploadedFile.state === FileState.FAILED) {
925
- throw new Error('File processing failed.');
926
- }
927
-
928
- // Clean up temp file
929
- await fs.promises.unlink(tempFilePath);
930
-
931
- return {
932
- url: uploadResponse.file.uri || '',
933
- };
934
- } catch (error) {
935
- throw new Error(`Error uploading file for Google AI: ${error.message}`);
936
- }
937
- }
938
-
939
- private getValidFiles(files: BinaryInput[], type: 'image' | 'all') {
940
- const validSources = [];
941
-
942
- for (let file of files) {
943
- if (this.validMimeTypes[type].includes(file?.mimetype)) {
944
- validSources.push(file);
945
- }
946
- }
947
-
948
- if (validSources?.length === 0) {
949
- throw new Error(`Unsupported file(s). Please make sure your file is one of the following types: ${this.validMimeTypes[type].join(', ')}`);
950
- }
951
-
952
- return validSources;
953
- }
954
-
955
- private getFileData(
956
- files: {
957
- url: string;
958
- mimetype: string;
959
- }[]
960
- ): {
961
- fileData: {
962
- mimeType: string;
963
- fileUri: string;
964
- };
965
- }[] {
966
- try {
967
- const imageData = [];
968
-
969
- for (let file of files) {
970
- imageData.push({
971
- fileData: {
972
- mimeType: file.mimetype,
973
- fileUri: file.url,
974
- },
975
- });
976
- }
977
-
978
- return imageData;
979
- } catch (error) {
980
- throw error;
981
- }
982
- }
983
- }
1
+ import os from 'os';
2
+ import path from 'path';
3
+ import EventEmitter from 'events';
4
+ import fs from 'fs';
5
+
6
+ import { GoogleGenerativeAI, ModelParams, GenerationConfig, GenerateContentRequest, UsageMetadata, FunctionCallingMode } from '@google/generative-ai';
7
+ import { GoogleAIFileManager, FileState } from '@google/generative-ai/server';
8
+ import { GoogleGenAI } from '@google/genai';
9
+
10
+ import { JSON_RESPONSE_INSTRUCTION, BUILT_IN_MODEL_PREFIX } from '@sre/constants';
11
+ import { BinaryInput } from '@sre/helpers/BinaryInput.helper';
12
+ import { AccessCandidate } from '@sre/Security/AccessControl/AccessCandidate.class';
13
+ import { uid } from '@sre/utils';
14
+
15
+ import { processWithConcurrencyLimit } from '@sre/utils';
16
+
17
+ import {
18
+ TLLMMessageBlock,
19
+ ToolData,
20
+ TLLMMessageRole,
21
+ TLLMToolResultMessageBlock,
22
+ APIKeySource,
23
+ TLLMEvent,
24
+ BasicCredentials,
25
+ ILLMRequestFuncParams,
26
+ TLLMChatResponse,
27
+ TGoogleAIRequestBody,
28
+ ILLMRequestContext,
29
+ TLLMPreparedParams,
30
+ LLMInterface,
31
+ } from '@sre/types/LLM.types';
32
+ import { LLMHelper } from '@sre/LLMManager/LLM.helper';
33
+
34
+ import { SystemEvents } from '@sre/Core/SystemEvents';
35
+ import { SUPPORTED_MIME_TYPES_MAP } from '@sre/constants';
36
+ import { Logger } from '@sre/helpers/Log.helper';
37
+
38
+ import { LLMConnector } from '../LLMConnector';
39
+
40
+ const logger = Logger('GoogleAIConnector');
41
+
42
+ const MODELS_SUPPORT_SYSTEM_INSTRUCTION = [
43
+ 'gemini-1.5-pro-exp-0801',
44
+ 'gemini-1.5-pro-latest',
45
+ 'gemini-1.5-pro-latest',
46
+ 'gemini-1.5-pro',
47
+ 'gemini-1.5-pro-001',
48
+ 'gemini-1.5-flash-latest',
49
+ 'gemini-1.5-flash-001',
50
+ 'gemini-1.5-flash',
51
+ ];
52
+ const MODELS_SUPPORT_JSON_RESPONSE = MODELS_SUPPORT_SYSTEM_INSTRUCTION;
53
+
54
+ // Supported file MIME types for Google AI's Gemini models
55
+ const VALID_MIME_TYPES = [
56
+ ...SUPPORTED_MIME_TYPES_MAP.GoogleAI.image,
57
+ ...SUPPORTED_MIME_TYPES_MAP.GoogleAI.audio,
58
+ ...SUPPORTED_MIME_TYPES_MAP.GoogleAI.video,
59
+ ...SUPPORTED_MIME_TYPES_MAP.GoogleAI.document,
60
+ ];
61
+
62
+ // will be removed after updating the SDK
63
+ type UsageMetadataWithThoughtsToken = UsageMetadata & { thoughtsTokenCount?: number; cost?: number };
64
+
65
+ const IMAGE_GEN_FIXED_PRICING = {
66
+ 'imagen-3.0-generate-001': 0.04, // Fixed cost per image
67
+ 'imagen-4.0-generate-001': 0.04, // Fixed cost per image
68
+ 'imagen-4': 0.04, // Standard Imagen 4
69
+ 'imagen-4-ultra': 0.06, // Imagen 4 Ultra
70
+ 'gemini-2.5-flash-image': 0.039,
71
+ };
72
+
73
+ export class GoogleAIConnector extends LLMConnector {
74
+ public name = 'LLM:GoogleAI';
75
+
76
+ private validMimeTypes = {
77
+ all: VALID_MIME_TYPES,
78
+ image: SUPPORTED_MIME_TYPES_MAP.GoogleAI.image,
79
+ };
80
+
81
+ private async getClient(params: ILLMRequestContext): Promise<GoogleGenerativeAI> {
82
+ const apiKey = (params.credentials as BasicCredentials)?.apiKey;
83
+
84
+ if (!apiKey) throw new Error('Please provide an API key for Google AI');
85
+
86
+ return new GoogleGenerativeAI(apiKey);
87
+ }
88
+
89
+ protected async request({ acRequest, body, context }: ILLMRequestFuncParams): Promise<TLLMChatResponse> {
90
+ try {
91
+ logger.debug(`request ${this.name}`, acRequest.candidate);
92
+ const prompt = body.messages;
93
+ delete body.messages;
94
+
95
+ const genAI = await this.getClient(context);
96
+ const $model = genAI.getGenerativeModel(body);
97
+
98
+ const result = await $model.generateContent(prompt);
99
+
100
+ const response = await result.response;
101
+ const content = response.text();
102
+ const finishReason = response.candidates[0].finishReason || 'stop';
103
+ const usage = response?.usageMetadata as UsageMetadataWithThoughtsToken;
104
+ this.reportUsage(usage, {
105
+ modelEntryName: context.modelEntryName,
106
+ keySource: context.isUserKey ? APIKeySource.User : APIKeySource.Smyth,
107
+ agentId: context.agentId,
108
+ teamId: context.teamId,
109
+ });
110
+
111
+ const toolCalls = response.candidates[0]?.content?.parts?.filter((part) => part.functionCall);
112
+
113
+ let toolsData: ToolData[] = [];
114
+ let useTool = false;
115
+
116
+ if (toolCalls && toolCalls.length > 0) {
117
+ toolsData = toolCalls.map((toolCall, index) => ({
118
+ index,
119
+ id: `tool-${index}`,
120
+ type: 'function',
121
+ name: toolCall.functionCall.name,
122
+ arguments: JSON.stringify(toolCall.functionCall.args),
123
+ role: TLLMMessageRole.Assistant,
124
+ }));
125
+ useTool = true;
126
+ }
127
+
128
+ return {
129
+ content,
130
+ finishReason: finishReason.toLowerCase(),
131
+ useTool,
132
+ toolsData,
133
+ message: { content, role: 'assistant' },
134
+ usage,
135
+ };
136
+ } catch (error: any) {
137
+ logger.error(`request ${this.name}`, error, acRequest.candidate);
138
+ throw error;
139
+ }
140
+ }
141
+
142
+ protected async streamRequest({ acRequest, body, context }: ILLMRequestFuncParams): Promise<EventEmitter> {
143
+ logger.debug(`streamRequest ${this.name}`, acRequest.candidate);
144
+ const emitter = new EventEmitter();
145
+
146
+ const prompt = body.messages;
147
+ delete body.messages;
148
+
149
+ const genAI = await this.getClient(context);
150
+ const $model = genAI.getGenerativeModel(body);
151
+
152
+ try {
153
+ const result = await $model.generateContentStream(prompt);
154
+
155
+ let toolsData: ToolData[] = [];
156
+ let usage: UsageMetadataWithThoughtsToken;
157
+
158
+ // Process stream asynchronously while as we need to return emitter immediately
159
+ (async () => {
160
+ for await (const chunk of result.stream) {
161
+ const chunkText = chunk.text();
162
+ emitter.emit('content', chunkText);
163
+
164
+ if (chunk.candidates[0]?.content?.parts) {
165
+ const toolCalls = chunk.candidates[0].content.parts.filter((part) => part.functionCall);
166
+ if (toolCalls.length > 0) {
167
+ toolsData = toolCalls.map((toolCall, index) => ({
168
+ index,
169
+ id: `tool-${index}`,
170
+ type: 'function',
171
+ name: toolCall.functionCall.name,
172
+ arguments: JSON.stringify(toolCall.functionCall.args),
173
+ role: TLLMMessageRole.Assistant,
174
+ }));
175
+ emitter.emit(TLLMEvent.ToolInfo, toolsData);
176
+ }
177
+ }
178
+
179
+ // the same usage is sent on each emit. IMPORTANT: google does not send usage for each chunk but
180
+ // rather just sends the same usage for the entire request.
181
+ // notice that the output tokens are only sent in the last chunk usage metadata.
182
+ // so we will just update a var to hold the latest usage and report it when the stream ends.
183
+ // e.g emit1: { input_tokens: 500, output_tokens: undefined } -> same input_tokens
184
+ // e.g emit2: { input_tokens: 500, output_tokens: undefined } -> same input_tokens
185
+ // e.g emit3: { input_tokens: 500, output_tokens: 10 } -> same input_tokens, new output_tokens in the last chunk
186
+ if (chunk?.usageMetadata) {
187
+ usage = chunk.usageMetadata as UsageMetadataWithThoughtsToken;
188
+ }
189
+ }
190
+
191
+ if (usage) {
192
+ this.reportUsage(usage, {
193
+ modelEntryName: context.modelEntryName,
194
+ keySource: context.isUserKey ? APIKeySource.User : APIKeySource.Smyth,
195
+ agentId: context.agentId,
196
+ teamId: context.teamId,
197
+ });
198
+ }
199
+
200
+ setTimeout(() => {
201
+ emitter.emit('end', toolsData);
202
+ }, 100);
203
+ })();
204
+
205
+ return emitter;
206
+ } catch (error: any) {
207
+ logger.error(`streamRequest ${this.name}`, error, acRequest.candidate);
208
+ throw error;
209
+ }
210
+ }
211
+ // #region Image Generation, will be moved to a different subsystem/service
212
+
213
+ protected async imageGenRequest({ body, context }: ILLMRequestFuncParams): Promise<any> {
214
+ const apiKey = (context.credentials as BasicCredentials)?.apiKey;
215
+ if (!apiKey) throw new Error('Please provide an API key for Google AI');
216
+
217
+ const model = body.model || 'imagen-3.0-generate-001';
218
+ const modelName = context.modelEntryName.replace(BUILT_IN_MODEL_PREFIX, '');
219
+
220
+ // Use traditional Imagen models
221
+ const config = {
222
+ numberOfImages: body.n || 1,
223
+ aspectRatio: body.aspect_ratio || body.size || '1:1',
224
+ personGeneration: body.person_generation || 'allow_adult',
225
+ };
226
+
227
+ const ai = new GoogleGenAI({ apiKey });
228
+
229
+ // Default to GenerateImages interface if not specified
230
+ const modelInterface = context.modelInfo?.interface || LLMInterface.GenerateImages;
231
+
232
+ let response: any;
233
+
234
+ if (modelInterface === LLMInterface.GenerateContent) {
235
+ // Use Gemini image generation API
236
+ response = await ai.models.generateContent({
237
+ model,
238
+ contents: body.prompt,
239
+ });
240
+
241
+ // Extract image data from Gemini response format
242
+ const imageData: any[] = [];
243
+ if (response.candidates?.[0]?.content?.parts) {
244
+ for (const part of response.candidates[0].content.parts) {
245
+ if (part.inlineData?.data) {
246
+ imageData.push({
247
+ url: `data:image/png;base64,${part.inlineData.data}`,
248
+ b64_json: part.inlineData.data,
249
+ revised_prompt: body.prompt,
250
+ });
251
+ }
252
+ }
253
+ }
254
+
255
+ // Report input tokens and image cost pricing based on the official pricing page:
256
+ // https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-flash-image-preview
257
+ const usageMetadata = response?.usageMetadata as UsageMetadataWithThoughtsToken;
258
+
259
+ this.reportImageUsage({
260
+ usage: {
261
+ cost: IMAGE_GEN_FIXED_PRICING[modelName],
262
+ usageMetadata,
263
+ },
264
+ context,
265
+ });
266
+
267
+ if (imageData.length === 0) {
268
+ throw new Error(
269
+ 'Please enter a valid prompt — for example: "Create a picture of a nano banana dish in a fancy restaurant with a Gemini theme."'
270
+ );
271
+ }
272
+
273
+ return {
274
+ created: Math.floor(Date.now() / 1000),
275
+ data: imageData,
276
+ };
277
+ } else if (modelInterface === LLMInterface.GenerateImages) {
278
+ response = await ai.models.generateImages({
279
+ model,
280
+ prompt: body.prompt,
281
+ config,
282
+ });
283
+
284
+ // Report input tokens and image cost pricing based on the official pricing page:
285
+ // https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-flash-image-preview
286
+ const usageMetadata = response?.usageMetadata as UsageMetadataWithThoughtsToken;
287
+ this.reportImageUsage({
288
+ usage: {
289
+ cost: IMAGE_GEN_FIXED_PRICING[modelName],
290
+ usageMetadata,
291
+ },
292
+ numberOfImages: config.numberOfImages,
293
+ context,
294
+ });
295
+
296
+ return {
297
+ created: Math.floor(Date.now() / 1000),
298
+ data:
299
+ response.generatedImages?.map((generatedImage: any) => ({
300
+ url: generatedImage.image.imageBytes ? `data:image/png;base64,${generatedImage.image.imageBytes}` : undefined,
301
+ b64_json: generatedImage.image.imageBytes,
302
+ revised_prompt: body.prompt,
303
+ })) || [],
304
+ };
305
+ } else {
306
+ throw new Error(`Unsupported interface: ${modelInterface}`);
307
+ }
308
+ }
309
+
310
    /**
     * Performs an image-editing request through Gemini's `generateContent` endpoint.
     *
     * Expects `body` to already be prepared by `prepareImageEditBody` (model,
     * `contents` with inline image data plus the edit prompt, and `_metadata`).
     * Returns an OpenAI-style payload: `{ created, data: [{ url, b64_json, revised_prompt }] }`.
     *
     * @throws when no API key is present in the request credentials, or when the
     *         model does not implement the `generateContent` interface (editing unsupported).
     */
    protected async imageEditRequest({ body, context }: ILLMRequestFuncParams): Promise<any> {
        const apiKey = (context.credentials as BasicCredentials)?.apiKey;
        if (!apiKey) throw new Error('Please provide an API key for Google AI');

        // A model supports image editing if it implements the `generateContent` interface.
        const supportsEditing = context.modelInfo?.interface === LLMInterface.GenerateContent;
        if (!supportsEditing) {
            throw new Error(`Image editing is not supported for model: ${body.model}. This model only supports image generation.`);
        }

        const ai = new GoogleGenAI({ apiKey });
        // Built-in models carry a prefix in the entry name; strip it to get the pricing key.
        const modelName = context.modelEntryName.replace(BUILT_IN_MODEL_PREFIX, '');

        // Use the prepared body which already contains processed files and contents
        const response = await ai.models.generateContent({
            model: body.model,
            contents: body.contents,
        });

        // Extract image data from Gemini response format: inlineData parts carry base64 image bytes.
        // Only the first candidate is inspected.
        const imageData: any[] = [];
        if (response.candidates?.[0]?.content?.parts) {
            for (const part of response.candidates[0].content.parts) {
                if (part.inlineData?.data) {
                    imageData.push({
                        // NOTE(review): assumes the returned image is PNG — confirm against the part's actual mimeType.
                        url: `data:image/png;base64,${part.inlineData.data}`,
                        b64_json: part.inlineData.data,
                        revised_prompt: body._metadata?.prompt || body.prompt,
                    });
                }
            }
        }

        // Report pricing for input tokens and image costs
        const usageMetadata = response?.usageMetadata as UsageMetadataWithThoughtsToken;

        this.reportImageUsage({
            usage: {
                // NOTE(review): cost is undefined when modelName has no entry in the pricing table — verify coverage.
                cost: IMAGE_GEN_FIXED_PRICING[modelName],
                usageMetadata,
            },
            context,
        });

        return {
            created: Math.floor(Date.now() / 1000),
            data: imageData,
        };
    }
359
+
360
+ protected async reqBodyAdapter(params: TLLMPreparedParams): Promise<TGoogleAIRequestBody> {
361
+ const model = params?.model;
362
+
363
+ // Check if this is an image generation request based on capabilities
364
+ if (params?.capabilities?.imageGeneration) {
365
+ // Determine if this is image editing (has files) or generation
366
+ const hasFiles = params?.files?.length > 0;
367
+ if (hasFiles) {
368
+ return this.prepareImageEditBody(params) as any;
369
+ } else {
370
+ return this.prepareBodyForImageGenRequest(params) as any;
371
+ }
372
+ }
373
+
374
+ const messages = await this.prepareMessages(params);
375
+
376
+ let body: ModelParams & { messages: string | TLLMMessageBlock[] | GenerateContentRequest } = {
377
+ model: model as string,
378
+ messages,
379
+ };
380
+
381
+ const responseFormat = params?.responseFormat || '';
382
+ let responseMimeType = '';
383
+ let systemInstruction = '';
384
+
385
+ if (responseFormat === 'json') {
386
+ systemInstruction += JSON_RESPONSE_INSTRUCTION;
387
+
388
+ if (MODELS_SUPPORT_JSON_RESPONSE.includes(model as string)) {
389
+ responseMimeType = 'application/json';
390
+ }
391
+ }
392
+
393
+ const config: GenerationConfig = {};
394
+
395
+ if (params.maxTokens !== undefined) config.maxOutputTokens = params.maxTokens;
396
+ if (params.temperature !== undefined) config.temperature = params.temperature;
397
+ if (params.topP !== undefined) config.topP = params.topP;
398
+ if (params.topK !== undefined) config.topK = params.topK;
399
+ if (params.stopSequences?.length) config.stopSequences = params.stopSequences;
400
+ if (responseMimeType) config.responseMimeType = responseMimeType;
401
+
402
+ if (systemInstruction) body.systemInstruction = systemInstruction;
403
+ if (Object.keys(config).length > 0) {
404
+ body.generationConfig = config;
405
+ }
406
+
407
+ return body;
408
+ }
409
+
410
+ protected reportUsage(
411
+ usage: UsageMetadataWithThoughtsToken,
412
+ metadata: { modelEntryName: string; keySource: APIKeySource; agentId: string; teamId: string }
413
+ ) {
414
+ // SmythOS (built-in) models have a prefix, so we need to remove it to get the model name
415
+ const modelName = metadata.modelEntryName.replace(BUILT_IN_MODEL_PREFIX, '');
416
+ let tier = '';
417
+ const tierThresholds = {
418
+ 'gemini-1.5-pro': 128_000,
419
+ 'gemini-2.5-pro': 200_000,
420
+ };
421
+
422
+ const textInputTokens =
423
+ usage?.['promptTokensDetails']?.find((detail) => detail.modality === 'TEXT')?.tokenCount || usage?.promptTokenCount || 0;
424
+ const audioInputTokens = usage?.['promptTokensDetails']?.find((detail) => detail.modality === 'AUDIO')?.tokenCount || 0;
425
+
426
+ // Find matching model and set tier based on threshold
427
+ const modelWithTier = Object.keys(tierThresholds).find((model) => modelName.includes(model));
428
+ if (modelWithTier) {
429
+ tier = textInputTokens < tierThresholds[modelWithTier] ? 'tier1' : 'tier2';
430
+ }
431
+
432
+ // #endregion
433
+
434
+ const usageData = {
435
+ sourceId: `llm:${modelName}`,
436
+ input_tokens: textInputTokens,
437
+ output_tokens: usage?.candidatesTokenCount || 0,
438
+ input_tokens_audio: audioInputTokens,
439
+ input_tokens_cache_read: usage?.cachedContentTokenCount || 0,
440
+ input_tokens_cache_write: 0,
441
+ reasoning_tokens: usage?.thoughtsTokenCount,
442
+ keySource: metadata.keySource,
443
+ agentId: metadata.agentId,
444
+ teamId: metadata.teamId,
445
+ tier,
446
+ };
447
+ SystemEvents.emit('USAGE:LLM', usageData);
448
+
449
+ return usageData;
450
+ }
451
+
452
+ /**
453
+ * Extract text and image tokens from Google AI usage metadata
454
+ */
455
+ private extractTokenCounts(usage: UsageMetadataWithThoughtsToken): { textTokens: number; imageTokens: number } {
456
+ const textTokens = usage?.['promptTokensDetails']?.find((detail) => detail.modality === 'TEXT')?.tokenCount || 0;
457
+ const imageTokens = usage?.['promptTokensDetails']?.find((detail) => detail.modality === 'IMAGE')?.tokenCount || 0;
458
+
459
+ return { textTokens, imageTokens };
460
+ }
461
+
462
+ protected reportImageUsage({
463
+ usage,
464
+ context,
465
+ numberOfImages = 1,
466
+ }: {
467
+ usage: { cost?: number; usageMetadata?: UsageMetadataWithThoughtsToken };
468
+ context: ILLMRequestContext;
469
+ numberOfImages?: number;
470
+ }) {
471
+ // Extract text and image tokens from rawUsage if available
472
+ let input_tokens_txt = 0;
473
+ let input_tokens_img = 0;
474
+
475
+ if (usage.usageMetadata) {
476
+ const { textTokens, imageTokens } = this.extractTokenCounts(usage.usageMetadata);
477
+ input_tokens_txt = textTokens;
478
+ input_tokens_img = imageTokens;
479
+ }
480
+
481
+ const imageUsageData = {
482
+ sourceId: `api:imagegen.smyth`,
483
+ keySource: context.isUserKey ? APIKeySource.User : APIKeySource.Smyth,
484
+
485
+ cost: usage.cost * numberOfImages,
486
+ input_tokens_txt,
487
+ input_tokens_img,
488
+
489
+ agentId: context.agentId,
490
+ teamId: context.teamId,
491
+ };
492
+ SystemEvents.emit('USAGE:API', imageUsageData);
493
+ }
494
+
495
+ public formatToolsConfig({ toolDefinitions, toolChoice = 'auto' }) {
496
+ const tools = toolDefinitions.map((tool) => {
497
+ const { name, description, properties, requiredFields } = tool;
498
+
499
+ // Ensure the function name is valid
500
+ const validName = this.sanitizeFunctionName(name);
501
+
502
+ // Ensure properties are non-empty for OBJECT type
503
+ const validProperties = properties && Object.keys(properties).length > 0 ? properties : { dummy: { type: 'string' } };
504
+
505
+ return {
506
+ functionDeclarations: [
507
+ {
508
+ name: validName,
509
+ description: description || '',
510
+ parameters: {
511
+ type: 'OBJECT',
512
+ properties: validProperties,
513
+ required: requiredFields || [],
514
+ },
515
+ },
516
+ ],
517
+ };
518
+ });
519
+
520
+ return {
521
+ tools,
522
+ toolChoice: {
523
+ type: toolChoice,
524
+ },
525
+ };
526
+ }
527
+
528
+ public transformToolMessageBlocks({
529
+ messageBlock,
530
+ toolsData,
531
+ }: {
532
+ messageBlock: TLLMMessageBlock;
533
+ toolsData: ToolData[];
534
+ }): TLLMToolResultMessageBlock[] {
535
+ const messageBlocks: TLLMToolResultMessageBlock[] = [];
536
+
537
+ if (messageBlock) {
538
+ const content = [];
539
+ if (typeof messageBlock.content === 'string') {
540
+ content.push({ text: messageBlock.content });
541
+ } else if (Array.isArray(messageBlock.content)) {
542
+ content.push(...messageBlock.content);
543
+ }
544
+
545
+ if (messageBlock.parts) {
546
+ const functionCalls = messageBlock.parts.filter((part) => part.functionCall);
547
+ if (functionCalls.length > 0) {
548
+ content.push(
549
+ ...functionCalls.map((call) => ({
550
+ functionCall: {
551
+ name: call.functionCall.name,
552
+ args: JSON.parse(call.functionCall.args),
553
+ },
554
+ }))
555
+ );
556
+ }
557
+ }
558
+
559
+ messageBlocks.push({
560
+ role: messageBlock.role,
561
+ parts: content,
562
+ });
563
+ }
564
+
565
+ const transformedToolsData = toolsData.map(
566
+ (toolData): TLLMToolResultMessageBlock => ({
567
+ role: TLLMMessageRole.User,
568
+ parts: [
569
+ {
570
+ functionResponse: {
571
+ name: toolData.name,
572
+ response: {
573
+ name: toolData.name,
574
+ content: typeof toolData.result === 'string' ? toolData.result : JSON.stringify(toolData.result),
575
+ },
576
+ },
577
+ },
578
+ ],
579
+ })
580
+ );
581
+
582
+ return [...messageBlocks, ...transformedToolsData];
583
+ }
584
+
585
+ public getConsistentMessages(messages: TLLMMessageBlock[]): TLLMMessageBlock[] {
586
+ const _messages = LLMHelper.removeDuplicateUserMessages(messages);
587
+
588
+ return _messages.map((message) => {
589
+ const _message = { ...message };
590
+ let textContent = '';
591
+
592
+ // Map roles to valid Google AI roles
593
+ switch (_message.role) {
594
+ case TLLMMessageRole.Assistant:
595
+ case TLLMMessageRole.System:
596
+ _message.role = TLLMMessageRole.Model;
597
+ break;
598
+ case TLLMMessageRole.User:
599
+ // User role is already valid
600
+ break;
601
+ default:
602
+ _message.role = TLLMMessageRole.User; // Default to user for unknown roles
603
+ }
604
+
605
+ // * empty text causes error that's why we added '...'
606
+
607
+ if (_message?.parts) {
608
+ textContent = _message.parts.map((textBlock) => textBlock?.text || '...').join(' ');
609
+ } else if (Array.isArray(_message?.content)) {
610
+ textContent = _message.content.map((textBlock) => textBlock?.text || '...').join(' ');
611
+ } else if (_message?.content) {
612
+ textContent = (_message.content as string) || '...';
613
+ }
614
+
615
+ _message.parts = [{ text: textContent || '...' }];
616
+
617
+ delete _message.content; // Remove content to avoid error
618
+
619
+ return _message;
620
+ });
621
+ }
622
+
623
+ private async prepareMessages(params: TLLMPreparedParams): Promise<string | TLLMMessageBlock[] | GenerateContentRequest> {
624
+ let messages: string | TLLMMessageBlock[] | GenerateContentRequest = params?.messages || '';
625
+
626
+ const files: BinaryInput[] = params?.files || [];
627
+
628
+ if (files.length > 0) {
629
+ messages = await this.prepareMessagesWithFiles(params);
630
+ } else if (params?.toolsConfig?.tools?.length > 0) {
631
+ messages = await this.prepareMessagesWithTools(params);
632
+ } else {
633
+ messages = await this.prepareMessagesWithTextQuery(params);
634
+ }
635
+
636
+ return messages;
637
+ }
638
+
639
+ private async prepareMessagesWithFiles(params: TLLMPreparedParams): Promise<string> {
640
+ const model = params.model;
641
+
642
+ let messages: string | TLLMMessageBlock[] = params?.messages || '';
643
+ let systemInstruction = '';
644
+ const files: BinaryInput[] = params?.files || [];
645
+
646
+ // #region Upload files
647
+ const promises = [];
648
+ const _files = [];
649
+
650
+ for (let image of files) {
651
+ const binaryInput = BinaryInput.from(image);
652
+ promises.push(binaryInput.upload(AccessCandidate.agent(params.agentId)));
653
+
654
+ _files.push(binaryInput);
655
+ }
656
+
657
+ await Promise.all(promises);
658
+ // #endregion Upload files
659
+
660
+ // If user provide mix of valid and invalid files, we will only process the valid files
661
+ const validFiles = this.getValidFiles(_files, 'all');
662
+
663
+ const hasVideo = validFiles.some((file) => file?.mimetype?.includes('video'));
664
+
665
+ // GoogleAI only supports one video file at a time
666
+ if (hasVideo && validFiles.length > 1) {
667
+ throw new Error('Only one video file is supported at a time.');
668
+ }
669
+
670
+ const fileUploadingTasks = validFiles.map((file) => async () => {
671
+ try {
672
+ const uploadedFile = await this.uploadFile({
673
+ file,
674
+ apiKey: (params.credentials as BasicCredentials).apiKey,
675
+ agentId: params.agentId,
676
+ });
677
+
678
+ return { url: uploadedFile.url, mimetype: file.mimetype };
679
+ } catch {
680
+ return null;
681
+ }
682
+ });
683
+
684
+ const uploadedFiles = await processWithConcurrencyLimit(fileUploadingTasks);
685
+
686
+ // We throw error when there are no valid uploaded files,
687
+ if (uploadedFiles && uploadedFiles?.length === 0) {
688
+ throw new Error(`There is an issue during upload file in Google AI Server!`);
689
+ }
690
+
691
+ const fileData = this.getFileData(uploadedFiles);
692
+
693
+ const userMessage: TLLMMessageBlock = Array.isArray(messages) ? messages.pop() : { role: TLLMMessageRole.User, content: '' };
694
+ let prompt = userMessage?.content || '';
695
+
696
+ // if the the model does not support system instruction, we will add it to the prompt
697
+ if (!MODELS_SUPPORT_SYSTEM_INSTRUCTION.includes(model as string)) {
698
+ prompt = `${prompt}\n${systemInstruction}`;
699
+ }
700
+ //#endregion Separate system message and add JSON response instruction if needed
701
+
702
+ // Adjust input structure handling for multiple image files to accommodate variations.
703
+ messages = fileData.length === 1 ? ([...fileData, { text: prompt }] as any) : ([prompt, ...fileData] as any);
704
+
705
+ return messages as string;
706
+ }
707
+
708
+ private async prepareMessagesWithTools(params: TLLMPreparedParams): Promise<GenerateContentRequest> {
709
+ let formattedMessages: TLLMMessageBlock[];
710
+ let systemInstruction = '';
711
+
712
+ let messages = params?.messages || [];
713
+
714
+ const hasSystemMessage = LLMHelper.hasSystemMessage(messages);
715
+
716
+ if (hasSystemMessage) {
717
+ const separateMessages = LLMHelper.separateSystemMessages(messages);
718
+ const systemMessageContent = (separateMessages.systemMessage as TLLMMessageBlock)?.content;
719
+ systemInstruction = typeof systemMessageContent === 'string' ? systemMessageContent : '';
720
+ formattedMessages = separateMessages.otherMessages;
721
+ } else {
722
+ formattedMessages = messages;
723
+ }
724
+
725
+ const toolsPrompt: GenerateContentRequest = {
726
+ contents: formattedMessages as any,
727
+ };
728
+
729
+ if (systemInstruction) {
730
+ toolsPrompt.systemInstruction = systemInstruction;
731
+ }
732
+
733
+ if (params?.toolsConfig?.tools) toolsPrompt.tools = params?.toolsConfig?.tools as any;
734
+ if (params?.toolsConfig?.tool_choice) {
735
+ // Map tool choice to valid Google AI function calling modes
736
+ let mode: FunctionCallingMode = FunctionCallingMode.AUTO; // default
737
+ const toolChoice = params?.toolsConfig?.tool_choice;
738
+
739
+ if (toolChoice === 'auto') {
740
+ mode = FunctionCallingMode.AUTO;
741
+ } else if (toolChoice === 'required') {
742
+ mode = FunctionCallingMode.ANY;
743
+ } else if (toolChoice === 'none') {
744
+ mode = FunctionCallingMode.NONE;
745
+ } else if (typeof toolChoice === 'object' && toolChoice.type === 'function') {
746
+ // Handle OpenAI-style named tool choice - force any function call
747
+ mode = FunctionCallingMode.ANY;
748
+ }
749
+
750
+ toolsPrompt.toolConfig = {
751
+ functionCallingConfig: { mode },
752
+ };
753
+ }
754
+
755
+ return toolsPrompt;
756
+ }
757
+
758
+ private async prepareMessagesWithTextQuery(params: TLLMPreparedParams): Promise<string> {
759
+ const model = params.model;
760
+ let systemInstruction = '';
761
+ let prompt = '';
762
+
763
+ const { systemMessage, otherMessages } = LLMHelper.separateSystemMessages(params?.messages as TLLMMessageBlock[]);
764
+
765
+ if ('content' in systemMessage) {
766
+ systemInstruction = systemMessage.content as string;
767
+ }
768
+
769
+ const responseFormat = params?.responseFormat || '';
770
+ let responseMimeType = '';
771
+
772
+ if (responseFormat === 'json') {
773
+ systemInstruction += JSON_RESPONSE_INSTRUCTION;
774
+
775
+ if (MODELS_SUPPORT_JSON_RESPONSE.includes(model as string)) {
776
+ responseMimeType = 'application/json';
777
+ }
778
+ }
779
+
780
+ if (otherMessages?.length > 0) {
781
+ // Concatenate messages with prompt and remove messages from params as it's not supported
782
+ prompt += otherMessages.map((message) => message?.parts?.[0]?.text || '').join('\n');
783
+ }
784
+
785
+ // if the the model does not support system instruction, we will add it to the prompt
786
+ if (!MODELS_SUPPORT_SYSTEM_INSTRUCTION.includes(model as string)) {
787
+ prompt = `${prompt}\n${systemInstruction}`;
788
+ }
789
+ //#endregion Separate system message and add JSON response instruction if needed
790
+
791
+ return prompt;
792
+ }
793
+
794
+ private async prepareBodyForImageGenRequest(params: TLLMPreparedParams): Promise<any> {
795
+ return {
796
+ prompt: params.prompt,
797
+ model: params.model,
798
+ aspectRatio: (params as any).aspectRatio,
799
+ personGeneration: (params as any).personGeneration,
800
+ };
801
+ }
802
+
803
    /**
     * Builds the request body for an image-editing call.
     *
     * Reads each valid attached image into memory, inlines it as base64
     * `inlineData`, and appends the textual edit instruction as the final
     * content part. The `_metadata` field carries reporting details consumed
     * by `imageEditRequest` (prompt, image count, aspect ratio, person generation).
     *
     * @throws when no files are attached, none is a valid image, or a file cannot be read.
     */
    private async prepareImageEditBody(params: TLLMPreparedParams): Promise<any> {
        // Fallback editing model when the caller did not pin one.
        const model = params.model || 'gemini-2.5-flash-image-preview';

        // Construct edit prompt with image and instructions
        let editPrompt = params.prompt || 'Edit this image';
        if ((params as any).instruction) {
            editPrompt += `. ${(params as any).instruction}`;
        }

        // For image editing, we need to include the original image in the contents
        const contents: any[] = [];
        const files: BinaryInput[] = params?.files || [];

        if (files.length > 0) {
            // Get only valid image files for editing
            const validImageFiles = this.getValidFiles(files, 'image');

            // NOTE(review): getValidFiles already throws when nothing qualifies, so this branch looks unreachable — confirm.
            if (validImageFiles.length === 0) {
                throw new Error('No valid image files found for editing. Please provide at least one image file.');
            }

            // Process each image file: inline the raw bytes as base64 so no separate upload step is needed.
            for (const file of validImageFiles) {
                try {
                    // Read the file data as base64
                    const bufferData = await file.getBuffer();
                    const base64Image = Buffer.from(bufferData).toString('base64');

                    contents.push({
                        inlineData: {
                            mimeType: file.mimetype,
                            data: base64Image,
                        },
                    });
                } catch (error) {
                    throw new Error(`Failed to process image file: ${error.message}`);
                }
            }
        } else {
            throw new Error('No image provided for editing. Please include an image file.');
        }

        // Add the edit instruction
        contents.push({ text: editPrompt });

        // Return the complete request body that can be used directly in imageEditRequest
        return {
            model,
            contents,
            // Additional metadata for usage reporting
            _metadata: {
                prompt: editPrompt,
                numberOfImages: (params as any).n || 1,
                aspectRatio: (params as any).aspect_ratio || (params as any).size || '1:1',
                personGeneration: (params as any).person_generation || 'allow_adult',
            },
        };
    }
861
+
862
+ // Add this helper method to sanitize function names
863
+ private sanitizeFunctionName(name: string): string {
864
+ // Check if name is undefined or null
865
+ if (name == null) {
866
+ return '_unnamed_function';
867
+ }
868
+
869
+ // Remove any characters that are not alphanumeric, underscore, dot, or dash
870
+ let sanitized = name.replace(/[^a-zA-Z0-9_.-]/g, '');
871
+
872
+ // Ensure the name starts with a letter or underscore
873
+ if (!/^[a-zA-Z_]/.test(sanitized)) {
874
+ sanitized = '_' + sanitized;
875
+ }
876
+
877
+ // If sanitized is empty after removing invalid characters, use a default name
878
+ if (sanitized === '') {
879
+ sanitized = '_unnamed_function';
880
+ }
881
+
882
+ // Truncate to 64 characters if longer
883
+ sanitized = sanitized.slice(0, 64);
884
+
885
+ return sanitized;
886
+ }
887
+
888
+ private async uploadFile({ file, apiKey, agentId }: { file: BinaryInput; apiKey: string; agentId: string }): Promise<{ url: string }> {
889
+ try {
890
+ if (!apiKey || !file?.mimetype) {
891
+ throw new Error('Missing required parameters to save file for Google AI!');
892
+ }
893
+
894
+ // Create a temporary directory
895
+ const tempDir = os.tmpdir();
896
+ const fileName = uid();
897
+ const tempFilePath = path.join(tempDir, fileName);
898
+
899
+ const bufferData = await file.readData(AccessCandidate.agent(agentId));
900
+
901
+ // Write buffer data to temp file
902
+ await fs.promises.writeFile(tempFilePath, new Uint8Array(bufferData));
903
+
904
+ // Upload the file to the Google File Manager
905
+ const fileManager = new GoogleAIFileManager(apiKey);
906
+
907
+ const uploadResponse = await fileManager.uploadFile(tempFilePath, {
908
+ mimeType: file.mimetype,
909
+ displayName: fileName,
910
+ });
911
+
912
+ const name = uploadResponse.file.name;
913
+
914
+ // Poll getFile() on a set interval (10 seconds here) to check file state.
915
+ let uploadedFile = await fileManager.getFile(name);
916
+ while (uploadedFile.state === FileState.PROCESSING) {
917
+ process.stdout.write('.');
918
+ // Sleep for 10 seconds
919
+ await new Promise((resolve) => setTimeout(resolve, 10_000));
920
+ // Fetch the file from the API again
921
+ uploadedFile = await fileManager.getFile(name);
922
+ }
923
+
924
+ if (uploadedFile.state === FileState.FAILED) {
925
+ throw new Error('File processing failed.');
926
+ }
927
+
928
+ // Clean up temp file
929
+ await fs.promises.unlink(tempFilePath);
930
+
931
+ return {
932
+ url: uploadResponse.file.uri || '',
933
+ };
934
+ } catch (error) {
935
+ throw new Error(`Error uploading file for Google AI: ${error.message}`);
936
+ }
937
+ }
938
+
939
+ private getValidFiles(files: BinaryInput[], type: 'image' | 'all') {
940
+ const validSources = [];
941
+
942
+ for (let file of files) {
943
+ if (this.validMimeTypes[type].includes(file?.mimetype)) {
944
+ validSources.push(file);
945
+ }
946
+ }
947
+
948
+ if (validSources?.length === 0) {
949
+ throw new Error(`Unsupported file(s). Please make sure your file is one of the following types: ${this.validMimeTypes[type].join(', ')}`);
950
+ }
951
+
952
+ return validSources;
953
+ }
954
+
955
+ private getFileData(
956
+ files: {
957
+ url: string;
958
+ mimetype: string;
959
+ }[]
960
+ ): {
961
+ fileData: {
962
+ mimeType: string;
963
+ fileUri: string;
964
+ };
965
+ }[] {
966
+ try {
967
+ const imageData = [];
968
+
969
+ for (let file of files) {
970
+ imageData.push({
971
+ fileData: {
972
+ mimeType: file.mimetype,
973
+ fileUri: file.url,
974
+ },
975
+ });
976
+ }
977
+
978
+ return imageData;
979
+ } catch (error) {
980
+ throw error;
981
+ }
982
+ }
983
+ }