@juspay/neurolink 9.5.2 → 9.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (356)
  1. package/CHANGELOG.md +16 -0
  2. package/README.md +29 -25
  3. package/dist/agent/directTools.d.ts +5 -5
  4. package/dist/cli/commands/config.d.ts +9 -9
  5. package/dist/cli/commands/serve.d.ts +37 -0
  6. package/dist/cli/commands/serve.js +302 -229
  7. package/dist/cli/commands/setup-anthropic.d.ts +2 -2
  8. package/dist/cli/commands/setup-azure.d.ts +2 -2
  9. package/dist/cli/commands/setup-bedrock.d.ts +2 -2
  10. package/dist/cli/commands/setup-gcp.d.ts +2 -2
  11. package/dist/cli/commands/setup-google-ai.d.ts +2 -2
  12. package/dist/cli/commands/setup-huggingface.d.ts +2 -2
  13. package/dist/cli/commands/setup-mistral.d.ts +2 -2
  14. package/dist/cli/commands/setup-openai.d.ts +2 -2
  15. package/dist/cli/commands/setup.d.ts +2 -2
  16. package/dist/cli/factories/commandFactory.js +16 -2
  17. package/dist/cli/loop/optionsSchema.d.ts +2 -2
  18. package/dist/cli/loop/session.d.ts +4 -0
  19. package/dist/cli/loop/session.js +49 -4
  20. package/dist/cli/utils/interactiveSetup.d.ts +4 -4
  21. package/dist/config/conversationMemory.d.ts +2 -0
  22. package/dist/config/conversationMemory.js +5 -5
  23. package/dist/constants/contextWindows.d.ts +46 -0
  24. package/dist/constants/contextWindows.js +156 -0
  25. package/dist/context/budgetChecker.d.ts +18 -0
  26. package/dist/context/budgetChecker.js +71 -0
  27. package/dist/context/contextCompactor.d.ts +22 -0
  28. package/dist/context/contextCompactor.js +106 -0
  29. package/dist/context/effectiveHistory.d.ts +52 -0
  30. package/dist/context/effectiveHistory.js +105 -0
  31. package/dist/context/errorDetection.d.ts +14 -0
  32. package/dist/context/errorDetection.js +124 -0
  33. package/dist/context/fileSummarizationService.d.ts +54 -0
  34. package/dist/context/fileSummarizationService.js +255 -0
  35. package/dist/context/fileSummarizer.d.ts +56 -0
  36. package/dist/context/fileSummarizer.js +145 -0
  37. package/dist/context/fileTokenBudget.d.ts +53 -0
  38. package/dist/context/fileTokenBudget.js +127 -0
  39. package/dist/context/prompts/summarizationPrompt.d.ts +17 -0
  40. package/dist/context/prompts/summarizationPrompt.js +110 -0
  41. package/dist/context/stages/fileReadDeduplicator.d.ts +10 -0
  42. package/dist/context/stages/fileReadDeduplicator.js +66 -0
  43. package/dist/context/stages/slidingWindowTruncator.d.ts +11 -0
  44. package/dist/context/stages/slidingWindowTruncator.js +42 -0
  45. package/dist/context/stages/structuredSummarizer.d.ts +10 -0
  46. package/dist/context/stages/structuredSummarizer.js +49 -0
  47. package/dist/context/stages/toolOutputPruner.d.ts +10 -0
  48. package/dist/context/stages/toolOutputPruner.js +52 -0
  49. package/dist/context/summarizationEngine.d.ts +45 -0
  50. package/dist/context/summarizationEngine.js +110 -0
  51. package/dist/context/toolOutputLimits.d.ts +17 -0
  52. package/dist/context/toolOutputLimits.js +84 -0
  53. package/dist/context/toolPairRepair.d.ts +16 -0
  54. package/dist/context/toolPairRepair.js +66 -0
  55. package/dist/core/conversationMemoryManager.d.ts +5 -15
  56. package/dist/core/conversationMemoryManager.js +15 -75
  57. package/dist/core/modules/MessageBuilder.d.ts +1 -1
  58. package/dist/core/modules/MessageBuilder.js +2 -0
  59. package/dist/core/modules/TelemetryHandler.d.ts +2 -3
  60. package/dist/core/modules/TelemetryHandler.js +3 -3
  61. package/dist/core/modules/ToolsManager.d.ts +2 -2
  62. package/dist/core/redisConversationMemoryManager.d.ts +8 -14
  63. package/dist/core/redisConversationMemoryManager.js +69 -78
  64. package/dist/factories/providerFactory.d.ts +2 -2
  65. package/dist/files/fileReferenceRegistry.d.ts +276 -0
  66. package/dist/files/fileReferenceRegistry.js +1543 -0
  67. package/dist/files/fileTools.d.ts +423 -0
  68. package/dist/files/fileTools.js +449 -0
  69. package/dist/files/index.d.ts +14 -0
  70. package/dist/files/index.js +13 -0
  71. package/dist/files/streamingReader.d.ts +93 -0
  72. package/dist/files/streamingReader.js +321 -0
  73. package/dist/files/types.d.ts +23 -0
  74. package/dist/files/types.js +23 -0
  75. package/dist/image-gen/imageGenTools.d.ts +2 -2
  76. package/dist/image-gen/types.d.ts +12 -12
  77. package/dist/lib/agent/directTools.d.ts +7 -7
  78. package/dist/lib/config/conversationMemory.d.ts +2 -0
  79. package/dist/lib/config/conversationMemory.js +5 -5
  80. package/dist/lib/constants/contextWindows.d.ts +46 -0
  81. package/dist/lib/constants/contextWindows.js +157 -0
  82. package/dist/lib/context/budgetChecker.d.ts +18 -0
  83. package/dist/lib/context/budgetChecker.js +72 -0
  84. package/dist/lib/context/contextCompactor.d.ts +22 -0
  85. package/dist/lib/context/contextCompactor.js +107 -0
  86. package/dist/lib/context/effectiveHistory.d.ts +52 -0
  87. package/dist/lib/context/effectiveHistory.js +106 -0
  88. package/dist/lib/context/errorDetection.d.ts +14 -0
  89. package/dist/lib/context/errorDetection.js +125 -0
  90. package/dist/lib/context/fileSummarizationService.d.ts +54 -0
  91. package/dist/lib/context/fileSummarizationService.js +256 -0
  92. package/dist/lib/context/fileSummarizer.d.ts +56 -0
  93. package/dist/lib/context/fileSummarizer.js +146 -0
  94. package/dist/lib/context/fileTokenBudget.d.ts +53 -0
  95. package/dist/lib/context/fileTokenBudget.js +128 -0
  96. package/dist/lib/context/prompts/summarizationPrompt.d.ts +17 -0
  97. package/dist/lib/context/prompts/summarizationPrompt.js +111 -0
  98. package/dist/lib/context/stages/fileReadDeduplicator.d.ts +10 -0
  99. package/dist/lib/context/stages/fileReadDeduplicator.js +67 -0
  100. package/dist/lib/context/stages/slidingWindowTruncator.d.ts +11 -0
  101. package/dist/lib/context/stages/slidingWindowTruncator.js +43 -0
  102. package/dist/lib/context/stages/structuredSummarizer.d.ts +10 -0
  103. package/dist/lib/context/stages/structuredSummarizer.js +50 -0
  104. package/dist/lib/context/stages/toolOutputPruner.d.ts +10 -0
  105. package/dist/lib/context/stages/toolOutputPruner.js +53 -0
  106. package/dist/lib/context/summarizationEngine.d.ts +45 -0
  107. package/dist/lib/context/summarizationEngine.js +111 -0
  108. package/dist/lib/context/toolOutputLimits.d.ts +17 -0
  109. package/dist/lib/context/toolOutputLimits.js +85 -0
  110. package/dist/lib/context/toolPairRepair.d.ts +16 -0
  111. package/dist/lib/context/toolPairRepair.js +67 -0
  112. package/dist/lib/core/conversationMemoryManager.d.ts +5 -15
  113. package/dist/lib/core/conversationMemoryManager.js +15 -75
  114. package/dist/lib/core/modules/MessageBuilder.d.ts +1 -1
  115. package/dist/lib/core/modules/MessageBuilder.js +2 -0
  116. package/dist/lib/core/modules/TelemetryHandler.d.ts +2 -3
  117. package/dist/lib/core/modules/TelemetryHandler.js +3 -3
  118. package/dist/lib/core/modules/ToolsManager.d.ts +2 -2
  119. package/dist/lib/core/redisConversationMemoryManager.d.ts +8 -14
  120. package/dist/lib/core/redisConversationMemoryManager.js +69 -78
  121. package/dist/lib/factories/providerFactory.d.ts +2 -2
  122. package/dist/lib/files/fileReferenceRegistry.d.ts +276 -0
  123. package/dist/lib/files/fileReferenceRegistry.js +1544 -0
  124. package/dist/lib/files/fileTools.d.ts +423 -0
  125. package/dist/lib/files/fileTools.js +450 -0
  126. package/dist/lib/files/index.d.ts +14 -0
  127. package/dist/lib/files/index.js +14 -0
  128. package/dist/lib/files/streamingReader.d.ts +93 -0
  129. package/dist/lib/files/streamingReader.js +322 -0
  130. package/dist/lib/files/types.d.ts +23 -0
  131. package/dist/lib/files/types.js +24 -0
  132. package/dist/lib/image-gen/imageGenTools.d.ts +2 -2
  133. package/dist/lib/image-gen/types.d.ts +12 -12
  134. package/dist/lib/memory/mem0Initializer.d.ts +2 -2
  135. package/dist/lib/neurolink.d.ts +61 -2
  136. package/dist/lib/neurolink.js +619 -307
  137. package/dist/lib/processors/archive/ArchiveProcessor.d.ts +327 -0
  138. package/dist/lib/processors/archive/ArchiveProcessor.js +1309 -0
  139. package/dist/lib/processors/archive/index.d.ts +33 -0
  140. package/dist/lib/processors/archive/index.js +43 -0
  141. package/dist/lib/processors/base/types.d.ts +70 -64
  142. package/dist/lib/processors/base/types.js +6 -0
  143. package/dist/lib/processors/cli/fileProcessorCli.d.ts +8 -8
  144. package/dist/lib/processors/cli/fileProcessorCli.js +5 -5
  145. package/dist/lib/processors/config/mimeTypes.js +25 -0
  146. package/dist/lib/processors/config/sizeLimits.d.ts +52 -40
  147. package/dist/lib/processors/config/sizeLimits.js +56 -44
  148. package/dist/lib/processors/document/ExcelProcessor.d.ts +14 -0
  149. package/dist/lib/processors/document/ExcelProcessor.js +72 -1
  150. package/dist/lib/processors/document/PptxProcessor.d.ts +63 -0
  151. package/dist/lib/processors/document/PptxProcessor.js +158 -0
  152. package/dist/lib/processors/document/index.d.ts +1 -0
  153. package/dist/lib/processors/document/index.js +6 -0
  154. package/dist/lib/processors/errors/FileErrorCode.d.ts +2 -2
  155. package/dist/lib/processors/errors/errorHelpers.d.ts +2 -2
  156. package/dist/lib/processors/errors/errorSerializer.d.ts +4 -4
  157. package/dist/lib/processors/index.d.ts +8 -2
  158. package/dist/lib/processors/index.js +5 -2
  159. package/dist/lib/processors/integration/FileProcessorIntegration.d.ts +8 -8
  160. package/dist/lib/processors/integration/FileProcessorIntegration.js +7 -7
  161. package/dist/lib/processors/media/AudioProcessor.d.ts +328 -0
  162. package/dist/lib/processors/media/AudioProcessor.js +708 -0
  163. package/dist/lib/processors/media/VideoProcessor.d.ts +350 -0
  164. package/dist/lib/processors/media/VideoProcessor.js +992 -0
  165. package/dist/lib/processors/media/index.d.ts +27 -0
  166. package/dist/lib/processors/media/index.js +37 -0
  167. package/dist/lib/processors/registry/ProcessorRegistry.d.ts +19 -5
  168. package/dist/lib/processors/registry/ProcessorRegistry.js +103 -8
  169. package/dist/lib/processors/registry/index.d.ts +1 -1
  170. package/dist/lib/processors/registry/index.js +1 -1
  171. package/dist/lib/processors/registry/types.d.ts +2 -2
  172. package/dist/lib/providers/googleAiStudio.d.ts +34 -0
  173. package/dist/lib/providers/googleAiStudio.js +267 -397
  174. package/dist/lib/providers/googleVertex.d.ts +55 -1
  175. package/dist/lib/providers/googleVertex.js +452 -719
  176. package/dist/lib/providers/sagemaker/detection.d.ts +6 -6
  177. package/dist/lib/providers/sagemaker/diagnostics.d.ts +4 -4
  178. package/dist/lib/providers/sagemaker/parsers.d.ts +4 -4
  179. package/dist/lib/rag/chunkers/RecursiveChunker.js +2 -2
  180. package/dist/lib/rag/document/loaders.d.ts +6 -71
  181. package/dist/lib/rag/document/loaders.js +5 -5
  182. package/dist/lib/rag/graphRag/graphRAG.js +26 -9
  183. package/dist/lib/rag/metadata/MetadataExtractorFactory.d.ts +5 -55
  184. package/dist/lib/rag/metadata/metadataExtractor.js +6 -3
  185. package/dist/lib/rag/pipeline/RAGPipeline.d.ts +8 -126
  186. package/dist/lib/rag/pipeline/RAGPipeline.js +11 -11
  187. package/dist/lib/rag/pipeline/contextAssembly.d.ts +3 -42
  188. package/dist/lib/rag/pipeline/contextAssembly.js +6 -3
  189. package/dist/lib/rag/reranker/RerankerFactory.d.ts +5 -60
  190. package/dist/lib/rag/resilience/CircuitBreaker.d.ts +3 -33
  191. package/dist/lib/rag/resilience/RetryHandler.d.ts +2 -21
  192. package/dist/lib/rag/retrieval/hybridSearch.d.ts +3 -41
  193. package/dist/lib/rag/retrieval/vectorQueryTool.d.ts +2 -13
  194. package/dist/lib/rag/retrieval/vectorQueryTool.js +4 -3
  195. package/dist/lib/rag/types.d.ts +3 -3
  196. package/dist/lib/sdk/toolRegistration.d.ts +2 -2
  197. package/dist/lib/server/middleware/cache.d.ts +2 -2
  198. package/dist/lib/server/middleware/rateLimit.d.ts +2 -2
  199. package/dist/lib/server/routes/mcpRoutes.js +277 -249
  200. package/dist/lib/server/routes/memoryRoutes.js +287 -281
  201. package/dist/lib/server/utils/validation.d.ts +10 -10
  202. package/dist/lib/session/globalSessionState.d.ts +2 -2
  203. package/dist/lib/telemetry/telemetryService.d.ts +2 -2
  204. package/dist/lib/types/common.d.ts +39 -0
  205. package/dist/lib/types/contextTypes.d.ts +255 -0
  206. package/dist/lib/types/contextTypes.js +0 -2
  207. package/dist/lib/types/conversation.d.ts +62 -0
  208. package/dist/lib/types/conversationMemoryInterface.d.ts +27 -0
  209. package/dist/lib/types/conversationMemoryInterface.js +7 -0
  210. package/dist/lib/types/fileReferenceTypes.d.ts +222 -0
  211. package/dist/lib/types/fileReferenceTypes.js +9 -0
  212. package/dist/lib/types/fileTypes.d.ts +26 -3
  213. package/dist/lib/types/generateTypes.d.ts +22 -1
  214. package/dist/lib/types/index.d.ts +4 -5
  215. package/dist/lib/types/index.js +8 -10
  216. package/dist/lib/types/modelTypes.d.ts +2 -2
  217. package/dist/lib/types/processorTypes.d.ts +597 -0
  218. package/dist/lib/types/processorTypes.js +91 -0
  219. package/dist/lib/types/ragTypes.d.ts +481 -0
  220. package/dist/lib/types/ragTypes.js +8 -0
  221. package/dist/lib/types/sdkTypes.d.ts +17 -18
  222. package/dist/lib/types/streamTypes.d.ts +11 -1
  223. package/dist/lib/utils/async/retry.d.ts +2 -2
  224. package/dist/lib/utils/async/withTimeout.js +3 -1
  225. package/dist/lib/utils/conversationMemory.d.ts +12 -6
  226. package/dist/lib/utils/conversationMemory.js +76 -36
  227. package/dist/lib/utils/fileDetector.d.ts +62 -0
  228. package/dist/lib/utils/fileDetector.js +1014 -14
  229. package/dist/lib/utils/json/safeParse.d.ts +2 -2
  230. package/dist/lib/utils/messageBuilder.js +806 -153
  231. package/dist/lib/utils/modelChoices.d.ts +2 -2
  232. package/dist/lib/utils/multimodalOptionsBuilder.d.ts +2 -1
  233. package/dist/lib/utils/multimodalOptionsBuilder.js +1 -0
  234. package/dist/lib/utils/rateLimiter.d.ts +2 -2
  235. package/dist/lib/utils/sanitizers/filename.d.ts +4 -4
  236. package/dist/lib/utils/sanitizers/svg.d.ts +2 -2
  237. package/dist/lib/utils/thinkingConfig.d.ts +6 -6
  238. package/dist/lib/utils/tokenEstimation.d.ts +68 -0
  239. package/dist/lib/utils/tokenEstimation.js +113 -0
  240. package/dist/lib/utils/tokenUtils.d.ts +4 -4
  241. package/dist/lib/utils/ttsProcessor.d.ts +2 -2
  242. package/dist/lib/workflow/config.d.ts +150 -150
  243. package/dist/memory/mem0Initializer.d.ts +2 -2
  244. package/dist/neurolink.d.ts +61 -2
  245. package/dist/neurolink.js +619 -307
  246. package/dist/processors/archive/ArchiveProcessor.d.ts +327 -0
  247. package/dist/processors/archive/ArchiveProcessor.js +1308 -0
  248. package/dist/processors/archive/index.d.ts +33 -0
  249. package/dist/processors/archive/index.js +42 -0
  250. package/dist/processors/base/types.d.ts +70 -64
  251. package/dist/processors/base/types.js +6 -0
  252. package/dist/processors/cli/fileProcessorCli.d.ts +8 -8
  253. package/dist/processors/cli/fileProcessorCli.js +5 -5
  254. package/dist/processors/config/mimeTypes.js +25 -0
  255. package/dist/processors/config/sizeLimits.d.ts +52 -40
  256. package/dist/processors/config/sizeLimits.js +56 -44
  257. package/dist/processors/document/ExcelProcessor.d.ts +14 -0
  258. package/dist/processors/document/ExcelProcessor.js +72 -1
  259. package/dist/processors/document/PptxProcessor.d.ts +63 -0
  260. package/dist/processors/document/PptxProcessor.js +157 -0
  261. package/dist/processors/document/index.d.ts +1 -0
  262. package/dist/processors/document/index.js +6 -0
  263. package/dist/processors/errors/FileErrorCode.d.ts +2 -2
  264. package/dist/processors/errors/errorHelpers.d.ts +2 -2
  265. package/dist/processors/errors/errorSerializer.d.ts +4 -4
  266. package/dist/processors/index.d.ts +8 -2
  267. package/dist/processors/index.js +5 -2
  268. package/dist/processors/integration/FileProcessorIntegration.d.ts +8 -8
  269. package/dist/processors/integration/FileProcessorIntegration.js +7 -7
  270. package/dist/processors/media/AudioProcessor.d.ts +328 -0
  271. package/dist/processors/media/AudioProcessor.js +707 -0
  272. package/dist/processors/media/VideoProcessor.d.ts +350 -0
  273. package/dist/processors/media/VideoProcessor.js +991 -0
  274. package/dist/processors/media/ffprobe-static.d.ts +4 -0
  275. package/dist/processors/media/index.d.ts +27 -0
  276. package/dist/processors/media/index.js +36 -0
  277. package/dist/processors/registry/ProcessorRegistry.d.ts +19 -5
  278. package/dist/processors/registry/ProcessorRegistry.js +103 -8
  279. package/dist/processors/registry/index.d.ts +1 -1
  280. package/dist/processors/registry/index.js +1 -1
  281. package/dist/processors/registry/types.d.ts +2 -2
  282. package/dist/providers/googleAiStudio.d.ts +34 -0
  283. package/dist/providers/googleAiStudio.js +267 -397
  284. package/dist/providers/googleVertex.d.ts +55 -1
  285. package/dist/providers/googleVertex.js +452 -719
  286. package/dist/providers/sagemaker/detection.d.ts +6 -6
  287. package/dist/providers/sagemaker/diagnostics.d.ts +4 -4
  288. package/dist/providers/sagemaker/parsers.d.ts +4 -4
  289. package/dist/rag/chunkers/RecursiveChunker.js +2 -2
  290. package/dist/rag/document/loaders.d.ts +6 -71
  291. package/dist/rag/document/loaders.js +5 -5
  292. package/dist/rag/graphRag/graphRAG.js +26 -9
  293. package/dist/rag/metadata/MetadataExtractorFactory.d.ts +5 -55
  294. package/dist/rag/metadata/metadataExtractor.js +6 -3
  295. package/dist/rag/pipeline/RAGPipeline.d.ts +8 -126
  296. package/dist/rag/pipeline/RAGPipeline.js +11 -11
  297. package/dist/rag/pipeline/contextAssembly.d.ts +3 -42
  298. package/dist/rag/pipeline/contextAssembly.js +6 -3
  299. package/dist/rag/reranker/RerankerFactory.d.ts +5 -60
  300. package/dist/rag/resilience/CircuitBreaker.d.ts +3 -33
  301. package/dist/rag/resilience/RetryHandler.d.ts +2 -21
  302. package/dist/rag/retrieval/hybridSearch.d.ts +3 -41
  303. package/dist/rag/retrieval/vectorQueryTool.d.ts +2 -13
  304. package/dist/rag/retrieval/vectorQueryTool.js +4 -3
  305. package/dist/rag/types.d.ts +3 -3
  306. package/dist/sdk/toolRegistration.d.ts +2 -2
  307. package/dist/server/middleware/cache.d.ts +2 -2
  308. package/dist/server/middleware/rateLimit.d.ts +2 -2
  309. package/dist/server/routes/mcpRoutes.js +277 -249
  310. package/dist/server/routes/memoryRoutes.js +287 -281
  311. package/dist/server/utils/validation.d.ts +4 -4
  312. package/dist/session/globalSessionState.d.ts +2 -2
  313. package/dist/telemetry/telemetryService.d.ts +2 -2
  314. package/dist/types/common.d.ts +39 -0
  315. package/dist/types/contextTypes.d.ts +255 -0
  316. package/dist/types/contextTypes.js +0 -2
  317. package/dist/types/conversation.d.ts +62 -0
  318. package/dist/types/conversationMemoryInterface.d.ts +27 -0
  319. package/dist/types/conversationMemoryInterface.js +6 -0
  320. package/dist/types/fileReferenceTypes.d.ts +222 -0
  321. package/dist/types/fileReferenceTypes.js +8 -0
  322. package/dist/types/fileTypes.d.ts +26 -3
  323. package/dist/types/generateTypes.d.ts +22 -1
  324. package/dist/types/index.d.ts +4 -5
  325. package/dist/types/index.js +8 -10
  326. package/dist/types/processorTypes.d.ts +597 -0
  327. package/dist/types/processorTypes.js +90 -0
  328. package/dist/types/ragTypes.d.ts +481 -0
  329. package/dist/types/ragTypes.js +7 -0
  330. package/dist/types/sdkTypes.d.ts +17 -18
  331. package/dist/types/streamTypes.d.ts +11 -1
  332. package/dist/utils/async/retry.d.ts +2 -2
  333. package/dist/utils/async/withTimeout.js +3 -1
  334. package/dist/utils/conversationMemory.d.ts +12 -6
  335. package/dist/utils/conversationMemory.js +76 -36
  336. package/dist/utils/fileDetector.d.ts +62 -0
  337. package/dist/utils/fileDetector.js +1014 -14
  338. package/dist/utils/json/safeParse.d.ts +2 -2
  339. package/dist/utils/messageBuilder.js +806 -153
  340. package/dist/utils/modelChoices.d.ts +2 -2
  341. package/dist/utils/multimodalOptionsBuilder.d.ts +2 -1
  342. package/dist/utils/multimodalOptionsBuilder.js +1 -0
  343. package/dist/utils/rateLimiter.d.ts +2 -2
  344. package/dist/utils/sanitizers/filename.d.ts +4 -4
  345. package/dist/utils/sanitizers/svg.d.ts +2 -2
  346. package/dist/utils/thinkingConfig.d.ts +6 -6
  347. package/dist/utils/tokenEstimation.d.ts +68 -0
  348. package/dist/utils/tokenEstimation.js +112 -0
  349. package/dist/utils/tokenUtils.d.ts +4 -4
  350. package/dist/utils/ttsProcessor.d.ts +2 -2
  351. package/dist/workflow/config.d.ts +104 -104
  352. package/package.json +18 -6
  353. package/dist/lib/utils/conversationMemoryUtils.d.ts +0 -25
  354. package/dist/lib/utils/conversationMemoryUtils.js +0 -138
  355. package/dist/utils/conversationMemoryUtils.d.ts +0 -25
  356. package/dist/utils/conversationMemoryUtils.js +0 -137
@@ -17,7 +17,7 @@ import { logger } from "../utils/logger.js";
17
17
  import { isGemini3Model } from "../utils/modelDetection.js";
18
18
  import { createGoogleAuthConfig, createVertexProjectConfig, validateApiKey, } from "../utils/providerConfig.js";
19
19
  import { convertZodToJsonSchema, inlineJsonSchema, } from "../utils/schemaConversion.js";
20
- import { createNativeThinkingConfig } from "../utils/thinkingConfig.js";
20
+ import { createNativeThinkingConfig, } from "../utils/thinkingConfig.js";
21
21
  import { createTimeoutController, TimeoutError } from "../utils/timeout.js";
22
22
  // Import proper types for multimodal message handling
23
23
  // Enhanced Anthropic support with direct imports
@@ -863,44 +863,29 @@ export class GoogleVertexProvider extends BaseProvider {
863
863
  location,
864
864
  });
865
865
  }
866
+ // ── Shared helpers for native Gemini 3 SDK methods ──
866
867
  /**
867
- * Execute stream using native @google/genai SDK for Gemini 3 models on Vertex AI
868
- * This bypasses @ai-sdk/google-vertex to properly handle thought_signature
868
+ * Build multimodal content parts (user message) from input text, PDFs, and images.
869
+ * Shared by both stream and generate native Gemini 3 paths.
869
870
  */
870
- async executeNativeGemini3Stream(options) {
871
- const client = await this.createVertexGenAIClient(options.region);
872
- const modelName = options.model || this.modelName || getDefaultVertexModel();
873
- const effectiveLocation = options.region || this.location || getVertexLocation();
874
- logger.debug("[GoogleVertex] Using native @google/genai for Gemini 3", {
875
- model: modelName,
876
- hasTools: !!options.tools && Object.keys(options.tools).length > 0,
877
- project: this.projectId,
878
- location: effectiveLocation,
879
- });
880
- const contents = [];
881
- // Build user message parts - start with text
882
- const userParts = [{ text: options.input.text }];
871
+ buildNativeContentParts(inputText, multimodalInput, logLabel) {
872
+ const userParts = [{ text: inputText }];
883
873
  // Add PDF files as inlineData parts if present
884
- // Cast input to access multimodal properties that may exist at runtime
885
- const multimodalInput = options.input;
886
874
  if (multimodalInput?.pdfFiles && multimodalInput.pdfFiles.length > 0) {
887
- logger.debug(`[GoogleVertex] Processing ${multimodalInput.pdfFiles.length} PDF file(s) for native stream`);
875
+ logger.debug(`[GoogleVertex] Processing ${multimodalInput.pdfFiles.length} PDF file(s) for ${logLabel}`);
888
876
  for (const pdfFile of multimodalInput.pdfFiles) {
889
877
  let pdfBuffer;
890
878
  if (typeof pdfFile === "string") {
891
- // Check if it's a file path
892
879
  if (fs.existsSync(pdfFile)) {
893
880
  pdfBuffer = fs.readFileSync(pdfFile);
894
881
  }
895
882
  else {
896
- // Assume it's already base64 encoded
897
883
  pdfBuffer = Buffer.from(pdfFile, "base64");
898
884
  }
899
885
  }
900
886
  else {
901
887
  pdfBuffer = pdfFile;
902
888
  }
903
- // Convert to base64 for the native SDK
904
889
  const base64Data = pdfBuffer.toString("base64");
905
890
  userParts.push({
906
891
  inlineData: {
@@ -912,14 +897,13 @@ export class GoogleVertexProvider extends BaseProvider {
912
897
  }
913
898
  // Add images as inlineData parts if present
914
899
  if (multimodalInput?.images && multimodalInput.images.length > 0) {
915
- logger.debug(`[GoogleVertex] Processing ${multimodalInput.images.length} image(s) for native stream`);
900
+ logger.debug(`[GoogleVertex] Processing ${multimodalInput.images.length} image(s) for ${logLabel}`);
916
901
  for (const image of multimodalInput.images) {
917
902
  let imageBuffer;
918
903
  let mimeType = "image/jpeg"; // Default
919
904
  if (typeof image === "string") {
920
905
  if (fs.existsSync(image)) {
921
906
  imageBuffer = fs.readFileSync(image);
922
- // Detect mime type from extension
923
907
  const ext = path.extname(image).toLowerCase();
924
908
  if (ext === ".png") {
925
909
  mimeType = "image/png";
@@ -932,7 +916,6 @@ export class GoogleVertexProvider extends BaseProvider {
932
916
  }
933
917
  }
934
918
  else if (image.startsWith("data:")) {
935
- // Handle data URL
936
919
  const matches = image.match(/^data:([^;]+);base64,(.+)$/);
937
920
  if (matches) {
938
921
  mimeType = matches[1];
@@ -943,7 +926,6 @@ export class GoogleVertexProvider extends BaseProvider {
943
926
  }
944
927
  }
945
928
  else {
946
- // Assume base64 string
947
929
  imageBuffer = Buffer.from(image, "base64");
948
930
  }
949
931
  }
@@ -959,44 +941,56 @@ export class GoogleVertexProvider extends BaseProvider {
959
941
  });
960
942
  }
961
943
  }
962
- contents.push({
963
- role: "user",
964
- parts: userParts,
965
- });
966
- let tools;
944
+ return [
945
+ {
946
+ role: "user",
947
+ parts: userParts,
948
+ },
949
+ ];
950
+ }
951
+ /**
952
+ * Convert Vercel AI SDK tools to @google/genai FunctionDeclarations and build an execute map.
953
+ * Shared by both stream and generate native Gemini 3 paths.
954
+ */
955
+ convertToolsToNativeFunctionDeclarations(toolsMap, logLabel) {
956
+ if (Object.keys(toolsMap).length === 0) {
957
+ return { tools: undefined, executeMap: new Map() };
958
+ }
959
+ const functionDeclarations = [];
967
960
  const executeMap = new Map();
968
- if (options.tools &&
969
- Object.keys(options.tools).length > 0 &&
970
- !options.disableTools) {
971
- const functionDeclarations = [];
972
- for (const [name, tool] of Object.entries(options.tools)) {
973
- const decl = {
974
- name,
975
- description: tool.description || `Tool: ${name}`,
976
- };
977
- if (tool.parameters) {
978
- // Convert and inline schema to resolve $ref/definitions
979
- const rawSchema = convertZodToJsonSchema(tool.parameters);
980
- decl.parametersJsonSchema = inlineJsonSchema(rawSchema);
981
- // Remove $schema if present - @google/genai doesn't need it
982
- if (decl.parametersJsonSchema.$schema) {
983
- delete decl.parametersJsonSchema.$schema;
984
- }
985
- }
986
- functionDeclarations.push(decl);
987
- if (tool.execute) {
988
- executeMap.set(name, tool.execute);
961
+ for (const [name, tool] of Object.entries(toolsMap)) {
962
+ const decl = {
963
+ name,
964
+ description: tool.description || `Tool: ${name}`,
965
+ };
966
+ if (tool.parameters) {
967
+ const rawSchema = convertZodToJsonSchema(tool.parameters);
968
+ decl.parametersJsonSchema = inlineJsonSchema(rawSchema);
969
+ if (decl.parametersJsonSchema.$schema) {
970
+ delete decl.parametersJsonSchema.$schema;
989
971
  }
990
972
  }
991
- tools = [{ functionDeclarations }];
992
- logger.debug("[GoogleVertex] Converted tools for native SDK", {
993
- toolCount: functionDeclarations.length,
994
- toolNames: functionDeclarations.map((t) => t.name),
995
- });
973
+ functionDeclarations.push(decl);
974
+ if (tool.execute) {
975
+ executeMap.set(name, tool.execute);
976
+ }
996
977
  }
997
- // Build config
978
+ logger.debug(`[GoogleVertex] Converted tools for ${logLabel}`, {
979
+ toolCount: functionDeclarations.length,
980
+ toolNames: functionDeclarations.map((t) => t.name),
981
+ });
982
+ return {
983
+ tools: [{ functionDeclarations }],
984
+ executeMap,
985
+ };
986
+ }
987
+ /**
988
+ * Build the native @google/genai config object for generate/stream calls.
989
+ * Shared by both stream and generate native Gemini 3 paths.
990
+ */
991
+ buildNativeGenerateConfig(options, tools) {
998
992
  const config = {
999
- temperature: options.temperature ?? 1.0, // Gemini 3 requires 1.0 for tool calling
993
+ temperature: options.temperature ?? 1.0,
1000
994
  maxOutputTokens: options.maxTokens,
1001
995
  };
1002
996
  if (tools) {
@@ -1005,21 +999,215 @@ export class GoogleVertexProvider extends BaseProvider {
1005
999
  if (options.systemPrompt) {
1006
1000
  config.systemInstruction = options.systemPrompt;
1007
1001
  }
1008
- // Add thinking config for Gemini 3
1009
1002
  const nativeThinkingConfig = createNativeThinkingConfig(options.thinkingConfig);
1010
1003
  if (nativeThinkingConfig) {
1011
1004
  config.thinkingConfig = nativeThinkingConfig;
1012
1005
  }
1006
+ return config;
1007
+ }
1008
+ /**
1009
+ * Compute a safe maxSteps value from raw input.
1010
+ */
1011
+ computeMaxSteps(rawMaxSteps) {
1012
+ const raw = rawMaxSteps || DEFAULT_MAX_STEPS;
1013
+ return Number.isFinite(raw) && raw > 0
1014
+ ? Math.min(Math.floor(raw), 100)
1015
+ : Math.min(DEFAULT_MAX_STEPS, 100);
1016
+ }
1017
+ /**
1018
+ * Extract text from raw native SDK response parts, filtering out non-text parts
1019
+ * (thoughtSignature, functionCall) to avoid SDK warnings.
1020
+ */
1021
+ extractTextFromRawParts(rawParts) {
1022
+ return rawParts
1023
+ .filter((part) => typeof part.text === "string")
1024
+ .map((part) => part.text)
1025
+ .join("");
1026
+ }
1027
+ /**
1028
+ * Execute a set of function calls from the model, tracking failures and retries.
1029
+ * Returns function response parts to be added to conversation history.
1030
+ * Shared by both stream and generate native Gemini 3 paths.
1031
+ */
1032
+ async executeNativeFunctionCalls(calls, executeMap, failedTools, allToolCalls, toolExecutions) {
1033
+ const functionResponses = [];
1034
+ for (const call of calls) {
1035
+ allToolCalls.push({ toolName: call.name, args: call.args });
1036
+ // Check if this tool has already exceeded retry limit
1037
+ const failedInfo = failedTools.get(call.name);
1038
+ if (failedInfo && failedInfo.count >= DEFAULT_TOOL_MAX_RETRIES) {
1039
+ logger.warn(`[GoogleVertex] Tool "${call.name}" has exceeded retry limit (${DEFAULT_TOOL_MAX_RETRIES}), skipping execution`);
1040
+ const errorOutput = {
1041
+ error: `TOOL_PERMANENTLY_FAILED: The tool "${call.name}" has failed ${failedInfo.count} times and will not be retried. Last error: ${failedInfo.lastError}. Please proceed without using this tool or inform the user that this functionality is unavailable.`,
1042
+ status: "permanently_failed",
1043
+ do_not_retry: true,
1044
+ };
1045
+ toolExecutions?.push({
1046
+ name: call.name,
1047
+ input: call.args,
1048
+ output: errorOutput,
1049
+ });
1050
+ functionResponses.push({
1051
+ functionResponse: { name: call.name, response: errorOutput },
1052
+ });
1053
+ continue;
1054
+ }
1055
+ const execute = executeMap.get(call.name);
1056
+ if (execute) {
1057
+ try {
1058
+ const toolOptions = {
1059
+ toolCallId: `${call.name}-${Date.now()}`,
1060
+ messages: [],
1061
+ abortSignal: undefined,
1062
+ };
1063
+ const result = await execute(call.args, toolOptions);
1064
+ toolExecutions?.push({
1065
+ name: call.name,
1066
+ input: call.args,
1067
+ output: result,
1068
+ });
1069
+ functionResponses.push({
1070
+ functionResponse: {
1071
+ name: call.name,
1072
+ response: { result },
1073
+ },
1074
+ });
1075
+ }
1076
+ catch (error) {
1077
+ const errorMessage = error instanceof Error ? error.message : "Unknown error";
1078
+ const currentFailInfo = failedTools.get(call.name) || {
1079
+ count: 0,
1080
+ lastError: "",
1081
+ };
1082
+ currentFailInfo.count++;
1083
+ currentFailInfo.lastError = errorMessage;
1084
+ failedTools.set(call.name, currentFailInfo);
1085
+ logger.warn(`[GoogleVertex] Tool "${call.name}" failed (attempt ${currentFailInfo.count}/${DEFAULT_TOOL_MAX_RETRIES}): ${errorMessage}`);
1086
+ const isPermanentFailure = currentFailInfo.count >= DEFAULT_TOOL_MAX_RETRIES;
1087
+ const errorOutput = {
1088
+ error: isPermanentFailure
1089
+ ? `TOOL_PERMANENTLY_FAILED: The tool "${call.name}" has failed ${currentFailInfo.count} times with error: ${errorMessage}. This tool will not be retried. Please proceed without using this tool or inform the user that this functionality is unavailable.`
1090
+ : `TOOL_EXECUTION_ERROR: ${errorMessage}. Retry attempt ${currentFailInfo.count}/${DEFAULT_TOOL_MAX_RETRIES}.`,
1091
+ status: isPermanentFailure ? "permanently_failed" : "failed",
1092
+ do_not_retry: isPermanentFailure,
1093
+ retry_count: currentFailInfo.count,
1094
+ max_retries: DEFAULT_TOOL_MAX_RETRIES,
1095
+ };
1096
+ toolExecutions?.push({
1097
+ name: call.name,
1098
+ input: call.args,
1099
+ output: errorOutput,
1100
+ });
1101
+ functionResponses.push({
1102
+ functionResponse: { name: call.name, response: errorOutput },
1103
+ });
1104
+ }
1105
+ }
1106
+ else {
1107
+ // Tool not found is a permanent error
1108
+ const errorOutput = {
1109
+ error: `TOOL_NOT_FOUND: The tool "${call.name}" does not exist. Do not attempt to call this tool again.`,
1110
+ status: "permanently_failed",
1111
+ do_not_retry: true,
1112
+ };
1113
+ toolExecutions?.push({
1114
+ name: call.name,
1115
+ input: call.args,
1116
+ output: errorOutput,
1117
+ });
1118
+ functionResponses.push({
1119
+ functionResponse: { name: call.name, response: errorOutput },
1120
+ });
1121
+ }
1122
+ }
1123
+ return functionResponses;
1124
+ }
1125
+ /**
1126
+ * Collect raw response parts and function calls from a native SDK content stream chunk.
1127
+ * Also accumulates token usage metadata.
1128
+ * Returns updated token counts.
1129
+ */
1130
+ processNativeStreamChunk(chunk, rawResponseParts, stepFunctionCalls, tokenUsage) {
1131
+ const chunkRecord = chunk;
1132
+ const candidates = chunkRecord.candidates;
1133
+ const firstCandidate = candidates?.[0];
1134
+ const chunkContent = firstCandidate?.content;
1135
+ if (chunkContent && Array.isArray(chunkContent.parts)) {
1136
+ rawResponseParts.push(...chunkContent.parts);
1137
+ }
1138
+ if (chunk.functionCalls) {
1139
+ stepFunctionCalls.push(...chunk.functionCalls);
1140
+ }
1141
+ const usageMetadata = chunkRecord.usageMetadata;
1142
+ if (usageMetadata) {
1143
+ if (usageMetadata.promptTokenCount !== undefined &&
1144
+ usageMetadata.promptTokenCount > 0) {
1145
+ tokenUsage.input = usageMetadata.promptTokenCount;
1146
+ }
1147
+ if (usageMetadata.candidatesTokenCount !== undefined &&
1148
+ usageMetadata.candidatesTokenCount > 0) {
1149
+ tokenUsage.output = usageMetadata.candidatesTokenCount;
1150
+ }
1151
+ }
1152
+ }
1153
+ /**
1154
+ * Push model response parts to conversation history, preserving thoughtSignature
1155
+ * for Gemini 3 multi-turn tool calling.
1156
+ */
1157
+ pushModelResponseToHistory(currentContents, rawResponseParts, stepFunctionCalls) {
1158
+ currentContents.push({
1159
+ role: "model",
1160
+ parts: rawResponseParts.length > 0
1161
+ ? rawResponseParts
1162
+ : stepFunctionCalls.map((fc) => ({ functionCall: fc })),
1163
+ });
1164
+ }
1165
+ /**
1166
+ * Compute final text for maxSteps termination when the model was still calling tools.
1167
+ */
1168
+ computeMaxStepsTerminationText(step, maxSteps, finalText, lastStepText) {
1169
+ if (step >= maxSteps && !finalText) {
1170
+ logger.warn(`[GoogleVertex] Tool call loop terminated after reaching maxSteps (${maxSteps}). ` +
1171
+ `Model was still calling tools. Using accumulated text from last step.`);
1172
+ return (lastStepText ||
1173
+ `[Tool execution limit reached after ${maxSteps} steps. The model continued requesting tool calls beyond the limit.]`);
1174
+ }
1175
+ return finalText;
1176
+ }
1177
+ // ── End shared helpers ──
1178
+ /**
1179
+ * Execute stream using native @google/genai SDK for Gemini 3 models on Vertex AI
1180
+ * This bypasses @ai-sdk/google-vertex to properly handle thought_signature
1181
+ */
1182
+ async executeNativeGemini3Stream(options) {
1183
+ const client = await this.createVertexGenAIClient(options.region);
1184
+ const modelName = options.model || this.modelName || getDefaultVertexModel();
1185
+ const effectiveLocation = options.region || this.location || getVertexLocation();
1186
+ logger.debug("[GoogleVertex] Using native @google/genai for Gemini 3", {
1187
+ model: modelName,
1188
+ hasTools: !!options.tools && Object.keys(options.tools).length > 0,
1189
+ project: this.projectId,
1190
+ location: effectiveLocation,
1191
+ });
1192
+ // Build contents from input with multimodal support
1193
+ const multimodalInput = options.input;
1194
+ const contents = this.buildNativeContentParts(options.input.text, multimodalInput, "native stream");
1195
+ // Convert tools to native format
1196
+ const toolsInput = options.tools &&
1197
+ Object.keys(options.tools).length > 0 &&
1198
+ !options.disableTools
1199
+ ? options.tools
1200
+ : {};
1201
+ const { tools, executeMap } = this.convertToolsToNativeFunctionDeclarations(toolsInput, "native SDK");
1202
+ // Build config
1203
+ const config = this.buildNativeGenerateConfig(options, tools);
1013
1204
  // Add JSON output format support for native SDK stream
1014
- // Note: Combining tools + schema may have limitations with Gemini models
1015
1205
  const streamOptions = options;
1016
1206
  if (streamOptions.output?.format === "json" || streamOptions.schema) {
1017
1207
  config.responseMimeType = "application/json";
1018
- // Convert schema to JSON schema format for the native SDK
1019
1208
  if (streamOptions.schema) {
1020
1209
  const rawSchema = convertZodToJsonSchema(streamOptions.schema);
1021
1210
  const inlinedSchema = inlineJsonSchema(rawSchema);
1022
- // Remove $schema if present - @google/genai doesn't need it
1023
1211
  if (inlinedSchema.$schema) {
1024
1212
  delete inlinedSchema.$schema;
1025
1213
  }
@@ -1030,23 +1218,14 @@ export class GoogleVertexProvider extends BaseProvider {
1030
1218
  }
1031
1219
  }
1032
1220
  const startTime = Date.now();
1033
- // Ensure maxSteps is a valid positive integer to prevent infinite loops
1034
- const rawMaxSteps = options.maxSteps || DEFAULT_MAX_STEPS;
1035
- const maxSteps = Number.isFinite(rawMaxSteps) && rawMaxSteps > 0
1036
- ? Math.min(Math.floor(rawMaxSteps), 100) // Cap at 100 for safety
1037
- : Math.min(DEFAULT_MAX_STEPS, 100);
1221
+ const maxSteps = this.computeMaxSteps(options.maxSteps);
1038
1222
  const currentContents = [...contents];
1039
1223
  let finalText = "";
1040
- let lastStepText = ""; // Track text from last step for maxSteps termination
1224
+ let lastStepText = "";
1041
1225
  const allToolCalls = [];
1042
1226
  let step = 0;
1043
- // Track failed tools to prevent infinite retry loops
1044
- // Key: tool name, Value: { count: retry attempts, lastError: error message }
1045
1227
  const failedTools = new Map();
1046
- // Track token usage across all steps
1047
- // promptTokenCount is typically in the final chunk, candidatesTokenCount accumulates
1048
- let totalInputTokens = 0;
1049
- let totalOutputTokens = 0;
1228
+ const tokenUsage = { input: 0, output: 0 };
1050
1229
  // Agentic loop for tool calling
1051
1230
  while (step < maxSteps) {
1052
1231
  step++;
@@ -1058,142 +1237,19 @@ export class GoogleVertexProvider extends BaseProvider {
1058
1237
  config,
1059
1238
  });
1060
1239
  const stepFunctionCalls = [];
1061
- // Capture raw response parts including thoughtSignature
1062
1240
  const rawResponseParts = [];
1063
1241
  for await (const chunk of stream) {
1064
- // Extract raw parts from candidates FIRST
1065
- // This avoids using chunk.text which triggers SDK warning when
1066
- // non-text parts (thoughtSignature, functionCall) are present
1067
- const chunkRecord = chunk;
1068
- const candidates = chunkRecord.candidates;
1069
- const firstCandidate = candidates?.[0];
1070
- const chunkContent = firstCandidate?.content;
1071
- if (chunkContent && Array.isArray(chunkContent.parts)) {
1072
- rawResponseParts.push(...chunkContent.parts);
1073
- }
1074
- if (chunk.functionCalls) {
1075
- stepFunctionCalls.push(...chunk.functionCalls);
1076
- }
1077
- // Extract usage metadata from chunk
1078
- // promptTokenCount is typically in the final chunk, candidatesTokenCount accumulates
1079
- const usageMetadata = chunkRecord.usageMetadata;
1080
- if (usageMetadata) {
1081
- // Take the latest promptTokenCount (usually only in final chunk)
1082
- if (usageMetadata.promptTokenCount !== undefined &&
1083
- usageMetadata.promptTokenCount > 0) {
1084
- totalInputTokens = usageMetadata.promptTokenCount;
1085
- }
1086
- // Take the latest candidatesTokenCount (accumulates through chunks)
1087
- if (usageMetadata.candidatesTokenCount !== undefined &&
1088
- usageMetadata.candidatesTokenCount > 0) {
1089
- totalOutputTokens = usageMetadata.candidatesTokenCount;
1090
- }
1091
- }
1242
+ this.processNativeStreamChunk(chunk, rawResponseParts, stepFunctionCalls, tokenUsage);
1092
1243
  }
1093
- // Extract text from raw parts after stream completes
1094
- // This avoids SDK warning about non-text parts (thoughtSignature, functionCall)
1095
- const stepText = rawResponseParts
1096
- .filter((part) => typeof part.text === "string")
1097
- .map((part) => part.text)
1098
- .join("");
1099
- // If no function calls, we're done
1244
+ const stepText = this.extractTextFromRawParts(rawResponseParts);
1100
1245
  if (stepFunctionCalls.length === 0) {
1101
1246
  finalText = stepText;
1102
1247
  break;
1103
1248
  }
1104
- // Track the last step text for maxSteps termination
1105
1249
  lastStepText = stepText;
1106
- // Execute function calls
1107
1250
  logger.debug(`[GoogleVertex] Executing ${stepFunctionCalls.length} function calls`);
1108
- // Add model response with ALL parts (including thoughtSignature) to history
1109
- // This preserves the thought_signature which is required for Gemini 3 multi-turn tool calling
1110
- currentContents.push({
1111
- role: "model",
1112
- parts: rawResponseParts.length > 0
1113
- ? rawResponseParts
1114
- : stepFunctionCalls.map((fc) => ({
1115
- functionCall: fc,
1116
- })),
1117
- });
1118
- // Execute each function and collect responses
1119
- const functionResponses = [];
1120
- for (const call of stepFunctionCalls) {
1121
- allToolCalls.push({ toolName: call.name, args: call.args });
1122
- // Check if this tool has already exceeded retry limit
1123
- const failedInfo = failedTools.get(call.name);
1124
- if (failedInfo && failedInfo.count >= DEFAULT_TOOL_MAX_RETRIES) {
1125
- logger.warn(`[GoogleVertex] Tool "${call.name}" has exceeded retry limit (${DEFAULT_TOOL_MAX_RETRIES}), skipping execution`);
1126
- functionResponses.push({
1127
- functionResponse: {
1128
- name: call.name,
1129
- response: {
1130
- error: `TOOL_PERMANENTLY_FAILED: The tool "${call.name}" has failed ${failedInfo.count} times and will not be retried. Last error: ${failedInfo.lastError}. Please proceed without using this tool or inform the user that this functionality is unavailable.`,
1131
- status: "permanently_failed",
1132
- do_not_retry: true,
1133
- },
1134
- },
1135
- });
1136
- continue;
1137
- }
1138
- const execute = executeMap.get(call.name);
1139
- if (execute) {
1140
- try {
1141
- // AI SDK Tool execute requires (args, options) - provide minimal options
1142
- const toolOptions = {
1143
- toolCallId: `${call.name}-${Date.now()}`,
1144
- messages: [],
1145
- abortSignal: undefined,
1146
- };
1147
- const result = await execute(call.args, toolOptions);
1148
- functionResponses.push({
1149
- functionResponse: { name: call.name, response: { result } },
1150
- });
1151
- }
1152
- catch (error) {
1153
- const errorMessage = error instanceof Error ? error.message : "Unknown error";
1154
- // Track this failure
1155
- const currentFailInfo = failedTools.get(call.name) || {
1156
- count: 0,
1157
- lastError: "",
1158
- };
1159
- currentFailInfo.count++;
1160
- currentFailInfo.lastError = errorMessage;
1161
- failedTools.set(call.name, currentFailInfo);
1162
- logger.warn(`[GoogleVertex] Tool "${call.name}" failed (attempt ${currentFailInfo.count}/${DEFAULT_TOOL_MAX_RETRIES}): ${errorMessage}`);
1163
- // Determine if this is a permanent failure
1164
- const isPermanentFailure = currentFailInfo.count >= DEFAULT_TOOL_MAX_RETRIES;
1165
- functionResponses.push({
1166
- functionResponse: {
1167
- name: call.name,
1168
- response: {
1169
- error: isPermanentFailure
1170
- ? `TOOL_PERMANENTLY_FAILED: The tool "${call.name}" has failed ${currentFailInfo.count} times with error: ${errorMessage}. This tool will not be retried. Please proceed without using this tool or inform the user that this functionality is unavailable.`
1171
- : `TOOL_EXECUTION_ERROR: ${errorMessage}. Retry attempt ${currentFailInfo.count}/${DEFAULT_TOOL_MAX_RETRIES}.`,
1172
- status: isPermanentFailure
1173
- ? "permanently_failed"
1174
- : "failed",
1175
- do_not_retry: isPermanentFailure,
1176
- retry_count: currentFailInfo.count,
1177
- max_retries: DEFAULT_TOOL_MAX_RETRIES,
1178
- },
1179
- },
1180
- });
1181
- }
1182
- }
1183
- else {
1184
- // Tool not found is a permanent error
1185
- functionResponses.push({
1186
- functionResponse: {
1187
- name: call.name,
1188
- response: {
1189
- error: `TOOL_NOT_FOUND: The tool "${call.name}" does not exist. Do not attempt to call this tool again.`,
1190
- status: "permanently_failed",
1191
- do_not_retry: true,
1192
- },
1193
- },
1194
- });
1195
- }
1196
- }
1251
+ this.pushModelResponseToHistory(currentContents, rawResponseParts, stepFunctionCalls);
1252
+ const functionResponses = await this.executeNativeFunctionCalls(stepFunctionCalls, executeMap, failedTools, allToolCalls);
1197
1253
  // Add function responses to history
1198
1254
  currentContents.push({
1199
1255
  role: "function",
@@ -1205,14 +1261,7 @@ export class GoogleVertexProvider extends BaseProvider {
1205
1261
  throw this.handleProviderError(error);
1206
1262
  }
1207
1263
  }
1208
- // Handle maxSteps termination - if we exited the loop due to maxSteps being reached
1209
- if (step >= maxSteps && !finalText) {
1210
- logger.warn(`[GoogleVertex] Tool call loop terminated after reaching maxSteps (${maxSteps}). ` +
1211
- `Model was still calling tools. Using accumulated text from last step.`);
1212
- finalText =
1213
- lastStepText ||
1214
- `[Tool execution limit reached after ${maxSteps} steps. The model continued requesting tool calls beyond the limit.]`;
1215
- }
1264
+ finalText = this.computeMaxStepsTerminationText(step, maxSteps, finalText, lastStepText);
1216
1265
  const responseTime = Date.now() - startTime;
1217
1266
  // Create async iterable for streaming result
1218
1267
  async function* createTextStream() {
@@ -1223,9 +1272,9 @@ export class GoogleVertexProvider extends BaseProvider {
1223
1272
  provider: this.providerName,
1224
1273
  model: modelName,
1225
1274
  usage: {
1226
- input: totalInputTokens,
1227
- output: totalOutputTokens,
1228
- total: totalInputTokens + totalOutputTokens,
1275
+ input: tokenUsage.input,
1276
+ output: tokenUsage.output,
1277
+ total: tokenUsage.input + tokenUsage.output,
1229
1278
  },
1230
1279
  toolCalls: allToolCalls.map((tc) => ({
1231
1280
  toolName: tc.toolName,
@@ -1254,163 +1303,28 @@ export class GoogleVertexProvider extends BaseProvider {
1254
1303
  });
1255
1304
  // Build contents from input with multimodal support
1256
1305
  const inputText = options.prompt || options.input?.text || "Please respond.";
1257
- const contents = [];
1258
- // Build user message parts - start with text
1259
- const userParts = [{ text: inputText }];
1260
- // Add PDF files as inlineData parts if present
1261
- // Cast input to access multimodal properties that may exist at runtime
1262
1306
  const multimodalInput = options.input;
1263
- if (multimodalInput?.pdfFiles && multimodalInput.pdfFiles.length > 0) {
1264
- logger.debug(`[GoogleVertex] Processing ${multimodalInput.pdfFiles.length} PDF file(s) for native generate`);
1265
- for (const pdfFile of multimodalInput.pdfFiles) {
1266
- let pdfBuffer;
1267
- if (typeof pdfFile === "string") {
1268
- // Check if it's a file path
1269
- if (fs.existsSync(pdfFile)) {
1270
- pdfBuffer = fs.readFileSync(pdfFile);
1271
- }
1272
- else {
1273
- // Assume it's already base64 encoded
1274
- pdfBuffer = Buffer.from(pdfFile, "base64");
1275
- }
1276
- }
1277
- else {
1278
- pdfBuffer = pdfFile;
1279
- }
1280
- // Convert to base64 for the native SDK
1281
- const base64Data = pdfBuffer.toString("base64");
1282
- userParts.push({
1283
- inlineData: {
1284
- mimeType: "application/pdf",
1285
- data: base64Data,
1286
- },
1287
- });
1288
- }
1289
- }
1290
- // Add images as inlineData parts if present
1291
- if (multimodalInput?.images && multimodalInput.images.length > 0) {
1292
- logger.debug(`[GoogleVertex] Processing ${multimodalInput.images.length} image(s) for native generate`);
1293
- for (const image of multimodalInput.images) {
1294
- let imageBuffer;
1295
- let mimeType = "image/jpeg"; // Default
1296
- if (typeof image === "string") {
1297
- if (fs.existsSync(image)) {
1298
- imageBuffer = fs.readFileSync(image);
1299
- // Detect mime type from extension
1300
- const ext = path.extname(image).toLowerCase();
1301
- if (ext === ".png") {
1302
- mimeType = "image/png";
1303
- }
1304
- else if (ext === ".gif") {
1305
- mimeType = "image/gif";
1306
- }
1307
- else if (ext === ".webp") {
1308
- mimeType = "image/webp";
1309
- }
1310
- }
1311
- else if (image.startsWith("data:")) {
1312
- // Handle data URL
1313
- const matches = image.match(/^data:([^;]+);base64,(.+)$/);
1314
- if (matches) {
1315
- mimeType = matches[1];
1316
- imageBuffer = Buffer.from(matches[2], "base64");
1317
- }
1318
- else {
1319
- continue; // Skip invalid data URL
1320
- }
1321
- }
1322
- else {
1323
- // Assume base64 string
1324
- imageBuffer = Buffer.from(image, "base64");
1325
- }
1326
- }
1327
- else {
1328
- imageBuffer = image;
1329
- }
1330
- const base64Data = imageBuffer.toString("base64");
1331
- userParts.push({
1332
- inlineData: {
1333
- mimeType,
1334
- data: base64Data,
1335
- },
1336
- });
1337
- }
1338
- }
1339
- contents.push({
1340
- role: "user",
1341
- parts: userParts,
1342
- });
1307
+ const contents = this.buildNativeContentParts(inputText, multimodalInput, "native generate");
1343
1308
  // Get tools from SDK and options
1344
1309
  const shouldUseTools = !options.disableTools && this.supportsTools();
1345
1310
  const sdkTools = shouldUseTools ? await this.getAllTools() : {};
1346
1311
  const combinedTools = { ...sdkTools, ...(options.tools || {}) };
1347
- let tools;
1348
- const executeMap = new Map();
1349
- if (Object.keys(combinedTools).length > 0) {
1350
- const functionDeclarations = [];
1351
- for (const [name, tool] of Object.entries(combinedTools)) {
1352
- const decl = {
1353
- name,
1354
- description: tool.description || `Tool: ${name}`,
1355
- };
1356
- if (tool.parameters) {
1357
- // Convert and inline schema to resolve $ref/definitions
1358
- const rawSchema = convertZodToJsonSchema(tool.parameters);
1359
- decl.parametersJsonSchema = inlineJsonSchema(rawSchema);
1360
- // Remove $schema if present - @google/genai doesn't need it
1361
- if (decl.parametersJsonSchema.$schema) {
1362
- delete decl.parametersJsonSchema.$schema;
1363
- }
1364
- }
1365
- functionDeclarations.push(decl);
1366
- if (tool.execute) {
1367
- executeMap.set(name, tool.execute);
1368
- }
1369
- }
1370
- tools = [{ functionDeclarations }];
1371
- logger.debug("[GoogleVertex] Converted tools for native SDK generate", {
1372
- toolCount: functionDeclarations.length,
1373
- toolNames: functionDeclarations.map((t) => t.name),
1374
- });
1375
- }
1312
+ const { tools, executeMap } = this.convertToolsToNativeFunctionDeclarations(combinedTools, "native SDK generate");
1376
1313
  // Build config
1377
- const config = {
1378
- temperature: options.temperature ?? 1.0, // Gemini 3 requires 1.0 for tool calling
1379
- maxOutputTokens: options.maxTokens,
1380
- };
1381
- if (tools) {
1382
- config.tools = tools;
1383
- }
1384
- if (options.systemPrompt) {
1385
- config.systemInstruction = options.systemPrompt;
1386
- }
1387
- // Add thinking config for Gemini 3
1388
- const nativeThinkingConfig2 = createNativeThinkingConfig(options.thinkingConfig);
1389
- if (nativeThinkingConfig2) {
1390
- config.thinkingConfig = nativeThinkingConfig2;
1391
- }
1314
+ const config = this.buildNativeGenerateConfig(options, tools);
1392
1315
  // Note: Schema/JSON output for Gemini 3 native SDK is complex due to $ref resolution issues
1393
1316
  // For now, schemas are handled via the AI SDK fallback path, not native SDK
1394
1317
  // TODO: Implement proper $ref resolution for complex nested schemas
1395
1318
  const startTime = Date.now();
1396
- // Ensure maxSteps is a valid positive integer to prevent infinite loops
1397
- const rawMaxSteps = options.maxSteps || DEFAULT_MAX_STEPS;
1398
- const maxSteps = Number.isFinite(rawMaxSteps) && rawMaxSteps > 0
1399
- ? Math.min(Math.floor(rawMaxSteps), 100) // Cap at 100 for safety
1400
- : Math.min(DEFAULT_MAX_STEPS, 100);
1319
+ const maxSteps = this.computeMaxSteps(options.maxSteps);
1401
1320
  const currentContents = [...contents];
1402
1321
  let finalText = "";
1403
- let lastStepText = ""; // Track text from last step for maxSteps termination
1322
+ let lastStepText = "";
1404
1323
  const allToolCalls = [];
1405
1324
  const toolExecutions = [];
1406
1325
  let step = 0;
1407
- // Track failed tools to prevent infinite retry loops
1408
- // Key: tool name, Value: { count: retry attempts, lastError: error message }
1409
1326
  const failedTools = new Map();
1410
- // Track token usage across all steps
1411
- // promptTokenCount is typically in the final chunk, candidatesTokenCount accumulates
1412
- let totalInputTokens = 0;
1413
- let totalOutputTokens = 0;
1327
+ const tokenUsage = { input: 0, output: 0 };
1414
1328
  // Agentic loop for tool calling
1415
1329
  while (step < maxSteps) {
1416
1330
  step++;
@@ -1423,168 +1337,19 @@ export class GoogleVertexProvider extends BaseProvider {
1423
1337
  config,
1424
1338
  });
1425
1339
  const stepFunctionCalls = [];
1426
- // Capture raw response parts including thoughtSignature
1427
1340
  const rawResponseParts = [];
1428
- // Collect all chunks from stream
1429
1341
  for await (const chunk of stream) {
1430
- // Extract raw parts from candidates FIRST
1431
- // This avoids using chunk.text which triggers SDK warning when
1432
- // non-text parts (thoughtSignature, functionCall) are present
1433
- const chunkRecord = chunk;
1434
- const candidates = chunkRecord.candidates;
1435
- const firstCandidate = candidates?.[0];
1436
- const chunkContent = firstCandidate?.content;
1437
- if (chunkContent && Array.isArray(chunkContent.parts)) {
1438
- rawResponseParts.push(...chunkContent.parts);
1439
- }
1440
- if (chunk.functionCalls) {
1441
- stepFunctionCalls.push(...chunk.functionCalls);
1442
- }
1443
- // Extract usage metadata from chunk
1444
- // promptTokenCount is typically in the final chunk, candidatesTokenCount accumulates
1445
- const usageMetadata = chunkRecord.usageMetadata;
1446
- if (usageMetadata) {
1447
- // Take the latest promptTokenCount (usually only in final chunk)
1448
- if (usageMetadata.promptTokenCount !== undefined &&
1449
- usageMetadata.promptTokenCount > 0) {
1450
- totalInputTokens = usageMetadata.promptTokenCount;
1451
- }
1452
- // Take the latest candidatesTokenCount (accumulates through chunks)
1453
- if (usageMetadata.candidatesTokenCount !== undefined &&
1454
- usageMetadata.candidatesTokenCount > 0) {
1455
- totalOutputTokens = usageMetadata.candidatesTokenCount;
1456
- }
1457
- }
1342
+ this.processNativeStreamChunk(chunk, rawResponseParts, stepFunctionCalls, tokenUsage);
1458
1343
  }
1459
- // Extract text from raw parts after stream completes
1460
- // This avoids SDK warning about non-text parts (thoughtSignature, functionCall)
1461
- const stepText = rawResponseParts
1462
- .filter((part) => typeof part.text === "string")
1463
- .map((part) => part.text)
1464
- .join("");
1465
- // If no function calls, we're done
1344
+ const stepText = this.extractTextFromRawParts(rawResponseParts);
1466
1345
  if (stepFunctionCalls.length === 0) {
1467
1346
  finalText = stepText;
1468
1347
  break;
1469
1348
  }
1470
- // Track the last step text for maxSteps termination
1471
1349
  lastStepText = stepText;
1472
- // Execute function calls
1473
1350
  logger.debug(`[GoogleVertex] Generate executing ${stepFunctionCalls.length} function calls`);
1474
- // Add model response with ALL parts (including thoughtSignature) to history
1475
- // This preserves the thought_signature which is required for Gemini 3 multi-turn tool calling
1476
- currentContents.push({
1477
- role: "model",
1478
- parts: rawResponseParts.length > 0
1479
- ? rawResponseParts
1480
- : stepFunctionCalls.map((fc) => ({
1481
- functionCall: fc,
1482
- })),
1483
- });
1484
- // Execute each function and collect responses
1485
- const functionResponses = [];
1486
- for (const call of stepFunctionCalls) {
1487
- allToolCalls.push({ toolName: call.name, args: call.args });
1488
- // Check if this tool has already exceeded retry limit
1489
- const failedInfo = failedTools.get(call.name);
1490
- if (failedInfo && failedInfo.count >= DEFAULT_TOOL_MAX_RETRIES) {
1491
- logger.warn(`[GoogleVertex] Tool "${call.name}" has exceeded retry limit (${DEFAULT_TOOL_MAX_RETRIES}), skipping execution`);
1492
- const errorOutput = {
1493
- error: `TOOL_PERMANENTLY_FAILED: The tool "${call.name}" has failed ${failedInfo.count} times and will not be retried. Last error: ${failedInfo.lastError}. Please proceed without using this tool or inform the user that this functionality is unavailable.`,
1494
- status: "permanently_failed",
1495
- do_not_retry: true,
1496
- };
1497
- toolExecutions.push({
1498
- name: call.name,
1499
- input: call.args,
1500
- output: errorOutput,
1501
- });
1502
- functionResponses.push({
1503
- functionResponse: {
1504
- name: call.name,
1505
- response: errorOutput,
1506
- },
1507
- });
1508
- continue;
1509
- }
1510
- const execute = executeMap.get(call.name);
1511
- if (execute) {
1512
- try {
1513
- // AI SDK Tool execute requires (args, options) - provide minimal options
1514
- const toolOptions = {
1515
- toolCallId: `${call.name}-${Date.now()}`,
1516
- messages: [],
1517
- abortSignal: undefined,
1518
- };
1519
- const execResult = await execute(call.args, toolOptions);
1520
- // Track execution
1521
- toolExecutions.push({
1522
- name: call.name,
1523
- input: call.args,
1524
- output: execResult,
1525
- });
1526
- functionResponses.push({
1527
- functionResponse: {
1528
- name: call.name,
1529
- response: { result: execResult },
1530
- },
1531
- });
1532
- }
1533
- catch (error) {
1534
- const errorMessage = error instanceof Error ? error.message : "Unknown error";
1535
- // Track this failure
1536
- const currentFailInfo = failedTools.get(call.name) || {
1537
- count: 0,
1538
- lastError: "",
1539
- };
1540
- currentFailInfo.count++;
1541
- currentFailInfo.lastError = errorMessage;
1542
- failedTools.set(call.name, currentFailInfo);
1543
- logger.warn(`[GoogleVertex] Tool "${call.name}" failed (attempt ${currentFailInfo.count}/${DEFAULT_TOOL_MAX_RETRIES}): ${errorMessage}`);
1544
- // Determine if this is a permanent failure
1545
- const isPermanentFailure = currentFailInfo.count >= DEFAULT_TOOL_MAX_RETRIES;
1546
- const errorOutput = {
1547
- error: isPermanentFailure
1548
- ? `TOOL_PERMANENTLY_FAILED: The tool "${call.name}" has failed ${currentFailInfo.count} times with error: ${errorMessage}. This tool will not be retried. Please proceed without using this tool or inform the user that this functionality is unavailable.`
1549
- : `TOOL_EXECUTION_ERROR: ${errorMessage}. Retry attempt ${currentFailInfo.count}/${DEFAULT_TOOL_MAX_RETRIES}.`,
1550
- status: isPermanentFailure ? "permanently_failed" : "failed",
1551
- do_not_retry: isPermanentFailure,
1552
- retry_count: currentFailInfo.count,
1553
- max_retries: DEFAULT_TOOL_MAX_RETRIES,
1554
- };
1555
- toolExecutions.push({
1556
- name: call.name,
1557
- input: call.args,
1558
- output: errorOutput,
1559
- });
1560
- functionResponses.push({
1561
- functionResponse: {
1562
- name: call.name,
1563
- response: errorOutput,
1564
- },
1565
- });
1566
- }
1567
- }
1568
- else {
1569
- // Tool not found is a permanent error
1570
- const errorOutput = {
1571
- error: `TOOL_NOT_FOUND: The tool "${call.name}" does not exist. Do not attempt to call this tool again.`,
1572
- status: "permanently_failed",
1573
- do_not_retry: true,
1574
- };
1575
- toolExecutions.push({
1576
- name: call.name,
1577
- input: call.args,
1578
- output: errorOutput,
1579
- });
1580
- functionResponses.push({
1581
- functionResponse: {
1582
- name: call.name,
1583
- response: errorOutput,
1584
- },
1585
- });
1586
- }
1587
- }
1351
+ this.pushModelResponseToHistory(currentContents, rawResponseParts, stepFunctionCalls);
1352
+ const functionResponses = await this.executeNativeFunctionCalls(stepFunctionCalls, executeMap, failedTools, allToolCalls, toolExecutions);
1588
1353
  // Add function responses to history
1589
1354
  currentContents.push({
1590
1355
  role: "function",
@@ -1596,14 +1361,7 @@ export class GoogleVertexProvider extends BaseProvider {
1596
1361
  throw this.handleProviderError(error);
1597
1362
  }
1598
1363
  }
1599
- // Handle maxSteps termination - if we exited the loop due to maxSteps being reached
1600
- if (step >= maxSteps && !finalText) {
1601
- logger.warn(`[GoogleVertex] Generate tool call loop terminated after reaching maxSteps (${maxSteps}). ` +
1602
- `Model was still calling tools. Using accumulated text from last step.`);
1603
- finalText =
1604
- lastStepText ||
1605
- `[Tool execution limit reached after ${maxSteps} steps. The model continued requesting tool calls beyond the limit.]`;
1606
- }
1364
+ finalText = this.computeMaxStepsTerminationText(step, maxSteps, finalText, lastStepText);
1607
1365
  const responseTime = Date.now() - startTime;
1608
1366
  // Build EnhancedGenerateResult
1609
1367
  return {
@@ -1611,9 +1369,9 @@ export class GoogleVertexProvider extends BaseProvider {
1611
1369
  provider: this.providerName,
1612
1370
  model: modelName,
1613
1371
  usage: {
1614
- input: totalInputTokens,
1615
- output: totalOutputTokens,
1616
- total: totalInputTokens + totalOutputTokens,
1372
+ input: tokenUsage.input,
1373
+ output: tokenUsage.output,
1374
+ total: tokenUsage.input + tokenUsage.output,
1617
1375
  },
1618
1376
  responseTime,
1619
1377
  toolsUsed: allToolCalls.map((tc) => tc.toolName),
@@ -2454,8 +2212,174 @@ export class GoogleVertexProvider extends BaseProvider {
2454
2212
  return Math.ceil(text.length / 4);
2455
2213
  }
2456
2214
  /**
2457
- * Build image parts for multimodal content
2215
+ * Obtain a Google Auth access token for Vertex AI REST API calls.
2458
2216
  */
2217
+ async getImageGenerationAccessToken() {
2218
+ const { GoogleAuth } = await import("google-auth-library");
2219
+ // Priority: GOOGLE_APPLICATION_CREDENTIALS_NEUROLINK > GOOGLE_APPLICATION_CREDENTIALS
2220
+ const credentialsPath = process.env.GOOGLE_APPLICATION_CREDENTIALS_NEUROLINK ||
2221
+ process.env.GOOGLE_APPLICATION_CREDENTIALS;
2222
+ const auth = new GoogleAuth({
2223
+ ...(credentialsPath && { keyFilename: credentialsPath }),
2224
+ scopes: ["https://www.googleapis.com/auth/cloud-platform"],
2225
+ });
2226
+ const client = await auth.getClient();
2227
+ const accessToken = await client.getAccessToken();
2228
+ if (!accessToken.token) {
2229
+ throw new AuthenticationError("Failed to obtain access token from Google Auth", this.providerName);
2230
+ }
2231
+ return accessToken.token;
2232
+ }
2233
+ /**
2234
+ * Build request parts for image generation from prompt, PDFs, and images.
2235
+ */
2236
+ buildImageGenerationParts(prompt, pdfFiles, inputImages) {
2237
+ const parts = [];
2238
+ if (prompt) {
2239
+ parts.push({ text: prompt });
2240
+ }
2241
+ // Add PDF files as inline data
2242
+ for (const pdfFile of pdfFiles) {
2243
+ let pdfBase64;
2244
+ if (Buffer.isBuffer(pdfFile)) {
2245
+ pdfBase64 = pdfFile.toString("base64");
2246
+ }
2247
+ else if (typeof pdfFile === "string") {
2248
+ const isFilePath = pdfFile.startsWith("/") ||
2249
+ /^[a-zA-Z]:\\/.test(pdfFile) ||
2250
+ pdfFile.startsWith("./") ||
2251
+ pdfFile.startsWith("../") ||
2252
+ pdfFile.startsWith("..\\") ||
2253
+ pdfFile.startsWith(".\\");
2254
+ if (isFilePath) {
2255
+ const normalizedPath = path.resolve(pdfFile);
2256
+ const cwd = process.cwd();
2257
+ if (!normalizedPath.startsWith(cwd + path.sep) &&
2258
+ normalizedPath !== cwd) {
2259
+ throw new ProviderError(`PDF file path must be within current directory for security`, this.providerName);
2260
+ }
2261
+ if (!fs.existsSync(normalizedPath)) {
2262
+ throw new ProviderError(`PDF file not found: ${normalizedPath}`, this.providerName);
2263
+ }
2264
+ const pdfBuffer = fs.readFileSync(normalizedPath);
2265
+ pdfBase64 = pdfBuffer.toString("base64");
2266
+ }
2267
+ else {
2268
+ pdfBase64 = pdfFile;
2269
+ }
2270
+ }
2271
+ else {
2272
+ logger.warn("Invalid PDF file format, skipping", {
2273
+ type: typeof pdfFile,
2274
+ });
2275
+ continue;
2276
+ }
2277
+ parts.push({
2278
+ inlineData: {
2279
+ mimeType: "application/pdf",
2280
+ data: pdfBase64,
2281
+ },
2282
+ });
2283
+ logger.debug("Added PDF file to request", {
2284
+ dataLength: pdfBase64.length,
2285
+ });
2286
+ }
2287
+ // Add images (including those converted from PDF by baseProvider)
2288
+ for (let i = 0; i < inputImages.length; i++) {
2289
+ const image = inputImages[i];
2290
+ let imageBase64;
2291
+ let mimeType;
2292
+ if (Buffer.isBuffer(image)) {
2293
+ imageBase64 = image.toString("base64");
2294
+ mimeType = this.detectImageType(image);
2295
+ }
2296
+ else if (typeof image === "string") {
2297
+ const isFilePath = image.startsWith("/") ||
2298
+ /^[a-zA-Z]:\\/.test(image) ||
2299
+ image.startsWith("./") ||
2300
+ image.startsWith("../") ||
2301
+ image.startsWith("..\\") ||
2302
+ image.startsWith(".\\");
2303
+ if (isFilePath) {
2304
+ const normalizedPath = path.resolve(image);
2305
+ if (!fs.existsSync(normalizedPath)) {
2306
+ logger.warn(`Image file not found: ${normalizedPath}, skipping`);
2307
+ continue;
2308
+ }
2309
+ const imageBuffer = fs.readFileSync(normalizedPath);
2310
+ imageBase64 = imageBuffer.toString("base64");
2311
+ mimeType = this.detectImageType(imageBuffer);
2312
+ }
2313
+ else if (image.startsWith("data:")) {
2314
+ const matches = image.match(/^data:([^;]+);base64,(.+)$/);
2315
+ if (matches) {
2316
+ mimeType = matches[1];
2317
+ imageBase64 = matches[2];
2318
+ }
2319
+ else {
2320
+ logger.warn("Invalid data URL format, skipping image", {
2321
+ index: i,
2322
+ });
2323
+ continue;
2324
+ }
2325
+ }
2326
+ else {
2327
+ imageBase64 = image;
2328
+ const decodedBuffer = Buffer.from(imageBase64, "base64");
2329
+ mimeType = this.detectImageType(decodedBuffer);
2330
+ }
2331
+ }
2332
+ else {
2333
+ logger.warn("Invalid image format, skipping", {
2334
+ type: typeof image,
2335
+ index: i,
2336
+ });
2337
+ continue;
2338
+ }
2339
+ parts.push({
2340
+ inlineData: {
2341
+ mimeType: mimeType,
2342
+ data: imageBase64,
2343
+ },
2344
+ });
2345
+ logger.debug("Added image to request", {
2346
+ index: i,
2347
+ mimeType,
2348
+ dataLength: imageBase64.length,
2349
+ });
2350
+ }
2351
+ return parts;
2352
+ }
2353
+ /**
2354
+ * Parse the Vertex AI image generation REST API response and extract image data.
2355
+ */
2356
+ parseImageGenerationResponse(data, imageModelName) {
2357
+ const candidate = data.candidates?.[0];
2358
+ if (!candidate?.content?.parts) {
2359
+ throw new ProviderError("No content parts in Vertex AI response", this.providerName);
2360
+ }
2361
+ // Find image part (check both camelCase and snake_case)
2362
+ const imagePart = candidate.content.parts.find((part) => (part.inlineData || part.inline_data) &&
2363
+ ((part.inlineData && part.inlineData.mimeType) ||
2364
+ (part.inline_data && part.inline_data.mime_type)) &&
2365
+ ((part.inlineData && part.inlineData.mimeType?.startsWith("image/")) ||
2366
+ (part.inline_data &&
2367
+ part.inline_data.mime_type?.startsWith("image/"))));
2368
+ if (!imagePart) {
2369
+ const hasTextContent = candidate.content.parts.some((part) => part.text);
2370
+ throw new ProviderError(hasTextContent
2371
+ ? `Image generation completed but model returned text instead of image data. Model: ${imageModelName}`
2372
+ : `Image generation completed but no image data was returned. Model: ${imageModelName}`, this.providerName);
2373
+ }
2374
+ const imageData = imagePart.inlineData?.data || imagePart.inline_data?.data;
2375
+ const mimeType = imagePart.inlineData?.mimeType ||
2376
+ imagePart.inline_data?.mime_type ||
2377
+ "image/png";
2378
+ if (!imageData) {
2379
+ throw new ProviderError("Image part found but no data available", this.providerName);
2380
+ }
2381
+ return { imageData, mimeType };
2382
+ }
2459
2383
  /**
2460
2384
  * Overrides the BaseProvider's image generation method to implement it for Vertex AI.
2461
2385
  * Uses REST API approach with google-auth-library for authentication.
@@ -2469,18 +2393,15 @@ export class GoogleVertexProvider extends BaseProvider {
2469
2393
  const inputImages = options.input?.images || [];
2470
2394
  const hasPdfInput = pdfFiles.length > 0;
2471
2395
  const hasImageInput = inputImages.length > 0;
2472
- // Validate that we have at least a prompt or PDF/image input
2473
2396
  if (!prompt.trim() && !hasPdfInput && !hasImageInput) {
2474
2397
  throw new ProviderError("Image generation requires either a prompt, PDF file, or image as input", this.providerName);
2475
2398
  }
2476
- // Select appropriate model - use gemini-3-pro-image-preview for PDF input
2399
+ // Select appropriate model
2477
2400
  let imageModelName = options.model || this.modelName || "gemini-3-pro-image-preview";
2478
- // If PDF files are provided, ensure we use a model that supports PDF input
2479
2401
  if (hasPdfInput && !imageModelName.includes("gemini-3-pro-image")) {
2480
2402
  imageModelName = "gemini-3-pro-image-preview";
2481
2403
  }
2482
2404
  // Determine location - some image models require 'global' location
2483
- // Check if the model is in GLOBAL_LOCATION_MODELS array (includes gemini-3-pro-image-preview, gemini-2.5-flash-image, etc.)
2484
2405
  const imageLocation = process.env.GOOGLE_VERTEX_IMAGE_LOCATION || "global";
2485
2406
  const requiresGlobalLocation = GLOBAL_LOCATION_MODELS.some((model) => imageModelName.includes(model) || model.includes(imageModelName));
2486
2407
  const location = requiresGlobalLocation ? imageLocation : this.location;
@@ -2497,175 +2418,20 @@ export class GoogleVertexProvider extends BaseProvider {
2497
2418
  imageCount: inputImages.length,
2498
2419
  });
2499
2420
  try {
2500
- // Import google-auth-library dynamically
2501
- const { GoogleAuth } = await import("google-auth-library");
2502
- // Determine which credentials file to use
2503
- // Priority: GOOGLE_APPLICATION_CREDENTIALS_NEUROLINK > GOOGLE_APPLICATION_CREDENTIALS
2504
- const credentialsPath = process.env.GOOGLE_APPLICATION_CREDENTIALS_NEUROLINK ||
2505
- process.env.GOOGLE_APPLICATION_CREDENTIALS;
2506
- // Initialize GoogleAuth with credentials
2507
- // Use keyFilename to explicitly specify the credentials file to avoid using wrong service account
2508
- const auth = new GoogleAuth({
2509
- ...(credentialsPath && { keyFilename: credentialsPath }),
2510
- scopes: ["https://www.googleapis.com/auth/cloud-platform"],
2511
- });
2512
- // Get access token
2513
- const client = await auth.getClient();
2514
- const accessToken = await client.getAccessToken();
2515
- if (!accessToken.token) {
2516
- throw new AuthenticationError("Failed to obtain access token from Google Auth", this.providerName);
2517
- }
2518
- // Build parts array - supports text prompt and optional PDF files
2519
- const parts = [];
2520
- // Add text prompt
2521
- if (prompt) {
2522
- parts.push({ text: prompt });
2523
- }
2524
- // Add PDF files as inline data (for gemini-3-pro-image-preview)
2525
- if (hasPdfInput) {
2526
- for (const pdfFile of pdfFiles) {
2527
- let pdfBase64;
2528
- if (Buffer.isBuffer(pdfFile)) {
2529
- pdfBase64 = pdfFile.toString("base64");
2530
- }
2531
- else if (typeof pdfFile === "string") {
2532
- // Check if it's already base64 or a file path
2533
- // Supports absolute paths, Windows paths, and relative paths
2534
- const isFilePath = pdfFile.startsWith("/") ||
2535
- /^[a-zA-Z]:\\/.test(pdfFile) ||
2536
- pdfFile.startsWith("./") ||
2537
- pdfFile.startsWith("../") ||
2538
- pdfFile.startsWith("..\\") ||
2539
- pdfFile.startsWith(".\\");
2540
- if (isFilePath) {
2541
- // Validate and normalize the path for security
2542
- const normalizedPath = path.resolve(pdfFile);
2543
- const cwd = process.cwd();
2544
- // Security: Ensure path is within current working directory
2545
- if (!normalizedPath.startsWith(cwd + path.sep) &&
2546
- normalizedPath !== cwd) {
2547
- throw new ProviderError(`PDF file path must be within current directory for security`, this.providerName);
2548
- }
2549
- // Security: Validate file exists before reading
2550
- if (!fs.existsSync(normalizedPath)) {
2551
- throw new ProviderError(`PDF file not found: ${normalizedPath}`, this.providerName);
2552
- }
2553
- // Read the file
2554
- const pdfBuffer = fs.readFileSync(normalizedPath);
2555
- pdfBase64 = pdfBuffer.toString("base64");
2556
- }
2557
- else {
2558
- // Assume it's already base64
2559
- pdfBase64 = pdfFile;
2560
- }
2561
- }
2562
- else {
2563
- logger.warn("Invalid PDF file format, skipping", {
2564
- type: typeof pdfFile,
2565
- });
2566
- continue;
2567
- }
2568
- parts.push({
2569
- inlineData: {
2570
- mimeType: "application/pdf",
2571
- data: pdfBase64,
2572
- },
2573
- });
2574
- logger.debug("Added PDF file to request", {
2575
- dataLength: pdfBase64.length,
2576
- });
2577
- }
2578
- }
2579
- // Add images (including those converted from PDF by baseProvider)
2580
- // This handles the case where PDFs are converted to images for models that don't support native PDF
2581
- if (hasImageInput) {
2582
- for (let i = 0; i < inputImages.length; i++) {
2583
- const image = inputImages[i];
2584
- let imageBase64;
2585
- let mimeType;
2586
- if (Buffer.isBuffer(image)) {
2587
- imageBase64 = image.toString("base64");
2588
- mimeType = this.detectImageType(image);
2589
- }
2590
- else if (typeof image === "string") {
2591
- // Check if it's a file path or already base64
2592
- const isFilePath = image.startsWith("/") ||
2593
- /^[a-zA-Z]:\\/.test(image) ||
2594
- image.startsWith("./") ||
2595
- image.startsWith("../") ||
2596
- image.startsWith("..\\") ||
2597
- image.startsWith(".\\");
2598
- if (isFilePath) {
2599
- // Read from file path
2600
- const normalizedPath = path.resolve(image);
2601
- if (!fs.existsSync(normalizedPath)) {
2602
- logger.warn(`Image file not found: ${normalizedPath}, skipping`);
2603
- continue;
2604
- }
2605
- const imageBuffer = fs.readFileSync(normalizedPath);
2606
- imageBase64 = imageBuffer.toString("base64");
2607
- mimeType = this.detectImageType(imageBuffer);
2608
- }
2609
- else if (image.startsWith("data:")) {
2610
- // Data URL format: data:image/png;base64,<base64data>
2611
- const matches = image.match(/^data:([^;]+);base64,(.+)$/);
2612
- if (matches) {
2613
- mimeType = matches[1];
2614
- imageBase64 = matches[2];
2615
- }
2616
- else {
2617
- logger.warn("Invalid data URL format, skipping image", {
2618
- index: i,
2619
- });
2620
- continue;
2621
- }
2622
- }
2623
- else {
2624
- // Assume it's already base64 encoded
2625
- imageBase64 = image;
2626
- // Try to detect type from base64 data
2627
- const decodedBuffer = Buffer.from(imageBase64, "base64");
2628
- mimeType = this.detectImageType(decodedBuffer);
2629
- }
2630
- }
2631
- else {
2632
- logger.warn("Invalid image format, skipping", {
2633
- type: typeof image,
2634
- index: i,
2635
- });
2636
- continue;
2637
- }
2638
- parts.push({
2639
- inlineData: {
2640
- mimeType: mimeType,
2641
- data: imageBase64,
2642
- },
2643
- });
2644
- logger.debug("Added image to request", {
2645
- index: i,
2646
- mimeType,
2647
- dataLength: imageBase64.length,
2648
- });
2649
- }
2650
- }
2421
+ const token = await this.getImageGenerationAccessToken();
2422
+ const parts = this.buildImageGenerationParts(prompt, pdfFiles, inputImages);
2651
2423
  // Build request body with CRITICAL response_modalities setting
2652
2424
  const requestBody = {
2653
- contents: [
2654
- {
2655
- role: "user",
2656
- parts: parts,
2657
- },
2658
- ],
2425
+ contents: [{ role: "user", parts }],
2659
2426
  generation_config: {
2660
- response_modalities: ["TEXT", "IMAGE"], // CRITICAL for image generation
2427
+ response_modalities: ["TEXT", "IMAGE"],
2661
2428
  temperature: options.temperature || 0.7,
2662
2429
  candidate_count: 1,
2663
2430
  },
2664
2431
  };
2665
- // Construct Vertex AI endpoint - use appropriate base URL for location
2432
+ // Construct Vertex AI endpoint
2666
2433
  let url;
2667
2434
  if (location === "global") {
2668
- // Global endpoint doesn't have region prefix
2669
2435
  url = `https://aiplatform.googleapis.com/v1/projects/${this.projectId}/locations/global/publishers/google/models/${imageModelName}:generateContent`;
2670
2436
  }
2671
2437
  else {
@@ -2674,19 +2440,14 @@ export class GoogleVertexProvider extends BaseProvider {
2674
2440
  logger.debug("Making REST API call to Vertex AI", {
2675
2441
  url,
2676
2442
  model: imageModelName,
2677
- hasAccessToken: !!accessToken.token,
2443
+ hasAccessToken: true,
2678
2444
  });
2679
2445
  // Add timeout protection (120 seconds for image generation)
2680
- // Note: Using Promise.race instead of createTimeoutController because:
2681
- // 1. This is a one-off REST API call (not streaming) where fetch completion is atomic
2682
- // 2. AbortController mid-request cancellation isn't beneficial for image generation
2683
- // since the server generates the full image before responding
2684
- // 3. The simpler Promise.race pattern is sufficient for this use case
2685
2446
  const timeoutMs = 120000;
2686
2447
  const fetchPromise = fetch(url, {
2687
2448
  method: "POST",
2688
2449
  headers: {
2689
- Authorization: `Bearer ${accessToken.token}`,
2450
+ Authorization: `Bearer ${token}`,
2690
2451
  "Content-Type": "application/json",
2691
2452
  },
2692
2453
  body: JSON.stringify(requestBody),
@@ -2702,41 +2463,13 @@ export class GoogleVertexProvider extends BaseProvider {
2702
2463
  throw new ProviderError(`Vertex AI API error (${response.status}): ${errorText}`, this.providerName);
2703
2464
  }
2704
2465
  const data = (await response.json());
2705
- // Extract image from response (handle both inlineData and inline_data formats)
2706
- const candidate = data.candidates?.[0];
2707
- if (!candidate?.content?.parts) {
2708
- throw new ProviderError("No content parts in Vertex AI response", this.providerName);
2709
- }
2710
- // Find image part (check both camelCase and snake_case)
2711
- const imagePart = candidate.content.parts.find((part) => (part.inlineData || part.inline_data) &&
2712
- ((part.inlineData && part.inlineData.mimeType) ||
2713
- (part.inline_data && part.inline_data.mime_type)) &&
2714
- ((part.inlineData &&
2715
- part.inlineData.mimeType?.startsWith("image/")) ||
2716
- (part.inline_data &&
2717
- part.inline_data.mime_type?.startsWith("image/"))));
2718
- if (!imagePart) {
2719
- // Check if response contains text instead of image (don't expose text content in error for security)
2720
- const hasTextContent = candidate.content.parts.some((part) => part.text);
2721
- throw new ProviderError(hasTextContent
2722
- ? `Image generation completed but model returned text instead of image data. Model: ${imageModelName}`
2723
- : `Image generation completed but no image data was returned. Model: ${imageModelName}`, this.providerName);
2724
- }
2725
- // Extract image data (handle both formats)
2726
- const imageData = imagePart.inlineData?.data || imagePart.inline_data?.data;
2727
- const mimeType = imagePart.inlineData?.mimeType ||
2728
- imagePart.inline_data?.mime_type ||
2729
- "image/png";
2730
- if (!imageData) {
2731
- throw new ProviderError("Image part found but no data available", this.providerName);
2732
- }
2466
+ const { imageData, mimeType } = this.parseImageGenerationResponse(data, imageModelName);
2733
2467
  logger.info("Image generation successful", {
2734
2468
  model: imageModelName,
2735
2469
  mimeType,
2736
2470
  dataLength: imageData.length,
2737
2471
  responseTime: Date.now() - startTime,
2738
2472
  });
2739
- // Return result structure
2740
2473
  const result = {
2741
2474
  content: `Generated image using ${imageModelName} (${mimeType})`,
2742
2475
  imageOutput: {