@juspay/neurolink 9.64.0 → 9.65.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (324)
  1. package/CHANGELOG.md +12 -0
  2. package/README.md +18 -17
  3. package/dist/adapters/providerImageAdapter.js +29 -1
  4. package/dist/adapters/replicate/auth.d.ts +19 -0
  5. package/dist/adapters/replicate/auth.js +32 -0
  6. package/dist/adapters/replicate/predictionLifecycle.d.ts +46 -0
  7. package/dist/adapters/replicate/predictionLifecycle.js +283 -0
  8. package/dist/adapters/video/klingVideoHandler.d.ts +37 -0
  9. package/dist/adapters/video/klingVideoHandler.js +305 -0
  10. package/dist/adapters/video/replicateVideoHandler.d.ts +29 -0
  11. package/dist/adapters/video/replicateVideoHandler.js +157 -0
  12. package/dist/adapters/video/runwayVideoHandler.d.ts +32 -0
  13. package/dist/adapters/video/runwayVideoHandler.js +316 -0
  14. package/dist/adapters/video/vertexVideoHandler.d.ts +19 -1
  15. package/dist/adapters/video/vertexVideoHandler.js +33 -9
  16. package/dist/agent/directTools.js +11 -3
  17. package/dist/autoresearch/runner.js +8 -2
  18. package/dist/avatar/index.d.ts +13 -0
  19. package/dist/avatar/index.js +13 -0
  20. package/dist/avatar/providers/DIDAvatar.d.ts +49 -0
  21. package/dist/avatar/providers/DIDAvatar.js +501 -0
  22. package/dist/avatar/providers/HeyGenAvatar.d.ts +30 -0
  23. package/dist/avatar/providers/HeyGenAvatar.js +337 -0
  24. package/dist/avatar/providers/ReplicateAvatar.d.ts +36 -0
  25. package/dist/avatar/providers/ReplicateAvatar.js +267 -0
  26. package/dist/browser/neurolink.min.js +624 -601
  27. package/dist/cli/commands/mcp.js +29 -0
  28. package/dist/cli/commands/proxy.js +24 -5
  29. package/dist/cli/factories/commandFactory.d.ts +11 -1
  30. package/dist/cli/factories/commandFactory.js +291 -38
  31. package/dist/constants/contextWindows.js +101 -0
  32. package/dist/constants/enums.d.ts +273 -2
  33. package/dist/constants/enums.js +290 -1
  34. package/dist/constants/videoErrors.d.ts +4 -0
  35. package/dist/constants/videoErrors.js +4 -0
  36. package/dist/core/baseProvider.d.ts +22 -2
  37. package/dist/core/baseProvider.js +217 -11
  38. package/dist/core/constants.d.ts +12 -0
  39. package/dist/core/constants.js +72 -1
  40. package/dist/evaluation/index.d.ts +2 -0
  41. package/dist/evaluation/index.js +4 -0
  42. package/dist/factories/providerFactory.js +7 -1
  43. package/dist/factories/providerRegistry.js +202 -5
  44. package/dist/features/ppt/contentPlanner.js +42 -14
  45. package/dist/index.d.ts +9 -1
  46. package/dist/index.js +16 -1
  47. package/dist/lib/adapters/providerImageAdapter.js +29 -1
  48. package/dist/lib/adapters/replicate/auth.d.ts +19 -0
  49. package/dist/lib/adapters/replicate/auth.js +33 -0
  50. package/dist/lib/adapters/replicate/predictionLifecycle.d.ts +46 -0
  51. package/dist/lib/adapters/replicate/predictionLifecycle.js +284 -0
  52. package/dist/lib/adapters/video/klingVideoHandler.d.ts +37 -0
  53. package/dist/lib/adapters/video/klingVideoHandler.js +306 -0
  54. package/dist/lib/adapters/video/replicateVideoHandler.d.ts +29 -0
  55. package/dist/lib/adapters/video/replicateVideoHandler.js +158 -0
  56. package/dist/lib/adapters/video/runwayVideoHandler.d.ts +32 -0
  57. package/dist/lib/adapters/video/runwayVideoHandler.js +317 -0
  58. package/dist/lib/adapters/video/vertexVideoHandler.d.ts +19 -1
  59. package/dist/lib/adapters/video/vertexVideoHandler.js +33 -9
  60. package/dist/lib/agent/directTools.js +11 -3
  61. package/dist/lib/autoresearch/runner.js +8 -2
  62. package/dist/lib/avatar/index.d.ts +13 -0
  63. package/dist/lib/avatar/index.js +14 -0
  64. package/dist/lib/avatar/providers/DIDAvatar.d.ts +49 -0
  65. package/dist/lib/avatar/providers/DIDAvatar.js +502 -0
  66. package/dist/lib/avatar/providers/HeyGenAvatar.d.ts +30 -0
  67. package/dist/lib/avatar/providers/HeyGenAvatar.js +338 -0
  68. package/dist/lib/avatar/providers/ReplicateAvatar.d.ts +36 -0
  69. package/dist/lib/avatar/providers/ReplicateAvatar.js +268 -0
  70. package/dist/lib/constants/contextWindows.js +101 -0
  71. package/dist/lib/constants/enums.d.ts +273 -2
  72. package/dist/lib/constants/enums.js +290 -1
  73. package/dist/lib/constants/videoErrors.d.ts +4 -0
  74. package/dist/lib/constants/videoErrors.js +4 -0
  75. package/dist/lib/core/baseProvider.d.ts +22 -2
  76. package/dist/lib/core/baseProvider.js +217 -11
  77. package/dist/lib/core/constants.d.ts +12 -0
  78. package/dist/lib/core/constants.js +72 -1
  79. package/dist/lib/evaluation/index.d.ts +2 -0
  80. package/dist/lib/evaluation/index.js +4 -0
  81. package/dist/lib/factories/providerFactory.js +7 -1
  82. package/dist/lib/factories/providerRegistry.js +202 -5
  83. package/dist/lib/features/ppt/contentPlanner.js +42 -14
  84. package/dist/lib/index.d.ts +9 -1
  85. package/dist/lib/index.js +16 -1
  86. package/dist/lib/middleware/builtin/lifecycle.js +39 -9
  87. package/dist/lib/music/index.d.ts +13 -0
  88. package/dist/lib/music/index.js +14 -0
  89. package/dist/lib/music/providers/BeatovenMusic.d.ts +31 -0
  90. package/dist/lib/music/providers/BeatovenMusic.js +334 -0
  91. package/dist/lib/music/providers/ElevenLabsMusic.d.ts +30 -0
  92. package/dist/lib/music/providers/ElevenLabsMusic.js +169 -0
  93. package/dist/lib/music/providers/LyriaMusic.d.ts +29 -0
  94. package/dist/lib/music/providers/LyriaMusic.js +173 -0
  95. package/dist/lib/music/providers/ReplicateMusic.d.ts +31 -0
  96. package/dist/lib/music/providers/ReplicateMusic.js +262 -0
  97. package/dist/lib/neurolink.d.ts +30 -0
  98. package/dist/lib/neurolink.js +323 -77
  99. package/dist/lib/providers/amazonBedrock.d.ts +10 -0
  100. package/dist/lib/providers/amazonBedrock.js +94 -39
  101. package/dist/lib/providers/anthropic.js +55 -7
  102. package/dist/lib/providers/anthropicBaseProvider.js +1 -1
  103. package/dist/lib/providers/azureOpenai.js +66 -17
  104. package/dist/lib/providers/cloudflare.d.ts +35 -0
  105. package/dist/lib/providers/cloudflare.js +174 -0
  106. package/dist/lib/providers/cohere.d.ts +52 -0
  107. package/dist/lib/providers/cohere.js +253 -0
  108. package/dist/lib/providers/deepseek.js +72 -17
  109. package/dist/lib/providers/fireworks.d.ts +33 -0
  110. package/dist/lib/providers/fireworks.js +164 -0
  111. package/dist/lib/providers/googleAiStudio.js +126 -10
  112. package/dist/lib/providers/googleNativeGemini3.d.ts +26 -6
  113. package/dist/lib/providers/googleNativeGemini3.js +276 -29
  114. package/dist/lib/providers/googleVertex.js +639 -181
  115. package/dist/lib/providers/groq.d.ts +33 -0
  116. package/dist/lib/providers/groq.js +181 -0
  117. package/dist/lib/providers/huggingFace.js +9 -8
  118. package/dist/lib/providers/ideogram.d.ts +34 -0
  119. package/dist/lib/providers/ideogram.js +184 -0
  120. package/dist/lib/providers/index.d.ts +13 -0
  121. package/dist/lib/providers/index.js +13 -0
  122. package/dist/lib/providers/jina.d.ts +59 -0
  123. package/dist/lib/providers/jina.js +218 -0
  124. package/dist/lib/providers/llamaCpp.js +14 -46
  125. package/dist/lib/providers/lmStudio.js +14 -47
  126. package/dist/lib/providers/mistral.js +7 -7
  127. package/dist/lib/providers/nvidiaNim.js +160 -19
  128. package/dist/lib/providers/ollama.js +7 -7
  129. package/dist/lib/providers/openAI.d.ts +22 -1
  130. package/dist/lib/providers/openAI.js +181 -0
  131. package/dist/lib/providers/openRouter.js +35 -23
  132. package/dist/lib/providers/openaiCompatible.js +9 -8
  133. package/dist/lib/providers/perplexity.d.ts +33 -0
  134. package/dist/lib/providers/perplexity.js +179 -0
  135. package/dist/lib/providers/recraft.d.ts +34 -0
  136. package/dist/lib/providers/recraft.js +197 -0
  137. package/dist/lib/providers/replicate.d.ts +75 -0
  138. package/dist/lib/providers/replicate.js +403 -0
  139. package/dist/lib/providers/stability.d.ts +37 -0
  140. package/dist/lib/providers/stability.js +191 -0
  141. package/dist/lib/providers/togetherAi.d.ts +33 -0
  142. package/dist/lib/providers/togetherAi.js +176 -0
  143. package/dist/lib/providers/voyage.d.ts +47 -0
  144. package/dist/lib/providers/voyage.js +177 -0
  145. package/dist/lib/providers/xai.d.ts +33 -0
  146. package/dist/lib/providers/xai.js +172 -0
  147. package/dist/lib/telemetry/index.d.ts +1 -1
  148. package/dist/lib/telemetry/index.js +1 -1
  149. package/dist/lib/telemetry/tracers.d.ts +19 -0
  150. package/dist/lib/telemetry/tracers.js +19 -0
  151. package/dist/lib/telemetry/withSpan.d.ts +35 -0
  152. package/dist/lib/telemetry/withSpan.js +103 -0
  153. package/dist/lib/types/avatar.d.ts +143 -0
  154. package/dist/lib/types/avatar.js +20 -0
  155. package/dist/lib/types/cli.d.ts +6 -0
  156. package/dist/lib/types/conversation.d.ts +16 -0
  157. package/dist/lib/types/generate.d.ts +62 -5
  158. package/dist/lib/types/index.d.ts +5 -0
  159. package/dist/lib/types/index.js +7 -0
  160. package/dist/lib/types/middleware.d.ts +27 -0
  161. package/dist/lib/types/multimodal.d.ts +35 -2
  162. package/dist/lib/types/music.d.ts +165 -0
  163. package/dist/lib/types/music.js +21 -0
  164. package/dist/lib/types/providers.d.ts +144 -1
  165. package/dist/lib/types/replicate.d.ts +67 -0
  166. package/dist/lib/types/replicate.js +10 -0
  167. package/dist/lib/types/safeFetch.d.ts +15 -0
  168. package/dist/lib/types/safeFetch.js +7 -0
  169. package/dist/lib/types/stream.d.ts +2 -1
  170. package/dist/lib/types/tools.d.ts +13 -0
  171. package/dist/lib/types/video.d.ts +89 -0
  172. package/dist/lib/types/video.js +15 -0
  173. package/dist/lib/utils/avatarProcessor.d.ts +68 -0
  174. package/dist/lib/utils/avatarProcessor.js +172 -0
  175. package/dist/lib/utils/cloneOptions.d.ts +36 -0
  176. package/dist/lib/utils/cloneOptions.js +62 -0
  177. package/dist/lib/utils/lifecycleCallbacks.d.ts +51 -8
  178. package/dist/lib/utils/lifecycleCallbacks.js +82 -26
  179. package/dist/lib/utils/lifecycleTimeout.d.ts +25 -0
  180. package/dist/lib/utils/lifecycleTimeout.js +39 -0
  181. package/dist/lib/utils/logSanitize.d.ts +49 -0
  182. package/dist/lib/utils/logSanitize.js +170 -0
  183. package/dist/lib/utils/loggingFetch.d.ts +29 -0
  184. package/dist/lib/utils/loggingFetch.js +60 -0
  185. package/dist/lib/utils/messageBuilder.js +43 -25
  186. package/dist/lib/utils/modelChoices.js +236 -3
  187. package/dist/lib/utils/musicProcessor.d.ts +67 -0
  188. package/dist/lib/utils/musicProcessor.js +189 -0
  189. package/dist/lib/utils/optionsConversion.js +3 -2
  190. package/dist/lib/utils/parameterValidation.js +14 -4
  191. package/dist/lib/utils/pricing.js +193 -0
  192. package/dist/lib/utils/providerConfig.d.ts +55 -0
  193. package/dist/lib/utils/providerConfig.js +224 -0
  194. package/dist/lib/utils/safeFetch.d.ts +26 -0
  195. package/dist/lib/utils/safeFetch.js +83 -0
  196. package/dist/lib/utils/sizeGuard.d.ts +34 -0
  197. package/dist/lib/utils/sizeGuard.js +45 -0
  198. package/dist/lib/utils/ssrfGuard.d.ts +52 -0
  199. package/dist/lib/utils/ssrfGuard.js +411 -0
  200. package/dist/lib/utils/videoProcessor.d.ts +60 -0
  201. package/dist/lib/utils/videoProcessor.js +201 -0
  202. package/dist/lib/voice/providers/FishAudioTTS.d.ts +27 -0
  203. package/dist/lib/voice/providers/FishAudioTTS.js +183 -0
  204. package/dist/lib/workflow/core/ensembleExecutor.js +26 -9
  205. package/dist/middleware/builtin/lifecycle.js +39 -9
  206. package/dist/music/index.d.ts +13 -0
  207. package/dist/music/index.js +13 -0
  208. package/dist/music/providers/BeatovenMusic.d.ts +31 -0
  209. package/dist/music/providers/BeatovenMusic.js +333 -0
  210. package/dist/music/providers/ElevenLabsMusic.d.ts +30 -0
  211. package/dist/music/providers/ElevenLabsMusic.js +168 -0
  212. package/dist/music/providers/LyriaMusic.d.ts +29 -0
  213. package/dist/music/providers/LyriaMusic.js +172 -0
  214. package/dist/music/providers/ReplicateMusic.d.ts +31 -0
  215. package/dist/music/providers/ReplicateMusic.js +261 -0
  216. package/dist/neurolink.d.ts +30 -0
  217. package/dist/neurolink.js +323 -77
  218. package/dist/providers/amazonBedrock.d.ts +10 -0
  219. package/dist/providers/amazonBedrock.js +94 -39
  220. package/dist/providers/anthropic.js +55 -7
  221. package/dist/providers/anthropicBaseProvider.js +1 -1
  222. package/dist/providers/azureOpenai.js +66 -17
  223. package/dist/providers/cloudflare.d.ts +35 -0
  224. package/dist/providers/cloudflare.js +173 -0
  225. package/dist/providers/cohere.d.ts +52 -0
  226. package/dist/providers/cohere.js +252 -0
  227. package/dist/providers/deepseek.js +72 -17
  228. package/dist/providers/fireworks.d.ts +33 -0
  229. package/dist/providers/fireworks.js +163 -0
  230. package/dist/providers/googleAiStudio.js +126 -10
  231. package/dist/providers/googleNativeGemini3.d.ts +26 -6
  232. package/dist/providers/googleNativeGemini3.js +276 -29
  233. package/dist/providers/googleVertex.js +639 -181
  234. package/dist/providers/groq.d.ts +33 -0
  235. package/dist/providers/groq.js +180 -0
  236. package/dist/providers/huggingFace.js +9 -8
  237. package/dist/providers/ideogram.d.ts +34 -0
  238. package/dist/providers/ideogram.js +183 -0
  239. package/dist/providers/index.d.ts +13 -0
  240. package/dist/providers/index.js +13 -0
  241. package/dist/providers/jina.d.ts +59 -0
  242. package/dist/providers/jina.js +217 -0
  243. package/dist/providers/llamaCpp.js +14 -46
  244. package/dist/providers/lmStudio.js +14 -47
  245. package/dist/providers/mistral.js +7 -7
  246. package/dist/providers/nvidiaNim.js +160 -19
  247. package/dist/providers/ollama.js +7 -7
  248. package/dist/providers/openAI.d.ts +22 -1
  249. package/dist/providers/openAI.js +181 -0
  250. package/dist/providers/openRouter.js +35 -23
  251. package/dist/providers/openaiCompatible.js +9 -8
  252. package/dist/providers/perplexity.d.ts +33 -0
  253. package/dist/providers/perplexity.js +178 -0
  254. package/dist/providers/recraft.d.ts +34 -0
  255. package/dist/providers/recraft.js +196 -0
  256. package/dist/providers/replicate.d.ts +75 -0
  257. package/dist/providers/replicate.js +402 -0
  258. package/dist/providers/stability.d.ts +37 -0
  259. package/dist/providers/stability.js +190 -0
  260. package/dist/providers/togetherAi.d.ts +33 -0
  261. package/dist/providers/togetherAi.js +175 -0
  262. package/dist/providers/voyage.d.ts +47 -0
  263. package/dist/providers/voyage.js +176 -0
  264. package/dist/providers/xai.d.ts +33 -0
  265. package/dist/providers/xai.js +171 -0
  266. package/dist/telemetry/index.d.ts +1 -1
  267. package/dist/telemetry/index.js +1 -1
  268. package/dist/telemetry/tracers.d.ts +19 -0
  269. package/dist/telemetry/tracers.js +19 -0
  270. package/dist/telemetry/withSpan.d.ts +35 -0
  271. package/dist/telemetry/withSpan.js +103 -0
  272. package/dist/types/avatar.d.ts +143 -0
  273. package/dist/types/avatar.js +19 -0
  274. package/dist/types/cli.d.ts +6 -0
  275. package/dist/types/conversation.d.ts +16 -0
  276. package/dist/types/generate.d.ts +62 -5
  277. package/dist/types/index.d.ts +5 -0
  278. package/dist/types/index.js +7 -0
  279. package/dist/types/middleware.d.ts +27 -0
  280. package/dist/types/multimodal.d.ts +35 -2
  281. package/dist/types/music.d.ts +165 -0
  282. package/dist/types/music.js +20 -0
  283. package/dist/types/providers.d.ts +144 -1
  284. package/dist/types/replicate.d.ts +67 -0
  285. package/dist/types/replicate.js +9 -0
  286. package/dist/types/safeFetch.d.ts +15 -0
  287. package/dist/types/safeFetch.js +6 -0
  288. package/dist/types/stream.d.ts +2 -1
  289. package/dist/types/tools.d.ts +13 -0
  290. package/dist/types/video.d.ts +89 -0
  291. package/dist/types/video.js +14 -0
  292. package/dist/utils/avatarProcessor.d.ts +68 -0
  293. package/dist/utils/avatarProcessor.js +171 -0
  294. package/dist/utils/cloneOptions.d.ts +36 -0
  295. package/dist/utils/cloneOptions.js +61 -0
  296. package/dist/utils/lifecycleCallbacks.d.ts +51 -8
  297. package/dist/utils/lifecycleCallbacks.js +82 -26
  298. package/dist/utils/lifecycleTimeout.d.ts +25 -0
  299. package/dist/utils/lifecycleTimeout.js +38 -0
  300. package/dist/utils/logSanitize.d.ts +49 -0
  301. package/dist/utils/logSanitize.js +169 -0
  302. package/dist/utils/loggingFetch.d.ts +29 -0
  303. package/dist/utils/loggingFetch.js +59 -0
  304. package/dist/utils/messageBuilder.js +43 -25
  305. package/dist/utils/modelChoices.js +236 -3
  306. package/dist/utils/musicProcessor.d.ts +67 -0
  307. package/dist/utils/musicProcessor.js +188 -0
  308. package/dist/utils/optionsConversion.js +3 -2
  309. package/dist/utils/parameterValidation.js +14 -4
  310. package/dist/utils/pricing.js +193 -0
  311. package/dist/utils/providerConfig.d.ts +55 -0
  312. package/dist/utils/providerConfig.js +224 -0
  313. package/dist/utils/safeFetch.d.ts +26 -0
  314. package/dist/utils/safeFetch.js +82 -0
  315. package/dist/utils/sizeGuard.d.ts +34 -0
  316. package/dist/utils/sizeGuard.js +44 -0
  317. package/dist/utils/ssrfGuard.d.ts +52 -0
  318. package/dist/utils/ssrfGuard.js +410 -0
  319. package/dist/utils/videoProcessor.d.ts +60 -0
  320. package/dist/utils/videoProcessor.js +200 -0
  321. package/dist/voice/providers/FishAudioTTS.d.ts +27 -0
  322. package/dist/voice/providers/FishAudioTTS.js +182 -0
  323. package/dist/workflow/core/ensembleExecutor.js +26 -9
  324. package/package.json +32 -5
@@ -1,10 +1,11 @@
1
+ /* eslint-disable max-lines-per-function */
1
2
  // Native SDK imports - no more @ai-sdk/google-vertex dependency
2
3
  import fs from "fs";
3
4
  import path from "path";
4
5
  import os from "os";
5
6
  import { ErrorCategory, ErrorSeverity, } from "../constants/enums.js";
6
7
  import { BaseProvider } from "../core/baseProvider.js";
7
- import { DEFAULT_MAX_STEPS, DEFAULT_TOOL_MAX_RETRIES, GLOBAL_LOCATION_MODELS, IMAGE_GENERATION_MODELS, } from "../core/constants.js";
8
+ import { DEFAULT_MAX_STEPS, DEFAULT_TOOL_MAX_RETRIES, GLOBAL_LOCATION_MODELS, IMAGE_GENERATION_MODELS, TOOL_STORAGE_TIMEOUT_MS, } from "../core/constants.js";
8
9
  import { ModelConfigurationManager } from "../core/modelConfiguration.js";
9
10
  import { createProxyFetch } from "../proxy/proxyFetch.js";
10
11
  import { AuthenticationError, InvalidModelError, NetworkError, ProviderError, RateLimitError, } from "../types/index.js";
@@ -16,10 +17,12 @@ import { hasRestrictedOutputLimit, RESTRICTED_OUTPUT_TOKEN_LIMIT, } from "../uti
16
17
  import { validateApiKey, createVertexProjectConfig, createGoogleAuthConfig, } from "../utils/providerConfig.js";
17
18
  import { convertZodToJsonSchema, inlineJsonSchema, ensureNestedSchemaTypes, } from "../utils/schemaConversion.js";
18
19
  import { createNativeThinkingConfig } from "../utils/thinkingConfig.js";
19
- import { TimeoutError } from "../utils/async/index.js";
20
- import { prependConversationMessages } from "./googleNativeGemini3.js";
21
- import { ATTR, tracers, withClientSpan, withSpan } from "../telemetry/index.js";
20
+ import { TimeoutError, withTimeout } from "../utils/async/index.js";
21
+ import { parseTimeout } from "../utils/timeout.js";
22
+ import { createTextChannel, extractThoughtSignature, prependConversationMessages, } from "./googleNativeGemini3.js";
23
+ import { ATTR, tracers, withClientSpan, withClientStreamSpan, withSpan, } from "../telemetry/index.js";
22
24
  import { calculateCost } from "../utils/pricing.js";
25
+ import { transformToolExecutions } from "../utils/transformationUtils.js";
23
26
  // Import proper types for multimodal message handling
24
27
  // Dynamic import helper for native Anthropic Vertex SDK
25
28
  let anthropicVertexModule = null;
@@ -35,6 +38,110 @@ const hasAnthropicSupport = () => {
35
38
  // Actual availability is checked at runtime when creating the client
36
39
  return true;
37
40
  };
41
/**
 * Recursively strip JSON-schema fields that Vertex Gemini's function-call
 * validator rejects with 400 INVALID_ARGUMENT. Vertex implements OpenAPI 3.0
 * Schema strictly and rejects extension fields that the broader JSON Schema
 * spec allows. The fields stripped here have no semantic meaning for the
 * model, so removing them is safe for every caller.
 *
 * The schema is mutated in place; nothing is returned. Non-object inputs
 * (null, undefined, primitives) are ignored.
 *
 * Fields removed:
 * - `additionalProperties` — extension; Vertex rejects on any nested object.
 * - `default` — Vertex rejects defaults on object/array-typed properties and
 *   on properties that are also marked `required`. Safest to strip globally
 *   because the model never inspects them.
 * - `$schema`, `$id`, `$ref`, `definitions`, `$defs` — JSON-Schema-meta
 *   fields that Vertex doesn't recognise.
 * - `examples` — accepted by some Gemini variants but not 2.5-flash; strip
 *   to avoid the model rejecting tool schemas under that path.
 *
 * Numeric bounds normalised:
 * - Boolean `exclusiveMinimum` / `exclusiveMaximum` are rewritten to the
 *   numeric form (taking over the paired `minimum` / `maximum`) or dropped
 *   when there is no numeric bound to pair with.
 * - Any upper bound above the int32 maximum, or lower bound below the int32
 *   minimum, is dropped. This covers `maximum`, `minimum` and the numeric
 *   `exclusiveMaximum` / `exclusiveMinimum` produced by the rewrite above.
 *
 * Recursion covers `properties` values, `items` (single schema or tuple
 * array) and every branch of `allOf` / `anyOf` / `oneOf`. Property *names*
 * inside `properties` are never touched, so a property literally called
 * `default` or `examples` is preserved.
 *
 * @param {unknown} schema - JSON-schema node to sanitise in place.
 * @returns {void}
 */
function stripAdditionalPropertiesDeep(schema) {
    if (!schema || typeof schema !== "object") {
        return;
    }
    const FIELDS_TO_STRIP = [
        "additionalProperties",
        "default",
        "$schema",
        "$id",
        "$ref",
        "definitions",
        "$defs",
        "examples",
    ];
    for (const field of FIELDS_TO_STRIP) {
        if (field in schema) {
            delete schema[field];
        }
    }
    // JSON Schema Draft-4 `exclusiveMinimum: true` / `exclusiveMaximum: true`
    // (boolean form) is rejected by Vertex's validator, which expects a
    // numeric bound. zod-to-json-schema's openApi3 target still emits the
    // Draft-4 form for `z.number().positive()` etc. Translate the boolean
    // form into the numeric form when paired with `minimum` / `maximum`;
    // otherwise drop it (the model doesn't validate, so the constraint is
    // informational only).
    const EXCLUSIVE_BOUND_PAIRS = [
        ["exclusiveMinimum", "minimum"],
        ["exclusiveMaximum", "maximum"],
    ];
    for (const [exclusiveKey, boundKey] of EXCLUSIVE_BOUND_PAIRS) {
        if (typeof schema[exclusiveKey] !== "boolean") {
            continue;
        }
        if (schema[exclusiveKey] === true &&
            typeof schema[boundKey] === "number") {
            schema[exclusiveKey] = schema[boundKey];
            delete schema[boundKey];
        }
        else {
            delete schema[exclusiveKey];
        }
    }
    // Strip bounds outside the int32 range — Vertex's protobuf serializer
    // treats `type: "integer"` as int32 and rejects bounds beyond it. zod's
    // `.positive().int()` emits Number.MAX_SAFE_INTEGER as the upper bound,
    // which trips this. The check runs AFTER the boolean → numeric rewrite
    // and includes the exclusive keywords, so an oversized bound that was
    // just moved from `maximum` into `exclusiveMaximum` is still removed.
    // The constraint is informational for the model anyway, so dropping it
    // is safe.
    const INT32_MAX = 2147483647;
    const INT32_MIN = -2147483648;
    for (const upperKey of ["maximum", "exclusiveMaximum"]) {
        if (typeof schema[upperKey] === "number" &&
            schema[upperKey] > INT32_MAX) {
            delete schema[upperKey];
        }
    }
    for (const lowerKey of ["minimum", "exclusiveMinimum"]) {
        if (typeof schema[lowerKey] === "number" &&
            schema[lowerKey] < INT32_MIN) {
            delete schema[lowerKey];
        }
    }
    // Recurse into nested schemas. The guard at the top of this function
    // makes non-object children a no-op, so no per-child type check is
    // needed here.
    if (schema.properties && typeof schema.properties === "object") {
        for (const child of Object.values(schema.properties)) {
            stripAdditionalPropertiesDeep(child);
        }
    }
    if (schema.items && typeof schema.items === "object") {
        if (Array.isArray(schema.items)) {
            // Tuple form: one schema per position.
            for (const item of schema.items) {
                stripAdditionalPropertiesDeep(item);
            }
        }
        else {
            stripAdditionalPropertiesDeep(schema.items);
        }
    }
    for (const key of ["allOf", "anyOf", "oneOf"]) {
        if (Array.isArray(schema[key])) {
            for (const branch of schema[key]) {
                stripAdditionalPropertiesDeep(branch);
            }
        }
    }
}
38
145
  // Configuration helpers - now using consolidated utility
39
146
  const getVertexProjectId = () => {
40
147
  return validateApiKey(createVertexProjectConfig());
@@ -604,7 +711,7 @@ export class GoogleVertexProvider extends BaseProvider {
604
711
  // the test:tracing observability harness sees the same span hierarchy
605
712
  // it sees for AI Studio. BaseProvider.stream does NOT emit this span
606
713
  // for any provider — each native provider has to add it itself.
607
- return withClientSpan({
714
+ return withClientStreamSpan({
608
715
  name: "neurolink.provider.stream",
609
716
  tracer: tracers.provider,
610
717
  attributes: {
@@ -671,7 +778,7 @@ export class GoogleVertexProvider extends BaseProvider {
671
778
  this.emitStreamEnd(modelName, streamStartTime, false, error);
672
779
  throw error;
673
780
  }
674
- });
781
+ }, (r) => r.stream, (r, wrapped) => ({ ...r, stream: wrapped }));
675
782
  }
676
783
  /**
677
784
  * Emit `stream:end` so the Pipeline B observability listener creates a
@@ -746,8 +853,11 @@ export class GoogleVertexProvider extends BaseProvider {
746
853
  });
747
854
  // Build contents from input with multimodal support
748
855
  const contents = [];
749
- // Build user message parts - start with text
750
- const userParts = [{ text: options.input.text }];
856
+ // Build user message parts - start with text.
857
+ // `options.input.text` is `string | undefined` in strict mode; the
858
+ // VertexNativePart `text` field requires `string`, so coerce to "" if
859
+ // unset (the multimodal-only path still appends other parts below).
860
+ const userParts = [{ text: options.input.text ?? "" }];
751
861
  // Add PDF files as inlineData parts if present
752
862
  // Cast input to access multimodal properties that may exist at runtime
753
863
  const multimodalInput = options.input;
@@ -886,6 +996,12 @@ export class GoogleVertexProvider extends BaseProvider {
886
996
  // ensureNestedSchemaTypes recursively adds missing type fields to tool schemas
887
997
  // Note: convertZodToJsonSchema now uses openApi3 target which produces nullable: true
888
998
  const typedSchema = ensureNestedSchemaTypes(inlinedSchema);
999
+ // Strip `additionalProperties` recursively — Vertex Gemini's
1000
+ // function-call validator rejects it on object schemas (returns
1001
+ // 400 INVALID_ARGUMENT) even though it's valid OpenAPI 3. The
1002
+ // field has no semantic meaning to the model, so dropping it
1003
+ // before send is safe for every caller.
1004
+ stripAdditionalPropertiesDeep(typedSchema);
889
1005
  decl.parametersJsonSchema = typedSchema;
890
1006
  }
891
1007
  functionDeclarations.push(decl);
@@ -1012,6 +1128,11 @@ export class GoogleVertexProvider extends BaseProvider {
1012
1128
  let finalText = "";
1013
1129
  let lastStepText = ""; // Track text from last step for maxSteps termination
1014
1130
  const allToolCalls = [];
1131
+ // Mirrors the generate-path shape so StreamResult.toolExecutions can be
1132
+ // populated (parity with AI-SDK-driven providers) and so the storage
1133
+ // hook can persist actual tool outputs rather than the placeholder
1134
+ // "success" string used by flushPendingToolData's default fallback.
1135
+ const toolExecutions = [];
1015
1136
  let step = 0;
1016
1137
  // Track structured output from final_result tool (when using final_result pattern)
1017
1138
  let finalResultStructuredOutput;
@@ -1118,22 +1239,38 @@ export class GoogleVertexProvider extends BaseProvider {
1118
1239
  });
1119
1240
  // Execute each function and collect responses
1120
1241
  const functionResponses = [];
1242
+ // Per-step bookkeeping for conversation-memory storage.
1243
+ const stepStorageCalls = [];
1244
+ const stepStorageResults = [];
1245
+ // Note: tool:start / tool:end events are emitted by ToolsManager's
1246
+ // wrapped `execute` (see ToolsManager.ts:355) — no inline emit needed.
1121
1247
  for (const call of stepFunctionCalls) {
1122
1248
  allToolCalls.push({ toolName: call.name, args: call.args });
1249
+ stepStorageCalls.push({ toolName: call.name, args: call.args });
1123
1250
  // Check if this tool has already exceeded retry limit
1124
1251
  const failedInfo = failedTools.get(call.name);
1125
1252
  if (failedInfo && failedInfo.count >= DEFAULT_TOOL_MAX_RETRIES) {
1126
1253
  logger.warn(`[GoogleVertex] Tool "${call.name}" has exceeded retry limit (${DEFAULT_TOOL_MAX_RETRIES}), skipping execution`);
1254
+ const errorPayload = {
1255
+ error: `TOOL_PERMANENTLY_FAILED: The tool "${call.name}" has failed ${failedInfo.count} times and will not be retried. Last error: ${failedInfo.lastError}. Please proceed without using this tool or inform the user that this functionality is unavailable.`,
1256
+ status: "permanently_failed",
1257
+ do_not_retry: true,
1258
+ };
1127
1259
  functionResponses.push({
1128
1260
  functionResponse: {
1129
1261
  name: call.name,
1130
- response: {
1131
- error: `TOOL_PERMANENTLY_FAILED: The tool "${call.name}" has failed ${failedInfo.count} times and will not be retried. Last error: ${failedInfo.lastError}. Please proceed without using this tool or inform the user that this functionality is unavailable.`,
1132
- status: "permanently_failed",
1133
- do_not_retry: true,
1134
- },
1262
+ response: errorPayload,
1135
1263
  },
1136
1264
  });
1265
+ toolExecutions.push({
1266
+ name: call.name,
1267
+ input: call.args,
1268
+ output: errorPayload,
1269
+ });
1270
+ stepStorageResults.push({
1271
+ toolName: call.name,
1272
+ output: errorPayload,
1273
+ });
1137
1274
  continue;
1138
1275
  }
1139
1276
  const execute = executeMap.get(call.name);
@@ -1146,9 +1283,18 @@ export class GoogleVertexProvider extends BaseProvider {
1146
1283
  abortSignal: undefined,
1147
1284
  };
1148
1285
  const result = await execute(call.args, toolOptions);
1286
+ toolExecutions.push({
1287
+ name: call.name,
1288
+ input: call.args,
1289
+ output: result,
1290
+ });
1149
1291
  functionResponses.push({
1150
1292
  functionResponse: { name: call.name, response: { result } },
1151
1293
  });
1294
+ stepStorageResults.push({
1295
+ toolName: call.name,
1296
+ output: result,
1297
+ });
1152
1298
  }
1153
1299
  catch (error) {
1154
1300
  const errorMessage = error instanceof Error ? error.message : "Unknown error";
@@ -1163,38 +1309,77 @@ export class GoogleVertexProvider extends BaseProvider {
1163
1309
  logger.warn(`[GoogleVertex] Tool "${call.name}" failed (attempt ${currentFailInfo.count}/${DEFAULT_TOOL_MAX_RETRIES}): ${errorMessage}`);
1164
1310
  // Determine if this is a permanent failure
1165
1311
  const isPermanentFailure = currentFailInfo.count >= DEFAULT_TOOL_MAX_RETRIES;
1312
+ const errorPayload = {
1313
+ error: isPermanentFailure
1314
+ ? `TOOL_PERMANENTLY_FAILED: The tool "${call.name}" has failed ${currentFailInfo.count} times with error: ${errorMessage}. This tool will not be retried. Please proceed without using this tool or inform the user that this functionality is unavailable.`
1315
+ : `TOOL_EXECUTION_ERROR: ${errorMessage}. Retry attempt ${currentFailInfo.count}/${DEFAULT_TOOL_MAX_RETRIES}.`,
1316
+ status: isPermanentFailure ? "permanently_failed" : "failed",
1317
+ do_not_retry: isPermanentFailure,
1318
+ retry_count: currentFailInfo.count,
1319
+ max_retries: DEFAULT_TOOL_MAX_RETRIES,
1320
+ };
1166
1321
  functionResponses.push({
1167
1322
  functionResponse: {
1168
1323
  name: call.name,
1169
- response: {
1170
- error: isPermanentFailure
1171
- ? `TOOL_PERMANENTLY_FAILED: The tool "${call.name}" has failed ${currentFailInfo.count} times with error: ${errorMessage}. This tool will not be retried. Please proceed without using this tool or inform the user that this functionality is unavailable.`
1172
- : `TOOL_EXECUTION_ERROR: ${errorMessage}. Retry attempt ${currentFailInfo.count}/${DEFAULT_TOOL_MAX_RETRIES}.`,
1173
- status: isPermanentFailure
1174
- ? "permanently_failed"
1175
- : "failed",
1176
- do_not_retry: isPermanentFailure,
1177
- retry_count: currentFailInfo.count,
1178
- max_retries: DEFAULT_TOOL_MAX_RETRIES,
1179
- },
1324
+ response: errorPayload,
1180
1325
  },
1181
1326
  });
1327
+ toolExecutions.push({
1328
+ name: call.name,
1329
+ input: call.args,
1330
+ output: errorPayload,
1331
+ });
1332
+ stepStorageResults.push({
1333
+ toolName: call.name,
1334
+ output: errorPayload,
1335
+ });
1182
1336
  }
1183
1337
  }
1184
1338
  else {
1185
1339
  // Tool not found is a permanent error
1340
+ const errorPayload = {
1341
+ error: `TOOL_NOT_FOUND: The tool "${call.name}" does not exist. Do not attempt to call this tool again.`,
1342
+ status: "permanently_failed",
1343
+ do_not_retry: true,
1344
+ };
1186
1345
  functionResponses.push({
1187
1346
  functionResponse: {
1188
1347
  name: call.name,
1189
- response: {
1190
- error: `TOOL_NOT_FOUND: The tool "${call.name}" does not exist. Do not attempt to call this tool again.`,
1191
- status: "permanently_failed",
1192
- do_not_retry: true,
1193
- },
1348
+ response: errorPayload,
1194
1349
  },
1195
1350
  });
1351
+ toolExecutions.push({
1352
+ name: call.name,
1353
+ input: call.args,
1354
+ output: errorPayload,
1355
+ });
1356
+ stepStorageResults.push({
1357
+ toolName: call.name,
1358
+ output: errorPayload,
1359
+ });
1196
1360
  }
1197
1361
  }
1362
+ // Persist this step's tool calls/results into conversation memory.
1363
+ // Without this, tool_call / tool_result rows never reach Redis and
1364
+ // the chat-history UI loses every tool invocation.
1365
+ //
1366
+ // `thoughtSignature` rides as a sibling on the first call of the
1367
+ // step — Gemini 3 needs it to match thinking patterns when the
1368
+ // conversation is replayed on the next turn.
1369
+ if (stepStorageCalls.length > 0 || stepStorageResults.length > 0) {
1370
+ const stepThoughtSig = extractThoughtSignature(rawResponseParts);
1371
+ withTimeout(this.handleToolExecutionStorage(stepStorageCalls.map((c, i) => ({
1372
+ ...c,
1373
+ ...(i === 0 && stepThoughtSig
1374
+ ? { thoughtSignature: stepThoughtSig }
1375
+ : {}),
1376
+ stepIndex: step,
1377
+ })), stepStorageResults.map((r) => ({ ...r, stepIndex: step })), options, new Date()), TOOL_STORAGE_TIMEOUT_MS, "tool storage write timed out").catch((error) => {
1378
+ logger.warn("[GoogleVertex] Failed to store native Gemini stream tool executions", {
1379
+ error: error instanceof Error ? error.message : String(error),
1380
+ });
1381
+ });
1382
+ }
1198
1383
  // The @google/genai SDK only accepts "user" and "model" as valid
1199
1384
  // roles in contents — function/tool responses must use role: "user"
1200
1385
  // (matching the SDK's automaticFunctionCalling implementation and
@@ -1240,6 +1425,7 @@ export class GoogleVertexProvider extends BaseProvider {
1240
1425
  }
1241
1426
  // Filter out final_result from tool calls as it's an internal pattern
1242
1427
  const externalToolCalls = allToolCalls.filter((tc) => tc.toolName !== "final_result");
1428
+ const externalToolExecutions = toolExecutions.filter((te) => te.name !== "final_result");
1243
1429
  const result = {
1244
1430
  stream: createTextStream(),
1245
1431
  provider: this.providerName,
@@ -1253,6 +1439,12 @@ export class GoogleVertexProvider extends BaseProvider {
1253
1439
  toolName: tc.toolName,
1254
1440
  args: tc.args,
1255
1441
  })),
1442
+ // Surface tools-used + execution summary so `hasToolActivity` in
1443
+ // conversationMemory.ts evaluates true for tool-only stream turns
1444
+ // (assistant text empty but tools ran) and downstream consumers see
1445
+ // the same shape AI-SDK-driven providers expose.
1446
+ toolsUsed: externalToolCalls.map((tc) => tc.toolName),
1447
+ toolExecutions: transformToolExecutions(externalToolExecutions),
1256
1448
  metadata: {
1257
1449
  streamId: `native-vertex-${Date.now()}`,
1258
1450
  startTime,
@@ -1426,6 +1618,12 @@ export class GoogleVertexProvider extends BaseProvider {
1426
1618
  // ensureNestedSchemaTypes recursively adds missing type fields to tool schemas
1427
1619
  // Note: convertZodToJsonSchema now uses openApi3 target which produces nullable: true
1428
1620
  const typedSchema = ensureNestedSchemaTypes(inlinedSchema);
1621
+ // Strip `additionalProperties` recursively — Vertex Gemini's
1622
+ // function-call validator rejects it on object schemas (returns
1623
+ // 400 INVALID_ARGUMENT) even though it's valid OpenAPI 3. The
1624
+ // field has no semantic meaning to the model, so dropping it
1625
+ // before send is safe for every caller.
1626
+ stripAdditionalPropertiesDeep(typedSchema);
1429
1627
  decl.parametersJsonSchema = typedSchema;
1430
1628
  }
1431
1629
  functionDeclarations.push(decl);
@@ -1648,6 +1846,10 @@ export class GoogleVertexProvider extends BaseProvider {
1648
1846
  });
1649
1847
  // Execute each function and collect responses
1650
1848
  const functionResponses = [];
1849
+ const toolCallsBefore = allToolCalls.length;
1850
+ const toolExecsBefore = toolExecutions.length;
1851
+ // Note: tool:start / tool:end events are emitted by ToolsManager's
1852
+ // wrapped `execute` (see ToolsManager.ts:355) — no inline emit needed.
1651
1853
  for (const call of stepFunctionCalls) {
1652
1854
  allToolCalls.push({ toolName: call.name, args: call.args });
1653
1855
  // Check if this tool has already exceeded retry limit
@@ -1750,6 +1952,32 @@ export class GoogleVertexProvider extends BaseProvider {
1750
1952
  });
1751
1953
  }
1752
1954
  }
1955
+ // Persist this step's tool calls/results into conversation memory.
1956
+ // Without this, tool_call / tool_result rows never reach Redis and
1957
+ // the chat-history UI loses every tool invocation. The first call
1958
+ // of the step carries the step's `thoughtSignature` so Gemini 3 can
1959
+ // match thinking patterns on replay.
1960
+ const stepToolCalls = allToolCalls.slice(toolCallsBefore);
1961
+ const stepToolExecs = toolExecutions.slice(toolExecsBefore);
1962
+ if (stepToolCalls.length > 0 || stepToolExecs.length > 0) {
1963
+ const stepThoughtSig = extractThoughtSignature(rawResponseParts);
1964
+ withTimeout(this.handleToolExecutionStorage(stepToolCalls.map((tc, i) => ({
1965
+ toolName: tc.toolName,
1966
+ args: tc.args,
1967
+ ...(i === 0 && stepThoughtSig
1968
+ ? { thoughtSignature: stepThoughtSig }
1969
+ : {}),
1970
+ stepIndex: step,
1971
+ })), stepToolExecs.map((te) => ({
1972
+ toolName: te.name,
1973
+ output: te.output,
1974
+ stepIndex: step,
1975
+ })), options, new Date()), TOOL_STORAGE_TIMEOUT_MS, "tool storage write timed out").catch((error) => {
1976
+ logger.warn("[GoogleVertex] Failed to store native Gemini generate tool executions", {
1977
+ error: error instanceof Error ? error.message : String(error),
1978
+ });
1979
+ });
1980
+ }
1753
1981
  // The @google/genai SDK only accepts "user" and "model" as valid
1754
1982
  // roles in contents — function/tool responses must use role: "user"
1755
1983
  // (matching the SDK's automaticFunctionCalling implementation and
@@ -1788,7 +2016,7 @@ export class GoogleVertexProvider extends BaseProvider {
1788
2016
  },
1789
2017
  responseTime,
1790
2018
  toolsUsed: externalToolCalls.map((tc) => tc.toolName),
1791
- toolExecutions: externalToolExecutions,
2019
+ toolExecutions: transformToolExecutions(externalToolExecutions),
1792
2020
  enhancedWithTools: externalToolCalls.length > 0,
1793
2021
  };
1794
2022
  // Add structured output if final_result tool was used
@@ -1824,7 +2052,15 @@ export class GoogleVertexProvider extends BaseProvider {
1824
2052
  });
1825
2053
  // Build messages from input
1826
2054
  const messages = [];
1827
- // Add conversation history if present
2055
+ // Add conversation history if present.
2056
+ //
2057
+ // Intentionally text-only. Anthropic's API rejects messages where a
2058
+ // tool_use_id reference appears without its matching tool_use in the
2059
+ // same turn — so synthesising tool_use / tool_result blocks from
2060
+ // stored ChatMessages risks emitting orphaned references that fail
2061
+ // validation. Tool rows are still persisted to Redis (chat-history
2062
+ // UI renders them) but they don't re-enter the model's context on
2063
+ // subsequent turns.
1828
2064
  if (options.conversationMessages &&
1829
2065
  options.conversationMessages.length > 0) {
1830
2066
  for (const msg of options.conversationMessages) {
@@ -2055,157 +2291,270 @@ export class GoogleVertexProvider extends BaseProvider {
2055
2291
  stop_sequences: options.stopSequences,
2056
2292
  }),
2057
2293
  };
2058
- // Handle tool calling loop with max steps
2294
+ // ── Real-time streaming via stream.on('text', ...) ────────────────────
2295
+ //
2296
+ // The Anthropic SDK exposes per-delta streaming through `stream.on('text', listener)`:
2297
+ // each content_block_delta SSE event fires the listener synchronously
2298
+ // with that token's text — typically ~10 chars per delta, ~26ms apart
2299
+ // on Claude Haiku. Awaiting `stream.finalMessage()` here would buffer
2300
+ // the entire response before yielding anything; the listener pattern
2301
+ // keeps the wire and the consumer in lockstep instead.
2302
+ //
2303
+ // Structure: push-channel + background agentic loop, returning the
2304
+ // StreamResult immediately so callers can iterate `channel.iterable`
2305
+ // while generation is still in progress. Mirrors the executeStream
2306
+ // pattern in googleAiStudio.ts.
2059
2307
  const maxSteps = options.maxSteps || DEFAULT_MAX_STEPS;
2060
- let step = 0;
2061
- let finalText = "";
2062
- let structuredOutput;
2063
2308
  const allToolCalls = [];
2064
- // Track each Anthropic text block separately so the returned async
2065
- // iterable yields multiple chunks. The chunk-count smoke test fails
2066
- // when an entire response collapses into a single yield, even though
2067
- // the upstream stream is genuinely incremental.
2068
- const allTextBlocks = [];
2069
- let totalInputTokens = 0;
2070
- let totalOutputTokens = 0;
2071
- const currentMessages = [...messages];
2072
- while (step < maxSteps) {
2073
- step++;
2309
+ const toolExecutions = [];
2310
+ const channel = createTextChannel();
2311
+ // Mutable holders the StreamResult references. Background loop updates
2312
+ // these as state progresses; consumer reads them after iterating the
2313
+ // stream to completion (channel.close() is called AFTER mutations).
2314
+ const usage = { input: 0, output: 0, total: 0 };
2315
+ const metadata = {
2316
+ streamId: `native-anthropic-vertex-${Date.now()}`,
2317
+ startTime,
2318
+ responseTime: 0,
2319
+ totalToolExecutions: 0,
2320
+ };
2321
+ const toolsUsedRef = [];
2322
+ const structuredOutputRef = {};
2323
+ // Track the active Anthropic stream so options.abortSignal can cancel it
2324
+ // mid-flight (pre-rewrite code had no abort handling — fixed for free).
2325
+ let activeStream;
2326
+ const abortHandler = () => {
2074
2327
  try {
2075
- // Use streaming API
2076
- const stream = await client.messages.stream({
2077
- ...requestParams,
2078
- messages: currentMessages,
2079
- });
2080
- // Collect the full response
2081
- const response = await stream.finalMessage();
2082
- // Update token counts
2083
- totalInputTokens += response.usage?.input_tokens || 0;
2084
- totalOutputTokens += response.usage?.output_tokens || 0;
2085
- // Check if we need to handle tool use
2086
- const toolUseBlocks = response.content.filter((block) => block.type === "tool_use");
2087
- // Check for final_result tool call (for structured output)
2088
- if (useFinalResultTool) {
2089
- const finalResultCall = toolUseBlocks.find((block) => block.name === "final_result");
2090
- if (finalResultCall) {
2091
- // Extract structured output and convert to JSON string for finalText
2092
- structuredOutput = finalResultCall.input;
2093
- finalText = JSON.stringify(structuredOutput);
2094
- logger.debug("[GoogleVertex] Extracted structured output from final_result tool (stream)", { keys: Object.keys(structuredOutput) });
2095
- break; // We have the structured output, we're done
2096
- }
2097
- }
2098
- // Extract text from response
2099
- const textBlocks = response.content.filter((block) => block.type === "text");
2100
- const responseText = textBlocks.map((b) => b.text).join("");
2101
- // Preserve each Anthropic text block separately so the
2102
- // consumer-visible stream yields multiple chunks (one per block).
2103
- for (const tb of textBlocks) {
2104
- if (tb.text.length > 0) {
2105
- allTextBlocks.push(tb.text);
2328
+ activeStream?.controller.abort();
2329
+ }
2330
+ catch {
2331
+ /* ignore — stream may already be finalized */
2332
+ }
2333
+ };
2334
+ options.abortSignal?.addEventListener("abort", abortHandler);
2335
+ // Defensive upper bound: if neither the caller nor the SDK ever fires,
2336
+ // abort the stream after the configured timeout so a stalled
2337
+ // Vertex/Anthropic endpoint can't hang forever. options.timeout wins
2338
+ // if set; otherwise 5 min — generous for tool-heavy turns.
2339
+ const streamTimeoutMs = parseTimeout(options.timeout) ?? 300_000;
2340
+ const streamTimeoutHandle = setTimeout(() => {
2341
+ logger.warn(`[GoogleVertex] Anthropic stream exceeded ${streamTimeoutMs}ms — aborting`);
2342
+ abortHandler();
2343
+ }, streamTimeoutMs);
2344
+ const loopPromise = (async () => {
2345
+ let step = 0;
2346
+ const currentMessages = [...messages];
2347
+ try {
2348
+ while (step < maxSteps) {
2349
+ if (options.abortSignal?.aborted) {
2350
+ throw new Error("Stream aborted by caller");
2106
2351
  }
2107
- }
2108
- if (toolUseBlocks.length === 0) {
2109
- // No tool calls, we're done
2110
- finalText = responseText || finalText;
2111
- break;
2112
- }
2113
- // Handle tool calls
2114
- const toolResults = [];
2115
- for (const toolUse of toolUseBlocks) {
2116
- allToolCalls.push({
2117
- toolName: toolUse.name,
2118
- args: toolUse.input,
2352
+ step++;
2353
+ const stream = await client.messages.stream({
2354
+ ...requestParams,
2355
+ messages: currentMessages,
2119
2356
  });
2120
- const execute = executeMap.get(toolUse.name);
2121
- if (execute) {
2122
- try {
2123
- const result = await execute(toolUse.input);
2124
- toolResults.push({
2125
- type: "tool_result",
2126
- tool_use_id: toolUse.id,
2127
- content: typeof result === "string" ? result : JSON.stringify(result),
2128
- });
2357
+ activeStream = stream;
2358
+ // Forward each text delta to the consumer as it arrives. The
2359
+ // Anthropic SDK fires this listener synchronously for every
2360
+ // content_block_delta SSE event, so the channel sees bytes at
2361
+ // the same cadence the wire delivers them.
2362
+ stream.on("text", (delta) => {
2363
+ if (delta.length > 0) {
2364
+ channel.push(delta);
2129
2365
  }
2130
- catch (err) {
2366
+ });
2367
+ // finalMessage() resolves AFTER message_stop. By then the listener
2368
+ // has already fired for every delta — awaiting here doesn't block
2369
+ // visible streaming, it just gives us the structured response
2370
+ // shape needed for tool_use block extraction.
2371
+ const response = await stream.finalMessage();
2372
+ activeStream = undefined;
2373
+ usage.input += response.usage?.input_tokens || 0;
2374
+ usage.output += response.usage?.output_tokens || 0;
2375
+ usage.total = usage.input + usage.output;
2376
+ const toolUseBlocks = response.content.filter((block) => block.type === "tool_use");
2377
+ // Structured-output pattern: when the model returns the
2378
+ // final_result tool call, push its arguments as JSON and stop.
2379
+ // Single-shot yield so callers consuming the stream still see
2380
+ // the structured value.
2381
+ if (useFinalResultTool) {
2382
+ const finalResultCall = toolUseBlocks.find((block) => block.name === "final_result");
2383
+ if (finalResultCall) {
2384
+ structuredOutputRef.value = finalResultCall.input;
2385
+ channel.push(JSON.stringify(finalResultCall.input));
2386
+ logger.debug("[GoogleVertex] Extracted structured output from final_result tool (stream)", { keys: Object.keys(finalResultCall.input) });
2387
+ break;
2388
+ }
2389
+ }
2390
+ // No tools — pure text turn. Listener already pushed all deltas;
2391
+ // loop terminates and channel.close() flushes the consumer.
2392
+ if (toolUseBlocks.length === 0) {
2393
+ break;
2394
+ }
2395
+ // Tool execution loop. tool:start / tool:end events fire from
2396
+ // ToolsManager's wrapped execute (ToolsManager.ts:355) — no inline
2397
+ // emit needed.
2398
+ const toolResults = [];
2399
+ // Per-step bookkeeping for conversation-memory storage.
2400
+ const stepStorageCalls = [];
2401
+ const stepStorageResults = [];
2402
+ // Note: tool:start / tool:end events are emitted by ToolsManager's
2403
+ // wrapped `execute` (see ToolsManager.ts:355) — no inline emit needed.
2404
+ for (const toolUse of toolUseBlocks) {
2405
+ allToolCalls.push({
2406
+ toolName: toolUse.name,
2407
+ args: toolUse.input,
2408
+ });
2409
+ toolsUsedRef.push(toolUse.name);
2410
+ stepStorageCalls.push({
2411
+ toolCallId: toolUse.id,
2412
+ toolName: toolUse.name,
2413
+ args: toolUse.input,
2414
+ });
2415
+ const execute = executeMap.get(toolUse.name);
2416
+ if (execute) {
2417
+ try {
2418
+ const toolOptions = {
2419
+ toolCallId: toolUse.id,
2420
+ messages: [],
2421
+ abortSignal: options.abortSignal,
2422
+ };
2423
+ const result = await execute(toolUse.input, toolOptions);
2424
+ toolExecutions.push({
2425
+ name: toolUse.name,
2426
+ input: toolUse.input,
2427
+ output: result,
2428
+ });
2429
+ // Anthropic requires tool_result.content to be a string.
2430
+ // JSON.stringify returns undefined for undefined/function/symbol,
2431
+ // so coerce defensively to keep the follow-up turn valid.
2432
+ const resultContent = typeof result === "string"
2433
+ ? result
2434
+ : (JSON.stringify(result ?? null) ?? String(result));
2435
+ toolResults.push({
2436
+ type: "tool_result",
2437
+ tool_use_id: toolUse.id,
2438
+ content: resultContent,
2439
+ });
2440
+ stepStorageResults.push({
2441
+ toolCallId: toolUse.id,
2442
+ toolName: toolUse.name,
2443
+ output: result,
2444
+ });
2445
+ }
2446
+ catch (err) {
2447
+ const errMsg = `Error executing tool "${toolUse.name}": ${err instanceof Error ? err.message : String(err)}`;
2448
+ const errorPayload = { error: errMsg };
2449
+ toolExecutions.push({
2450
+ name: toolUse.name,
2451
+ input: toolUse.input,
2452
+ output: errorPayload,
2453
+ });
2454
+ toolResults.push({
2455
+ type: "tool_result",
2456
+ tool_use_id: toolUse.id,
2457
+ content: errMsg,
2458
+ });
2459
+ stepStorageResults.push({
2460
+ toolCallId: toolUse.id,
2461
+ toolName: toolUse.name,
2462
+ output: errorPayload,
2463
+ });
2464
+ }
2465
+ }
2466
+ else {
2467
+ const errMsg = `TOOL_NOT_FOUND: The tool "${toolUse.name}" does not exist.`;
2468
+ const errorPayload = { error: errMsg };
2469
+ toolExecutions.push({
2470
+ name: toolUse.name,
2471
+ input: toolUse.input,
2472
+ output: errorPayload,
2473
+ });
2131
2474
  toolResults.push({
2132
2475
  type: "tool_result",
2133
2476
  tool_use_id: toolUse.id,
2134
- content: `Error executing tool: ${err instanceof Error ? err.message : String(err)}`,
2477
+ content: errMsg,
2478
+ });
2479
+ stepStorageResults.push({
2480
+ toolCallId: toolUse.id,
2481
+ toolName: toolUse.name,
2482
+ output: errorPayload,
2135
2483
  });
2136
2484
  }
2137
2485
  }
2138
- else {
2139
- toolResults.push({
2140
- type: "tool_result",
2141
- tool_use_id: toolUse.id,
2142
- content: `TOOL_NOT_FOUND: The tool "${toolUse.name}" does not exist.`,
2486
+ // Persist this step's tool calls/results into conversation memory.
2487
+ // Without this hook, tool rows never land in Redis and the
2488
+ // chat-history UI loses every tool invocation.
2489
+ if (stepStorageCalls.length > 0 || stepStorageResults.length > 0) {
2490
+ withTimeout(this.handleToolExecutionStorage(stepStorageCalls.map((c) => ({ ...c, stepIndex: step })), stepStorageResults.map((r) => ({ ...r, stepIndex: step })), options, new Date()), TOOL_STORAGE_TIMEOUT_MS, "tool storage write timed out").catch((error) => {
2491
+ logger.warn("[GoogleVertex] Failed to store native Anthropic stream tool executions", {
2492
+ error: error instanceof Error ? error.message : String(error),
2493
+ });
2143
2494
  });
2144
2495
  }
2496
+ // Continue the loop: assistant turn + tool_result user turn.
2497
+ // Filter server_tool_use blocks (Anthropic API rejects them in
2498
+ // subsequent message turns).
2499
+ const assistantContent = response.content.filter((block) => block.type !== "server_tool_use");
2500
+ currentMessages.push({
2501
+ role: "assistant",
2502
+ content: assistantContent,
2503
+ });
2504
+ currentMessages.push({
2505
+ role: "user",
2506
+ content: toolResults,
2507
+ });
2145
2508
  }
2146
- // Add assistant message and tool results to continue the loop
2147
- // Filter out server_tool_use blocks that the Anthropic API doesn't accept in messages
2148
- const assistantContent = response.content.filter((block) => block.type !== "server_tool_use");
2149
- currentMessages.push({
2150
- role: "assistant",
2151
- content: assistantContent,
2152
- });
2153
- currentMessages.push({
2154
- role: "user",
2155
- content: toolResults,
2156
- });
2157
- // Store last text in case we hit max steps
2158
- if (responseText) {
2159
- finalText = responseText;
2160
- }
2161
- }
2162
- catch (error) {
2163
- logger.error("[GoogleVertex] Native Anthropic SDK stream error", error);
2164
- throw this.handleProviderError(error);
2509
+ metadata.responseTime = Date.now() - startTime;
2510
+ metadata.totalToolExecutions = allToolCalls.filter((tc) => tc.toolName !== "final_result").length;
2511
+ channel.close();
2165
2512
  }
2166
- }
2167
- const responseTime = Date.now() - startTime;
2168
- // Yield each text block separately so the CLI receives multiple
2169
- // stream chunks instead of a single coalesced buffer. The Anthropic
2170
- // SDK gives us discrete text blocks; collapsing them into one chunk
2171
- // breaks the chunk-count smoke test even though the upstream
2172
- // streaming is real.
2173
- const finalContentBlocks = (() => {
2174
- if (structuredOutput) {
2175
- return [finalText];
2513
+ catch (err) {
2514
+ logger.error("[GoogleVertex] Native Anthropic SDK stream error", err);
2515
+ channel.error(this.handleProviderError(err));
2176
2516
  }
2177
- if (allTextBlocks.length > 0) {
2178
- return allTextBlocks;
2517
+ finally {
2518
+ options.abortSignal?.removeEventListener("abort", abortHandler);
2519
+ clearTimeout(streamTimeoutHandle);
2179
2520
  }
2180
- return finalText ? [finalText] : [];
2181
2521
  })();
2182
- async function* createTextStream() {
2183
- for (const part of finalContentBlocks) {
2184
- if (part.length > 0) {
2185
- yield { content: part };
2186
- }
2187
- }
2188
- }
2189
- return {
2190
- stream: createTextStream(),
2522
+ // Suppress unhandled-rejection: errors funnel through channel.error()
2523
+ // and surface when the consumer iterates the stream.
2524
+ loopPromise.catch(() => undefined);
2525
+ // Return StreamResult IMMEDIATELY — caller's for-await can begin
2526
+ // iterating channel.iterable while the background loop is still
2527
+ // generating. usage / metadata / toolCalls / toolExecutions are mutable
2528
+ // references that the loop fills in over time; the consumer reads them
2529
+ // after iteration completes (after channel.close() has fired).
2530
+ const result = {
2531
+ stream: channel.iterable,
2191
2532
  provider: this.providerName,
2192
2533
  model: modelName,
2193
- usage: {
2194
- input: totalInputTokens,
2195
- output: totalOutputTokens,
2196
- total: totalInputTokens + totalOutputTokens,
2197
- },
2198
- toolCalls: allToolCalls.map((tc) => ({
2199
- toolName: tc.toolName,
2200
- args: tc.args,
2201
- })),
2202
- metadata: {
2203
- streamId: `native-anthropic-vertex-${Date.now()}`,
2204
- startTime,
2205
- responseTime,
2206
- totalToolExecutions: allToolCalls.length,
2207
- },
2534
+ usage,
2535
+ metadata,
2208
2536
  };
2537
+ Object.defineProperty(result, "toolCalls", {
2538
+ enumerable: true,
2539
+ configurable: true,
2540
+ get: () => allToolCalls.filter((tc) => tc.toolName !== "final_result"),
2541
+ });
2542
+ Object.defineProperty(result, "toolsUsed", {
2543
+ enumerable: true,
2544
+ configurable: true,
2545
+ get: () => toolsUsedRef.filter((name) => name !== "final_result"),
2546
+ });
2547
+ Object.defineProperty(result, "toolExecutions", {
2548
+ enumerable: true,
2549
+ configurable: true,
2550
+ get: () => transformToolExecutions(toolExecutions.filter((te) => te.name !== "final_result")),
2551
+ });
2552
+ Object.defineProperty(result, "structuredOutput", {
2553
+ enumerable: true,
2554
+ configurable: true,
2555
+ get: () => structuredOutputRef.value,
2556
+ });
2557
+ return result;
2209
2558
  }
2210
2559
  /**
2211
2560
  * Execute generate using native @anthropic-ai/vertex-sdk for Claude models on Vertex AI
@@ -2228,6 +2577,9 @@ export class GoogleVertexProvider extends BaseProvider {
2228
2577
  // the older surface. The Vertex Claude STREAM path already follows this
2229
2578
  // priority — keeping the GENERATE path on `conversationHistory` only
2230
2579
  // would silently drop multi-turn context for memory/loop sessions.
2580
+ // Intentionally text-only: see the stream sibling for the rationale —
2581
+ // synthesising tool_use / tool_result blocks from stored ChatMessages
2582
+ // risks emitting orphaned references that Anthropic's API rejects.
2231
2583
  const historyMessages = options.conversationMessages && options.conversationMessages.length > 0
2232
2584
  ? options.conversationMessages
2233
2585
  : options.conversationHistory;
@@ -2471,10 +2823,14 @@ export class GoogleVertexProvider extends BaseProvider {
2471
2823
  while (step < maxSteps) {
2472
2824
  step++;
2473
2825
  try {
2474
- const response = await client.messages.create({
2826
+ // Bound the SDK wait so a stalled Vertex/Anthropic call can't hang
2827
+ // generate forever. options.timeout wins if set, otherwise default
2828
+ // to 5 min — generous for tool-heavy turns.
2829
+ const generateTimeoutMs = parseTimeout(options.timeout) ?? 300_000;
2830
+ const response = await withTimeout(client.messages.create({
2475
2831
  ...requestParams,
2476
2832
  messages: currentMessages,
2477
- });
2833
+ }), generateTimeoutMs, "Anthropic generate timed out");
2478
2834
  // Update token counts
2479
2835
  totalInputTokens += response.usage?.input_tokens || 0;
2480
2836
  totalOutputTokens += response.usage?.output_tokens || 0;
@@ -2501,42 +2857,105 @@ export class GoogleVertexProvider extends BaseProvider {
2501
2857
  }
2502
2858
  // Handle tool calls
2503
2859
  const toolResults = [];
2860
+ // Per-step bookkeeping for conversation-memory storage. Tracks calls
2861
+ // and results for ONLY the tools fired in this step so the storage
2862
+ // hook can tag them with the current stepIndex.
2863
+ const stepStorageCalls = [];
2864
+ const stepStorageResults = [];
2865
+ // Note: tool:start / tool:end events are emitted by ToolsManager's
2866
+ // wrapped `execute` (see ToolsManager.ts:355) — no inline emit needed.
2504
2867
  for (const toolUse of toolUseBlocks) {
2505
2868
  allToolCalls.push({
2506
2869
  toolName: toolUse.name,
2507
2870
  args: toolUse.input,
2508
2871
  });
2872
+ stepStorageCalls.push({
2873
+ toolCallId: toolUse.id,
2874
+ toolName: toolUse.name,
2875
+ args: toolUse.input,
2876
+ });
2509
2877
  const execute = executeMap.get(toolUse.name);
2510
2878
  if (execute) {
2511
2879
  try {
2512
- const result = await execute(toolUse.input);
2880
+ const toolOptions = {
2881
+ toolCallId: toolUse.id,
2882
+ messages: [],
2883
+ abortSignal: options.abortSignal,
2884
+ };
2885
+ const result = await execute(toolUse.input, toolOptions);
2513
2886
  toolExecutions.push({
2514
2887
  name: toolUse.name,
2515
2888
  input: toolUse.input,
2516
2889
  output: result,
2517
2890
  });
2891
+ // Anthropic requires tool_result.content to be a string.
2892
+ // JSON.stringify returns undefined for undefined/function/symbol,
2893
+ // so coerce defensively to keep the follow-up turn valid.
2894
+ const resultContent = typeof result === "string"
2895
+ ? result
2896
+ : (JSON.stringify(result ?? null) ?? String(result));
2518
2897
  toolResults.push({
2519
2898
  type: "tool_result",
2520
2899
  tool_use_id: toolUse.id,
2521
- content: typeof result === "string" ? result : JSON.stringify(result),
2900
+ content: resultContent,
2901
+ });
2902
+ stepStorageResults.push({
2903
+ toolCallId: toolUse.id,
2904
+ toolName: toolUse.name,
2905
+ output: result,
2522
2906
  });
2523
2907
  }
2524
2908
  catch (err) {
2909
+ const errMsg = `Error executing tool "${toolUse.name}": ${err instanceof Error ? err.message : String(err)}`;
2910
+ const errorPayload = { error: errMsg };
2911
+ toolExecutions.push({
2912
+ name: toolUse.name,
2913
+ input: toolUse.input,
2914
+ output: errorPayload,
2915
+ });
2525
2916
  toolResults.push({
2526
2917
  type: "tool_result",
2527
2918
  tool_use_id: toolUse.id,
2528
- content: `Error executing tool: ${err instanceof Error ? err.message : String(err)}`,
2919
+ content: errMsg,
2920
+ });
2921
+ stepStorageResults.push({
2922
+ toolCallId: toolUse.id,
2923
+ toolName: toolUse.name,
2924
+ output: errorPayload,
2529
2925
  });
2530
2926
  }
2531
2927
  }
2532
2928
  else {
2929
+ const errMsg = `TOOL_NOT_FOUND: The tool "${toolUse.name}" does not exist.`;
2930
+ const errorPayload = { error: errMsg };
2931
+ toolExecutions.push({
2932
+ name: toolUse.name,
2933
+ input: toolUse.input,
2934
+ output: errorPayload,
2935
+ });
2533
2936
  toolResults.push({
2534
2937
  type: "tool_result",
2535
2938
  tool_use_id: toolUse.id,
2536
- content: `TOOL_NOT_FOUND: The tool "${toolUse.name}" does not exist.`,
2939
+ content: errMsg,
2940
+ });
2941
+ stepStorageResults.push({
2942
+ toolCallId: toolUse.id,
2943
+ toolName: toolUse.name,
2944
+ output: errorPayload,
2537
2945
  });
2538
2946
  }
2539
2947
  }
2948
+ // Persist this step's tool calls/results into conversation memory.
2949
+ // Without this, tool_call / tool_result rows never reach Redis and
2950
+ // the chat-history UI loses every tool invocation.
2951
+ // Fire-and-forget — storage failures must not break generation.
2952
+ if (stepStorageCalls.length > 0 || stepStorageResults.length > 0) {
2953
+ withTimeout(this.handleToolExecutionStorage(stepStorageCalls.map((c) => ({ ...c, stepIndex: step })), stepStorageResults.map((r) => ({ ...r, stepIndex: step })), options, new Date()), TOOL_STORAGE_TIMEOUT_MS, "tool storage write timed out").catch((error) => {
2954
+ logger.warn("[GoogleVertex] Failed to store native Anthropic generate tool executions", {
2955
+ error: error instanceof Error ? error.message : String(error),
2956
+ });
2957
+ });
2958
+ }
2540
2959
  // Add assistant message and tool results to continue the loop
2541
2960
  // Filter out server_tool_use blocks that the Anthropic API doesn't accept in messages
2542
2961
  const assistantContent = response.content.filter((block) => block.type !== "server_tool_use");
@@ -2559,6 +2978,8 @@ export class GoogleVertexProvider extends BaseProvider {
2559
2978
  }
2560
2979
  }
2561
2980
  const responseTime = Date.now() - startTime;
2981
+ const externalToolCalls = allToolCalls.filter((tc) => tc.toolName !== "final_result");
2982
+ const externalToolExecutions = toolExecutions.filter((te) => te.name !== "final_result");
2562
2983
  const result = {
2563
2984
  content: finalText,
2564
2985
  provider: this.providerName,
@@ -2569,9 +2990,9 @@ export class GoogleVertexProvider extends BaseProvider {
2569
2990
  total: totalInputTokens + totalOutputTokens,
2570
2991
  },
2571
2992
  responseTime,
2572
- toolsUsed: allToolCalls.map((tc) => tc.toolName),
2573
- toolExecutions,
2574
- enhancedWithTools: allToolCalls.length > 0,
2993
+ toolsUsed: externalToolCalls.map((tc) => tc.toolName),
2994
+ toolExecutions: transformToolExecutions(externalToolExecutions),
2995
+ enhancedWithTools: externalToolCalls.length > 0,
2575
2996
  };
2576
2997
  // Route through enhanceResult so analytics/evaluation/tracing are picked
2577
2998
  // up the same way the BaseProvider.generate() path picks them up. The
@@ -2736,6 +3157,17 @@ export class GoogleVertexProvider extends BaseProvider {
2736
3157
  this.emitGenerationEnd(modelName, videoResult, generateStartTime, true);
2737
3158
  return videoResult;
2738
3159
  }
3160
+ // TTS direct-synthesis mode: when caller passes `tts.enabled` without
3161
+ // `tts.useAiResponse`, route to the shared `handleDirectTTSSynthesis`
3162
+ // (synthesise the input text directly; no LLM call). BaseProvider's
3163
+ // standard generate() does the same dispatch — we replicate it here
3164
+ // because Vertex's override bypasses that path.
3165
+ if (options.tts?.enabled && !options.tts?.useAiResponse) {
3166
+ logger.info("[GoogleVertex] Routing TTS direct-synthesis to handleDirectTTSSynthesis", { model: modelName });
3167
+ const ttsResult = await this.handleDirectTTSSynthesis(options, generateStartTime);
3168
+ this.emitGenerationEnd(modelName, ttsResult, generateStartTime, true);
3169
+ return ttsResult;
3170
+ }
2739
3171
  // Check if this is an image generation model - route to executeImageGeneration without tools
2740
3172
  const isImageModel = IMAGE_GENERATION_MODELS.some((m) => modelName.toLowerCase().startsWith(m.toLowerCase()));
2741
3173
  if (isImageModel) {
@@ -2796,23 +3228,41 @@ export class GoogleVertexProvider extends BaseProvider {
2796
3228
  "";
2797
3229
  try {
2798
3230
  let result;
2799
- // Route Claude models to native Anthropic SDK
2800
- if (isAnthropicModel(modelName)) {
2801
- logger.info("[GoogleVertex] Routing Claude generate to native @anthropic-ai/vertex-sdk", {
2802
- model: modelName,
2803
- totalToolCount: Object.keys(mergedOptions.tools).length,
2804
- });
2805
- result = await this.executeNativeAnthropicGenerate(mergedOptions);
2806
- }
2807
- else {
2808
- // ALL Gemini models use native @google/genai SDK
3231
+ // Wrap the actual native generate call in `neurolink.executeGeneration`
3232
+ // so the observability span chain (tested by
3233
+ // "Tracing: Generate Span Chain") sees a third inner span on the
3234
+ // native @google/genai / @anthropic-ai/vertex-sdk path — Pipeline A
3235
+ // gets this for free from GenerationHandler.executeGeneration.
3236
+ result = await withSpan({
3237
+ name: "neurolink.executeGeneration",
3238
+ tracer: tracers.provider,
3239
+ attributes: {
3240
+ [ATTR.GEN_AI_SYSTEM]: this.providerName,
3241
+ [ATTR.GEN_AI_MODEL]: modelName,
3242
+ "neurolink.path": isAnthropicModel(modelName)
3243
+ ? "native.anthropic"
3244
+ : "native.google-genai",
3245
+ },
3246
+ }, async () => {
3247
+ if (isAnthropicModel(modelName)) {
3248
+ logger.info("[GoogleVertex] Routing Claude generate to native @anthropic-ai/vertex-sdk", {
3249
+ model: modelName,
3250
+ totalToolCount: Object.keys(mergedOptions.tools).length,
3251
+ });
3252
+ return this.executeNativeAnthropicGenerate(mergedOptions);
3253
+ }
2809
3254
  logger.info("[GoogleVertex] Routing Gemini generate to native @google/genai", {
2810
3255
  model: modelName,
2811
3256
  totalToolCount: Object.keys(mergedOptions.tools).length,
2812
3257
  });
2813
- result = await this.executeNativeGemini3Generate(mergedOptions);
2814
- }
3258
+ return this.executeNativeGemini3Generate(mergedOptions);
3259
+ });
2815
3260
  this.attachUsageAndCostAttributes(generateSpan, modelName, result?.usage);
3261
+ // Pipe through TTS-of-AI-response when caller asks for it. The
3262
+ // shared `synthesizeAIResponseIfNeeded` no-ops when tts is not
3263
+ // enabled / useAiResponse is false, so the cost is zero on
3264
+ // non-TTS paths.
3265
+ result = await this.synthesizeAIResponseIfNeeded(result, options);
2816
3266
  // Fire onFinish lifecycle callback for the native generate path.
2817
3267
  // Pipeline A providers get this for free via the AI SDK middleware
2818
3268
  // wrapper (LifecycleMiddleware); native @google/genai bypasses
@@ -3052,6 +3502,14 @@ export class GoogleVertexProvider extends BaseProvider {
3052
3502
  ? { error: error instanceof Error ? error.message : String(error) }
3053
3503
  : {}),
3054
3504
  });
3505
+ // Mark on the result so the SDK-level runStandardGenerateRequest knows
3506
+ // this provider already emitted `generation:end` itself and skips its
3507
+ // own duplicate emission. Without this flag the public event listener
3508
+ // (and the observability test) would see two events per generate call.
3509
+ if (result && typeof result === "object") {
3510
+ result._generationEndEmitted =
3511
+ true;
3512
+ }
3055
3513
  }
3056
3514
  formatProviderError(error) {
3057
3515
  const errorRecord = error;