@juspay/neurolink 9.65.0 → 9.65.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,4 @@
1
+ /* eslint-disable max-lines-per-function */
1
2
  // Native SDK imports - no more @ai-sdk/google-vertex dependency
2
3
  import fs from "fs";
3
4
  import path from "path";
@@ -5,7 +6,7 @@ import os from "os";
5
6
  import {} from "ai";
6
7
  import { AIProviderName, ErrorCategory, ErrorSeverity, } from "../constants/enums.js";
7
8
  import { BaseProvider } from "../core/baseProvider.js";
8
- import { DEFAULT_MAX_STEPS, DEFAULT_TOOL_MAX_RETRIES, GLOBAL_LOCATION_MODELS, IMAGE_GENERATION_MODELS, } from "../core/constants.js";
9
+ import { DEFAULT_MAX_STEPS, DEFAULT_TOOL_MAX_RETRIES, GLOBAL_LOCATION_MODELS, IMAGE_GENERATION_MODELS, TOOL_STORAGE_TIMEOUT_MS, } from "../core/constants.js";
9
10
  import { ModelConfigurationManager } from "../core/modelConfiguration.js";
10
11
  import { createProxyFetch } from "../proxy/proxyFetch.js";
11
12
  import { AuthenticationError, InvalidModelError, NetworkError, ProviderError, RateLimitError, } from "../types/index.js";
@@ -17,10 +18,12 @@ import { hasRestrictedOutputLimit, RESTRICTED_OUTPUT_TOKEN_LIMIT, } from "../uti
17
18
  import { validateApiKey, createVertexProjectConfig, createGoogleAuthConfig, } from "../utils/providerConfig.js";
18
19
  import { convertZodToJsonSchema, inlineJsonSchema, ensureNestedSchemaTypes, } from "../utils/schemaConversion.js";
19
20
  import { createNativeThinkingConfig } from "../utils/thinkingConfig.js";
20
- import { TimeoutError } from "../utils/async/index.js";
21
- import { prependConversationMessages } from "./googleNativeGemini3.js";
21
+ import { TimeoutError, withTimeout } from "../utils/async/index.js";
22
+ import { parseTimeout } from "../utils/timeout.js";
23
+ import { createTextChannel, extractThoughtSignature, prependConversationMessages, } from "./googleNativeGemini3.js";
22
24
  import { ATTR, tracers, withClientSpan, withClientStreamSpan, withSpan, } from "../telemetry/index.js";
23
25
  import { calculateCost } from "../utils/pricing.js";
26
+ import { transformToolExecutions } from "../utils/transformationUtils.js";
24
27
  // Import proper types for multimodal message handling
25
28
  // Dynamic import helper for native Anthropic Vertex SDK
26
29
  let anthropicVertexModule = null;
@@ -1126,6 +1129,11 @@ export class GoogleVertexProvider extends BaseProvider {
1126
1129
  let finalText = "";
1127
1130
  let lastStepText = ""; // Track text from last step for maxSteps termination
1128
1131
  const allToolCalls = [];
1132
+ // Mirrors the generate-path shape so StreamResult.toolExecutions can be
1133
+ // populated (parity with AI-SDK-driven providers) and so the storage
1134
+ // hook can persist actual tool outputs rather than the placeholder
1135
+ // "success" string used by flushPendingToolData's default fallback.
1136
+ const toolExecutions = [];
1129
1137
  let step = 0;
1130
1138
  // Track structured output from final_result tool (when using final_result pattern)
1131
1139
  let finalResultStructuredOutput;
@@ -1232,22 +1240,38 @@ export class GoogleVertexProvider extends BaseProvider {
1232
1240
  });
1233
1241
  // Execute each function and collect responses
1234
1242
  const functionResponses = [];
1243
+ // Per-step bookkeeping for conversation-memory storage.
1244
+ const stepStorageCalls = [];
1245
+ const stepStorageResults = [];
1246
+ // Note: tool:start / tool:end events are emitted by ToolsManager's
1247
+ // wrapped `execute` (see ToolsManager.ts:355) — no inline emit needed.
1235
1248
  for (const call of stepFunctionCalls) {
1236
1249
  allToolCalls.push({ toolName: call.name, args: call.args });
1250
+ stepStorageCalls.push({ toolName: call.name, args: call.args });
1237
1251
  // Check if this tool has already exceeded retry limit
1238
1252
  const failedInfo = failedTools.get(call.name);
1239
1253
  if (failedInfo && failedInfo.count >= DEFAULT_TOOL_MAX_RETRIES) {
1240
1254
  logger.warn(`[GoogleVertex] Tool "${call.name}" has exceeded retry limit (${DEFAULT_TOOL_MAX_RETRIES}), skipping execution`);
1255
+ const errorPayload = {
1256
+ error: `TOOL_PERMANENTLY_FAILED: The tool "${call.name}" has failed ${failedInfo.count} times and will not be retried. Last error: ${failedInfo.lastError}. Please proceed without using this tool or inform the user that this functionality is unavailable.`,
1257
+ status: "permanently_failed",
1258
+ do_not_retry: true,
1259
+ };
1241
1260
  functionResponses.push({
1242
1261
  functionResponse: {
1243
1262
  name: call.name,
1244
- response: {
1245
- error: `TOOL_PERMANENTLY_FAILED: The tool "${call.name}" has failed ${failedInfo.count} times and will not be retried. Last error: ${failedInfo.lastError}. Please proceed without using this tool or inform the user that this functionality is unavailable.`,
1246
- status: "permanently_failed",
1247
- do_not_retry: true,
1248
- },
1263
+ response: errorPayload,
1249
1264
  },
1250
1265
  });
1266
+ toolExecutions.push({
1267
+ name: call.name,
1268
+ input: call.args,
1269
+ output: errorPayload,
1270
+ });
1271
+ stepStorageResults.push({
1272
+ toolName: call.name,
1273
+ output: errorPayload,
1274
+ });
1251
1275
  continue;
1252
1276
  }
1253
1277
  const execute = executeMap.get(call.name);
@@ -1260,9 +1284,18 @@ export class GoogleVertexProvider extends BaseProvider {
1260
1284
  abortSignal: undefined,
1261
1285
  };
1262
1286
  const result = await execute(call.args, toolOptions);
1287
+ toolExecutions.push({
1288
+ name: call.name,
1289
+ input: call.args,
1290
+ output: result,
1291
+ });
1263
1292
  functionResponses.push({
1264
1293
  functionResponse: { name: call.name, response: { result } },
1265
1294
  });
1295
+ stepStorageResults.push({
1296
+ toolName: call.name,
1297
+ output: result,
1298
+ });
1266
1299
  }
1267
1300
  catch (error) {
1268
1301
  const errorMessage = error instanceof Error ? error.message : "Unknown error";
@@ -1277,38 +1310,77 @@ export class GoogleVertexProvider extends BaseProvider {
1277
1310
  logger.warn(`[GoogleVertex] Tool "${call.name}" failed (attempt ${currentFailInfo.count}/${DEFAULT_TOOL_MAX_RETRIES}): ${errorMessage}`);
1278
1311
  // Determine if this is a permanent failure
1279
1312
  const isPermanentFailure = currentFailInfo.count >= DEFAULT_TOOL_MAX_RETRIES;
1313
+ const errorPayload = {
1314
+ error: isPermanentFailure
1315
+ ? `TOOL_PERMANENTLY_FAILED: The tool "${call.name}" has failed ${currentFailInfo.count} times with error: ${errorMessage}. This tool will not be retried. Please proceed without using this tool or inform the user that this functionality is unavailable.`
1316
+ : `TOOL_EXECUTION_ERROR: ${errorMessage}. Retry attempt ${currentFailInfo.count}/${DEFAULT_TOOL_MAX_RETRIES}.`,
1317
+ status: isPermanentFailure ? "permanently_failed" : "failed",
1318
+ do_not_retry: isPermanentFailure,
1319
+ retry_count: currentFailInfo.count,
1320
+ max_retries: DEFAULT_TOOL_MAX_RETRIES,
1321
+ };
1280
1322
  functionResponses.push({
1281
1323
  functionResponse: {
1282
1324
  name: call.name,
1283
- response: {
1284
- error: isPermanentFailure
1285
- ? `TOOL_PERMANENTLY_FAILED: The tool "${call.name}" has failed ${currentFailInfo.count} times with error: ${errorMessage}. This tool will not be retried. Please proceed without using this tool or inform the user that this functionality is unavailable.`
1286
- : `TOOL_EXECUTION_ERROR: ${errorMessage}. Retry attempt ${currentFailInfo.count}/${DEFAULT_TOOL_MAX_RETRIES}.`,
1287
- status: isPermanentFailure
1288
- ? "permanently_failed"
1289
- : "failed",
1290
- do_not_retry: isPermanentFailure,
1291
- retry_count: currentFailInfo.count,
1292
- max_retries: DEFAULT_TOOL_MAX_RETRIES,
1293
- },
1325
+ response: errorPayload,
1294
1326
  },
1295
1327
  });
1328
+ toolExecutions.push({
1329
+ name: call.name,
1330
+ input: call.args,
1331
+ output: errorPayload,
1332
+ });
1333
+ stepStorageResults.push({
1334
+ toolName: call.name,
1335
+ output: errorPayload,
1336
+ });
1296
1337
  }
1297
1338
  }
1298
1339
  else {
1299
1340
  // Tool not found is a permanent error
1341
+ const errorPayload = {
1342
+ error: `TOOL_NOT_FOUND: The tool "${call.name}" does not exist. Do not attempt to call this tool again.`,
1343
+ status: "permanently_failed",
1344
+ do_not_retry: true,
1345
+ };
1300
1346
  functionResponses.push({
1301
1347
  functionResponse: {
1302
1348
  name: call.name,
1303
- response: {
1304
- error: `TOOL_NOT_FOUND: The tool "${call.name}" does not exist. Do not attempt to call this tool again.`,
1305
- status: "permanently_failed",
1306
- do_not_retry: true,
1307
- },
1349
+ response: errorPayload,
1308
1350
  },
1309
1351
  });
1352
+ toolExecutions.push({
1353
+ name: call.name,
1354
+ input: call.args,
1355
+ output: errorPayload,
1356
+ });
1357
+ stepStorageResults.push({
1358
+ toolName: call.name,
1359
+ output: errorPayload,
1360
+ });
1310
1361
  }
1311
1362
  }
1363
+ // Persist this step's tool calls/results into conversation memory.
1364
+ // Without this, tool_call / tool_result rows never reach Redis and
1365
+ // the chat-history UI loses every tool invocation.
1366
+ //
1367
+ // `thoughtSignature` rides as a sibling on the first call of the
1368
+ // step — Gemini 3 needs it to match thinking patterns when the
1369
+ // conversation is replayed on the next turn.
1370
+ if (stepStorageCalls.length > 0 || stepStorageResults.length > 0) {
1371
+ const stepThoughtSig = extractThoughtSignature(rawResponseParts);
1372
+ withTimeout(this.handleToolExecutionStorage(stepStorageCalls.map((c, i) => ({
1373
+ ...c,
1374
+ ...(i === 0 && stepThoughtSig
1375
+ ? { thoughtSignature: stepThoughtSig }
1376
+ : {}),
1377
+ stepIndex: step,
1378
+ })), stepStorageResults.map((r) => ({ ...r, stepIndex: step })), options, new Date()), TOOL_STORAGE_TIMEOUT_MS, "tool storage write timed out").catch((error) => {
1379
+ logger.warn("[GoogleVertex] Failed to store native Gemini stream tool executions", {
1380
+ error: error instanceof Error ? error.message : String(error),
1381
+ });
1382
+ });
1383
+ }
1312
1384
  // The @google/genai SDK only accepts "user" and "model" as valid
1313
1385
  // roles in contents — function/tool responses must use role: "user"
1314
1386
  // (matching the SDK's automaticFunctionCalling implementation and
@@ -1354,6 +1426,7 @@ export class GoogleVertexProvider extends BaseProvider {
1354
1426
  }
1355
1427
  // Filter out final_result from tool calls as it's an internal pattern
1356
1428
  const externalToolCalls = allToolCalls.filter((tc) => tc.toolName !== "final_result");
1429
+ const externalToolExecutions = toolExecutions.filter((te) => te.name !== "final_result");
1357
1430
  const result = {
1358
1431
  stream: createTextStream(),
1359
1432
  provider: this.providerName,
@@ -1367,6 +1440,12 @@ export class GoogleVertexProvider extends BaseProvider {
1367
1440
  toolName: tc.toolName,
1368
1441
  args: tc.args,
1369
1442
  })),
1443
+ // Surface tools-used + execution summary so `hasToolActivity` in
1444
+ // conversationMemory.ts evaluates true for tool-only stream turns
1445
+ // (assistant text empty but tools ran) and downstream consumers see
1446
+ // the same shape AI-SDK-driven providers expose.
1447
+ toolsUsed: externalToolCalls.map((tc) => tc.toolName),
1448
+ toolExecutions: transformToolExecutions(externalToolExecutions),
1370
1449
  metadata: {
1371
1450
  streamId: `native-vertex-${Date.now()}`,
1372
1451
  startTime,
@@ -1768,6 +1847,10 @@ export class GoogleVertexProvider extends BaseProvider {
1768
1847
  });
1769
1848
  // Execute each function and collect responses
1770
1849
  const functionResponses = [];
1850
+ const toolCallsBefore = allToolCalls.length;
1851
+ const toolExecsBefore = toolExecutions.length;
1852
+ // Note: tool:start / tool:end events are emitted by ToolsManager's
1853
+ // wrapped `execute` (see ToolsManager.ts:355) — no inline emit needed.
1771
1854
  for (const call of stepFunctionCalls) {
1772
1855
  allToolCalls.push({ toolName: call.name, args: call.args });
1773
1856
  // Check if this tool has already exceeded retry limit
@@ -1870,6 +1953,32 @@ export class GoogleVertexProvider extends BaseProvider {
1870
1953
  });
1871
1954
  }
1872
1955
  }
1956
+ // Persist this step's tool calls/results into conversation memory.
1957
+ // Without this, tool_call / tool_result rows never reach Redis and
1958
+ // the chat-history UI loses every tool invocation. The first call
1959
+ // of the step carries the step's `thoughtSignature` so Gemini 3 can
1960
+ // match thinking patterns on replay.
1961
+ const stepToolCalls = allToolCalls.slice(toolCallsBefore);
1962
+ const stepToolExecs = toolExecutions.slice(toolExecsBefore);
1963
+ if (stepToolCalls.length > 0 || stepToolExecs.length > 0) {
1964
+ const stepThoughtSig = extractThoughtSignature(rawResponseParts);
1965
+ withTimeout(this.handleToolExecutionStorage(stepToolCalls.map((tc, i) => ({
1966
+ toolName: tc.toolName,
1967
+ args: tc.args,
1968
+ ...(i === 0 && stepThoughtSig
1969
+ ? { thoughtSignature: stepThoughtSig }
1970
+ : {}),
1971
+ stepIndex: step,
1972
+ })), stepToolExecs.map((te) => ({
1973
+ toolName: te.name,
1974
+ output: te.output,
1975
+ stepIndex: step,
1976
+ })), options, new Date()), TOOL_STORAGE_TIMEOUT_MS, "tool storage write timed out").catch((error) => {
1977
+ logger.warn("[GoogleVertex] Failed to store native Gemini generate tool executions", {
1978
+ error: error instanceof Error ? error.message : String(error),
1979
+ });
1980
+ });
1981
+ }
1873
1982
  // The @google/genai SDK only accepts "user" and "model" as valid
1874
1983
  // roles in contents — function/tool responses must use role: "user"
1875
1984
  // (matching the SDK's automaticFunctionCalling implementation and
@@ -1908,7 +2017,7 @@ export class GoogleVertexProvider extends BaseProvider {
1908
2017
  },
1909
2018
  responseTime,
1910
2019
  toolsUsed: externalToolCalls.map((tc) => tc.toolName),
1911
- toolExecutions: externalToolExecutions,
2020
+ toolExecutions: transformToolExecutions(externalToolExecutions),
1912
2021
  enhancedWithTools: externalToolCalls.length > 0,
1913
2022
  };
1914
2023
  // Add structured output if final_result tool was used
@@ -1944,7 +2053,15 @@ export class GoogleVertexProvider extends BaseProvider {
1944
2053
  });
1945
2054
  // Build messages from input
1946
2055
  const messages = [];
1947
- // Add conversation history if present
2056
+ // Add conversation history if present.
2057
+ //
2058
+ // Intentionally text-only. Anthropic's API rejects messages where a
2059
+ // tool_use_id reference appears without its matching tool_use in the
2060
+ // same turn — so synthesising tool_use / tool_result blocks from
2061
+ // stored ChatMessages risks emitting orphaned references that fail
2062
+ // validation. Tool rows are still persisted to Redis (chat-history
2063
+ // UI renders them) but they don't re-enter the model's context on
2064
+ // subsequent turns.
1948
2065
  if (options.conversationMessages &&
1949
2066
  options.conversationMessages.length > 0) {
1950
2067
  for (const msg of options.conversationMessages) {
@@ -2175,157 +2292,270 @@ export class GoogleVertexProvider extends BaseProvider {
2175
2292
  stop_sequences: options.stopSequences,
2176
2293
  }),
2177
2294
  };
2178
- // Handle tool calling loop with max steps
2295
+ // ── Real-time streaming via stream.on('text', ...) ────────────────────
2296
+ //
2297
+ // The Anthropic SDK exposes per-delta streaming through `stream.on('text', listener)`:
2298
+ // each content_block_delta SSE event fires the listener synchronously
2299
+ // with that token's text — typically ~10 chars per delta, ~26ms apart
2300
+ // on Claude Haiku. Awaiting `stream.finalMessage()` here would buffer
2301
+ // the entire response before yielding anything; the listener pattern
2302
+ // keeps the wire and the consumer in lockstep instead.
2303
+ //
2304
+ // Structure: push-channel + background agentic loop, returning the
2305
+ // StreamResult immediately so callers can iterate `channel.iterable`
2306
+ // while generation is still in progress. Mirrors the executeStream
2307
+ // pattern in googleAiStudio.ts.
2179
2308
  const maxSteps = options.maxSteps || DEFAULT_MAX_STEPS;
2180
- let step = 0;
2181
- let finalText = "";
2182
- let structuredOutput;
2183
2309
  const allToolCalls = [];
2184
- // Track each Anthropic text block separately so the returned async
2185
- // iterable yields multiple chunks. The chunk-count smoke test fails
2186
- // when an entire response collapses into a single yield, even though
2187
- // the upstream stream is genuinely incremental.
2188
- const allTextBlocks = [];
2189
- let totalInputTokens = 0;
2190
- let totalOutputTokens = 0;
2191
- const currentMessages = [...messages];
2192
- while (step < maxSteps) {
2193
- step++;
2310
+ const toolExecutions = [];
2311
+ const channel = createTextChannel();
2312
+ // Mutable holders the StreamResult references. Background loop updates
2313
+ // these as state progresses; consumer reads them after iterating the
2314
+ // stream to completion (channel.close() is called AFTER mutations).
2315
+ const usage = { input: 0, output: 0, total: 0 };
2316
+ const metadata = {
2317
+ streamId: `native-anthropic-vertex-${Date.now()}`,
2318
+ startTime,
2319
+ responseTime: 0,
2320
+ totalToolExecutions: 0,
2321
+ };
2322
+ const toolsUsedRef = [];
2323
+ const structuredOutputRef = {};
2324
+ // Track the active Anthropic stream so options.abortSignal can cancel it
2325
+ // mid-flight (pre-rewrite code had no abort handling — fixed for free).
2326
+ let activeStream;
2327
+ const abortHandler = () => {
2194
2328
  try {
2195
- // Use streaming API
2196
- const stream = await client.messages.stream({
2197
- ...requestParams,
2198
- messages: currentMessages,
2199
- });
2200
- // Collect the full response
2201
- const response = await stream.finalMessage();
2202
- // Update token counts
2203
- totalInputTokens += response.usage?.input_tokens || 0;
2204
- totalOutputTokens += response.usage?.output_tokens || 0;
2205
- // Check if we need to handle tool use
2206
- const toolUseBlocks = response.content.filter((block) => block.type === "tool_use");
2207
- // Check for final_result tool call (for structured output)
2208
- if (useFinalResultTool) {
2209
- const finalResultCall = toolUseBlocks.find((block) => block.name === "final_result");
2210
- if (finalResultCall) {
2211
- // Extract structured output and convert to JSON string for finalText
2212
- structuredOutput = finalResultCall.input;
2213
- finalText = JSON.stringify(structuredOutput);
2214
- logger.debug("[GoogleVertex] Extracted structured output from final_result tool (stream)", { keys: Object.keys(structuredOutput) });
2215
- break; // We have the structured output, we're done
2216
- }
2217
- }
2218
- // Extract text from response
2219
- const textBlocks = response.content.filter((block) => block.type === "text");
2220
- const responseText = textBlocks.map((b) => b.text).join("");
2221
- // Preserve each Anthropic text block separately so the
2222
- // consumer-visible stream yields multiple chunks (one per block).
2223
- for (const tb of textBlocks) {
2224
- if (tb.text.length > 0) {
2225
- allTextBlocks.push(tb.text);
2329
+ activeStream?.controller.abort();
2330
+ }
2331
+ catch {
2332
+ /* ignore — stream may already be finalized */
2333
+ }
2334
+ };
2335
+ options.abortSignal?.addEventListener("abort", abortHandler);
2336
+ // Defensive upper bound: if the caller never aborts and the SDK never settles,
2337
+ // abort the stream after the configured timeout so a stalled
2338
+ // Vertex/Anthropic endpoint can't hang forever. options.timeout wins
2339
+ // if set; otherwise 5 min — generous for tool-heavy turns.
2340
+ const streamTimeoutMs = parseTimeout(options.timeout) ?? 300_000;
2341
+ const streamTimeoutHandle = setTimeout(() => {
2342
+ logger.warn(`[GoogleVertex] Anthropic stream exceeded ${streamTimeoutMs}ms — aborting`);
2343
+ abortHandler();
2344
+ }, streamTimeoutMs);
2345
+ const loopPromise = (async () => {
2346
+ let step = 0;
2347
+ const currentMessages = [...messages];
2348
+ try {
2349
+ while (step < maxSteps) {
2350
+ if (options.abortSignal?.aborted) {
2351
+ throw new Error("Stream aborted by caller");
2226
2352
  }
2227
- }
2228
- if (toolUseBlocks.length === 0) {
2229
- // No tool calls, we're done
2230
- finalText = responseText || finalText;
2231
- break;
2232
- }
2233
- // Handle tool calls
2234
- const toolResults = [];
2235
- for (const toolUse of toolUseBlocks) {
2236
- allToolCalls.push({
2237
- toolName: toolUse.name,
2238
- args: toolUse.input,
2353
+ step++;
2354
+ const stream = await client.messages.stream({
2355
+ ...requestParams,
2356
+ messages: currentMessages,
2239
2357
  });
2240
- const execute = executeMap.get(toolUse.name);
2241
- if (execute) {
2242
- try {
2243
- const result = await execute(toolUse.input);
2244
- toolResults.push({
2245
- type: "tool_result",
2246
- tool_use_id: toolUse.id,
2247
- content: typeof result === "string" ? result : JSON.stringify(result),
2248
- });
2358
+ activeStream = stream;
2359
+ // Forward each text delta to the consumer as it arrives. The
2360
+ // Anthropic SDK fires this listener synchronously for every
2361
+ // content_block_delta SSE event, so the channel sees bytes at
2362
+ // the same cadence the wire delivers them.
2363
+ stream.on("text", (delta) => {
2364
+ if (delta.length > 0) {
2365
+ channel.push(delta);
2249
2366
  }
2250
- catch (err) {
2367
+ });
2368
+ // finalMessage() resolves AFTER message_stop. By then the listener
2369
+ // has already fired for every delta — awaiting here doesn't block
2370
+ // visible streaming, it just gives us the structured response
2371
+ // shape needed for tool_use block extraction.
2372
+ const response = await stream.finalMessage();
2373
+ activeStream = undefined;
2374
+ usage.input += response.usage?.input_tokens || 0;
2375
+ usage.output += response.usage?.output_tokens || 0;
2376
+ usage.total = usage.input + usage.output;
2377
+ const toolUseBlocks = response.content.filter((block) => block.type === "tool_use");
2378
+ // Structured-output pattern: when the model returns the
2379
+ // final_result tool call, push its arguments as JSON and stop.
2380
+ // Single-shot yield so callers consuming the stream still see
2381
+ // the structured value.
2382
+ if (useFinalResultTool) {
2383
+ const finalResultCall = toolUseBlocks.find((block) => block.name === "final_result");
2384
+ if (finalResultCall) {
2385
+ structuredOutputRef.value = finalResultCall.input;
2386
+ channel.push(JSON.stringify(finalResultCall.input));
2387
+ logger.debug("[GoogleVertex] Extracted structured output from final_result tool (stream)", { keys: Object.keys(finalResultCall.input) });
2388
+ break;
2389
+ }
2390
+ }
2391
+ // No tools — pure text turn. Listener already pushed all deltas;
2392
+ // loop terminates and channel.close() flushes the consumer.
2393
+ if (toolUseBlocks.length === 0) {
2394
+ break;
2395
+ }
2396
+ // Tool execution loop. tool:start / tool:end events fire from
2397
+ // ToolsManager's wrapped execute (ToolsManager.ts:355) — no inline
2398
+ // emit needed.
2399
+ const toolResults = [];
2400
+ // Per-step bookkeeping for conversation-memory storage.
2401
+ const stepStorageCalls = [];
2402
+ const stepStorageResults = [];
2403
+ // Note: tool:start / tool:end events are emitted by ToolsManager's
2404
+ // wrapped `execute` (see ToolsManager.ts:355) — no inline emit needed.
2405
+ for (const toolUse of toolUseBlocks) {
2406
+ allToolCalls.push({
2407
+ toolName: toolUse.name,
2408
+ args: toolUse.input,
2409
+ });
2410
+ toolsUsedRef.push(toolUse.name);
2411
+ stepStorageCalls.push({
2412
+ toolCallId: toolUse.id,
2413
+ toolName: toolUse.name,
2414
+ args: toolUse.input,
2415
+ });
2416
+ const execute = executeMap.get(toolUse.name);
2417
+ if (execute) {
2418
+ try {
2419
+ const toolOptions = {
2420
+ toolCallId: toolUse.id,
2421
+ messages: [],
2422
+ abortSignal: options.abortSignal,
2423
+ };
2424
+ const result = await execute(toolUse.input, toolOptions);
2425
+ toolExecutions.push({
2426
+ name: toolUse.name,
2427
+ input: toolUse.input,
2428
+ output: result,
2429
+ });
2430
+ // Anthropic requires tool_result.content to be a string.
2431
+ // JSON.stringify returns undefined for undefined/function/symbol,
2432
+ // so coerce defensively to keep the follow-up turn valid.
2433
+ const resultContent = typeof result === "string"
2434
+ ? result
2435
+ : (JSON.stringify(result ?? null) ?? String(result));
2436
+ toolResults.push({
2437
+ type: "tool_result",
2438
+ tool_use_id: toolUse.id,
2439
+ content: resultContent,
2440
+ });
2441
+ stepStorageResults.push({
2442
+ toolCallId: toolUse.id,
2443
+ toolName: toolUse.name,
2444
+ output: result,
2445
+ });
2446
+ }
2447
+ catch (err) {
2448
+ const errMsg = `Error executing tool "${toolUse.name}": ${err instanceof Error ? err.message : String(err)}`;
2449
+ const errorPayload = { error: errMsg };
2450
+ toolExecutions.push({
2451
+ name: toolUse.name,
2452
+ input: toolUse.input,
2453
+ output: errorPayload,
2454
+ });
2455
+ toolResults.push({
2456
+ type: "tool_result",
2457
+ tool_use_id: toolUse.id,
2458
+ content: errMsg,
2459
+ });
2460
+ stepStorageResults.push({
2461
+ toolCallId: toolUse.id,
2462
+ toolName: toolUse.name,
2463
+ output: errorPayload,
2464
+ });
2465
+ }
2466
+ }
2467
+ else {
2468
+ const errMsg = `TOOL_NOT_FOUND: The tool "${toolUse.name}" does not exist.`;
2469
+ const errorPayload = { error: errMsg };
2470
+ toolExecutions.push({
2471
+ name: toolUse.name,
2472
+ input: toolUse.input,
2473
+ output: errorPayload,
2474
+ });
2251
2475
  toolResults.push({
2252
2476
  type: "tool_result",
2253
2477
  tool_use_id: toolUse.id,
2254
- content: `Error executing tool: ${err instanceof Error ? err.message : String(err)}`,
2478
+ content: errMsg,
2479
+ });
2480
+ stepStorageResults.push({
2481
+ toolCallId: toolUse.id,
2482
+ toolName: toolUse.name,
2483
+ output: errorPayload,
2255
2484
  });
2256
2485
  }
2257
2486
  }
2258
- else {
2259
- toolResults.push({
2260
- type: "tool_result",
2261
- tool_use_id: toolUse.id,
2262
- content: `TOOL_NOT_FOUND: The tool "${toolUse.name}" does not exist.`,
2487
+ // Persist this step's tool calls/results into conversation memory.
2488
+ // Without this hook, tool rows never land in Redis and the
2489
+ // chat-history UI loses every tool invocation.
2490
+ if (stepStorageCalls.length > 0 || stepStorageResults.length > 0) {
2491
+ withTimeout(this.handleToolExecutionStorage(stepStorageCalls.map((c) => ({ ...c, stepIndex: step })), stepStorageResults.map((r) => ({ ...r, stepIndex: step })), options, new Date()), TOOL_STORAGE_TIMEOUT_MS, "tool storage write timed out").catch((error) => {
2492
+ logger.warn("[GoogleVertex] Failed to store native Anthropic stream tool executions", {
2493
+ error: error instanceof Error ? error.message : String(error),
2494
+ });
2263
2495
  });
2264
2496
  }
2497
+ // Continue the loop: assistant turn + tool_result user turn.
2498
+ // Filter server_tool_use blocks (Anthropic API rejects them in
2499
+ // subsequent message turns).
2500
+ const assistantContent = response.content.filter((block) => block.type !== "server_tool_use");
2501
+ currentMessages.push({
2502
+ role: "assistant",
2503
+ content: assistantContent,
2504
+ });
2505
+ currentMessages.push({
2506
+ role: "user",
2507
+ content: toolResults,
2508
+ });
2265
2509
  }
2266
- // Add assistant message and tool results to continue the loop
2267
- // Filter out server_tool_use blocks that the Anthropic API doesn't accept in messages
2268
- const assistantContent = response.content.filter((block) => block.type !== "server_tool_use");
2269
- currentMessages.push({
2270
- role: "assistant",
2271
- content: assistantContent,
2272
- });
2273
- currentMessages.push({
2274
- role: "user",
2275
- content: toolResults,
2276
- });
2277
- // Store last text in case we hit max steps
2278
- if (responseText) {
2279
- finalText = responseText;
2280
- }
2281
- }
2282
- catch (error) {
2283
- logger.error("[GoogleVertex] Native Anthropic SDK stream error", error);
2284
- throw this.handleProviderError(error);
2510
+ metadata.responseTime = Date.now() - startTime;
2511
+ metadata.totalToolExecutions = allToolCalls.filter((tc) => tc.toolName !== "final_result").length;
2512
+ channel.close();
2285
2513
  }
2286
- }
2287
- const responseTime = Date.now() - startTime;
2288
- // Yield each text block separately so the CLI receives multiple
2289
- // stream chunks instead of a single coalesced buffer. The Anthropic
2290
- // SDK gives us discrete text blocks; collapsing them into one chunk
2291
- // breaks the chunk-count smoke test even though the upstream
2292
- // streaming is real.
2293
- const finalContentBlocks = (() => {
2294
- if (structuredOutput) {
2295
- return [finalText];
2514
+ catch (err) {
2515
+ logger.error("[GoogleVertex] Native Anthropic SDK stream error", err);
2516
+ channel.error(this.handleProviderError(err));
2296
2517
  }
2297
- if (allTextBlocks.length > 0) {
2298
- return allTextBlocks;
2518
+ finally {
2519
+ options.abortSignal?.removeEventListener("abort", abortHandler);
2520
+ clearTimeout(streamTimeoutHandle);
2299
2521
  }
2300
- return finalText ? [finalText] : [];
2301
2522
  })();
2302
- async function* createTextStream() {
2303
- for (const part of finalContentBlocks) {
2304
- if (part.length > 0) {
2305
- yield { content: part };
2306
- }
2307
- }
2308
- }
2309
- return {
2310
- stream: createTextStream(),
2523
+ // Suppress unhandled-rejection: errors funnel through channel.error()
2524
+ // and surface when the consumer iterates the stream.
2525
+ loopPromise.catch(() => undefined);
2526
+ // Return StreamResult IMMEDIATELY — caller's for-await can begin
2527
+ // iterating channel.iterable while the background loop is still
2528
+ // generating. usage / metadata / toolCalls / toolExecutions are mutable
2529
+ // references that the loop fills in over time; the consumer reads them
2530
+ // after iteration completes (after channel.close() has fired).
2531
+ const result = {
2532
+ stream: channel.iterable,
2311
2533
  provider: this.providerName,
2312
2534
  model: modelName,
2313
- usage: {
2314
- input: totalInputTokens,
2315
- output: totalOutputTokens,
2316
- total: totalInputTokens + totalOutputTokens,
2317
- },
2318
- toolCalls: allToolCalls.map((tc) => ({
2319
- toolName: tc.toolName,
2320
- args: tc.args,
2321
- })),
2322
- metadata: {
2323
- streamId: `native-anthropic-vertex-${Date.now()}`,
2324
- startTime,
2325
- responseTime,
2326
- totalToolExecutions: allToolCalls.length,
2327
- },
2535
+ usage,
2536
+ metadata,
2328
2537
  };
2538
+ Object.defineProperty(result, "toolCalls", {
2539
+ enumerable: true,
2540
+ configurable: true,
2541
+ get: () => allToolCalls.filter((tc) => tc.toolName !== "final_result"),
2542
+ });
2543
+ Object.defineProperty(result, "toolsUsed", {
2544
+ enumerable: true,
2545
+ configurable: true,
2546
+ get: () => toolsUsedRef.filter((name) => name !== "final_result"),
2547
+ });
2548
+ Object.defineProperty(result, "toolExecutions", {
2549
+ enumerable: true,
2550
+ configurable: true,
2551
+ get: () => transformToolExecutions(toolExecutions.filter((te) => te.name !== "final_result")),
2552
+ });
2553
+ Object.defineProperty(result, "structuredOutput", {
2554
+ enumerable: true,
2555
+ configurable: true,
2556
+ get: () => structuredOutputRef.value,
2557
+ });
2558
+ return result;
2329
2559
  }
2330
2560
  /**
2331
2561
  * Execute generate using native @anthropic-ai/vertex-sdk for Claude models on Vertex AI
@@ -2348,6 +2578,9 @@ export class GoogleVertexProvider extends BaseProvider {
2348
2578
  // the older surface. The Vertex Claude STREAM path already follows this
2349
2579
  // priority — keeping the GENERATE path on `conversationHistory` only
2350
2580
  // would silently drop multi-turn context for memory/loop sessions.
2581
+ // Intentionally text-only: see the stream sibling for the rationale —
2582
+ // synthesising tool_use / tool_result blocks from stored ChatMessages
2583
+ // risks emitting orphaned references that Anthropic's API rejects.
2351
2584
  const historyMessages = options.conversationMessages && options.conversationMessages.length > 0
2352
2585
  ? options.conversationMessages
2353
2586
  : options.conversationHistory;
@@ -2591,10 +2824,14 @@ export class GoogleVertexProvider extends BaseProvider {
2591
2824
  while (step < maxSteps) {
2592
2825
  step++;
2593
2826
  try {
2594
- const response = await client.messages.create({
2827
+ // Bound the SDK wait so a stalled Vertex/Anthropic call can't hang
2828
+ // generate forever. options.timeout wins if set, otherwise default
2829
+ // to 5 min — generous for tool-heavy turns.
2830
+ const generateTimeoutMs = parseTimeout(options.timeout) ?? 300_000;
2831
+ const response = await withTimeout(client.messages.create({
2595
2832
  ...requestParams,
2596
2833
  messages: currentMessages,
2597
- });
2834
+ }), generateTimeoutMs, "Anthropic generate timed out");
2598
2835
  // Update token counts
2599
2836
  totalInputTokens += response.usage?.input_tokens || 0;
2600
2837
  totalOutputTokens += response.usage?.output_tokens || 0;
@@ -2621,42 +2858,105 @@ export class GoogleVertexProvider extends BaseProvider {
2621
2858
  }
2622
2859
  // Handle tool calls
2623
2860
  const toolResults = [];
2861
+ // Per-step bookkeeping for conversation-memory storage. Tracks calls
2862
+ // and results for ONLY the tools fired in this step so the storage
2863
+ // hook can tag them with the current stepIndex.
2864
+ const stepStorageCalls = [];
2865
+ const stepStorageResults = [];
2866
+ // Note: tool:start / tool:end events are emitted by ToolsManager's
2867
+ // wrapped `execute` (see ToolsManager.ts:355) — no inline emit needed.
2624
2868
  for (const toolUse of toolUseBlocks) {
2625
2869
  allToolCalls.push({
2626
2870
  toolName: toolUse.name,
2627
2871
  args: toolUse.input,
2628
2872
  });
2873
+ stepStorageCalls.push({
2874
+ toolCallId: toolUse.id,
2875
+ toolName: toolUse.name,
2876
+ args: toolUse.input,
2877
+ });
2629
2878
  const execute = executeMap.get(toolUse.name);
2630
2879
  if (execute) {
2631
2880
  try {
2632
- const result = await execute(toolUse.input);
2881
+ const toolOptions = {
2882
+ toolCallId: toolUse.id,
2883
+ messages: [],
2884
+ abortSignal: options.abortSignal,
2885
+ };
2886
+ const result = await execute(toolUse.input, toolOptions);
2633
2887
  toolExecutions.push({
2634
2888
  name: toolUse.name,
2635
2889
  input: toolUse.input,
2636
2890
  output: result,
2637
2891
  });
2892
+ // Anthropic requires tool_result.content to be a string.
2893
+ // JSON.stringify returns undefined for undefined/function/symbol,
2894
+ // so coerce defensively to keep the follow-up turn valid.
2895
+ const resultContent = typeof result === "string"
2896
+ ? result
2897
+ : (JSON.stringify(result ?? null) ?? String(result));
2638
2898
  toolResults.push({
2639
2899
  type: "tool_result",
2640
2900
  tool_use_id: toolUse.id,
2641
- content: typeof result === "string" ? result : JSON.stringify(result),
2901
+ content: resultContent,
2902
+ });
2903
+ stepStorageResults.push({
2904
+ toolCallId: toolUse.id,
2905
+ toolName: toolUse.name,
2906
+ output: result,
2642
2907
  });
2643
2908
  }
2644
2909
  catch (err) {
2910
+ const errMsg = `Error executing tool "${toolUse.name}": ${err instanceof Error ? err.message : String(err)}`;
2911
+ const errorPayload = { error: errMsg };
2912
+ toolExecutions.push({
2913
+ name: toolUse.name,
2914
+ input: toolUse.input,
2915
+ output: errorPayload,
2916
+ });
2645
2917
  toolResults.push({
2646
2918
  type: "tool_result",
2647
2919
  tool_use_id: toolUse.id,
2648
- content: `Error executing tool: ${err instanceof Error ? err.message : String(err)}`,
2920
+ content: errMsg,
2921
+ });
2922
+ stepStorageResults.push({
2923
+ toolCallId: toolUse.id,
2924
+ toolName: toolUse.name,
2925
+ output: errorPayload,
2649
2926
  });
2650
2927
  }
2651
2928
  }
2652
2929
  else {
2930
+ const errMsg = `TOOL_NOT_FOUND: The tool "${toolUse.name}" does not exist.`;
2931
+ const errorPayload = { error: errMsg };
2932
+ toolExecutions.push({
2933
+ name: toolUse.name,
2934
+ input: toolUse.input,
2935
+ output: errorPayload,
2936
+ });
2653
2937
  toolResults.push({
2654
2938
  type: "tool_result",
2655
2939
  tool_use_id: toolUse.id,
2656
- content: `TOOL_NOT_FOUND: The tool "${toolUse.name}" does not exist.`,
2940
+ content: errMsg,
2941
+ });
2942
+ stepStorageResults.push({
2943
+ toolCallId: toolUse.id,
2944
+ toolName: toolUse.name,
2945
+ output: errorPayload,
2657
2946
  });
2658
2947
  }
2659
2948
  }
2949
+ // Persist this step's tool calls/results into conversation memory.
2950
+ // Without this, tool_call / tool_result rows never reach Redis and
2951
+ // the chat-history UI loses every tool invocation.
2952
+ // Fire-and-forget — storage failures must not break generation.
2953
+ if (stepStorageCalls.length > 0 || stepStorageResults.length > 0) {
2954
+ withTimeout(this.handleToolExecutionStorage(stepStorageCalls.map((c) => ({ ...c, stepIndex: step })), stepStorageResults.map((r) => ({ ...r, stepIndex: step })), options, new Date()), TOOL_STORAGE_TIMEOUT_MS, "tool storage write timed out").catch((error) => {
2955
+ logger.warn("[GoogleVertex] Failed to store native Anthropic generate tool executions", {
2956
+ error: error instanceof Error ? error.message : String(error),
2957
+ });
2958
+ });
2959
+ }
2660
2960
  // Add assistant message and tool results to continue the loop
2661
2961
  // Filter out server_tool_use blocks that the Anthropic API doesn't accept in messages
2662
2962
  const assistantContent = response.content.filter((block) => block.type !== "server_tool_use");
@@ -2679,6 +2979,8 @@ export class GoogleVertexProvider extends BaseProvider {
2679
2979
  }
2680
2980
  }
2681
2981
  const responseTime = Date.now() - startTime;
2982
+ const externalToolCalls = allToolCalls.filter((tc) => tc.toolName !== "final_result");
2983
+ const externalToolExecutions = toolExecutions.filter((te) => te.name !== "final_result");
2682
2984
  const result = {
2683
2985
  content: finalText,
2684
2986
  provider: this.providerName,
@@ -2689,9 +2991,9 @@ export class GoogleVertexProvider extends BaseProvider {
2689
2991
  total: totalInputTokens + totalOutputTokens,
2690
2992
  },
2691
2993
  responseTime,
2692
- toolsUsed: allToolCalls.map((tc) => tc.toolName),
2693
- toolExecutions,
2694
- enhancedWithTools: allToolCalls.length > 0,
2994
+ toolsUsed: externalToolCalls.map((tc) => tc.toolName),
2995
+ toolExecutions: transformToolExecutions(externalToolExecutions),
2996
+ enhancedWithTools: externalToolCalls.length > 0,
2695
2997
  };
2696
2998
  // Route through enhanceResult so analytics/evaluation/tracing are picked
2697
2999
  // up the same way the BaseProvider.generate() path picks them up. The