@juspay/neurolink 9.26.0 → 9.26.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/adapters/providerImageAdapter.js +6 -0
- package/dist/constants/contextWindows.js +2 -0
- package/dist/constants/enums.d.ts +2 -0
- package/dist/constants/enums.js +2 -0
- package/dist/lib/adapters/providerImageAdapter.js +6 -0
- package/dist/lib/constants/contextWindows.js +2 -0
- package/dist/lib/constants/enums.d.ts +2 -0
- package/dist/lib/constants/enums.js +2 -0
- package/dist/lib/providers/googleAiStudio.js +135 -89
- package/dist/lib/providers/googleNativeGemini3.d.ts +43 -0
- package/dist/lib/providers/googleNativeGemini3.js +148 -18
- package/dist/lib/providers/googleVertex.js +162 -140
- package/dist/providers/googleAiStudio.js +135 -89
- package/dist/providers/googleNativeGemini3.d.ts +43 -0
- package/dist/providers/googleNativeGemini3.js +148 -18
- package/dist/providers/googleVertex.js +162 -140
- package/package.json +18 -17
|
@@ -158,7 +158,6 @@ export function sanitizeToolsForGemini(tools) {
|
|
|
158
158
|
logger.warn(`[Gemini] Failed to sanitize tool "${name}", skipping: ${error instanceof Error ? error.message : String(error)}`);
|
|
159
159
|
// Don't fall back to the original tool — an incompatible schema would fail the Gemini request
|
|
160
160
|
dropped.push(name);
|
|
161
|
-
continue;
|
|
162
161
|
}
|
|
163
162
|
}
|
|
164
163
|
return { tools: sanitized, dropped };
|
|
@@ -171,29 +170,45 @@ export function sanitizeToolsForGemini(tools) {
|
|
|
171
170
|
export function buildNativeToolDeclarations(tools) {
|
|
172
171
|
const functionDeclarations = [];
|
|
173
172
|
const executeMap = new Map();
|
|
173
|
+
const skippedTools = [];
|
|
174
174
|
for (const [name, tool] of Object.entries(tools)) {
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
175
|
+
try {
|
|
176
|
+
const decl = {
|
|
177
|
+
name,
|
|
178
|
+
description: tool.description || `Tool: ${name}`,
|
|
179
|
+
};
|
|
180
|
+
if (tool.parameters) {
|
|
181
|
+
let rawSchema;
|
|
182
|
+
if (isZodSchema(tool.parameters)) {
|
|
183
|
+
rawSchema = convertZodToJsonSchema(tool.parameters);
|
|
184
|
+
}
|
|
185
|
+
else if (typeof tool.parameters === "object") {
|
|
186
|
+
rawSchema = tool.parameters;
|
|
187
|
+
}
|
|
188
|
+
else {
|
|
189
|
+
rawSchema = { type: "object", properties: {} };
|
|
190
|
+
}
|
|
191
|
+
// Unwrap Vercel AI SDK's jsonSchema() wrapper: { jsonSchema: { type: "object", ... } }
|
|
192
|
+
if (rawSchema.jsonSchema &&
|
|
193
|
+
typeof rawSchema.jsonSchema === "object" &&
|
|
194
|
+
!rawSchema.type) {
|
|
195
|
+
rawSchema = rawSchema.jsonSchema;
|
|
196
|
+
}
|
|
197
|
+
decl.parametersJsonSchema = sanitizeSchemaForGemini(inlineJsonSchema(rawSchema));
|
|
186
198
|
}
|
|
187
|
-
|
|
188
|
-
|
|
199
|
+
functionDeclarations.push(decl);
|
|
200
|
+
if (tool.execute) {
|
|
201
|
+
executeMap.set(name, tool.execute);
|
|
189
202
|
}
|
|
190
|
-
decl.parametersJsonSchema = sanitizeSchemaForGemini(inlineJsonSchema(rawSchema));
|
|
191
203
|
}
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
204
|
+
catch (err) {
|
|
205
|
+
skippedTools.push(name);
|
|
206
|
+
logger.error(`[buildNativeToolDeclarations] Failed to convert tool "${name}":`, err);
|
|
195
207
|
}
|
|
196
208
|
}
|
|
209
|
+
if (skippedTools.length > 0) {
|
|
210
|
+
logger.warn(`[buildNativeToolDeclarations] ${skippedTools.length} tool(s) skipped due to schema errors: ${skippedTools.join(", ")}`);
|
|
211
|
+
}
|
|
197
212
|
return { toolsConfig: [{ functionDeclarations }], executeMap };
|
|
198
213
|
}
|
|
199
214
|
/**
|
|
@@ -265,6 +280,121 @@ export async function collectStreamChunks(stream) {
|
|
|
265
280
|
}
|
|
266
281
|
return { rawResponseParts, stepFunctionCalls, inputTokens, outputTokens };
|
|
267
282
|
}
|
|
283
|
+
/**
 * Create a push-based text channel that bridges a background producer
 * (the agentic tool-calling loop) with an async-iterable consumer.
 *
 * This enables truly incremental streaming: text parts are yielded to the
 * caller as they arrive from the network, rather than being buffered until
 * the model finishes generating.
 *
 * The returned object exposes:
 *  - `push(text)`  — enqueue a chunk; it is yielded as `{ content: text }`.
 *                    Ignored once the channel is closed, failed or abandoned.
 *  - `close()`     — signal normal completion; already-queued chunks are
 *                    still delivered before the iterator finishes.
 *  - `error(err)`  — signal failure; already-queued chunks are delivered
 *                    first, then the iterator throws. Non-Error values
 *                    (including `undefined`) are wrapped in an `Error`.
 *  - `iterable`    — a single async generator instance (one consumer).
 *
 * @returns {{ push: (text: string) => void, close: () => void, error: (err: unknown) => void, iterable: AsyncGenerator<{ content: string }> }}
 */
export function createTextChannel() {
    const queue = [];
    let done = false;
    // Failure is tracked with its own flag rather than inferred from
    // `fatalError !== undefined`, so that error(undefined) still fails the
    // stream instead of being mistaken for a clean close().
    let failed = false;
    let fatalError = undefined;
    // Resolver of the consumer's current "wait for data" promise, if any.
    let notify = null;
    // Index of the next unread chunk in `queue`.
    let readIndex = 0;
    function wake() {
        if (notify) {
            // Clear before calling so a re-entrant wake() is a no-op.
            const fn = notify;
            notify = null;
            fn();
        }
    }
    function push(text) {
        if (done) {
            return;
        }
        queue.push({ content: text });
        wake();
    }
    function close() {
        done = true;
        wake();
    }
    function error(err) {
        done = true;
        failed = true;
        fatalError = err;
        wake();
    }
    async function* iterable() {
        try {
            while (true) {
                if (readIndex < queue.length) {
                    yield queue[readIndex++];
                    // Periodically compact consumed chunks to avoid unbounded
                    // retention: once more than 1024 chunks have been read and
                    // the consumed prefix is at least half of the queue.
                    if (readIndex > 1024 && readIndex * 2 >= queue.length) {
                        queue.splice(0, readIndex);
                        readIndex = 0;
                    }
                }
                else if (done) {
                    // Queue fully drained and the producer has finished.
                    if (failed) {
                        throw fatalError instanceof Error
                            ? fatalError
                            : new Error(String(fatalError));
                    }
                    return;
                }
                else {
                    // Wait until the producer pushes data or signals completion
                    await new Promise((resolve) => {
                        notify = resolve;
                    });
                }
            }
        }
        finally {
            // Consumer stopped reading (e.g. disconnect/cancel): stop buffering.
            done = true;
            queue.length = 0;
            wake();
        }
    }
    return { push, close, error, iterable: iterable() };
}
|
|
357
|
+
/**
 * Iterate a single stream step incrementally, pushing text parts to `channel`
 * as they arrive from the network while simultaneously accumulating the full
 * `CollectedChunkResult` needed for history and token accounting.
 *
 * Used for all steps (both intermediate tool-calling steps and the final
 * text-only step). Text parts are pushed to the channel as they arrive,
 * enabling truly incremental streaming. The complete `rawResponseParts`
 * (including thoughtSignature) are still returned at the end for use by
 * `pushModelResponseToHistory`.
 *
 * Only the first candidate of each chunk is read. Token counts are the
 * maximum `promptTokenCount` / `candidatesTokenCount` seen across the
 * chunks of this step, not a sum.
 *
 * @param {AsyncIterable<object>} stream - Chunks of one streamed response.
 * @param {{ push: (text: string) => void }} channel - Receives every
 *   non-empty string `text` part as soon as it is seen.
 * @returns {Promise<{ rawResponseParts: object[], stepFunctionCalls: object[], inputTokens: number, outputTokens: number }>}
 */
export async function collectStreamChunksIncremental(stream, channel) {
    const rawResponseParts = [];
    const stepFunctionCalls = [];
    let inputTokens = 0;
    let outputTokens = 0;
    for await (const chunk of stream) {
        // A malformed (null/undefined) chunk carries no data; skip it rather
        // than failing the whole stream with a TypeError.
        if (chunk == null) {
            continue;
        }
        const parts = chunk.candidates?.[0]?.content?.parts;
        if (Array.isArray(parts)) {
            for (const part of parts) {
                // Likewise skip empty slots so they neither crash the loop
                // nor end up in the history built from rawResponseParts.
                if (part == null) {
                    continue;
                }
                rawResponseParts.push(part);
                // Forward text parts to the consumer immediately
                if (typeof part.text === "string" && part.text.length > 0) {
                    channel.push(part.text);
                }
            }
        }
        if (chunk.functionCalls) {
            stepFunctionCalls.push(...chunk.functionCalls);
        }
        const usage = chunk.usageMetadata;
        if (usage) {
            inputTokens = Math.max(inputTokens, usage.promptTokenCount || 0);
            outputTokens = Math.max(outputTokens, usage.candidatesTokenCount || 0);
        }
    }
    return { rawResponseParts, stepFunctionCalls, inputTokens, outputTokens };
}
|
|
268
398
|
/**
|
|
269
399
|
* Extract text from raw response parts, filtering out non-text parts
|
|
270
400
|
* (thoughtSignature, functionCall) to avoid SDK warnings.
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
+
import dns from "node:dns";
|
|
1
2
|
import { createVertex, } from "@ai-sdk/google-vertex";
|
|
2
3
|
import { createVertexAnthropic, } from "@ai-sdk/google-vertex/anthropic";
|
|
4
|
+
import { SpanKind, SpanStatusCode, trace } from "@opentelemetry/api";
|
|
3
5
|
import { embed, embedMany, Output, streamText, } from "ai";
|
|
4
|
-
import { trace, SpanKind, SpanStatusCode } from "@opentelemetry/api";
|
|
5
|
-
import dns from "node:dns";
|
|
6
6
|
import fs from "fs";
|
|
7
7
|
import os from "os";
|
|
8
8
|
import path from "path";
|
|
@@ -11,18 +11,18 @@ import { BaseProvider } from "../core/baseProvider.js";
|
|
|
11
11
|
import { DEFAULT_MAX_STEPS, GLOBAL_LOCATION_MODELS, } from "../core/constants.js";
|
|
12
12
|
import { ModelConfigurationManager } from "../core/modelConfiguration.js";
|
|
13
13
|
import { createProxyFetch } from "../proxy/proxyFetch.js";
|
|
14
|
-
import {
|
|
14
|
+
import { ATTR, tracers, withClientSpan } from "../telemetry/index.js";
|
|
15
|
+
import { AuthenticationError, InvalidModelError, NetworkError, ProviderError, RateLimitError, } from "../types/errors.js";
|
|
15
16
|
import { ERROR_CODES, NeuroLinkError } from "../utils/errorHandling.js";
|
|
16
17
|
import { FileDetector } from "../utils/fileDetector.js";
|
|
17
18
|
import { logger } from "../utils/logger.js";
|
|
18
|
-
import { estimateTokens } from "../utils/tokenEstimation.js";
|
|
19
19
|
import { isGemini3Model } from "../utils/modelDetection.js";
|
|
20
20
|
import { calculateCost } from "../utils/pricing.js";
|
|
21
|
-
import { tracers, ATTR, withClientSpan } from "../telemetry/index.js";
|
|
22
21
|
import { createGoogleAuthConfig, createVertexProjectConfig, validateApiKey, } from "../utils/providerConfig.js";
|
|
23
22
|
import { convertZodToJsonSchema, inlineJsonSchema, } from "../utils/schemaConversion.js";
|
|
24
23
|
import { composeAbortSignals, createTimeoutController, TimeoutError, } from "../utils/timeout.js";
|
|
25
|
-
import {
|
|
24
|
+
import { estimateTokens } from "../utils/tokenEstimation.js";
|
|
25
|
+
import { buildNativeConfig, buildNativeToolDeclarations, collectStreamChunks, collectStreamChunksIncremental, computeMaxSteps as computeMaxStepsShared, createTextChannel, executeNativeToolCalls, extractTextFromParts, handleMaxStepsTermination, pushModelResponseToHistory, sanitizeToolsForGemini, } from "./googleNativeGemini3.js";
|
|
26
26
|
// Import proper types for multimodal message handling
|
|
27
27
|
// Keep-alive note: Node.js native fetch and undici (used by createProxyFetch)
|
|
28
28
|
// handle HTTP keep-alive internally. The fetchWithRetry wrapper in proxyFetch.ts
|
|
@@ -1217,15 +1217,13 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
1217
1217
|
toolNames: toolsConfig[0].functionDeclarations.map((t) => t.name),
|
|
1218
1218
|
});
|
|
1219
1219
|
}
|
|
1220
|
-
// Build config
|
|
1220
|
+
// Build config — systemInstruction stays in config for Gemini 3.x.
|
|
1221
|
+
// The @google/genai SDK maps config.systemInstruction to the HTTP-level
|
|
1222
|
+
// system_instruction field, which is the correct mechanism for all
|
|
1223
|
+
// Gemini 3.x models (including global endpoint). Older workaround
|
|
1224
|
+
// that moved systemInstruction into user/model content messages caused
|
|
1225
|
+
// "Please use a valid role: user, model" on Gemini 3.1+ preview models.
|
|
1221
1226
|
const config = buildNativeConfig(options, toolsConfig);
|
|
1222
|
-
// Global endpoint rejects systemInstruction for Gemini 3.x —
|
|
1223
|
-
// move it into a prefixed user message (same fix as generate path)
|
|
1224
|
-
let streamSystemPreamble;
|
|
1225
|
-
if (effectiveLocation === "global" && config.systemInstruction) {
|
|
1226
|
-
streamSystemPreamble = config.systemInstruction;
|
|
1227
|
-
delete config.systemInstruction;
|
|
1228
|
-
}
|
|
1229
1227
|
// Add JSON output format support for native SDK stream
|
|
1230
1228
|
if (streamOptions.output?.format === "json" || streamOptions.schema) {
|
|
1231
1229
|
config.responseMimeType = "application/json";
|
|
@@ -1247,110 +1245,146 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
1247
1245
|
const composedSignal = composeAbortSignals(options.abortSignal, timeoutController?.controller.signal);
|
|
1248
1246
|
const maxSteps = computeMaxStepsShared(options.maxSteps);
|
|
1249
1247
|
// Inject conversation history so the native path has multi-turn context
|
|
1250
|
-
|
|
1251
|
-
//
|
|
1252
|
-
|
|
1253
|
-
|
|
1254
|
-
|
|
1255
|
-
role: "user",
|
|
1256
|
-
parts: [
|
|
1257
|
-
{ text: `[System Instructions]\n${streamSystemPreamble}` },
|
|
1258
|
-
],
|
|
1259
|
-
},
|
|
1260
|
-
{
|
|
1261
|
-
role: "model",
|
|
1262
|
-
parts: [{ text: "OK" }],
|
|
1263
|
-
},
|
|
1264
|
-
...currentContents,
|
|
1265
|
-
];
|
|
1266
|
-
}
|
|
1267
|
-
let finalText = "";
|
|
1268
|
-
let lastStepText = "";
|
|
1269
|
-
let totalInputTokens = 0;
|
|
1270
|
-
let totalOutputTokens = 0;
|
|
1248
|
+
const currentContents = this.prependConversationHistory([...contents], options.conversationMessages);
|
|
1249
|
+
// Create a push-based text channel so the caller receives tokens as
|
|
1250
|
+
// they arrive from the network rather than after full buffering.
|
|
1251
|
+
const channel = createTextChannel();
|
|
1252
|
+
// Shared mutable state updated by the background agentic loop.
|
|
1271
1253
|
const allToolCalls = [];
|
|
1272
|
-
|
|
1273
|
-
|
|
1274
|
-
|
|
1275
|
-
|
|
1276
|
-
|
|
1277
|
-
|
|
1278
|
-
|
|
1279
|
-
|
|
1280
|
-
|
|
1281
|
-
|
|
1282
|
-
|
|
1283
|
-
|
|
1284
|
-
|
|
1285
|
-
|
|
1286
|
-
|
|
1287
|
-
|
|
1288
|
-
|
|
1289
|
-
|
|
1290
|
-
|
|
1291
|
-
|
|
1292
|
-
|
|
1293
|
-
|
|
1294
|
-
|
|
1295
|
-
|
|
1296
|
-
|
|
1297
|
-
|
|
1254
|
+
// Shared metadata object mutated by the background loop so that
|
|
1255
|
+
// responseTime and totalToolExecutions reflect final values.
|
|
1256
|
+
const metadata = {
|
|
1257
|
+
streamId: `native-vertex-${Date.now()}`,
|
|
1258
|
+
startTime,
|
|
1259
|
+
responseTime: 0,
|
|
1260
|
+
totalToolExecutions: 0,
|
|
1261
|
+
};
|
|
1262
|
+
// analyticsResolvers lets the background loop settle the analytics
|
|
1263
|
+
// promise once token counts are known (after the loop completes).
|
|
1264
|
+
let analyticsResolve;
|
|
1265
|
+
let analyticsReject;
|
|
1266
|
+
const analyticsPromise = new Promise((res, rej) => {
|
|
1267
|
+
analyticsResolve = res;
|
|
1268
|
+
analyticsReject = rej;
|
|
1269
|
+
});
|
|
1270
|
+
// Run the agentic loop in the background without awaiting it here,
|
|
1271
|
+
// so we can return the StreamResult (with channel.iterable) immediately.
|
|
1272
|
+
const loopPromise = (async () => {
|
|
1273
|
+
let lastStepText = "";
|
|
1274
|
+
let totalInputTokens = 0;
|
|
1275
|
+
let totalOutputTokens = 0;
|
|
1276
|
+
let step = 0;
|
|
1277
|
+
let completedWithFinalAnswer = false;
|
|
1278
|
+
const failedTools = new Map();
|
|
1279
|
+
try {
|
|
1280
|
+
// Agentic loop for tool calling
|
|
1281
|
+
while (step < maxSteps) {
|
|
1282
|
+
if (composedSignal?.aborted) {
|
|
1283
|
+
throw composedSignal.reason instanceof Error
|
|
1284
|
+
? composedSignal.reason
|
|
1285
|
+
: new Error("Request aborted");
|
|
1298
1286
|
}
|
|
1299
|
-
|
|
1300
|
-
|
|
1301
|
-
|
|
1302
|
-
|
|
1303
|
-
|
|
1304
|
-
|
|
1287
|
+
step++;
|
|
1288
|
+
logger.debug(`[GoogleVertex] Native SDK step ${step}/${maxSteps}`);
|
|
1289
|
+
try {
|
|
1290
|
+
const rawStream = await client.models.generateContentStream({
|
|
1291
|
+
model: modelName,
|
|
1292
|
+
contents: currentContents,
|
|
1293
|
+
config,
|
|
1294
|
+
...(composedSignal
|
|
1295
|
+
? { httpOptions: { signal: composedSignal } }
|
|
1296
|
+
: {}),
|
|
1297
|
+
});
|
|
1298
|
+
// For every step, use incremental collection so text parts
|
|
1299
|
+
// are pushed to the channel as they arrive. For intermediate
|
|
1300
|
+
// steps (those that produce function calls) we still need the
|
|
1301
|
+
// complete rawResponseParts for pushModelResponseToHistory,
|
|
1302
|
+
// which collectStreamChunksIncremental provides at stream end.
|
|
1303
|
+
const chunkResult = await collectStreamChunksIncremental(rawStream, channel);
|
|
1304
|
+
totalInputTokens += chunkResult.inputTokens;
|
|
1305
|
+
totalOutputTokens += chunkResult.outputTokens;
|
|
1306
|
+
const stepText = extractTextFromParts(chunkResult.rawResponseParts);
|
|
1307
|
+
// If no function calls, this was the final step — channel
|
|
1308
|
+
// already received all text parts incrementally.
|
|
1309
|
+
if (chunkResult.stepFunctionCalls.length === 0) {
|
|
1310
|
+
completedWithFinalAnswer = true;
|
|
1311
|
+
break;
|
|
1312
|
+
}
|
|
1313
|
+
lastStepText = stepText;
|
|
1314
|
+
// Record tool call events on the span
|
|
1315
|
+
for (const fc of chunkResult.stepFunctionCalls) {
|
|
1316
|
+
span.addEvent("gen_ai.tool_call", {
|
|
1317
|
+
"tool.name": fc.name,
|
|
1318
|
+
"tool.step": step,
|
|
1319
|
+
});
|
|
1320
|
+
}
|
|
1321
|
+
logger.debug(`[GoogleVertex] Executing ${chunkResult.stepFunctionCalls.length} function calls`);
|
|
1322
|
+
pushModelResponseToHistory(currentContents, chunkResult.rawResponseParts, chunkResult.stepFunctionCalls);
|
|
1323
|
+
const functionResponses = await executeNativeToolCalls("[GoogleVertex]", chunkResult.stepFunctionCalls, executeMap, failedTools, allToolCalls, { abortSignal: composedSignal });
|
|
1324
|
+
// Function/tool responses must use role: "user" — the
|
|
1325
|
+
// @google/genai SDK's validateHistory() only accepts "user"
|
|
1326
|
+
// and "model" roles (matching automaticFunctionCalling).
|
|
1327
|
+
currentContents.push({
|
|
1328
|
+
role: "user",
|
|
1329
|
+
parts: functionResponses,
|
|
1305
1330
|
});
|
|
1306
1331
|
}
|
|
1307
|
-
|
|
1308
|
-
|
|
1309
|
-
|
|
1310
|
-
|
|
1311
|
-
currentContents.push({
|
|
1312
|
-
role: "function",
|
|
1313
|
-
parts: functionResponses,
|
|
1314
|
-
});
|
|
1332
|
+
catch (error) {
|
|
1333
|
+
logger.error("[GoogleVertex] Native SDK error", error);
|
|
1334
|
+
throw this.handleProviderError(error);
|
|
1335
|
+
}
|
|
1315
1336
|
}
|
|
1316
|
-
|
|
1317
|
-
|
|
1318
|
-
|
|
1337
|
+
// Handle max-steps termination: if the model was still calling
|
|
1338
|
+
// tools when we hit the limit, push a synthetic final message.
|
|
1339
|
+
if (step >= maxSteps && !completedWithFinalAnswer) {
|
|
1340
|
+
const fallback = handleMaxStepsTermination("[GoogleVertex]", step, maxSteps, "", // finalText is empty — model didn't stop on its own
|
|
1341
|
+
lastStepText);
|
|
1342
|
+
if (fallback) {
|
|
1343
|
+
channel.push(fallback);
|
|
1344
|
+
}
|
|
1319
1345
|
}
|
|
1346
|
+
const responseTime = Date.now() - startTime;
|
|
1347
|
+
// Propagate final values to the shared metadata object so that
|
|
1348
|
+
// the already-returned StreamResult reflects accurate telemetry.
|
|
1349
|
+
metadata.responseTime = responseTime;
|
|
1350
|
+
metadata.totalToolExecutions = allToolCalls.length;
|
|
1351
|
+
// Set token usage and finish reason on the span
|
|
1352
|
+
span.setAttribute(ATTR.GEN_AI_INPUT_TOKENS, totalInputTokens);
|
|
1353
|
+
span.setAttribute(ATTR.GEN_AI_OUTPUT_TOKENS, totalOutputTokens);
|
|
1354
|
+
span.setAttribute(ATTR.GEN_AI_FINISH_REASON, step >= maxSteps && !completedWithFinalAnswer
|
|
1355
|
+
? "max_steps"
|
|
1356
|
+
: "stop");
|
|
1357
|
+
analyticsResolve({
|
|
1358
|
+
provider: this.providerName,
|
|
1359
|
+
model: modelName,
|
|
1360
|
+
tokenUsage: {
|
|
1361
|
+
input: totalInputTokens,
|
|
1362
|
+
output: totalOutputTokens,
|
|
1363
|
+
total: totalInputTokens + totalOutputTokens,
|
|
1364
|
+
},
|
|
1365
|
+
requestDuration: responseTime,
|
|
1366
|
+
timestamp: new Date().toISOString(),
|
|
1367
|
+
});
|
|
1368
|
+
channel.close();
|
|
1320
1369
|
}
|
|
1321
|
-
|
|
1322
|
-
|
|
1323
|
-
|
|
1324
|
-
|
|
1325
|
-
|
|
1326
|
-
|
|
1327
|
-
|
|
1328
|
-
|
|
1329
|
-
|
|
1330
|
-
|
|
1331
|
-
|
|
1332
|
-
async function* createTextStream() {
|
|
1333
|
-
yield { content: finalText };
|
|
1334
|
-
}
|
|
1370
|
+
catch (err) {
|
|
1371
|
+
channel.error(err);
|
|
1372
|
+
analyticsReject(err);
|
|
1373
|
+
}
|
|
1374
|
+
finally {
|
|
1375
|
+
timeoutController?.cleanup();
|
|
1376
|
+
}
|
|
1377
|
+
})();
|
|
1378
|
+
// Suppress unhandled-rejection warnings on loopPromise — errors are
|
|
1379
|
+
// forwarded to the channel and will surface when the caller iterates.
|
|
1380
|
+
loopPromise.catch(() => undefined);
|
|
1335
1381
|
return {
|
|
1336
|
-
stream:
|
|
1382
|
+
stream: channel.iterable,
|
|
1337
1383
|
provider: this.providerName,
|
|
1338
1384
|
model: modelName,
|
|
1339
|
-
|
|
1340
|
-
|
|
1341
|
-
|
|
1342
|
-
total: totalInputTokens + totalOutputTokens,
|
|
1343
|
-
},
|
|
1344
|
-
toolCalls: allToolCalls.map((tc) => ({
|
|
1345
|
-
toolName: tc.toolName,
|
|
1346
|
-
args: tc.args,
|
|
1347
|
-
})),
|
|
1348
|
-
metadata: {
|
|
1349
|
-
streamId: `native-vertex-${Date.now()}`,
|
|
1350
|
-
startTime,
|
|
1351
|
-
responseTime,
|
|
1352
|
-
totalToolExecutions: allToolCalls.length,
|
|
1353
|
-
},
|
|
1385
|
+
toolCalls: allToolCalls,
|
|
1386
|
+
analytics: analyticsPromise,
|
|
1387
|
+
metadata,
|
|
1354
1388
|
};
|
|
1355
1389
|
});
|
|
1356
1390
|
}
|
|
@@ -1378,7 +1412,9 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
1378
1412
|
location: effectiveLocation,
|
|
1379
1413
|
});
|
|
1380
1414
|
// Build contents from input with multimodal support
|
|
1381
|
-
|
|
1415
|
+
// Prefer input.text over prompt — processCSVFilesForNativeSDK enriches
|
|
1416
|
+
// input.text with inlined CSV data, so using prompt first would discard it.
|
|
1417
|
+
const inputText = options.input?.text || options.prompt || "Please respond.";
|
|
1382
1418
|
const multimodalInput = options.input;
|
|
1383
1419
|
const contents = this.buildNativeContentParts(inputText, multimodalInput, "native generate");
|
|
1384
1420
|
// Get tools from SDK and options
|
|
@@ -1404,16 +1440,9 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
1404
1440
|
toolNames: toolsConfig[0].functionDeclarations.map((t) => t.name),
|
|
1405
1441
|
});
|
|
1406
1442
|
}
|
|
1407
|
-
// Build config
|
|
1443
|
+
// Build config — systemInstruction stays in config for Gemini 3.x.
|
|
1444
|
+
// See stream path comment for rationale.
|
|
1408
1445
|
const config = buildNativeConfig(options, toolsConfig);
|
|
1409
|
-
// Global endpoint rejects systemInstruction for Gemini 3.x, returning
|
|
1410
|
-
// "Please use a valid role: user, model." Move it into a prefixed
|
|
1411
|
-
// user message so the model still receives the system context.
|
|
1412
|
-
let systemPreamble;
|
|
1413
|
-
if (effectiveLocation === "global" && config.systemInstruction) {
|
|
1414
|
-
systemPreamble = config.systemInstruction;
|
|
1415
|
-
delete config.systemInstruction;
|
|
1416
|
-
}
|
|
1417
1446
|
// Note: Schema/JSON output for Gemini 3 native SDK is complex due to $ref resolution issues
|
|
1418
1447
|
// For now, schemas are handled via the AI SDK fallback path, not native SDK
|
|
1419
1448
|
// TODO: Implement proper $ref resolution for complex nested schemas
|
|
@@ -1423,21 +1452,7 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
1423
1452
|
const composedSignal = composeAbortSignals(options.abortSignal, timeoutController?.controller.signal);
|
|
1424
1453
|
const maxSteps = computeMaxStepsShared(options.maxSteps);
|
|
1425
1454
|
// Inject conversation history so the native path has multi-turn context
|
|
1426
|
-
|
|
1427
|
-
// Prepend system prompt as a user message for the global endpoint
|
|
1428
|
-
if (systemPreamble) {
|
|
1429
|
-
currentContents = [
|
|
1430
|
-
{
|
|
1431
|
-
role: "user",
|
|
1432
|
-
parts: [{ text: `[System Instructions]\n${systemPreamble}` }],
|
|
1433
|
-
},
|
|
1434
|
-
{
|
|
1435
|
-
role: "model",
|
|
1436
|
-
parts: [{ text: "OK" }],
|
|
1437
|
-
},
|
|
1438
|
-
...currentContents,
|
|
1439
|
-
];
|
|
1440
|
-
}
|
|
1455
|
+
const currentContents = this.prependConversationHistory([...contents], options.conversationMessages);
|
|
1441
1456
|
let finalText = "";
|
|
1442
1457
|
let lastStepText = "";
|
|
1443
1458
|
let totalInputTokens = 0;
|
|
@@ -1449,8 +1464,10 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
1449
1464
|
try {
|
|
1450
1465
|
// Agentic loop for tool calling
|
|
1451
1466
|
while (step < maxSteps) {
|
|
1452
|
-
if (
|
|
1453
|
-
|
|
1467
|
+
if (composedSignal?.aborted) {
|
|
1468
|
+
throw composedSignal.reason instanceof Error
|
|
1469
|
+
? composedSignal.reason
|
|
1470
|
+
: new Error("Request aborted");
|
|
1454
1471
|
}
|
|
1455
1472
|
step++;
|
|
1456
1473
|
logger.debug(`[GoogleVertex] Native SDK generate step ${step}/${maxSteps}`);
|
|
@@ -1483,9 +1500,11 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
1483
1500
|
logger.debug(`[GoogleVertex] Generate executing ${chunkResult.stepFunctionCalls.length} function calls`);
|
|
1484
1501
|
pushModelResponseToHistory(currentContents, chunkResult.rawResponseParts, chunkResult.stepFunctionCalls);
|
|
1485
1502
|
const functionResponses = await executeNativeToolCalls("[GoogleVertex]", chunkResult.stepFunctionCalls, executeMap, failedTools, allToolCalls, { toolExecutions, abortSignal: composedSignal });
|
|
1486
|
-
//
|
|
1503
|
+
// Function/tool responses must use role: "user" — the
|
|
1504
|
+
// @google/genai SDK's validateHistory() only accepts "user"
|
|
1505
|
+
// and "model" roles (matching automaticFunctionCalling).
|
|
1487
1506
|
currentContents.push({
|
|
1488
|
-
role: "
|
|
1507
|
+
role: "user",
|
|
1489
1508
|
parts: functionResponses,
|
|
1490
1509
|
});
|
|
1491
1510
|
}
|
|
@@ -1598,13 +1617,16 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
1598
1617
|
? { prompt: optionsOrPrompt }
|
|
1599
1618
|
: optionsOrPrompt;
|
|
1600
1619
|
const modelName = this.resolveAlias(options.model || this.modelName || getDefaultVertexModel());
|
|
1620
|
+
// Structured output (JSON format or schema) is incompatible with tools on Gemini.
|
|
1621
|
+
// Mirror the stream path pattern to prevent silent downgrade on the generate path.
|
|
1622
|
+
const wantsStructuredOutput = options.output?.format === "json" || !!options.schema;
|
|
1601
1623
|
// Check if we should use native SDK for Gemini 3 with tools
|
|
1602
|
-
const shouldUseTools = !options.disableTools && this.supportsTools();
|
|
1624
|
+
const shouldUseTools = !options.disableTools && this.supportsTools() && !wantsStructuredOutput;
|
|
1603
1625
|
const sdkTools = shouldUseTools ? await this.getAllTools() : {};
|
|
1604
1626
|
const hasTools = shouldUseTools &&
|
|
1605
1627
|
(Object.keys(sdkTools).length > 0 ||
|
|
1606
1628
|
(options.tools && Object.keys(options.tools).length > 0));
|
|
1607
|
-
if (isGemini3Model(modelName) && hasTools) {
|
|
1629
|
+
if (isGemini3Model(modelName) && hasTools && !wantsStructuredOutput) {
|
|
1608
1630
|
// Process CSV files before routing to native SDK (bypasses normal message builder)
|
|
1609
1631
|
const processedOptions = await this.processCSVFilesForNativeSDK(options);
|
|
1610
1632
|
// Merge SDK tools into options for native SDK path
|
|
@@ -2434,7 +2456,7 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
2434
2456
|
if (!isRetryable || attempt === maxRetries) {
|
|
2435
2457
|
throw error;
|
|
2436
2458
|
}
|
|
2437
|
-
const delay = baseDelay *
|
|
2459
|
+
const delay = baseDelay * 2 ** (attempt - 1);
|
|
2438
2460
|
logger.warn(`[GoogleVertexProvider] Auth token transient error (${err?.code || err?.message}), retrying in ${delay}ms (attempt ${attempt}/${maxRetries})`);
|
|
2439
2461
|
await new Promise((r) => setTimeout(r, delay));
|
|
2440
2462
|
}
|