@juspay/neurolink 9.42.0 → 9.42.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +2 -0
- package/dist/auth/anthropicOAuth.js +12 -0
- package/dist/browser/neurolink.min.js +337 -336
- package/dist/cli/commands/mcp.d.ts +6 -0
- package/dist/cli/commands/mcp.js +188 -184
- package/dist/cli/commands/proxy.js +537 -518
- package/dist/core/baseProvider.d.ts +6 -1
- package/dist/core/baseProvider.js +208 -230
- package/dist/core/factory.d.ts +3 -0
- package/dist/core/factory.js +138 -188
- package/dist/evaluation/pipeline/evaluationPipeline.js +5 -2
- package/dist/evaluation/scorers/scorerRegistry.d.ts +3 -0
- package/dist/evaluation/scorers/scorerRegistry.js +353 -282
- package/dist/lib/auth/anthropicOAuth.js +12 -0
- package/dist/lib/core/baseProvider.d.ts +6 -1
- package/dist/lib/core/baseProvider.js +208 -230
- package/dist/lib/core/factory.d.ts +3 -0
- package/dist/lib/core/factory.js +138 -188
- package/dist/lib/evaluation/pipeline/evaluationPipeline.js +5 -2
- package/dist/lib/evaluation/scorers/scorerRegistry.d.ts +3 -0
- package/dist/lib/evaluation/scorers/scorerRegistry.js +353 -282
- package/dist/lib/mcp/toolRegistry.d.ts +2 -0
- package/dist/lib/mcp/toolRegistry.js +32 -31
- package/dist/lib/neurolink.d.ts +38 -0
- package/dist/lib/neurolink.js +1858 -1689
- package/dist/lib/providers/googleAiStudio.js +0 -5
- package/dist/lib/providers/googleVertex.d.ts +10 -0
- package/dist/lib/providers/googleVertex.js +436 -444
- package/dist/lib/providers/litellm.d.ts +1 -0
- package/dist/lib/providers/litellm.js +73 -64
- package/dist/lib/providers/ollama.js +17 -4
- package/dist/lib/providers/openAI.d.ts +2 -0
- package/dist/lib/providers/openAI.js +139 -140
- package/dist/lib/proxy/claudeFormat.js +12 -4
- package/dist/lib/proxy/oauthFetch.js +298 -318
- package/dist/lib/proxy/proxyConfig.js +3 -1
- package/dist/lib/proxy/proxyFetch.js +250 -222
- package/dist/lib/proxy/requestLogger.js +132 -45
- package/dist/lib/proxy/sseInterceptor.js +36 -11
- package/dist/lib/server/routes/claudeProxyRoutes.d.ts +10 -1
- package/dist/lib/server/routes/claudeProxyRoutes.js +2726 -2272
- package/dist/lib/services/server/ai/observability/instrumentation.js +194 -218
- package/dist/lib/tasks/backends/bullmqBackend.js +24 -18
- package/dist/lib/tasks/store/redisTaskStore.js +23 -16
- package/dist/lib/tasks/taskManager.d.ts +2 -0
- package/dist/lib/tasks/taskManager.js +100 -5
- package/dist/lib/telemetry/telemetryService.js +9 -5
- package/dist/lib/types/proxyTypes.d.ts +124 -1
- package/dist/lib/utils/providerHealth.d.ts +1 -0
- package/dist/lib/utils/providerHealth.js +46 -31
- package/dist/lib/utils/providerUtils.js +11 -22
- package/dist/mcp/toolRegistry.d.ts +2 -0
- package/dist/mcp/toolRegistry.js +32 -31
- package/dist/neurolink.d.ts +38 -0
- package/dist/neurolink.js +1858 -1689
- package/dist/providers/googleAiStudio.js +0 -5
- package/dist/providers/googleVertex.d.ts +10 -0
- package/dist/providers/googleVertex.js +436 -444
- package/dist/providers/litellm.d.ts +1 -0
- package/dist/providers/litellm.js +73 -64
- package/dist/providers/ollama.js +17 -4
- package/dist/providers/openAI.d.ts +2 -0
- package/dist/providers/openAI.js +139 -140
- package/dist/proxy/claudeFormat.js +12 -4
- package/dist/proxy/oauthFetch.js +298 -318
- package/dist/proxy/proxyConfig.js +3 -1
- package/dist/proxy/proxyFetch.js +250 -222
- package/dist/proxy/requestLogger.js +132 -45
- package/dist/proxy/sseInterceptor.js +36 -11
- package/dist/server/routes/claudeProxyRoutes.d.ts +10 -1
- package/dist/server/routes/claudeProxyRoutes.js +2726 -2272
- package/dist/services/server/ai/observability/instrumentation.js +194 -218
- package/dist/tasks/backends/bullmqBackend.js +24 -18
- package/dist/tasks/store/redisTaskStore.js +23 -16
- package/dist/tasks/taskManager.d.ts +2 -0
- package/dist/tasks/taskManager.js +100 -5
- package/dist/telemetry/telemetryService.js +9 -5
- package/dist/types/proxyTypes.d.ts +124 -1
- package/dist/utils/providerHealth.d.ts +1 -0
- package/dist/utils/providerHealth.js +46 -31
- package/dist/utils/providerUtils.js +12 -22
- package/package.json +3 -2
- package/scripts/observability/check-proxy-telemetry.mjs +1 -1
- package/scripts/observability/manage-local-openobserve.sh +36 -5
```diff
@@ -777,272 +777,76 @@ export class GoogleVertexProvider extends BaseProvider {
         this.validateStreamOptions(options);
     }
     async executeStream(options, analysisSchema) {
-
-        const
-
-
+        const modelName = this.resolveAlias(options.model || this.modelName || getDefaultVertexModel());
+        const nativeGemini3Result = await this.maybeExecuteNativeGemini3ToolStream(options, analysisSchema, modelName);
+        if (nativeGemini3Result) {
+            return nativeGemini3Result;
+        }
+        return this.executeAISDKStream(options, analysisSchema, modelName);
+    }
+    async maybeExecuteNativeGemini3ToolStream(options, analysisSchema, modelName) {
         const wantsStructuredOutput = analysisSchema || options.output?.format === "json" || options.schema;
-
-        // Need to check early if we should route to native SDK
-        const gemini3CheckShouldUseTools = !options.disableTools && this.supportsTools() && !wantsStructuredOutput;
+        const shouldUseTools = !options.disableTools && this.supportsTools() && !wantsStructuredOutput;
         const optionTools = options.tools || {};
-        const sdkTools =
+        const sdkTools = shouldUseTools ? await this.getAllTools() : {};
         const combinedToolCount = Object.keys(optionTools).length + Object.keys(sdkTools).length;
-        const hasTools =
-        if (isGemini3Model(
-
-            const processedOptions = await this.processCSVFilesForNativeSDK(options);
-            // Merge SDK tools into options for native SDK path
-            const mergedOptions = {
-                ...processedOptions,
-                tools: { ...sdkTools, ...optionTools },
-            };
-            logger.info("[GoogleVertex] Routing Gemini 3 to native SDK for tool calling", {
-                model: gemini3CheckModelName,
-                optionToolCount: Object.keys(optionTools).length,
-                sdkToolCount: Object.keys(sdkTools).length,
-                totalToolCount: combinedToolCount,
-            });
-            return this.executeNativeGemini3Stream(mergedOptions);
+        const hasTools = shouldUseTools && combinedToolCount > 0;
+        if (!isGemini3Model(modelName) || !hasTools) {
+            return null;
         }
-
+        const processedOptions = await this.processCSVFilesForNativeSDK(options);
+        const mergedOptions = {
+            ...processedOptions,
+            tools: { ...sdkTools, ...optionTools },
+        };
+        logger.info("[GoogleVertex] Routing Gemini 3 to native SDK for tool calling", {
+            model: modelName,
+            optionToolCount: Object.keys(optionTools).length,
+            sdkToolCount: Object.keys(sdkTools).length,
+            totalToolCount: combinedToolCount,
+        });
+        return this.executeNativeGemini3Stream(mergedOptions);
+    }
+    async executeAISDKStream(options, analysisSchema, modelName) {
         const functionTag = "GoogleVertexProvider.executeStream";
-
-
-
-
+        const tracking = {
+            chunkCount: 0,
+            collectedToolCalls: [],
+            collectedToolResults: [],
+        };
+        const timeoutController = createTimeoutController(this.getTimeout(options), this.providerName, "stream");
         try {
-            // Validate stream options
             this.validateStreamOptionsOnly(options);
-            // Build message array from options with multimodal support
-            // Using protected helper from BaseProvider to eliminate code duplication
             const messages = await this.buildMessagesForStream(options);
-            const model = await this.getAISDKModelWithMiddleware(options);
-
-            const
-
-
-
-
-
-
-
-            if (Object.keys(rawTools).length > 0 && !isAnthropic) {
-                const sanitized = sanitizeToolsForGemini(rawTools);
-                if (sanitized.dropped.length > 0) {
-                    logger.warn(`[GoogleVertex] Dropped ${sanitized.dropped.length} incompatible tool(s): ${sanitized.dropped.join(", ")}`);
-                }
-                tools =
-                    Object.keys(sanitized.tools).length > 0 ? sanitized.tools : undefined;
-            }
-            else if (isAnthropic && Object.keys(rawTools).length > 0) {
-                // Anthropic models don't need Gemini sanitization — pass tools through
-                tools = rawTools;
-            }
-            else {
-                tools = undefined;
-            }
-            logger.debug(`${functionTag}: Tools for streaming`, {
+            const model = await this.getAISDKModelWithMiddleware(options);
+            const { shouldUseTools, tools, isAnthropic } = await this.resolveAISDKStreamTools(options, modelName, functionTag);
+            const streamOptions = this.buildAISDKStreamOptions({
+                options,
+                analysisSchema,
+                functionTag,
+                modelName,
+                model,
+                messages,
+                tools,
                 shouldUseTools,
-
-
-
-                toolNames: Object.keys(tools ?? {}),
-            });
-            // Model-specific maxTokens handling
-            const modelName = this.resolveAlias(options.model || this.modelName || getDefaultVertexModel());
-            // Use cached model configuration to determine maxTokens handling for streaming performance
-            // This avoids hardcoded model-specific logic and repeated config lookups
-            const shouldSetMaxTokens = this.shouldSetMaxTokensCached(modelName);
-            const maxTokens = shouldSetMaxTokens
-                ? options.maxTokens // No default limit
-                : undefined;
-            const collectedToolCalls = [];
-            const collectedToolResults = [];
-            // Build complete stream options with proper typing
-            let streamOptions = {
-                model: model,
-                messages: messages,
-                temperature: options.temperature,
-                ...(maxTokens && { maxTokens }),
-                maxRetries: 0, // NL11: Disable AI SDK's invisible internal retries; we handle retries with OTel instrumentation
-                ...(shouldUseTools &&
-                    tools &&
-                    Object.keys(tools).length > 0 && {
-                    tools,
-                    toolChoice: resolveToolChoice(options, tools, shouldUseTools),
-                    stopWhen: stepCountIs(options.maxSteps || DEFAULT_MAX_STEPS),
-                }),
-                abortSignal: composeAbortSignals(options.abortSignal, timeoutController?.controller.signal),
-                experimental_telemetry: this.telemetryHandler.getTelemetryConfig(options),
-                // Gemini 3: use thinkingLevel via providerOptions (Vertex AI)
-                // Gemini 2.5: use thinkingBudget via providerOptions
-                ...(options.thinkingConfig?.enabled && {
-                    providerOptions: {
-                        vertex: {
-                            thinkingConfig: {
-                                ...(options.thinkingConfig.thinkingLevel && {
-                                    thinkingLevel: options.thinkingConfig.thinkingLevel,
-                                }),
-                                ...(options.thinkingConfig.budgetTokens &&
-                                    !options.thinkingConfig.thinkingLevel && {
-                                    thinkingBudget: options.thinkingConfig.budgetTokens,
-                                }),
-                                includeThoughts: true,
-                            },
-                        },
-                    },
-                }),
-                onError: (event) => {
-                    const error = event.error;
-                    const errorMessage = error instanceof Error ? error.message : String(error);
-                    logger.error(`${functionTag}: Stream error`, {
-                        provider: this.providerName,
-                        modelName: this.modelName,
-                        error: errorMessage,
-                        chunkCount,
-                    });
-                },
-                onFinish: (event) => {
-                    logger.debug(`${functionTag}: Stream finished`, {
-                        finishReason: event.finishReason,
-                        totalChunks: chunkCount,
-                    });
-                },
-                onChunk: () => {
-                    chunkCount++;
-                },
-                onStepFinish: ({ toolCalls, toolResults }) => {
-                    logger.info("Tool execution completed", { toolResults, toolCalls });
-                    for (const toolCall of toolCalls) {
-                        collectedToolCalls.push({
-                            toolCallId: toolCall.toolCallId,
-                            toolName: toolCall.toolName,
-                            args: toolCall.args ??
-                                toolCall.input ??
-                                toolCall
-                                    .parameters ??
-                                {},
-                        });
-                    }
-                    for (const toolResult of toolResults) {
-                        const rawToolResult = toolResult;
-                        collectedToolResults.push({
-                            toolName: toolResult.toolName,
-                            status: rawToolResult.error ? "failure" : "success",
-                            output: (rawToolResult.output ??
-                                rawToolResult.result) ?? undefined,
-                            error: rawToolResult.error,
-                            id: rawToolResult.toolCallId ?? toolResult.toolName,
-                        });
-                    }
-                    // Handle tool execution storage
-                    this.handleToolExecutionStorage(toolCalls, toolResults, options, new Date()).catch((error) => {
-                        logger.warn("[GoogleVertexProvider] Failed to store tool executions", {
-                            provider: this.providerName,
-                            error: error instanceof Error ? error.message : String(error),
-                        });
-                    });
-                },
-            };
-            if (analysisSchema) {
-                try {
-                    // Gemini cannot use tools and JSON schema simultaneously
-                    if (!isAnthropic) {
-                        delete streamOptions.tools;
-                        delete streamOptions.toolChoice;
-                        delete streamOptions.stopWhen;
-                    }
-                    streamOptions = {
-                        ...streamOptions,
-                        experimental_output: Output.object({
-                            schema: analysisSchema,
-                        }),
-                    };
-                }
-                catch (error) {
-                    logger.warn("Schema application failed, continuing without schema", {
-                        error: String(error),
-                    });
-                }
-            }
-            // Wrap streamText in an OTel span to capture provider-level latency and token usage
-            const streamSpan = streamTracer.startSpan("neurolink.provider.streamText", {
-                kind: SpanKind.CLIENT,
-                attributes: {
-                    "gen_ai.system": "vertex",
-                    "gen_ai.request.model": getModelId(model, this.modelName || "unknown"),
-                },
+                isAnthropic,
+                timeoutController,
+                tracking,
             });
-
-
-
-
-
-
-                streamSpan.setStatus({
-                    code: SpanStatusCode.ERROR,
-                    message: err instanceof Error ? err.message : String(err),
-                });
-                streamSpan.end();
-                throw err;
-            }
-            // Collect token usage and finish reason asynchronously when the stream completes,
-            // then end the span. This avoids blocking the stream consumer.
-            Promise.resolve(result.usage)
-                .then((usage) => {
-                streamSpan.setAttribute("gen_ai.usage.input_tokens", usage.inputTokens || 0);
-                streamSpan.setAttribute("gen_ai.usage.output_tokens", usage.outputTokens || 0);
-                const effectiveModel = options.model ||
-                    getModelId(model, this.modelName || getDefaultVertexModel());
-                const cost = calculateCost(this.providerName, effectiveModel, {
-                    input: usage.inputTokens || 0,
-                    output: usage.outputTokens || 0,
-                    total: (usage.inputTokens || 0) + (usage.outputTokens || 0),
-                });
-                if (cost && cost > 0) {
-                    streamSpan.setAttribute("neurolink.cost", cost);
-                }
-            })
-                .catch(() => {
-                // Usage may not be available if the stream is aborted
-            });
-            Promise.resolve(result.finishReason)
-                .then((reason) => {
-                streamSpan.setAttribute("gen_ai.response.finish_reason", reason || "unknown");
-            })
-                .catch(() => {
-                // Finish reason may not be available if the stream is aborted
-            });
-            Promise.resolve(result.text)
-                .then(() => {
-                streamSpan.end();
-            })
-                .catch((err) => {
-                streamSpan.setStatus({
-                    code: SpanStatusCode.ERROR,
-                    message: err instanceof Error ? err.message : String(err),
-                });
-                streamSpan.end();
+            const result = this.startObservedAISDKStream(streamOptions, model, modelName, options);
+            this.observeAISDKStreamResult(result, {
+                model,
+                modelName,
+                options,
+                timeoutController,
             });
-            // Defer timeout cleanup until the stream completes or errors.
-            // Guard against NoOutputGeneratedError becoming an unhandled rejection.
-            Promise.resolve(result.text)
-                .catch((err) => {
-                logger.debug("Stream text promise rejected (expected for empty streams)", {
-                    error: err instanceof Error ? err.message : String(err),
-                });
-            })
-                .finally(() => timeoutController?.cleanup());
-            // Transform string stream to content object stream using BaseProvider method
-            const transformedStream = this.createTextStream(result);
             return {
-                stream:
+                stream: this.createTextStream(result),
                 provider: this.providerName,
                 model: this.modelName,
                 ...(shouldUseTools && {
-                    toolCalls: collectedToolCalls,
-                    toolResults: collectedToolResults,
+                    toolCalls: tracking.collectedToolCalls,
+                    toolResults: tracking.collectedToolResults,
                 }),
             };
         }
```
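The first hunk above splits the old monolithic `executeStream` into a guard helper, `maybeExecuteNativeGemini3ToolStream`, which returns `null` whenever the native Gemini 3 tool path does not apply, and a fallback, `executeAISDKStream`. A minimal runnable sketch of that dispatch shape follows; the class and method names here are illustrative, not the package's API:

```js
// Hypothetical names throughout; this only illustrates the "return null to
// fall through" dispatch used by the refactored executeStream.
class StreamDispatcher {
  constructor(isNativeEligible) {
    // Predicate deciding whether the specialized path applies.
    this.isNativeEligible = isNativeEligible;
  }

  async executeStream(options) {
    const native = await this.maybeExecuteNativeStream(options);
    if (native) {
      return native; // the specialized path handled the request
    }
    return { path: "ai-sdk", options }; // generic fallback path
  }

  async maybeExecuteNativeStream(options) {
    if (!this.isNativeEligible(options)) {
      return null; // "not applicable" is a value, not an exception
    }
    return { path: "native", options };
  }
}

// Only tool-calling requests take the native path in this sketch.
const dispatcher = new StreamDispatcher((options) => Boolean(options.tools));
dispatcher.executeStream({ tools: { search: {} } }).then((result) => {
  console.log(result.path); // "native"
});
```

Returning `null` from the guard keeps the eligibility check (a Gemini 3 model plus usable tools) in one place, so the caller stays a three-branch dispatcher.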
```diff
@@ -1052,11 +856,222 @@ export class GoogleVertexProvider extends BaseProvider {
                 provider: this.providerName,
                 modelName: this.modelName,
                 error: String(error),
-                chunkCount,
+                chunkCount: tracking.chunkCount,
             });
             throw this.handleProviderError(error);
         }
     }
+    async resolveAISDKStreamTools(options, modelName, functionTag) {
+        const shouldUseTools = !options.disableTools && this.supportsTools();
+        const baseStreamTools = shouldUseTools ? await this.getAllTools() : {};
+        const rawTools = shouldUseTools
+            ? { ...baseStreamTools, ...(options.tools || {}) }
+            : {};
+        const isAnthropic = isAnthropicModel(modelName);
+        let tools;
+        if (Object.keys(rawTools).length > 0 && !isAnthropic) {
+            const sanitized = sanitizeToolsForGemini(rawTools);
+            if (sanitized.dropped.length > 0) {
+                logger.warn(`[GoogleVertex] Dropped ${sanitized.dropped.length} incompatible tool(s): ${sanitized.dropped.join(", ")}`);
+            }
+            tools =
+                Object.keys(sanitized.tools).length > 0 ? sanitized.tools : undefined;
+        }
+        else if (isAnthropic && Object.keys(rawTools).length > 0) {
+            tools = rawTools;
+        }
+        else {
+            tools = undefined;
+        }
+        logger.debug(`${functionTag}: Tools for streaming`, {
+            shouldUseTools,
+            baseToolCount: Object.keys(baseStreamTools).length,
+            externalToolCount: Object.keys(options.tools || {}).length,
+            toolCount: Object.keys(tools ?? {}).length,
+            toolNames: Object.keys(tools ?? {}),
+        });
+        return {
+            shouldUseTools,
+            tools,
+            isAnthropic,
+            baseToolCount: Object.keys(baseStreamTools).length,
+        };
+    }
+    buildAISDKStreamOptions(params) {
+        const { options, analysisSchema, functionTag, modelName, model, messages, tools, shouldUseTools, isAnthropic, timeoutController, tracking, } = params;
+        const shouldSetMaxTokens = this.shouldSetMaxTokensCached(modelName);
+        const maxTokens = shouldSetMaxTokens ? options.maxTokens : undefined;
+        let streamOptions = {
+            model,
+            messages,
+            temperature: options.temperature,
+            ...(maxTokens && { maxTokens }),
+            maxRetries: 0,
+            ...(shouldUseTools &&
+                tools &&
+                Object.keys(tools).length > 0 && {
+                tools,
+                toolChoice: resolveToolChoice(options, tools, shouldUseTools),
+                stopWhen: stepCountIs(options.maxSteps || DEFAULT_MAX_STEPS),
+            }),
+            abortSignal: composeAbortSignals(options.abortSignal, timeoutController?.controller.signal),
+            experimental_telemetry: this.telemetryHandler.getTelemetryConfig(options),
+            ...(options.thinkingConfig?.enabled && {
+                providerOptions: {
+                    vertex: {
+                        thinkingConfig: {
+                            ...(options.thinkingConfig.thinkingLevel && {
+                                thinkingLevel: options.thinkingConfig.thinkingLevel,
+                            }),
+                            ...(options.thinkingConfig.budgetTokens &&
+                                !options.thinkingConfig.thinkingLevel && {
+                                thinkingBudget: options.thinkingConfig.budgetTokens,
+                            }),
+                            includeThoughts: true,
+                        },
+                    },
+                },
+            }),
+            onError: (event) => {
+                const errorMessage = event.error instanceof Error
+                    ? event.error.message
+                    : String(event.error);
+                logger.error(`${functionTag}: Stream error`, {
+                    provider: this.providerName,
+                    modelName: this.modelName,
+                    error: errorMessage,
+                    chunkCount: tracking.chunkCount,
+                });
+            },
+            onFinish: (event) => {
+                logger.debug(`${functionTag}: Stream finished`, {
+                    finishReason: event.finishReason,
+                    totalChunks: tracking.chunkCount,
+                });
+            },
+            onChunk: () => {
+                tracking.chunkCount++;
+            },
+            onStepFinish: ({ toolCalls, toolResults }) => {
+                this.captureAISDKStreamToolStep(options, toolCalls, toolResults, tracking);
+            },
+        };
+        if (!analysisSchema) {
+            return streamOptions;
+        }
+        try {
+            if (!isAnthropic) {
+                delete streamOptions.tools;
+                delete streamOptions.toolChoice;
+                delete streamOptions.stopWhen;
+            }
+            streamOptions = {
+                ...streamOptions,
+                experimental_output: Output.object({ schema: analysisSchema }),
+            };
+        }
+        catch (error) {
+            logger.warn("Schema application failed, continuing without schema", {
+                error: String(error),
+            });
+        }
+        return streamOptions;
+    }
+    captureAISDKStreamToolStep(options, toolCalls, toolResults, tracking) {
+        logger.info("Tool execution completed", { toolResults, toolCalls });
+        for (const toolCall of toolCalls) {
+            tracking.collectedToolCalls.push({
+                toolCallId: toolCall.toolCallId,
+                toolName: toolCall.toolName,
+                args: toolCall.args ?? toolCall.input ?? toolCall.parameters ?? {},
+            });
+        }
+        for (const toolResult of toolResults) {
+            tracking.collectedToolResults.push({
+                toolName: toolResult.toolName,
+                status: toolResult.error ? "failure" : "success",
+                output: (toolResult.output ?? toolResult.result) ??
+                    undefined,
+                error: toolResult.error,
+                id: toolResult.toolCallId ?? toolResult.toolName,
+            });
+        }
+        this.handleToolExecutionStorage(toolCalls, toolResults, options, new Date()).catch((error) => {
+            logger.warn("[GoogleVertexProvider] Failed to store tool executions", {
+                provider: this.providerName,
+                error: error instanceof Error ? error.message : String(error),
+            });
+        });
+    }
+    startObservedAISDKStream(streamOptions, model, modelName, options) {
+        const streamSpan = streamTracer.startSpan("neurolink.provider.streamText", {
+            kind: SpanKind.CLIENT,
+            attributes: {
+                "gen_ai.system": "vertex",
+                "gen_ai.request.model": getModelId(model, this.modelName || "unknown"),
+            },
+        });
+        try {
+            const result = streamText(streamOptions);
+            this.attachAISDKStreamObservers(result, streamSpan, model, modelName, options);
+            return result;
+        }
+        catch (error) {
+            streamSpan.recordException(error instanceof Error ? error : new Error(String(error)));
+            streamSpan.setStatus({
+                code: SpanStatusCode.ERROR,
+                message: error instanceof Error ? error.message : String(error),
+            });
+            streamSpan.end();
+            throw error;
+        }
+    }
+    attachAISDKStreamObservers(result, streamSpan, model, modelName, options) {
+        Promise.resolve(result.usage)
+            .then((usage) => {
+            streamSpan.setAttribute("gen_ai.usage.input_tokens", usage.inputTokens || 0);
+            streamSpan.setAttribute("gen_ai.usage.output_tokens", usage.outputTokens || 0);
+            const effectiveModel = options.model ||
+                getModelId(model, modelName || getDefaultVertexModel());
+            const cost = calculateCost(this.providerName, effectiveModel, {
+                input: usage.inputTokens || 0,
+                output: usage.outputTokens || 0,
+                total: (usage.inputTokens || 0) + (usage.outputTokens || 0),
+            });
+            if (cost && cost > 0) {
+                streamSpan.setAttribute("neurolink.cost", cost);
+            }
+        })
+            .catch(() => undefined);
+        Promise.resolve(result.finishReason)
+            .then((reason) => {
+            streamSpan.setAttribute("gen_ai.response.finish_reason", reason || "unknown");
+        })
+            .catch(() => undefined);
+        Promise.resolve(result.text)
+            .then(() => {
+            streamSpan.end();
+        })
+            .catch((error) => {
+            streamSpan.setStatus({
+                code: SpanStatusCode.ERROR,
+                message: error instanceof Error ? error.message : String(error),
+            });
+            streamSpan.end();
+        });
+    }
+    observeAISDKStreamResult(result, params) {
+        void params.model;
+        void params.modelName;
+        void params.options;
+        Promise.resolve(result.text)
+            .catch((error) => {
+            logger.debug("Stream text promise rejected (expected for empty streams)", {
+                error: error instanceof Error ? error.message : String(error),
+            });
+        })
+            .finally(() => params.timeoutController?.cleanup());
+    }
     /**
      * Create @google/genai client configured for Vertex AI
      */
```
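The second hunk extracts the OTel instrumentation into `startObservedAISDKStream` and `attachAISDKStreamObservers`: the span opens before `streamText` is called, but attributes are recorded and the span ended only as the result's `usage`, `finishReason`, and `text` promises settle, so telemetry never blocks the consumer. A simplified sketch of that observer pattern; the span stub and names are assumptions, not the package's code:

```js
// Record attributes as the stream's promises settle; end the span only when
// the text promise settles (success or failure).
function observeStream(result, span) {
  Promise.resolve(result.usage)
    .then((usage) => {
      span.setAttribute("gen_ai.usage.input_tokens", usage.inputTokens || 0);
      span.setAttribute("gen_ai.usage.output_tokens", usage.outputTokens || 0);
    })
    .catch(() => undefined); // usage may never resolve for aborted streams
  Promise.resolve(result.text)
    .then(() => span.end())
    .catch((error) => {
      span.setStatus({ code: 2, message: String(error) }); // 2 = OTel ERROR
      span.end();
    });
}

// Stub with the same surface as an OpenTelemetry span, for demonstration.
const span = {
  setAttribute: (key, value) => console.log("attr", key, value),
  setStatus: (status) => console.log("status", status),
  end: () => console.log("span ended"),
};
observeStream(
  {
    usage: Promise.resolve({ inputTokens: 12, outputTokens: 34 }),
    text: Promise.resolve("done"),
  },
  span,
);
```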
```diff
@@ -1211,210 +1226,187 @@ export class GoogleVertexProvider extends BaseProvider {
                 [ATTR.GEN_AI_OPERATION]: "stream",
                 [ATTR.NL_PROVIDER]: this.providerName,
             },
-        },
-
-
-
-
-
-
-
+        }, (span) => this.executeNativeGemini3StreamWithSpan(options, modelName, span));
+    }
+    async executeNativeGemini3StreamWithSpan(options, modelName, span) {
+        const client = await this.createVertexGenAIClient(options.region);
+        const effectiveLocation = options.region || this.location || getVertexLocation();
+        logger.debug("[GoogleVertex] Using native @google/genai for Gemini 3", {
+            model: modelName,
+            hasTools: !!options.tools && Object.keys(options.tools).length > 0,
+            project: this.projectId,
+            location: effectiveLocation,
+        });
+        const multimodalInput = options.input;
+        const contents = this.buildNativeContentParts(options.input.text, multimodalInput, "native stream");
+        let hasToolsInput = !!options.tools &&
+            Object.keys(options.tools).length > 0 &&
+            !options.disableTools;
+        const streamOptions = options;
+        const wantsJsonOutput = streamOptions.output?.format === "json" || streamOptions.schema;
+        if (wantsJsonOutput && hasToolsInput) {
+            logger.warn("[GoogleVertex] Gemini does not support tools and JSON schema output simultaneously. Disabling tools for this request.");
+            hasToolsInput = false;
+        }
+        let toolsConfig;
+        let executeMap = new Map();
+        if (hasToolsInput) {
+            const toolDeclarationResult = buildNativeToolDeclarations(options.tools);
+            toolsConfig = toolDeclarationResult.toolsConfig;
+            executeMap = toolDeclarationResult.executeMap;
+            logger.debug("[GoogleVertex] Converted tools for native SDK", {
+                toolCount: toolsConfig[0].functionDeclarations.length,
+                toolNames: toolsConfig[0].functionDeclarations.map((tool) => tool.name),
             });
-
-
-
-
-
-
-
-
-
-
-
-            logger.
-
-        }
-        let toolsConfig;
-        let executeMap = new Map();
-        if (hasToolsInput) {
-            const result = buildNativeToolDeclarations(options.tools);
-            toolsConfig = result.toolsConfig;
-            executeMap = result.executeMap;
-            logger.debug("[GoogleVertex] Converted tools for native SDK", {
-                toolCount: toolsConfig[0].functionDeclarations.length,
-                toolNames: toolsConfig[0].functionDeclarations.map((t) => t.name),
+        }
+        const config = buildNativeConfig(options, toolsConfig);
+        if (wantsJsonOutput) {
+            config.responseMimeType = "application/json";
+            if (streamOptions.schema) {
+                const rawSchema = convertZodToJsonSchema(streamOptions.schema);
+                const inlinedSchema = inlineJsonSchema(rawSchema);
+                if (inlinedSchema.$schema) {
+                    delete inlinedSchema.$schema;
+                }
+                config.responseSchema = inlinedSchema;
+                logger.debug("[GoogleVertex] Added responseSchema for JSON output (stream)", {
+                    schemaKeys: Object.keys(inlinedSchema),
                 });
             }
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        }
+        const startTime = Date.now();
+        const timeoutController = createTimeoutController(this.getTimeout(options), this.providerName, "stream");
+        const composedSignal = composeAbortSignals(options.abortSignal, timeoutController?.controller.signal);
+        const maxSteps = computeMaxStepsShared(options.maxSteps);
+        const currentContents = this.prependConversationHistory([...contents], options.conversationMessages);
+        const channel = createTextChannel();
+        const allToolCalls = [];
+        const metadata = {
+            streamId: `native-vertex-${Date.now()}`,
+            startTime,
+            responseTime: 0,
+            totalToolExecutions: 0,
+        };
+        let analyticsResolve;
+        let analyticsReject;
+        const analyticsPromise = new Promise((resolve, reject) => {
+            analyticsResolve = resolve;
+            analyticsReject = reject;
+        });
+        const loopPromise = this.runNativeGemini3StreamLoop({
+            client,
+            modelName,
+            span,
+            config,
+            currentContents,
+            executeMap,
+            channel,
+            allToolCalls,
+            metadata,
+            analyticsResolve,
+            analyticsReject,
+            startTime,
+            timeoutController,
+            composedSignal,
+            maxSteps,
+        });
+        loopPromise.catch(() => undefined);
+        return {
+            stream: channel.iterable,
+            provider: this.providerName,
+            model: modelName,
+            toolCalls: allToolCalls,
+            analytics: analyticsPromise,
+            metadata,
+        };
+    }
+    async runNativeGemini3StreamLoop(params) {
+        let lastStepText = "";
+        let totalInputTokens = 0;
+        let totalOutputTokens = 0;
+        let step = 0;
+        let completedWithFinalAnswer = false;
+        const failedTools = new Map();
+        try {
+            while (step < params.maxSteps) {
+                if (params.composedSignal?.aborted) {
+                    throw params.composedSignal.reason instanceof Error
+                        ? params.composedSignal.reason
+                        : new Error("Request aborted");
                 }
-
-
-            const timeout = this.getTimeout(options);
-            const timeoutController = createTimeoutController(timeout, this.providerName, "stream");
-            const composedSignal = composeAbortSignals(options.abortSignal, timeoutController?.controller.signal);
-            const maxSteps = computeMaxStepsShared(options.maxSteps);
-            // Inject conversation history so the native path has multi-turn context
-            const currentContents = this.prependConversationHistory([...contents], options.conversationMessages);
-            // Create a push-based text channel so the caller receives tokens as
-            // they arrive from the network rather than after full buffering.
-            const channel = createTextChannel();
-            // Shared mutable state updated by the background agentic loop.
-            const allToolCalls = [];
-            // Shared metadata object mutated by the background loop so that
-            // responseTime and totalToolExecutions reflect final values.
-            const metadata = {
-                streamId: `native-vertex-${Date.now()}`,
-                startTime,
-                responseTime: 0,
-                totalToolExecutions: 0,
-            };
-            // analyticsResolvers lets the background loop settle the analytics
-            // promise once token counts are known (after the loop completes).
-            let analyticsResolve;
-            let analyticsReject;
-            const analyticsPromise = new Promise((res, rej) => {
-                analyticsResolve = res;
-                analyticsReject = rej;
-            });
-            // Run the agentic loop in the background without awaiting it here,
-            // so we can return the StreamResult (with channel.iterable) immediately.
-            const loopPromise = (async () => {
-                let lastStepText = "";
-                let totalInputTokens = 0;
-                let totalOutputTokens = 0;
-                let step = 0;
-                let completedWithFinalAnswer = false;
-                const failedTools = new Map();
+                step++;
+                logger.debug(`[GoogleVertex] Native SDK step ${step}/${params.maxSteps}`);
                 try {
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                        ? { httpOptions: { signal: composedSignal } }
-                        : {}),
-                    });
-                    // For every step, use incremental collection so text parts
-                    // are pushed to the channel as they arrive. For intermediate
-                    // steps (those that produce function calls) we still need the
-                    // complete rawResponseParts for pushModelResponseToHistory,
-                    // which collectStreamChunksIncremental provides at stream end.
-                    const chunkResult = await collectStreamChunksIncremental(rawStream, channel);
-                    totalInputTokens += chunkResult.inputTokens;
-                    totalOutputTokens += chunkResult.outputTokens;
-                    const stepText = extractTextFromParts(chunkResult.rawResponseParts);
-                    // If no function calls, this was the final step — channel
-                    // already received all text parts incrementally.
-                    if (chunkResult.stepFunctionCalls.length === 0) {
-                        completedWithFinalAnswer = true;
-                        break;
-                    }
-                    lastStepText = stepText;
-                    // Record tool call events on the span
-                    for (const fc of chunkResult.stepFunctionCalls) {
-                        span.addEvent("gen_ai.tool_call", {
-                            "tool.name": fc.name,
-                            "tool.step": step,
-                        });
-                    }
-                    logger.debug(`[GoogleVertex] Executing ${chunkResult.stepFunctionCalls.length} function calls`);
-                    pushModelResponseToHistory(currentContents, chunkResult.rawResponseParts, chunkResult.stepFunctionCalls);
-                    const functionResponses = await executeNativeToolCalls("[GoogleVertex]", chunkResult.stepFunctionCalls, executeMap, failedTools, allToolCalls, { abortSignal: composedSignal });
-                    // Function/tool responses must use role: "user" — the
-                    // @google/genai SDK's validateHistory() only accepts "user"
-                    // and "model" roles (matching automaticFunctionCalling).
-                    currentContents.push({
-                        role: "user",
-                        parts: functionResponses,
-                    });
-                }
-                catch (error) {
-                    logger.error("[GoogleVertex] Native SDK error", error);
-                    throw this.handleProviderError(error);
-                }
+                    const rawStream = await params.client.models.generateContentStream({
+                        model: params.modelName,
+                        contents: params.currentContents,
+                        config: params.config,
+                        ...(params.composedSignal
+                            ? { httpOptions: { signal: params.composedSignal } }
+                            : {}),
+                    });
+                    const chunkResult = await collectStreamChunksIncremental(rawStream, params.channel);
+                    totalInputTokens += chunkResult.inputTokens;
+                    totalOutputTokens += chunkResult.outputTokens;
+                    const stepText = extractTextFromParts(chunkResult.rawResponseParts);
+                    if (chunkResult.stepFunctionCalls.length === 0) {
+                        completedWithFinalAnswer = true;
+                        break;
                    }
-
-
-
-
-
-
-                        channel.push(fallback);
-                    }
+                    lastStepText = stepText;
+                    for (const functionCall of chunkResult.stepFunctionCalls) {
+                        params.span.addEvent("gen_ai.tool_call", {
+                            "tool.name": functionCall.name,
+                            "tool.step": step,
+                        });
                    }
-
-
-
-
-
-
-            span.setAttribute(ATTR.GEN_AI_INPUT_TOKENS, totalInputTokens);
-            span.setAttribute(ATTR.GEN_AI_OUTPUT_TOKENS, totalOutputTokens);
-            span.setAttribute(ATTR.GEN_AI_FINISH_REASON, step >= maxSteps && !completedWithFinalAnswer
-                ? "max_steps"
-                : "stop");
-            analyticsResolve({
-                provider: this.providerName,
-                model: modelName,
-                tokenUsage: {
-                    input: totalInputTokens,
-                    output: totalOutputTokens,
-                    total: totalInputTokens + totalOutputTokens,
-                },
-                requestDuration: responseTime,
-                timestamp: new Date().toISOString(),
+                    logger.debug(`[GoogleVertex] Executing ${chunkResult.stepFunctionCalls.length} function calls`);
+                    pushModelResponseToHistory(params.currentContents, chunkResult.rawResponseParts, chunkResult.stepFunctionCalls);
+                    const functionResponses = await executeNativeToolCalls("[GoogleVertex]", chunkResult.stepFunctionCalls, params.executeMap, failedTools, params.allToolCalls, { abortSignal: params.composedSignal });
+                    params.currentContents.push({
+                        role: "user",
+                        parts: functionResponses,
                    });
-            channel.close();
                }
-            catch (
-
-
+                catch (error) {
+                    logger.error("[GoogleVertex] Native SDK error", error);
+                    throw this.handleProviderError(error);
                }
-
-
+            }
+            if (step >= params.maxSteps && !completedWithFinalAnswer) {
+                const fallback = handleMaxStepsTermination("[GoogleVertex]", step, params.maxSteps, "", lastStepText);
+                if (fallback) {
+                    params.channel.push(fallback);
                }
-            }
-
-
-
-
-
+            }
+            const responseTime = Date.now() - params.startTime;
+            params.metadata.responseTime = responseTime;
+            params.metadata.totalToolExecutions = params.allToolCalls.length;
+            params.span.setAttribute(ATTR.GEN_AI_INPUT_TOKENS, totalInputTokens);
+            params.span.setAttribute(ATTR.GEN_AI_OUTPUT_TOKENS, totalOutputTokens);
+            params.span.setAttribute(ATTR.GEN_AI_FINISH_REASON, step >= params.maxSteps && !completedWithFinalAnswer
+                ? "max_steps"
+                : "stop");
+            params.analyticsResolve({
                provider: this.providerName,
-                model: modelName,
-
-
-
-
-
+                model: params.modelName,
+                tokenUsage: {
+                    input: totalInputTokens,
+                    output: totalOutputTokens,
+                    total: totalInputTokens + totalOutputTokens,
+                },
+                requestDuration: responseTime,
+                timestamp: new Date().toISOString(),
+            });
+            params.channel.close();
+        }
+        catch (error) {
+            params.channel.error(error);
+            params.analyticsReject(error);
+        }
+        finally {
+            params.timeoutController?.cleanup();
+        }
    }
    /**
     * Execute generate using native @google/genai SDK for Gemini 3 models on Vertex AI
```