@juspay/neurolink 9.41.0 → 9.42.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +8 -0
- package/README.md +7 -1
- package/dist/auth/anthropicOAuth.d.ts +18 -3
- package/dist/auth/anthropicOAuth.js +149 -4
- package/dist/auth/providers/firebase.js +5 -1
- package/dist/auth/providers/jwt.js +5 -1
- package/dist/auth/providers/workos.js +5 -1
- package/dist/auth/sessionManager.d.ts +1 -1
- package/dist/auth/sessionManager.js +58 -27
- package/dist/browser/neurolink.min.js +354 -334
- package/dist/cli/commands/mcp.d.ts +6 -0
- package/dist/cli/commands/mcp.js +188 -181
- package/dist/cli/commands/proxy.d.ts +2 -1
- package/dist/cli/commands/proxy.js +713 -431
- package/dist/cli/commands/task.js +3 -0
- package/dist/cli/factories/commandFactory.d.ts +2 -0
- package/dist/cli/factories/commandFactory.js +38 -0
- package/dist/cli/parser.js +4 -3
- package/dist/client/aiSdkAdapter.js +3 -0
- package/dist/client/streamingClient.js +30 -10
- package/dist/core/baseProvider.d.ts +6 -1
- package/dist/core/baseProvider.js +208 -230
- package/dist/core/factory.d.ts +3 -0
- package/dist/core/factory.js +138 -188
- package/dist/core/modules/GenerationHandler.js +3 -2
- package/dist/core/redisConversationMemoryManager.js +7 -3
- package/dist/evaluation/BatchEvaluator.js +4 -1
- package/dist/evaluation/hooks/observabilityHooks.js +5 -3
- package/dist/evaluation/pipeline/evaluationPipeline.d.ts +3 -2
- package/dist/evaluation/pipeline/evaluationPipeline.js +24 -9
- package/dist/evaluation/pipeline/strategies/batchStrategy.js +6 -3
- package/dist/evaluation/pipeline/strategies/samplingStrategy.js +18 -10
- package/dist/evaluation/scorers/scorerRegistry.d.ts +3 -0
- package/dist/evaluation/scorers/scorerRegistry.js +353 -282
- package/dist/lib/auth/anthropicOAuth.d.ts +18 -3
- package/dist/lib/auth/anthropicOAuth.js +149 -4
- package/dist/lib/auth/providers/firebase.js +5 -1
- package/dist/lib/auth/providers/jwt.js +5 -1
- package/dist/lib/auth/providers/workos.js +5 -1
- package/dist/lib/auth/sessionManager.d.ts +1 -1
- package/dist/lib/auth/sessionManager.js +58 -27
- package/dist/lib/client/aiSdkAdapter.js +3 -0
- package/dist/lib/client/streamingClient.js +30 -10
- package/dist/lib/core/baseProvider.d.ts +6 -1
- package/dist/lib/core/baseProvider.js +208 -230
- package/dist/lib/core/factory.d.ts +3 -0
- package/dist/lib/core/factory.js +138 -188
- package/dist/lib/core/modules/GenerationHandler.js +3 -2
- package/dist/lib/core/redisConversationMemoryManager.js +7 -3
- package/dist/lib/evaluation/BatchEvaluator.js +4 -1
- package/dist/lib/evaluation/hooks/observabilityHooks.js +5 -3
- package/dist/lib/evaluation/pipeline/evaluationPipeline.d.ts +3 -2
- package/dist/lib/evaluation/pipeline/evaluationPipeline.js +24 -9
- package/dist/lib/evaluation/pipeline/strategies/batchStrategy.js +6 -3
- package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.js +18 -10
- package/dist/lib/evaluation/scorers/scorerRegistry.d.ts +3 -0
- package/dist/lib/evaluation/scorers/scorerRegistry.js +353 -282
- package/dist/lib/mcp/toolRegistry.d.ts +2 -0
- package/dist/lib/mcp/toolRegistry.js +32 -31
- package/dist/lib/neurolink.d.ts +41 -2
- package/dist/lib/neurolink.js +1616 -1681
- package/dist/lib/observability/otelBridge.d.ts +2 -2
- package/dist/lib/observability/otelBridge.js +12 -3
- package/dist/lib/providers/amazonBedrock.js +2 -4
- package/dist/lib/providers/anthropic.d.ts +9 -5
- package/dist/lib/providers/anthropic.js +19 -14
- package/dist/lib/providers/anthropicBaseProvider.d.ts +3 -3
- package/dist/lib/providers/anthropicBaseProvider.js +5 -4
- package/dist/lib/providers/azureOpenai.d.ts +1 -1
- package/dist/lib/providers/azureOpenai.js +5 -4
- package/dist/lib/providers/googleAiStudio.js +30 -6
- package/dist/lib/providers/googleVertex.d.ts +10 -0
- package/dist/lib/providers/googleVertex.js +437 -423
- package/dist/lib/providers/huggingFace.d.ts +3 -3
- package/dist/lib/providers/huggingFace.js +6 -8
- package/dist/lib/providers/litellm.d.ts +1 -0
- package/dist/lib/providers/litellm.js +76 -55
- package/dist/lib/providers/mistral.js +2 -1
- package/dist/lib/providers/ollama.js +93 -23
- package/dist/lib/providers/openAI.d.ts +2 -0
- package/dist/lib/providers/openAI.js +141 -141
- package/dist/lib/providers/openRouter.js +2 -1
- package/dist/lib/providers/openaiCompatible.d.ts +4 -4
- package/dist/lib/providers/openaiCompatible.js +4 -4
- package/dist/lib/proxy/claudeFormat.d.ts +3 -2
- package/dist/lib/proxy/claudeFormat.js +27 -14
- package/dist/lib/proxy/cloaking/plugins/sessionIdentity.d.ts +2 -6
- package/dist/lib/proxy/cloaking/plugins/sessionIdentity.js +9 -33
- package/dist/lib/proxy/modelRouter.js +3 -0
- package/dist/lib/proxy/oauthFetch.d.ts +1 -1
- package/dist/lib/proxy/oauthFetch.js +289 -316
- package/dist/lib/proxy/proxyConfig.js +46 -24
- package/dist/lib/proxy/proxyEnv.d.ts +19 -0
- package/dist/lib/proxy/proxyEnv.js +73 -0
- package/dist/lib/proxy/proxyFetch.js +291 -217
- package/dist/lib/proxy/proxyTracer.d.ts +133 -0
- package/dist/lib/proxy/proxyTracer.js +645 -0
- package/dist/lib/proxy/rawStreamCapture.d.ts +10 -0
- package/dist/lib/proxy/rawStreamCapture.js +83 -0
- package/dist/lib/proxy/requestLogger.d.ts +32 -5
- package/dist/lib/proxy/requestLogger.js +503 -47
- package/dist/lib/proxy/sseInterceptor.d.ts +97 -0
- package/dist/lib/proxy/sseInterceptor.js +427 -0
- package/dist/lib/proxy/usageStats.d.ts +4 -3
- package/dist/lib/proxy/usageStats.js +25 -12
- package/dist/lib/rag/chunkers/MarkdownChunker.js +13 -5
- package/dist/lib/rag/chunking/markdownChunker.js +15 -6
- package/dist/lib/server/routes/claudeProxyRoutes.d.ts +17 -3
- package/dist/lib/server/routes/claudeProxyRoutes.js +3032 -1349
- package/dist/lib/services/server/ai/observability/instrumentation.d.ts +7 -1
- package/dist/lib/services/server/ai/observability/instrumentation.js +337 -161
- package/dist/lib/tasks/backends/bullmqBackend.d.ts +1 -0
- package/dist/lib/tasks/backends/bullmqBackend.js +35 -22
- package/dist/lib/tasks/store/redisTaskStore.d.ts +1 -0
- package/dist/lib/tasks/store/redisTaskStore.js +54 -39
- package/dist/lib/tasks/taskManager.d.ts +5 -0
- package/dist/lib/tasks/taskManager.js +158 -30
- package/dist/lib/telemetry/index.d.ts +2 -1
- package/dist/lib/telemetry/index.js +2 -1
- package/dist/lib/telemetry/telemetryService.d.ts +3 -0
- package/dist/lib/telemetry/telemetryService.js +69 -5
- package/dist/lib/types/cli.d.ts +10 -0
- package/dist/lib/types/proxyTypes.d.ts +160 -5
- package/dist/lib/types/streamTypes.d.ts +25 -3
- package/dist/lib/utils/messageBuilder.js +3 -2
- package/dist/lib/utils/providerHealth.d.ts +19 -0
- package/dist/lib/utils/providerHealth.js +279 -33
- package/dist/lib/utils/providerUtils.js +17 -22
- package/dist/lib/utils/toolChoice.d.ts +4 -0
- package/dist/lib/utils/toolChoice.js +7 -0
- package/dist/mcp/toolRegistry.d.ts +2 -0
- package/dist/mcp/toolRegistry.js +32 -31
- package/dist/neurolink.d.ts +41 -2
- package/dist/neurolink.js +1616 -1681
- package/dist/observability/otelBridge.d.ts +2 -2
- package/dist/observability/otelBridge.js +12 -3
- package/dist/providers/amazonBedrock.js +2 -4
- package/dist/providers/anthropic.d.ts +9 -5
- package/dist/providers/anthropic.js +19 -14
- package/dist/providers/anthropicBaseProvider.d.ts +3 -3
- package/dist/providers/anthropicBaseProvider.js +5 -4
- package/dist/providers/azureOpenai.d.ts +1 -1
- package/dist/providers/azureOpenai.js +5 -4
- package/dist/providers/googleAiStudio.js +30 -6
- package/dist/providers/googleVertex.d.ts +10 -0
- package/dist/providers/googleVertex.js +437 -423
- package/dist/providers/huggingFace.d.ts +3 -3
- package/dist/providers/huggingFace.js +6 -7
- package/dist/providers/litellm.d.ts +1 -0
- package/dist/providers/litellm.js +76 -55
- package/dist/providers/mistral.js +2 -1
- package/dist/providers/ollama.js +93 -23
- package/dist/providers/openAI.d.ts +2 -0
- package/dist/providers/openAI.js +141 -141
- package/dist/providers/openRouter.js +2 -1
- package/dist/providers/openaiCompatible.d.ts +4 -4
- package/dist/providers/openaiCompatible.js +4 -3
- package/dist/proxy/claudeFormat.d.ts +3 -2
- package/dist/proxy/claudeFormat.js +27 -14
- package/dist/proxy/cloaking/plugins/sessionIdentity.d.ts +2 -6
- package/dist/proxy/cloaking/plugins/sessionIdentity.js +9 -33
- package/dist/proxy/modelRouter.js +3 -0
- package/dist/proxy/oauthFetch.d.ts +1 -1
- package/dist/proxy/oauthFetch.js +289 -316
- package/dist/proxy/proxyConfig.js +46 -24
- package/dist/proxy/proxyEnv.d.ts +19 -0
- package/dist/proxy/proxyEnv.js +72 -0
- package/dist/proxy/proxyFetch.js +291 -217
- package/dist/proxy/proxyTracer.d.ts +133 -0
- package/dist/proxy/proxyTracer.js +644 -0
- package/dist/proxy/rawStreamCapture.d.ts +10 -0
- package/dist/proxy/rawStreamCapture.js +82 -0
- package/dist/proxy/requestLogger.d.ts +32 -5
- package/dist/proxy/requestLogger.js +503 -47
- package/dist/proxy/sseInterceptor.d.ts +97 -0
- package/dist/proxy/sseInterceptor.js +426 -0
- package/dist/proxy/usageStats.d.ts +4 -3
- package/dist/proxy/usageStats.js +25 -12
- package/dist/rag/chunkers/MarkdownChunker.js +13 -5
- package/dist/rag/chunking/markdownChunker.js +15 -6
- package/dist/server/routes/claudeProxyRoutes.d.ts +17 -3
- package/dist/server/routes/claudeProxyRoutes.js +3032 -1349
- package/dist/services/server/ai/observability/instrumentation.d.ts +7 -1
- package/dist/services/server/ai/observability/instrumentation.js +337 -161
- package/dist/tasks/backends/bullmqBackend.d.ts +1 -0
- package/dist/tasks/backends/bullmqBackend.js +35 -22
- package/dist/tasks/store/redisTaskStore.d.ts +1 -0
- package/dist/tasks/store/redisTaskStore.js +54 -39
- package/dist/tasks/taskManager.d.ts +5 -0
- package/dist/tasks/taskManager.js +158 -30
- package/dist/telemetry/index.d.ts +2 -1
- package/dist/telemetry/index.js +2 -1
- package/dist/telemetry/telemetryService.d.ts +3 -0
- package/dist/telemetry/telemetryService.js +69 -5
- package/dist/types/cli.d.ts +10 -0
- package/dist/types/proxyTypes.d.ts +160 -5
- package/dist/types/streamTypes.d.ts +25 -3
- package/dist/utils/messageBuilder.js +3 -2
- package/dist/utils/providerHealth.d.ts +19 -0
- package/dist/utils/providerHealth.js +279 -33
- package/dist/utils/providerUtils.js +18 -22
- package/dist/utils/toolChoice.d.ts +4 -0
- package/dist/utils/toolChoice.js +6 -0
- package/docs/assets/dashboards/neurolink-proxy-observability-dashboard.json +6609 -0
- package/docs/changelog.md +252 -0
- package/package.json +19 -2
- package/scripts/observability/check-proxy-telemetry.mjs +235 -0
- package/scripts/observability/docker-compose.proxy-observability.yaml +55 -0
- package/scripts/observability/import-openobserve-dashboard.mjs +240 -0
- package/scripts/observability/manage-local-openobserve.sh +215 -0
- package/scripts/observability/otel-collector.proxy-observability.yaml +78 -0
- package/scripts/observability/proxy-observability.env.example +23 -0
|
@@ -22,6 +22,7 @@ import { createGoogleAuthConfig, createVertexProjectConfig, validateApiKey, } fr
|
|
|
22
22
|
import { convertZodToJsonSchema, inlineJsonSchema, } from "../utils/schemaConversion.js";
|
|
23
23
|
import { composeAbortSignals, createTimeoutController, TimeoutError, } from "../utils/timeout.js";
|
|
24
24
|
import { estimateTokens } from "../utils/tokenEstimation.js";
|
|
25
|
+
import { resolveToolChoice } from "../utils/toolChoice.js";
|
|
25
26
|
import { buildNativeConfig, buildNativeToolDeclarations, collectStreamChunks, collectStreamChunksIncremental, computeMaxSteps as computeMaxStepsShared, createTextChannel, executeNativeToolCalls, extractTextFromParts, handleMaxStepsTermination, pushModelResponseToHistory, sanitizeToolsForGemini, } from "./googleNativeGemini3.js";
|
|
26
27
|
import { getModelId } from "./providerTypeUtils.js";
|
|
27
28
|
// Import proper types for multimodal message handling
|
|
@@ -776,251 +777,76 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
776
777
|
this.validateStreamOptions(options);
|
|
777
778
|
}
|
|
778
779
|
async executeStream(options, analysisSchema) {
|
|
779
|
-
|
|
780
|
-
const
|
|
781
|
-
|
|
782
|
-
|
|
780
|
+
const modelName = this.resolveAlias(options.model || this.modelName || getDefaultVertexModel());
|
|
781
|
+
const nativeGemini3Result = await this.maybeExecuteNativeGemini3ToolStream(options, analysisSchema, modelName);
|
|
782
|
+
if (nativeGemini3Result) {
|
|
783
|
+
return nativeGemini3Result;
|
|
784
|
+
}
|
|
785
|
+
return this.executeAISDKStream(options, analysisSchema, modelName);
|
|
786
|
+
}
|
|
787
|
+
async maybeExecuteNativeGemini3ToolStream(options, analysisSchema, modelName) {
|
|
783
788
|
const wantsStructuredOutput = analysisSchema || options.output?.format === "json" || options.schema;
|
|
784
|
-
|
|
785
|
-
// Need to check early if we should route to native SDK
|
|
786
|
-
const gemini3CheckShouldUseTools = !options.disableTools && this.supportsTools() && !wantsStructuredOutput;
|
|
789
|
+
const shouldUseTools = !options.disableTools && this.supportsTools() && !wantsStructuredOutput;
|
|
787
790
|
const optionTools = options.tools || {};
|
|
788
|
-
const sdkTools =
|
|
791
|
+
const sdkTools = shouldUseTools ? await this.getAllTools() : {};
|
|
789
792
|
const combinedToolCount = Object.keys(optionTools).length + Object.keys(sdkTools).length;
|
|
790
|
-
const hasTools =
|
|
791
|
-
if (isGemini3Model(
|
|
792
|
-
|
|
793
|
-
const processedOptions = await this.processCSVFilesForNativeSDK(options);
|
|
794
|
-
// Merge SDK tools into options for native SDK path
|
|
795
|
-
const mergedOptions = {
|
|
796
|
-
...processedOptions,
|
|
797
|
-
tools: { ...sdkTools, ...optionTools },
|
|
798
|
-
};
|
|
799
|
-
logger.info("[GoogleVertex] Routing Gemini 3 to native SDK for tool calling", {
|
|
800
|
-
model: gemini3CheckModelName,
|
|
801
|
-
optionToolCount: Object.keys(optionTools).length,
|
|
802
|
-
sdkToolCount: Object.keys(sdkTools).length,
|
|
803
|
-
totalToolCount: combinedToolCount,
|
|
804
|
-
});
|
|
805
|
-
return this.executeNativeGemini3Stream(mergedOptions);
|
|
793
|
+
const hasTools = shouldUseTools && combinedToolCount > 0;
|
|
794
|
+
if (!isGemini3Model(modelName) || !hasTools) {
|
|
795
|
+
return null;
|
|
806
796
|
}
|
|
807
|
-
|
|
797
|
+
const processedOptions = await this.processCSVFilesForNativeSDK(options);
|
|
798
|
+
const mergedOptions = {
|
|
799
|
+
...processedOptions,
|
|
800
|
+
tools: { ...sdkTools, ...optionTools },
|
|
801
|
+
};
|
|
802
|
+
logger.info("[GoogleVertex] Routing Gemini 3 to native SDK for tool calling", {
|
|
803
|
+
model: modelName,
|
|
804
|
+
optionToolCount: Object.keys(optionTools).length,
|
|
805
|
+
sdkToolCount: Object.keys(sdkTools).length,
|
|
806
|
+
totalToolCount: combinedToolCount,
|
|
807
|
+
});
|
|
808
|
+
return this.executeNativeGemini3Stream(mergedOptions);
|
|
809
|
+
}
|
|
810
|
+
async executeAISDKStream(options, analysisSchema, modelName) {
|
|
808
811
|
const functionTag = "GoogleVertexProvider.executeStream";
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
812
|
+
const tracking = {
|
|
813
|
+
chunkCount: 0,
|
|
814
|
+
collectedToolCalls: [],
|
|
815
|
+
collectedToolResults: [],
|
|
816
|
+
};
|
|
817
|
+
const timeoutController = createTimeoutController(this.getTimeout(options), this.providerName, "stream");
|
|
813
818
|
try {
|
|
814
|
-
// Validate stream options
|
|
815
819
|
this.validateStreamOptionsOnly(options);
|
|
816
|
-
// Build message array from options with multimodal support
|
|
817
|
-
// Using protected helper from BaseProvider to eliminate code duplication
|
|
818
820
|
const messages = await this.buildMessagesForStream(options);
|
|
819
|
-
const model = await this.getAISDKModelWithMiddleware(options);
|
|
820
|
-
|
|
821
|
-
const
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
if (Object.keys(rawTools).length > 0 && !isAnthropic) {
|
|
830
|
-
const sanitized = sanitizeToolsForGemini(rawTools);
|
|
831
|
-
if (sanitized.dropped.length > 0) {
|
|
832
|
-
logger.warn(`[GoogleVertex] Dropped ${sanitized.dropped.length} incompatible tool(s): ${sanitized.dropped.join(", ")}`);
|
|
833
|
-
}
|
|
834
|
-
tools =
|
|
835
|
-
Object.keys(sanitized.tools).length > 0 ? sanitized.tools : undefined;
|
|
836
|
-
}
|
|
837
|
-
else if (isAnthropic && Object.keys(rawTools).length > 0) {
|
|
838
|
-
// Anthropic models don't need Gemini sanitization — pass tools through
|
|
839
|
-
tools = rawTools;
|
|
840
|
-
}
|
|
841
|
-
else {
|
|
842
|
-
tools = undefined;
|
|
843
|
-
}
|
|
844
|
-
logger.debug(`${functionTag}: Tools for streaming`, {
|
|
821
|
+
const model = await this.getAISDKModelWithMiddleware(options);
|
|
822
|
+
const { shouldUseTools, tools, isAnthropic } = await this.resolveAISDKStreamTools(options, modelName, functionTag);
|
|
823
|
+
const streamOptions = this.buildAISDKStreamOptions({
|
|
824
|
+
options,
|
|
825
|
+
analysisSchema,
|
|
826
|
+
functionTag,
|
|
827
|
+
modelName,
|
|
828
|
+
model,
|
|
829
|
+
messages,
|
|
830
|
+
tools,
|
|
845
831
|
shouldUseTools,
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
toolNames: Object.keys(tools ?? {}),
|
|
850
|
-
});
|
|
851
|
-
// Model-specific maxTokens handling
|
|
852
|
-
const modelName = this.resolveAlias(options.model || this.modelName || getDefaultVertexModel());
|
|
853
|
-
// Use cached model configuration to determine maxTokens handling for streaming performance
|
|
854
|
-
// This avoids hardcoded model-specific logic and repeated config lookups
|
|
855
|
-
const shouldSetMaxTokens = this.shouldSetMaxTokensCached(modelName);
|
|
856
|
-
const maxTokens = shouldSetMaxTokens
|
|
857
|
-
? options.maxTokens // No default limit
|
|
858
|
-
: undefined;
|
|
859
|
-
// Build complete stream options with proper typing
|
|
860
|
-
let streamOptions = {
|
|
861
|
-
model: model,
|
|
862
|
-
messages: messages,
|
|
863
|
-
temperature: options.temperature,
|
|
864
|
-
...(maxTokens && { maxTokens }),
|
|
865
|
-
maxRetries: 0, // NL11: Disable AI SDK's invisible internal retries; we handle retries with OTel instrumentation
|
|
866
|
-
...(shouldUseTools &&
|
|
867
|
-
tools &&
|
|
868
|
-
Object.keys(tools).length > 0 && {
|
|
869
|
-
tools,
|
|
870
|
-
toolChoice: "auto",
|
|
871
|
-
stopWhen: stepCountIs(options.maxSteps || DEFAULT_MAX_STEPS),
|
|
872
|
-
}),
|
|
873
|
-
abortSignal: composeAbortSignals(options.abortSignal, timeoutController?.controller.signal),
|
|
874
|
-
experimental_telemetry: this.telemetryHandler.getTelemetryConfig(options),
|
|
875
|
-
// Gemini 3: use thinkingLevel via providerOptions (Vertex AI)
|
|
876
|
-
// Gemini 2.5: use thinkingBudget via providerOptions
|
|
877
|
-
...(options.thinkingConfig?.enabled && {
|
|
878
|
-
providerOptions: {
|
|
879
|
-
vertex: {
|
|
880
|
-
thinkingConfig: {
|
|
881
|
-
...(options.thinkingConfig.thinkingLevel && {
|
|
882
|
-
thinkingLevel: options.thinkingConfig.thinkingLevel,
|
|
883
|
-
}),
|
|
884
|
-
...(options.thinkingConfig.budgetTokens &&
|
|
885
|
-
!options.thinkingConfig.thinkingLevel && {
|
|
886
|
-
thinkingBudget: options.thinkingConfig.budgetTokens,
|
|
887
|
-
}),
|
|
888
|
-
includeThoughts: true,
|
|
889
|
-
},
|
|
890
|
-
},
|
|
891
|
-
},
|
|
892
|
-
}),
|
|
893
|
-
onError: (event) => {
|
|
894
|
-
const error = event.error;
|
|
895
|
-
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
896
|
-
logger.error(`${functionTag}: Stream error`, {
|
|
897
|
-
provider: this.providerName,
|
|
898
|
-
modelName: this.modelName,
|
|
899
|
-
error: errorMessage,
|
|
900
|
-
chunkCount,
|
|
901
|
-
});
|
|
902
|
-
},
|
|
903
|
-
onFinish: (event) => {
|
|
904
|
-
logger.debug(`${functionTag}: Stream finished`, {
|
|
905
|
-
finishReason: event.finishReason,
|
|
906
|
-
totalChunks: chunkCount,
|
|
907
|
-
});
|
|
908
|
-
},
|
|
909
|
-
onChunk: () => {
|
|
910
|
-
chunkCount++;
|
|
911
|
-
},
|
|
912
|
-
onStepFinish: ({ toolCalls, toolResults }) => {
|
|
913
|
-
logger.info("Tool execution completed", { toolResults, toolCalls });
|
|
914
|
-
// Handle tool execution storage
|
|
915
|
-
this.handleToolExecutionStorage(toolCalls, toolResults, options, new Date()).catch((error) => {
|
|
916
|
-
logger.warn("[GoogleVertexProvider] Failed to store tool executions", {
|
|
917
|
-
provider: this.providerName,
|
|
918
|
-
error: error instanceof Error ? error.message : String(error),
|
|
919
|
-
});
|
|
920
|
-
});
|
|
921
|
-
},
|
|
922
|
-
};
|
|
923
|
-
if (analysisSchema) {
|
|
924
|
-
try {
|
|
925
|
-
// Gemini cannot use tools and JSON schema simultaneously
|
|
926
|
-
if (!isAnthropic) {
|
|
927
|
-
delete streamOptions.tools;
|
|
928
|
-
delete streamOptions.toolChoice;
|
|
929
|
-
delete streamOptions.stopWhen;
|
|
930
|
-
}
|
|
931
|
-
streamOptions = {
|
|
932
|
-
...streamOptions,
|
|
933
|
-
experimental_output: Output.object({
|
|
934
|
-
schema: analysisSchema,
|
|
935
|
-
}),
|
|
936
|
-
};
|
|
937
|
-
}
|
|
938
|
-
catch (error) {
|
|
939
|
-
logger.warn("Schema application failed, continuing without schema", {
|
|
940
|
-
error: String(error),
|
|
941
|
-
});
|
|
942
|
-
}
|
|
943
|
-
}
|
|
944
|
-
// Wrap streamText in an OTel span to capture provider-level latency and token usage
|
|
945
|
-
const streamSpan = streamTracer.startSpan("neurolink.provider.streamText", {
|
|
946
|
-
kind: SpanKind.CLIENT,
|
|
947
|
-
attributes: {
|
|
948
|
-
"gen_ai.system": "vertex",
|
|
949
|
-
"gen_ai.request.model": getModelId(model, this.modelName || "unknown"),
|
|
950
|
-
},
|
|
832
|
+
isAnthropic,
|
|
833
|
+
timeoutController,
|
|
834
|
+
tracking,
|
|
951
835
|
});
|
|
952
|
-
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
streamSpan.setStatus({
|
|
959
|
-
code: SpanStatusCode.ERROR,
|
|
960
|
-
message: err instanceof Error ? err.message : String(err),
|
|
961
|
-
});
|
|
962
|
-
streamSpan.end();
|
|
963
|
-
throw err;
|
|
964
|
-
}
|
|
965
|
-
// Collect token usage and finish reason asynchronously when the stream completes,
|
|
966
|
-
// then end the span. This avoids blocking the stream consumer.
|
|
967
|
-
Promise.resolve(result.usage)
|
|
968
|
-
.then((usage) => {
|
|
969
|
-
streamSpan.setAttribute("gen_ai.usage.input_tokens", usage.inputTokens || 0);
|
|
970
|
-
streamSpan.setAttribute("gen_ai.usage.output_tokens", usage.outputTokens || 0);
|
|
971
|
-
const effectiveModel = options.model ||
|
|
972
|
-
getModelId(model, this.modelName || getDefaultVertexModel());
|
|
973
|
-
const cost = calculateCost(this.providerName, effectiveModel, {
|
|
974
|
-
input: usage.inputTokens || 0,
|
|
975
|
-
output: usage.outputTokens || 0,
|
|
976
|
-
total: (usage.inputTokens || 0) + (usage.outputTokens || 0),
|
|
977
|
-
});
|
|
978
|
-
if (cost && cost > 0) {
|
|
979
|
-
streamSpan.setAttribute("neurolink.cost", cost);
|
|
980
|
-
}
|
|
981
|
-
})
|
|
982
|
-
.catch(() => {
|
|
983
|
-
// Usage may not be available if the stream is aborted
|
|
984
|
-
});
|
|
985
|
-
Promise.resolve(result.finishReason)
|
|
986
|
-
.then((reason) => {
|
|
987
|
-
streamSpan.setAttribute("gen_ai.response.finish_reason", reason || "unknown");
|
|
988
|
-
})
|
|
989
|
-
.catch(() => {
|
|
990
|
-
// Finish reason may not be available if the stream is aborted
|
|
991
|
-
});
|
|
992
|
-
Promise.resolve(result.text)
|
|
993
|
-
.then(() => {
|
|
994
|
-
streamSpan.end();
|
|
995
|
-
})
|
|
996
|
-
.catch((err) => {
|
|
997
|
-
streamSpan.setStatus({
|
|
998
|
-
code: SpanStatusCode.ERROR,
|
|
999
|
-
message: err instanceof Error ? err.message : String(err),
|
|
1000
|
-
});
|
|
1001
|
-
streamSpan.end();
|
|
836
|
+
const result = this.startObservedAISDKStream(streamOptions, model, modelName, options);
|
|
837
|
+
this.observeAISDKStreamResult(result, {
|
|
838
|
+
model,
|
|
839
|
+
modelName,
|
|
840
|
+
options,
|
|
841
|
+
timeoutController,
|
|
1002
842
|
});
|
|
1003
|
-
// Defer timeout cleanup until the stream completes or errors.
|
|
1004
|
-
// Guard against NoOutputGeneratedError becoming an unhandled rejection.
|
|
1005
|
-
Promise.resolve(result.text)
|
|
1006
|
-
.catch((err) => {
|
|
1007
|
-
logger.debug("Stream text promise rejected (expected for empty streams)", {
|
|
1008
|
-
error: err instanceof Error ? err.message : String(err),
|
|
1009
|
-
});
|
|
1010
|
-
})
|
|
1011
|
-
.finally(() => timeoutController?.cleanup());
|
|
1012
|
-
// Transform string stream to content object stream using BaseProvider method
|
|
1013
|
-
const transformedStream = this.createTextStream(result);
|
|
1014
|
-
// Track tool calls and results for streaming
|
|
1015
|
-
const toolCalls = [];
|
|
1016
|
-
const toolResults = [];
|
|
1017
843
|
return {
|
|
1018
|
-
stream:
|
|
844
|
+
stream: this.createTextStream(result),
|
|
1019
845
|
provider: this.providerName,
|
|
1020
846
|
model: this.modelName,
|
|
1021
847
|
...(shouldUseTools && {
|
|
1022
|
-
toolCalls,
|
|
1023
|
-
toolResults,
|
|
848
|
+
toolCalls: tracking.collectedToolCalls,
|
|
849
|
+
toolResults: tracking.collectedToolResults,
|
|
1024
850
|
}),
|
|
1025
851
|
};
|
|
1026
852
|
}
|
|
@@ -1030,11 +856,222 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
1030
856
|
provider: this.providerName,
|
|
1031
857
|
modelName: this.modelName,
|
|
1032
858
|
error: String(error),
|
|
1033
|
-
chunkCount,
|
|
859
|
+
chunkCount: tracking.chunkCount,
|
|
1034
860
|
});
|
|
1035
861
|
throw this.handleProviderError(error);
|
|
1036
862
|
}
|
|
1037
863
|
}
|
|
864
|
+
async resolveAISDKStreamTools(options, modelName, functionTag) {
|
|
865
|
+
const shouldUseTools = !options.disableTools && this.supportsTools();
|
|
866
|
+
const baseStreamTools = shouldUseTools ? await this.getAllTools() : {};
|
|
867
|
+
const rawTools = shouldUseTools
|
|
868
|
+
? { ...baseStreamTools, ...(options.tools || {}) }
|
|
869
|
+
: {};
|
|
870
|
+
const isAnthropic = isAnthropicModel(modelName);
|
|
871
|
+
let tools;
|
|
872
|
+
if (Object.keys(rawTools).length > 0 && !isAnthropic) {
|
|
873
|
+
const sanitized = sanitizeToolsForGemini(rawTools);
|
|
874
|
+
if (sanitized.dropped.length > 0) {
|
|
875
|
+
logger.warn(`[GoogleVertex] Dropped ${sanitized.dropped.length} incompatible tool(s): ${sanitized.dropped.join(", ")}`);
|
|
876
|
+
}
|
|
877
|
+
tools =
|
|
878
|
+
Object.keys(sanitized.tools).length > 0 ? sanitized.tools : undefined;
|
|
879
|
+
}
|
|
880
|
+
else if (isAnthropic && Object.keys(rawTools).length > 0) {
|
|
881
|
+
tools = rawTools;
|
|
882
|
+
}
|
|
883
|
+
else {
|
|
884
|
+
tools = undefined;
|
|
885
|
+
}
|
|
886
|
+
logger.debug(`${functionTag}: Tools for streaming`, {
|
|
887
|
+
shouldUseTools,
|
|
888
|
+
baseToolCount: Object.keys(baseStreamTools).length,
|
|
889
|
+
externalToolCount: Object.keys(options.tools || {}).length,
|
|
890
|
+
toolCount: Object.keys(tools ?? {}).length,
|
|
891
|
+
toolNames: Object.keys(tools ?? {}),
|
|
892
|
+
});
|
|
893
|
+
return {
|
|
894
|
+
shouldUseTools,
|
|
895
|
+
tools,
|
|
896
|
+
isAnthropic,
|
|
897
|
+
baseToolCount: Object.keys(baseStreamTools).length,
|
|
898
|
+
};
|
|
899
|
+
}
|
|
900
|
+
buildAISDKStreamOptions(params) {
|
|
901
|
+
const { options, analysisSchema, functionTag, modelName, model, messages, tools, shouldUseTools, isAnthropic, timeoutController, tracking, } = params;
|
|
902
|
+
const shouldSetMaxTokens = this.shouldSetMaxTokensCached(modelName);
|
|
903
|
+
const maxTokens = shouldSetMaxTokens ? options.maxTokens : undefined;
|
|
904
|
+
let streamOptions = {
|
|
905
|
+
model,
|
|
906
|
+
messages,
|
|
907
|
+
temperature: options.temperature,
|
|
908
|
+
...(maxTokens && { maxTokens }),
|
|
909
|
+
maxRetries: 0,
|
|
910
|
+
...(shouldUseTools &&
|
|
911
|
+
tools &&
|
|
912
|
+
Object.keys(tools).length > 0 && {
|
|
913
|
+
tools,
|
|
914
|
+
toolChoice: resolveToolChoice(options, tools, shouldUseTools),
|
|
915
|
+
stopWhen: stepCountIs(options.maxSteps || DEFAULT_MAX_STEPS),
|
|
916
|
+
}),
|
|
917
|
+
abortSignal: composeAbortSignals(options.abortSignal, timeoutController?.controller.signal),
|
|
918
|
+
experimental_telemetry: this.telemetryHandler.getTelemetryConfig(options),
|
|
919
|
+
...(options.thinkingConfig?.enabled && {
|
|
920
|
+
providerOptions: {
|
|
921
|
+
vertex: {
|
|
922
|
+
thinkingConfig: {
|
|
923
|
+
...(options.thinkingConfig.thinkingLevel && {
|
|
924
|
+
thinkingLevel: options.thinkingConfig.thinkingLevel,
|
|
925
|
+
}),
|
|
926
|
+
...(options.thinkingConfig.budgetTokens &&
|
|
927
|
+
!options.thinkingConfig.thinkingLevel && {
|
|
928
|
+
thinkingBudget: options.thinkingConfig.budgetTokens,
|
|
929
|
+
}),
|
|
930
|
+
includeThoughts: true,
|
|
931
|
+
},
|
|
932
|
+
},
|
|
933
|
+
},
|
|
934
|
+
}),
|
|
935
|
+
onError: (event) => {
|
|
936
|
+
const errorMessage = event.error instanceof Error
|
|
937
|
+
? event.error.message
|
|
938
|
+
: String(event.error);
|
|
939
|
+
logger.error(`${functionTag}: Stream error`, {
|
|
940
|
+
provider: this.providerName,
|
|
941
|
+
modelName: this.modelName,
|
|
942
|
+
error: errorMessage,
|
|
943
|
+
chunkCount: tracking.chunkCount,
|
|
944
|
+
});
|
|
945
|
+
},
|
|
946
|
+
onFinish: (event) => {
|
|
947
|
+
logger.debug(`${functionTag}: Stream finished`, {
|
|
948
|
+
finishReason: event.finishReason,
|
|
949
|
+
totalChunks: tracking.chunkCount,
|
|
950
|
+
});
|
|
951
|
+
},
|
|
952
|
+
onChunk: () => {
|
|
953
|
+
tracking.chunkCount++;
|
|
954
|
+
},
|
|
955
|
+
onStepFinish: ({ toolCalls, toolResults }) => {
|
|
956
|
+
this.captureAISDKStreamToolStep(options, toolCalls, toolResults, tracking);
|
|
957
|
+
},
|
|
958
|
+
};
|
|
959
|
+
if (!analysisSchema) {
|
|
960
|
+
return streamOptions;
|
|
961
|
+
}
|
|
962
|
+
try {
|
|
963
|
+
if (!isAnthropic) {
|
|
964
|
+
delete streamOptions.tools;
|
|
965
|
+
delete streamOptions.toolChoice;
|
|
966
|
+
delete streamOptions.stopWhen;
|
|
967
|
+
}
|
|
968
|
+
streamOptions = {
|
|
969
|
+
...streamOptions,
|
|
970
|
+
experimental_output: Output.object({ schema: analysisSchema }),
|
|
971
|
+
};
|
|
972
|
+
}
|
|
973
|
+
catch (error) {
|
|
974
|
+
logger.warn("Schema application failed, continuing without schema", {
|
|
975
|
+
error: String(error),
|
|
976
|
+
});
|
|
977
|
+
}
|
|
978
|
+
return streamOptions;
|
|
979
|
+
}
|
|
980
|
+
captureAISDKStreamToolStep(options, toolCalls, toolResults, tracking) {
|
|
981
|
+
logger.info("Tool execution completed", { toolResults, toolCalls });
|
|
982
|
+
for (const toolCall of toolCalls) {
|
|
983
|
+
tracking.collectedToolCalls.push({
|
|
984
|
+
toolCallId: toolCall.toolCallId,
|
|
985
|
+
toolName: toolCall.toolName,
|
|
986
|
+
args: toolCall.args ?? toolCall.input ?? toolCall.parameters ?? {},
|
|
987
|
+
});
|
|
988
|
+
}
|
|
989
|
+
for (const toolResult of toolResults) {
|
|
990
|
+
tracking.collectedToolResults.push({
|
|
991
|
+
toolName: toolResult.toolName,
|
|
992
|
+
status: toolResult.error ? "failure" : "success",
|
|
993
|
+
output: (toolResult.output ?? toolResult.result) ??
|
|
994
|
+
undefined,
|
|
995
|
+
error: toolResult.error,
|
|
996
|
+
id: toolResult.toolCallId ?? toolResult.toolName,
|
|
997
|
+
});
|
|
998
|
+
}
|
|
999
|
+
this.handleToolExecutionStorage(toolCalls, toolResults, options, new Date()).catch((error) => {
|
|
1000
|
+
logger.warn("[GoogleVertexProvider] Failed to store tool executions", {
|
|
1001
|
+
provider: this.providerName,
|
|
1002
|
+
error: error instanceof Error ? error.message : String(error),
|
|
1003
|
+
});
|
|
1004
|
+
});
|
|
1005
|
+
}
|
|
1006
|
+
startObservedAISDKStream(streamOptions, model, modelName, options) {
|
|
1007
|
+
const streamSpan = streamTracer.startSpan("neurolink.provider.streamText", {
|
|
1008
|
+
kind: SpanKind.CLIENT,
|
|
1009
|
+
attributes: {
|
|
1010
|
+
"gen_ai.system": "vertex",
|
|
1011
|
+
"gen_ai.request.model": getModelId(model, this.modelName || "unknown"),
|
|
1012
|
+
},
|
|
1013
|
+
});
|
|
1014
|
+
try {
|
|
1015
|
+
const result = streamText(streamOptions);
|
|
1016
|
+
this.attachAISDKStreamObservers(result, streamSpan, model, modelName, options);
|
|
1017
|
+
return result;
|
|
1018
|
+
}
|
|
1019
|
+
catch (error) {
|
|
1020
|
+
streamSpan.recordException(error instanceof Error ? error : new Error(String(error)));
|
|
1021
|
+
streamSpan.setStatus({
|
|
1022
|
+
code: SpanStatusCode.ERROR,
|
|
1023
|
+
message: error instanceof Error ? error.message : String(error),
|
|
1024
|
+
});
|
|
1025
|
+
streamSpan.end();
|
|
1026
|
+
throw error;
|
|
1027
|
+
}
|
|
1028
|
+
}
|
|
1029
|
+
attachAISDKStreamObservers(result, streamSpan, model, modelName, options) {
|
|
1030
|
+
Promise.resolve(result.usage)
|
|
1031
|
+
.then((usage) => {
|
|
1032
|
+
streamSpan.setAttribute("gen_ai.usage.input_tokens", usage.inputTokens || 0);
|
|
1033
|
+
streamSpan.setAttribute("gen_ai.usage.output_tokens", usage.outputTokens || 0);
|
|
1034
|
+
const effectiveModel = options.model ||
|
|
1035
|
+
getModelId(model, modelName || getDefaultVertexModel());
|
|
1036
|
+
const cost = calculateCost(this.providerName, effectiveModel, {
|
|
1037
|
+
input: usage.inputTokens || 0,
|
|
1038
|
+
output: usage.outputTokens || 0,
|
|
1039
|
+
total: (usage.inputTokens || 0) + (usage.outputTokens || 0),
|
|
1040
|
+
});
|
|
1041
|
+
if (cost && cost > 0) {
|
|
1042
|
+
streamSpan.setAttribute("neurolink.cost", cost);
|
|
1043
|
+
}
|
|
1044
|
+
})
|
|
1045
|
+
.catch(() => undefined);
|
|
1046
|
+
Promise.resolve(result.finishReason)
|
|
1047
|
+
.then((reason) => {
|
|
1048
|
+
streamSpan.setAttribute("gen_ai.response.finish_reason", reason || "unknown");
|
|
1049
|
+
})
|
|
1050
|
+
.catch(() => undefined);
|
|
1051
|
+
Promise.resolve(result.text)
|
|
1052
|
+
.then(() => {
|
|
1053
|
+
streamSpan.end();
|
|
1054
|
+
})
|
|
1055
|
+
.catch((error) => {
|
|
1056
|
+
streamSpan.setStatus({
|
|
1057
|
+
code: SpanStatusCode.ERROR,
|
|
1058
|
+
message: error instanceof Error ? error.message : String(error),
|
|
1059
|
+
});
|
|
1060
|
+
streamSpan.end();
|
|
1061
|
+
});
|
|
1062
|
+
}
|
|
1063
|
+
observeAISDKStreamResult(result, params) {
|
|
1064
|
+
void params.model;
|
|
1065
|
+
void params.modelName;
|
|
1066
|
+
void params.options;
|
|
1067
|
+
Promise.resolve(result.text)
|
|
1068
|
+
.catch((error) => {
|
|
1069
|
+
logger.debug("Stream text promise rejected (expected for empty streams)", {
|
|
1070
|
+
error: error instanceof Error ? error.message : String(error),
|
|
1071
|
+
});
|
|
1072
|
+
})
|
|
1073
|
+
.finally(() => params.timeoutController?.cleanup());
|
|
1074
|
+
}
|
|
1038
1075
|
/**
|
|
1039
1076
|
* Create @google/genai client configured for Vertex AI
|
|
1040
1077
|
*/
|
|
@@ -1189,210 +1226,187 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
1189
1226
|
[ATTR.GEN_AI_OPERATION]: "stream",
|
|
1190
1227
|
[ATTR.NL_PROVIDER]: this.providerName,
|
|
1191
1228
|
},
|
|
1192
|
-
},
|
|
1193
|
-
|
|
1194
|
-
|
|
1195
|
-
|
|
1196
|
-
|
|
1197
|
-
|
|
1198
|
-
|
|
1199
|
-
|
|
1229
|
+
}, (span) => this.executeNativeGemini3StreamWithSpan(options, modelName, span));
|
|
1230
|
+
}
|
|
1231
|
+
async executeNativeGemini3StreamWithSpan(options, modelName, span) {
|
|
1232
|
+
const client = await this.createVertexGenAIClient(options.region);
|
|
1233
|
+
const effectiveLocation = options.region || this.location || getVertexLocation();
|
|
1234
|
+
logger.debug("[GoogleVertex] Using native @google/genai for Gemini 3", {
|
|
1235
|
+
model: modelName,
|
|
1236
|
+
hasTools: !!options.tools && Object.keys(options.tools).length > 0,
|
|
1237
|
+
project: this.projectId,
|
|
1238
|
+
location: effectiveLocation,
|
|
1239
|
+
});
|
|
1240
|
+
const multimodalInput = options.input;
|
|
1241
|
+
const contents = this.buildNativeContentParts(options.input.text, multimodalInput, "native stream");
|
|
1242
|
+
let hasToolsInput = !!options.tools &&
|
|
1243
|
+
Object.keys(options.tools).length > 0 &&
|
|
1244
|
+
!options.disableTools;
|
|
1245
|
+
const streamOptions = options;
|
|
1246
|
+
const wantsJsonOutput = streamOptions.output?.format === "json" || streamOptions.schema;
|
|
1247
|
+
if (wantsJsonOutput && hasToolsInput) {
|
|
1248
|
+
logger.warn("[GoogleVertex] Gemini does not support tools and JSON schema output simultaneously. Disabling tools for this request.");
|
|
1249
|
+
hasToolsInput = false;
|
|
1250
|
+
}
|
|
1251
|
+
let toolsConfig;
|
|
1252
|
+
let executeMap = new Map();
|
|
1253
|
+
if (hasToolsInput) {
|
|
1254
|
+
const toolDeclarationResult = buildNativeToolDeclarations(options.tools);
|
|
1255
|
+
toolsConfig = toolDeclarationResult.toolsConfig;
|
|
1256
|
+
executeMap = toolDeclarationResult.executeMap;
|
|
1257
|
+
logger.debug("[GoogleVertex] Converted tools for native SDK", {
|
|
1258
|
+
toolCount: toolsConfig[0].functionDeclarations.length,
|
|
1259
|
+
toolNames: toolsConfig[0].functionDeclarations.map((tool) => tool.name),
|
|
1200
1260
|
});
|
|
1201
|
-
|
|
1202
|
-
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
|
|
1207
|
-
|
|
1208
|
-
|
|
1209
|
-
|
|
1210
|
-
|
|
1211
|
-
|
|
1212
|
-
logger.
|
|
1213
|
-
|
|
1214
|
-
}
|
|
1215
|
-
let toolsConfig;
|
|
1216
|
-
let executeMap = new Map();
|
|
1217
|
-
if (hasToolsInput) {
|
|
1218
|
-
const result = buildNativeToolDeclarations(options.tools);
|
|
1219
|
-
toolsConfig = result.toolsConfig;
|
|
1220
|
-
executeMap = result.executeMap;
|
|
1221
|
-
logger.debug("[GoogleVertex] Converted tools for native SDK", {
|
|
1222
|
-
toolCount: toolsConfig[0].functionDeclarations.length,
|
|
1223
|
-
toolNames: toolsConfig[0].functionDeclarations.map((t) => t.name),
|
|
1261
|
+
}
|
|
1262
|
+
const config = buildNativeConfig(options, toolsConfig);
|
|
1263
|
+
if (wantsJsonOutput) {
|
|
1264
|
+
config.responseMimeType = "application/json";
|
|
1265
|
+
if (streamOptions.schema) {
|
|
1266
|
+
const rawSchema = convertZodToJsonSchema(streamOptions.schema);
|
|
1267
|
+
const inlinedSchema = inlineJsonSchema(rawSchema);
|
|
1268
|
+
if (inlinedSchema.$schema) {
|
|
1269
|
+
delete inlinedSchema.$schema;
|
|
1270
|
+
}
|
|
1271
|
+
config.responseSchema = inlinedSchema;
|
|
1272
|
+
logger.debug("[GoogleVertex] Added responseSchema for JSON output (stream)", {
|
|
1273
|
+
schemaKeys: Object.keys(inlinedSchema),
|
|
1224
1274
|
});
|
|
1225
1275
|
}
|
|
1226
|
-
|
|
1227
|
-
|
|
1228
|
-
|
|
1229
|
-
|
|
1230
|
-
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1234
|
-
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
|
|
1239
|
-
|
|
1240
|
-
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
|
|
1244
|
-
|
|
1245
|
-
|
|
1276
|
+
}
|
|
1277
|
+
const startTime = Date.now();
|
|
1278
|
+
const timeoutController = createTimeoutController(this.getTimeout(options), this.providerName, "stream");
|
|
1279
|
+
const composedSignal = composeAbortSignals(options.abortSignal, timeoutController?.controller.signal);
|
|
1280
|
+
const maxSteps = computeMaxStepsShared(options.maxSteps);
|
|
1281
|
+
const currentContents = this.prependConversationHistory([...contents], options.conversationMessages);
|
|
1282
|
+
const channel = createTextChannel();
|
|
1283
|
+
const allToolCalls = [];
|
|
1284
|
+
const metadata = {
|
|
1285
|
+
streamId: `native-vertex-${Date.now()}`,
|
|
1286
|
+
startTime,
|
|
1287
|
+
responseTime: 0,
|
|
1288
|
+
totalToolExecutions: 0,
|
|
1289
|
+
};
|
|
1290
|
+
let analyticsResolve;
|
|
1291
|
+
let analyticsReject;
|
|
1292
|
+
const analyticsPromise = new Promise((resolve, reject) => {
|
|
1293
|
+
analyticsResolve = resolve;
|
|
1294
|
+
analyticsReject = reject;
|
|
1295
|
+
});
|
|
1296
|
+
const loopPromise = this.runNativeGemini3StreamLoop({
|
|
1297
|
+
client,
|
|
1298
|
+
modelName,
|
|
1299
|
+
span,
|
|
1300
|
+
config,
|
|
1301
|
+
currentContents,
|
|
1302
|
+
executeMap,
|
|
1303
|
+
channel,
|
|
1304
|
+
allToolCalls,
|
|
1305
|
+
metadata,
|
|
1306
|
+
analyticsResolve,
|
|
1307
|
+
analyticsReject,
|
|
1308
|
+
startTime,
|
|
1309
|
+
timeoutController,
|
|
1310
|
+
composedSignal,
|
|
1311
|
+
maxSteps,
|
|
1312
|
+
});
|
|
1313
|
+
loopPromise.catch(() => undefined);
|
|
1314
|
+
return {
|
|
1315
|
+
stream: channel.iterable,
|
|
1316
|
+
provider: this.providerName,
|
|
1317
|
+
model: modelName,
|
|
1318
|
+
toolCalls: allToolCalls,
|
|
1319
|
+
analytics: analyticsPromise,
|
|
1320
|
+
metadata,
|
|
1321
|
+
};
|
|
1322
|
+
}
|
|
1323
|
+
async runNativeGemini3StreamLoop(params) {
|
|
1324
|
+
let lastStepText = "";
|
|
1325
|
+
let totalInputTokens = 0;
|
|
1326
|
+
let totalOutputTokens = 0;
|
|
1327
|
+
let step = 0;
|
|
1328
|
+
let completedWithFinalAnswer = false;
|
|
1329
|
+
const failedTools = new Map();
|
|
1330
|
+
try {
|
|
1331
|
+
while (step < params.maxSteps) {
|
|
1332
|
+
if (params.composedSignal?.aborted) {
|
|
1333
|
+
throw params.composedSignal.reason instanceof Error
|
|
1334
|
+
? params.composedSignal.reason
|
|
1335
|
+
: new Error("Request aborted");
|
|
1246
1336
|
}
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
const timeout = this.getTimeout(options);
|
|
1250
|
-
const timeoutController = createTimeoutController(timeout, this.providerName, "stream");
|
|
1251
|
-
const composedSignal = composeAbortSignals(options.abortSignal, timeoutController?.controller.signal);
|
|
1252
|
-
const maxSteps = computeMaxStepsShared(options.maxSteps);
|
|
1253
|
-
// Inject conversation history so the native path has multi-turn context
|
|
1254
|
-
const currentContents = this.prependConversationHistory([...contents], options.conversationMessages);
|
|
1255
|
-
// Create a push-based text channel so the caller receives tokens as
|
|
1256
|
-
// they arrive from the network rather than after full buffering.
|
|
1257
|
-
const channel = createTextChannel();
|
|
1258
|
-
// Shared mutable state updated by the background agentic loop.
|
|
1259
|
-
const allToolCalls = [];
|
|
1260
|
-
// Shared metadata object mutated by the background loop so that
|
|
1261
|
-
// responseTime and totalToolExecutions reflect final values.
|
|
1262
|
-
const metadata = {
|
|
1263
|
-
streamId: `native-vertex-${Date.now()}`,
|
|
1264
|
-
startTime,
|
|
1265
|
-
responseTime: 0,
|
|
1266
|
-
totalToolExecutions: 0,
|
|
1267
|
-
};
|
|
1268
|
-
// analyticsResolvers lets the background loop settle the analytics
|
|
1269
|
-
// promise once token counts are known (after the loop completes).
|
|
1270
|
-
let analyticsResolve;
|
|
1271
|
-
let analyticsReject;
|
|
1272
|
-
const analyticsPromise = new Promise((res, rej) => {
|
|
1273
|
-
analyticsResolve = res;
|
|
1274
|
-
analyticsReject = rej;
|
|
1275
|
-
});
|
|
1276
|
-
// Run the agentic loop in the background without awaiting it here,
|
|
1277
|
-
// so we can return the StreamResult (with channel.iterable) immediately.
|
|
1278
|
-
const loopPromise = (async () => {
|
|
1279
|
-
let lastStepText = "";
|
|
1280
|
-
let totalInputTokens = 0;
|
|
1281
|
-
let totalOutputTokens = 0;
|
|
1282
|
-
let step = 0;
|
|
1283
|
-
let completedWithFinalAnswer = false;
|
|
1284
|
-
const failedTools = new Map();
|
|
1337
|
+
step++;
|
|
1338
|
+
logger.debug(`[GoogleVertex] Native SDK step ${step}/${params.maxSteps}`);
|
|
1285
1339
|
try {
|
|
1286
|
-
|
|
1287
|
-
|
|
1288
|
-
|
|
1289
|
-
|
|
1290
|
-
|
|
1291
|
-
|
|
1292
|
-
|
|
1293
|
-
|
|
1294
|
-
|
|
1295
|
-
|
|
1296
|
-
|
|
1297
|
-
|
|
1298
|
-
|
|
1299
|
-
|
|
1300
|
-
|
|
1301
|
-
? { httpOptions: { signal: composedSignal } }
|
|
1302
|
-
: {}),
|
|
1303
|
-
});
|
|
1304
|
-
// For every step, use incremental collection so text parts
|
|
1305
|
-
// are pushed to the channel as they arrive. For intermediate
|
|
1306
|
-
// steps (those that produce function calls) we still need the
|
|
1307
|
-
// complete rawResponseParts for pushModelResponseToHistory,
|
|
1308
|
-
// which collectStreamChunksIncremental provides at stream end.
|
|
1309
|
-
const chunkResult = await collectStreamChunksIncremental(rawStream, channel);
|
|
1310
|
-
totalInputTokens += chunkResult.inputTokens;
|
|
1311
|
-
totalOutputTokens += chunkResult.outputTokens;
|
|
1312
|
-
const stepText = extractTextFromParts(chunkResult.rawResponseParts);
|
|
1313
|
-
// If no function calls, this was the final step — channel
|
|
1314
|
-
// already received all text parts incrementally.
|
|
1315
|
-
if (chunkResult.stepFunctionCalls.length === 0) {
|
|
1316
|
-
completedWithFinalAnswer = true;
|
|
1317
|
-
break;
|
|
1318
|
-
}
|
|
1319
|
-
lastStepText = stepText;
|
|
1320
|
-
// Record tool call events on the span
|
|
1321
|
-
for (const fc of chunkResult.stepFunctionCalls) {
|
|
1322
|
-
span.addEvent("gen_ai.tool_call", {
|
|
1323
|
-
"tool.name": fc.name,
|
|
1324
|
-
"tool.step": step,
|
|
1325
|
-
});
|
|
1326
|
-
}
|
|
1327
|
-
logger.debug(`[GoogleVertex] Executing ${chunkResult.stepFunctionCalls.length} function calls`);
|
|
1328
|
-
pushModelResponseToHistory(currentContents, chunkResult.rawResponseParts, chunkResult.stepFunctionCalls);
|
|
1329
|
-
const functionResponses = await executeNativeToolCalls("[GoogleVertex]", chunkResult.stepFunctionCalls, executeMap, failedTools, allToolCalls, { abortSignal: composedSignal });
|
|
1330
|
-
// Function/tool responses must use role: "user" — the
|
|
1331
|
-
// @google/genai SDK's validateHistory() only accepts "user"
|
|
1332
|
-
// and "model" roles (matching automaticFunctionCalling).
|
|
1333
|
-
currentContents.push({
|
|
1334
|
-
role: "user",
|
|
1335
|
-
parts: functionResponses,
|
|
1336
|
-
});
|
|
1337
|
-
}
|
|
1338
|
-
catch (error) {
|
|
1339
|
-
logger.error("[GoogleVertex] Native SDK error", error);
|
|
1340
|
-
throw this.handleProviderError(error);
|
|
1341
|
-
}
|
|
1340
|
+
const rawStream = await params.client.models.generateContentStream({
|
|
1341
|
+
model: params.modelName,
|
|
1342
|
+
contents: params.currentContents,
|
|
1343
|
+
config: params.config,
|
|
1344
|
+
...(params.composedSignal
|
|
1345
|
+
? { httpOptions: { signal: params.composedSignal } }
|
|
1346
|
+
: {}),
|
|
1347
|
+
});
|
|
1348
|
+
const chunkResult = await collectStreamChunksIncremental(rawStream, params.channel);
|
|
1349
|
+
totalInputTokens += chunkResult.inputTokens;
|
|
1350
|
+
totalOutputTokens += chunkResult.outputTokens;
|
|
1351
|
+
const stepText = extractTextFromParts(chunkResult.rawResponseParts);
|
|
1352
|
+
if (chunkResult.stepFunctionCalls.length === 0) {
|
|
1353
|
+
completedWithFinalAnswer = true;
|
|
1354
|
+
break;
|
|
1342
1355
|
}
|
|
1343
|
-
|
|
1344
|
-
|
|
1345
|
-
|
|
1346
|
-
|
|
1347
|
-
|
|
1348
|
-
|
|
1349
|
-
channel.push(fallback);
|
|
1350
|
-
}
|
|
1356
|
+
lastStepText = stepText;
|
|
1357
|
+
for (const functionCall of chunkResult.stepFunctionCalls) {
|
|
1358
|
+
params.span.addEvent("gen_ai.tool_call", {
|
|
1359
|
+
"tool.name": functionCall.name,
|
|
1360
|
+
"tool.step": step,
|
|
1361
|
+
});
|
|
1351
1362
|
}
|
|
1352
|
-
|
|
1353
|
-
|
|
1354
|
-
|
|
1355
|
-
|
|
1356
|
-
|
|
1357
|
-
|
|
1358
|
-
span.setAttribute(ATTR.GEN_AI_INPUT_TOKENS, totalInputTokens);
|
|
1359
|
-
span.setAttribute(ATTR.GEN_AI_OUTPUT_TOKENS, totalOutputTokens);
|
|
1360
|
-
span.setAttribute(ATTR.GEN_AI_FINISH_REASON, step >= maxSteps && !completedWithFinalAnswer
|
|
1361
|
-
? "max_steps"
|
|
1362
|
-
: "stop");
|
|
1363
|
-
analyticsResolve({
|
|
1364
|
-
provider: this.providerName,
|
|
1365
|
-
model: modelName,
|
|
1366
|
-
tokenUsage: {
|
|
1367
|
-
input: totalInputTokens,
|
|
1368
|
-
output: totalOutputTokens,
|
|
1369
|
-
total: totalInputTokens + totalOutputTokens,
|
|
1370
|
-
},
|
|
1371
|
-
requestDuration: responseTime,
|
|
1372
|
-
timestamp: new Date().toISOString(),
|
|
1363
|
+
logger.debug(`[GoogleVertex] Executing ${chunkResult.stepFunctionCalls.length} function calls`);
|
|
1364
|
+
pushModelResponseToHistory(params.currentContents, chunkResult.rawResponseParts, chunkResult.stepFunctionCalls);
|
|
1365
|
+
const functionResponses = await executeNativeToolCalls("[GoogleVertex]", chunkResult.stepFunctionCalls, params.executeMap, failedTools, params.allToolCalls, { abortSignal: params.composedSignal });
|
|
1366
|
+
params.currentContents.push({
|
|
1367
|
+
role: "user",
|
|
1368
|
+
parts: functionResponses,
|
|
1373
1369
|
});
|
|
1374
|
-
channel.close();
|
|
1375
1370
|
}
|
|
1376
|
-
catch (
|
|
1377
|
-
|
|
1378
|
-
|
|
1371
|
+
catch (error) {
|
|
1372
|
+
logger.error("[GoogleVertex] Native SDK error", error);
|
|
1373
|
+
throw this.handleProviderError(error);
|
|
1379
1374
|
}
|
|
1380
|
-
|
|
1381
|
-
|
|
1375
|
+
}
|
|
1376
|
+
if (step >= params.maxSteps && !completedWithFinalAnswer) {
|
|
1377
|
+
const fallback = handleMaxStepsTermination("[GoogleVertex]", step, params.maxSteps, "", lastStepText);
|
|
1378
|
+
if (fallback) {
|
|
1379
|
+
params.channel.push(fallback);
|
|
1382
1380
|
}
|
|
1383
|
-
}
|
|
1384
|
-
|
|
1385
|
-
|
|
1386
|
-
|
|
1387
|
-
|
|
1388
|
-
|
|
1381
|
+
}
|
|
1382
|
+
const responseTime = Date.now() - params.startTime;
|
|
1383
|
+
params.metadata.responseTime = responseTime;
|
|
1384
|
+
params.metadata.totalToolExecutions = params.allToolCalls.length;
|
|
1385
|
+
params.span.setAttribute(ATTR.GEN_AI_INPUT_TOKENS, totalInputTokens);
|
|
1386
|
+
params.span.setAttribute(ATTR.GEN_AI_OUTPUT_TOKENS, totalOutputTokens);
|
|
1387
|
+
params.span.setAttribute(ATTR.GEN_AI_FINISH_REASON, step >= params.maxSteps && !completedWithFinalAnswer
|
|
1388
|
+
? "max_steps"
|
|
1389
|
+
: "stop");
|
|
1390
|
+
params.analyticsResolve({
|
|
1389
1391
|
provider: this.providerName,
|
|
1390
|
-
model: modelName,
|
|
1391
|
-
|
|
1392
|
-
|
|
1393
|
-
|
|
1394
|
-
|
|
1395
|
-
|
|
1392
|
+
model: params.modelName,
|
|
1393
|
+
tokenUsage: {
|
|
1394
|
+
input: totalInputTokens,
|
|
1395
|
+
output: totalOutputTokens,
|
|
1396
|
+
total: totalInputTokens + totalOutputTokens,
|
|
1397
|
+
},
|
|
1398
|
+
requestDuration: responseTime,
|
|
1399
|
+
timestamp: new Date().toISOString(),
|
|
1400
|
+
});
|
|
1401
|
+
params.channel.close();
|
|
1402
|
+
}
|
|
1403
|
+
catch (error) {
|
|
1404
|
+
params.channel.error(error);
|
|
1405
|
+
params.analyticsReject(error);
|
|
1406
|
+
}
|
|
1407
|
+
finally {
|
|
1408
|
+
params.timeoutController?.cleanup();
|
|
1409
|
+
}
|
|
1396
1410
|
}
|
|
1397
1411
|
/**
|
|
1398
1412
|
* Execute generate using native @google/genai SDK for Gemini 3 models on Vertex AI
|