npm - @vscode/chat-lib - Versions diffs - 0.4.1-9 → 0.5.1-0 - Mend

@vscode/chat-lib 0.4.1-9 → 0.5.1-0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (178)

package/dist/src/_internal/extension/prompt/node/chatMLFetcher.js CHANGED

@@ -12,6 +12,7 @@ var __decorate = (this && this.__decorate) || function (decorators, target, key,
 var __param = (this && this.__param) || function (paramIndex, decorator) {
     return function (target, key) { decorator(target, key, paramIndex); }
 };
+var ChatMLFetcherImpl_1;
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.ChatMLFetcherImpl = exports.AbstractChatMLFetcher = void 0;
 exports.createTelemetryData = createTelemetryData;
@@ -35,8 +36,11 @@ const fetcherService_1 = require("../../../platform/networking/common/fetcherSer
 const networking_1 = require("../../../platform/networking/common/networking");
 const openai_1 = require("../../../platform/networking/common/openai");
 const chatStream_1 = require("../../../platform/networking/node/chatStream");
+const chatWebSocketManager_1 = require("../../../platform/networking/node/chatWebSocketManager");
 const stream_1 = require("../../../platform/networking/node/stream");
 const fetch_2 = require("../../../platform/openai/node/fetch");
+const index_1 = require("../../../platform/otel/common/index");
+const otelService_1 = require("../../../platform/otel/common/otelService");
 const requestLogger_1 = require("../../../platform/requestLogger/node/requestLogger");
 const nullExperimentationService_1 = require("../../../platform/telemetry/common/nullExperimentationService");
 const telemetry_1 = require("../../../platform/telemetry/common/telemetry");
@@ -83,7 +87,9 @@ class AbstractChatMLFetcher extends lifecycle_1.Disposable {
 }
 exports.AbstractChatMLFetcher = AbstractChatMLFetcher;
 let ChatMLFetcherImpl = class ChatMLFetcherImpl extends AbstractChatMLFetcher {
-    constructor(_fetcherService, _telemetryService, _requestLogger, _logService, _authenticationService, _interactionService, _chatQuotaService, _capiClientService, options, _configurationService, _experimentationService, _powerService, _instantiationService) {
+    static { ChatMLFetcherImpl_1 = this; }
+    static { this._maxConsecutiveWebSocketFallbacks = 3; }
+    constructor(_fetcherService, _telemetryService, _requestLogger, _logService, _authenticationService, _interactionService, _chatQuotaService, _capiClientService, options, _configurationService, _experimentationService, _powerService, _instantiationService, _webSocketManager, _otelService) {
         super(options);
         this._fetcherService = _fetcherService;
         this._telemetryService = _telemetryService;
@@ -97,23 +103,36 @@ let ChatMLFetcherImpl = class ChatMLFetcherImpl extends AbstractChatMLFetcher {
         this._experimentationService = _experimentationService;
         this._powerService = _powerService;
         this._instantiationService = _instantiationService;
+        this._webSocketManager = _webSocketManager;
+        this._otelService = _otelService;
         /**
          * Delays (in ms) between connectivity check attempts before retrying a failed request.
          * Configurable for testing purposes.
          */
         this.connectivityCheckDelays = [1000, 10000, 10000];
+        /**
+         * Tracks consecutive WebSocket request failures where the HTTP retry succeeded.
+         * After {@link _maxConsecutiveWebSocketFallbacks} such failures, WebSocket requests are disabled entirely.
+         */
+        this._consecutiveWebSocketRetryFallbacks = 0;
     }
     /**
      * Note: the returned array of strings may be less than `n` (e.g., in case there were errors during streaming)
      */
     async fetchMany(opts, token) {
-        let { debugName, endpoint: chatEndpoint, finishedCb, location, messages, requestOptions, source, telemetryProperties, userInitiatedRequest, requestKindOptions } = opts;
+        let { debugName, endpoint: chatEndpoint, finishedCb, location, messages, requestOptions, source, telemetryProperties, userInitiatedRequest, requestKindOptions, conversationId, turnId, useWebSocket, ignoreStatefulMarker } = opts;
+        if (useWebSocket && this._consecutiveWebSocketRetryFallbacks >= ChatMLFetcherImpl_1._maxConsecutiveWebSocketFallbacks) {
+            this._logService.debug(`[ChatWebSocketManager] Disabling WebSocket for request due to ${this._consecutiveWebSocketRetryFallbacks} consecutive WebSocket failures with successful HTTP fallback.`);
+            useWebSocket = false;
+            ignoreStatefulMarker = true;
+        }
         if (!telemetryProperties) {
             telemetryProperties = {};
         }
         if (!telemetryProperties.messageSource) {
             telemetryProperties.messageSource = debugName;
         }
+        const transport = useWebSocket ? 'websocket' : 'http';
         // TODO @lramos15 telemetry should not drive request ids
         const ourRequestId = telemetryProperties.requestId ?? telemetryProperties.messageId ?? (0, uuid_1.generateUuid)();
         const maxResponseTokens = chatEndpoint.maxOutputTokens;
@@ -127,6 +146,7 @@ let ChatMLFetcherImpl = class ChatMLFetcherImpl extends AbstractChatMLFetcher {
         const postOptions = this.preparePostOptions(requestOptions);
         const requestBody = chatEndpoint.createRequestBody({
             ...opts,
+            ignoreStatefulMarker,
             requestId: ourRequestId,
             postOptions
         });
@@ -141,7 +161,7 @@ let ChatMLFetcherImpl = class ChatMLFetcherImpl extends AbstractChatMLFetcher {
             ourRequestId,
             location: opts.location,
             body: requestBody,
-            ignoreStatefulMarker: opts.ignoreStatefulMarker,
+            ignoreStatefulMarker,
             isConversationRequest: opts.isConversationRequest,
             customMetadata: opts.customMetadata
         });
@@ -155,6 +175,7 @@ let ChatMLFetcherImpl = class ChatMLFetcherImpl extends AbstractChatMLFetcher {
         let actualStatusCode;
         let suspendEventSeen;
         let resumeEventSeen;
+        let otelInferenceSpan;
         try {
             let response;
             const payloadValidationResult = isValidChatPayload(opts.messages, postOptions, chatEndpoint, this._configurationService, this._experimentationService);
@@ -169,13 +190,38 @@ let ChatMLFetcherImpl = class ChatMLFetcherImpl extends AbstractChatMLFetcher {
             else {
                 const copilotToken = await this._authenticationService.getCopilotToken();
                 usernameToScrub = copilotToken.username;
-                const fetchResult = await this._fetchAndStreamChat(chatEndpoint, requestBody, baseTelemetry, streamRecorder.callback, requestOptions.secretKey, copilotToken, opts.location, ourRequestId, postOptions.n, token, userInitiatedRequest, telemetryProperties, opts.useFetcher, canRetryOnce, requestKindOptions);
+                const fetchResult = await this._fetchAndStreamChat(chatEndpoint, requestBody, baseTelemetry, streamRecorder.callback, requestOptions.secretKey, copilotToken, opts.location, ourRequestId, postOptions.n, token, userInitiatedRequest, useWebSocket, turnId, conversationId, telemetryProperties, opts.useFetcher, canRetryOnce, requestKindOptions);
                 response = fetchResult.result;
                 actualFetcher = fetchResult.fetcher;
                 actualBytesReceived = fetchResult.bytesReceived;
                 actualStatusCode = fetchResult.statusCode;
                 suspendEventSeen = fetchResult.suspendEventSeen;
                 resumeEventSeen = fetchResult.resumeEventSeen;
+                otelInferenceSpan = fetchResult.otelSpan;
+                // Tag span with debug name so orphaned spans (title, progressMessages, etc.) are identifiable
+                otelInferenceSpan?.setAttribute(index_1.GenAiAttr.AGENT_NAME, debugName);
+                // Extract and set structured prompt sections for the debug panel
+                if (otelInferenceSpan) {
+                    const capiMessages = requestBody.messages;
+                    // User request: last user-role message
+                    const userMessages = capiMessages?.filter(m => m.role === 'user');
+                    const lastUserMsg = userMessages?.[userMessages.length - 1];
+                    if (lastUserMsg?.content) {
+                        otelInferenceSpan.setAttribute(index_1.CopilotChatAttr.USER_REQUEST, lastUserMsg.content);
+                    }
+                    // System instructions (first system message) — raw text for debug panel sections
+                    const systemMsg = capiMessages?.find(m => m.role === 'system');
+                    if (systemMsg?.content) {
+                        otelInferenceSpan.setAttribute(index_1.GenAiAttr.SYSTEM_INSTRUCTIONS, systemMsg.content);
+                    }
+                }
+                // Always capture full request content for the debug panel
+                if (otelInferenceSpan) {
+                    const capiMessages = requestBody.messages;
+                    if (capiMessages) {
+                        otelInferenceSpan.setAttribute(index_1.GenAiAttr.INPUT_MESSAGES, (0, index_1.truncateForOTel)(JSON.stringify((0, index_1.toInputMessages)(capiMessages))));
+                    }
+                }
                 tokenCount = await chatEndpoint.acquireTokenizer().countMessagesTokens(messages);
                 const extensionId = source?.extensionId ?? constants_1.EXTENSION_ID;
                 this._onDidMakeChatMLRequest.fire({
@@ -189,7 +235,7 @@ let ChatMLFetcherImpl = class ChatMLFetcherImpl extends AbstractChatMLFetcher {
             pendingLoggedChatRequest?.markTimeToFirstToken(timeToFirstToken);
             switch (response.type) {
                 case fetch_2.FetchResponseKind.Success: {
-                    const result = await this.processSuccessfulResponse(response, messages, requestBody, ourRequestId, maxResponseTokens, tokenCount, timeToFirstToken, streamRecorder, baseTelemetry, chatEndpoint, userInitiatedRequest, actualFetcher, actualBytesReceived, suspendEventSeen, resumeEventSeen);
+                    const result = await this.processSuccessfulResponse(response, messages, requestBody, ourRequestId, maxResponseTokens, tokenCount, timeToFirstToken, streamRecorder, baseTelemetry, chatEndpoint, userInitiatedRequest, transport, actualFetcher, actualBytesReceived, suspendEventSeen, resumeEventSeen);
                     // Handle FilteredRetry case with augmented messages
                     if (result.type === commonTypes_1.ChatFetchResponseType.FilteredRetry) {
                         if (opts.enableRetryOnFilter) {
@@ -237,6 +283,76 @@ let ChatMLFetcherImpl = class ChatMLFetcherImpl extends AbstractChatMLFetcher {
                         };
                     }
                     pendingLoggedChatRequest?.resolve(result, streamRecorder.deltas);
+                    // Record OTel token usage metrics if available
+                    if (result.type === commonTypes_1.ChatFetchResponseType.Success && result.usage) {
+                        const metricAttrs = {
+                            operationName: index_1.GenAiOperationName.CHAT,
+                            providerName: index_1.GenAiProviderName.GITHUB,
+                            requestModel: chatEndpoint.model,
+                            responseModel: result.resolvedModel,
+                        };
+                        if (result.usage.prompt_tokens) {
+                            index_1.GenAiMetrics.recordTokenUsage(this._otelService, result.usage.prompt_tokens, 'input', metricAttrs);
+                        }
+                        if (result.usage.completion_tokens) {
+                            index_1.GenAiMetrics.recordTokenUsage(this._otelService, result.usage.completion_tokens, 'output', metricAttrs);
+                        }
+                        // Set token usage and response details on the chat span before ending it
+                        otelInferenceSpan?.setAttributes({
+                            [index_1.GenAiAttr.USAGE_INPUT_TOKENS]: result.usage.prompt_tokens ?? 0,
+                            [index_1.GenAiAttr.USAGE_OUTPUT_TOKENS]: result.usage.completion_tokens ?? 0,
+                            [index_1.GenAiAttr.RESPONSE_MODEL]: result.resolvedModel ?? chatEndpoint.model,
+                            [index_1.GenAiAttr.RESPONSE_ID]: result.requestId,
+                            [index_1.GenAiAttr.RESPONSE_FINISH_REASONS]: ['stop'],
+                            ...(result.usage.prompt_tokens_details?.cached_tokens
+                                ? { [index_1.GenAiAttr.USAGE_CACHE_READ_INPUT_TOKENS]: result.usage.prompt_tokens_details.cached_tokens }
+                                : {}),
+                            [index_1.CopilotChatAttr.TIME_TO_FIRST_TOKEN]: timeToFirstToken,
+                            ...(result.serverRequestId ? { [index_1.CopilotChatAttr.SERVER_REQUEST_ID]: result.serverRequestId } : {}),
+                            ...(result.usage.completion_tokens_details?.reasoning_tokens
+                                ? { [index_1.GenAiAttr.USAGE_REASONING_TOKENS]: result.usage.completion_tokens_details.reasoning_tokens }
+                                : {}),
+                        });
+                    }
+                    // Always capture response content for the debug panel
+                    if (otelInferenceSpan && result.type === commonTypes_1.ChatFetchResponseType.Success) {
+                        const responseText = streamRecorder.deltas.map(d => d.text).join('');
+                        const toolCalls = streamRecorder.deltas
+                            .filter(d => d.copilotToolCalls?.length)
+                            .flatMap(d => d.copilotToolCalls.map(tc => ({
+                            type: 'tool_call', id: tc.id, name: tc.name, arguments: tc.arguments
+                        })));
+                        const parts = [];
+                        if (responseText) {
+                            parts.push({ type: 'text', content: responseText });
+                        }
+                        parts.push(...toolCalls);
+                        if (parts.length > 0) {
+                            otelInferenceSpan.setAttribute(index_1.GenAiAttr.OUTPUT_MESSAGES, (0, index_1.truncateForOTel)(JSON.stringify([{ role: 'assistant', parts }])));
+                        }
+                    }
+                    // Emit OTel inference details event BEFORE ending the span
+                    // so the log record inherits the active trace context
+                    (0, index_1.emitInferenceDetailsEvent)(this._otelService, {
+                        model: chatEndpoint.model,
+                        temperature: requestOptions?.temperature,
+                        maxTokens: requestOptions?.max_tokens,
+                    }, result.type === commonTypes_1.ChatFetchResponseType.Success ? {
+                        id: result.requestId,
+                        model: result.resolvedModel,
+                        finishReasons: ['stop'],
+                        inputTokens: result.usage?.prompt_tokens,
+                        outputTokens: result.usage?.completion_tokens,
+                    } : undefined);
+                    otelInferenceSpan?.end();
+                    otelInferenceSpan = undefined;
+                    // Record OTel time-to-first-token metric
+                    if (timeToFirstToken > 0) {
+                        index_1.GenAiMetrics.recordTimeToFirstToken(this._otelService, chatEndpoint.model, timeToFirstToken / 1000);
+                    }
+                    if (useWebSocket && result.type === commonTypes_1.ChatFetchResponseType.Success) {
+                        this._consecutiveWebSocketRetryFallbacks = 0;
+                    }
                     return result;
                 }
                 case fetch_2.FetchResponseKind.Canceled:
@@ -245,6 +361,7 @@ let ChatMLFetcherImpl = class ChatMLFetcherImpl extends AbstractChatMLFetcher {
                         requestId: ourRequestId,
                         model: chatEndpoint.model,
                         apiType: chatEndpoint.apiType,
+                        transport,
                         associatedRequestId: telemetryProperties.associatedRequestId,
                         retryAfterError: telemetryProperties.retryAfterError,
                         retryAfterErrorGitHubRequestId: telemetryProperties.retryAfterErrorGitHubRequestId,
@@ -268,6 +385,13 @@ let ChatMLFetcherImpl = class ChatMLFetcherImpl extends AbstractChatMLFetcher {
                         issuedTime: baseTelemetry.issuedTime,
                     });
                     pendingLoggedChatRequest?.resolveWithCancelation();
+                    // Set canceled status on OTel span
+                    otelInferenceSpan?.setAttributes({
+                        [index_1.GenAiAttr.RESPONSE_FINISH_REASONS]: ['cancelled'],
+                        [index_1.CopilotChatAttr.CANCELED]: true,
+                    });
+                    otelInferenceSpan?.end();
+                    otelInferenceSpan = undefined;
                     return this.processCanceledResponse(response, ourRequestId, streamRecorder, telemetryProperties);
                 case fetch_2.FetchResponseKind.Failed: {
                     const processed = this.processFailedResponse(response, ourRequestId);
@@ -276,7 +400,9 @@ let ChatMLFetcherImpl = class ChatMLFetcherImpl extends AbstractChatMLFetcher {
                     const statusCodesToRetry = retryServerErrorStatusCodes
                         .split(',')
                         .map(s => parseInt(s.trim(), 10));
-                    if (enableRetryOnError && actualStatusCode !== undefined && statusCodesToRetry.includes(actualStatusCode)) {
+                    const retryAfterServerError = enableRetryOnError && actualStatusCode !== undefined && statusCodesToRetry.includes(actualStatusCode);
+                    const retryWithoutWebSocket = enableRetryOnError && useWebSocket;
+                    if (retryAfterServerError || retryWithoutWebSocket) {
                         const { retryResult } = await this._retryAfterError({
                             opts,
                             processed,
@@ -285,6 +411,7 @@ let ChatMLFetcherImpl = class ChatMLFetcherImpl extends AbstractChatMLFetcher {
                             tokenCount,
                             maxResponseTokens,
                             timeToError: timeToFirstToken,
+                            transport,
                             actualFetcher,
                             bytesReceived: actualBytesReceived,
                             baseTelemetry,
@@ -310,6 +437,7 @@ let ChatMLFetcherImpl = class ChatMLFetcherImpl extends AbstractChatMLFetcher {
                         maxResponseTokens,
                         timeToFirstToken,
                         isVisionRequest: this.filterImageMessages(messages),
+                        transport,
                         fetcher: actualFetcher,
                         bytesReceived: actualBytesReceived,
                         issuedTime: baseTelemetry.issuedTime,
@@ -323,6 +451,14 @@ let ChatMLFetcherImpl = class ChatMLFetcherImpl extends AbstractChatMLFetcher {
             }
         }
         catch (err) {
+            // End OTel inference span on error if not already ended
+            if (otelInferenceSpan) {
+                otelInferenceSpan.setStatus(2 /* SpanStatusCode.ERROR */, err instanceof Error ? err.message : String(err));
+                otelInferenceSpan.setAttribute(index_1.StdAttr.ERROR_TYPE, err instanceof Error ? err.constructor.name : 'Error');
+                otelInferenceSpan.setAttribute(index_1.GenAiAttr.RESPONSE_FINISH_REASONS, ['error']);
+                otelInferenceSpan.recordException(err);
+                otelInferenceSpan.end();
+            }
             const timeToError = Date.now() - baseTelemetry.issuedTime;
             if (err.fetcherId) {
                 actualFetcher = err.fetcherId;
@@ -334,34 +470,34 @@ let ChatMLFetcherImpl = class ChatMLFetcherImpl extends AbstractChatMLFetcher {
                 resumeEventSeen = true;
             }
             const processed = this.processError(err, ourRequestId, err.gitHubRequestId, usernameToScrub);
-            if (processed.type === commonTypes_1.ChatFetchResponseType.NetworkError && enableRetryOnError) {
-                const isRetryNetworkErrorEnabled = this._configurationService.getExperimentBasedConfig(configurationService_1.ConfigKey.TeamInternal.RetryNetworkErrors, this._experimentationService);
-                if (isRetryNetworkErrorEnabled) {
-                    const { retryResult, connectivityTestError, connectivityTestErrorGitHubRequestId } = await this._retryAfterError({
-                        opts,
-                        processed,
-                        telemetryProperties,
-                        requestBody,
-                        tokenCount,
-                        maxResponseTokens,
-                        timeToError,
-                        actualFetcher,
-                        bytesReceived: err.bytesReceived,
-                        baseTelemetry,
-                        streamRecorder,
-                        retryReason: 'network_error',
-                        debugNamePrefix: 'retry-error-',
-                        pendingLoggedChatRequest,
-                        token,
-                        usernameToScrub,
-                        suspendEventSeen,
-                        resumeEventSeen,
-                    });
-                    if (retryResult) {
-                        return retryResult;
-                    }
-                    telemetryProperties = { ...telemetryProperties, connectivityTestError, connectivityTestErrorGitHubRequestId };
+            const retryNetworkError = enableRetryOnError && processed.type === commonTypes_1.ChatFetchResponseType.NetworkError && this._configurationService.getExperimentBasedConfig(configurationService_1.ConfigKey.TeamInternal.RetryNetworkErrors, this._experimentationService);
+            const retryWithoutWebSocket = enableRetryOnError && useWebSocket && (processed.type === commonTypes_1.ChatFetchResponseType.NetworkError || processed.type === commonTypes_1.ChatFetchResponseType.Failed);
+            if (retryNetworkError || retryWithoutWebSocket) {
+                const { retryResult, connectivityTestError, connectivityTestErrorGitHubRequestId } = await this._retryAfterError({
+                    opts,
+                    processed,
+                    telemetryProperties,
+                    requestBody,
+                    tokenCount,
+                    maxResponseTokens,
+                    timeToError,
+                    transport,
+                    actualFetcher,
+                    bytesReceived: err.bytesReceived,
+                    baseTelemetry,
+                    streamRecorder,
+                    retryReason: 'network_error',
+                    debugNamePrefix: 'retry-error-',
+                    pendingLoggedChatRequest,
+                    token,
+                    usernameToScrub,
+                    suspendEventSeen,
+                    resumeEventSeen,
+                });
+                if (retryResult) {
+                    return retryResult;
                 }
+                telemetryProperties = { ...telemetryProperties, connectivityTestError, connectivityTestErrorGitHubRequestId };
             }
             if (processed.type === commonTypes_1.ChatFetchResponseType.Canceled) {
                 chatMLFetcherTelemetry_1.ChatMLFetcherTelemetrySender.sendCancellationTelemetry(this._telemetryService, {
@@ -369,6 +505,7 @@ let ChatMLFetcherImpl = class ChatMLFetcherImpl extends AbstractChatMLFetcher {
                     requestId: ourRequestId,
                     model: chatEndpoint.model,
                     apiType: chatEndpoint.apiType,
+                    transport,
                     associatedRequestId: telemetryProperties.associatedRequestId,
                     retryAfterError: telemetryProperties.retryAfterError,
                     retryAfterErrorGitHubRequestId: telemetryProperties.retryAfterErrorGitHubRequestId,
@@ -401,6 +538,7 @@ let ChatMLFetcherImpl = class ChatMLFetcherImpl extends AbstractChatMLFetcher {
                     maxResponseTokens,
                     timeToFirstToken: timeToError,
                     isVisionRequest: this.filterImageMessages(messages),
+                    transport,
                     fetcher: actualFetcher,
                     bytesReceived: err.bytesReceived,
                     issuedTime: baseTelemetry.issuedTime,
@@ -428,6 +566,7 @@ let ChatMLFetcherImpl = class ChatMLFetcherImpl extends AbstractChatMLFetcher {
                 const res = await this._fetcherService.fetch(url, {
                     headers,
                     useFetcher,
+                    callSite: 'capi-ping',
                 });
                 if (res.status >= 200 && res.status < 300) {
                     this._logService.info(`CAPI ping successful, proceeding with chat request retry...`);
@@ -468,7 +607,7 @@ let ChatMLFetcherImpl = class ChatMLFetcherImpl extends AbstractChatMLFetcher {
         return authHeaders;
     }
     async _retryAfterError(params) {
-        const { opts, processed, telemetryProperties, requestBody, tokenCount, maxResponseTokens, timeToError, actualFetcher, bytesReceived, baseTelemetry, streamRecorder, retryReason, debugNamePrefix, pendingLoggedChatRequest, token, usernameToScrub, suspendEventSeen, resumeEventSeen, } = params;
+        const { opts, processed, telemetryProperties, requestBody, tokenCount, maxResponseTokens, timeToError, transport, actualFetcher, bytesReceived, baseTelemetry, streamRecorder, retryReason, debugNamePrefix, pendingLoggedChatRequest, token, usernameToScrub, suspendEventSeen, resumeEventSeen, } = params;
         // net::ERR_NETWORK_CHANGED: https://github.com/microsoft/vscode/issues/260297
         const isNetworkChangedError = ['darwin', 'linux'].includes(process.platform) && processed.reason.indexOf('net::ERR_NETWORK_CHANGED') !== -1;
         // When Electron's network process crashes, all requests through it fail permanently.
@@ -495,6 +634,7 @@ let ChatMLFetcherImpl = class ChatMLFetcherImpl extends AbstractChatMLFetcher {
             maxResponseTokens,
             timeToFirstToken: timeToError,
             isVisionRequest: this.filterImageMessages(opts.messages),
+            transport,
             fetcher: actualFetcher,
             bytesReceived,
             issuedTime: baseTelemetry.issuedTime,
@@ -505,6 +645,8 @@ let ChatMLFetcherImpl = class ChatMLFetcherImpl extends AbstractChatMLFetcher {
         streamRecorder.callback('', 0, { text: '', retryReason });
         const retryResult = await this.fetchMany({
             ...opts,
+            useWebSocket: false,
+            ignoreStatefulMarker: opts.useWebSocket || opts.ignoreStatefulMarker,
             debugName: debugNamePrefix + opts.debugName,
             userInitiatedRequest: false, // do not mark the retry as user initiated
             telemetryProperties: {
@@ -518,9 +660,17 @@ let ChatMLFetcherImpl = class ChatMLFetcherImpl extends AbstractChatMLFetcher {
             useFetcher,
         }, token);
         pendingLoggedChatRequest?.resolve(retryResult, streamRecorder.deltas);
+        if (opts.useWebSocket && retryResult.type === commonTypes_1.ChatFetchResponseType.Success) {
+            this._consecutiveWebSocketRetryFallbacks++;
+            this._logService.info(`[ChatWebSocketManager] WebSocket request failed with successful HTTP fallback (${this._consecutiveWebSocketRetryFallbacks} consecutive).`);
+            if (opts.conversationId && opts.turnId) {
+                // Closing here because the retry is transparent.
+                this._webSocketManager.closeConnection(opts.conversationId, opts.turnId);
+            }
+        }
         return { retryResult, connectivityTestError, connectivityTestErrorGitHubRequestId };
     }
-    async _fetchAndStreamChat(chatEndpointInfo, request, baseTelemetryData, finishedCb, secretKey, copilotToken, location, ourRequestId, nChoices, cancellationToken, userInitiatedRequest, telemetryProperties, useFetcher, canRetryOnce, requestKindOptions) {
+    async _fetchAndStreamChat(chatEndpointInfo, request, baseTelemetryData, finishedCb, secretKey, copilotToken, location, ourRequestId, nChoices, cancellationToken, userInitiatedRequest, useWebSocket, turnId, conversationId, telemetryProperties, useFetcher, canRetryOnce, requestKindOptions) {
         const isPowerSaveBlockerEnabled = this._configurationService.getExperimentBasedConfig(configurationService_1.ConfigKey.TeamInternal.ChatRequestPowerSaveBlocker, this._experimentationService);
         const blockerHandle = isPowerSaveBlockerEnabled && location !== commonTypes_1.ChatLocation.Other ? this._powerService.acquirePowerSaveBlocker() : undefined;
         let suspendEventSeen = false;
@@ -534,7 +684,7 @@ let ChatMLFetcherImpl = class ChatMLFetcherImpl extends AbstractChatMLFetcher {
             this._logService.info(`System resumed during streaming request ${ourRequestId} (${commonTypes_1.ChatLocation.toString(location)})`);
         });
         try {
-            const fetchResult = await this._doFetchAndStreamChat(chatEndpointInfo, request, baseTelemetryData, finishedCb, secretKey, copilotToken, location, ourRequestId, nChoices, cancellationToken, userInitiatedRequest, telemetryProperties, useFetcher, canRetryOnce, requestKindOptions);
+            const fetchResult = await this._doFetchAndStreamChat(chatEndpointInfo, request, baseTelemetryData, finishedCb, secretKey, copilotToken, location, ourRequestId, nChoices, cancellationToken, userInitiatedRequest, useWebSocket, turnId, conversationId, telemetryProperties, useFetcher, canRetryOnce, requestKindOptions);
             return { ...fetchResult, suspendEventSeen: suspendEventSeen || undefined, resumeEventSeen: resumeEventSeen || undefined };
         }
         catch (err) {
@@ -552,28 +702,190 @@ let ChatMLFetcherImpl = class ChatMLFetcherImpl extends AbstractChatMLFetcher {
             blockerHandle?.dispose();
         }
     }
-    async _doFetchAndStreamChat(chatEndpointInfo, request, baseTelemetryData, finishedCb, secretKey, copilotToken, location, ourRequestId, nChoices, cancellationToken, userInitiatedRequest, telemetryProperties, useFetcher, canRetryOnce, requestKindOptions) {
+    async _doFetchAndStreamChat(chatEndpointInfo, request, baseTelemetryData, finishedCb, secretKey, copilotToken, location, ourRequestId, nChoices, cancellationToken, userInitiatedRequest, useWebSocket, turnId, conversationId, telemetryProperties, useFetcher, canRetryOnce, requestKindOptions) {
         if (cancellationToken.isCancellationRequested) {
             return { result: { type: fetch_2.FetchResponseKind.Canceled, reason: 'before fetch request' } };
         }
-        this._logService.debug(`modelMaxPromptTokens ${chatEndpointInfo.modelMaxPromptTokens}`);
-        this._logService.debug(`modelMaxResponseTokens ${request.max_tokens ?? 2048}`);
-        this._logService.debug(`chat model ${chatEndpointInfo.model}`);
-        secretKey ??= copilotToken.token;
-        if (!secretKey) {
-            // If no key is set we error
-            const urlOrRequestMetadata = (0, networking_1.stringifyUrlOrRequestMetadata)(chatEndpointInfo.urlOrRequestMetadata);
-            this._logService.error(`Failed to send request to ${urlOrRequestMetadata} due to missing key`);
-            (0, stream_1.sendCommunicationErrorTelemetry)(this._telemetryService, `Failed to send request to ${urlOrRequestMetadata} due to missing key`);
-            return {
-                result: {
-                    type: fetch_2.FetchResponseKind.Failed,
-                    modelRequestId: undefined,
-                    failKind: fetch_2.ChatFailKind.TokenExpiredOrInvalid,
-                    reason: 'key is missing'
-                }
-            };
+        /* OTel inference span for this LLM call.
+         * The span is started here, after the early cancellation check above, and is tagged with
+         * the model, token limits and the optional sampling / session attributes below.
+         * StdAttr.SERVER_ADDRESS is only set when urlOrRequestMetadata is a string that parses
+         * as a URL; in every other case the attribute is omitted.
+         */
+        const serverAddress = typeof chatEndpointInfo.urlOrRequestMetadata === 'string'
+            ? (() => { try {
+                return new URL(chatEndpointInfo.urlOrRequestMetadata).hostname;
+            }
+            catch {
+                return undefined;
+            } })()
+            : undefined;
+        const chatSessionId = (0, requestLogger_1.getCurrentCapturingToken)()?.chatSessionId;
+        const otelSpan = this._otelService.startSpan(`chat ${chatEndpointInfo.model}`, {
+            kind: 2 /* SpanKind.CLIENT */,
+            attributes: {
+                [index_1.GenAiAttr.OPERATION_NAME]: index_1.GenAiOperationName.CHAT,
+                [index_1.GenAiAttr.PROVIDER_NAME]: index_1.GenAiProviderName.GITHUB,
+                [index_1.GenAiAttr.REQUEST_MODEL]: chatEndpointInfo.model,
+                [index_1.GenAiAttr.CONVERSATION_ID]: telemetryProperties?.requestId ?? ourRequestId,
+                [index_1.GenAiAttr.REQUEST_MAX_TOKENS]: request.max_tokens ?? request.max_output_tokens ?? request.max_completion_tokens ?? 2048, // first non-nullish limit wins; 2048 is the fallback
+                ...(request.temperature !== undefined ? { [index_1.GenAiAttr.REQUEST_TEMPERATURE]: request.temperature } : {}),
+                ...(request.top_p !== undefined ? { [index_1.GenAiAttr.REQUEST_TOP_P]: request.top_p } : {}),
+                [index_1.CopilotChatAttr.MAX_PROMPT_TOKENS]: chatEndpointInfo.modelMaxPromptTokens,
+                ...(serverAddress ? { [index_1.StdAttr.SERVER_ADDRESS]: serverAddress } : {}),
+                ...(conversationId ? { [index_1.CopilotChatAttr.SESSION_ID]: conversationId } : {}),
+                ...(chatSessionId ? { [index_1.CopilotChatAttr.CHAT_SESSION_ID]: chatSessionId } : {}),
+            },
+        });
+        const otelStartTime = Date.now(); // start timestamp for the duration metric recorded in finally
+        try {
+            this._logService.debug(`modelMaxPromptTokens ${chatEndpointInfo.modelMaxPromptTokens}`);
+            this._logService.debug(`modelMaxResponseTokens ${request.max_tokens ?? 2048}`); // NOTE(review): only max_tokens is consulted here, while the span attribute above also falls back to max_output_tokens / max_completion_tokens
+            this._logService.debug(`chat model ${chatEndpointInfo.model}`);
+            secretKey ??= copilotToken.token;
+            if (!secretKey) {
+                // If no key is set we error: log it, send communication-error telemetry, and return a Failed result instead of throwing
+                const urlOrRequestMetadata = (0, networking_1.stringifyUrlOrRequestMetadata)(chatEndpointInfo.urlOrRequestMetadata);
+                this._logService.error(`Failed to send request to ${urlOrRequestMetadata} due to missing key`);
+                (0, stream_1.sendCommunicationErrorTelemetry)(this._telemetryService, `Failed to send request to ${urlOrRequestMetadata} due to missing key`);
+                return { // NOTE(review): unlike the two returns below, this result does not carry otelSpan
+                    result: {
+                        type: fetch_2.FetchResponseKind.Failed,
+                        modelRequestId: undefined,
+                        failKind: fetch_2.ChatFailKind.TokenExpiredOrInvalid,
+                        reason: 'key is missing'
+                    }
+                };
+            }
+            // WebSocket path: use persistent WebSocket connection for Responses API endpoints. Taken only when useWebSocket, turnId and conversationId are all truthy; otherwise the request falls through to the HTTP path below.
+            if (useWebSocket && turnId && conversationId) {
+                const wsResult = await this._doFetchViaWebSocket(chatEndpointInfo, request, baseTelemetryData, finishedCb, secretKey, location, ourRequestId, turnId, conversationId, cancellationToken, userInitiatedRequest, telemetryProperties, requestKindOptions);
+                return { ...wsResult, otelSpan }; // hand the still-open span back to the caller
+            }
+            const httpResult = await this._doFetchViaHttp(chatEndpointInfo, request, baseTelemetryData, finishedCb, secretKey, location, ourRequestId, nChoices, cancellationToken, userInitiatedRequest, telemetryProperties, useFetcher, canRetryOnce, requestKindOptions);
+            return { ...httpResult, otelSpan }; // hand the still-open span back to the caller
+        }
+        catch (err) {
+            otelSpan.setStatus(2 /* SpanStatusCode.ERROR */, err instanceof Error ? err.message : String(err)); // annotate the span with the failure, then rethrow the original error unchanged
+            otelSpan.setAttribute(index_1.StdAttr.ERROR_TYPE, err instanceof Error ? err.constructor.name : 'Error');
+            otelSpan.recordException(err);
+            throw err;
+        }
+        finally {
+            const durationSec = (Date.now() - otelStartTime) / 1000; // milliseconds -> seconds
+            index_1.GenAiMetrics.recordOperationDuration(this._otelService, durationSec, {
+                operationName: index_1.GenAiOperationName.CHAT,
+                providerName: index_1.GenAiProviderName.GITHUB,
+                requestModel: chatEndpointInfo.model,
+            });
+            /* Span is NOT ended here — caller (fetchMany) will set token attributes and end it.
+             * NOTE(review): the 'key is missing' return and the rethrow in catch do not pass
+             * otelSpan back to the caller — confirm the span is still ended on those paths.
+             */
+        }
+    }
+    /**
+     * Sends a chat request via a persistent WebSocket connection instead of HTTP POST.
+     * Events are the same Responses API streaming events, processed by OpenAIResponsesProcessor.
+     *
+     * Steps: build the request headers, obtain a connection from the WebSocket manager for
+     * (conversationId, turnId), emit `request.sent` telemetry (request options only, never
+     * `messages` / `input`), send the request, and wrap the handle's callbacks in an async
+     * iterable of completions.
+     *
+     * Returns a `Success` result whose `chatCompletions` emits each completion the processor
+     * produces. The producer callback rejects with the error passed to the handle's `onError`
+     * callback; a rejection from `getOrCreateConnection` propagates out of this method.
+     */
+    async _doFetchViaWebSocket(chatEndpointInfo, request, baseTelemetryData, finishedCb, secretKey, location, ourRequestId, turnId, conversationId, cancellationToken, userInitiatedRequest, telemetryProperties, requestKindOptions) {
+        const intent = locationToIntent(location);
+        const agentInteractionType = requestKindOptions?.kind === 'subagent' ?
+            'conversation-subagent' :
+            requestKindOptions?.kind === 'background' ?
+                'conversation-background' :
+                intent === 'conversation-agent' ? intent : undefined;
+        const additionalHeaders = {
+            'Authorization': `Bearer ${secretKey}`,
+            'X-Request-Id': ourRequestId,
+            'OpenAI-Intent': intent,
+            'X-GitHub-Api-Version': '2025-05-01',
+            'X-Interaction-Id': this._interactionService.interactionId,
+            'X-Initiator': userInitiatedRequest ? 'user' : 'agent',
+            ...(chatEndpointInfo.getExtraHeaders ? chatEndpointInfo.getExtraHeaders(location) : {}), // spread last, so endpoint-provided headers override the defaults above on key collision
+        };
+        if (agentInteractionType) { // interaction headers are only sent when an interaction type was derived above
+            additionalHeaders['X-Interaction-Type'] = agentInteractionType;
+            additionalHeaders['X-Agent-Task-Id'] = ourRequestId;
+        }
+        if (request.messages?.some((m) => Array.isArray(m.content) ? m.content.some(c => 'image_url' in c) : false) && chatEndpointInfo.supportsVision) { // NOTE(review): only request.messages is inspected for images, not request.input — confirm this is intended for this path
+            additionalHeaders['Copilot-Vision-Request'] = 'true';
         }
+        const connection = await this._webSocketManager.getOrCreateConnection(conversationId, turnId, additionalHeaders);
+        // Generate unique ID to link input and output messages
+        const modelCallId = (0, uuid_1.generateUuid)();
+        const telemetryData = telemetryData_1.TelemetryData.createAndMarkAsIssued({
+            endpoint: 'completions',
+            engineName: 'chat',
+            uiKind: commonTypes_1.ChatLocation.toString(location),
+            transport: 'websocket',
+            ...{ ...telemetryProperties, modelCallId }, // spread last: caller-supplied properties (and modelCallId) override the keys above on collision
+        }, {
+            maxTokenWindow: chatEndpointInfo.modelMaxPromptTokens
+        });
+        const modelRequestId = (0, fetch_1.getRequestId)(connection.responseHeaders); // request ids are read from the connection's responseHeaders
+        telemetryData.extendWithRequestId(modelRequestId);
+        for (const [key, value] of Object.entries(request)) {
+            if (key === 'messages' || key === 'input') {
+                continue;
+            } // Skip messages / input (PII)
+            telemetryData.properties[`request.option.${key}`] = JSON.stringify(value) ?? 'undefined'; // JSON.stringify returns undefined for values it cannot serialize (e.g. undefined), hence the fallback string
+        }
+        telemetryData.properties['headerRequestId'] = ourRequestId;
+        this._telemetryService.sendGHTelemetryEvent('request.sent', telemetryData.properties, telemetryData.measurements);
+        const requestStart = Date.now();
+        const handle = connection.sendRequest(request, cancellationToken);
+        const extendedBaseTelemetryData = baseTelemetryData.extendedBy({ modelCallId });
+        const processor = this._instantiationService.createInstance(responsesApi_1.OpenAIResponsesProcessor, extendedBaseTelemetryData, modelRequestId.headerRequestId, modelRequestId.gitHubRequestId);
+        const chatCompletions = new async_1.AsyncIterableObject(async (emitter) => {
+            try {
+                await new Promise((resolve, reject) => { // settles when the handle completes (resolve) or reports an error (reject)
+                    handle.onEvent(event => {
+                        const completion = processor.push(event, finishedCb);
+                        if (completion) { // only events for which the processor returns a completion are emitted
+                            (0, responsesApi_1.sendCompletionOutputTelemetry)(this._telemetryService, this._logService, completion, extendedBaseTelemetryData);
+                            emitter.emitOne(completion);
+                        }
+                    });
+                    handle.onError(error => {
+                        error.gitHubRequestId = modelRequestId.gitHubRequestId; // tag the error object with the GitHub request id before propagating it
+                        if ((0, errors_2.isCancellationError)(error)) { // cancellations are rejected without warning/error telemetry
+                            reject(error);
+                            return;
+                        }
+                        const warningTelemetry = telemetryData.extendedBy({ error: error.message });
+                        this._telemetryService.sendGHTelemetryEvent('request.shownWarning', warningTelemetry.properties, warningTelemetry.measurements);
+                        const totalTimeMs = Date.now() - requestStart;
+                        telemetryData.measurements.totalTimeMs = totalTimeMs;
+                        telemetryData.properties.error = error.message;
+                        this._logService.debug(`request.error: [websocket], took ${totalTimeMs} ms`);
+                        this._telemetryService.sendGHTelemetryEvent('request.error', telemetryData.properties, telemetryData.measurements);
+                        reject(error);
+                    });
+                    handle.onComplete(() => {
+                        const totalTimeMs = Date.now() - requestStart;
+                        telemetryData.measurements.totalTimeMs = totalTimeMs;
+                        this._logService.debug(`request.response: [websocket], took ${totalTimeMs} ms`);
+                        this._telemetryService.sendGHTelemetryEvent('request.response', telemetryData.properties, telemetryData.measurements);
+                        resolve();
+                    });
+                });
+            }
+            finally { // runs on completion and on error alike: send the engine-messages telemetry for this request
+                let messagesToLog = request.messages;
+                if ((!messagesToLog || messagesToLog.length === 0) && request.input) { // no chat messages but an `input` payload: convert it to messages for logging
+                    try {
+                        const rawMessages = (0, responsesApi_1.responseApiInputToRawMessagesForLogging)(request);
+                        messagesToLog = (0, openai_1.rawMessageToCAPI)(rawMessages);
+                    }
+                    catch (e) {
+                        this._logService.error(`Failed to convert Response API input to messages for telemetry:`, e);
+                        messagesToLog = []; // best effort: fall back to logging no messages rather than failing the request
+                    }
+                }
+                (0, chatStream_1.sendEngineMessagesTelemetry)(this._telemetryService, messagesToLog ?? [], telemetryData, false, this._logService);
+            }
+        });
+        return {
+            result: {
+                type: fetch_2.FetchResponseKind.Success,
+                chatCompletions,
+            }
+        };
+    }
+    async _doFetchViaHttp(chatEndpointInfo, request, baseTelemetryData, finishedCb, secretKey, location, ourRequestId, nChoices, cancellationToken, userInitiatedRequest, telemetryProperties, useFetcher, canRetryOnce, requestKindOptions) {
         // Generate unique ID to link input and output messages
         const modelCallId = (0, uuid_1.generateUuid)();
         const response = await this._fetchWithInstrumentation(chatEndpointInfo, ourRequestId, request, secretKey, location, cancellationToken, userInitiatedRequest, { ...telemetryProperties, modelCallId }, useFetcher, canRetryOnce, requestKindOptions);
@@ -660,6 +972,7 @@ let ChatMLFetcherImpl = class ChatMLFetcherImpl extends AbstractChatMLFetcher {
             endpoint: 'completions',
             engineName: 'chat',
             uiKind: commonTypes_1.ChatLocation.toString(location),
+            transport: 'http',
             ...telemetryProperties // This includes the modelCallId from fetchAndStreamChat
         }, {
             maxTokenWindow: chatEndpoint.modelMaxPromptTokens
@@ -946,7 +1259,7 @@ let ChatMLFetcherImpl = class ChatMLFetcherImpl extends AbstractChatMLFetcher {
             reason: `Request Failed: ${response.status} ${text}`
         };
     }
-    async processSuccessfulResponse(response, messages, requestBody, requestId, maxResponseTokens, promptTokenCount, timeToFirstToken, streamRecorder, baseTelemetry, chatEndpointInfo, userInitiatedRequest, fetcher, bytesReceived, suspendEventSeen, resumeEventSeen) {
+    async processSuccessfulResponse(response, messages, requestBody, requestId, maxResponseTokens, promptTokenCount, timeToFirstToken, streamRecorder, baseTelemetry, chatEndpointInfo, userInitiatedRequest, transport, fetcher, bytesReceived, suspendEventSeen, resumeEventSeen) {
         const completions = [];
         for await (const chatCompletion of response.chatCompletions) {
             chatMLFetcherTelemetry_1.ChatMLFetcherTelemetrySender.sendSuccessTelemetry(this._telemetryService, {
@@ -960,6 +1273,7 @@ let ChatMLFetcherImpl = class ChatMLFetcherImpl extends AbstractChatMLFetcher {
                 timeToFirstToken,
                 timeToFirstTokenEmitted: (baseTelemetry && streamRecorder.firstTokenEmittedTime) ? streamRecorder.firstTokenEmittedTime - baseTelemetry.issuedTime : -1,
                 hasImageMessages: this.filterImageMessages(messages),
+                transport,
                 fetcher,
                 bytesReceived,
                 suspendEventSeen,
@@ -1225,7 +1539,7 @@ let ChatMLFetcherImpl = class ChatMLFetcherImpl extends AbstractChatMLFetcher {
     }
 };
 exports.ChatMLFetcherImpl = ChatMLFetcherImpl;
-exports.ChatMLFetcherImpl = ChatMLFetcherImpl = __decorate([
+exports.ChatMLFetcherImpl = ChatMLFetcherImpl = ChatMLFetcherImpl_1 = __decorate([
     __param(0, fetcherService_1.IFetcherService),
     __param(1, telemetry_1.ITelemetryService),
     __param(2, requestLogger_1.IRequestLogger),
@@ -1238,7 +1552,9 @@ exports.ChatMLFetcherImpl = ChatMLFetcherImpl = __decorate([
     __param(9, configurationService_1.IConfigurationService),
     __param(10, nullExperimentationService_1.IExperimentationService),
     __param(11, powerService_1.IPowerService),
-    __param(12, instantiation_1.IInstantiationService)
+    __param(12, instantiation_1.IInstantiationService),
+    __param(13, chatWebSocketManager_1.IChatWebSocketManager),
+    __param(14, otelService_1.IOTelService)
 ], ChatMLFetcherImpl);
 /**
  * Validates a chat request payload to ensure it is valid