npm - opencode-qwen-cli-auth - Versions diffs - 2.2.8 → 2.3.0 - Mend

opencode-qwen-cli-auth 2.2.8 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/dist/index.js CHANGED Viewed

@@ -1,28 +1,98 @@
 /**
- * Alibaba Qwen OAuth Authentication Plugin for opencode
- *
- * Simple plugin: handles OAuth login + provides apiKey/baseURL to SDK.
- * SDK handles streaming, headers, and request format.
- *
+ * @fileoverview Alibaba Qwen OAuth Authentication Plugin for opencode
+ * Main plugin entry point implementing OAuth 2.0 Device Authorization Grant
+ * Handles authentication, request transformation, and error recovery
+ *
+ * Architecture:
+ * - OAuth flow: PKCE + Device Code Grant (RFC 8628)
+ * - Token management: Automatic refresh with file-based storage
+ * - Request handling: Custom fetch wrapper with retry logic
+ * - Error recovery: Quota degradation and CLI fallback
+ *
  * @license MIT with Usage Disclaimer (see LICENSE file)
  * @repository https://github.com/TVD-00/opencode-qwen-cli-auth
+ * @version 2.3.0
  */
 import { randomUUID } from "node:crypto";
 import { spawn } from "node:child_process";
 import { existsSync } from "node:fs";
 import { createPKCE, requestDeviceCode, pollForToken, getApiBaseUrl, saveToken, refreshAccessToken, loadStoredToken, getValidToken } from "./lib/auth/auth.js";
 import { PROVIDER_ID, AUTH_LABELS, DEVICE_FLOW, PORTAL_HEADERS } from "./lib/constants.js";
 import { logError, logInfo, logWarn, LOGGING_ENABLED } from "./lib/logger.js";
+/** Per-attempt timeout (ms) that sendWithTimeout passes to createRequestSignalWithTimeout */
 const CHAT_REQUEST_TIMEOUT_MS = 30000;
-const CHAT_MAX_RETRIES = 0;
-const CHAT_MAX_TOKENS_CAP = 2048;
+/** Maximum number of retry attempts for failed requests. NOTE(review): the retry loop visible in failFastFetch declares its own local MAX_REQUEST_RETRIES - confirm this constant is still referenced */
+const CHAT_MAX_RETRIES = 3;
+/** Upper bound applied to max_tokens by sanitizeOutgoingPayload (65536, same value as the coder-model entry below) */
+const CHAT_MAX_TOKENS_CAP = 65536;
+/** Default max tokens for chat requests */
 const CHAT_DEFAULT_MAX_TOKENS = 2048;
+/** Maximum consecutive polling failures before aborting OAuth flow */
 const MAX_CONSECUTIVE_POLL_FAILURES = 3;
+/** max_tokens value that createQuotaDegradedPayload forces when max_tokens is missing or larger */
 const QUOTA_DEGRADE_MAX_TOKENS = 1024;
+/** Timeout for CLI fallback execution in milliseconds */
 const CLI_FALLBACK_TIMEOUT_MS = 8000;
+/** Maximum buffered CLI output in characters (1 MiB of chars); appendLimitedText keeps only the trailing portion beyond this */
 const CLI_FALLBACK_MAX_BUFFER_CHARS = 1024 * 1024;
+/** CLI fallback is opt-in: enabled only when OPENCODE_QWEN_ENABLE_CLI_FALLBACK is exactly "1" */
 const ENABLE_CLI_FALLBACK = process.env.OPENCODE_QWEN_ENABLE_CLI_FALLBACK === "1";
+/** Value sent as both User-Agent and X-DashScope-UserAgent. NOTE(review): the "2.2.1" suffix does not match the 2.3.0 release this diff describes - confirm whether that is intentional */
 const PLUGIN_USER_AGENT = "opencode-qwen-cli-auth/2.2.1";
+/** Output token limits per model for DashScope OAuth; capPayloadMaxTokens looks models up by trimmed, lower-cased ID */
+const DASH_SCOPE_OUTPUT_LIMITS = {
+    "coder-model": 65536,
+    "vision-model": 8192,
+};
+/**
+ * Clamps output-token fields of a request payload to the per-model limit
+ * Checks max_tokens, max_completion_tokens and maxTokens, both at the top level
+ * and inside payload.options. Works on shallow copies, so the caller's objects
+ * are never written to.
+ * @param {Object} payload - Request payload (may carry a string `model`)
+ * @returns {Object} The same payload reference when nothing had to change,
+ *   otherwise a shallow copy with the offending fields lowered to the limit
+ */
+function capPayloadMaxTokens(payload) {
+    if (!payload || typeof payload !== "object") {
+        return payload;
+    }
+    // Model IDs are matched after trimming and lower-casing; unknown models are left alone.
+    const modelKey = (typeof payload.model === "string" ? payload.model : "").trim().toLowerCase();
+    const limit = DASH_SCOPE_OUTPUT_LIMITS[modelKey];
+    if (!limit) {
+        return payload;
+    }
+    // snake_case variants first, then the camelCase spelling some clients use.
+    const tokenFields = ["max_tokens", "max_completion_tokens", "maxTokens"];
+    // Lowers every numeric token field above the limit; reports whether anything was touched.
+    const clampFields = (target) => {
+        let touched = false;
+        for (const field of tokenFields) {
+            if (typeof target[field] === "number" && target[field] > limit) {
+                target[field] = limit;
+                touched = true;
+            }
+        }
+        return touched;
+    };
+    const next = { ...payload };
+    let changed = clampFields(next);
+    if (next.options && typeof next.options === "object") {
+        const options = { ...next.options };
+        // Only swap in the copied options object when it actually differs.
+        if (clampFields(options)) {
+            next.options = options;
+            changed = true;
+        }
+    }
+    return changed ? next : payload;
+}
 const CLIENT_ONLY_BODY_FIELDS = new Set([
     "providerID",
     "provider",
@@ -71,6 +141,14 @@ function makeFailFastErrorResponse(status, code, message) {
         headers: { "content-type": "application/json" },
     });
 }
+/**
+ * Creates AbortSignal with timeout that composes with source signal
+ * Properly cleans up timers and event listeners
+ * @param {AbortSignal} [sourceSignal] - Original abort signal from caller
+ * @param {number} timeoutMs - Timeout in milliseconds
+ * @returns {{ signal: AbortSignal, cleanup: () => void }} Composed signal and cleanup function
+ */
 function createRequestSignalWithTimeout(sourceSignal, timeoutMs) {
     const controller = new AbortController();
     const timeoutId = setTimeout(() => controller.abort(new Error("request_timeout")), timeoutMs);
@@ -93,6 +171,13 @@ function createRequestSignalWithTimeout(sourceSignal, timeoutMs) {
         },
     };
 }
+/**
+ * Appends text chunk with size limit to prevent memory overflow
+ * @param {string} current - Current text buffer
+ * @param {string} chunk - New chunk to append
+ * @returns {string} Combined text with size limit
+ */
 function appendLimitedText(current, chunk) {
     const next = current + chunk;
     if (next.length <= CLI_FALLBACK_MAX_BUFFER_CHARS) {
@@ -100,9 +185,22 @@ function appendLimitedText(current, chunk) {
     }
     return next.slice(next.length - CLI_FALLBACK_MAX_BUFFER_CHARS);
 }
+/**
+ * Checks if value is a Request instance
+ * Returns false (rather than throwing) when the runtime exposes no global
+ * Request constructor.
+ * @param {*} value - Value to check
+ * @returns {boolean} True if value is a Request instance
+ */
 function isRequestInstance(value) {
     return typeof Request !== "undefined" && value instanceof Request;
 }
+/**
+ * Normalizes fetch invocation from Request object or URL string
+ * @param {Request|string} input - Fetch input
+ * @param {RequestInit} [init] - Fetch options
+ * @returns {{ requestInput: *, requestInit: RequestInit }} Normalized fetch parameters
+ */
 async function normalizeFetchInvocation(input, init) {
     const requestInit = init ? { ...init } : {};
     let requestInput = input;
@@ -128,6 +226,13 @@ async function normalizeFetchInvocation(input, init) {
     }
     return { requestInput, requestInit };
 }
+/**
+ * Gets header value from Headers object, array, or plain object
+ * @param {Headers|Array|Object} headers - Headers to search
+ * @param {string} headerName - Header name (case-insensitive)
+ * @returns {string|undefined} Header value or undefined
+ */
 function getHeaderValue(headers, headerName) {
     if (!headers) {
         return undefined;
@@ -147,6 +252,11 @@ function getHeaderValue(headers, headerName) {
     }
     return undefined;
 }
+/**
+ * Applies JSON request body with proper content-type header
+ * @param {RequestInit} requestInit - Fetch options
+ * @param {Object} payload - Request payload
+ */
 function applyJsonRequestBody(requestInit, payload) {
     requestInit.body = JSON.stringify(payload);
     if (!requestInit.headers) {
@@ -177,6 +287,12 @@ function applyJsonRequestBody(requestInit, payload) {
         requestInit.headers["content-type"] = "application/json";
     }
 }
+/**
+ * Parses JSON request body if content-type is application/json
+ * @param {RequestInit} requestInit - Fetch options
+ * @returns {Object|null} Parsed payload or null
+ */
 function parseJsonRequestBody(requestInit) {
     if (typeof requestInit.body !== "string") {
         return null;
@@ -196,19 +312,31 @@ function parseJsonRequestBody(requestInit) {
         return null;
     }
 }
+    catch (_error) {
+        return null;
+    }
+}
+/**
+ * Removes client-only fields and caps max_tokens
+ * @param {Object} payload - Request payload
+ * @returns {Object} Sanitized payload
+ */
 function sanitizeOutgoingPayload(payload) {
     const sanitized = { ...payload };
     let changed = false;
+    // Remove client-only fields
     for (const field of CLIENT_ONLY_BODY_FIELDS) {
         if (field in sanitized) {
             delete sanitized[field];
             changed = true;
         }
     }
+    // Remove stream_options if stream is not enabled
     if ("stream_options" in sanitized && sanitized.stream !== true) {
         delete sanitized.stream_options;
         changed = true;
     }
+    // Cap max_tokens fields
     if (typeof sanitized.max_tokens === "number" && sanitized.max_tokens > CHAT_MAX_TOKENS_CAP) {
         sanitized.max_tokens = CHAT_MAX_TOKENS_CAP;
         changed = true;
@@ -219,9 +347,17 @@ function sanitizeOutgoingPayload(payload) {
     }
     return changed ? sanitized : payload;
 }
+/**
+ * Creates degraded payload for quota error recovery
+ * Removes tools and reduces max_tokens to 1024
+ * @param {Object} payload - Original payload
+ * @returns {Object|null} Degraded payload or null if no changes needed
+ */
 function createQuotaDegradedPayload(payload) {
     const degraded = { ...payload };
     let changed = false;
+    // Remove tool-related fields
     if ("tools" in degraded) {
         delete degraded.tools;
         changed = true;
@@ -234,10 +370,12 @@ function createQuotaDegradedPayload(payload) {
         delete degraded.parallel_tool_calls;
         changed = true;
     }
+    // Disable streaming
     if (degraded.stream !== false) {
         degraded.stream = false;
         changed = true;
     }
+    // Reduce max_tokens
     if (typeof degraded.max_tokens !== "number" || degraded.max_tokens > QUOTA_DEGRADE_MAX_TOKENS) {
         degraded.max_tokens = QUOTA_DEGRADE_MAX_TOKENS;
         changed = true;
@@ -248,6 +386,12 @@ function createQuotaDegradedPayload(payload) {
     }
     return changed ? degraded : null;
 }
+/**
+ * Checks if response text contains insufficientQuota error
+ * @param {string} text - Response body text
+ * @returns {boolean} True if insufficient quota error
+ */
 function isInsufficientQuota(text) {
     if (!text) {
         return false;
@@ -261,6 +405,12 @@ function isInsufficientQuota(text) {
         return text.toLowerCase().includes("insufficient_quota");
     }
 }
+/**
+ * Extracts text content from message (handles string or array format)
+ * @param {string|Array} content - Message content
+ * @returns {string} Extracted text
+ */
 function extractMessageText(content) {
     if (typeof content === "string") {
         return content.trim();
@@ -278,6 +428,11 @@ function extractMessageText(content) {
         return "";
     }).filter(Boolean).join("\n").trim();
 }
+/**
+ * Builds prompt text from chat messages for CLI fallback
+ * @param {Object} payload - Request payload with messages
+ * @returns {string} Prompt text for qwen CLI
+ */
 function buildQwenCliPrompt(payload) {
     const messages = Array.isArray(payload?.messages) ? payload.messages : [];
     for (let index = messages.length - 1; index >= 0; index -= 1) {
@@ -300,6 +455,12 @@ function buildQwenCliPrompt(payload) {
     }).filter(Boolean).join("\n\n");
     return merged || "Please respond to the latest user request.";
 }
+/**
+ * Parses qwen CLI JSON output events
+ * @param {string} rawOutput - Raw CLI output
+ * @returns {Array|null} Parsed events or null
+ */
 function parseQwenCliEvents(rawOutput) {
     const trimmed = rawOutput.trim();
     if (!trimmed) {
@@ -323,6 +484,12 @@ function parseQwenCliEvents(rawOutput) {
     }
     return null;
 }
+/**
+ * Extracts response text from CLI events
+ * @param {Array} events - Parsed CLI events
+ * @returns {string|null} Extracted text or null
+ */
 function extractQwenCliText(events) {
     for (let index = events.length - 1; index >= 0; index -= 1) {
         const event = events[index];
@@ -348,9 +515,24 @@ function extractQwenCliText(events) {
     }
     return null;
 }
+/**
+ * Creates SSE formatted chunk for streaming responses
+ * The data is JSON-serialized onto a single `data: ` line and terminated by a
+ * blank line.
+ * @param {Object} data - Data to stringify and send
+ * @returns {string} SSE formatted string chunk
+ */
 function createSseResponseChunk(data) {
     return `data: ${JSON.stringify(data)}\n\n`;
 }
+/**
+ * Creates Response object matching OpenAI completion format
+ * Handles both streaming (SSE) and non-streaming responses
+ * @param {string} model - Model ID used
+ * @param {string} content - Completion text content
+ * @param {Object} context - Request context for logging
+ * @param {boolean} streamMode - Whether to return streaming response
+ * @returns {Response} Formatted completion response
+ */
 function makeQwenCliCompletionResponse(model, content, context, streamMode) {
     if (LOGGING_ENABLED) {
         logInfo("Qwen CLI fallback returned completion", {
@@ -365,6 +547,7 @@ function makeQwenCliCompletionResponse(model, content, context, streamMode) {
         const encoder = new TextEncoder();
         const stream = new ReadableStream({
             start(controller) {
+                // Send first chunk with content
                 controller.enqueue(encoder.encode(createSseResponseChunk({
                     id: completionId,
                     object: "chat.completion.chunk",
@@ -378,6 +561,7 @@ function makeQwenCliCompletionResponse(model, content, context, streamMode) {
                         },
                     ],
                 })));
+                // Send stop chunk
                 controller.enqueue(encoder.encode(createSseResponseChunk({
                     id: completionId,
                     object: "chat.completion.chunk",
@@ -391,6 +575,7 @@ function makeQwenCliCompletionResponse(model, content, context, streamMode) {
                         },
                     ],
                 })));
+                // Send DONE marker
                 controller.enqueue(encoder.encode("data: [DONE]\n\n"));
                 controller.close();
             },
@@ -404,6 +589,7 @@ function makeQwenCliCompletionResponse(model, content, context, streamMode) {
             },
         });
     }
+    // Non-streaming response format
     const body = {
         id: `chatcmpl-${randomUUID()}`,
         object: "chat.completion",
@@ -433,6 +619,13 @@ function makeQwenCliCompletionResponse(model, content, context, streamMode) {
         },
     });
 }
+/**
+ * Executes qwen CLI as fallback when API quota is exceeded
+ * @param {Object} payload - Original request payload
+ * @param {Object} context - Request context for logging
+ * @param {AbortSignal} [abortSignal] - Abort controller signal
+ * @returns {Promise<{ ok: boolean, response?: Response, reason?: string, stdout?: string, stderr?: string }>} Fallback execution result
+ */
 async function runQwenCliFallback(payload, context, abortSignal) {
     const model = typeof payload?.model === "string" && payload.model.length > 0 ? payload.model : "coder-model";
     const streamMode = payload?.stream === true;
@@ -544,6 +737,14 @@ async function runQwenCliFallback(payload, context, abortSignal) {
         });
     });
 }
+/**
+ * Creates Response object for quota/rate limit errors
+ * @param {string} text - Response body text
+ * @param {HeadersInit} sourceHeaders - Original response headers
+ * @param {Object} context - Request context for logging
+ * @returns {Response} Formatted error response
+ */
 function makeQuotaFailFastResponse(text, sourceHeaders, context) {
     const headers = new Headers(sourceHeaders);
     headers.set("content-type", "application/json");
@@ -569,6 +770,12 @@ function makeQuotaFailFastResponse(text, sourceHeaders, context) {
         headers,
     });
 }
+/**
+ * Performs fetch request with timeout protection
+ * @param {Request|string} input - Fetch input
+ * @param {RequestInit} requestInit - Fetch options
+ * @returns {Promise<Response>} Fetch response
+ */
 async function sendWithTimeout(input, requestInit) {
     const composed = createRequestSignalWithTimeout(requestInit.signal, CHAT_REQUEST_TIMEOUT_MS);
     try {
@@ -581,15 +788,77 @@ async function sendWithTimeout(input, requestInit) {
         composed.cleanup();
     }
 }
+/**
+ * Injects required DashScope OAuth headers into fetch request
+ * Ensures compatibility even if OpenCode doesn't call chat.headers hook
+ * Headers the caller already supplied are never overwritten; presence is
+ * checked case-insensitively for every supported headers shape (Headers
+ * instance, array of [name, value] pairs, plain object).
+ * @param {RequestInit} requestInit - Fetch options to modify (headers are updated in place)
+ */
+function applyDashScopeHeaders(requestInit) {
+    // Ensure required DashScope OAuth headers are always present.
+    // This mirrors qwen-code (DashScopeOpenAICompatibleProvider.buildHeaders) behavior.
+    // NOTE: We intentionally do this in the fetch layer so it works even when
+    // OpenCode does not call the `chat.headers` hook (older versions / API mismatch).
+    const headersToApply = {
+        "X-DashScope-AuthType": PORTAL_HEADERS.AUTH_TYPE_VALUE,
+        "X-DashScope-CacheControl": "enable",
+        "User-Agent": PLUGIN_USER_AGENT,
+        "X-DashScope-UserAgent": PLUGIN_USER_AGENT,
+    };
+    if (!requestInit.headers) {
+        requestInit.headers = { ...headersToApply };
+        return;
+    }
+    if (requestInit.headers instanceof Headers) {
+        // Headers.has() already matches names case-insensitively.
+        for (const [key, value] of Object.entries(headersToApply)) {
+            if (!requestInit.headers.has(key)) {
+                requestInit.headers.set(key, value);
+            }
+        }
+        return;
+    }
+    if (Array.isArray(requestInit.headers)) {
+        const existing = new Set(requestInit.headers.map(([name]) => String(name).toLowerCase()));
+        for (const [key, value] of Object.entries(headersToApply)) {
+            if (!existing.has(key.toLowerCase())) {
+                requestInit.headers.push([key, value]);
+            }
+        }
+        return;
+    }
+    // Plain object: HTTP header names are case-insensitive, so compare lower-cased
+    // names. A case-sensitive `in` check would miss a caller-supplied "user-agent"
+    // and add a second, differently-cased entry for the same header.
+    const existingNames = new Set(Object.keys(requestInit.headers).map((name) => name.toLowerCase()));
+    for (const [key, value] of Object.entries(headersToApply)) {
+        if (!existingNames.has(key.toLowerCase())) {
+            requestInit.headers[key] = value;
+        }
+    }
+}
+/**
+ * Custom fetch wrapper for OpenCode SDK
+ * Handles token limits, DashScope headers, retries, and quota error fallback
+ * @param {Request|string} input - Fetch input
+ * @param {RequestInit} [init] - Fetch options
+ * @returns {Promise<Response>} API response or fallback response
+ */
 async function failFastFetch(input, init) {
     const normalized = await normalizeFetchInvocation(input, init);
     const requestInput = normalized.requestInput;
     const requestInit = normalized.requestInit;
+    // Always inject DashScope OAuth headers at the fetch layer.
+    // This ensures compatibility across OpenCode versions.
+    applyDashScopeHeaders(requestInit);
     const sourceSignal = requestInit.signal;
     const rawPayload = parseJsonRequestBody(requestInit);
     const sessionID = typeof rawPayload?.sessionID === "string" ? rawPayload.sessionID : undefined;
     let payload = rawPayload;
     if (payload) {
+        // Ensure we never exceed DashScope model output limits.
+        const capped = capPayloadMaxTokens(payload);
+        if (capped !== payload) {
+            payload = capped;
+            applyJsonRequestBody(requestInit, payload);
+        }
         const sanitized = sanitizeOutgoingPayload(payload);
         if (sanitized !== payload) {
             payload = sanitized;
@@ -614,84 +883,93 @@ async function failFastFetch(input, init) {
     }
     try {
         let response = await sendWithTimeout(requestInput, requestInit);
-        if (LOGGING_ENABLED) {
-            logInfo("Qwen request response", {
-                request_id: context.requestId,
-                sessionID: context.sessionID,
-                modelID: context.modelID,
-                status: response.status,
-                attempt: 1,
-            });
-        }
-        if (response.status === 429) {
-            const firstBody = await response.text().catch(() => "");
-            if (payload && isInsufficientQuota(firstBody)) {
-                const degradedPayload = createQuotaDegradedPayload(payload);
-                if (degradedPayload) {
-                    const fallbackInit = { ...requestInit };
-                    applyJsonRequestBody(fallbackInit, degradedPayload);
-                    if (LOGGING_ENABLED) {
-                        logWarn("Retrying once with degraded payload after 429 insufficient_quota", {
-                            request_id: context.requestId,
-                            sessionID: context.sessionID,
-                            modelID: context.modelID,
-                            attempt: 2,
-                        });
-                    }
-                    response = await sendWithTimeout(requestInput, fallbackInit);
-                    if (LOGGING_ENABLED) {
-                        logInfo("Qwen request response", {
-                            request_id: context.requestId,
-                            sessionID: context.sessionID,
-                            modelID: context.modelID,
-                            status: response.status,
-                            attempt: 2,
-                        });
-                    }
-                    if (response.status !== 429) {
-                        return response;
-                    }
-                    const fallbackBody = await response.text().catch(() => "");
-                    if (ENABLE_CLI_FALLBACK) {
-                        const cliFallback = await runQwenCliFallback(payload, context, sourceSignal);
-                        if (cliFallback.ok) {
-                            return cliFallback.response;
-                        }
-                        if (cliFallback.reason === "cli_aborted") {
-                            return makeFailFastErrorResponse(400, "request_aborted", "Qwen request was aborted");
+        const MAX_REQUEST_RETRIES = 3;
+        for (let retryAttempt = 0; retryAttempt <= MAX_REQUEST_RETRIES; retryAttempt++) {
+            if (LOGGING_ENABLED) {
+                logInfo("Qwen request response", {
+                    request_id: context.requestId,
+                    sessionID: context.sessionID,
+                    modelID: context.modelID,
+                    status: response.status,
+                    attempt: retryAttempt + 1,
+                });
+            }
+const RETRYABLE_STATUS_CODES = [429, 500, 502, 503, 504];
+            if (RETRYABLE_STATUS_CODES.includes(response.status)) {
+                if (response.status === 429) {
+                    const firstBody = await response.text().catch(() => "");
+                    if (payload && isInsufficientQuota(firstBody)) {
+                        const degradedPayload = createQuotaDegradedPayload(payload);
+                        if (degradedPayload) {
+                            const fallbackInit = { ...requestInit };
+                            applyJsonRequestBody(fallbackInit, degradedPayload);
+                            if (LOGGING_ENABLED) {
+                                logWarn(`Retrying with degraded payload after ${response.status} insufficient_quota, attempt ${retryAttempt + 2}/${MAX_REQUEST_RETRIES + 1}`, {
+                                    request_id: context.requestId,
+                                    sessionID: context.sessionID,
+                                    modelID: context.modelID,
+                                });
+                            }
+                            response = await sendWithTimeout(requestInput, fallbackInit);
+                            if (retryAttempt < MAX_REQUEST_RETRIES) {
+                                continue;
+                            }
+                            const fallbackBody = await response.text().catch(() => "");
+                            if (ENABLE_CLI_FALLBACK) {
+                                const cliFallback = await runQwenCliFallback(payload, context, sourceSignal);
+                                if (cliFallback.ok) {
+                                    return cliFallback.response;
+                                }
+                                if (cliFallback.reason === "cli_aborted") {
+                                    return makeFailFastErrorResponse(400, "request_aborted", "Qwen request was aborted");
+                                }
+                                if (LOGGING_ENABLED) {
+                                    logWarn("Qwen CLI fallback failed", {
+                                        request_id: context.requestId,
+                                        sessionID: context.sessionID,
+                                        modelID: context.modelID,
+                                        reason: cliFallback.reason,
+                                        stderr: cliFallback.stderr,
+                                    });
+                                }
+                            }
+                            return makeQuotaFailFastResponse(fallbackBody, response.headers, context);
                         }
-                        if (LOGGING_ENABLED) {
-                            logWarn("Qwen CLI fallback failed", {
-                                request_id: context.requestId,
-                                sessionID: context.sessionID,
-                                modelID: context.modelID,
-                                reason: cliFallback.reason,
-                                stderr: cliFallback.stderr,
-                            });
+                        if (ENABLE_CLI_FALLBACK) {
+                            const cliFallback = await runQwenCliFallback(payload, context, sourceSignal);
+                            if (cliFallback.ok) {
+                                return cliFallback.response;
+                            }
+                            if (cliFallback.reason === "cli_aborted") {
+                                return makeFailFastErrorResponse(400, "request_aborted", "Qwen request was aborted");
+                            }
+                            if (LOGGING_ENABLED) {
+                                logWarn("Qwen CLI fallback failed", {
+                                    request_id: context.requestId,
+                                    sessionID: context.sessionID,
+                                    modelID: context.modelID,
+                                    reason: cliFallback.reason,
+                                    stderr: cliFallback.stderr,
+                                });
+                            }
                         }
                     }
-                    return makeQuotaFailFastResponse(fallbackBody, response.headers, context);
+                    return makeQuotaFailFastResponse(firstBody, response.headers, context);
                 }
-                if (ENABLE_CLI_FALLBACK) {
-                    const cliFallback = await runQwenCliFallback(payload, context, sourceSignal);
-                    if (cliFallback.ok) {
-                        return cliFallback.response;
-                    }
-                    if (cliFallback.reason === "cli_aborted") {
-                        return makeFailFastErrorResponse(400, "request_aborted", "Qwen request was aborted");
-                    }
+                if (retryAttempt < MAX_REQUEST_RETRIES) {
                     if (LOGGING_ENABLED) {
-                        logWarn("Qwen CLI fallback failed", {
+                        logWarn(`Retrying after ${response.status}, attempt ${retryAttempt + 2}/${MAX_REQUEST_RETRIES + 1}`, {
                             request_id: context.requestId,
                             sessionID: context.sessionID,
                             modelID: context.modelID,
-                            reason: cliFallback.reason,
-                            stderr: cliFallback.stderr,
                         });
                     }
+                    await new Promise(r => setTimeout(r, (retryAttempt + 1) * 1000));
+                    response = await sendWithTimeout(requestInput, requestInit);
+                    continue;
                 }
             }
-            return makeQuotaFailFastResponse(firstBody, response.headers, context);
+            return response;
         }
         return response;
     }
@@ -710,8 +988,8 @@ async function failFastFetch(input, init) {
  * Get valid access token from SDK auth state, refresh if expired.
  * Uses getAuth() from SDK instead of reading file directly.
  *
- * @param getAuth - Function to get auth state from SDK
- * @returns Access token or null
+ * @param {Function} getAuth - Function to get auth state from SDK
+ * @returns {Promise<string|null>} Access token or null if not available
  */
 async function getValidAccessToken(getAuth) {
     const diskToken = await getValidToken();
@@ -760,9 +1038,11 @@ async function getValidAccessToken(getAuth) {
     }
     return accessToken ?? null;
 }
 /**
  * Get base URL from token stored on disk (resource_url).
- * Falls back to portal.qwen.ai/v1 if not available.
+ * Falls back to DashScope compatible-mode if not available.
+ * @returns {string} DashScope API base URL
  */
 function getBaseUrl() {
     try {
@@ -776,31 +1056,36 @@ function getBaseUrl() {
     }
     return getApiBaseUrl();
 }
-/**
- * Alibaba Qwen OAuth authentication plugin for opencode
- *
- * @example
- * ```json
- * {
- *   "plugin": ["opencode-alibaba-qwen-cli-auth"],
- *   "model": "qwen-code/coder-model"
- * }
- * ```
- */
-export const QwenAuthPlugin = async (_input) => {
-    return {
-        auth: {
-            provider: PROVIDER_ID,
+/**
+ * Alibaba Qwen OAuth authentication plugin for opencode
+ * Integrates Qwen OAuth device flow and API handling into opencode SDK
+ *
+ * @param {*} _input - Plugin initialization input
+ * @returns {Promise<Object>} Plugin configuration and hooks
+ *
+ * @example
+ * ```json
+ * {
+ *   "plugin": ["opencode-alibaba-qwen-cli-auth"],
+ *   "model": "qwen-code/coder-model"
+ * }
+ * ```
+ */
+export const QwenAuthPlugin = async (_input) => {
+    return {
+        auth: {
+            provider: PROVIDER_ID,
             /**
              * Loader: get token + base URL, return to SDK.
              * Pattern similar to opencode-qwencode-auth reference plugin.
              */
-            async loader(getAuth, provider) {
+            async loader(getAuth, provider) {
                 // Zero cost for OAuth models (free)
                 if (provider?.models) {
-                    for (const model of Object.values(provider.models)) {
-                        if (model) model.cost = { input: 0, output: 0 };
-                    }
+                    for (const model of Object.values(provider.models)) {
+                        if (model) model.cost = { input: 0, output: 0 };
+                    }
                 }
                 const accessToken = await getValidAccessToken(getAuth);
                 if (!accessToken) return null;
@@ -817,32 +1102,32 @@ export const QwenAuthPlugin = async (_input) => {
                 };
             },
             methods: [
-                {
-                    label: AUTH_LABELS.OAUTH,
-                    type: "oauth",
-                    /**
-                     * Device Authorization Grant OAuth flow (RFC 8628)
-                     */
-                    authorize: async () => {
-                        // Generate PKCE
-                        const pkce = await createPKCE();
-                        // Request device code
-                        const deviceAuth = await requestDeviceCode(pkce);
-                        if (!deviceAuth) {
-                            throw new Error("Failed to request device code");
-                        }
+                {
+                    label: AUTH_LABELS.OAUTH,
+                    type: "oauth",
+                    /**
+                     * Device Authorization Grant OAuth flow (RFC 8628)
+                     */
+                    authorize: async () => {
+                        // Generate PKCE
+                        const pkce = await createPKCE();
+                        // Request device code
+                        const deviceAuth = await requestDeviceCode(pkce);
+                        if (!deviceAuth) {
+                            throw new Error("Failed to request device code");
+                        }
                         // Display user code
                         console.log(`\nPlease visit: ${deviceAuth.verification_uri}`);
                         console.log(`And enter code: ${deviceAuth.user_code}\n`);
                         // Verification URL - SDK will open browser automatically when method=auto
-                        const verificationUrl = deviceAuth.verification_uri_complete || deviceAuth.verification_uri;
-                        return {
-                            url: verificationUrl,
-                            method: "auto",
-                            instructions: AUTH_LABELS.INSTRUCTIONS,
-                            callback: async () => {
-                                // Poll for token
-                                let pollInterval = (deviceAuth.interval || 5) * 1000;
+                        const verificationUrl = deviceAuth.verification_uri_complete || deviceAuth.verification_uri;
+                        return {
+                            url: verificationUrl,
+                            method: "auto",
+                            instructions: AUTH_LABELS.INSTRUCTIONS,
+                            callback: async () => {
+                                // Poll for token
+                                let pollInterval = (deviceAuth.interval || 5) * 1000;
                                 const POLLING_MARGIN_MS = 3000;
                                 const maxInterval = DEVICE_FLOW.MAX_POLL_INTERVAL;
                                 const startTime = Date.now();
@@ -855,9 +1140,9 @@ export const QwenAuthPlugin = async (_input) => {
                                         saveToken(result);
                                         // Return to SDK to save auth state
                                         return {
-                                            type: "success",
-                                            access: result.access,
-                                            refresh: result.refresh,
+                                            type: "success",
+                                            access: result.access,
+                                            refresh: result.refresh,
                                             expires: result.expires,
                                         };
                                     }
@@ -900,19 +1185,19 @@ export const QwenAuthPlugin = async (_input) => {
                                 console.error("[qwen-oauth-plugin] Device authorization timed out");
                                 return { type: "failed" };
                             },
-                        };
-                    },
-                },
-            ],
-        },
+                        };
+                    },
+                },
+            ],
+        },
         /**
          * Register qwen-code provider with model list.
          * Only register models that Portal API (OAuth) accepts:
          * coder-model and vision-model (according to QWEN_OAUTH_ALLOWED_MODELS from original CLI)
          */
-        config: async (config) => {
-            const providers = config.provider || {};
-            providers[PROVIDER_ID] = {
+        config: async (config) => {
+            const providers = config.provider || {};
+            providers[PROVIDER_ID] = {
                 npm: "@ai-sdk/openai-compatible",
                 name: "Qwen Code",
                 options: {
@@ -928,21 +1213,28 @@ export const QwenAuthPlugin = async (_input) => {
                         // Thinking is always enabled by default on server side (qwen3.5-plus)
                         reasoning: false,
                         limit: { context: 1048576, output: CHAT_MAX_TOKENS_CAP },
-                        cost: { input: 0, output: 0 },
-                        modalities: { input: ["text"], output: ["text"] },
-                    },
+                        cost: { input: 0, output: 0 },
+                        modalities: { input: ["text"], output: ["text"] },
+                    },
                     "vision-model": {
                         id: "vision-model",
                         name: "Qwen VL Plus (vision)",
                         reasoning: false,
-                        limit: { context: 131072, output: CHAT_MAX_TOKENS_CAP },
-                        cost: { input: 0, output: 0 },
-                        modalities: { input: ["text"], output: ["text"] },
-                    },
-                },
+                        limit: { context: 131072, output: DASH_SCOPE_OUTPUT_LIMITS["vision-model"] },
+                        cost: { input: 0, output: 0 },
+                        modalities: { input: ["text"], output: ["text"] },
+                    },
+                },
             };
             config.provider = providers;
         },
+        /**
+         * Apply dynamic chat parameters before sending request
+         * Ensures tokens and timeouts don't exceed plugin limits
+         *
+         * @param {*} input - Original chat request parameters
+         * @param {*} output - Final payload to be sent
+         */
         "chat.params": async (input, output) => {
             try {
                 output.options = output.options || {};
@@ -988,6 +1280,9 @@ export const QwenAuthPlugin = async (_input) => {
          * Send DashScope headers like original CLI.
          * X-DashScope-CacheControl: enable prompt caching, reduce token consumption.
          * X-DashScope-AuthType: specify auth method for server.
+         *
+         * @param {*} input - Original chat request parameters
+         * @param {*} output - Final payload to be sent
          */
         "chat.headers": async (input, output) => {
             try {
@@ -1013,5 +1308,5 @@ export const QwenAuthPlugin = async (_input) => {
         },
     };
 };
-export default QwenAuthPlugin;
+export default QwenAuthPlugin;
 //# sourceMappingURL=index.js.map