opencode-qwen-cli-auth 2.2.9 → 2.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,37 +1,52 @@
1
1
  /**
2
- * Alibaba Qwen OAuth Authentication Plugin for opencode
3
- *
4
- * Simple plugin: handles OAuth login + provides apiKey/baseURL to SDK.
5
- * SDK handles streaming, headers, and request format.
6
- *
2
+ * @fileoverview Alibaba Qwen OAuth Authentication Plugin for opencode
3
+ * Main plugin entry point implementing OAuth 2.0 Device Authorization Grant
4
+ * Handles authentication, request transformation, and error recovery
5
+ *
6
+ * Architecture:
7
+ * - OAuth flow: PKCE + Device Code Grant (RFC 8628)
8
+ * - Token management: Automatic refresh with file-based storage
9
+ * - Request handling: Custom fetch wrapper with retry logic
10
+ * - Error recovery: Quota degradation and CLI fallback
11
+ *
7
12
  * @license MIT with Usage Disclaimer (see LICENSE file)
8
13
  * @repository https://github.com/TVD-00/opencode-qwen-cli-auth
14
+ * @version 2.3.1
9
15
  */
16
+
10
17
  import { randomUUID } from "node:crypto";
11
18
  import { spawn } from "node:child_process";
12
19
  import { existsSync } from "node:fs";
13
- import { createPKCE, requestDeviceCode, pollForToken, getApiBaseUrl, saveToken, refreshAccessToken, loadStoredToken, getValidToken } from "./lib/auth/auth.js";
20
+ import { createPKCE, requestDeviceCode, pollForToken, getApiBaseUrl, saveToken, refreshAccessToken, loadStoredToken, getValidToken, upsertOAuthAccount, getActiveOAuthAccount, markOAuthAccountQuotaExhausted, switchToNextHealthyOAuthAccount } from "./lib/auth/auth.js";
14
21
  import { PROVIDER_ID, AUTH_LABELS, DEVICE_FLOW, PORTAL_HEADERS } from "./lib/constants.js";
15
22
  import { logError, logInfo, logWarn, LOGGING_ENABLED } from "./lib/logger.js";
23
+
24
+ /** Request timeout for chat completions in milliseconds */
16
25
  const CHAT_REQUEST_TIMEOUT_MS = 30000;
17
- const CHAT_MAX_RETRIES = 0;
18
- // Output token caps should match what qwen-code uses for DashScope.
19
- // - coder-model: 64K output
20
- // - vision-model: 8K output
21
- // We still keep a default for safety.
26
+ /** Maximum number of retry attempts for failed requests */
27
+ const CHAT_MAX_RETRIES = 3;
28
+ /** Output token cap for coder-model (64K tokens) */
22
29
  const CHAT_MAX_TOKENS_CAP = 65536;
30
+ /** Default max tokens for chat requests */
23
31
  const CHAT_DEFAULT_MAX_TOKENS = 2048;
32
+ /** Maximum consecutive polling failures before aborting OAuth flow */
24
33
  const MAX_CONSECUTIVE_POLL_FAILURES = 3;
34
+ /** Reduced max tokens for quota degraded requests */
25
35
  const QUOTA_DEGRADE_MAX_TOKENS = 1024;
36
+ /** Timeout for CLI fallback execution in milliseconds */
26
37
  const CLI_FALLBACK_TIMEOUT_MS = 8000;
38
+ /** Maximum buffer size for CLI output in characters */
27
39
  const CLI_FALLBACK_MAX_BUFFER_CHARS = 1024 * 1024;
40
+ /** Enable CLI fallback feature via environment variable */
28
41
  const ENABLE_CLI_FALLBACK = process.env.OPENCODE_QWEN_ENABLE_CLI_FALLBACK === "1";
42
+ /** User agent string for plugin identification */
29
43
  const PLUGIN_USER_AGENT = "opencode-qwen-cli-auth/2.2.1";
30
- // Match qwen-code output limits for DashScope OAuth.
44
+ /** Output token limits per model for DashScope OAuth */
31
45
  const DASH_SCOPE_OUTPUT_LIMITS = {
32
46
  "coder-model": 65536,
33
47
  "vision-model": 8192,
34
48
  };
49
+ let ACTIVE_OAUTH_ACCOUNT_ID = null;
35
50
  function capPayloadMaxTokens(payload) {
36
51
  if (!payload || typeof payload !== "object") {
37
52
  return payload;
@@ -111,7 +126,7 @@ function resolveQwenCliCommand() {
111
126
  return "qwen";
112
127
  }
113
128
  const QWEN_CLI_COMMAND = resolveQwenCliCommand();
114
/**
 * Reports whether a command would need a shell to be launched.
 * True only on Windows (`process.platform === "win32"`) for commands
 * whose name ends in `.cmd` or `.bat` (case-insensitive).
 * @param {string} command - Command name or path to inspect
 * @returns {boolean} True when the command is a Windows batch script
 */
function requiresShellExecution(command) {
    if (process.platform !== "win32") {
        return false;
    }
    const batchScriptPattern = /\.(cmd|bat)$/i;
    return batchScriptPattern.test(command);
}
117
132
  function makeFailFastErrorResponse(status, code, message) {
@@ -127,6 +142,14 @@ function makeFailFastErrorResponse(status, code, message) {
127
142
  headers: { "content-type": "application/json" },
128
143
  });
129
144
  }
145
+
146
+ /**
147
+ * Creates AbortSignal with timeout that composes with source signal
148
+ * Properly cleans up timers and event listeners
149
+ * @param {AbortSignal} [sourceSignal] - Original abort signal from caller
150
+ * @param {number} timeoutMs - Timeout in milliseconds
151
+ * @returns {{ signal: AbortSignal, cleanup: () => void }} Composed signal and cleanup function
152
+ */
130
153
  function createRequestSignalWithTimeout(sourceSignal, timeoutMs) {
131
154
  const controller = new AbortController();
132
155
  const timeoutId = setTimeout(() => controller.abort(new Error("request_timeout")), timeoutMs);
@@ -149,6 +172,13 @@ function createRequestSignalWithTimeout(sourceSignal, timeoutMs) {
149
172
  },
150
173
  };
151
174
  }
175
+
176
+ /**
177
+ * Appends text chunk with size limit to prevent memory overflow
178
+ * @param {string} current - Current text buffer
179
+ * @param {string} chunk - New chunk to append
180
+ * @returns {string} Combined text, keeping only the last CLI_FALLBACK_MAX_BUFFER_CHARS characters when the limit is exceeded
181
+ */
152
182
  function appendLimitedText(current, chunk) {
153
183
  const next = current + chunk;
154
184
  if (next.length <= CLI_FALLBACK_MAX_BUFFER_CHARS) {
@@ -156,9 +186,22 @@ function appendLimitedText(current, chunk) {
156
186
  }
157
187
  return next.slice(next.length - CLI_FALLBACK_MAX_BUFFER_CHARS);
158
188
  }
189
+
190
+ /**
191
+ * Checks if value is a Request instance
192
+ * @param {*} value - Value to check
193
+ * @returns {boolean} True if value is a Request instance
194
+ */
159
195
  function isRequestInstance(value) {
160
196
  return typeof Request !== "undefined" && value instanceof Request;
161
197
  }
198
+
199
+ /**
200
+ * Normalizes fetch invocation from Request object or URL string
201
+ * @param {Request|string} input - Fetch input
202
+ * @param {RequestInit} [init] - Fetch options
203
+ * @returns {Promise<{ requestInput: *, requestInit: RequestInit }>} Promise resolving to the normalized fetch parameters
204
+ */
162
205
  async function normalizeFetchInvocation(input, init) {
163
206
  const requestInit = init ? { ...init } : {};
164
207
  let requestInput = input;
@@ -184,6 +227,13 @@ async function normalizeFetchInvocation(input, init) {
184
227
  }
185
228
  return { requestInput, requestInit };
186
229
  }
230
+
231
+ /**
232
+ * Gets header value from Headers object, array, or plain object
233
+ * @param {Headers|Array|Object} headers - Headers to search
234
+ * @param {string} headerName - Header name (case-insensitive)
235
+ * @returns {string|undefined} Header value or undefined
236
+ */
187
237
  function getHeaderValue(headers, headerName) {
188
238
  if (!headers) {
189
239
  return undefined;
@@ -203,6 +253,72 @@ function getHeaderValue(headers, headerName) {
203
253
  }
204
254
  return undefined;
205
255
  }
256
+
257
/**
 * Sets the `authorization` header of a fetch init object to `Bearer <accessToken>`.
 *
 * `requestInit.headers` is updated in place (or created when absent), so any
 * other init object sharing the same headers container sees the new value.
 * Supported header containers: a `Headers` instance, an array of
 * `[name, value]` pairs, or a plain object. Header names are matched
 * case-insensitively; every existing authorization entry is collapsed into a
 * single one so no stale token is sent alongside the new one.
 *
 * @param {RequestInit} requestInit - Fetch options to modify
 * @param {string} accessToken - OAuth access token; the call is a no-op unless
 *   this is a non-empty string
 * @returns {void}
 */
function applyAuthorizationHeader(requestInit, accessToken) {
    if (typeof accessToken !== "string" || accessToken.length === 0) {
        return;
    }
    const bearer = `Bearer ${accessToken}`;
    const isAuthorization = (name) => String(name).toLowerCase() === "authorization";
    const { headers } = requestInit;
    if (!headers) {
        requestInit.headers = { authorization: bearer };
        return;
    }
    // Guard the global like isRequestInstance does for Request, so runtimes
    // without a Headers constructor fall through instead of throwing.
    if (typeof Headers !== "undefined" && headers instanceof Headers) {
        headers.set("authorization", bearer);
        return;
    }
    if (Array.isArray(headers)) {
        const firstIndex = headers.findIndex(([name]) => isAuthorization(name));
        if (firstIndex < 0) {
            headers.push(["authorization", bearer]);
            return;
        }
        headers[firstIndex][1] = bearer;
        // Remove later duplicates; walk backwards so splice() does not shift
        // entries that are still to be inspected.
        for (let index = headers.length - 1; index > firstIndex; index -= 1) {
            if (isAuthorization(headers[index][0])) {
                headers.splice(index, 1);
            }
        }
        return;
    }
    const matchingKeys = Object.keys(headers).filter(isAuthorization);
    if (matchingKeys.length === 0) {
        headers.authorization = bearer;
        return;
    }
    // Keep the first spelling the caller used and drop differently-cased
    // duplicates that would otherwise carry the previous token.
    const [keptKey, ...staleKeys] = matchingKeys;
    headers[keptKey] = bearer;
    for (const staleKey of staleKeys) {
        delete headers[staleKey];
    }
}
292
+
293
/**
 * Re-targets a request URL at the API base URL that getApiBaseUrl() derives
 * from the given resource URL.
 *
 * The origin and leading path come from the derived base URL. The remainder
 * of the request path is appended to it; when the request path already starts
 * with the base path segments they are not repeated. Query string and hash of
 * the original request are preserved.
 *
 * @param {string|URL|*} requestInput - Request URL as a string or URL object;
 *   any other value (for example a Request) is returned unchanged
 * @param {string} resourceUrl - Resource URL passed to getApiBaseUrl(); when
 *   it is not a non-empty string the input is returned unchanged
 * @returns {string|*} Rewritten URL string, or the original input when it
 *   cannot be rewritten (unsupported type, missing resourceUrl, unparsable URL)
 */
function rewriteRequestBaseUrl(requestInput, resourceUrl) {
    // URL objects are valid fetch inputs too; without this they would keep
    // pointing at the previous base URL.
    const isUrlObject = typeof URL !== "undefined" && requestInput instanceof URL;
    if (typeof requestInput !== "string" && !isUrlObject) {
        return requestInput;
    }
    if (typeof resourceUrl !== "string" || resourceUrl.length === 0) {
        return requestInput;
    }
    try {
        const targetBase = new URL(getApiBaseUrl(resourceUrl));
        const current = new URL(requestInput);
        const baseSegments = targetBase.pathname.split("/").filter(Boolean);
        const currentSegments = current.pathname.split("/").filter(Boolean);
        // Only strip the base path from the request path when it is a true
        // segment-wise prefix; otherwise the whole request path is appended.
        const sharesBasePrefix = currentSegments.length >= baseSegments.length &&
            baseSegments.every((segment, index) => currentSegments[index] === segment);
        const suffix = sharesBasePrefix
            ? currentSegments.slice(baseSegments.length)
            : currentSegments;
        const mergedPath = [...baseSegments, ...suffix].join("/");
        targetBase.pathname = `/${mergedPath}`.replace(/\/+/g, "/");
        targetBase.search = current.search;
        targetBase.hash = current.hash;
        return targetBase.toString();
    }
    catch (_error) {
        // Best effort: an unparsable URL leaves the request untouched.
        return requestInput;
    }
}
317
+ /**
318
+ * Applies JSON request body with proper content-type header
319
+ * @param {RequestInit} requestInit - Fetch options
320
+ * @param {Object} payload - Request payload
321
+ */
206
322
  function applyJsonRequestBody(requestInit, payload) {
207
323
  requestInit.body = JSON.stringify(payload);
208
324
  if (!requestInit.headers) {
@@ -233,6 +349,12 @@ function applyJsonRequestBody(requestInit, payload) {
233
349
  requestInit.headers["content-type"] = "application/json";
234
350
  }
235
351
  }
352
+
353
+ /**
354
+ * Parses JSON request body if content-type is application/json
355
+ * @param {RequestInit} requestInit - Fetch options
356
+ * @returns {Object|null} Parsed payload or null
357
+ */
236
358
  function parseJsonRequestBody(requestInit) {
237
359
  if (typeof requestInit.body !== "string") {
238
360
  return null;
@@ -252,19 +374,27 @@ function parseJsonRequestBody(requestInit) {
252
374
  return null;
253
375
  }
254
376
  }
377
+ /**
378
+ * Removes client-only fields and caps max_tokens
379
+ * @param {Object} payload - Request payload
380
+ * @returns {Object} Sanitized payload
381
+ */
255
382
  function sanitizeOutgoingPayload(payload) {
256
383
  const sanitized = { ...payload };
257
384
  let changed = false;
385
+ // Remove client-only fields
258
386
  for (const field of CLIENT_ONLY_BODY_FIELDS) {
259
387
  if (field in sanitized) {
260
388
  delete sanitized[field];
261
389
  changed = true;
262
390
  }
263
391
  }
392
+ // Remove stream_options if stream is not enabled
264
393
  if ("stream_options" in sanitized && sanitized.stream !== true) {
265
394
  delete sanitized.stream_options;
266
395
  changed = true;
267
396
  }
397
+ // Cap max_tokens fields
268
398
  if (typeof sanitized.max_tokens === "number" && sanitized.max_tokens > CHAT_MAX_TOKENS_CAP) {
269
399
  sanitized.max_tokens = CHAT_MAX_TOKENS_CAP;
270
400
  changed = true;
@@ -275,9 +405,17 @@ function sanitizeOutgoingPayload(payload) {
275
405
  }
276
406
  return changed ? sanitized : payload;
277
407
  }
408
+
409
+ /**
410
+ * Creates degraded payload for quota error recovery
411
+ * Removes tool-related fields and stream_options, disables streaming, and caps max_tokens at QUOTA_DEGRADE_MAX_TOKENS (1024)
412
+ * @param {Object} payload - Original payload
413
+ * @returns {Object|null} Degraded payload or null if no changes needed
414
+ */
278
415
  function createQuotaDegradedPayload(payload) {
279
416
  const degraded = { ...payload };
280
417
  let changed = false;
418
+ // Remove tool-related fields
281
419
  if ("tools" in degraded) {
282
420
  delete degraded.tools;
283
421
  changed = true;
@@ -290,10 +428,16 @@ function createQuotaDegradedPayload(payload) {
290
428
  delete degraded.parallel_tool_calls;
291
429
  changed = true;
292
430
  }
431
+ // Disable streaming
293
432
  if (degraded.stream !== false) {
294
433
  degraded.stream = false;
295
434
  changed = true;
296
435
  }
436
+ if ("stream_options" in degraded) {
437
+ delete degraded.stream_options;
438
+ changed = true;
439
+ }
440
+ // Reduce max_tokens
297
441
  if (typeof degraded.max_tokens !== "number" || degraded.max_tokens > QUOTA_DEGRADE_MAX_TOKENS) {
298
442
  degraded.max_tokens = QUOTA_DEGRADE_MAX_TOKENS;
299
443
  changed = true;
@@ -304,6 +448,12 @@ function createQuotaDegradedPayload(payload) {
304
448
  }
305
449
  return changed ? degraded : null;
306
450
  }
451
+
452
+ /**
453
+ * Checks if response text contains insufficientQuota error
454
+ * @param {string} text - Response body text
455
+ * @returns {boolean} True if insufficient quota error
456
+ */
307
457
  function isInsufficientQuota(text) {
308
458
  if (!text) {
309
459
  return false;
@@ -317,6 +467,12 @@ function isInsufficientQuota(text) {
317
467
  return text.toLowerCase().includes("insufficient_quota");
318
468
  }
319
469
  }
470
+
471
+ /**
472
+ * Extracts text content from message (handles string or array format)
473
+ * @param {string|Array} content - Message content
474
+ * @returns {string} Extracted text
475
+ */
320
476
  function extractMessageText(content) {
321
477
  if (typeof content === "string") {
322
478
  return content.trim();
@@ -334,6 +490,11 @@ function extractMessageText(content) {
334
490
  return "";
335
491
  }).filter(Boolean).join("\n").trim();
336
492
  }
493
+ /**
494
+ * Builds prompt text from chat messages for CLI fallback
495
+ * @param {Object} payload - Request payload with messages
496
+ * @returns {string} Prompt text for qwen CLI
497
+ */
337
498
  function buildQwenCliPrompt(payload) {
338
499
  const messages = Array.isArray(payload?.messages) ? payload.messages : [];
339
500
  for (let index = messages.length - 1; index >= 0; index -= 1) {
@@ -356,6 +517,12 @@ function buildQwenCliPrompt(payload) {
356
517
  }).filter(Boolean).join("\n\n");
357
518
  return merged || "Please respond to the latest user request.";
358
519
  }
520
+
521
+ /**
522
+ * Parses qwen CLI JSON output events
523
+ * @param {string} rawOutput - Raw CLI output
524
+ * @returns {Array|null} Parsed events or null
525
+ */
359
526
  function parseQwenCliEvents(rawOutput) {
360
527
  const trimmed = rawOutput.trim();
361
528
  if (!trimmed) {
@@ -379,6 +546,12 @@ function parseQwenCliEvents(rawOutput) {
379
546
  }
380
547
  return null;
381
548
  }
549
+
550
+ /**
551
+ * Extracts response text from CLI events
552
+ * @param {Array} events - Parsed CLI events
553
+ * @returns {string|null} Extracted text or null
554
+ */
382
555
  function extractQwenCliText(events) {
383
556
  for (let index = events.length - 1; index >= 0; index -= 1) {
384
557
  const event = events[index];
@@ -404,9 +577,24 @@ function extractQwenCliText(events) {
404
577
  }
405
578
  return null;
406
579
  }
580
+ /**
581
+ * Creates SSE formatted chunk for streaming responses
582
+ * @param {Object} data - Data to stringify and send
583
+ * @returns {string} SSE formatted string chunk
584
+ */
407
585
  function createSseResponseChunk(data) {
408
586
  return `data: ${JSON.stringify(data)}\n\n`;
409
587
  }
588
+
589
+ /**
590
+ * Creates Response object matching OpenAI completion format
591
+ * Handles both streaming (SSE) and non-streaming responses
592
+ * @param {string} model - Model ID used
593
+ * @param {string} content - Completion text content
594
+ * @param {Object} context - Request context for logging
595
+ * @param {boolean} streamMode - Whether to return streaming response
596
+ * @returns {Response} Formatted completion response
597
+ */
410
598
  function makeQwenCliCompletionResponse(model, content, context, streamMode) {
411
599
  if (LOGGING_ENABLED) {
412
600
  logInfo("Qwen CLI fallback returned completion", {
@@ -421,6 +609,7 @@ function makeQwenCliCompletionResponse(model, content, context, streamMode) {
421
609
  const encoder = new TextEncoder();
422
610
  const stream = new ReadableStream({
423
611
  start(controller) {
612
+ // Send first chunk with content
424
613
  controller.enqueue(encoder.encode(createSseResponseChunk({
425
614
  id: completionId,
426
615
  object: "chat.completion.chunk",
@@ -434,6 +623,7 @@ function makeQwenCliCompletionResponse(model, content, context, streamMode) {
434
623
  },
435
624
  ],
436
625
  })));
626
+ // Send stop chunk
437
627
  controller.enqueue(encoder.encode(createSseResponseChunk({
438
628
  id: completionId,
439
629
  object: "chat.completion.chunk",
@@ -447,6 +637,7 @@ function makeQwenCliCompletionResponse(model, content, context, streamMode) {
447
637
  },
448
638
  ],
449
639
  })));
640
+ // Send DONE marker
450
641
  controller.enqueue(encoder.encode("data: [DONE]\n\n"));
451
642
  controller.close();
452
643
  },
@@ -460,6 +651,7 @@ function makeQwenCliCompletionResponse(model, content, context, streamMode) {
460
651
  },
461
652
  });
462
653
  }
654
+ // Non-streaming response format
463
655
  const body = {
464
656
  id: `chatcmpl-${randomUUID()}`,
465
657
  object: "chat.completion",
@@ -489,6 +681,13 @@ function makeQwenCliCompletionResponse(model, content, context, streamMode) {
489
681
  },
490
682
  });
491
683
  }
684
+ /**
685
+ * Executes qwen CLI as fallback when API quota is exceeded
686
+ * @param {Object} payload - Original request payload
687
+ * @param {Object} context - Request context for logging
688
+ * @param {AbortSignal} [abortSignal] - Abort controller signal
689
+ * @returns {Promise<{ ok: boolean, response?: Response, reason?: string, stdout?: string, stderr?: string }>} Fallback execution result
690
+ */
492
691
  async function runQwenCliFallback(payload, context, abortSignal) {
493
692
  const model = typeof payload?.model === "string" && payload.model.length > 0 ? payload.model : "coder-model";
494
693
  const streamMode = payload?.stream === true;
@@ -502,6 +701,12 @@ async function runQwenCliFallback(payload, context, abortSignal) {
502
701
  command: QWEN_CLI_COMMAND,
503
702
  });
504
703
  }
704
+ if (requiresShellExecution(QWEN_CLI_COMMAND)) {
705
+ return {
706
+ ok: false,
707
+ reason: "cli_shell_execution_blocked_for_security",
708
+ };
709
+ }
505
710
  return await new Promise((resolve) => {
506
711
  let settled = false;
507
712
  let stdout = "";
@@ -509,7 +714,6 @@ async function runQwenCliFallback(payload, context, abortSignal) {
509
714
  let timer = null;
510
715
  let child = undefined;
511
716
  let abortHandler = undefined;
512
- const useShell = shouldUseShell(QWEN_CLI_COMMAND);
513
717
  const finalize = (result) => {
514
718
  if (settled) {
515
719
  return;
@@ -532,7 +736,7 @@ async function runQwenCliFallback(payload, context, abortSignal) {
532
736
  }
533
737
  try {
534
738
  child = spawn(QWEN_CLI_COMMAND, args, {
535
- shell: useShell,
739
+ shell: false,
536
740
  windowsHide: true,
537
741
  stdio: ["ignore", "pipe", "pipe"],
538
742
  });
@@ -600,6 +804,14 @@ async function runQwenCliFallback(payload, context, abortSignal) {
600
804
  });
601
805
  });
602
806
  }
807
+
808
+ /**
809
+ * Creates Response object for quota/rate limit errors
810
+ * @param {string} text - Response body text
811
+ * @param {HeadersInit} sourceHeaders - Original response headers
812
+ * @param {Object} context - Request context for logging
813
+ * @returns {Response} Formatted error response
814
+ */
603
815
  function makeQuotaFailFastResponse(text, sourceHeaders, context) {
604
816
  const headers = new Headers(sourceHeaders);
605
817
  headers.set("content-type", "application/json");
@@ -625,6 +837,12 @@ function makeQuotaFailFastResponse(text, sourceHeaders, context) {
625
837
  headers,
626
838
  });
627
839
  }
840
+ /**
841
+ * Performs fetch request with timeout protection
842
+ * @param {Request|string} input - Fetch input
843
+ * @param {RequestInit} requestInit - Fetch options
844
+ * @returns {Promise<Response>} Fetch response
845
+ */
628
846
  async function sendWithTimeout(input, requestInit) {
629
847
  const composed = createRequestSignalWithTimeout(requestInit.signal, CHAT_REQUEST_TIMEOUT_MS);
630
848
  try {
@@ -637,6 +855,12 @@ async function sendWithTimeout(input, requestInit) {
637
855
  composed.cleanup();
638
856
  }
639
857
  }
858
+
859
+ /**
860
+ * Injects required DashScope OAuth headers into fetch request
861
+ * Ensures compatibility even if OpenCode doesn't call chat.headers hook
862
+ * @param {RequestInit} requestInit - Fetch options to modify
863
+ */
640
864
  function applyDashScopeHeaders(requestInit) {
641
865
  // Ensure required DashScope OAuth headers are always present.
642
866
  // This mirrors qwen-code (DashScopeOpenAICompatibleProvider.buildHeaders) behavior.
@@ -676,9 +900,17 @@ function applyDashScopeHeaders(requestInit) {
676
900
  }
677
901
  }
678
902
  }
903
+
904
+ /**
905
+ * Custom fetch wrapper for OpenCode SDK
906
+ * Handles token limits, DashScope headers, retries, and quota error fallback
907
+ * @param {Request|string} input - Fetch input
908
+ * @param {RequestInit} [init] - Fetch options
909
+ * @returns {Promise<Response>} API response or fallback response
910
+ */
679
911
  async function failFastFetch(input, init) {
680
912
  const normalized = await normalizeFetchInvocation(input, init);
681
- const requestInput = normalized.requestInput;
913
+ let requestInput = normalized.requestInput;
682
914
  const requestInit = normalized.requestInit;
683
915
  // Always inject DashScope OAuth headers at the fetch layer.
684
916
  // This ensures compatibility across OpenCode versions.
@@ -704,12 +936,14 @@ async function failFastFetch(input, init) {
704
936
  requestId: getHeaderValue(requestInit.headers, "x-request-id"),
705
937
  sessionID,
706
938
  modelID: typeof payload?.model === "string" ? payload.model : undefined,
939
+ accountID: ACTIVE_OAUTH_ACCOUNT_ID,
707
940
  };
708
941
  if (LOGGING_ENABLED) {
709
942
  logInfo("Qwen request dispatch", {
710
943
  request_id: context.requestId,
711
944
  sessionID: context.sessionID,
712
945
  modelID: context.modelID,
946
+ accountID: context.accountID,
713
947
  max_tokens: typeof payload?.max_tokens === "number" ? payload.max_tokens : undefined,
714
948
  max_completion_tokens: typeof payload?.max_completion_tokens === "number" ? payload.max_completion_tokens : undefined,
715
949
  message_count: Array.isArray(payload?.messages) ? payload.messages.length : undefined,
@@ -718,84 +952,125 @@ async function failFastFetch(input, init) {
718
952
  }
719
953
  try {
720
954
  let response = await sendWithTimeout(requestInput, requestInit);
721
- if (LOGGING_ENABLED) {
722
- logInfo("Qwen request response", {
723
- request_id: context.requestId,
724
- sessionID: context.sessionID,
725
- modelID: context.modelID,
726
- status: response.status,
727
- attempt: 1,
728
- });
729
- }
730
- if (response.status === 429) {
731
- const firstBody = await response.text().catch(() => "");
732
- if (payload && isInsufficientQuota(firstBody)) {
733
- const degradedPayload = createQuotaDegradedPayload(payload);
734
- if (degradedPayload) {
735
- const fallbackInit = { ...requestInit };
736
- applyJsonRequestBody(fallbackInit, degradedPayload);
737
- if (LOGGING_ENABLED) {
738
- logWarn("Retrying once with degraded payload after 429 insufficient_quota", {
739
- request_id: context.requestId,
740
- sessionID: context.sessionID,
741
- modelID: context.modelID,
742
- attempt: 2,
743
- });
744
- }
745
- response = await sendWithTimeout(requestInput, fallbackInit);
746
- if (LOGGING_ENABLED) {
747
- logInfo("Qwen request response", {
748
- request_id: context.requestId,
749
- sessionID: context.sessionID,
750
- modelID: context.modelID,
751
- status: response.status,
752
- attempt: 2,
753
- });
754
- }
755
- if (response.status !== 429) {
756
- return response;
757
- }
758
- const fallbackBody = await response.text().catch(() => "");
759
- if (ENABLE_CLI_FALLBACK) {
760
- const cliFallback = await runQwenCliFallback(payload, context, sourceSignal);
761
- if (cliFallback.ok) {
762
- return cliFallback.response;
955
+ const MAX_REQUEST_RETRIES = 3;
956
+ for (let retryAttempt = 0; retryAttempt <= MAX_REQUEST_RETRIES; retryAttempt++) {
957
+ if (LOGGING_ENABLED) {
958
+ logInfo("Qwen request response", {
959
+ request_id: context.requestId,
960
+ sessionID: context.sessionID,
961
+ modelID: context.modelID,
962
+ accountID: context.accountID,
963
+ status: response.status,
964
+ attempt: retryAttempt + 1,
965
+ });
966
+ }
967
+ const RETRYABLE_STATUS_CODES = [429, 500, 502, 503, 504];
968
+ if (RETRYABLE_STATUS_CODES.includes(response.status)) {
969
+ if (response.status === 429) {
970
+ const firstBody = await response.text().catch(() => "");
971
+ if (payload && isInsufficientQuota(firstBody)) {
972
+ if (context.accountID) {
973
+ try {
974
+ await markOAuthAccountQuotaExhausted(context.accountID, "insufficient_quota");
975
+ const switched = await switchToNextHealthyOAuthAccount([context.accountID]);
976
+ if (switched?.accessToken) {
977
+ const rotatedInit = { ...requestInit };
978
+ requestInput = rewriteRequestBaseUrl(requestInput, switched.resourceUrl);
979
+ applyAuthorizationHeader(rotatedInit, switched.accessToken);
980
+ applyAuthorizationHeader(requestInit, switched.accessToken);
981
+ context.accountID = switched.accountId;
982
+ ACTIVE_OAUTH_ACCOUNT_ID = switched.accountId;
983
+ if (LOGGING_ENABLED) {
984
+ logInfo("Switched OAuth account after insufficient_quota", {
985
+ request_id: context.requestId,
986
+ sessionID: context.sessionID,
987
+ modelID: context.modelID,
988
+ accountID: context.accountID,
989
+ healthyAccounts: switched.healthyAccountCount,
990
+ totalAccounts: switched.totalAccountCount,
991
+ });
992
+ }
993
+ response = await sendWithTimeout(requestInput, rotatedInit);
994
+ if (retryAttempt < MAX_REQUEST_RETRIES) {
995
+ continue;
996
+ }
997
+ }
998
+ }
999
+ catch (switchError) {
1000
+ logWarn("Failed to switch OAuth account after insufficient_quota", switchError);
1001
+ }
763
1002
  }
764
- if (cliFallback.reason === "cli_aborted") {
765
- return makeFailFastErrorResponse(400, "request_aborted", "Qwen request was aborted");
1003
+ const degradedPayload = createQuotaDegradedPayload(payload);
1004
+ if (degradedPayload) {
1005
+ const fallbackInit = { ...requestInit };
1006
+ applyJsonRequestBody(fallbackInit, degradedPayload);
1007
+ if (LOGGING_ENABLED) {
1008
+ logWarn(`Retrying with degraded payload after ${response.status} insufficient_quota, attempt ${retryAttempt + 2}/${MAX_REQUEST_RETRIES + 1}`, {
1009
+ request_id: context.requestId,
1010
+ sessionID: context.sessionID,
1011
+ modelID: context.modelID,
1012
+ });
1013
+ }
1014
+ response = await sendWithTimeout(requestInput, fallbackInit);
1015
+ if (retryAttempt < MAX_REQUEST_RETRIES) {
1016
+ continue;
1017
+ }
1018
+ const fallbackBody = await response.text().catch(() => "");
1019
+ if (ENABLE_CLI_FALLBACK) {
1020
+ const cliFallback = await runQwenCliFallback(payload, context, sourceSignal);
1021
+ if (cliFallback.ok) {
1022
+ return cliFallback.response;
1023
+ }
1024
+ if (cliFallback.reason === "cli_aborted") {
1025
+ return makeFailFastErrorResponse(400, "request_aborted", "Qwen request was aborted");
1026
+ }
1027
+ if (LOGGING_ENABLED) {
1028
+ logWarn("Qwen CLI fallback failed", {
1029
+ request_id: context.requestId,
1030
+ sessionID: context.sessionID,
1031
+ modelID: context.modelID,
1032
+ reason: cliFallback.reason,
1033
+ stderr: cliFallback.stderr,
1034
+ });
1035
+ }
1036
+ }
1037
+ return makeQuotaFailFastResponse(fallbackBody, response.headers, context);
766
1038
  }
767
- if (LOGGING_ENABLED) {
768
- logWarn("Qwen CLI fallback failed", {
769
- request_id: context.requestId,
770
- sessionID: context.sessionID,
771
- modelID: context.modelID,
772
- reason: cliFallback.reason,
773
- stderr: cliFallback.stderr,
774
- });
1039
+ if (ENABLE_CLI_FALLBACK) {
1040
+ const cliFallback = await runQwenCliFallback(payload, context, sourceSignal);
1041
+ if (cliFallback.ok) {
1042
+ return cliFallback.response;
1043
+ }
1044
+ if (cliFallback.reason === "cli_aborted") {
1045
+ return makeFailFastErrorResponse(400, "request_aborted", "Qwen request was aborted");
1046
+ }
1047
+ if (LOGGING_ENABLED) {
1048
+ logWarn("Qwen CLI fallback failed", {
1049
+ request_id: context.requestId,
1050
+ sessionID: context.sessionID,
1051
+ modelID: context.modelID,
1052
+ reason: cliFallback.reason,
1053
+ stderr: cliFallback.stderr,
1054
+ });
1055
+ }
775
1056
  }
776
1057
  }
777
- return makeQuotaFailFastResponse(fallbackBody, response.headers, context);
1058
+ return makeQuotaFailFastResponse(firstBody, response.headers, context);
778
1059
  }
779
- if (ENABLE_CLI_FALLBACK) {
780
- const cliFallback = await runQwenCliFallback(payload, context, sourceSignal);
781
- if (cliFallback.ok) {
782
- return cliFallback.response;
783
- }
784
- if (cliFallback.reason === "cli_aborted") {
785
- return makeFailFastErrorResponse(400, "request_aborted", "Qwen request was aborted");
786
- }
1060
+ if (retryAttempt < MAX_REQUEST_RETRIES) {
787
1061
  if (LOGGING_ENABLED) {
788
- logWarn("Qwen CLI fallback failed", {
1062
+ logWarn(`Retrying after ${response.status}, attempt ${retryAttempt + 2}/${MAX_REQUEST_RETRIES + 1}`, {
789
1063
  request_id: context.requestId,
790
1064
  sessionID: context.sessionID,
791
1065
  modelID: context.modelID,
792
- reason: cliFallback.reason,
793
- stderr: cliFallback.stderr,
794
1066
  });
795
1067
  }
1068
+ await new Promise(r => setTimeout(r, (retryAttempt + 1) * 1000));
1069
+ response = await sendWithTimeout(requestInput, requestInit);
1070
+ continue;
796
1071
  }
797
1072
  }
798
- return makeQuotaFailFastResponse(firstBody, response.headers, context);
1073
+ return response;
799
1074
  }
800
1075
  return response;
801
1076
  }
@@ -814,26 +1089,40 @@ async function failFastFetch(input, init) {
814
1089
  * Get valid access token from SDK auth state, refresh if expired.
815
1090
  * Checks the active OAuth account and the token returned by getValidToken() first, then falls back to getAuth() from the SDK.
816
1091
  *
817
- * @param getAuth - Function to get auth state from SDK
818
- * @returns Access token or null
1092
+ * @param {Function} getAuth - Function to get auth state from SDK
1093
+ * @returns {Promise<{ accessToken: string, resourceUrl?: string, accountId?: string }|null>} Access token state or null
819
1094
  */
820
1095
  async function getValidAccessToken(getAuth) {
1096
+ const activeOAuthAccount = await getActiveOAuthAccount({ allowExhausted: true });
1097
+ if (activeOAuthAccount?.accessToken) {
1098
+ return {
1099
+ accessToken: activeOAuthAccount.accessToken,
1100
+ resourceUrl: activeOAuthAccount.resourceUrl,
1101
+ accountId: activeOAuthAccount.accountId,
1102
+ };
1103
+ }
821
1104
  const diskToken = await getValidToken();
822
1105
  if (diskToken?.accessToken) {
823
- return diskToken.accessToken;
1106
+ return {
1107
+ accessToken: diskToken.accessToken,
1108
+ resourceUrl: diskToken.resourceUrl,
1109
+ };
824
1110
  }
825
1111
  const auth = await getAuth();
826
1112
  if (!auth || auth.type !== "oauth") {
827
1113
  return null;
828
1114
  }
829
1115
  let accessToken = auth.access;
1116
+ let resourceUrl = undefined;
830
1117
  // Refresh if expired (60 second buffer)
831
1118
  if (accessToken && auth.expires && Date.now() > auth.expires - 60000 && auth.refresh) {
832
1119
  try {
833
1120
  const refreshResult = await refreshAccessToken(auth.refresh);
834
1121
  if (refreshResult.type === "success") {
835
1122
  accessToken = refreshResult.access;
1123
+ resourceUrl = refreshResult.resourceUrl;
836
1124
  saveToken(refreshResult);
1125
+ await upsertOAuthAccount(refreshResult, { setActive: false });
837
1126
  }
838
1127
  else {
839
1128
  if (LOGGING_ENABLED) {
@@ -851,24 +1140,38 @@ async function getValidAccessToken(getAuth) {
851
1140
  }
852
1141
  if (auth.access && auth.refresh) {
853
1142
  try {
854
- saveToken({
1143
+ const sdkToken = {
855
1144
  type: "success",
856
1145
  access: accessToken || auth.access,
857
1146
  refresh: auth.refresh,
858
1147
  expires: typeof auth.expires === "number" ? auth.expires : Date.now() + 3600 * 1000,
859
- });
1148
+ resourceUrl,
1149
+ };
1150
+ saveToken(sdkToken);
1151
+ await upsertOAuthAccount(sdkToken, { setActive: false });
860
1152
  }
861
1153
  catch (e) {
862
1154
  logWarn("Failed to bootstrap .qwen token from SDK auth state:", e);
863
1155
  }
864
1156
  }
865
- return accessToken ?? null;
1157
+ if (!accessToken) {
1158
+ return null;
1159
+ }
1160
+ return {
1161
+ accessToken,
1162
+ resourceUrl,
1163
+ };
866
1164
  }
1165
+
867
1166
  /**
868
1167
  * Get base URL from the given resourceUrl, or from the token stored on disk (resource_url) when none is passed.
869
1168
  * Falls back to DashScope compatible-mode if not available.
1169
+ * @returns {string} DashScope API base URL
870
1170
  */
871
- function getBaseUrl() {
1171
+ function getBaseUrl(resourceUrl) {
1172
+ if (typeof resourceUrl === "string" && resourceUrl.length > 0) {
1173
+ return getApiBaseUrl(resourceUrl);
1174
+ }
872
1175
  try {
873
1176
  const stored = loadStoredToken();
874
1177
  if (stored?.resource_url) {
@@ -880,8 +1183,13 @@ function getBaseUrl() {
880
1183
  }
881
1184
  return getApiBaseUrl();
882
1185
  }
1186
+
883
1187
  /**
884
1188
  * Alibaba Qwen OAuth authentication plugin for opencode
1189
+ * Integrates Qwen OAuth device flow and API handling into opencode SDK
1190
+ *
1191
+ * @param {*} _input - Plugin initialization input
1192
+ * @returns {Promise<Object>} Plugin configuration and hooks
885
1193
  *
886
1194
  * @example
887
1195
  * ```json
@@ -906,14 +1214,15 @@ export const QwenAuthPlugin = async (_input) => {
906
1214
  if (model) model.cost = { input: 0, output: 0 };
907
1215
  }
908
1216
  }
909
- const accessToken = await getValidAccessToken(getAuth);
910
- if (!accessToken) return null;
911
- const baseURL = getBaseUrl();
1217
+ const tokenState = await getValidAccessToken(getAuth);
1218
+ if (!tokenState?.accessToken) return null;
1219
+ ACTIVE_OAUTH_ACCOUNT_ID = tokenState.accountId || null;
1220
+ const baseURL = getBaseUrl(tokenState.resourceUrl);
912
1221
  if (LOGGING_ENABLED) {
913
1222
  logInfo("Using Qwen baseURL:", baseURL);
914
1223
  }
915
1224
  return {
916
- apiKey: accessToken,
1225
+ apiKey: tokenState.accessToken,
917
1226
  baseURL,
918
1227
  timeout: CHAT_REQUEST_TIMEOUT_MS,
919
1228
  maxRetries: CHAT_MAX_RETRIES,
@@ -957,6 +1266,8 @@ export const QwenAuthPlugin = async (_input) => {
957
1266
  const result = await pollForToken(deviceAuth.device_code, pkce.verifier);
958
1267
  if (result.type === "success") {
959
1268
  saveToken(result);
1269
+ const savedAccount = await upsertOAuthAccount(result, { setActive: true });
1270
+ ACTIVE_OAUTH_ACCOUNT_ID = savedAccount?.accountId || ACTIVE_OAUTH_ACCOUNT_ID;
960
1271
  // Return to SDK to save auth state
961
1272
  return {
962
1273
  type: "success",
@@ -1047,6 +1358,13 @@ export const QwenAuthPlugin = async (_input) => {
1047
1358
  };
1048
1359
  config.provider = providers;
1049
1360
  },
1361
+ /**
1362
+ * Apply dynamic chat parameters before sending request
1363
+ * Ensures tokens and timeouts don't exceed plugin limits
1364
+ *
1365
+ * @param {*} input - Original chat request parameters
1366
+ * @param {*} output - Final payload to be sent
1367
+ */
1050
1368
  "chat.params": async (input, output) => {
1051
1369
  try {
1052
1370
  output.options = output.options || {};
@@ -1092,6 +1410,9 @@ export const QwenAuthPlugin = async (_input) => {
1092
1410
  * Send DashScope headers like original CLI.
1093
1411
  * X-DashScope-CacheControl: enable prompt caching, reduce token consumption.
1094
1412
  * X-DashScope-AuthType: specify auth method for server.
1413
+ *
1414
+ * @param {*} input - Original chat request parameters
1415
+ * @param {*} output - Final payload to be sent
1095
1416
  */
1096
1417
  "chat.headers": async (input, output) => {
1097
1418
  try {