opencode-qwen-cli-auth 2.2.8 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,28 +1,98 @@
1
1
  /**
2
- * Alibaba Qwen OAuth Authentication Plugin for opencode
3
- *
4
- * Simple plugin: handles OAuth login + provides apiKey/baseURL to SDK.
5
- * SDK handles streaming, headers, and request format.
6
- *
2
+ * @fileoverview Alibaba Qwen OAuth Authentication Plugin for opencode
3
+ * Main plugin entry point implementing OAuth 2.0 Device Authorization Grant
4
+ * Handles authentication, request transformation, and error recovery
5
+ *
6
+ * Architecture:
7
+ * - OAuth flow: PKCE + Device Code Grant (RFC 8628)
8
+ * - Token management: Automatic refresh with file-based storage
9
+ * - Request handling: Custom fetch wrapper with retry logic
10
+ * - Error recovery: Quota degradation and CLI fallback
11
+ *
7
12
  * @license MIT with Usage Disclaimer (see LICENSE file)
8
13
  * @repository https://github.com/TVD-00/opencode-qwen-cli-auth
14
+ * @version 2.2.9
9
15
  */
16
+
10
17
  import { randomUUID } from "node:crypto";
11
18
  import { spawn } from "node:child_process";
12
19
  import { existsSync } from "node:fs";
13
20
  import { createPKCE, requestDeviceCode, pollForToken, getApiBaseUrl, saveToken, refreshAccessToken, loadStoredToken, getValidToken } from "./lib/auth/auth.js";
14
21
  import { PROVIDER_ID, AUTH_LABELS, DEVICE_FLOW, PORTAL_HEADERS } from "./lib/constants.js";
15
22
  import { logError, logInfo, logWarn, LOGGING_ENABLED } from "./lib/logger.js";
23
+
24
+ /** Request timeout for chat completions in milliseconds */
16
25
  const CHAT_REQUEST_TIMEOUT_MS = 30000;
17
- const CHAT_MAX_RETRIES = 0;
18
- const CHAT_MAX_TOKENS_CAP = 2048;
26
+ /** Maximum number of retry attempts for failed requests */
27
+ const CHAT_MAX_RETRIES = 3;
28
+ /** Output token cap for coder-model (64K tokens) */
29
+ const CHAT_MAX_TOKENS_CAP = 65536;
30
+ /** Default max tokens for chat requests */
19
31
  const CHAT_DEFAULT_MAX_TOKENS = 2048;
32
+ /** Maximum consecutive polling failures before aborting OAuth flow */
20
33
  const MAX_CONSECUTIVE_POLL_FAILURES = 3;
34
+ /** Reduced max tokens for quota degraded requests */
21
35
  const QUOTA_DEGRADE_MAX_TOKENS = 1024;
36
+ /** Timeout for CLI fallback execution in milliseconds */
22
37
  const CLI_FALLBACK_TIMEOUT_MS = 8000;
38
+ /** Maximum buffer size for CLI output in characters */
23
39
  const CLI_FALLBACK_MAX_BUFFER_CHARS = 1024 * 1024;
40
+ /** Enable CLI fallback feature via environment variable */
24
41
  const ENABLE_CLI_FALLBACK = process.env.OPENCODE_QWEN_ENABLE_CLI_FALLBACK === "1";
42
+ /** User agent string for plugin identification */
25
43
  const PLUGIN_USER_AGENT = "opencode-qwen-cli-auth/2.2.1";
44
/** Output token limits per model for DashScope OAuth */
const DASH_SCOPE_OUTPUT_LIMITS = {
    "coder-model": 65536,
    "vision-model": 8192,
};

/** Field names under which clients send the output-token budget (snake_case and camelCase). */
const MAX_TOKEN_FIELD_NAMES = ["max_tokens", "max_completion_tokens", "maxTokens"];

/**
 * Clamps every known max-token field of an object to the given limit.
 * The input is never mutated: a shallow copy is made only when a field has to change.
 * @param {Object} fields - Object that may carry max-token fields
 * @param {number} limit - Upper bound for each field
 * @returns {Object} The same object when nothing exceeded the limit, otherwise a clamped copy
 */
function clampTokenFields(fields, limit) {
    let clamped = null;
    for (const name of MAX_TOKEN_FIELD_NAMES) {
        const value = fields[name];
        if (typeof value === "number" && value > limit) {
            if (clamped === null) {
                clamped = { ...fields };
            }
            clamped[name] = limit;
        }
    }
    return clamped === null ? fields : clamped;
}

/**
 * Caps the requested output tokens to the per-model limit in DASH_SCOPE_OUTPUT_LIMITS.
 * Both the top-level payload and a nested `options` object are checked.
 * The model name is matched after trimming and lower-casing.
 * @param {*} payload - Request payload; non-object values are returned untouched
 * @returns {*} The original payload when no change was needed, otherwise a capped shallow copy
 */
function capPayloadMaxTokens(payload) {
    if (!payload || typeof payload !== "object") {
        return payload;
    }
    const modelKey = typeof payload.model === "string" ? payload.model.trim().toLowerCase() : "";
    // Own-property check: a model named e.g. "constructor" must not resolve to
    // something inherited from Object.prototype.
    if (!Object.prototype.hasOwnProperty.call(DASH_SCOPE_OUTPUT_LIMITS, modelKey)) {
        return payload;
    }
    const limit = DASH_SCOPE_OUTPUT_LIMITS[modelKey];
    if (!limit) {
        return payload;
    }
    let next = clampTokenFields(payload, limit);
    const nestedOptions = next.options;
    if (nestedOptions && typeof nestedOptions === "object") {
        const cappedOptions = clampTokenFields(nestedOptions, limit);
        if (cappedOptions !== nestedOptions) {
            // Spread keeps `options` at its original key position.
            next = { ...next, options: cappedOptions };
        }
    }
    return next;
}
26
96
  const CLIENT_ONLY_BODY_FIELDS = new Set([
27
97
  "providerID",
28
98
  "provider",
@@ -71,6 +141,14 @@ function makeFailFastErrorResponse(status, code, message) {
71
141
  headers: { "content-type": "application/json" },
72
142
  });
73
143
  }
144
+
145
+ /**
146
+ * Creates AbortSignal with timeout that composes with source signal
147
+ * Properly cleans up timers and event listeners
148
+ * @param {AbortSignal} [sourceSignal] - Original abort signal from caller
149
+ * @param {number} timeoutMs - Timeout in milliseconds
150
+ * @returns {{ signal: AbortSignal, cleanup: () => void }} Composed signal and cleanup function
151
+ */
74
152
  function createRequestSignalWithTimeout(sourceSignal, timeoutMs) {
75
153
  const controller = new AbortController();
76
154
  const timeoutId = setTimeout(() => controller.abort(new Error("request_timeout")), timeoutMs);
@@ -93,6 +171,13 @@ function createRequestSignalWithTimeout(sourceSignal, timeoutMs) {
93
171
  },
94
172
  };
95
173
  }
174
+
175
+ /**
176
+ * Appends text chunk with size limit to prevent memory overflow
177
+ * @param {string} current - Current text buffer
178
+ * @param {string} chunk - New chunk to append
179
+ * @returns {string} Combined text with size limit
180
+ */
96
181
  function appendLimitedText(current, chunk) {
97
182
  const next = current + chunk;
98
183
  if (next.length <= CLI_FALLBACK_MAX_BUFFER_CHARS) {
@@ -100,9 +185,22 @@ function appendLimitedText(current, chunk) {
100
185
  }
101
186
  return next.slice(next.length - CLI_FALLBACK_MAX_BUFFER_CHARS);
102
187
  }
188
+
189
/**
 * Reports whether a value is a Fetch API Request object.
 * Evaluates to false when the runtime exposes no global Request constructor.
 * @param {*} value - Candidate value
 * @returns {boolean} True only for Request instances
 */
function isRequestInstance(value) {
    if (typeof Request === "undefined") {
        return false;
    }
    return value instanceof Request;
}
197
+
198
+ /**
199
+ * Normalizes fetch invocation from Request object or URL string
200
+ * @param {Request|string} input - Fetch input
201
+ * @param {RequestInit} [init] - Fetch options
202
+ * @returns {{ requestInput: *, requestInit: RequestInit }} Normalized fetch parameters
203
+ */
106
204
  async function normalizeFetchInvocation(input, init) {
107
205
  const requestInit = init ? { ...init } : {};
108
206
  let requestInput = input;
@@ -128,6 +226,13 @@ async function normalizeFetchInvocation(input, init) {
128
226
  }
129
227
  return { requestInput, requestInit };
130
228
  }
229
+
230
+ /**
231
+ * Gets header value from Headers object, array, or plain object
232
+ * @param {Headers|Array|Object} headers - Headers to search
233
+ * @param {string} headerName - Header name (case-insensitive)
234
+ * @returns {string|undefined} Header value or undefined
235
+ */
131
236
  function getHeaderValue(headers, headerName) {
132
237
  if (!headers) {
133
238
  return undefined;
@@ -147,6 +252,11 @@ function getHeaderValue(headers, headerName) {
147
252
  }
148
253
  return undefined;
149
254
  }
255
+ /**
256
+ * Applies JSON request body with proper content-type header
257
+ * @param {RequestInit} requestInit - Fetch options
258
+ * @param {Object} payload - Request payload
259
+ */
150
260
  function applyJsonRequestBody(requestInit, payload) {
151
261
  requestInit.body = JSON.stringify(payload);
152
262
  if (!requestInit.headers) {
@@ -177,6 +287,12 @@ function applyJsonRequestBody(requestInit, payload) {
177
287
  requestInit.headers["content-type"] = "application/json";
178
288
  }
179
289
  }
290
+
291
+ /**
292
+ * Parses JSON request body if content-type is application/json
293
+ * @param {RequestInit} requestInit - Fetch options
294
+ * @returns {Object|null} Parsed payload or null
295
+ */
180
296
  function parseJsonRequestBody(requestInit) {
181
297
  if (typeof requestInit.body !== "string") {
182
298
  return null;
@@ -196,19 +312,31 @@ function parseJsonRequestBody(requestInit) {
196
312
  return null;
197
313
  }
198
314
  }
315
+ catch (_error) {
316
+ return null;
317
+ }
318
+ }
319
+ /**
320
+ * Removes client-only fields and caps max_tokens
321
+ * @param {Object} payload - Request payload
322
+ * @returns {Object} Sanitized payload
323
+ */
199
324
  function sanitizeOutgoingPayload(payload) {
200
325
  const sanitized = { ...payload };
201
326
  let changed = false;
327
+ // Remove client-only fields
202
328
  for (const field of CLIENT_ONLY_BODY_FIELDS) {
203
329
  if (field in sanitized) {
204
330
  delete sanitized[field];
205
331
  changed = true;
206
332
  }
207
333
  }
334
+ // Remove stream_options if stream is not enabled
208
335
  if ("stream_options" in sanitized && sanitized.stream !== true) {
209
336
  delete sanitized.stream_options;
210
337
  changed = true;
211
338
  }
339
+ // Cap max_tokens fields
212
340
  if (typeof sanitized.max_tokens === "number" && sanitized.max_tokens > CHAT_MAX_TOKENS_CAP) {
213
341
  sanitized.max_tokens = CHAT_MAX_TOKENS_CAP;
214
342
  changed = true;
@@ -219,9 +347,17 @@ function sanitizeOutgoingPayload(payload) {
219
347
  }
220
348
  return changed ? sanitized : payload;
221
349
  }
350
+
351
+ /**
352
+ * Creates degraded payload for quota error recovery
353
+ * Removes tools and reduces max_tokens to 1024
354
+ * @param {Object} payload - Original payload
355
+ * @returns {Object|null} Degraded payload or null if no changes needed
356
+ */
222
357
  function createQuotaDegradedPayload(payload) {
223
358
  const degraded = { ...payload };
224
359
  let changed = false;
360
+ // Remove tool-related fields
225
361
  if ("tools" in degraded) {
226
362
  delete degraded.tools;
227
363
  changed = true;
@@ -234,10 +370,12 @@ function createQuotaDegradedPayload(payload) {
234
370
  delete degraded.parallel_tool_calls;
235
371
  changed = true;
236
372
  }
373
+ // Disable streaming
237
374
  if (degraded.stream !== false) {
238
375
  degraded.stream = false;
239
376
  changed = true;
240
377
  }
378
+ // Reduce max_tokens
241
379
  if (typeof degraded.max_tokens !== "number" || degraded.max_tokens > QUOTA_DEGRADE_MAX_TOKENS) {
242
380
  degraded.max_tokens = QUOTA_DEGRADE_MAX_TOKENS;
243
381
  changed = true;
@@ -248,6 +386,12 @@ function createQuotaDegradedPayload(payload) {
248
386
  }
249
387
  return changed ? degraded : null;
250
388
  }
389
+
390
+ /**
391
+ * Checks if response text contains insufficientQuota error
392
+ * @param {string} text - Response body text
393
+ * @returns {boolean} True if insufficient quota error
394
+ */
251
395
  function isInsufficientQuota(text) {
252
396
  if (!text) {
253
397
  return false;
@@ -261,6 +405,12 @@ function isInsufficientQuota(text) {
261
405
  return text.toLowerCase().includes("insufficient_quota");
262
406
  }
263
407
  }
408
+
409
+ /**
410
+ * Extracts text content from message (handles string or array format)
411
+ * @param {string|Array} content - Message content
412
+ * @returns {string} Extracted text
413
+ */
264
414
  function extractMessageText(content) {
265
415
  if (typeof content === "string") {
266
416
  return content.trim();
@@ -278,6 +428,11 @@ function extractMessageText(content) {
278
428
  return "";
279
429
  }).filter(Boolean).join("\n").trim();
280
430
  }
431
+ /**
432
+ * Builds prompt text from chat messages for CLI fallback
433
+ * @param {Object} payload - Request payload with messages
434
+ * @returns {string} Prompt text for qwen CLI
435
+ */
281
436
  function buildQwenCliPrompt(payload) {
282
437
  const messages = Array.isArray(payload?.messages) ? payload.messages : [];
283
438
  for (let index = messages.length - 1; index >= 0; index -= 1) {
@@ -300,6 +455,12 @@ function buildQwenCliPrompt(payload) {
300
455
  }).filter(Boolean).join("\n\n");
301
456
  return merged || "Please respond to the latest user request.";
302
457
  }
458
+
459
+ /**
460
+ * Parses qwen CLI JSON output events
461
+ * @param {string} rawOutput - Raw CLI output
462
+ * @returns {Array|null} Parsed events or null
463
+ */
303
464
  function parseQwenCliEvents(rawOutput) {
304
465
  const trimmed = rawOutput.trim();
305
466
  if (!trimmed) {
@@ -323,6 +484,12 @@ function parseQwenCliEvents(rawOutput) {
323
484
  }
324
485
  return null;
325
486
  }
487
+
488
+ /**
489
+ * Extracts response text from CLI events
490
+ * @param {Array} events - Parsed CLI events
491
+ * @returns {string|null} Extracted text or null
492
+ */
326
493
  function extractQwenCliText(events) {
327
494
  for (let index = events.length - 1; index >= 0; index -= 1) {
328
495
  const event = events[index];
@@ -348,9 +515,24 @@ function extractQwenCliText(events) {
348
515
  }
349
516
  return null;
350
517
  }
518
/**
 * Formats one Server-Sent Events frame.
 * The value is JSON-serialized, prefixed with "data: " and terminated by a blank line.
 * @param {*} data - Value to serialize into the frame
 * @returns {string} SSE frame text
 */
function createSseResponseChunk(data) {
    const serialized = JSON.stringify(data);
    const frame = `data: ${serialized}`;
    return `${frame}\n\n`;
}
526
+
527
+ /**
528
+ * Creates Response object matching OpenAI completion format
529
+ * Handles both streaming (SSE) and non-streaming responses
530
+ * @param {string} model - Model ID used
531
+ * @param {string} content - Completion text content
532
+ * @param {Object} context - Request context for logging
533
+ * @param {boolean} streamMode - Whether to return streaming response
534
+ * @returns {Response} Formatted completion response
535
+ */
354
536
  function makeQwenCliCompletionResponse(model, content, context, streamMode) {
355
537
  if (LOGGING_ENABLED) {
356
538
  logInfo("Qwen CLI fallback returned completion", {
@@ -365,6 +547,7 @@ function makeQwenCliCompletionResponse(model, content, context, streamMode) {
365
547
  const encoder = new TextEncoder();
366
548
  const stream = new ReadableStream({
367
549
  start(controller) {
550
+ // Send first chunk with content
368
551
  controller.enqueue(encoder.encode(createSseResponseChunk({
369
552
  id: completionId,
370
553
  object: "chat.completion.chunk",
@@ -378,6 +561,7 @@ function makeQwenCliCompletionResponse(model, content, context, streamMode) {
378
561
  },
379
562
  ],
380
563
  })));
564
+ // Send stop chunk
381
565
  controller.enqueue(encoder.encode(createSseResponseChunk({
382
566
  id: completionId,
383
567
  object: "chat.completion.chunk",
@@ -391,6 +575,7 @@ function makeQwenCliCompletionResponse(model, content, context, streamMode) {
391
575
  },
392
576
  ],
393
577
  })));
578
+ // Send DONE marker
394
579
  controller.enqueue(encoder.encode("data: [DONE]\n\n"));
395
580
  controller.close();
396
581
  },
@@ -404,6 +589,7 @@ function makeQwenCliCompletionResponse(model, content, context, streamMode) {
404
589
  },
405
590
  });
406
591
  }
592
+ // Non-streaming response format
407
593
  const body = {
408
594
  id: `chatcmpl-${randomUUID()}`,
409
595
  object: "chat.completion",
@@ -433,6 +619,13 @@ function makeQwenCliCompletionResponse(model, content, context, streamMode) {
433
619
  },
434
620
  });
435
621
  }
622
+ /**
623
+ * Executes qwen CLI as fallback when API quota is exceeded
624
+ * @param {Object} payload - Original request payload
625
+ * @param {Object} context - Request context for logging
626
+ * @param {AbortSignal} [abortSignal] - Abort controller signal
627
+ * @returns {Promise<{ ok: boolean, response?: Response, reason?: string, stdout?: string, stderr?: string }>} Fallback execution result
628
+ */
436
629
  async function runQwenCliFallback(payload, context, abortSignal) {
437
630
  const model = typeof payload?.model === "string" && payload.model.length > 0 ? payload.model : "coder-model";
438
631
  const streamMode = payload?.stream === true;
@@ -544,6 +737,14 @@ async function runQwenCliFallback(payload, context, abortSignal) {
544
737
  });
545
738
  });
546
739
  }
740
+
741
+ /**
742
+ * Creates Response object for quota/rate limit errors
743
+ * @param {string} text - Response body text
744
+ * @param {HeadersInit} sourceHeaders - Original response headers
745
+ * @param {Object} context - Request context for logging
746
+ * @returns {Response} Formatted error response
747
+ */
547
748
  function makeQuotaFailFastResponse(text, sourceHeaders, context) {
548
749
  const headers = new Headers(sourceHeaders);
549
750
  headers.set("content-type", "application/json");
@@ -569,6 +770,12 @@ function makeQuotaFailFastResponse(text, sourceHeaders, context) {
569
770
  headers,
570
771
  });
571
772
  }
773
+ /**
774
+ * Performs fetch request with timeout protection
775
+ * @param {Request|string} input - Fetch input
776
+ * @param {RequestInit} requestInit - Fetch options
777
+ * @returns {Promise<Response>} Fetch response
778
+ */
572
779
  async function sendWithTimeout(input, requestInit) {
573
780
  const composed = createRequestSignalWithTimeout(requestInit.signal, CHAT_REQUEST_TIMEOUT_MS);
574
781
  try {
@@ -581,15 +788,77 @@ async function sendWithTimeout(input, requestInit) {
581
788
  composed.cleanup();
582
789
  }
583
790
  }
791
+
792
/**
 * Injects the DashScope OAuth headers into a fetch request, in place.
 * Done at the fetch layer so the headers are present even when OpenCode does not
 * call the `chat.headers` hook (older versions / API mismatch).
 * Headers the caller already set are never overwritten; names are compared
 * case-insensitively for every supported container (Headers, entry array, plain object).
 * @param {RequestInit} requestInit - Fetch options to modify
 */
function applyDashScopeHeaders(requestInit) {
    const headersToApply = {
        "X-DashScope-AuthType": PORTAL_HEADERS.AUTH_TYPE_VALUE,
        "X-DashScope-CacheControl": "enable",
        "User-Agent": PLUGIN_USER_AGENT,
        "X-DashScope-UserAgent": PLUGIN_USER_AGENT,
    };
    const current = requestInit.headers;
    if (!current) {
        requestInit.headers = { ...headersToApply };
        return;
    }
    // Guard the global like isRequestInstance does, so a runtime without
    // Headers falls through instead of throwing a ReferenceError.
    if (typeof Headers !== "undefined" && current instanceof Headers) {
        for (const [key, value] of Object.entries(headersToApply)) {
            if (!current.has(key)) {
                current.set(key, value);
            }
        }
        return;
    }
    if (Array.isArray(current)) {
        const existing = new Set(current.map(([name]) => String(name).toLowerCase()));
        for (const [key, value] of Object.entries(headersToApply)) {
            if (!existing.has(key.toLowerCase())) {
                current.push([key, value]);
            }
        }
        return;
    }
    // Plain object: header names are case-insensitive, so compare lower-cased keys.
    // A case-sensitive `in` check would add "User-Agent" next to an existing
    // "user-agent", and fetch would then merge both into one combined value.
    const present = new Set(Object.keys(current).map((name) => name.toLowerCase()));
    for (const [key, value] of Object.entries(headersToApply)) {
        if (!present.has(key.toLowerCase())) {
            current[key] = value;
        }
    }
}
836
+
837
+ /**
838
+ * Custom fetch wrapper for OpenCode SDK
839
+ * Handles token limits, DashScope headers, retries, and quota error fallback
840
+ * @param {Request|string} input - Fetch input
841
+ * @param {RequestInit} [init] - Fetch options
842
+ * @returns {Promise<Response>} API response or fallback response
843
+ */
584
844
  async function failFastFetch(input, init) {
585
845
  const normalized = await normalizeFetchInvocation(input, init);
586
846
  const requestInput = normalized.requestInput;
587
847
  const requestInit = normalized.requestInit;
848
+ // Always inject DashScope OAuth headers at the fetch layer.
849
+ // This ensures compatibility across OpenCode versions.
850
+ applyDashScopeHeaders(requestInit);
588
851
  const sourceSignal = requestInit.signal;
589
852
  const rawPayload = parseJsonRequestBody(requestInit);
590
853
  const sessionID = typeof rawPayload?.sessionID === "string" ? rawPayload.sessionID : undefined;
591
854
  let payload = rawPayload;
592
855
  if (payload) {
856
+ // Ensure we never exceed DashScope model output limits.
857
+ const capped = capPayloadMaxTokens(payload);
858
+ if (capped !== payload) {
859
+ payload = capped;
860
+ applyJsonRequestBody(requestInit, payload);
861
+ }
593
862
  const sanitized = sanitizeOutgoingPayload(payload);
594
863
  if (sanitized !== payload) {
595
864
  payload = sanitized;
@@ -614,84 +883,93 @@ async function failFastFetch(input, init) {
614
883
  }
615
884
  try {
616
885
  let response = await sendWithTimeout(requestInput, requestInit);
617
- if (LOGGING_ENABLED) {
618
- logInfo("Qwen request response", {
619
- request_id: context.requestId,
620
- sessionID: context.sessionID,
621
- modelID: context.modelID,
622
- status: response.status,
623
- attempt: 1,
624
- });
625
- }
626
- if (response.status === 429) {
627
- const firstBody = await response.text().catch(() => "");
628
- if (payload && isInsufficientQuota(firstBody)) {
629
- const degradedPayload = createQuotaDegradedPayload(payload);
630
- if (degradedPayload) {
631
- const fallbackInit = { ...requestInit };
632
- applyJsonRequestBody(fallbackInit, degradedPayload);
633
- if (LOGGING_ENABLED) {
634
- logWarn("Retrying once with degraded payload after 429 insufficient_quota", {
635
- request_id: context.requestId,
636
- sessionID: context.sessionID,
637
- modelID: context.modelID,
638
- attempt: 2,
639
- });
640
- }
641
- response = await sendWithTimeout(requestInput, fallbackInit);
642
- if (LOGGING_ENABLED) {
643
- logInfo("Qwen request response", {
644
- request_id: context.requestId,
645
- sessionID: context.sessionID,
646
- modelID: context.modelID,
647
- status: response.status,
648
- attempt: 2,
649
- });
650
- }
651
- if (response.status !== 429) {
652
- return response;
653
- }
654
- const fallbackBody = await response.text().catch(() => "");
655
- if (ENABLE_CLI_FALLBACK) {
656
- const cliFallback = await runQwenCliFallback(payload, context, sourceSignal);
657
- if (cliFallback.ok) {
658
- return cliFallback.response;
659
- }
660
- if (cliFallback.reason === "cli_aborted") {
661
- return makeFailFastErrorResponse(400, "request_aborted", "Qwen request was aborted");
886
+ const MAX_REQUEST_RETRIES = 3;
887
+ for (let retryAttempt = 0; retryAttempt <= MAX_REQUEST_RETRIES; retryAttempt++) {
888
+ if (LOGGING_ENABLED) {
889
+ logInfo("Qwen request response", {
890
+ request_id: context.requestId,
891
+ sessionID: context.sessionID,
892
+ modelID: context.modelID,
893
+ status: response.status,
894
+ attempt: retryAttempt + 1,
895
+ });
896
+ }
897
+ const RETRYABLE_STATUS_CODES = [429, 500, 502, 503, 504];
898
+ if (RETRYABLE_STATUS_CODES.includes(response.status)) {
899
+ if (response.status === 429) {
900
+ const firstBody = await response.text().catch(() => "");
901
+ if (payload && isInsufficientQuota(firstBody)) {
902
+ const degradedPayload = createQuotaDegradedPayload(payload);
903
+ if (degradedPayload) {
904
+ const fallbackInit = { ...requestInit };
905
+ applyJsonRequestBody(fallbackInit, degradedPayload);
906
+ if (LOGGING_ENABLED) {
907
+ logWarn(`Retrying with degraded payload after ${response.status} insufficient_quota, attempt ${retryAttempt + 2}/${MAX_REQUEST_RETRIES + 1}`, {
908
+ request_id: context.requestId,
909
+ sessionID: context.sessionID,
910
+ modelID: context.modelID,
911
+ });
912
+ }
913
+ response = await sendWithTimeout(requestInput, fallbackInit);
914
+ if (retryAttempt < MAX_REQUEST_RETRIES) {
915
+ continue;
916
+ }
917
+ const fallbackBody = await response.text().catch(() => "");
918
+ if (ENABLE_CLI_FALLBACK) {
919
+ const cliFallback = await runQwenCliFallback(payload, context, sourceSignal);
920
+ if (cliFallback.ok) {
921
+ return cliFallback.response;
922
+ }
923
+ if (cliFallback.reason === "cli_aborted") {
924
+ return makeFailFastErrorResponse(400, "request_aborted", "Qwen request was aborted");
925
+ }
926
+ if (LOGGING_ENABLED) {
927
+ logWarn("Qwen CLI fallback failed", {
928
+ request_id: context.requestId,
929
+ sessionID: context.sessionID,
930
+ modelID: context.modelID,
931
+ reason: cliFallback.reason,
932
+ stderr: cliFallback.stderr,
933
+ });
934
+ }
935
+ }
936
+ return makeQuotaFailFastResponse(fallbackBody, response.headers, context);
662
937
  }
663
- if (LOGGING_ENABLED) {
664
- logWarn("Qwen CLI fallback failed", {
665
- request_id: context.requestId,
666
- sessionID: context.sessionID,
667
- modelID: context.modelID,
668
- reason: cliFallback.reason,
669
- stderr: cliFallback.stderr,
670
- });
938
+ if (ENABLE_CLI_FALLBACK) {
939
+ const cliFallback = await runQwenCliFallback(payload, context, sourceSignal);
940
+ if (cliFallback.ok) {
941
+ return cliFallback.response;
942
+ }
943
+ if (cliFallback.reason === "cli_aborted") {
944
+ return makeFailFastErrorResponse(400, "request_aborted", "Qwen request was aborted");
945
+ }
946
+ if (LOGGING_ENABLED) {
947
+ logWarn("Qwen CLI fallback failed", {
948
+ request_id: context.requestId,
949
+ sessionID: context.sessionID,
950
+ modelID: context.modelID,
951
+ reason: cliFallback.reason,
952
+ stderr: cliFallback.stderr,
953
+ });
954
+ }
671
955
  }
672
956
  }
673
- return makeQuotaFailFastResponse(fallbackBody, response.headers, context);
957
+ return makeQuotaFailFastResponse(firstBody, response.headers, context);
674
958
  }
675
- if (ENABLE_CLI_FALLBACK) {
676
- const cliFallback = await runQwenCliFallback(payload, context, sourceSignal);
677
- if (cliFallback.ok) {
678
- return cliFallback.response;
679
- }
680
- if (cliFallback.reason === "cli_aborted") {
681
- return makeFailFastErrorResponse(400, "request_aborted", "Qwen request was aborted");
682
- }
959
+ if (retryAttempt < MAX_REQUEST_RETRIES) {
683
960
  if (LOGGING_ENABLED) {
684
- logWarn("Qwen CLI fallback failed", {
961
+ logWarn(`Retrying after ${response.status}, attempt ${retryAttempt + 2}/${MAX_REQUEST_RETRIES + 1}`, {
685
962
  request_id: context.requestId,
686
963
  sessionID: context.sessionID,
687
964
  modelID: context.modelID,
688
- reason: cliFallback.reason,
689
- stderr: cliFallback.stderr,
690
965
  });
691
966
  }
967
+ await new Promise(r => setTimeout(r, (retryAttempt + 1) * 1000));
968
+ response = await sendWithTimeout(requestInput, requestInit);
969
+ continue;
692
970
  }
693
971
  }
694
- return makeQuotaFailFastResponse(firstBody, response.headers, context);
972
+ return response;
695
973
  }
696
974
  return response;
697
975
  }
@@ -710,8 +988,8 @@ async function failFastFetch(input, init) {
710
988
  * Get valid access token from SDK auth state, refresh if expired.
711
989
  * Uses getAuth() from SDK instead of reading file directly.
712
990
  *
713
- * @param getAuth - Function to get auth state from SDK
714
- * @returns Access token or null
991
+ * @param {Function} getAuth - Function to get auth state from SDK
992
+ * @returns {Promise<string|null>} Access token or null if not available
715
993
  */
716
994
  async function getValidAccessToken(getAuth) {
717
995
  const diskToken = await getValidToken();
@@ -760,9 +1038,11 @@ async function getValidAccessToken(getAuth) {
760
1038
  }
761
1039
  return accessToken ?? null;
762
1040
  }
1041
+
763
1042
  /**
764
1043
  * Get base URL from token stored on disk (resource_url).
765
- * Falls back to portal.qwen.ai/v1 if not available.
1044
+ * Falls back to DashScope compatible-mode if not available.
1045
+ * @returns {string} DashScope API base URL
766
1046
  */
767
1047
  function getBaseUrl() {
768
1048
  try {
@@ -776,31 +1056,36 @@ function getBaseUrl() {
776
1056
  }
777
1057
  return getApiBaseUrl();
778
1058
  }
779
- /**
780
- * Alibaba Qwen OAuth authentication plugin for opencode
781
- *
782
- * @example
783
- * ```json
784
- * {
785
- * "plugin": ["opencode-alibaba-qwen-cli-auth"],
786
- * "model": "qwen-code/coder-model"
787
- * }
788
- * ```
789
- */
790
- export const QwenAuthPlugin = async (_input) => {
791
- return {
792
- auth: {
793
- provider: PROVIDER_ID,
1059
+
1060
+ /**
1061
+ * Alibaba Qwen OAuth authentication plugin for opencode
1062
+ * Integrates Qwen OAuth device flow and API handling into opencode SDK
1063
+ *
1064
+ * @param {*} _input - Plugin initialization input
1065
+ * @returns {Promise<Object>} Plugin configuration and hooks
1066
+ *
1067
+ * @example
1068
+ * ```json
1069
+ * {
1070
+ * "plugin": ["opencode-alibaba-qwen-cli-auth"],
1071
+ * "model": "qwen-code/coder-model"
1072
+ * }
1073
+ * ```
1074
+ */
1075
+ export const QwenAuthPlugin = async (_input) => {
1076
+ return {
1077
+ auth: {
1078
+ provider: PROVIDER_ID,
794
1079
  /**
795
1080
  * Loader: get token + base URL, return to SDK.
796
1081
  * Pattern similar to opencode-qwencode-auth reference plugin.
797
1082
  */
798
- async loader(getAuth, provider) {
1083
+ async loader(getAuth, provider) {
799
1084
  // Zero cost for OAuth models (free)
800
1085
  if (provider?.models) {
801
- for (const model of Object.values(provider.models)) {
802
- if (model) model.cost = { input: 0, output: 0 };
803
- }
1086
+ for (const model of Object.values(provider.models)) {
1087
+ if (model) model.cost = { input: 0, output: 0 };
1088
+ }
804
1089
  }
805
1090
  const accessToken = await getValidAccessToken(getAuth);
806
1091
  if (!accessToken) return null;
@@ -817,32 +1102,32 @@ export const QwenAuthPlugin = async (_input) => {
817
1102
  };
818
1103
  },
819
1104
  methods: [
820
- {
821
- label: AUTH_LABELS.OAUTH,
822
- type: "oauth",
823
- /**
824
- * Device Authorization Grant OAuth flow (RFC 8628)
825
- */
826
- authorize: async () => {
827
- // Generate PKCE
828
- const pkce = await createPKCE();
829
- // Request device code
830
- const deviceAuth = await requestDeviceCode(pkce);
831
- if (!deviceAuth) {
832
- throw new Error("Failed to request device code");
833
- }
1105
+ {
1106
+ label: AUTH_LABELS.OAUTH,
1107
+ type: "oauth",
1108
+ /**
1109
+ * Device Authorization Grant OAuth flow (RFC 8628)
1110
+ */
1111
+ authorize: async () => {
1112
+ // Generate PKCE
1113
+ const pkce = await createPKCE();
1114
+ // Request device code
1115
+ const deviceAuth = await requestDeviceCode(pkce);
1116
+ if (!deviceAuth) {
1117
+ throw new Error("Failed to request device code");
1118
+ }
834
1119
  // Display user code
835
1120
  console.log(`\nPlease visit: ${deviceAuth.verification_uri}`);
836
1121
  console.log(`And enter code: ${deviceAuth.user_code}\n`);
837
1122
  // Verification URL - SDK will open browser automatically when method=auto
838
- const verificationUrl = deviceAuth.verification_uri_complete || deviceAuth.verification_uri;
839
- return {
840
- url: verificationUrl,
841
- method: "auto",
842
- instructions: AUTH_LABELS.INSTRUCTIONS,
843
- callback: async () => {
844
- // Poll for token
845
- let pollInterval = (deviceAuth.interval || 5) * 1000;
1123
+ const verificationUrl = deviceAuth.verification_uri_complete || deviceAuth.verification_uri;
1124
+ return {
1125
+ url: verificationUrl,
1126
+ method: "auto",
1127
+ instructions: AUTH_LABELS.INSTRUCTIONS,
1128
+ callback: async () => {
1129
+ // Poll for token
1130
+ let pollInterval = (deviceAuth.interval || 5) * 1000;
846
1131
  const POLLING_MARGIN_MS = 3000;
847
1132
  const maxInterval = DEVICE_FLOW.MAX_POLL_INTERVAL;
848
1133
  const startTime = Date.now();
@@ -855,9 +1140,9 @@ export const QwenAuthPlugin = async (_input) => {
855
1140
  saveToken(result);
856
1141
  // Return to SDK to save auth state
857
1142
  return {
858
- type: "success",
859
- access: result.access,
860
- refresh: result.refresh,
1143
+ type: "success",
1144
+ access: result.access,
1145
+ refresh: result.refresh,
861
1146
  expires: result.expires,
862
1147
  };
863
1148
  }
@@ -900,19 +1185,19 @@ export const QwenAuthPlugin = async (_input) => {
900
1185
  console.error("[qwen-oauth-plugin] Device authorization timed out");
901
1186
  return { type: "failed" };
902
1187
  },
903
- };
904
- },
905
- },
906
- ],
907
- },
1188
+ };
1189
+ },
1190
+ },
1191
+ ],
1192
+ },
908
1193
  /**
909
1194
  * Register qwen-code provider with model list.
910
1195
  * Only register models that Portal API (OAuth) accepts:
911
1196
  * coder-model and vision-model (according to QWEN_OAUTH_ALLOWED_MODELS from original CLI)
912
1197
  */
913
- config: async (config) => {
914
- const providers = config.provider || {};
915
- providers[PROVIDER_ID] = {
1198
+ config: async (config) => {
1199
+ const providers = config.provider || {};
1200
+ providers[PROVIDER_ID] = {
916
1201
  npm: "@ai-sdk/openai-compatible",
917
1202
  name: "Qwen Code",
918
1203
  options: {
@@ -928,21 +1213,28 @@ export const QwenAuthPlugin = async (_input) => {
928
1213
  // Thinking is always enabled by default on server side (qwen3.5-plus)
929
1214
  reasoning: false,
930
1215
  limit: { context: 1048576, output: CHAT_MAX_TOKENS_CAP },
931
- cost: { input: 0, output: 0 },
932
- modalities: { input: ["text"], output: ["text"] },
933
- },
1216
+ cost: { input: 0, output: 0 },
1217
+ modalities: { input: ["text"], output: ["text"] },
1218
+ },
934
1219
  "vision-model": {
935
1220
  id: "vision-model",
936
1221
  name: "Qwen VL Plus (vision)",
937
1222
  reasoning: false,
938
- limit: { context: 131072, output: CHAT_MAX_TOKENS_CAP },
939
- cost: { input: 0, output: 0 },
940
- modalities: { input: ["text"], output: ["text"] },
941
- },
942
- },
1223
+ limit: { context: 131072, output: DASH_SCOPE_OUTPUT_LIMITS["vision-model"] },
1224
+ cost: { input: 0, output: 0 },
1225
+ modalities: { input: ["text"], output: ["text"] },
1226
+ },
1227
+ },
943
1228
  };
944
1229
  config.provider = providers;
945
1230
  },
1231
+ /**
1232
+ * Apply dynamic chat parameters before sending request
1233
+ * Ensures tokens and timeouts don't exceed plugin limits
1234
+ *
1235
+ * @param {*} input - Original chat request parameters
1236
+ * @param {*} output - Final payload to be sent
1237
+ */
946
1238
  "chat.params": async (input, output) => {
947
1239
  try {
948
1240
  output.options = output.options || {};
@@ -988,6 +1280,9 @@ export const QwenAuthPlugin = async (_input) => {
988
1280
  * Send DashScope headers like original CLI.
989
1281
  * X-DashScope-CacheControl: enable prompt caching, reduce token consumption.
990
1282
  * X-DashScope-AuthType: specify auth method for server.
1283
+ *
1284
+ * @param {*} input - Original chat request parameters
1285
+ * @param {*} output - Final payload to be sent
991
1286
  */
992
1287
  "chat.headers": async (input, output) => {
993
1288
  try {
@@ -1013,5 +1308,5 @@ export const QwenAuthPlugin = async (_input) => {
1013
1308
  },
1014
1309
  };
1015
1310
  };
1016
- export default QwenAuthPlugin;
1311
+ export default QwenAuthPlugin;
1017
1312
  //# sourceMappingURL=index.js.map