pi-free 1.0.8 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. package/CHANGELOG.md +107 -1
  2. package/README.md +95 -46
  3. package/config.ts +165 -120
  4. package/constants.ts +22 -61
  5. package/index.ts +186 -0
  6. package/lib/json-persistence.ts +11 -10
  7. package/lib/logger.ts +2 -2
  8. package/lib/model-enhancer.ts +20 -20
  9. package/lib/open-browser.ts +41 -0
  10. package/lib/provider-cache.ts +106 -0
  11. package/lib/registry.ts +144 -0
  12. package/package.json +67 -82
  13. package/provider-factory.ts +25 -41
  14. package/provider-failover/benchmark-lookup.ts +247 -0
  15. package/provider-failover/benchmarks-chunk-0.ts +2010 -0
  16. package/provider-failover/benchmarks-chunk-1.ts +1988 -0
  17. package/provider-failover/benchmarks-chunk-2.ts +2010 -0
  18. package/provider-failover/benchmarks-chunk-3.ts +2010 -0
  19. package/provider-failover/benchmarks-chunk-4.ts +1969 -0
  20. package/provider-failover/hardcoded-benchmarks.ts +22 -10025
  21. package/provider-helper.ts +38 -37
  22. package/providers/{cline-auth.ts → cline/cline-auth.ts} +2 -2
  23. package/providers/cline/cline-models.ts +128 -0
  24. package/providers/{cline.ts → cline/cline.ts} +300 -257
  25. package/providers/cloudflare/cloudflare.ts +368 -0
  26. package/providers/dynamic-built-in/index.ts +513 -0
  27. package/providers/{kilo-auth.ts → kilo/kilo-auth.ts} +3 -20
  28. package/providers/{kilo-models.ts → kilo/kilo-models.ts} +2 -2
  29. package/providers/kilo/kilo.ts +235 -0
  30. package/providers/{modal.ts → modal/modal.ts} +4 -3
  31. package/providers/{nvidia.ts → nvidia/nvidia.ts} +152 -113
  32. package/providers/ollama/ollama.ts +172 -0
  33. package/providers/opencode-session.ts +34 -34
  34. package/providers/{qwen-auth.ts → qwen/qwen-auth.ts} +24 -40
  35. package/providers/{qwen-models.ts → qwen/qwen-models.ts} +101 -95
  36. package/providers/qwen/qwen.ts +202 -0
  37. package/provider-failover/auto-switch.ts +0 -350
  38. package/provider-failover/errors.ts +0 -275
  39. package/provider-failover/index.ts +0 -238
  40. package/providers/cline-models.ts +0 -77
  41. package/providers/factory.ts +0 -125
  42. package/providers/fireworks.ts +0 -49
  43. package/providers/go.ts +0 -216
  44. package/providers/kilo.ts +0 -146
  45. package/providers/mistral.ts +0 -144
  46. package/providers/ollama.ts +0 -113
  47. package/providers/openrouter.ts +0 -175
  48. package/providers/qwen.ts +0 -127
  49. package/providers/zen.ts +0 -371
  50. package/usage/commands.ts +0 -17
  51. package/usage/cumulative.ts +0 -193
  52. package/usage/formatters.ts +0 -115
  53. package/usage/index.ts +0 -46
  54. package/usage/limits.ts +0 -148
  55. package/usage/metrics.ts +0 -222
  56. package/usage/sessions.ts +0 -355
  57. package/usage/store.ts +0 -99
  58. package/usage/tracking.ts +0 -329
  59. package/usage/types.ts +0 -26
  60. package/usage/widget.ts +0 -90
  61. package/widget/data.ts +0 -113
  62. package/widget/format.ts +0 -26
  63. package/widget/render.ts +0 -117
@@ -0,0 +1,172 @@
1
+ /**
2
+ * Ollama Cloud Provider Extension
3
+ *
4
+ * Provides access to Ollama's cloud-hosted models via ollama.com API.
5
+ * All models use Ollama's usage-based pricing system:
6
+ * - Free tier: Unlimited public models (session limits reset every 5 hours,
7
+ * weekly limits reset every 7 days)
8
+ * - Pro tier: 50x more cloud usage than Free
9
+ * - Max tier: 5x more usage than Pro
10
+ *
11
+ * Requires OLLAMA_API_KEY with cloud access.
12
+ * Get a free key at: https://ollama.com/settings/keys
13
+ *
14
+ * Responds to global /free toggle (shows models but warns they're freemium).
15
+ *
16
+ * Usage:
17
+ * pi install git:github.com/apmantza/pi-free
18
+ * # Set OLLAMA_API_KEY env var
19
+ * # Models appear in /model selector
20
+ * # Use /ollama-toggle to show all vs limited set
21
+ */
22
+
23
+ import type {
24
+ ExtensionAPI,
25
+ ProviderModelConfig,
26
+ } from "@mariozechner/pi-coding-agent";
27
+ import {
28
+ applyHidden,
29
+ getOllamaApiKey,
30
+ getOllamaShowPaid,
31
+ } from "../../config.ts";
32
+ import {
33
+ BASE_URL_OLLAMA,
34
+ DEFAULT_FETCH_TIMEOUT_MS,
35
+ PROVIDER_OLLAMA,
36
+ } from "../../constants.ts";
37
+ import { createLogger } from "../../lib/logger.ts";
38
+ import { registerWithGlobalToggle } from "../../lib/registry.ts";
39
+ import { fetchWithRetry } from "../../lib/util.ts";
40
+ import { createReRegister, enhanceWithCI } from "../../provider-helper.ts";
41
+
42
+ const _logger = createLogger("ollama-cloud");
43
+
44
+ // =============================================================================
45
+ // Fetch + map
46
+ // =============================================================================
47
+
48
+ async function fetchOllamaModels(
49
+ apiKey: string,
50
+ ): Promise<ProviderModelConfig[]> {
51
+ // Use OpenAI-compatible /v1/models endpoint for consistency
52
+ // The native /api/tags returns :cloud suffixes that may not work with /v1/chat/completions
53
+ const response = await fetchWithRetry(
54
+ `${BASE_URL_OLLAMA}/v1/models`,
55
+ {
56
+ headers: {
57
+ Authorization: `Bearer ${apiKey}`,
58
+ "Content-Type": "application/json",
59
+ },
60
+ },
61
+ 3,
62
+ 1000,
63
+ DEFAULT_FETCH_TIMEOUT_MS,
64
+ );
65
+
66
+ if (!response.ok) {
67
+ throw new Error(
68
+ `Failed to fetch Ollama models: ${response.status} ${response.statusText}`,
69
+ );
70
+ }
71
+
72
+ const json = (await response.json()) as {
73
+ data?: Array<{ id: string; owned_by?: string }>;
74
+ };
75
+ const models = json.data ?? [];
76
+
77
+ _logger.info(
78
+ `[ollama-cloud] Fetched ${models.length} models from Ollama Cloud`,
79
+ );
80
+
81
+ // Filter to chat/text generation models only
82
+ const chatModels = models.filter((m) => {
83
+ // Skip embedding-only models (typically have "embed" in name)
84
+ const name = m.id.toLowerCase();
85
+ if (name.includes("embed")) return false;
86
+ return true;
87
+ });
88
+
89
+ const result = applyHidden(
90
+ chatModels.map(
91
+ (m): ProviderModelConfig => ({
92
+ id: m.id,
93
+ name: m.id,
94
+ // Try to infer reasoning from model name
95
+ reasoning:
96
+ m.id.toLowerCase().includes("reasoning") ||
97
+ m.id.toLowerCase().includes("r1") ||
98
+ m.id.toLowerCase().includes("thinking"),
99
+ input: ["text"],
100
+ // Ollama Cloud uses usage-based pricing (GPU time), not per-token
101
+ // Free tier has limits but no direct cost per token
102
+ cost: {
103
+ input: 0, // Freemium: usage-based, not per-token
104
+ output: 0,
105
+ cacheRead: 0,
106
+ cacheWrite: 0,
107
+ },
108
+ // Default context window - Ollama doesn't expose this via /v1/models
109
+ contextWindow: 32768,
110
+ maxTokens: 4096, // Default, varies by model
111
+ }),
112
+ ),
113
+ );
114
+
115
+ return result;
116
+ }
117
+
118
+ // =============================================================================
119
+ // Extension Entry Point
120
+ // =============================================================================
121
+
122
+ export default async function (pi: ExtensionAPI) {
123
+ const apiKey = getOllamaApiKey();
124
+
125
+ if (!apiKey) {
126
+ _logger.info(
127
+ "[ollama-cloud] Skipping - OLLAMA_API_KEY not set (env var or ~/.pi/free.json)",
128
+ );
129
+ return;
130
+ }
131
+
132
+ // Fetch models
133
+ let allModels: ProviderModelConfig[] = [];
134
+
135
+ try {
136
+ allModels = await fetchOllamaModels(apiKey);
137
+ } catch (error) {
138
+ _logger.error("[ollama-cloud] Failed to fetch models at startup", {
139
+ error: error instanceof Error ? error.message : String(error),
140
+ });
141
+ return;
142
+ }
143
+
144
+ // For Ollama, all models share the same free tier
145
+ // So "free" and "all" are the same set
146
+ const freeModels = allModels;
147
+ const stored = { free: freeModels, all: allModels };
148
+ const hasKey = true;
149
+
150
+ // Create re-register function
151
+ const reRegister = createReRegister(pi, {
152
+ providerId: PROVIDER_OLLAMA,
153
+ baseUrl: BASE_URL_OLLAMA,
154
+ apiKey,
155
+ });
156
+
157
+ // Register with global toggle system
158
+ registerWithGlobalToggle(PROVIDER_OLLAMA, stored, reRegister, hasKey);
159
+
160
+ // Register initial models
161
+ const initialModels = getOllamaShowPaid() ? allModels : freeModels;
162
+ pi.registerProvider(PROVIDER_OLLAMA, {
163
+ baseUrl: BASE_URL_OLLAMA,
164
+ apiKey,
165
+ api: "openai-completions" as const,
166
+ models: enhanceWithCI(initialModels),
167
+ });
168
+
169
+ _logger.info(
170
+ `[ollama-cloud] Registered ${initialModels.length} models (usage-based free tier)`,
171
+ );
172
+ }
@@ -1,34 +1,34 @@
1
- /**
2
- * Shared OpenCode session/request tracking.
3
- *
4
- * OpenCode endpoints appear to behave more reliably when a stable session id
5
- * is included across requests in the same Pi session.
6
- */
7
- export function createOpenCodeSessionTracker() {
8
- let sessionId = "";
9
- let requestCount = 0;
10
-
11
- function generateId(): string {
12
- return (
13
- Math.random().toString(36).substring(2, 15) +
14
- Math.random().toString(36).substring(2, 15)
15
- );
16
- }
17
-
18
- function getSessionId(): string {
19
- if (!sessionId) {
20
- sessionId = generateId();
21
- }
22
- return sessionId;
23
- }
24
-
25
- function nextRequestId(): string {
26
- requestCount++;
27
- return `${getSessionId()}-${requestCount}`;
28
- }
29
-
30
- return {
31
- getSessionId,
32
- nextRequestId,
33
- };
34
- }
1
+ /**
2
+ * Shared OpenCode session/request tracking.
3
+ *
4
+ * OpenCode endpoints appear to behave more reliably when a stable session id
5
+ * is included across requests in the same Pi session.
6
+ */
7
+ export function createOpenCodeSessionTracker() {
8
+ let sessionId = "";
9
+ let requestCount = 0;
10
+
11
+ function generateId(): string {
12
+ return (
13
+ Math.random().toString(36).substring(2, 15) +
14
+ Math.random().toString(36).substring(2, 15)
15
+ );
16
+ }
17
+
18
+ function getSessionId(): string {
19
+ if (!sessionId) {
20
+ sessionId = generateId();
21
+ }
22
+ return sessionId;
23
+ }
24
+
25
+ function nextRequestId(): string {
26
+ requestCount++;
27
+ return `${getSessionId()}-${requestCount}`;
28
+ }
29
+
30
+ return {
31
+ getSessionId,
32
+ nextRequestId,
33
+ };
34
+ }
@@ -13,12 +13,12 @@
13
13
  */
14
14
 
15
15
  import crypto from "node:crypto";
16
- import { spawn } from "node:child_process";
17
16
  import type {
18
17
  OAuthCredentials,
19
18
  OAuthLoginCallbacks,
20
19
  } from "@mariozechner/pi-ai";
21
- import { createLogger } from "../lib/logger.ts";
20
+ import { createLogger } from "../../lib/logger.ts";
21
+ import { openBrowser } from "../../lib/open-browser.ts";
22
22
 
23
23
  const _logger = createLogger("qwen-auth");
24
24
 
@@ -37,6 +37,12 @@ const QWEN_OAUTH_GRANT_TYPE = "urn:ietf:params:oauth:grant-type:device_code";
37
37
  const INITIAL_POLL_INTERVAL_MS = 2000;
38
38
  const MAX_POLL_INTERVAL_MS = 10000;
39
39
 
40
+ // Token refresh buffer: proactively refresh this many ms before actual expiry.
41
+ // For comparison, qwen-code's SharedTokenManager uses a 30s buffer.
42
+ // We use 5 minutes (same as pi-core's reference qwen-cli example) to be safe
43
+ // against clock skew, network latency, and server-side early revocation.
44
+ const EXPIRY_BUFFER_MS = 5 * 60 * 1000;
45
+
40
46
  // =============================================================================
41
47
  // PKCE Utilities
42
48
  // =============================================================================
@@ -46,10 +52,7 @@ function generateCodeVerifier(): string {
46
52
  }
47
53
 
48
54
  function generateCodeChallenge(codeVerifier: string): string {
49
- return crypto
50
- .createHash("sha256")
51
- .update(codeVerifier)
52
- .digest("base64url");
55
+ return crypto.createHash("sha256").update(codeVerifier).digest("base64url");
53
56
  }
54
57
 
55
58
  function generatePKCEPair(): {
@@ -67,33 +70,10 @@ function generatePKCEPair(): {
67
70
 
68
71
  function objectToUrlEncoded(data: Record<string, string>): string {
69
72
  return Object.keys(data)
70
- .map(
71
- (key) =>
72
- `${encodeURIComponent(key)}=${encodeURIComponent(data[key])}`,
73
- )
73
+ .map((key) => `${encodeURIComponent(key)}=${encodeURIComponent(data[key])}`)
74
74
  .join("&");
75
75
  }
76
76
 
77
- function openBrowser(url: string): void {
78
- try {
79
- if (process.platform === "win32") {
80
- // cmd.exe interprets & as a command separator, breaking URLs with query params.
81
- // PowerShell's Start-Process treats the URL as a literal string.
82
- spawn(
83
- "powershell.exe",
84
- ["-NoProfile", "-NonInteractive", "-Command", `Start-Process "${url.replace(/"/g, '\\"')}"`],
85
- { detached: true, shell: false, windowsHide: true },
86
- ).unref();
87
- } else if (process.platform === "darwin") {
88
- spawn("open", [url], { detached: true }).unref();
89
- } else {
90
- spawn("xdg-open", [url], { detached: true }).unref();
91
- }
92
- } catch (err) {
93
- _logger.debug("Failed to open browser", { error: String(err) });
94
- }
95
- }
96
-
97
77
  function abortableSleep(ms: number, signal?: AbortSignal): Promise<void> {
98
78
  return new Promise((resolve, reject) => {
99
79
  if (signal?.aborted) {
@@ -169,9 +149,7 @@ async function requestDeviceAuthorization(
169
149
  );
170
150
  }
171
151
 
172
- const result = (await response.json()) as
173
- | DeviceAuthorizationData
174
- | ErrorData;
152
+ const result = (await response.json()) as DeviceAuthorizationData | ErrorData;
175
153
 
176
154
  if ("error" in result) {
177
155
  throw new Error(
@@ -325,8 +303,8 @@ export async function loginQwen(
325
303
  access: data.access_token!,
326
304
  refresh: data.refresh_token ?? "",
327
305
  expires: data.expires_in
328
- ? Date.now() + data.expires_in * 1000
329
- : Date.now() + 3600 * 1000, // 1 hour default
306
+ ? Date.now() + data.expires_in * 1000 - EXPIRY_BUFFER_MS
307
+ : Date.now() + 3600 * 1000 - EXPIRY_BUFFER_MS, // 1 hour default minus buffer
330
308
  resource_url: resourceUrl,
331
309
  };
332
310
  }
@@ -354,7 +332,11 @@ export async function loginQwen(
354
332
  export async function refreshQwenToken(
355
333
  credentials: OAuthCredentials,
356
334
  ): Promise<OAuthCredentials> {
357
- if (credentials.expires > Date.now()) return credentials;
335
+ // Note: we intentionally DO NOT early-return when the token appears valid.
336
+ // pi-core calls refreshToken() only when it has already determined the token
337
+ // needs refreshing (Date.now() >= cred.expires). The early return was
338
+ // redundant and blocked forced-refreshes after server-side token revocation
339
+ // (where the stored expiry hasn't been reached yet but the token is invalid).
358
340
 
359
341
  if (!credentials.refresh) {
360
342
  throw new Error(
@@ -398,21 +380,23 @@ export async function refreshQwenToken(
398
380
  }
399
381
 
400
382
  // Preserve resource_url as a proper field (not encoded in refresh token)
401
- const resourceUrl = data.resource_url || (credentials.resource_url as string) || "";
383
+ const resourceUrl =
384
+ data.resource_url || (credentials.resource_url as string) || "";
402
385
 
403
386
  return {
404
387
  access: data.access_token,
405
388
  refresh: data.refresh_token ?? credentials.refresh,
406
389
  expires: data.expires_in
407
- ? Date.now() + data.expires_in * 1000
408
- : Date.now() + 3600 * 1000,
390
+ ? Date.now() + data.expires_in * 1000 - EXPIRY_BUFFER_MS
391
+ : Date.now() + 3600 * 1000 - EXPIRY_BUFFER_MS,
409
392
  resource_url: resourceUrl,
410
393
  };
411
394
  }
412
395
 
413
396
  // Fallback endpoint used when resource_url is absent from the OAuth token.
414
397
  // Mirrors qwen-code's DEFAULT_QWEN_BASE_URL.
415
- const QWEN_DEFAULT_BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1";
398
+ const QWEN_DEFAULT_BASE_URL =
399
+ "https://dashscope.aliyuncs.com/compatible-mode/v1";
416
400
 
417
401
  /**
418
402
  * Resolve the API base URL from OAuth credentials.
@@ -1,95 +1,101 @@
1
- /**
2
- * Qwen OAuth model definitions.
3
- *
4
- * Free tier provides Qwen Coder Plus with 1,000 requests/day.
5
- */
6
-
7
- import type { ProviderModelConfig } from "@mariozechner/pi-coding-agent";
8
- import { createLogger } from "../lib/logger.ts";
9
-
10
- const _logger = createLogger("qwen-models");
11
-
12
- /**
13
- * portal.qwen.ai compatibility settings.
14
- *
15
- * portal.qwen.ai's OpenAI-compatible API does not support several parameters
16
- * that the pi framework sends by default.
17
- */
18
- export const PORTAL_COMPAT: NonNullable<ProviderModelConfig["compat"]> = {
19
- supportsStore: false,
20
- supportsDeveloperRole: false,
21
- supportsReasoningEffort: false,
22
- supportsUsageInStreaming: false,
23
- supportsStrictMode: false,
24
- maxTokensField: "max_tokens",
25
- };
26
-
27
- /**
28
- * Fallback model used before OAuth completes or if model discovery fails.
29
- * The real model ID is resolved dynamically via fetchQwenLiveModels() after auth.
30
- */
31
- export const QWEN_FREE_MODELS: ProviderModelConfig[] = [
32
- {
33
- id: "coder-model",
34
- name: "Qwen Coder — Free 1k/day",
35
- reasoning: false,
36
- input: ["text"],
37
- cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
38
- contextWindow: 131_072,
39
- maxTokens: 16_384,
40
- compat: PORTAL_COMPAT,
41
- },
42
- ];
43
-
44
- /**
45
- * Fetch Qwen models. For OAuth free tier, the model list is static.
46
- */
47
- export async function fetchQwenModels(): Promise<ProviderModelConfig[]> {
48
- _logger.info("Qwen OAuth: using static free tier models");
49
- return QWEN_FREE_MODELS;
50
- }
51
-
52
- /**
53
- * Fetch live model list from the Qwen API using the OAuth access token.
54
- * Returns updated models with real IDs from the server, or the original
55
- * models unchanged if the request fails.
56
- */
57
- export async function fetchQwenLiveModels(
58
- baseUrl: string,
59
- accessToken: string,
60
- templateModels: ProviderModelConfig[],
61
- ): Promise<ProviderModelConfig[]> {
62
- try {
63
- const response = await fetch(`${baseUrl}/models`, {
64
- headers: {
65
- Authorization: `Bearer ${accessToken}`,
66
- Accept: "application/json",
67
- },
68
- });
69
-
70
- if (!response.ok) {
71
- _logger.info("Qwen /v1/models fetch failed, keeping current model IDs", {
72
- status: response.status,
73
- });
74
- return templateModels;
75
- }
76
-
77
- interface ModelEntry { id: string }
78
- const data = (await response.json()) as { data?: ModelEntry[] };
79
- const ids: string[] = (data.data ?? []).map((m: ModelEntry) => m.id).filter(Boolean);
80
-
81
- _logger.info("Qwen live models discovered", { ids });
82
-
83
- if (ids.length === 0) return templateModels;
84
-
85
- // Prefer a coder model if available, otherwise use the first model
86
- const preferred = ids.find((id) => /coder/i.test(id)) ?? ids[0];
87
-
88
- return templateModels.map((m) => ({ ...m, id: preferred }));
89
- } catch (err) {
90
- _logger.info("Qwen live model fetch error, keeping current model IDs", {
91
- error: String(err),
92
- });
93
- return templateModels;
94
- }
95
- }
1
+ /**
2
+ * Qwen OAuth model definitions.
3
+ *
4
+ * @deprecated The 1,000 req/day free tier is no longer available. Auth is broken.
5
+ * This provider remains for backward compatibility but should not be used.
6
+ */
7
+
8
+ import type { ProviderModelConfig } from "@mariozechner/pi-coding-agent";
9
+ import { createLogger } from "../../lib/logger.ts";
10
+
11
+ const _logger = createLogger("qwen-models");
12
+
13
+ /**
14
+ * portal.qwen.ai compatibility settings.
15
+ *
16
+ * portal.qwen.ai's OpenAI-compatible API does not support several parameters
17
+ * that the pi framework sends by default.
18
+ */
19
+ export const PORTAL_COMPAT: NonNullable<ProviderModelConfig["compat"]> = {
20
+ supportsStore: false,
21
+ supportsDeveloperRole: false,
22
+ supportsReasoningEffort: false,
23
+ supportsUsageInStreaming: false,
24
+ supportsStrictMode: false,
25
+ maxTokensField: "max_tokens",
26
+ };
27
+
28
+ /**
29
+ * Fallback model used before OAuth completes or if model discovery fails.
30
+ * The real model ID is resolved dynamically via fetchQwenLiveModels() after auth.
31
+ */
32
+ export const QWEN_FREE_MODELS: ProviderModelConfig[] = [
33
+ {
34
+ id: "coder-model",
35
+ name: "Qwen Coder — DEPRECATED (free tier discontinued)",
36
+ reasoning: false,
37
+ input: ["text"],
38
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
39
+ contextWindow: 131_072,
40
+ maxTokens: 16_384,
41
+ compat: PORTAL_COMPAT,
42
+ },
43
+ ];
44
+
45
+ /**
46
+ * Fetch Qwen models. Returns static model list for backward compatibility.
47
+ * @deprecated Qwen free tier is discontinued.
48
+ */
49
+ export async function fetchQwenModels(): Promise<ProviderModelConfig[]> {
50
+ _logger.info("Qwen provider is deprecated, returning placeholder models");
51
+ return QWEN_FREE_MODELS;
52
+ }
53
+
54
+ /**
55
+ * Fetch live model list from the Qwen API using the OAuth access token.
56
+ * Returns updated models with real IDs from the server, or the original
57
+ * models unchanged if the request fails.
58
+ */
59
+ export async function fetchQwenLiveModels(
60
+ baseUrl: string,
61
+ accessToken: string,
62
+ templateModels: ProviderModelConfig[],
63
+ ): Promise<ProviderModelConfig[]> {
64
+ try {
65
+ const response = await fetch(`${baseUrl}/models`, {
66
+ headers: {
67
+ Authorization: `Bearer ${accessToken}`,
68
+ Accept: "application/json",
69
+ },
70
+ });
71
+
72
+ if (!response.ok) {
73
+ _logger.info("Qwen /v1/models fetch failed, keeping current model IDs", {
74
+ status: response.status,
75
+ });
76
+ return templateModels;
77
+ }
78
+
79
+ interface ModelEntry {
80
+ id: string;
81
+ }
82
+ const data = (await response.json()) as { data?: ModelEntry[] };
83
+ const ids: string[] = (data.data ?? [])
84
+ .map((m: ModelEntry) => m.id)
85
+ .filter(Boolean);
86
+
87
+ _logger.info("Qwen live models discovered", { ids });
88
+
89
+ if (ids.length === 0) return templateModels;
90
+
91
+ // Prefer a coder model if available, otherwise use the first model
92
+ const preferred = ids.find((id) => /coder/i.test(id)) ?? ids[0];
93
+
94
+ return templateModels.map((m) => ({ ...m, id: preferred }));
95
+ } catch (err) {
96
+ _logger.info("Qwen live model fetch error, keeping current model IDs", {
97
+ error: String(err),
98
+ });
99
+ return templateModels;
100
+ }
101
+ }