pi-free 1.0.8 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +107 -1
- package/README.md +95 -46
- package/config.ts +165 -120
- package/constants.ts +22 -61
- package/index.ts +186 -0
- package/lib/json-persistence.ts +11 -10
- package/lib/logger.ts +2 -2
- package/lib/model-enhancer.ts +20 -20
- package/lib/open-browser.ts +41 -0
- package/lib/provider-cache.ts +106 -0
- package/lib/registry.ts +144 -0
- package/package.json +67 -82
- package/provider-factory.ts +25 -41
- package/provider-failover/benchmark-lookup.ts +247 -0
- package/provider-failover/benchmarks-chunk-0.ts +2010 -0
- package/provider-failover/benchmarks-chunk-1.ts +1988 -0
- package/provider-failover/benchmarks-chunk-2.ts +2010 -0
- package/provider-failover/benchmarks-chunk-3.ts +2010 -0
- package/provider-failover/benchmarks-chunk-4.ts +1969 -0
- package/provider-failover/hardcoded-benchmarks.ts +22 -10025
- package/provider-helper.ts +38 -37
- package/providers/{cline-auth.ts → cline/cline-auth.ts} +2 -2
- package/providers/cline/cline-models.ts +128 -0
- package/providers/{cline.ts → cline/cline.ts} +300 -257
- package/providers/cloudflare/cloudflare.ts +368 -0
- package/providers/dynamic-built-in/index.ts +513 -0
- package/providers/{kilo-auth.ts → kilo/kilo-auth.ts} +3 -20
- package/providers/{kilo-models.ts → kilo/kilo-models.ts} +2 -2
- package/providers/kilo/kilo.ts +235 -0
- package/providers/{modal.ts → modal/modal.ts} +4 -3
- package/providers/{nvidia.ts → nvidia/nvidia.ts} +152 -113
- package/providers/ollama/ollama.ts +172 -0
- package/providers/opencode-session.ts +34 -34
- package/providers/{qwen-auth.ts → qwen/qwen-auth.ts} +24 -40
- package/providers/{qwen-models.ts → qwen/qwen-models.ts} +101 -95
- package/providers/qwen/qwen.ts +202 -0
- package/provider-failover/auto-switch.ts +0 -350
- package/provider-failover/errors.ts +0 -275
- package/provider-failover/index.ts +0 -238
- package/providers/cline-models.ts +0 -77
- package/providers/factory.ts +0 -125
- package/providers/fireworks.ts +0 -49
- package/providers/go.ts +0 -216
- package/providers/kilo.ts +0 -146
- package/providers/mistral.ts +0 -144
- package/providers/ollama.ts +0 -113
- package/providers/openrouter.ts +0 -175
- package/providers/qwen.ts +0 -127
- package/providers/zen.ts +0 -371
- package/usage/commands.ts +0 -17
- package/usage/cumulative.ts +0 -193
- package/usage/formatters.ts +0 -115
- package/usage/index.ts +0 -46
- package/usage/limits.ts +0 -148
- package/usage/metrics.ts +0 -222
- package/usage/sessions.ts +0 -355
- package/usage/store.ts +0 -99
- package/usage/tracking.ts +0 -329
- package/usage/types.ts +0 -26
- package/usage/widget.ts +0 -90
- package/widget/data.ts +0 -113
- package/widget/format.ts +0 -26
- package/widget/render.ts +0 -117
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Ollama Cloud Provider Extension
|
|
3
|
+
*
|
|
4
|
+
* Provides access to Ollama's cloud-hosted models via ollama.com API.
|
|
5
|
+
* All models use Ollama's usage-based pricing system:
|
|
6
|
+
* - Free tier: Unlimited public models (session limits reset every 5 hours,
|
|
7
|
+
* weekly limits reset every 7 days)
|
|
8
|
+
* - Pro tier: 50x more cloud usage than Free
|
|
9
|
+
* - Max tier: 5x more usage than Pro
|
|
10
|
+
*
|
|
11
|
+
* Requires OLLAMA_API_KEY with cloud access.
|
|
12
|
+
* Get a free key at: https://ollama.com/settings/keys
|
|
13
|
+
*
|
|
14
|
+
* Responds to global /free toggle (shows models but warns they're freemium).
|
|
15
|
+
*
|
|
16
|
+
* Usage:
|
|
17
|
+
* pi install git:github.com/apmantza/pi-free
|
|
18
|
+
* # Set OLLAMA_API_KEY env var
|
|
19
|
+
* # Models appear in /model selector
|
|
20
|
+
* # Use /ollama-toggle to show all vs limited set
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
import type {
|
|
24
|
+
ExtensionAPI,
|
|
25
|
+
ProviderModelConfig,
|
|
26
|
+
} from "@mariozechner/pi-coding-agent";
|
|
27
|
+
import {
|
|
28
|
+
applyHidden,
|
|
29
|
+
getOllamaApiKey,
|
|
30
|
+
getOllamaShowPaid,
|
|
31
|
+
} from "../../config.ts";
|
|
32
|
+
import {
|
|
33
|
+
BASE_URL_OLLAMA,
|
|
34
|
+
DEFAULT_FETCH_TIMEOUT_MS,
|
|
35
|
+
PROVIDER_OLLAMA,
|
|
36
|
+
} from "../../constants.ts";
|
|
37
|
+
import { createLogger } from "../../lib/logger.ts";
|
|
38
|
+
import { registerWithGlobalToggle } from "../../lib/registry.ts";
|
|
39
|
+
import { fetchWithRetry } from "../../lib/util.ts";
|
|
40
|
+
import { createReRegister, enhanceWithCI } from "../../provider-helper.ts";
|
|
41
|
+
|
|
42
|
+
const _logger = createLogger("ollama-cloud");
|
|
43
|
+
|
|
44
|
+
// =============================================================================
|
|
45
|
+
// Fetch + map
|
|
46
|
+
// =============================================================================
|
|
47
|
+
|
|
48
|
+
/**
 * Fetch the model list from Ollama Cloud and convert it to provider model configs.
 *
 * Entries whose `id` is missing or not a non-empty string are skipped, so a
 * single malformed entry cannot abort loading the entire list. Ids containing
 * "embed" are treated as embedding-only models and dropped. The mapped list is
 * passed through `applyHidden` before being returned (presumably this removes
 * user-hidden models — confirm against config.ts).
 *
 * @param apiKey - Ollama API key, sent as a Bearer token.
 * @returns The model configs produced by `applyHidden`.
 * @throws Error when the HTTP response is not OK; failures raised by
 *   `fetchWithRetry` or by JSON parsing propagate unchanged.
 */
async function fetchOllamaModels(
  apiKey: string,
): Promise<ProviderModelConfig[]> {
  // Use OpenAI-compatible /v1/models endpoint for consistency
  // The native /api/tags returns :cloud suffixes that may not work with /v1/chat/completions
  const response = await fetchWithRetry(
    `${BASE_URL_OLLAMA}/v1/models`,
    {
      headers: {
        Authorization: `Bearer ${apiKey}`,
        "Content-Type": "application/json",
      },
    },
    // NOTE(review): presumably retry count, retry delay (ms) and timeout (ms) —
    // confirm against fetchWithRetry in lib/util.ts.
    3,
    1000,
    DEFAULT_FETCH_TIMEOUT_MS,
  );

  if (!response.ok) {
    throw new Error(
      `Failed to fetch Ollama models: ${response.status} ${response.statusText}`,
    );
  }

  // The payload is external input: treat it as unknown and narrow explicitly
  // instead of trusting a type assertion.
  const payload = (await response.json()) as { data?: unknown } | null;
  const rawEntries: unknown = payload?.data;
  const models: unknown[] = Array.isArray(rawEntries) ? rawEntries : [];

  _logger.info(
    `[ollama-cloud] Fetched ${models.length} models from Ollama Cloud`,
  );

  // Keep only entries that carry a usable string id.
  const modelIds = models
    .map((entry) => (entry as { id?: unknown } | null)?.id)
    .filter((id): id is string => typeof id === "string" && id.length > 0);

  const result = applyHidden(
    modelIds
      // Filter to chat/text generation models only:
      // skip embedding-only models (typically have "embed" in name)
      .filter((id) => !id.toLowerCase().includes("embed"))
      .map((id): ProviderModelConfig => {
        const lowered = id.toLowerCase();
        return {
          id,
          name: id,
          // Try to infer reasoning from model name
          reasoning:
            lowered.includes("reasoning") ||
            lowered.includes("r1") ||
            lowered.includes("thinking"),
          input: ["text"],
          // Ollama Cloud uses usage-based pricing (GPU time), not per-token
          // Free tier has limits but no direct cost per token
          cost: {
            input: 0, // Freemium: usage-based, not per-token
            output: 0,
            cacheRead: 0,
            cacheWrite: 0,
          },
          // Default context window - Ollama doesn't expose this via /v1/models
          contextWindow: 32768,
          maxTokens: 4096, // Default, varies by model
        };
      }),
  );

  return result;
}
|
|
117
|
+
|
|
118
|
+
// =============================================================================
|
|
119
|
+
// Extension Entry Point
|
|
120
|
+
// =============================================================================
|
|
121
|
+
|
|
122
|
+
/**
 * Extension entry point for the Ollama Cloud provider.
 *
 * Does nothing (beyond logging) when no API key is configured or when the
 * startup model fetch fails. Otherwise it hooks the provider into the global
 * toggle registry and registers the fetched models with the host.
 *
 * @param pi - Extension API handle supplied by the host.
 */
export default async function (pi: ExtensionAPI) {
  const apiKey = getOllamaApiKey();
  if (!apiKey) {
    _logger.info(
      "[ollama-cloud] Skipping - OLLAMA_API_KEY not set (env var or ~/.pi/free.json)",
    );
    return;
  }

  // Load the model list; a failure here disables the provider for this run.
  let catalogue: ProviderModelConfig[];
  try {
    catalogue = await fetchOllamaModels(apiKey);
  } catch (cause) {
    const reason = cause instanceof Error ? cause.message : String(cause);
    _logger.error("[ollama-cloud] Failed to fetch models at startup", {
      error: reason,
    });
    return;
  }

  // Every Ollama model shares the same free tier, so the "free" and "all"
  // views point at the very same list.
  const tiers = { free: catalogue, all: catalogue };

  // Callback handed to the global toggle system for re-registration.
  const reRegister = createReRegister(pi, {
    providerId: PROVIDER_OLLAMA,
    baseUrl: BASE_URL_OLLAMA,
    apiKey,
  });

  // A key is always present at this point, hence the literal `true`.
  registerWithGlobalToggle(PROVIDER_OLLAMA, tiers, reRegister, true);

  // Initial registration honours the show-paid setting (both tiers are
  // identical for Ollama, but the lookup is still performed).
  const initialModels = getOllamaShowPaid() ? tiers.all : tiers.free;
  const enhancedModels = enhanceWithCI(initialModels);
  pi.registerProvider(PROVIDER_OLLAMA, {
    baseUrl: BASE_URL_OLLAMA,
    apiKey,
    api: "openai-completions" as const,
    models: enhancedModels,
  });

  _logger.info(
    `[ollama-cloud] Registered ${initialModels.length} models (usage-based free tier)`,
  );
}
|
|
@@ -1,34 +1,34 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Shared OpenCode session/request tracking.
|
|
3
|
-
*
|
|
4
|
-
* OpenCode endpoints appear to behave more reliably when a stable session id
|
|
5
|
-
* is included across requests in the same Pi session.
|
|
6
|
-
*/
|
|
7
|
-
export function createOpenCodeSessionTracker() {
|
|
8
|
-
let sessionId = "";
|
|
9
|
-
let requestCount = 0;
|
|
10
|
-
|
|
11
|
-
function generateId(): string {
|
|
12
|
-
return (
|
|
13
|
-
Math.random().toString(36).substring(2, 15) +
|
|
14
|
-
Math.random().toString(36).substring(2, 15)
|
|
15
|
-
);
|
|
16
|
-
}
|
|
17
|
-
|
|
18
|
-
function getSessionId(): string {
|
|
19
|
-
if (!sessionId) {
|
|
20
|
-
sessionId = generateId();
|
|
21
|
-
}
|
|
22
|
-
return sessionId;
|
|
23
|
-
}
|
|
24
|
-
|
|
25
|
-
function nextRequestId(): string {
|
|
26
|
-
requestCount++;
|
|
27
|
-
return `${getSessionId()}-${requestCount}`;
|
|
28
|
-
}
|
|
29
|
-
|
|
30
|
-
return {
|
|
31
|
-
getSessionId,
|
|
32
|
-
nextRequestId,
|
|
33
|
-
};
|
|
34
|
-
}
|
|
1
|
+
/**
 * Shared OpenCode session/request tracking.
 *
 * OpenCode endpoints appear to behave more reliably when a stable session id
 * is included across requests in the same Pi session.
 *
 * Each tracker lazily creates one random session id on first use and hands out
 * request ids of the form `<sessionId>-<n>`, where `n` starts at 1 and grows by
 * one per call.
 *
 * @returns An object exposing `getSessionId` and `nextRequestId`.
 */
export function createOpenCodeSessionTracker() {
  let cachedSessionId = "";
  let issuedRequests = 0;

  // One base-36 fragment derived from Math.random (the leading "0." is dropped).
  const randomFragment = (): string =>
    Math.random().toString(36).substring(2, 15);

  // Returns the tracker's session id, generating it on the first call only.
  const getSessionId = (): string => {
    if (cachedSessionId === "") {
      const head = randomFragment();
      const tail = randomFragment();
      cachedSessionId = head + tail;
    }
    return cachedSessionId;
  };

  // Bumps the per-tracker counter and combines it with the session id.
  const nextRequestId = (): string => {
    issuedRequests += 1;
    return `${getSessionId()}-${issuedRequests}`;
  };

  return { getSessionId, nextRequestId };
}
|
|
@@ -13,12 +13,12 @@
|
|
|
13
13
|
*/
|
|
14
14
|
|
|
15
15
|
import crypto from "node:crypto";
|
|
16
|
-
import { spawn } from "node:child_process";
|
|
17
16
|
import type {
|
|
18
17
|
OAuthCredentials,
|
|
19
18
|
OAuthLoginCallbacks,
|
|
20
19
|
} from "@mariozechner/pi-ai";
|
|
21
|
-
import { createLogger } from "
|
|
20
|
+
import { createLogger } from "../../lib/logger.ts";
|
|
21
|
+
import { openBrowser } from "../../lib/open-browser.ts";
|
|
22
22
|
|
|
23
23
|
const _logger = createLogger("qwen-auth");
|
|
24
24
|
|
|
@@ -37,6 +37,12 @@ const QWEN_OAUTH_GRANT_TYPE = "urn:ietf:params:oauth:grant-type:device_code";
|
|
|
37
37
|
const INITIAL_POLL_INTERVAL_MS = 2000;
|
|
38
38
|
const MAX_POLL_INTERVAL_MS = 10000;
|
|
39
39
|
|
|
40
|
+
// Token refresh buffer: proactively refresh this many ms before actual expiry.
|
|
41
|
+
// Same idea as qwen-code's SharedTokenManager, which uses a 30s buffer.
|
|
42
|
+
// We use 5 minutes (same as pi-core's reference qwen-cli example) to be safe
|
|
43
|
+
// against clock skew, network latency, and server-side early revocation.
|
|
44
|
+
const EXPIRY_BUFFER_MS = 5 * 60 * 1000;
|
|
45
|
+
|
|
40
46
|
// =============================================================================
|
|
41
47
|
// PKCE Utilities
|
|
42
48
|
// =============================================================================
|
|
@@ -46,10 +52,7 @@ function generateCodeVerifier(): string {
|
|
|
46
52
|
}
|
|
47
53
|
|
|
48
54
|
function generateCodeChallenge(codeVerifier: string): string {
|
|
49
|
-
return crypto
|
|
50
|
-
.createHash("sha256")
|
|
51
|
-
.update(codeVerifier)
|
|
52
|
-
.digest("base64url");
|
|
55
|
+
return crypto.createHash("sha256").update(codeVerifier).digest("base64url");
|
|
53
56
|
}
|
|
54
57
|
|
|
55
58
|
function generatePKCEPair(): {
|
|
@@ -67,33 +70,10 @@ function generatePKCEPair(): {
|
|
|
67
70
|
|
|
68
71
|
function objectToUrlEncoded(data: Record<string, string>): string {
|
|
69
72
|
return Object.keys(data)
|
|
70
|
-
.map(
|
|
71
|
-
(key) =>
|
|
72
|
-
`${encodeURIComponent(key)}=${encodeURIComponent(data[key])}`,
|
|
73
|
-
)
|
|
73
|
+
.map((key) => `${encodeURIComponent(key)}=${encodeURIComponent(data[key])}`)
|
|
74
74
|
.join("&");
|
|
75
75
|
}
|
|
76
76
|
|
|
77
|
-
function openBrowser(url: string): void {
|
|
78
|
-
try {
|
|
79
|
-
if (process.platform === "win32") {
|
|
80
|
-
// cmd.exe interprets & as a command separator, breaking URLs with query params.
|
|
81
|
-
// PowerShell's Start-Process treats the URL as a literal string.
|
|
82
|
-
spawn(
|
|
83
|
-
"powershell.exe",
|
|
84
|
-
["-NoProfile", "-NonInteractive", "-Command", `Start-Process "${url.replace(/"/g, '\\"')}"`],
|
|
85
|
-
{ detached: true, shell: false, windowsHide: true },
|
|
86
|
-
).unref();
|
|
87
|
-
} else if (process.platform === "darwin") {
|
|
88
|
-
spawn("open", [url], { detached: true }).unref();
|
|
89
|
-
} else {
|
|
90
|
-
spawn("xdg-open", [url], { detached: true }).unref();
|
|
91
|
-
}
|
|
92
|
-
} catch (err) {
|
|
93
|
-
_logger.debug("Failed to open browser", { error: String(err) });
|
|
94
|
-
}
|
|
95
|
-
}
|
|
96
|
-
|
|
97
77
|
function abortableSleep(ms: number, signal?: AbortSignal): Promise<void> {
|
|
98
78
|
return new Promise((resolve, reject) => {
|
|
99
79
|
if (signal?.aborted) {
|
|
@@ -169,9 +149,7 @@ async function requestDeviceAuthorization(
|
|
|
169
149
|
);
|
|
170
150
|
}
|
|
171
151
|
|
|
172
|
-
const result = (await response.json()) as
|
|
173
|
-
| DeviceAuthorizationData
|
|
174
|
-
| ErrorData;
|
|
152
|
+
const result = (await response.json()) as DeviceAuthorizationData | ErrorData;
|
|
175
153
|
|
|
176
154
|
if ("error" in result) {
|
|
177
155
|
throw new Error(
|
|
@@ -325,8 +303,8 @@ export async function loginQwen(
|
|
|
325
303
|
access: data.access_token!,
|
|
326
304
|
refresh: data.refresh_token ?? "",
|
|
327
305
|
expires: data.expires_in
|
|
328
|
-
? Date.now() + data.expires_in * 1000
|
|
329
|
-
: Date.now() + 3600 * 1000, // 1 hour default
|
|
306
|
+
? Date.now() + data.expires_in * 1000 - EXPIRY_BUFFER_MS
|
|
307
|
+
: Date.now() + 3600 * 1000 - EXPIRY_BUFFER_MS, // 1 hour default minus buffer
|
|
330
308
|
resource_url: resourceUrl,
|
|
331
309
|
};
|
|
332
310
|
}
|
|
@@ -354,7 +332,11 @@ export async function loginQwen(
|
|
|
354
332
|
export async function refreshQwenToken(
|
|
355
333
|
credentials: OAuthCredentials,
|
|
356
334
|
): Promise<OAuthCredentials> {
|
|
357
|
-
|
|
335
|
+
// Note: we intentionally DO NOT early-return when the token appears valid.
|
|
336
|
+
// pi-core calls refreshToken() only when it has already determined the token
|
|
337
|
+
// needs refreshing (Date.now() >= cred.expires). The early return was
|
|
338
|
+
// redundant and blocked forced-refreshes after server-side token revocation
|
|
339
|
+
// (where the stored expiry hasn't been reached yet but the token is invalid).
|
|
358
340
|
|
|
359
341
|
if (!credentials.refresh) {
|
|
360
342
|
throw new Error(
|
|
@@ -398,21 +380,23 @@ export async function refreshQwenToken(
|
|
|
398
380
|
}
|
|
399
381
|
|
|
400
382
|
// Preserve resource_url as a proper field (not encoded in refresh token)
|
|
401
|
-
const resourceUrl =
|
|
383
|
+
const resourceUrl =
|
|
384
|
+
data.resource_url || (credentials.resource_url as string) || "";
|
|
402
385
|
|
|
403
386
|
return {
|
|
404
387
|
access: data.access_token,
|
|
405
388
|
refresh: data.refresh_token ?? credentials.refresh,
|
|
406
389
|
expires: data.expires_in
|
|
407
|
-
? Date.now() + data.expires_in * 1000
|
|
408
|
-
: Date.now() + 3600 * 1000,
|
|
390
|
+
? Date.now() + data.expires_in * 1000 - EXPIRY_BUFFER_MS
|
|
391
|
+
: Date.now() + 3600 * 1000 - EXPIRY_BUFFER_MS,
|
|
409
392
|
resource_url: resourceUrl,
|
|
410
393
|
};
|
|
411
394
|
}
|
|
412
395
|
|
|
413
396
|
// Fallback endpoint used when resource_url is absent from the OAuth token.
|
|
414
397
|
// Mirrors qwen-code's DEFAULT_QWEN_BASE_URL.
|
|
415
|
-
const QWEN_DEFAULT_BASE_URL =
|
|
398
|
+
const QWEN_DEFAULT_BASE_URL =
|
|
399
|
+
"https://dashscope.aliyuncs.com/compatible-mode/v1";
|
|
416
400
|
|
|
417
401
|
/**
|
|
418
402
|
* Resolve the API base URL from OAuth credentials.
|
|
@@ -1,95 +1,101 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Qwen OAuth model definitions.
|
|
3
|
-
*
|
|
4
|
-
*
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
import {
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
*
|
|
15
|
-
*
|
|
16
|
-
*
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
*
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
*
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
}
|
|
1
|
+
/**
|
|
2
|
+
* Qwen OAuth model definitions.
|
|
3
|
+
*
|
|
4
|
+
* @deprecated The 1,000 req/day free tier is no longer available. Auth is broken.
|
|
5
|
+
* This provider remains for backward compatibility but should not be used.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type { ProviderModelConfig } from "@mariozechner/pi-coding-agent";
|
|
9
|
+
import { createLogger } from "../../lib/logger.ts";
|
|
10
|
+
|
|
11
|
+
const _logger = createLogger("qwen-models");
|
|
12
|
+
|
|
13
|
+
/**
 * Compatibility overrides for portal.qwen.ai.
 *
 * The portal's OpenAI-compatible API rejects or ignores several parameters
 * that the pi framework sends by default, so every optional capability is
 * switched off here and the token limit is sent as `max_tokens`.
 */
export const PORTAL_COMPAT: NonNullable<ProviderModelConfig["compat"]> = {
  supportsStore: false,
  supportsDeveloperRole: false,
  supportsReasoningEffort: false,
  supportsUsageInStreaming: false,
  supportsStrictMode: false,
  maxTokensField: "max_tokens",
};

/**
 * Placeholder model list used before OAuth completes or when model discovery
 * fails. The id is only a stand-in: fetchQwenLiveModels() swaps in the real
 * model id once the server can be queried after auth.
 */
export const QWEN_FREE_MODELS: ProviderModelConfig[] = [
  {
    id: "coder-model",
    name: "Qwen Coder — DEPRECATED (free tier discontinued)",
    reasoning: false,
    input: ["text"],
    // Listed with zero cost in every category.
    cost: {
      input: 0,
      output: 0,
      cacheRead: 0,
      cacheWrite: 0,
    },
    contextWindow: 128 * 1024,
    maxTokens: 16 * 1024,
    compat: PORTAL_COMPAT,
  },
];
|
|
44
|
+
|
|
45
|
+
/**
 * Return the Qwen model list.
 *
 * No request is made: the function logs a deprecation notice and resolves to
 * the static QWEN_FREE_MODELS array, kept for backward compatibility.
 *
 * @deprecated Qwen free tier is discontinued.
 * @returns The shared QWEN_FREE_MODELS array (not a copy).
 */
export async function fetchQwenModels(): Promise<ProviderModelConfig[]> {
  const placeholders = QWEN_FREE_MODELS;
  _logger.info("Qwen provider is deprecated, returning placeholder models");
  return placeholders;
}
|
|
53
|
+
|
|
54
|
+
/**
 * Fetch the live model list from the Qwen API using the OAuth access token.
 *
 * Picks one model id from the server response (the first id containing
 * "coder", case-insensitively, otherwise the first id) and returns a copy of
 * every template model with its `id` replaced by that value. This function
 * never throws: on a non-OK response, an empty list, or any error, the
 * template models are returned unchanged.
 *
 * @param baseUrl - API base URL; trailing slashes are ignored when building
 *   the `/models` endpoint.
 * @param accessToken - OAuth access token, sent as a Bearer token.
 * @param templateModels - Models whose ids should be replaced.
 * @returns Updated copies of the templates, or `templateModels` itself on
 *   any failure.
 */
export async function fetchQwenLiveModels(
  baseUrl: string,
  accessToken: string,
  templateModels: ProviderModelConfig[],
): Promise<ProviderModelConfig[]> {
  try {
    // Strip trailing slashes so ".../v1/" does not become ".../v1//models".
    const endpoint = `${baseUrl.replace(/\/+$/, "")}/models`;
    const response = await fetch(endpoint, {
      headers: {
        Authorization: `Bearer ${accessToken}`,
        Accept: "application/json",
      },
    });

    if (!response.ok) {
      _logger.info("Qwen /v1/models fetch failed, keeping current model IDs", {
        status: response.status,
      });
      return templateModels;
    }

    // The payload is external input: narrow it instead of trusting a cast, and
    // skip malformed entries rather than letting one bad item discard the rest.
    const payload = (await response.json()) as { data?: unknown } | null;
    const rawEntries: unknown = payload?.data;
    const ids: string[] = (Array.isArray(rawEntries) ? rawEntries : [])
      .map((entry: unknown) => (entry as { id?: unknown } | null)?.id)
      .filter((id): id is string => typeof id === "string" && id.length > 0);

    _logger.info("Qwen live models discovered", { ids });

    if (ids.length === 0) return templateModels;

    // Prefer a coder model if available, otherwise use the first model
    const preferred = ids.find((id) => /coder/i.test(id)) ?? ids[0];

    return templateModels.map((m) => ({ ...m, id: preferred }));
  } catch (err) {
    // Best-effort discovery: failures are logged and the templates are kept.
    _logger.info("Qwen live model fetch error, keeping current model IDs", {
      error: String(err),
    });
    return templateModels;
  }
}
|