pi-free 2.0.1 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +106 -2
- package/README.md +548 -393
- package/config.ts +22 -2
- package/index.ts +148 -148
- package/lib/built-in-toggle.ts +23 -42
- package/lib/toggle-state.ts +86 -0
- package/lib/util.ts +256 -256
- package/package.json +1 -1
- package/provider-failover/benchmark-lookup.ts +637 -247
- package/provider-helper.ts +261 -260
- package/providers/cline/cline-models.ts +129 -128
- package/providers/cline/cline.ts +284 -298
- package/providers/cloudflare/cloudflare.ts +292 -134
- package/providers/dynamic-built-in/index.ts +2 -1
- package/providers/kilo/kilo-models.ts +2 -1
- package/providers/modal/modal.ts +47 -44
- package/providers/nvidia/nvidia.ts +296 -33
- package/providers/ollama/ollama.ts +280 -172
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Cloudflare Workers AI Provider Extension
|
|
3
3
|
*
|
|
4
|
-
* Provides access to Cloudflare's serverless GPU network with
|
|
4
|
+
* Provides access to Cloudflare's serverless GPU network with 30+ models.
|
|
5
5
|
* All models use Cloudflare's "Neurons" pricing system:
|
|
6
6
|
* - 10,000 Neurons per day FREE (resets daily at 00:00 UTC)
|
|
7
7
|
* - $0.011 per 1,000 Neurons beyond free allocation
|
|
@@ -17,8 +17,6 @@
|
|
|
17
17
|
* - Config file: ~/.pi/agent/auth.json
|
|
18
18
|
* { "cloudflare-ai": { "access": "token", "account_id": "id" } }
|
|
19
19
|
* - Legacy: CLOUDFLARE_API_TOKEN and CLOUDFLARE_ACCOUNT_ID env vars
|
|
20
|
-
*
|
|
21
|
-
* Models can be customized via ~/.pi/cloudflare-models.json
|
|
22
20
|
*/
|
|
23
21
|
|
|
24
22
|
import { existsSync, readFileSync } from "node:fs";
|
|
@@ -28,7 +26,9 @@ import type {
|
|
|
28
26
|
ExtensionAPI,
|
|
29
27
|
ProviderModelConfig,
|
|
30
28
|
} from "@mariozechner/pi-coding-agent";
|
|
29
|
+
import { DEFAULT_FETCH_TIMEOUT_MS } from "../../constants.ts";
|
|
31
30
|
import { createLogger } from "../../lib/logger.ts";
|
|
31
|
+
import { fetchWithRetry } from "../../lib/util.ts";
|
|
32
32
|
|
|
33
33
|
const _logger = createLogger("cloudflare");
|
|
34
34
|
|
|
@@ -81,36 +81,60 @@ function getCloudflareAuth(): CloudflareAuth {
|
|
|
81
81
|
// Compatibility Settings
|
|
82
82
|
// =============================================================================
|
|
83
83
|
|
|
84
|
-
|
|
85
|
-
* Cloudflare Workers AI compatibility settings.
|
|
86
|
-
* Prevents 413 Payload Too Large errors by disabling unsupported parameters.
|
|
87
|
-
*/
|
|
88
|
-
const CLOUDFLARE_COMPAT: {
|
|
89
|
-
supportsStore?: boolean;
|
|
90
|
-
supportsDeveloperRole?: boolean;
|
|
91
|
-
supportsReasoningEffort?: boolean;
|
|
92
|
-
supportsStrictMode?: boolean;
|
|
93
|
-
maxTokensField?: "max_tokens" | "max_completion_tokens";
|
|
94
|
-
requiresThinkingAsText?: boolean;
|
|
95
|
-
} = {
|
|
84
|
+
/**
 * Cloudflare Workers AI compatibility settings shared by every model entry.
 *
 * Each flag switches off an OpenAI-style request feature; the previous
 * revision of this module documented the purpose as preventing
 * "413 Payload Too Large" errors by disabling unsupported parameters.
 *
 * The object is referenced directly (not copied) by many model configs, so it
 * is declared `as const` to make accidental writes a compile-time error.
 * Model-specific additions are layered on with a spread, e.g.
 * `{ ...CLOUDFLARE_COMPAT, requiresThinkingAsText: true }`.
 */
const CLOUDFLARE_COMPAT = {
  supportsStore: false,
  supportsDeveloperRole: false,
  supportsReasoningEffort: false,
  supportsStrictMode: false,
  // Token limit is sent under the "max_tokens" request field.
  maxTokensField: "max_tokens",
} as const;
|
|
102
91
|
|
|
103
92
|
// =============================================================================
|
|
104
|
-
//
|
|
93
|
+
// Known non-chat model patterns (to filter out)
|
|
105
94
|
// =============================================================================
|
|
106
95
|
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
96
|
+
/**
 * Case-insensitive patterns for model ids that are known not to be chat
 * models. The patterns are unanchored, so each one matches anywhere in the id.
 *
 * Declared `readonly` because this is a module-level lookup table that is only
 * ever read.
 */
const NON_CHAT_PATTERNS: readonly RegExp[] = [
  // Embeddings. /embed/ also covers every "...embedding..." id (including
  // "pfnet/plamo-embedding" and "qwen3-embedding"), so no separate patterns
  // are needed for those.
  /bge-/i,
  /embed/i,
  // Image generation
  /flux/i,
  /stable-diffusion/i,
  /dreamshaper/i,
  /lucid-origin/i,
  /phoenix/i,
  // Speech/audio
  /whisper/i,
  /aura-/i,
  /nova-/i,
  /melotts/i,
  // Translation (not chat)
  /indictrans/i,
  /m2m100/i,
  // Vision-only models
  /llava/i,
  /detr-/i,
  /resnet/i,
  /unum\/uform/i,
  // Code/SQL only
  /sqlcoder/i,
  // Classification/reranking
  /reranker/i,
  /distilbert/i,
  // Safety/moderation
  /llama-guard/i,
  // Turn detection
  /smart-turn/i,
];
|
|
132
|
+
|
|
133
|
+
// =============================================================================
|
|
134
|
+
// Fallback models (used if API fetch fails)
|
|
135
|
+
// =============================================================================
|
|
111
136
|
|
|
112
|
-
const
|
|
113
|
-
// Frontier models
|
|
137
|
+
const FALLBACK_MODELS: ProviderModelConfig[] = [
|
|
114
138
|
{
|
|
115
139
|
id: "@cf/moonshotai/kimi-k2.5",
|
|
116
140
|
name: "Kimi K2.5",
|
|
@@ -119,6 +143,17 @@ const DEFAULT_MODELS: ModelConfig[] = [
|
|
|
119
143
|
cost: { input: 0.6, output: 3.0, cacheRead: 0.1, cacheWrite: 0 },
|
|
120
144
|
contextWindow: 256000,
|
|
121
145
|
maxTokens: 8192,
|
|
146
|
+
compat: { ...CLOUDFLARE_COMPAT, requiresThinkingAsText: true },
|
|
147
|
+
},
|
|
148
|
+
{
|
|
149
|
+
id: "@cf/moonshotai/kimi-k2.6",
|
|
150
|
+
name: "Kimi K2.6",
|
|
151
|
+
reasoning: true,
|
|
152
|
+
input: ["text", "image"],
|
|
153
|
+
cost: { input: 0.8, output: 4.0, cacheRead: 0.1, cacheWrite: 0 },
|
|
154
|
+
contextWindow: 256000,
|
|
155
|
+
maxTokens: 8192,
|
|
156
|
+
compat: { ...CLOUDFLARE_COMPAT, requiresThinkingAsText: true },
|
|
122
157
|
},
|
|
123
158
|
{
|
|
124
159
|
id: "@cf/meta/llama-4-scout-17b-16e-instruct",
|
|
@@ -128,6 +163,7 @@ const DEFAULT_MODELS: ModelConfig[] = [
|
|
|
128
163
|
cost: { input: 0.27, output: 0.85, cacheRead: 0, cacheWrite: 0 },
|
|
129
164
|
contextWindow: 131072,
|
|
130
165
|
maxTokens: 8192,
|
|
166
|
+
compat: CLOUDFLARE_COMPAT,
|
|
131
167
|
},
|
|
132
168
|
{
|
|
133
169
|
id: "@cf/nvidia/nemotron-3-120b-a12b",
|
|
@@ -137,7 +173,27 @@ const DEFAULT_MODELS: ModelConfig[] = [
|
|
|
137
173
|
cost: { input: 0.5, output: 1.5, cacheRead: 0, cacheWrite: 0 },
|
|
138
174
|
contextWindow: 256000,
|
|
139
175
|
maxTokens: 8192,
|
|
140
|
-
compat: { requiresThinkingAsText: true },
|
|
176
|
+
compat: { ...CLOUDFLARE_COMPAT, requiresThinkingAsText: true },
|
|
177
|
+
},
|
|
178
|
+
{
|
|
179
|
+
id: "@cf/openai/gpt-oss-120b",
|
|
180
|
+
name: "GPT-OSS 120B",
|
|
181
|
+
reasoning: true,
|
|
182
|
+
input: ["text"],
|
|
183
|
+
cost: { input: 0.5, output: 1.5, cacheRead: 0, cacheWrite: 0 },
|
|
184
|
+
contextWindow: 128000,
|
|
185
|
+
maxTokens: 8192,
|
|
186
|
+
compat: { ...CLOUDFLARE_COMPAT, requiresThinkingAsText: true },
|
|
187
|
+
},
|
|
188
|
+
{
|
|
189
|
+
id: "@cf/openai/gpt-oss-20b",
|
|
190
|
+
name: "GPT-OSS 20B",
|
|
191
|
+
reasoning: true,
|
|
192
|
+
input: ["text"],
|
|
193
|
+
cost: { input: 0.2, output: 0.6, cacheRead: 0, cacheWrite: 0 },
|
|
194
|
+
contextWindow: 128000,
|
|
195
|
+
maxTokens: 8192,
|
|
196
|
+
compat: { ...CLOUDFLARE_COMPAT, requiresThinkingAsText: true },
|
|
141
197
|
},
|
|
142
198
|
{
|
|
143
199
|
id: "@cf/google/gemma-4-26b-a4b-it",
|
|
@@ -147,7 +203,7 @@ const DEFAULT_MODELS: ModelConfig[] = [
|
|
|
147
203
|
cost: { input: 0.1, output: 0.3, cacheRead: 0, cacheWrite: 0 },
|
|
148
204
|
contextWindow: 256000,
|
|
149
205
|
maxTokens: 8192,
|
|
150
|
-
compat: { requiresThinkingAsText: true },
|
|
206
|
+
compat: { ...CLOUDFLARE_COMPAT, requiresThinkingAsText: true },
|
|
151
207
|
},
|
|
152
208
|
{
|
|
153
209
|
id: "@cf/google/gemma-3-12b-it",
|
|
@@ -157,6 +213,7 @@ const DEFAULT_MODELS: ModelConfig[] = [
|
|
|
157
213
|
cost: { input: 0.345, output: 0.556, cacheRead: 0, cacheWrite: 0 },
|
|
158
214
|
contextWindow: 80000,
|
|
159
215
|
maxTokens: 8192,
|
|
216
|
+
compat: CLOUDFLARE_COMPAT,
|
|
160
217
|
},
|
|
161
218
|
{
|
|
162
219
|
id: "@cf/qwen/qwen3-30b-a3b-fp8",
|
|
@@ -166,44 +223,47 @@ const DEFAULT_MODELS: ModelConfig[] = [
|
|
|
166
223
|
cost: { input: 0.051, output: 0.34, cacheRead: 0, cacheWrite: 0 },
|
|
167
224
|
contextWindow: 32768,
|
|
168
225
|
maxTokens: 8192,
|
|
169
|
-
compat: { requiresThinkingAsText: true },
|
|
226
|
+
compat: { ...CLOUDFLARE_COMPAT, requiresThinkingAsText: true },
|
|
170
227
|
},
|
|
171
228
|
{
|
|
172
|
-
id: "@cf/
|
|
173
|
-
name: "
|
|
229
|
+
id: "@cf/qwen/qwen2.5-coder-32b-instruct",
|
|
230
|
+
name: "Qwen 2.5 Coder 32B",
|
|
174
231
|
reasoning: false,
|
|
175
232
|
input: ["text"],
|
|
176
|
-
cost: { input: 0.
|
|
177
|
-
contextWindow:
|
|
233
|
+
cost: { input: 0.3, output: 0.3, cacheRead: 0, cacheWrite: 0 },
|
|
234
|
+
contextWindow: 32768,
|
|
178
235
|
maxTokens: 8192,
|
|
236
|
+
compat: CLOUDFLARE_COMPAT,
|
|
179
237
|
},
|
|
180
|
-
// Popular models
|
|
181
238
|
{
|
|
182
|
-
id: "@cf/
|
|
183
|
-
name: "
|
|
184
|
-
reasoning:
|
|
239
|
+
id: "@cf/qwen/qwq-32b",
|
|
240
|
+
name: "QwQ 32B (Reasoning)",
|
|
241
|
+
reasoning: true,
|
|
185
242
|
input: ["text"],
|
|
186
|
-
cost: { input: 0.
|
|
187
|
-
contextWindow:
|
|
243
|
+
cost: { input: 0.3, output: 0.3, cacheRead: 0, cacheWrite: 0 },
|
|
244
|
+
contextWindow: 32768,
|
|
188
245
|
maxTokens: 8192,
|
|
246
|
+
compat: { ...CLOUDFLARE_COMPAT, requiresThinkingAsText: true },
|
|
189
247
|
},
|
|
190
248
|
{
|
|
191
|
-
id: "@cf/
|
|
192
|
-
name: "
|
|
249
|
+
id: "@cf/zai-org/glm-4.7-flash",
|
|
250
|
+
name: "GLM-4.7 Flash",
|
|
193
251
|
reasoning: false,
|
|
194
252
|
input: ["text"],
|
|
195
|
-
cost: { input: 0.
|
|
253
|
+
cost: { input: 0.06, output: 0.4, cacheRead: 0, cacheWrite: 0 },
|
|
196
254
|
contextWindow: 131072,
|
|
197
255
|
maxTokens: 8192,
|
|
256
|
+
compat: CLOUDFLARE_COMPAT,
|
|
198
257
|
},
|
|
199
258
|
{
|
|
200
|
-
id: "@cf/meta/llama-3.
|
|
201
|
-
name: "Llama 3.
|
|
259
|
+
id: "@cf/meta/llama-3.3-70b-instruct-fp8-fast",
|
|
260
|
+
name: "Llama 3.3 70B Fast",
|
|
202
261
|
reasoning: false,
|
|
203
262
|
input: ["text"],
|
|
204
263
|
cost: { input: 0.5, output: 0.5, cacheRead: 0, cacheWrite: 0 },
|
|
205
264
|
contextWindow: 131072,
|
|
206
265
|
maxTokens: 8192,
|
|
266
|
+
compat: CLOUDFLARE_COMPAT,
|
|
207
267
|
},
|
|
208
268
|
{
|
|
209
269
|
id: "@cf/meta/llama-3.1-405b-instruct",
|
|
@@ -213,121 +273,213 @@ const DEFAULT_MODELS: ModelConfig[] = [
|
|
|
213
273
|
cost: { input: 2.0, output: 2.0, cacheRead: 0, cacheWrite: 0 },
|
|
214
274
|
contextWindow: 131072,
|
|
215
275
|
maxTokens: 8192,
|
|
276
|
+
compat: CLOUDFLARE_COMPAT,
|
|
216
277
|
},
|
|
217
278
|
{
|
|
218
|
-
id: "@cf/
|
|
219
|
-
name: "
|
|
220
|
-
reasoning:
|
|
279
|
+
id: "@cf/meta/llama-3.1-70b-instruct",
|
|
280
|
+
name: "Llama 3.1 70B",
|
|
281
|
+
reasoning: false,
|
|
221
282
|
input: ["text"],
|
|
222
|
-
cost: { input: 0.
|
|
223
|
-
contextWindow:
|
|
283
|
+
cost: { input: 0.5, output: 0.5, cacheRead: 0, cacheWrite: 0 },
|
|
284
|
+
contextWindow: 131072,
|
|
224
285
|
maxTokens: 8192,
|
|
225
|
-
compat:
|
|
286
|
+
compat: CLOUDFLARE_COMPAT,
|
|
226
287
|
},
|
|
227
288
|
{
|
|
228
|
-
id: "@cf/
|
|
229
|
-
name: "
|
|
230
|
-
reasoning: true,
|
|
231
|
-
input: ["text"],
|
|
232
|
-
cost: { input: 0.1, output: 0.1, cacheRead: 0, cacheWrite: 0 },
|
|
233
|
-
contextWindow: 16384,
|
|
234
|
-
maxTokens: 4096,
|
|
235
|
-
},
|
|
236
|
-
// Mistral models
|
|
237
|
-
{
|
|
238
|
-
id: "@cf/mistral/mistral-small-3.1-24b-instruct",
|
|
239
|
-
name: "Mistral Small 3.1 24B",
|
|
289
|
+
id: "@cf/meta/llama-3.2-11b-vision-instruct",
|
|
290
|
+
name: "Llama 3.2 11B Vision",
|
|
240
291
|
reasoning: false,
|
|
241
292
|
input: ["text", "image"],
|
|
242
|
-
cost: { input: 0.3, output: 0.3, cacheRead: 0, cacheWrite: 0 },
|
|
243
|
-
contextWindow: 32768,
|
|
244
|
-
maxTokens: 8192,
|
|
245
|
-
},
|
|
246
|
-
{
|
|
247
|
-
id: "@cf/mistral/mistral-7b-instruct-v0.2-lora",
|
|
248
|
-
name: "Mistral 7B Instruct",
|
|
249
|
-
reasoning: false,
|
|
250
|
-
input: ["text"],
|
|
251
|
-
cost: { input: 0.1, output: 0.1, cacheRead: 0, cacheWrite: 0 },
|
|
252
|
-
contextWindow: 32768,
|
|
253
|
-
maxTokens: 4096,
|
|
254
|
-
},
|
|
255
|
-
{
|
|
256
|
-
id: "@cf/mistral/mixtral-8x7b-instruct-v0.1-awq",
|
|
257
|
-
name: "Mixtral 8x7B Instruct",
|
|
258
|
-
reasoning: false,
|
|
259
|
-
input: ["text"],
|
|
260
|
-
cost: { input: 0.3, output: 0.3, cacheRead: 0, cacheWrite: 0 },
|
|
261
|
-
contextWindow: 32768,
|
|
262
|
-
maxTokens: 4096,
|
|
263
|
-
},
|
|
264
|
-
// Qwen and Gemma
|
|
265
|
-
{
|
|
266
|
-
id: "@cf/qwen/qwen1.5-14b-chat-awq",
|
|
267
|
-
name: "Qwen 1.5 14B Chat",
|
|
268
|
-
reasoning: false,
|
|
269
|
-
input: ["text"],
|
|
270
293
|
cost: { input: 0.2, output: 0.2, cacheRead: 0, cacheWrite: 0 },
|
|
271
|
-
contextWindow:
|
|
294
|
+
contextWindow: 128000,
|
|
272
295
|
maxTokens: 8192,
|
|
273
|
-
|
|
274
|
-
{
|
|
275
|
-
id: "@cf/google/gemma-2b-it-lora",
|
|
276
|
-
name: "Gemma 2B",
|
|
277
|
-
reasoning: false,
|
|
278
|
-
input: ["text"],
|
|
279
|
-
cost: { input: 0.05, output: 0.05, cacheRead: 0, cacheWrite: 0 },
|
|
280
|
-
contextWindow: 8192,
|
|
281
|
-
maxTokens: 2048,
|
|
282
|
-
},
|
|
283
|
-
{
|
|
284
|
-
id: "@cf/google/gemma-7b-it-lora",
|
|
285
|
-
name: "Gemma 7B",
|
|
286
|
-
reasoning: false,
|
|
287
|
-
input: ["text"],
|
|
288
|
-
cost: { input: 0.1, output: 0.1, cacheRead: 0, cacheWrite: 0 },
|
|
289
|
-
contextWindow: 8192,
|
|
290
|
-
maxTokens: 2048,
|
|
296
|
+
compat: CLOUDFLARE_COMPAT,
|
|
291
297
|
},
|
|
292
298
|
];
|
|
293
299
|
|
|
294
300
|
// =============================================================================
|
|
295
|
-
// Model
|
|
301
|
+
// Model metadata inference
|
|
296
302
|
// =============================================================================
|
|
297
303
|
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
+
/**
 * One entry of the `result` array returned by the Cloudflare model listing
 * request. Only the fields this module declares are listed; everything except
 * `id` is optional.
 *
 * NOTE(review): confirm against the Cloudflare API reference whether `id`
 * carries the "@cf/..." model slug or an opaque identifier (with the slug in
 * `name`); this module treats `id` as the slug.
 */
interface CloudflareModel {
  /** Identifier used as the model id when registering the model. */
  id: string;
  /** Name reported by the API, when present. */
  name?: string;
  /** Free-text description reported by the API, when present. */
  description?: string;
  /** Task classification reported by the API, when present. */
  task?: { id?: string; name?: string };
}
|
|
313
|
+
|
|
314
|
+
/**
 * Decide whether a model id should be offered as a chat model.
 *
 * @param modelId - Model identifier to classify.
 * @returns `false` as soon as any entry of NON_CHAT_PATTERNS matches the id,
 *   `true` when none of them do.
 */
function isChatModel(modelId: string): boolean {
  for (const nonChatPattern of NON_CHAT_PATTERNS) {
    if (nonChatPattern.test(modelId)) {
      return false;
    }
  }
  return true;
}
|
|
317
|
+
|
|
318
|
+
/**
 * Build a human-readable display name from a model id.
 *
 * Steps:
 *  1. Keep only the text after the last "/" (the whole id if that is empty).
 *  2. Peel known suffixes off the END of the name, repeatedly, so stacked
 *     suffixes such as "-instruct-fp8-fast" are all handled:
 *       - "-instruct", "-chat", "-it" and a trailing "-v<major>.<minor>" are
 *         dropped;
 *       - "-awq", "-fp8", "-fast", "-lora", "-hf" become parenthesised tags.
 *  3. Title-case the remaining dash-separated words and upper-case parameter
 *     counts ("70b" -> "70B").
 *  4. Append the tags in the order they appeared in the id.
 *
 * Ids with at most one suffix produce the same result as a single pass of
 * anchored replacements would.
 *
 * @param id - Model identifier, e.g. "@cf/meta/llama-3.3-70b-instruct-fp8-fast".
 * @returns Display name, e.g. "Llama 3.3 70B (FP8) (Fast)".
 */
function inferModelName(id: string): string {
  // Extract the model name part after the last "/".
  const namePart = id.split("/").pop() || id;

  const droppedSuffix = /-(?:instruct|chat|it|v\d+\.\d+)$/i;
  const taggedSuffix = /-(awq|fp8|fast|lora|hf)$/i;
  const tagLabels = new Map<string, string>([
    ["awq", "AWQ"],
    ["fp8", "FP8"],
    ["fast", "Fast"],
    ["lora", "LoRA"],
    ["hf", "HF"],
  ]);

  let base = namePart;
  const tags: string[] = [];

  // Every iteration either shortens `base` or exits, so the loop terminates.
  for (;;) {
    const tagged = taggedSuffix.exec(base);
    if (tagged) {
      const label = tagLabels.get((tagged[1] ?? "").toLowerCase());
      if (label !== undefined) {
        // Suffixes are found right-to-left; unshift restores id order.
        tags.unshift(label);
      }
      base = base.slice(0, tagged.index);
      continue;
    }

    const dropped = droppedSuffix.exec(base);
    if (dropped) {
      base = base.slice(0, dropped.index);
      continue;
    }

    break;
  }

  // Convert to title case and normalise parameter counts such as "70b".
  const title = base
    .split("-")
    .map((word) => word.charAt(0).toUpperCase() + word.slice(1))
    .join(" ")
    .replace(/\b(\d+(?:\.\d+)?)[bB]\b/g, "$1B");

  return tags.reduce((name, tag) => `${name} (${tag})`, title);
}
|
|
341
|
+
|
|
342
|
+
/**
 * Produce best-effort metadata for a model id.
 *
 * Ids that appear in FALLBACK_MODELS return a copy of that curated entry, so
 * a model discovered dynamically is described exactly like its hand-written
 * counterpart (for example "@cf/moonshotai/kimi-k2.6" is curated as a
 * reasoning model with image input and a 256000-token context window, none of
 * which the text heuristics below would infer).
 *
 * Every other id is described by heuristics on the id text only:
 *  - vision / reasoning capability from keyword matches;
 *  - context window and output limit from the model family;
 *  - a very rough cost estimate from the parameter count in the id, with
 *    overrides for a few specific models.
 *
 * @param id - Model identifier, e.g. "@cf/meta/llama-3.1-70b-instruct".
 * @returns Partial model config; the heuristic path fills name, reasoning,
 *   input, cost, contextWindow, maxTokens and compat.
 */
function inferModelMetadata(id: string): Partial<ProviderModelConfig> {
  // Curated data wins over guesses for ids we already know.
  const curated = FALLBACK_MODELS.find((model) => model.id === id);
  if (curated) {
    return { ...curated };
  }

  const hasVision = /vision|multimodal|vl|llava/i.test(id);
  const hasReasoning = /reason|r1|thinking|qwq|nemotron|oss/i.test(id);

  // Default context windows by model family. The gemma-3 rule is checked
  // first because it takes precedence over the 128k families.
  let contextWindow = 32768;
  let maxTokens = 4096;
  if (/gemma-3/i.test(id)) {
    contextWindow = 80000;
    maxTokens = 8192;
  } else if (
    /llama-3\.1|llama-3\.3|llama-4|gemma-4|kimi|nemotron/i.test(id) ||
    /llama-3\.2-11b/i.test(id)
  ) {
    contextWindow = 128000;
    maxTokens = 8192;
  }

  // Estimate costs based on model size (very rough approximation). The first
  // "<number>b" token in the id is taken as the parameter count in billions;
  // only its integer part is used.
  let inputCost = 0.1;
  let outputCost = 0.3;

  const sizeText = /(\d+)(?:\.\d+)?[bB]/.exec(id)?.[1];
  if (sizeText !== undefined) {
    const size = parseInt(sizeText, 10);
    // Ordered from largest to smallest; the first tier reached applies.
    const costTiers = [
      { minSize: 100, input: 0.5, output: 1.5 },
      { minSize: 70, input: 0.5, output: 0.5 },
      { minSize: 30, input: 0.3, output: 0.3 },
      { minSize: 8, input: 0.2, output: 0.2 },
    ];
    const tier = costTiers.find((candidate) => size >= candidate.minSize);
    if (tier) {
      inputCost = tier.input;
      outputCost = tier.output;
    }
  }

  // Override for specific known models.
  if (id.includes("llama-3.1-405b")) {
    inputCost = 2.0;
    outputCost = 2.0;
  }
  if (id.includes("kimi-k2.5")) {
    inputCost = 0.6;
    outputCost = 3.0;
  }
  if (id.includes("kimi-k2.6")) {
    inputCost = 0.8;
    outputCost = 4.0;
  }

  // Declared as a mutable array type so it is assignable whether the config
  // type expects a mutable or a readonly array.
  const input: ("text" | "image")[] = hasVision ? ["text", "image"] : ["text"];

  return {
    name: inferModelName(id),
    reasoning: hasReasoning,
    input,
    cost: { input: inputCost, output: outputCost, cacheRead: 0, cacheWrite: 0 },
    contextWindow,
    maxTokens,
    // Reasoning models additionally get their thinking output as text.
    compat: hasReasoning
      ? { ...CLOUDFLARE_COMPAT, requiresThinkingAsText: true }
      : CLOUDFLARE_COMPAT,
  };
}
|
|
411
|
+
|
|
412
|
+
// =============================================================================
|
|
413
|
+
// Dynamic model fetching
|
|
414
|
+
// =============================================================================
|
|
304
415
|
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
416
|
+
/**
 * Fetch the account's model list from the Cloudflare API and convert the chat
 * models into provider model configs.
 *
 * The request goes through `fetchWithRetry` with the arguments `3`, `1000`
 * and `DEFAULT_FETCH_TIMEOUT_MS`. Any failure (non-OK HTTP status,
 * unsuccessful or malformed payload, thrown error) is logged as a warning and
 * reported as an empty array; this function never rejects.
 *
 * Entries without a usable string identifier are skipped instead of being
 * registered with an undefined id. When `id` does not look like a model slug
 * (does not start with "@") but `name` does, `name` is used as the model id;
 * when `id` already is a slug, behaviour is unchanged.
 *
 * NOTE(review): confirm the listing endpoint path and the meaning of the
 * `id` / `name` fields against the Cloudflare API reference (the documented
 * model listing appears to live under `/ai/models/search` — verify).
 *
 * @param token - API token sent as a Bearer credential.
 * @param accountId - Cloudflare account id used to build the request URL.
 * @returns Chat model configs, or `[]` when the list could not be fetched.
 */
async function fetchCloudflareModels(
  token: string,
  accountId: string,
): Promise<ProviderModelConfig[]> {
  const baseUrl = `https://api.cloudflare.com/client/v4/accounts/${accountId}`;

  try {
    const response = await fetchWithRetry(
      `${baseUrl}/ai/models`,
      {
        headers: {
          Authorization: `Bearer ${token}`,
          "Content-Type": "application/json",
        },
      },
      3,
      1000,
      DEFAULT_FETCH_TIMEOUT_MS,
    );

    if (!response.ok) {
      throw new Error(`HTTP ${response.status}: ${response.statusText}`);
    }

    // The payload is external input; the cast only describes the expected
    // shape, so the fields that matter are checked at runtime below.
    const json = (await response.json()) as {
      success?: boolean;
      result?: CloudflareModel[];
      errors?: Array<{ message: string }>;
    };

    if (!json.success || !Array.isArray(json.result)) {
      throw new Error(
        json.errors?.[0]?.message || "API returned unsuccessful response",
      );
    }

    const looksLikeSlug = (value: unknown): value is string =>
      typeof value === "string" && value.startsWith("@");

    const models: ProviderModelConfig[] = [];
    for (const entry of json.result) {
      // Prefer whichever field carries the "@..." slug; otherwise fall back
      // to a non-empty string id.
      const modelId =
        [entry?.id, entry?.name].find(looksLikeSlug) ??
        (typeof entry?.id === "string" && entry.id !== "" ? entry.id : undefined);

      // Filter to chat/text generation models only.
      if (modelId === undefined || !isChatModel(modelId)) {
        continue;
      }

      const inferred = inferModelMetadata(modelId);
      // Do not show the raw slug as the display name when a nicer one exists.
      const displayName =
        entry.name && entry.name !== modelId
          ? entry.name
          : inferred.name || modelId;

      models.push({
        id: modelId,
        name: displayName,
        reasoning: inferred.reasoning || false,
        input: inferred.input || ["text"],
        cost: inferred.cost || {
          input: 0.1,
          output: 0.3,
          cacheRead: 0,
          cacheWrite: 0,
        },
        contextWindow: inferred.contextWindow || 32768,
        maxTokens: inferred.maxTokens || 4096,
        compat: inferred.compat || CLOUDFLARE_COMPAT,
      });
    }

    _logger.info(`[cloudflare] Fetched ${models.length} chat models from API`);
    return models;
  } catch (error) {
    // Best effort by design: the caller substitutes its own list on [].
    _logger.warn(
      `[cloudflare] Failed to fetch models from API: ${error instanceof Error ? error.message : String(error)}`,
    );
    return [];
  }
}
|
|
333
485
|
|
|
@@ -352,7 +504,13 @@ export default async function cloudflareProvider(pi: ExtensionAPI) {
|
|
|
352
504
|
return;
|
|
353
505
|
}
|
|
354
506
|
|
|
355
|
-
|
|
507
|
+
// Try to fetch models dynamically, fall back to hardcoded list
|
|
508
|
+
let models = await fetchCloudflareModels(apiToken, accountId);
|
|
509
|
+
|
|
510
|
+
if (models.length === 0) {
|
|
511
|
+
_logger.info("[cloudflare] Using fallback model list");
|
|
512
|
+
models = FALLBACK_MODELS;
|
|
513
|
+
}
|
|
356
514
|
|
|
357
515
|
pi.registerProvider("cloudflare", {
|
|
358
516
|
baseUrl: `https://api.cloudflare.com/client/v4/accounts/${accountId}/ai/v1`,
|
|
@@ -29,6 +29,7 @@ import { DEFAULT_FETCH_TIMEOUT_MS } from "../../constants.ts";
|
|
|
29
29
|
import { createLogger } from "../../lib/logger.ts";
|
|
30
30
|
import { isFreeModel, registerWithGlobalToggle } from "../../lib/registry.ts";
|
|
31
31
|
import { fetchWithRetry } from "../../lib/util.ts";
|
|
32
|
+
import { enhanceWithCI } from "../../provider-helper.ts";
|
|
32
33
|
|
|
33
34
|
const _logger = createLogger("dynamic-built-in");
|
|
34
35
|
|
|
@@ -403,7 +404,7 @@ export async function setupDynamicBuiltInProviders(
|
|
|
403
404
|
baseUrl: config.baseUrl,
|
|
404
405
|
apiKey,
|
|
405
406
|
api: config.api,
|
|
406
|
-
models,
|
|
407
|
+
models: enhanceWithCI(models, config.providerId),
|
|
407
408
|
});
|
|
408
409
|
};
|
|
409
410
|
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
5
|
import { applyHidden } from "../../config.ts";
|
|
6
|
+
import { PROVIDER_KILO } from "../../constants.ts";
|
|
6
7
|
import { fetchOpenRouterCompatibleModels } from "../model-fetcher.ts";
|
|
7
8
|
|
|
8
9
|
const KILO_API_BASE = process.env.KILO_API_URL || "https://api.kilo.ai";
|
|
@@ -22,5 +23,5 @@ export async function fetchKiloModels(options?: {
|
|
|
22
23
|
freeOnly: options?.freeOnly,
|
|
23
24
|
});
|
|
24
25
|
|
|
25
|
-
return applyHidden(models);
|
|
26
|
+
return applyHidden(models, PROVIDER_KILO);
|
|
26
27
|
}
|
package/providers/modal/modal.ts
CHANGED
|
@@ -1,44 +1,47 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Modal GLM Provider Extension
|
|
3
|
-
*
|
|
4
|
-
* Provides access to GLM models hosted on Modal's OpenAI-compatible endpoint.
|
|
5
|
-
* Requires MODAL_API_KEY (or modal_api_key in ~/.pi/free.json).
|
|
6
|
-
*
|
|
7
|
-
* Endpoint docs: https://modal.com/glm-5-endpoint
|
|
8
|
-
*/
|
|
9
|
-
|
|
10
|
-
import type { ProviderModelConfig } from "@mariozechner/pi-coding-agent";
|
|
11
|
-
import { applyHidden, PROVIDER_MODAL } from "../../config.ts";
|
|
12
|
-
import { BASE_URL_MODAL, URL_MODAL_TOS } from "../../constants.ts";
|
|
13
|
-
import { createProvider } from "../../provider-factory.ts";
|
|
14
|
-
|
|
15
|
-
function getModalModels(): ProviderModelConfig[] {
|
|
16
|
-
return applyHidden(
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
1
|
+
/**
|
|
2
|
+
* Modal GLM Provider Extension
|
|
3
|
+
*
|
|
4
|
+
* Provides access to GLM models hosted on Modal's OpenAI-compatible endpoint.
|
|
5
|
+
* Requires MODAL_API_KEY (or modal_api_key in ~/.pi/free.json).
|
|
6
|
+
*
|
|
7
|
+
* Endpoint docs: https://modal.com/glm-5-endpoint
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import type { ProviderModelConfig } from "@mariozechner/pi-coding-agent";
|
|
11
|
+
import { applyHidden, PROVIDER_MODAL } from "../../config.ts";
|
|
12
|
+
import { BASE_URL_MODAL, URL_MODAL_TOS } from "../../constants.ts";
|
|
13
|
+
import { createProvider } from "../../provider-factory.ts";
|
|
14
|
+
|
|
15
|
+
/**
 * Build the static model list for the Modal provider.
 *
 * @returns The single GLM-5.1 FP8 entry after it has been passed through
 *   `applyHidden` for the Modal provider id.
 */
function getModalModels(): ProviderModelConfig[] {
  const glmModel: ProviderModelConfig = {
    id: "zai-org/GLM-5.1-FP8",
    name: "GLM-5.1 FP8 (Modal)",
    reasoning: true,
    input: ["text"],
    // Promotional/free-period pricing may change; keep conservative placeholders.
    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
    contextWindow: 128000,
    maxTokens: 16384,
  };

  return applyHidden([glmModel], PROVIDER_MODAL);
}
|
|
32
|
+
|
|
33
|
+
/**
 * Extension entry point: registers the Modal provider through
 * `createProvider`.
 *
 * @param pi - Whatever `createProvider` accepts as its first argument.
 * @returns The value returned by `createProvider`.
 */
export default function (pi: Parameters<typeof createProvider>[0]) {
  // Extra HTTP headers handed to createProvider for this provider.
  const modalHeaders = {
    "X-Title": "Pi",
    "HTTP-Referer": "https://modal.com/",
  };

  return createProvider(pi, {
    providerId: PROVIDER_MODAL,
    baseUrl: BASE_URL_MODAL,
    apiKeyEnvVar: "MODAL_API_KEY",
    apiKeyConfigKey: "modal_api_key",
    // The list is static, but the option takes an async producer.
    fetchModels: async () => getModalModels(),
    tosUrl: URL_MODAL_TOS,
    skipToggle: true, // Modal only has 1 model, no need for toggle
    extraHeaders: modalHeaders,
  });
}
|