pi-free 2.0.1 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * Cloudflare Workers AI Provider Extension
3
3
  *
4
- * Provides access to Cloudflare's serverless GPU network with 18+ models.
4
+ * Provides access to Cloudflare's serverless GPU network with 30+ models.
5
5
  * All models use Cloudflare's "Neurons" pricing system:
6
6
  * - 10,000 Neurons per day FREE (resets daily at 00:00 UTC)
7
7
  * - $0.011 per 1,000 Neurons beyond free allocation
@@ -17,8 +17,6 @@
17
17
  * - Config file: ~/.pi/agent/auth.json
18
18
  * { "cloudflare-ai": { "access": "token", "account_id": "id" } }
19
19
  * - Legacy: CLOUDFLARE_API_TOKEN and CLOUDFLARE_ACCOUNT_ID env vars
20
- *
21
- * Models can be customized via ~/.pi/cloudflare-models.json
22
20
  */
23
21
 
24
22
  import { existsSync, readFileSync } from "node:fs";
@@ -28,7 +26,9 @@ import type {
28
26
  ExtensionAPI,
29
27
  ProviderModelConfig,
30
28
  } from "@mariozechner/pi-coding-agent";
29
+ import { DEFAULT_FETCH_TIMEOUT_MS } from "../../constants.ts";
31
30
  import { createLogger } from "../../lib/logger.ts";
31
+ import { fetchWithRetry } from "../../lib/util.ts";
32
32
 
33
33
  const _logger = createLogger("cloudflare");
34
34
 
@@ -81,36 +81,60 @@ function getCloudflareAuth(): CloudflareAuth {
81
81
  // Compatibility Settings
82
82
  // =============================================================================
83
83
 
84
- /**
85
- * Cloudflare Workers AI compatibility settings.
86
- * Prevents 413 Payload Too Large errors by disabling unsupported parameters.
87
- */
88
- const CLOUDFLARE_COMPAT: {
89
- supportsStore?: boolean;
90
- supportsDeveloperRole?: boolean;
91
- supportsReasoningEffort?: boolean;
92
- supportsStrictMode?: boolean;
93
- maxTokensField?: "max_tokens" | "max_completion_tokens";
94
- requiresThinkingAsText?: boolean;
95
- } = {
84
/**
 * Shared `compat` flags used by the Cloudflare model entries in this file,
 * either directly or spread together with `requiresThinkingAsText`.
 *
 * Every optional request feature is switched off and the output-token limit
 * is sent in the `max_tokens` field.
 * NOTE(review): an earlier revision documented these flags as a guard against
 * 413 Payload Too Large responses — confirm that is still the motivation.
 */
const CLOUDFLARE_COMPAT = {
  supportsStore: false,
  supportsDeveloperRole: false,
  supportsReasoningEffort: false,
  supportsStrictMode: false,
  // Literal type is kept narrow so the object fits union-typed compat fields.
  maxTokensField: "max_tokens" as const,
};
102
91
 
103
92
  // =============================================================================
104
- // Default Models (18 models from Cloudflare Workers AI)
93
+ // Known non-chat model patterns (to filter out)
105
94
  // =============================================================================
106
95
 
107
- interface ModelConfig extends ProviderModelConfig {
108
- compat?: { requiresThinkingAsText?: boolean };
109
- _remove?: boolean;
110
- }
96
/**
 * Case-insensitive patterns tested against a model id; an id matching any of
 * them is treated as a non-chat model. Grouped by the kind of model each
 * pattern is meant to exclude; the flattened order is the declaration order.
 */
const NON_CHAT_PATTERNS: RegExp[] = ([] as RegExp[]).concat(
  // Embeddings
  [/bge-/i, /embed/i, /embedding/i, /pfnet\/plamo-embedding/i, /qwen3-embedding/i],
  // Image generation
  [/flux/i, /stable-diffusion/i, /dreamshaper/i, /lucid-origin/i, /phoenix/i],
  // Speech/audio
  [/whisper/i, /aura-/i, /nova-/i, /melotts/i],
  // Translation (not chat)
  [/indictrans/i, /m2m100/i],
  // Vision-only models
  [/llava/i, /detr-/i, /resnet/i, /unum\/uform/i],
  // Code/SQL only
  [/sqlcoder/i],
  // Classification/reranking
  [/reranker/i, /distilbert/i],
  // Safety/moderation
  [/llama-guard/i],
  // Turn detection
  [/smart-turn/i],
);
132
+
133
+ // =============================================================================
134
+ // Fallback models (used if API fetch fails)
135
+ // =============================================================================
111
136
 
112
- const DEFAULT_MODELS: ModelConfig[] = [
113
- // Frontier models
137
+ const FALLBACK_MODELS: ProviderModelConfig[] = [
114
138
  {
115
139
  id: "@cf/moonshotai/kimi-k2.5",
116
140
  name: "Kimi K2.5",
@@ -119,6 +143,17 @@ const DEFAULT_MODELS: ModelConfig[] = [
119
143
  cost: { input: 0.6, output: 3.0, cacheRead: 0.1, cacheWrite: 0 },
120
144
  contextWindow: 256000,
121
145
  maxTokens: 8192,
146
+ compat: { ...CLOUDFLARE_COMPAT, requiresThinkingAsText: true },
147
+ },
148
+ {
149
+ id: "@cf/moonshotai/kimi-k2.6",
150
+ name: "Kimi K2.6",
151
+ reasoning: true,
152
+ input: ["text", "image"],
153
+ cost: { input: 0.8, output: 4.0, cacheRead: 0.1, cacheWrite: 0 },
154
+ contextWindow: 256000,
155
+ maxTokens: 8192,
156
+ compat: { ...CLOUDFLARE_COMPAT, requiresThinkingAsText: true },
122
157
  },
123
158
  {
124
159
  id: "@cf/meta/llama-4-scout-17b-16e-instruct",
@@ -128,6 +163,7 @@ const DEFAULT_MODELS: ModelConfig[] = [
128
163
  cost: { input: 0.27, output: 0.85, cacheRead: 0, cacheWrite: 0 },
129
164
  contextWindow: 131072,
130
165
  maxTokens: 8192,
166
+ compat: CLOUDFLARE_COMPAT,
131
167
  },
132
168
  {
133
169
  id: "@cf/nvidia/nemotron-3-120b-a12b",
@@ -137,7 +173,27 @@ const DEFAULT_MODELS: ModelConfig[] = [
137
173
  cost: { input: 0.5, output: 1.5, cacheRead: 0, cacheWrite: 0 },
138
174
  contextWindow: 256000,
139
175
  maxTokens: 8192,
140
- compat: { requiresThinkingAsText: true },
176
+ compat: { ...CLOUDFLARE_COMPAT, requiresThinkingAsText: true },
177
+ },
178
+ {
179
+ id: "@cf/openai/gpt-oss-120b",
180
+ name: "GPT-OSS 120B",
181
+ reasoning: true,
182
+ input: ["text"],
183
+ cost: { input: 0.5, output: 1.5, cacheRead: 0, cacheWrite: 0 },
184
+ contextWindow: 128000,
185
+ maxTokens: 8192,
186
+ compat: { ...CLOUDFLARE_COMPAT, requiresThinkingAsText: true },
187
+ },
188
+ {
189
+ id: "@cf/openai/gpt-oss-20b",
190
+ name: "GPT-OSS 20B",
191
+ reasoning: true,
192
+ input: ["text"],
193
+ cost: { input: 0.2, output: 0.6, cacheRead: 0, cacheWrite: 0 },
194
+ contextWindow: 128000,
195
+ maxTokens: 8192,
196
+ compat: { ...CLOUDFLARE_COMPAT, requiresThinkingAsText: true },
141
197
  },
142
198
  {
143
199
  id: "@cf/google/gemma-4-26b-a4b-it",
@@ -147,7 +203,7 @@ const DEFAULT_MODELS: ModelConfig[] = [
147
203
  cost: { input: 0.1, output: 0.3, cacheRead: 0, cacheWrite: 0 },
148
204
  contextWindow: 256000,
149
205
  maxTokens: 8192,
150
- compat: { requiresThinkingAsText: true },
206
+ compat: { ...CLOUDFLARE_COMPAT, requiresThinkingAsText: true },
151
207
  },
152
208
  {
153
209
  id: "@cf/google/gemma-3-12b-it",
@@ -157,6 +213,7 @@ const DEFAULT_MODELS: ModelConfig[] = [
157
213
  cost: { input: 0.345, output: 0.556, cacheRead: 0, cacheWrite: 0 },
158
214
  contextWindow: 80000,
159
215
  maxTokens: 8192,
216
+ compat: CLOUDFLARE_COMPAT,
160
217
  },
161
218
  {
162
219
  id: "@cf/qwen/qwen3-30b-a3b-fp8",
@@ -166,44 +223,47 @@ const DEFAULT_MODELS: ModelConfig[] = [
166
223
  cost: { input: 0.051, output: 0.34, cacheRead: 0, cacheWrite: 0 },
167
224
  contextWindow: 32768,
168
225
  maxTokens: 8192,
169
- compat: { requiresThinkingAsText: true },
226
+ compat: { ...CLOUDFLARE_COMPAT, requiresThinkingAsText: true },
170
227
  },
171
228
  {
172
- id: "@cf/zai-org/glm-4.7-flash",
173
- name: "GLM-4.7 Flash",
229
+ id: "@cf/qwen/qwen2.5-coder-32b-instruct",
230
+ name: "Qwen 2.5 Coder 32B",
174
231
  reasoning: false,
175
232
  input: ["text"],
176
- cost: { input: 0.06, output: 0.4, cacheRead: 0, cacheWrite: 0 },
177
- contextWindow: 131072,
233
+ cost: { input: 0.3, output: 0.3, cacheRead: 0, cacheWrite: 0 },
234
+ contextWindow: 32768,
178
235
  maxTokens: 8192,
236
+ compat: CLOUDFLARE_COMPAT,
179
237
  },
180
- // Popular models
181
238
  {
182
- id: "@cf/meta/llama-3.3-70b-instruct-fp8-fast",
183
- name: "Llama 3.3 70B (Fast)",
184
- reasoning: false,
239
+ id: "@cf/qwen/qwq-32b",
240
+ name: "QwQ 32B (Reasoning)",
241
+ reasoning: true,
185
242
  input: ["text"],
186
- cost: { input: 0.5, output: 0.5, cacheRead: 0, cacheWrite: 0 },
187
- contextWindow: 131072,
243
+ cost: { input: 0.3, output: 0.3, cacheRead: 0, cacheWrite: 0 },
244
+ contextWindow: 32768,
188
245
  maxTokens: 8192,
246
+ compat: { ...CLOUDFLARE_COMPAT, requiresThinkingAsText: true },
189
247
  },
190
248
  {
191
- id: "@cf/meta/llama-3.1-8b-instruct",
192
- name: "Llama 3.1 8B",
249
+ id: "@cf/zai-org/glm-4.7-flash",
250
+ name: "GLM-4.7 Flash",
193
251
  reasoning: false,
194
252
  input: ["text"],
195
- cost: { input: 0.1, output: 0.1, cacheRead: 0, cacheWrite: 0 },
253
+ cost: { input: 0.06, output: 0.4, cacheRead: 0, cacheWrite: 0 },
196
254
  contextWindow: 131072,
197
255
  maxTokens: 8192,
256
+ compat: CLOUDFLARE_COMPAT,
198
257
  },
199
258
  {
200
- id: "@cf/meta/llama-3.1-70b-instruct",
201
- name: "Llama 3.1 70B",
259
+ id: "@cf/meta/llama-3.3-70b-instruct-fp8-fast",
260
+ name: "Llama 3.3 70B Fast",
202
261
  reasoning: false,
203
262
  input: ["text"],
204
263
  cost: { input: 0.5, output: 0.5, cacheRead: 0, cacheWrite: 0 },
205
264
  contextWindow: 131072,
206
265
  maxTokens: 8192,
266
+ compat: CLOUDFLARE_COMPAT,
207
267
  },
208
268
  {
209
269
  id: "@cf/meta/llama-3.1-405b-instruct",
@@ -213,121 +273,213 @@ const DEFAULT_MODELS: ModelConfig[] = [
213
273
  cost: { input: 2.0, output: 2.0, cacheRead: 0, cacheWrite: 0 },
214
274
  contextWindow: 131072,
215
275
  maxTokens: 8192,
276
+ compat: CLOUDFLARE_COMPAT,
216
277
  },
217
278
  {
218
- id: "@cf/deepseek-ai/deepseek-r1-distill-qwen-32b",
219
- name: "DeepSeek R1 Distill Qwen 32B",
220
- reasoning: true,
279
+ id: "@cf/meta/llama-3.1-70b-instruct",
280
+ name: "Llama 3.1 70B",
281
+ reasoning: false,
221
282
  input: ["text"],
222
- cost: { input: 0.3, output: 0.3, cacheRead: 0, cacheWrite: 0 },
223
- contextWindow: 32768,
283
+ cost: { input: 0.5, output: 0.5, cacheRead: 0, cacheWrite: 0 },
284
+ contextWindow: 131072,
224
285
  maxTokens: 8192,
225
- compat: { requiresThinkingAsText: true },
286
+ compat: CLOUDFLARE_COMPAT,
226
287
  },
227
288
  {
228
- id: "@cf/deepseek-ai/deepseek-math-7b-instruct",
229
- name: "DeepSeek Math 7B",
230
- reasoning: true,
231
- input: ["text"],
232
- cost: { input: 0.1, output: 0.1, cacheRead: 0, cacheWrite: 0 },
233
- contextWindow: 16384,
234
- maxTokens: 4096,
235
- },
236
- // Mistral models
237
- {
238
- id: "@cf/mistral/mistral-small-3.1-24b-instruct",
239
- name: "Mistral Small 3.1 24B",
289
+ id: "@cf/meta/llama-3.2-11b-vision-instruct",
290
+ name: "Llama 3.2 11B Vision",
240
291
  reasoning: false,
241
292
  input: ["text", "image"],
242
- cost: { input: 0.3, output: 0.3, cacheRead: 0, cacheWrite: 0 },
243
- contextWindow: 32768,
244
- maxTokens: 8192,
245
- },
246
- {
247
- id: "@cf/mistral/mistral-7b-instruct-v0.2-lora",
248
- name: "Mistral 7B Instruct",
249
- reasoning: false,
250
- input: ["text"],
251
- cost: { input: 0.1, output: 0.1, cacheRead: 0, cacheWrite: 0 },
252
- contextWindow: 32768,
253
- maxTokens: 4096,
254
- },
255
- {
256
- id: "@cf/mistral/mixtral-8x7b-instruct-v0.1-awq",
257
- name: "Mixtral 8x7B Instruct",
258
- reasoning: false,
259
- input: ["text"],
260
- cost: { input: 0.3, output: 0.3, cacheRead: 0, cacheWrite: 0 },
261
- contextWindow: 32768,
262
- maxTokens: 4096,
263
- },
264
- // Qwen and Gemma
265
- {
266
- id: "@cf/qwen/qwen1.5-14b-chat-awq",
267
- name: "Qwen 1.5 14B Chat",
268
- reasoning: false,
269
- input: ["text"],
270
293
  cost: { input: 0.2, output: 0.2, cacheRead: 0, cacheWrite: 0 },
271
- contextWindow: 32768,
294
+ contextWindow: 128000,
272
295
  maxTokens: 8192,
273
- },
274
- {
275
- id: "@cf/google/gemma-2b-it-lora",
276
- name: "Gemma 2B",
277
- reasoning: false,
278
- input: ["text"],
279
- cost: { input: 0.05, output: 0.05, cacheRead: 0, cacheWrite: 0 },
280
- contextWindow: 8192,
281
- maxTokens: 2048,
282
- },
283
- {
284
- id: "@cf/google/gemma-7b-it-lora",
285
- name: "Gemma 7B",
286
- reasoning: false,
287
- input: ["text"],
288
- cost: { input: 0.1, output: 0.1, cacheRead: 0, cacheWrite: 0 },
289
- contextWindow: 8192,
290
- maxTokens: 2048,
296
+ compat: CLOUDFLARE_COMPAT,
291
297
  },
292
298
  ];
293
299
 
294
300
  // =============================================================================
295
- // Model Customization (user overrides)
301
+ // Model metadata inference
296
302
  // =============================================================================
297
303
 
298
- function getModels(): ProviderModelConfig[] {
299
- // Apply Cloudflare compat settings to all default models
300
- const defaults = DEFAULT_MODELS.map((m) => ({
301
- ...m,
302
- compat: { ...CLOUDFLARE_COMPAT, ...m.compat },
303
- })) as ProviderModelConfig[];
304
/**
 * Minimal shape assumed for one entry of the `result` array returned by the
 * Cloudflare models listing. Only `id` is required; everything else may be
 * absent.
 */
interface CloudflareModel {
  /** Identifier of the catalogue entry. */
  id: string;
  /** Name reported for the entry, when present. */
  name?: string;
  /** Free-text description, when present. */
  description?: string;
  /** Task descriptor of the entry, when present. */
  task?: { id?: string; name?: string };
}
313
+
314
/**
 * Reports whether a model id should be treated as a chat model.
 *
 * @param modelId - Model identifier to classify.
 * @returns `false` as soon as any entry of `NON_CHAT_PATTERNS` matches the id,
 *          `true` when none does.
 */
function isChatModel(modelId: string): boolean {
  for (const excluded of NON_CHAT_PATTERNS) {
    if (excluded.test(modelId)) {
      return false;
    }
  }
  return true;
}
317
+
318
/**
 * Derives a human-readable display name from a model id.
 *
 * The segment after the last `/` is taken, trailing marker suffixes are peeled
 * off one at a time, the remaining hyphen-separated words are capitalised, and
 * parameter-count tokens such as `70b` are normalised to `70B`.
 *
 * Suffix handling:
 * - `-instruct`, `-chat`, `-it` and a trailing `-v<major>.<minor>` are dropped;
 * - `-awq`, `-fp8`, `-fast`, `-lora`, `-hf` become parenthesised tags appended
 *   in the order they appear in the id.
 *
 * Suffixes are stripped repeatedly from the end, so stacked suffixes such as
 * `-instruct-fp8-fast` are all recognised (a single fixed-order pass would
 * only catch the last one).
 *
 * @param id - Full model id, e.g. `@cf/meta/llama-3.1-8b-instruct`.
 * @returns The inferred display name, e.g. `Llama 3.1 8B`.
 */
function inferModelName(id: string): string {
  // Extract the model name part after the last "/" (whole id if that is empty).
  let base = id.split("/").pop() || id;

  // Suffixes that turn into a visible tag; all other recognised suffixes vanish.
  const tagLabels = new Map<string, string>([
    ["awq", "AWQ"],
    ["fp8", "FP8"],
    ["fast", "Fast"],
    ["lora", "LoRA"],
    ["hf", "HF"],
  ]);
  const trailingSuffix = /-(instruct|chat|it|awq|fp8|fast|lora|hf|v\d+\.\d+)$/i;

  const tags: string[] = [];
  for (;;) {
    const match = trailingSuffix.exec(base);
    if (!match) break;
    base = base.slice(0, match.index);
    const label = tagLabels.get((match[1] ?? "").toLowerCase());
    // Suffixes are discovered right-to-left; unshift keeps left-to-right order.
    if (label) tags.unshift(label);
  }

  // Convert to title case and upper-case the "b" of size tokens (8b -> 8B).
  const title = base
    .split("-")
    .map((word) => word.charAt(0).toUpperCase() + word.slice(1))
    .join(" ")
    .replace(/\b(\d+(?:\.\d+)?)[bB]\b/g, "$1B");

  return tags.reduce((name, tag) => `${name} (${tag})`, title);
}
341
+
342
/**
 * Guesses provider metadata for a model id that has no curated entry.
 *
 * Everything here is a heuristic over the id string:
 * - vision / reasoning capability from keywords in the id;
 * - context window and max output tokens from the model family;
 * - cost from the first `<number>b` size token, with overrides for a few
 *   specific models.
 *
 * Family defaults are kept in line with the values used by this file's
 * curated fallback list (Kimi / Nemotron / Gemma 4 at 256000, Llama 3.1 / 3.3
 * / 4 at 131072, Gemma 3 at 80000), and Kimi ids are treated as reasoning
 * models as they are there.
 *
 * @param id - Full model id, e.g. `@cf/meta/llama-3.1-8b-instruct`.
 * @returns A partial model config; `compat` includes
 *          `requiresThinkingAsText: true` whenever the model is classified as
 *          a reasoning model.
 */
function inferModelMetadata(id: string): Partial<ProviderModelConfig> {
  const hasVision = /vision|multimodal|vl|llava/i.test(id);
  const hasReasoning = /reason|r1|thinking|qwq|nemotron|oss|kimi/i.test(id);

  // Default context windows by model family (later matches override earlier ones).
  let contextWindow = 32768;
  let maxTokens = 4096;

  if (/llama-3\.1|llama-3\.3|llama-4/i.test(id)) {
    contextWindow = 131072;
    maxTokens = 8192;
  }
  if (/gemma-4|kimi|nemotron/i.test(id)) {
    contextWindow = 256000;
    maxTokens = 8192;
  }
  if (/llama-3\.2-11b/i.test(id)) {
    contextWindow = 128000;
    maxTokens = 8192;
  }
  if (/gemma-3/i.test(id)) {
    contextWindow = 80000;
    maxTokens = 8192;
  }

  // Estimate costs based on model size (very rough approximation).
  let inputCost = 0.1;
  let outputCost = 0.3;

  // First "<digits>[.<digits>]b" token; only the integer part is used.
  const sizeToken = id.match(/(\d+)(?:\.\d+)?[bB]/)?.[1];
  if (sizeToken !== undefined) {
    const size = parseInt(sizeToken, 10);
    if (size >= 100) {
      inputCost = 0.5;
      outputCost = 1.5;
    } else if (size >= 70) {
      inputCost = 0.5;
      outputCost = 0.5;
    } else if (size >= 30) {
      inputCost = 0.3;
      outputCost = 0.3;
    } else if (size >= 8) {
      inputCost = 0.2;
      outputCost = 0.2;
    }
  }

  // Override for specific known models.
  if (id.includes("llama-3.1-405b")) {
    inputCost = 2.0;
    outputCost = 2.0;
  }
  if (id.includes("kimi-k2.5")) {
    inputCost = 0.6;
    outputCost = 3.0;
  }
  if (id.includes("kimi-k2.6")) {
    inputCost = 0.8;
    outputCost = 4.0;
  }

  return {
    name: inferModelName(id),
    reasoning: hasReasoning,
    input: hasVision ? (["text", "image"] as const) : (["text"] as const),
    cost: { input: inputCost, output: outputCost, cacheRead: 0, cacheWrite: 0 },
    contextWindow,
    maxTokens,
    compat: hasReasoning
      ? { ...CLOUDFLARE_COMPAT, requiresThinkingAsText: true }
      : CLOUDFLARE_COMPAT,
  };
}
411
+
412
+ // =============================================================================
413
+ // Dynamic model fetching
414
+ // =============================================================================
304
415
 
305
- // Check for user overrides
306
- const overridePath = join(homedir(), ".pi", "cloudflare-models.json");
307
- if (!existsSync(overridePath)) return defaults;
416
/**
 * Fetches the account's Workers AI model listing and converts the chat-capable
 * entries into provider model configs.
 *
 * For each listed entry:
 * - the model slug is taken from `name` when that looks like a slug (starts
 *   with `@`), otherwise from `id`, so both catalogue layouts are handled;
 * - entries without a usable slug, duplicates, and ids rejected by
 *   `isChatModel` are skipped;
 * - if the slug has a curated entry in `FALLBACK_MODELS`, a copy of that entry
 *   is used so hand-maintained cost/context values win over heuristics;
 * - otherwise metadata comes from `inferModelMetadata`.
 *
 * @param token - API token sent as a Bearer credential.
 * @param accountId - Cloudflare account id used to build the request URL.
 * @returns The mapped models. Never rejects: on any failure (network, non-2xx
 *          status, unsuccessful payload) a warning is logged and an empty
 *          array is returned so the caller can fall back.
 */
async function fetchCloudflareModels(
  token: string,
  accountId: string,
): Promise<ProviderModelConfig[]> {
  const baseUrl = `https://api.cloudflare.com/client/v4/accounts/${accountId}`;

  try {
    // NOTE(review): Cloudflare's public API reference lists the catalogue
    // under `/ai/models/search` — confirm this shorter path is served.
    const response = await fetchWithRetry(
      `${baseUrl}/ai/models`,
      {
        headers: {
          Authorization: `Bearer ${token}`,
          "Content-Type": "application/json",
        },
      },
      3,
      1000,
      DEFAULT_FETCH_TIMEOUT_MS,
    );

    if (!response.ok) {
      throw new Error(`HTTP ${response.status}: ${response.statusText}`);
    }

    const json = (await response.json()) as {
      success?: boolean;
      result?: CloudflareModel[];
      errors?: Array<{ message: string }>;
    };

    if (!json.success || !Array.isArray(json.result)) {
      throw new Error(
        json.errors?.[0]?.message || "API returned unsuccessful response",
      );
    }

    // Curated entries take precedence over inferred metadata for the same id.
    const curatedById = new Map(
      FALLBACK_MODELS.map((model) => [model.id, model] as const),
    );
    const seen = new Set<string>();
    const models: ProviderModelConfig[] = [];

    for (const entry of json.result) {
      // The payload is untyped at runtime; tolerate missing/non-string fields.
      const rawId = typeof entry?.id === "string" ? entry.id : "";
      const rawName = typeof entry?.name === "string" ? entry.name : "";

      // NOTE(review): the catalogue appears to put the "@cf/..." slug in
      // `name` with an opaque `id`; accept either layout.
      const slug = rawName.startsWith("@") ? rawName : rawId;

      // Filter to chat/text generation models only, once per slug.
      if (!slug || seen.has(slug) || !isChatModel(slug)) continue;
      seen.add(slug);

      const curated = curatedById.get(slug);
      if (curated) {
        // Copy so later consumers cannot mutate the shared fallback list.
        models.push({ ...curated });
        continue;
      }

      const inferred = inferModelMetadata(slug);
      // Only use `name` for display when it is not itself the slug.
      const displayName = rawName !== slug ? rawName : "";

      models.push({
        id: slug,
        name: displayName || inferred.name || slug,
        reasoning: inferred.reasoning || false,
        input: inferred.input || ["text"],
        cost: inferred.cost || {
          input: 0.1,
          output: 0.3,
          cacheRead: 0,
          cacheWrite: 0,
        },
        contextWindow: inferred.contextWindow || 32768,
        maxTokens: inferred.maxTokens || 4096,
        compat: inferred.compat || CLOUDFLARE_COMPAT,
      });
    }

    _logger.info(`[cloudflare] Fetched ${models.length} chat models from API`);
    return models;
  } catch (error) {
    _logger.warn(
      `[cloudflare] Failed to fetch models from API: ${error instanceof Error ? error.message : String(error)}`,
    );
    return [];
  }
}
333
485
 
@@ -352,7 +504,13 @@ export default async function cloudflareProvider(pi: ExtensionAPI) {
352
504
  return;
353
505
  }
354
506
 
355
- const models = getModels();
507
+ // Try to fetch models dynamically, fall back to hardcoded list
508
+ let models = await fetchCloudflareModels(apiToken, accountId);
509
+
510
+ if (models.length === 0) {
511
+ _logger.info("[cloudflare] Using fallback model list");
512
+ models = FALLBACK_MODELS;
513
+ }
356
514
 
357
515
  pi.registerProvider("cloudflare", {
358
516
  baseUrl: `https://api.cloudflare.com/client/v4/accounts/${accountId}/ai/v1`,
@@ -29,6 +29,7 @@ import { DEFAULT_FETCH_TIMEOUT_MS } from "../../constants.ts";
29
29
  import { createLogger } from "../../lib/logger.ts";
30
30
  import { isFreeModel, registerWithGlobalToggle } from "../../lib/registry.ts";
31
31
  import { fetchWithRetry } from "../../lib/util.ts";
32
+ import { enhanceWithCI } from "../../provider-helper.ts";
32
33
 
33
34
  const _logger = createLogger("dynamic-built-in");
34
35
 
@@ -403,7 +404,7 @@ export async function setupDynamicBuiltInProviders(
403
404
  baseUrl: config.baseUrl,
404
405
  apiKey,
405
406
  api: config.api,
406
- models,
407
+ models: enhanceWithCI(models, config.providerId),
407
408
  });
408
409
  };
409
410
 
@@ -3,6 +3,7 @@
3
3
  */
4
4
 
5
5
  import { applyHidden } from "../../config.ts";
6
+ import { PROVIDER_KILO } from "../../constants.ts";
6
7
  import { fetchOpenRouterCompatibleModels } from "../model-fetcher.ts";
7
8
 
8
9
  const KILO_API_BASE = process.env.KILO_API_URL || "https://api.kilo.ai";
@@ -22,5 +23,5 @@ export async function fetchKiloModels(options?: {
22
23
  freeOnly: options?.freeOnly,
23
24
  });
24
25
 
25
- return applyHidden(models);
26
+ return applyHidden(models, PROVIDER_KILO);
26
27
  }
@@ -1,44 +1,47 @@
1
- /**
2
- * Modal GLM Provider Extension
3
- *
4
- * Provides access to GLM models hosted on Modal's OpenAI-compatible endpoint.
5
- * Requires MODAL_API_KEY (or modal_api_key in ~/.pi/free.json).
6
- *
7
- * Endpoint docs: https://modal.com/glm-5-endpoint
8
- */
9
-
10
- import type { ProviderModelConfig } from "@mariozechner/pi-coding-agent";
11
- import { applyHidden, PROVIDER_MODAL } from "../../config.ts";
12
- import { BASE_URL_MODAL, URL_MODAL_TOS } from "../../constants.ts";
13
- import { createProvider } from "../../provider-factory.ts";
14
-
15
- function getModalModels(): ProviderModelConfig[] {
16
- return applyHidden([
17
- {
18
- id: "zai-org/GLM-5.1-FP8",
19
- name: "GLM-5.1 FP8 (Modal)",
20
- reasoning: true,
21
- input: ["text"],
22
- // Promotional/free-period pricing may change; keep conservative placeholders.
23
- cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
24
- contextWindow: 128000,
25
- maxTokens: 16384,
26
- },
27
- ]);
28
- }
29
-
30
- export default function (pi: Parameters<typeof createProvider>[0]) {
31
- return createProvider(pi, {
32
- providerId: PROVIDER_MODAL,
33
- baseUrl: BASE_URL_MODAL,
34
- apiKeyEnvVar: "MODAL_API_KEY",
35
- apiKeyConfigKey: "modal_api_key",
36
- fetchModels: async () => getModalModels(),
37
- tosUrl: URL_MODAL_TOS,
38
- skipToggle: true, // Modal only has 1 model, no need for toggle
39
- extraHeaders: {
40
- "X-Title": "Pi",
41
- "HTTP-Referer": "https://modal.com/",
42
- },
43
- });
44
- }
1
+ /**
2
+ * Modal GLM Provider Extension
3
+ *
4
+ * Provides access to GLM models hosted on Modal's OpenAI-compatible endpoint.
5
+ * Requires MODAL_API_KEY (or modal_api_key in ~/.pi/free.json).
6
+ *
7
+ * Endpoint docs: https://modal.com/glm-5-endpoint
8
+ */
9
+
10
+ import type { ProviderModelConfig } from "@mariozechner/pi-coding-agent";
11
+ import { applyHidden, PROVIDER_MODAL } from "../../config.ts";
12
+ import { BASE_URL_MODAL, URL_MODAL_TOS } from "../../constants.ts";
13
+ import { createProvider } from "../../provider-factory.ts";
14
+
15
/**
 * Builds the static Modal model list (a single GLM entry) and passes it
 * through `applyHidden` together with the Modal provider id.
 * NOTE(review): `applyHidden` presumably removes models the user has hidden
 * for this provider — confirm in config.ts.
 */
function getModalModels(): ProviderModelConfig[] {
  const catalogue: ProviderModelConfig[] = [
    {
      id: "zai-org/GLM-5.1-FP8",
      name: "GLM-5.1 FP8 (Modal)",
      reasoning: true,
      input: ["text"],
      // Promotional/free-period pricing may change; keep conservative placeholders.
      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
      contextWindow: 128000,
      maxTokens: 16384,
    },
  ];

  return applyHidden(catalogue, PROVIDER_MODAL);
}
32
+
33
/**
 * Extension entry point: registers the Modal provider through
 * `createProvider`, using the static model list from `getModalModels`.
 *
 * @param pi - Extension API handle, forwarded unchanged to `createProvider`.
 * @returns Whatever `createProvider` returns.
 */
export default function (pi: Parameters<typeof createProvider>[0]) {
  const modalOptions: Parameters<typeof createProvider>[1] = {
    providerId: PROVIDER_MODAL,
    baseUrl: BASE_URL_MODAL,
    apiKeyEnvVar: "MODAL_API_KEY",
    apiKeyConfigKey: "modal_api_key",
    fetchModels: async () => getModalModels(),
    tosUrl: URL_MODAL_TOS,
    skipToggle: true, // Modal only has 1 model, no need for toggle
    extraHeaders: {
      "X-Title": "Pi",
      "HTTP-Referer": "https://modal.com/",
    },
  };

  return createProvider(pi, modalOptions);
}