pi-free 2.0.9 → 2.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/util.ts CHANGED
@@ -1,460 +1,524 @@
1
- import { createLogger } from "./logger.ts";
2
- import {
3
- getProxyModelCompat,
4
- isLikelyReasoningModel,
5
- } from "./provider-compat.ts";
6
- import type { ProviderModelConfig as PiProviderModelConfig } from "@earendil-works/pi-coding-agent";
7
- import type { ProviderModelConfig } from "./types.ts";
8
-
9
- const _logger = createLogger("util");
10
-
11
- // =============================================================================
12
- // Shared Utilities
13
- // =============================================================================
14
-
15
- /**
16
- * Log a warning message for provider operations
17
- */
18
- export function logWarning(
19
- provider: string,
20
- message: string,
21
- error?: unknown,
22
- ): void {
23
- _logger.warn(
24
- `[${provider}] ${message}`,
25
- error ? { error: String(error) } : undefined,
26
- );
27
- }
28
-
29
- /**
30
- * Fetch with timeout using AbortController
31
- */
32
- export async function fetchWithTimeout(
33
- url: string,
34
- options: RequestInit,
35
- timeoutMs = 30000,
36
- ): Promise<Response> {
37
- const controller = new AbortController();
38
- const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
39
-
40
- try {
41
- const response = await fetch(url, {
42
- ...options,
43
- signal: controller.signal,
44
- });
45
- return response;
46
- } finally {
47
- clearTimeout(timeoutId);
48
- }
49
- }
50
-
51
- /**
52
- * Fetch with retry logic and timeout
53
- */
54
- export async function fetchWithRetry(
55
- url: string,
56
- options: RequestInit,
57
- retries = 3,
58
- delayMs = 1000,
59
- timeoutMs = 30000,
60
- ): Promise<Response> {
61
- let lastError: unknown;
62
-
63
- for (let i = 0; i < retries; i++) {
64
- try {
65
- const response = await fetchWithTimeout(url, options, timeoutMs);
66
- if (response.ok) return response;
67
-
68
- // If it's a rate limit, throw immediately
69
- if (response.status === 429) {
70
- throw new Error(`Rate limited (429)`);
71
- }
72
-
73
- // For server errors, retry
74
- if (response.status >= 500) {
75
- lastError = new Error(`Server error ${response.status}`);
76
- if (i < retries - 1) {
77
- await new Promise((r) => setTimeout(r, delayMs * (i + 1)));
78
- continue;
79
- }
80
- // Last retry exhausted - throw the error
81
- throw lastError;
82
- }
83
-
84
- return response; // Return non-ok but non-retryable responses
85
- } catch (error) {
86
- lastError = error;
87
- if (i < retries - 1) {
88
- await new Promise((r) => setTimeout(r, delayMs * (i + 1)));
89
- }
90
- }
91
- }
92
-
93
- throw lastError;
94
- }
95
-
96
- // =============================================================================
97
- // Shared API Response Parsing
98
- // =============================================================================
99
-
100
- /**
101
- * Parse and validate model list API response
102
- * Shared between Kilo, OpenRouter, and other providers
103
- */
104
- export async function parseModelResponse<T>(
105
- response: Response,
106
- providerName: string,
107
- ): Promise<{ data: T[] }> {
108
- if (!response.ok) {
109
- throw new Error(
110
- `Failed to fetch ${providerName} models: ${response.status} ${response.statusText}`,
111
- );
112
- }
113
-
114
- const json = (await response.json()) as { data?: T[] };
115
-
116
- if (!json.data || !Array.isArray(json.data)) {
117
- throw new Error(
118
- `Invalid ${providerName} models response: missing data array`,
119
- );
120
- }
121
-
122
- return { data: json.data };
123
- }
124
-
125
- // =============================================================================
126
- // Model Filtering Utilities
127
- // =============================================================================
128
-
129
- // Models known to be small (no "Xb" in their ID) that should be filtered.
130
- // Updated as new small free models appear on OpenRouter/Kilo.
131
- const KNOWN_SMALL_MODELS: ReadonlySet<string> = new Set([
132
- // Microsoft Phi models (1.5B–14B)
133
- "microsoft/phi-3-mini-128k-instruct",
134
- "microsoft/phi-3-mini-4k-instruct",
135
- "microsoft/phi-3-small-128k-instruct",
136
- "microsoft/phi-3-small-8k-instruct",
137
- "microsoft/phi-3-medium-128k-instruct",
138
- "microsoft/phi-3-medium-4k-instruct",
139
- "microsoft/phi-3.5-mini-instruct",
140
- "microsoft/phi-4-mini-instruct",
141
- "microsoft/phi-4-mini-reasoning",
142
- "microsoft/phi-4-reasoning-plus",
143
- // OpenChat (7B)
144
- "openchat/openchat-3.5-0106",
145
- "openchat/openchat-3.5-1210",
146
- // Mistral 7B variants
147
- "mistralai/mistral-7b-instruct-v0.1",
148
- "mistralai/mistral-7b-instruct-v0.2",
149
- "mistralai/mistral-7b-instruct-v0.3",
150
- // Gemma small variants
151
- "google/gemma-2b-it",
152
- "google/gemma-1.1-2b-it",
153
- // DeepSeek small variants
154
- "deepseek/deepseek-r1-distill-qwen-1.5b",
155
- "deepseek/deepseek-r1-distill-llama-8b",
156
- "deepseek/deepseek-r1-distill-qwen-7b",
157
- "deepseek/deepseek-r1-distill-qwen-14b",
158
- // Stripe Hyena (2.7B)
159
- "togethercomputer/stripedhy-2.7b",
160
- // TinyLlama
161
- "tinyllama/tinyllama-1.1b-chat-v1.0",
162
- ]);
163
-
164
- /**
165
- * Check if model is usable based on size constraints and naming.
166
- * Extracts model size from ID (e.g., "llama-3-70b" -> 70) and compares to minSizeB.
167
- * Falls back to a blocklist for models that don't encode size in the name.
168
- */
169
- export function isUsableModel(modelId: string, minSizeB?: number): boolean {
170
- // Filter out models that are likely test or debug models
171
- if (modelId.includes("test") || modelId.includes("debug")) {
172
- return false;
173
- }
174
-
175
- // Filter by minimum size if specified
176
- if (minSizeB !== undefined) {
177
- // Known-small blocklist (models without "Xb" in the name)
178
- // Strip :free suffix used by OpenRouter/Kilo
179
- const baseId = modelId.replace(/:free$/, "");
180
- if (KNOWN_SMALL_MODELS.has(baseId)) return false;
181
-
182
- // Check Mixture-of-Experts models first (e.g., "8x22b" = 176b total)
183
- const parsed = parseModelSize(modelId);
184
- if (parsed?.type === "moe") {
185
- if (parsed.experts * parsed.sizePerExpert < minSizeB) return false;
186
- return true; // MoE model passed size check
187
- }
188
-
189
- // Standard model size (e.g., "70b", "8b")
190
- if (parsed?.type === "standard" && parsed.size < minSizeB) return false;
191
- }
192
-
193
- return true;
194
- }
195
-
196
- // =============================================================================
197
- // Model Size Parsing (no regex — avoids SonarCloud S5852 flags)
198
- // =============================================================================
199
-
200
- interface MoeSize {
201
- type: "moe";
202
- experts: number;
203
- sizePerExpert: number;
204
- }
205
-
206
- interface StandardSize {
207
- type: "standard";
208
- size: number;
209
- }
210
-
211
- /**
212
- * Extract model size from a model ID without using regex.
213
- * Handles both MoE ("8x22b") and standard ("70b", "8b") formats.
214
- */
215
- /**
216
- * Parse MoE (Mixture of Experts) model size like "8x22b".
217
- */
218
- function parseMoeSize(lower: string): MoeSize | null {
219
- let searchPos = 0;
220
- while (true) {
221
- const xIdx = lower.indexOf("x", searchPos);
222
- if (xIdx <= 0) break;
223
- const beforeChar = lower[xIdx - 1];
224
- if (!(beforeChar >= "0" && beforeChar <= "9")) {
225
- searchPos = xIdx + 1;
226
- continue;
227
- }
228
- const bIdx = lower.indexOf("b", xIdx + 1);
229
- if (bIdx <= xIdx + 1) {
230
- searchPos = xIdx + 1;
231
- continue;
232
- }
233
- let countStart = xIdx - 1;
234
- while (
235
- countStart > 0 &&
236
- lower[countStart - 1] >= "0" &&
237
- lower[countStart - 1] <= "9"
238
- ) {
239
- countStart--;
240
- }
241
- const experts = Number.parseInt(lower.slice(countStart, xIdx), 10);
242
- const size = Number.parseFloat(lower.slice(xIdx + 1, bIdx));
243
- if (
244
- !Number.isNaN(experts) &&
245
- !Number.isNaN(size) &&
246
- experts > 0 &&
247
- size > 0
248
- ) {
249
- const afterB = lower.slice(bIdx + 1);
250
- if (
251
- afterB.length === 0 ||
252
- ((afterB[0] < "0" || afterB[0] > "9") && afterB[0] !== ".")
253
- ) {
254
- return { type: "moe", experts, sizePerExpert: size };
255
- }
256
- }
257
- searchPos = xIdx + 1;
258
- }
259
- return null;
260
- }
261
-
262
- /**
263
- * Parse standard model size like "70b" or "8b".
264
- */
265
- function parseStandardSize(lower: string): StandardSize | null {
266
- for (let i = 0; i < lower.length; i++) {
267
- if (lower[i] !== "b") continue;
268
- const afterB = lower.slice(i + 1);
269
- if (
270
- afterB.length > 0 &&
271
- ((afterB[0] >= "0" && afterB[0] <= "9") || afterB[0] === ".")
272
- ) {
273
- continue; // b followed by digit or dot — not our match
274
- }
275
- let start = i;
276
- while (
277
- start > 0 &&
278
- ((lower[start - 1] >= "0" && lower[start - 1] <= "9") ||
279
- lower[start - 1] === ".")
280
- ) {
281
- start--;
282
- }
283
- if (start < i) {
284
- const numStr = lower.slice(start, i);
285
- const size = Number.parseFloat(numStr);
286
- if (!Number.isNaN(size) && size > 0) {
287
- return { type: "standard", size };
288
- }
289
- }
290
- break;
291
- }
292
- return null;
293
- }
294
-
295
- function parseModelSize(modelId: string): MoeSize | StandardSize | null {
296
- const lower = modelId.toLowerCase();
297
- return parseMoeSize(lower) ?? parseStandardSize(lower) ?? null;
298
- }
299
-
300
- // =============================================================================
301
- // Model Name Cleaning
302
- // =============================================================================
303
-
304
- /**
305
- * Strip provider prefix from model names.
306
- * OpenRouter/Kilo return names like "Provider : Model Name" or "Provider / Model Name".
307
- * We only want the model name part.
308
- */
309
- export function cleanModelName(name: string): string {
310
- // Handle patterns like "Provider : Model Name" or "Provider / Model Name"
311
- const colonIdx = name.indexOf(":");
312
- const slashIdx = name.indexOf("/");
313
- const idx =
314
- colonIdx === -1
315
- ? slashIdx
316
- : slashIdx === -1
317
- ? colonIdx
318
- : Math.min(colonIdx, slashIdx);
319
- if (idx > 0) {
320
- return name.slice(idx + 1).trim();
321
- }
322
- return name.trim();
323
- }
324
-
325
- // =============================================================================
326
- // Model Mapping
327
- // =============================================================================
328
-
329
- /**
330
- * Map OpenRouter/Kilo API model to ProviderModelConfig
331
- * Shared between OpenRouter and Kilo providers
332
- */
333
- export function mapOpenRouterModel(m: {
334
- id: string;
335
- name: string;
336
- context_length?: number;
337
- max_completion_tokens?: number | null;
338
- top_provider?: { max_completion_tokens?: number | null };
339
- pricing?: { prompt?: string | null; completion?: string | null };
340
- architecture?: {
341
- input_modalities?: string[] | null;
342
- output_modalities?: string[] | null;
343
- };
344
- }): ProviderModelConfig {
345
- const promptPrice = Number.parseFloat(m.pricing?.prompt ?? "0");
346
- const completionPrice = Number.parseFloat(m.pricing?.completion ?? "0");
347
-
348
- return {
349
- id: m.id,
350
- name: cleanModelName(m.name),
351
- reasoning: false, // OpenRouter doesn't expose reasoning flag directly
352
- input: m.architecture?.input_modalities?.includes("image")
353
- ? (["text", "image"] as const)
354
- : (["text"] as const),
355
- cost: {
356
- input: promptPrice,
357
- output: completionPrice,
358
- cacheRead: 0,
359
- cacheWrite: 0,
360
- },
361
- contextWindow: m.context_length ?? 4096,
362
- maxTokens:
363
- m.max_completion_tokens ?? m.top_provider?.max_completion_tokens ?? 4096,
364
- };
365
- }
366
-
367
- // =============================================================================
368
- // OpenAI-Compatible Provider Helpers
369
- // =============================================================================
370
-
371
- /**
372
- * Defaults for mapping models from OpenAI-compatible /v1/models endpoints.
373
- */
374
- export interface OpenAIModelDefaults {
375
- /** Per-model cost defaults (set to 0 if provider is free-tier). */
376
- cost?: { input: number; output: number };
377
- /** Default context window (tokens). */
378
- contextWindow?: number;
379
- /** Default max output tokens. */
380
- maxTokens?: number;
381
- /** Default input modalities. */
382
- input?: string[];
383
- }
384
-
385
- /**
386
- * Generic model shape returned by OpenAI-compatible /v1/models endpoints.
387
- */
388
- export interface OpenAIModelEntry {
389
- id: string;
390
- object?: string;
391
- created?: number;
392
- owned_by?: string;
393
- }
394
-
395
- /**
396
- * Fetch and map models from an OpenAI-compatible /v1/models endpoint.
397
- *
398
- * Eliminates ~40 lines of duplicated fetch→parse→map boilerplate
399
- * that was repeated in CrofAI, DeepInfra, and SambaNova providers.
400
- */
401
- export async function fetchOpenAICompatibleModels(
402
- providerId: string,
403
- baseUrl: string,
404
- apiKey: string,
405
- defaults: OpenAIModelDefaults = {},
406
- ): Promise<PiProviderModelConfig[]> {
407
- const logger = createLogger(providerId);
408
-
409
- logger.info(`[${providerId}] Fetching models...`);
410
-
411
- try {
412
- const response = await fetchWithRetry(
413
- `${baseUrl}/models`,
414
- {
415
- headers: {
416
- Authorization: `Bearer ${apiKey}`,
417
- "Content-Type": "application/json",
418
- },
419
- },
420
- 3,
421
- 1000,
422
- 30000,
423
- );
424
-
425
- if (!response.ok) {
426
- throw new Error(`${providerId} API error: ${response.status}`);
427
- }
428
-
429
- const data = (await response.json()) as { data?: OpenAIModelEntry[] };
430
- const models = data.data ?? [];
431
-
432
- logger.info(`[${providerId}] Fetched ${models.length} models`);
433
-
434
- return models
435
- .filter((m) => m.id)
436
- .map((m): PiProviderModelConfig => {
437
- const name = m.id.split("/").pop() || m.id;
438
- return {
439
- id: m.id,
440
- name,
441
- reasoning: isLikelyReasoningModel({ id: m.id, name }),
442
- input: (defaults.input as PiProviderModelConfig["input"]) ?? ["text"],
443
- cost: {
444
- input: defaults.cost?.input ?? 0,
445
- output: defaults.cost?.output ?? 0,
446
- cacheRead: 0,
447
- cacheWrite: 0,
448
- },
449
- contextWindow: defaults.contextWindow ?? 128_000,
450
- maxTokens: defaults.maxTokens ?? 4_096,
451
- compat: getProxyModelCompat({ id: m.id, name }),
452
- };
453
- });
454
- } catch (error) {
455
- logger.error(`[${providerId}] Failed to fetch models:`, {
456
- error: error instanceof Error ? error.message : String(error),
457
- });
458
- return [];
459
- }
460
- }
1
+ import { createLogger } from "./logger.ts";
2
+ import {
3
+ getProxyModelCompat,
4
+ isLikelyReasoningModel,
5
+ } from "./provider-compat.ts";
6
+ import type { ProviderModelConfig as PiProviderModelConfig } from "@earendil-works/pi-coding-agent";
7
+ import type { ProviderModelConfig } from "./types.ts";
8
+
9
+ const _logger = createLogger("util");
10
+
11
+ // =============================================================================
12
+ // Shared Utilities
13
+ // =============================================================================
14
+
15
/**
 * Emit a provider-scoped warning through the shared "util" logger.
 *
 * @param provider - Provider label; rendered as a `[provider]` prefix.
 * @param message - Warning text placed after the prefix.
 * @param error - Optional cause. When truthy it is stringified and attached
 *   as `{ error }` metadata; falsy values result in no metadata.
 */
export function logWarning(
  provider: string,
  message: string,
  error?: unknown,
): void {
  const text = `[${provider}] ${message}`;
  const details = error ? { error: String(error) } : undefined;
  _logger.warn(text, details);
}
28
+
29
/**
 * Perform a `fetch` that is aborted if it has not settled within `timeoutMs`.
 *
 * The request is always driven by an internal AbortController. A signal
 * supplied by the caller in `options.signal` is forwarded to that controller,
 * so the request stops on whichever happens first: the caller aborting or the
 * timeout elapsing. (Previously the caller's signal was overwritten and
 * therefore ignored.)
 *
 * The timer is cleared as soon as the `fetch` promise settles, so the timeout
 * bounds the wait for the response, not any later reading of its body.
 *
 * @param url - Request URL.
 * @param options - Standard `fetch` init; `signal` is honoured as described.
 * @param timeoutMs - Milliseconds to wait before aborting (default 30000).
 * @returns The response as returned by `fetch`.
 * @throws Whatever `fetch` rejects with, including its abort error.
 */
export async function fetchWithTimeout(
  url: string,
  options: RequestInit,
  timeoutMs = 30000,
): Promise<Response> {
  const controller = new AbortController();
  const callerSignal = options.signal ?? undefined;
  const forwardAbort = (): void => controller.abort();

  if (callerSignal?.aborted) {
    // Caller cancelled before we started: abort up front so fetch rejects.
    controller.abort();
  } else {
    callerSignal?.addEventListener("abort", forwardAbort, { once: true });
  }

  const timeoutId = setTimeout(() => controller.abort(), timeoutMs);

  try {
    return await fetch(url, {
      ...options,
      signal: controller.signal,
    });
  } finally {
    clearTimeout(timeoutId);
    // Do not leave a listener behind on a long-lived caller signal.
    callerSignal?.removeEventListener("abort", forwardAbort);
  }
}
50
+
51
/**
 * Fetch with a per-attempt timeout and linear back-off retries.
 *
 * Outcome per attempt:
 * - 2xx: the response is returned.
 * - 429: an error is thrown immediately, with no further attempts. (The
 *   previous implementation threw inside its own try/catch, so the rate-limit
 *   error was swallowed and the request retried.)
 * - 5xx, or a thrown network/timeout error: retried after
 *   `delayMs * attemptNumber`; once attempts are exhausted the last error is
 *   thrown.
 * - Any other non-ok status: the response is returned for the caller to
 *   inspect.
 *
 * @param url - Request URL.
 * @param options - Standard `fetch` init, passed to `fetchWithTimeout`.
 * @param retries - Total number of attempts (default 3).
 * @param delayMs - Base back-off delay in milliseconds (default 1000).
 * @param timeoutMs - Per-attempt timeout in milliseconds (default 30000).
 * @returns A successful response, or a non-ok response that is not retryable.
 * @throws Error on 429, after exhausting retries, or when `retries` permits
 *   no attempt at all (previously that case threw `undefined`).
 */
export async function fetchWithRetry(
  url: string,
  options: RequestInit,
  retries = 3,
  delayMs = 1000,
  timeoutMs = 30000,
): Promise<Response> {
  const pause = (ms: number): Promise<void> =>
    new Promise((resolve) => setTimeout(resolve, ms));

  let lastError: unknown;

  for (let attempt = 0; attempt < retries; attempt++) {
    const hasAttemptsLeft = attempt < retries - 1;

    let response: Response;
    try {
      response = await fetchWithTimeout(url, options, timeoutMs);
    } catch (error) {
      // Network failure or timeout: retryable.
      lastError = error;
      if (hasAttemptsLeft) {
        await pause(delayMs * (attempt + 1));
      }
      continue;
    }

    if (response.ok) return response;

    // Rate limited: retrying would only add load, so fail right away.
    // This throw is deliberately outside the try/catch above.
    if (response.status === 429) {
      throw new Error(`Rate limited (429)`);
    }

    // Server errors are retryable.
    if (response.status >= 500) {
      lastError = new Error(`Server error ${response.status}`);
      if (hasAttemptsLeft) {
        await pause(delayMs * (attempt + 1));
      }
      continue;
    }

    return response; // Non-ok but non-retryable (e.g. 4xx other than 429)
  }

  throw (
    lastError ??
    new Error(`fetchWithRetry: no attempts were made (retries=${retries})`)
  );
}
95
+
96
+ // =============================================================================
97
+ // Shared API Response Parsing
98
+ // =============================================================================
99
+
100
/**
 * Validate a model-list HTTP response and extract its `data` array.
 * Shared between Kilo, OpenRouter, and other providers.
 *
 * The JSON body is treated as unknown and narrowed before use, so a body
 * that is not an object (for example JSON `null`) is reported with the same
 * "missing data array" error as any other malformed payload instead of
 * surfacing a raw TypeError.
 *
 * @param response - Response from the provider's model-list endpoint.
 * @param providerName - Provider name used in error messages.
 * @returns An object holding the `data` array; elements are not validated.
 * @throws Error when the response is not ok or carries no `data` array.
 *   A body that is not valid JSON rejects with the error from
 *   `response.json()`.
 */
export async function parseModelResponse<T>(
  response: Response,
  providerName: string,
): Promise<{ data: T[] }> {
  if (!response.ok) {
    throw new Error(
      `Failed to fetch ${providerName} models: ${response.status} ${response.statusText}`,
    );
  }

  const payload: unknown = await response.json();
  const data =
    typeof payload === "object" && payload !== null
      ? (payload as { data?: unknown }).data
      : undefined;

  if (!Array.isArray(data)) {
    throw new Error(
      `Invalid ${providerName} models response: missing data array`,
    );
  }

  return { data: data as T[] };
}
124
+
125
+ // =============================================================================
126
+ // Model Filtering Utilities
127
+ // =============================================================================
128
+
129
// Blocklist of model IDs that are rejected whenever a minimum size is
// requested. Several families here (Phi, OpenChat) carry no "<N>b" size
// token in their ID, so size parsing alone cannot reject them; the other
// entries are listed explicitly as well.
// Extend as new small free models appear on OpenRouter/Kilo.
const KNOWN_SMALL_MODELS: ReadonlySet<string> = new Set<string>([
  // --- Microsoft Phi family ---
  "microsoft/phi-3-mini-128k-instruct",
  "microsoft/phi-3-mini-4k-instruct",
  "microsoft/phi-3-small-128k-instruct",
  "microsoft/phi-3-small-8k-instruct",
  "microsoft/phi-3-medium-128k-instruct",
  "microsoft/phi-3-medium-4k-instruct",
  "microsoft/phi-3.5-mini-instruct",
  "microsoft/phi-4-mini-instruct",
  "microsoft/phi-4-mini-reasoning",
  "microsoft/phi-4-reasoning-plus",

  // --- OpenChat (7B) ---
  "openchat/openchat-3.5-0106",
  "openchat/openchat-3.5-1210",

  // --- Mistral 7B variants ---
  "mistralai/mistral-7b-instruct-v0.1",
  "mistralai/mistral-7b-instruct-v0.2",
  "mistralai/mistral-7b-instruct-v0.3",

  // --- Gemma small variants ---
  "google/gemma-2b-it",
  "google/gemma-1.1-2b-it",

  // --- DeepSeek R1 distills ---
  "deepseek/deepseek-r1-distill-qwen-1.5b",
  "deepseek/deepseek-r1-distill-llama-8b",
  "deepseek/deepseek-r1-distill-qwen-7b",
  "deepseek/deepseek-r1-distill-qwen-14b",

  // --- StripedHyena (2.7B) ---
  "togethercomputer/stripedhy-2.7b",

  // --- TinyLlama ---
  "tinyllama/tinyllama-1.1b-chat-v1.0",
]);
163
+
164
/**
 * Decide whether a model should be offered, based on its ID.
 *
 * 1. IDs that look like test or debug models are rejected. The ID is
 *    lower-cased and split on non-alphanumeric characters, and a model is
 *    rejected only when one of the resulting tokens starts with "test" or
 *    "debug". (A plain substring check also matched inside ordinary words
 *    such as "latest", wrongly rejecting every "*-latest" model.)
 * 2. When `minSizeB` is given, IDs on the known-small blocklist are rejected
 *    (after removing a trailing ":free"), and IDs whose parsed size is below
 *    `minSizeB` are rejected. For Mixture-of-Experts IDs such as "8x22b" the
 *    compared size is experts * size-per-expert.
 *
 * IDs with no parseable size pass the size check.
 *
 * @param modelId - Provider model identifier, e.g. "meta-llama/llama-3-70b".
 * @param minSizeB - Optional minimum size, in the same unit as the number
 *   that precedes "b" in the ID.
 * @returns `true` when the model passes every applicable filter.
 */
export function isUsableModel(modelId: string, minSizeB?: number): boolean {
  // Token-based match so "test"/"debug" inside a longer word does not count.
  const idTokens = modelId.toLowerCase().split(/[^a-z0-9]+/);
  const looksLikeTestModel = idTokens.some(
    (token) => token.startsWith("test") || token.startsWith("debug"),
  );
  if (looksLikeTestModel) {
    return false;
  }

  // No size constraint requested: nothing more to check.
  if (minSizeB === undefined) {
    return true;
  }

  // Blocklist entries are stored without the ":free" suffix used by
  // OpenRouter/Kilo.
  const baseId = modelId.replace(/:free$/, "");
  if (KNOWN_SMALL_MODELS.has(baseId)) return false;

  const parsed = parseModelSize(modelId);
  if (parsed === null) {
    return true; // Size unknown: do not filter.
  }

  const totalSizeB =
    parsed.type === "moe"
      ? parsed.experts * parsed.sizePerExpert
      : parsed.size;

  return !(totalSizeB < minSizeB);
}
195
+
196
+ // =============================================================================
197
+ // Model Size Parsing (no regex — avoids SonarCloud S5852 flags)
198
+ // =============================================================================
199
+
200
/** Size parsed from a Mixture-of-Experts token such as "8x22b". */
interface MoeSize {
  /** Discriminant identifying the Mixture-of-Experts shape. */
  type: "moe";
  /** Expert count: the integer written before the "x". */
  experts: number;
  /** Per-expert size: the number written between the "x" and the "b". */
  sizePerExpert: number;
}
205
+
206
/** Size parsed from a plain size token such as "70b" or "1.5b". */
interface StandardSize {
  /** Discriminant identifying the single-number shape. */
  type: "standard";
  /** The number written directly before the "b". */
  size: number;
}
210
+
211
/**
 * Find a Mixture-of-Experts size token such as "8x22b" in a lower-cased
 * model ID, without using regex.
 *
 * Every "x" is examined in turn. A match needs:
 * - a digit directly before the "x" (all preceding digits form the expert
 *   count),
 * - a "b" somewhere after the "x", with at least one character in between,
 *   and the text between them parsing (via `Number.parseFloat`) to a
 *   positive number,
 * - the "b" to be the last character, or followed by something other than a
 *   digit or ".".
 *
 * An "x" at index 0 is skipped rather than ending the scan, so IDs that
 * begin with "x" (for example "x-ai/...") are still searched.
 *
 * @param lower - Model ID, already lower-cased by the caller.
 * @returns The first matching MoE size, or `null` when none is found.
 */
function parseMoeSize(lower: string): MoeSize | null {
  const isDigit = (ch: string | undefined): boolean =>
    ch !== undefined && ch >= "0" && ch <= "9";

  let searchPos = 0;
  while (true) {
    const xIdx = lower.indexOf("x", searchPos);
    if (xIdx === -1) break;
    searchPos = xIdx + 1; // Next iteration resumes after this "x".

    // Needs a digit before it; an "x" at index 0 has nothing before it.
    if (xIdx === 0 || !isDigit(lower[xIdx - 1])) continue;

    // First "b" after the "x"; reject "xb" and a missing "b".
    const bIdx = lower.indexOf("b", xIdx + 1);
    if (bIdx <= xIdx + 1) continue;

    // Walk left over the digits that make up the expert count.
    let countStart = xIdx - 1;
    while (countStart > 0 && isDigit(lower[countStart - 1])) {
      countStart--;
    }

    const experts = Number.parseInt(lower.slice(countStart, xIdx), 10);
    const sizePerExpert = Number.parseFloat(lower.slice(xIdx + 1, bIdx));
    // `!(n > 0)` also rejects NaN.
    if (!(experts > 0) || !(sizePerExpert > 0)) continue;

    // A "b" followed by a digit or "." is part of something else.
    const afterB = lower[bIdx + 1];
    if (afterB === undefined || (!isDigit(afterB) && afterB !== ".")) {
      return { type: "moe", experts, sizePerExpert };
    }
  }
  return null;
}
261
+
262
/**
 * Find a plain size token such as "70b" or "1.5b" in a lower-cased model ID,
 * without using regex.
 *
 * Every "b" is examined in turn. A "b" followed by a digit or "." is
 * skipped. Otherwise the run of digits and dots directly before it is parsed
 * with `Number.parseFloat`; the first positive result wins.
 *
 * The scan continues past a "b" that has no number in front of it. The
 * earlier implementation stopped at the first such "b", so IDs like
 * "bytedance/ui-tars-1.5-7b" or "sao10k/fimbulvetr-11b-v2" yielded no size
 * and bypassed size filtering.
 *
 * @param lower - Model ID, already lower-cased by the caller.
 * @returns The first matching size, or `null` when none is found.
 */
function parseStandardSize(lower: string): StandardSize | null {
  const isNumericChar = (ch: string | undefined): boolean =>
    ch !== undefined && ((ch >= "0" && ch <= "9") || ch === ".");

  for (
    let bIdx = lower.indexOf("b");
    bIdx !== -1;
    bIdx = lower.indexOf("b", bIdx + 1)
  ) {
    // "b" followed by a digit or dot is not a size suffix.
    if (isNumericChar(lower[bIdx + 1])) continue;

    // Walk left over the digits/dots that precede this "b".
    let start = bIdx;
    while (start > 0 && isNumericChar(lower[start - 1])) {
      start--;
    }
    if (start === bIdx) continue; // No number here; try the next "b".

    const size = Number.parseFloat(lower.slice(start, bIdx));
    // `size > 0` is false for NaN, so unparseable runs like "." are skipped.
    if (size > 0) {
      return { type: "standard", size };
    }
  }
  return null;
}
294
+
295
/**
 * Extract a size from a model ID without using regex.
 *
 * The ID is lower-cased, then the Mixture-of-Experts form ("8x22b") is
 * tried first and the plain form ("70b", "8b") second.
 *
 * @param modelId - Model identifier in any letter case.
 * @returns The parsed size, or `null` when neither form is found.
 */
function parseModelSize(modelId: string): MoeSize | StandardSize | null {
  const normalizedId = modelId.toLowerCase();

  const moeSize = parseMoeSize(normalizedId);
  if (moeSize !== null) {
    return moeSize;
  }

  return parseStandardSize(normalizedId);
}
299
+
300
+ // =============================================================================
301
+ // Model Name Cleaning
302
+ // =============================================================================
303
+
304
/**
 * Drop a leading provider prefix from a display name.
 *
 * Names may arrive as "Provider: Model Name" or "Provider / Model Name".
 * Everything up to and including the first ":" or "/" (whichever comes
 * first) is removed and the remainder is trimmed. A separator at index 0 is
 * not treated as a prefix boundary; in that case, and when there is no
 * separator at all, the whole name is returned trimmed.
 *
 * @param name - Display name as reported by the provider.
 * @returns The model-name portion, trimmed.
 */
export function cleanModelName(name: string): string {
  const separatorPositions = [name.indexOf(":"), name.indexOf("/")].filter(
    (position) => position !== -1,
  );
  const firstSeparator =
    separatorPositions.length > 0 ? Math.min(...separatorPositions) : -1;

  const modelPart =
    firstSeparator > 0 ? name.slice(firstSeparator + 1) : name;
  return modelPart.trim();
}
324
+
325
+ // =============================================================================
326
+ // Model Mapping
327
+ // =============================================================================
328
+
329
/**
 * Map an OpenRouter/Kilo model-list entry to a ProviderModelConfig.
 * Shared between the OpenRouter and Kilo providers.
 *
 * - `name` has its provider prefix removed via `cleanModelName`.
 * - `reasoning` is always `false`; this mapping does not read a reasoning
 *   flag from the entry.
 * - `input` includes "image" only when `architecture.input_modalities`
 *   lists it.
 * - Prices are parsed from strings. A missing, null, empty or non-numeric
 *   price becomes 0 rather than NaN.
 * - `contextWindow` and `maxTokens` fall back to 4096 when absent;
 *   `maxTokens` prefers the entry's own `max_completion_tokens`, then
 *   `top_provider.max_completion_tokens`.
 *
 * @param m - Raw model entry from the provider's model list.
 * @returns The mapped model configuration.
 */
export function mapOpenRouterModel(m: {
  id: string;
  name: string;
  context_length?: number;
  max_completion_tokens?: number | null;
  top_provider?: { max_completion_tokens?: number | null };
  pricing?: { prompt?: string | null; completion?: string | null };
  architecture?: {
    input_modalities?: string[] | null;
    output_modalities?: string[] | null;
  };
}): ProviderModelConfig {
  // parseFloat yields NaN for "" or non-numeric text; treat that as free.
  const toPrice = (raw: string | null | undefined): number => {
    const parsed = Number.parseFloat(raw ?? "0");
    return Number.isFinite(parsed) ? parsed : 0;
  };

  const promptPrice = toPrice(m.pricing?.prompt);
  const completionPrice = toPrice(m.pricing?.completion);

  return {
    id: m.id,
    name: cleanModelName(m.name),
    reasoning: false, // OpenRouter doesn't expose reasoning flag directly
    input: m.architecture?.input_modalities?.includes("image")
      ? (["text", "image"] as const)
      : (["text"] as const),
    cost: {
      input: promptPrice,
      output: completionPrice,
      cacheRead: 0,
      cacheWrite: 0,
    },
    contextWindow: m.context_length ?? 4096,
    maxTokens:
      m.max_completion_tokens ?? m.top_provider?.max_completion_tokens ?? 4096,
  };
}
366
+
367
+ // =============================================================================
368
+ // OpenAI-Compatible Provider Helpers
369
+ // =============================================================================
370
+
371
/**
 * Fallback values applied when mapping entries from an OpenAI-compatible
 * /v1/models endpoint. Every field is optional.
 */
export interface OpenAIModelDefaults {
  /** Fallback input/output cost per model (use 0 for free-tier providers). */
  cost?: { input: number; output: number };
  /** Fallback context window, in tokens. */
  contextWindow?: number;
  /** Fallback maximum number of output tokens. */
  maxTokens?: number;
  /** Fallback list of input modalities. */
  input?: string[];
}
384
+
385
/**
 * One entry of an OpenAI-compatible /v1/models listing.
 *
 * Only `id` is required. `object`, `created` and `owned_by` are the standard
 * fields; the rest are optional extensions that some providers (SambaNova,
 * DeepInfra) add. Extensions are accepted loosely: they are used when
 * present and replaced by defaults otherwise.
 */
export interface OpenAIModelEntry {
  id: string;
  object?: string;
  created?: number;
  owned_by?: string;

  // ----- Provider extensions (all optional) -----

  /** Whether the model is a reasoning model, when the provider says so. */
  reasoning?: boolean;
  /** Input modalities accepted by the model, when the provider lists them. */
  input_modalities?: string[];
  /** Context length for this model (SambaNova, etc.). */
  context_length?: number;
  /** Alternate field name for the context length. */
  max_context_length?: number;
  /** Another alternate field name for the context length. */
  context_window?: number;
  /** Maximum completion tokens for this model (SambaNova, etc.). */
  max_completion_tokens?: number;
  /** Alternate field name for the maximum tokens. */
  max_tokens?: number;
  /** Pricing for this model (SambaNova, etc.); values may be numbers or strings. */
  pricing?: { prompt?: string | number; completion?: string | number };
}
414
+
415
/**
 * Convert a per-model price from the API into a usable number.
 * Returns `undefined` (so the caller falls back to its defaults) when the
 * value is absent, blank, or not a finite number.
 */
function parseOpenAIPrice(raw: unknown): number | undefined {
  if (typeof raw !== "number" && typeof raw !== "string") return undefined;
  // Number("") is 0, which would masquerade as a real price.
  if (typeof raw === "string" && raw.trim() === "") return undefined;
  const value = Number(raw);
  return Number.isFinite(value) ? value : undefined;
}

/**
 * Fetch and map models from an OpenAI-compatible /v1/models endpoint.
 *
 * Eliminates the duplicated fetch→parse→map boilerplate that was repeated
 * in the CrofAI, DeepInfra, and SambaNova providers.
 *
 * The body may be a bare array or an object with a `data` array. For each
 * field, a per-model value reported by the API wins, then the matching
 * entry in `defaults`, then a built-in fallback:
 * - context window: `context_length`, `max_context_length`,
 *   `context_window`, `defaults.contextWindow`, 128000
 * - max tokens: `max_completion_tokens`, `max_tokens`,
 *   `defaults.maxTokens`, 4096
 * - reasoning: a boolean `reasoning` field, else `isLikelyReasoningModel`
 * - input: derived from `input_modalities` when the API lists them, else
 *   `defaults.input`, else text only
 * - cost: numeric `pricing.prompt` / `pricing.completion`, else
 *   `defaults.cost`, else 0
 *
 * Entries that are not objects with a non-empty string `id` are skipped, so
 * one malformed entry does not discard the whole list.
 *
 * @param providerId - Provider identifier; used for the logger and messages.
 * @param baseUrl - API base URL; "/models" is appended to it.
 * @param apiKey - Sent as a Bearer token in the Authorization header.
 * @param defaults - Fallback values for fields the API does not report.
 * @returns The mapped models. Any failure (network, non-ok status, invalid
 *   JSON) is logged and results in an empty array; this function does not
 *   reject.
 */
export async function fetchOpenAICompatibleModels(
  providerId: string,
  baseUrl: string,
  apiKey: string,
  defaults: OpenAIModelDefaults = {},
): Promise<PiProviderModelConfig[]> {
  const logger = createLogger(providerId);

  logger.info(`[${providerId}] Fetching models...`);

  try {
    const response = await fetchWithRetry(
      `${baseUrl}/models`,
      {
        headers: {
          Authorization: `Bearer ${apiKey}`,
          "Content-Type": "application/json",
        },
      },
      3,
      1000,
      30000,
    );

    if (!response.ok) {
      throw new Error(`${providerId} API error: ${response.status}`);
    }

    const body = (await response.json()) as
      | OpenAIModelEntry[]
      | { data?: OpenAIModelEntry[] }
      | null;
    const listed = Array.isArray(body) ? body : body?.data;
    const models: OpenAIModelEntry[] = Array.isArray(listed) ? listed : [];

    logger.info(`[${providerId}] Fetched ${models.length} models`);

    return models
      .filter((m) => typeof m?.id === "string" && m.id.length > 0)
      .map((m): PiProviderModelConfig => {
        const name = m.id.split("/").pop() || m.id;

        // Per-model context length first (several field names are in use).
        const contextWindow =
          m.context_length ??
          m.max_context_length ??
          m.context_window ??
          defaults.contextWindow ??
          128_000;

        // Per-model max tokens first (several field names are in use).
        const maxTokens =
          m.max_completion_tokens ??
          m.max_tokens ??
          defaults.maxTokens ??
          4_096;

        // Trust the API's flag only when it really is a boolean.
        const reasoning =
          typeof m.reasoning === "boolean"
            ? m.reasoning
            : isLikelyReasoningModel({ id: m.id, name });

        // Per-model modalities win over the provider-wide default, matching
        // the precedence used for every other field.
        const input: PiProviderModelConfig["input"] = Array.isArray(
          m.input_modalities,
        )
          ? m.input_modalities.includes("image")
            ? ["text", "image"]
            : ["text"]
          : ((defaults.input as PiProviderModelConfig["input"]) ?? ["text"]);

        // Unusable pricing falls through to the defaults instead of NaN.
        const inputCost =
          parseOpenAIPrice(m.pricing?.prompt) ?? defaults.cost?.input ?? 0;
        const outputCost =
          parseOpenAIPrice(m.pricing?.completion) ??
          defaults.cost?.output ??
          0;

        return {
          id: m.id,
          name,
          reasoning,
          input,
          cost: {
            input: inputCost,
            output: outputCost,
            cacheRead: 0,
            cacheWrite: 0,
          },
          contextWindow,
          maxTokens,
          compat: getProxyModelCompat({ id: m.id, name }),
        };
      });
  } catch (error) {
    logger.error(`[${providerId}] Failed to fetch models:`, {
      error: error instanceof Error ? error.message : String(error),
    });
    return [];
  }
}