pi-free 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,378 +1,634 @@
1
- /**
2
- * TokenRouter Provider Extension
3
- *
4
- * TokenRouter is an OpenAI-compatible API gateway routing to 90+ models
5
- * across multiple providers (OpenAI, Anthropic, Google, DeepSeek, Qwen, etc.).
6
- *
7
- * API: https://api.tokenrouter.com/v1
8
- * Models: /v1/models
9
- *
10
- * Setup:
11
- * TOKENROUTER_API_KEY=sk-...
12
- * # or add tokenrouter_api_key to ~/.pi/free.json
13
- */
14
-
15
- import type {
16
- ExtensionAPI,
17
- ProviderModelConfig,
18
- } from "@earendil-works/pi-coding-agent";
19
- import type { AssistantMessage, ThinkingContent } from "@earendil-works/pi-ai";
20
- import {
21
- getTokenrouterApiKey,
22
- getTokenrouterShowPaid,
23
- applyHidden,
24
- } from "../../config.ts";
25
- import {
26
- BASE_URL_TOKENROUTER,
27
- DEFAULT_FETCH_TIMEOUT_MS,
28
- PROVIDER_TOKENROUTER,
29
- } from "../../constants.ts";
30
- import { createLogger } from "../../lib/logger.ts";
31
- import { safeEnrichModelsWithModelsDev } from "../../lib/model-metadata.ts";
32
- import {
33
- DEEPSEEK_PROXY_COMPAT,
34
- getProxyModelCompat,
35
- isLikelyReasoningModel,
36
- } from "../../lib/provider-compat.ts";
37
- import { isFreeModel, registerWithGlobalToggle } from "../../lib/registry.ts";
38
- import { cleanModelName, fetchWithRetry } from "../../lib/util.ts";
39
- import { createReRegister, setupProvider } from "../../provider-helper.ts";
40
-
41
- const _logger = createLogger("tokenrouter");
42
-
43
- // =============================================================================
44
- // Reasoning cleanup
45
- // TokenRouter's MiniMax-M3 model sometimes emits DeepSeek-style `<think>`
46
- // reasoning tags inline in the assistant text. Pi does not strip them, so we
47
- // extract them into proper ThinkingContent blocks on message_end.
48
- // =============================================================================
49
-
50
- interface ExtractedThinking {
51
- text: string;
52
- thinking: string;
53
- }
54
-
55
- function collapseWhitespace(text: string): string {
56
- return text
57
- .replace(/\r\n/g, "\n")
58
- .replace(/\n{3,}/g, "\n\n")
59
- .replace(/[ \t]+/g, " ")
60
- .trim();
61
- }
62
-
63
- function extractThinkBlocks(text: string): ExtractedThinking {
64
- const openTag = "<think>";
65
- const closeTag = "</think>";
66
- const thinkingParts: string[] = [];
67
- const textParts: string[] = [];
68
- let cursor = 0;
69
-
70
- while (cursor < text.length) {
71
- const openStart = text.indexOf(openTag, cursor);
72
- if (openStart === -1) {
73
- textParts.push(text.slice(cursor));
74
- break;
75
- }
76
-
77
- textParts.push(text.slice(cursor, openStart));
78
- const valueStart = openStart + openTag.length;
79
- const closeStart = text.indexOf(closeTag, valueStart);
80
- if (closeStart === -1) {
81
- // Unclosed think tag: treat remainder as thinking.
82
- thinkingParts.push(text.slice(valueStart));
83
- break;
84
- }
85
-
86
- thinkingParts.push(text.slice(valueStart, closeStart));
87
- cursor = closeStart + closeTag.length;
88
- }
89
-
90
- return {
91
- text: collapseWhitespace(textParts.join("")),
92
- thinking: collapseWhitespace(thinkingParts.join("\n\n")),
93
- };
94
- }
95
-
96
- function isTokenRouterModel(model: { provider?: string }): boolean {
97
- return model.provider === PROVIDER_TOKENROUTER;
98
- }
99
-
100
- // =============================================================================
101
- // Known Free Models
102
- // TokenRouter doesn't expose pricing via /v1/models, so known-free models
103
- // are hardcoded. Detected via name suffix also catches `:free`-tagged models.
104
- // =============================================================================
105
-
106
- const MINIMAX_M3_ID = "MiniMax-M3";
107
- const KNOWN_FREE_MODELS = new Set([MINIMAX_M3_ID]);
108
- const MINIMAX_ADAPTIVE_COMPAT: NonNullable<ProviderModelConfig["compat"]> = {
109
- ...DEEPSEEK_PROXY_COMPAT,
110
- thinkingFormat: "deepseek",
111
- };
112
-
113
- // =============================================================================
114
- // Types
115
- // =============================================================================
116
-
117
- interface TokenRouterModel {
118
- id: string;
119
- object: string;
120
- created: number;
121
- owned_by: string;
122
- supported_endpoint_types: string[];
123
- tags?: string;
124
- }
125
-
126
- // =============================================================================
127
- // Helpers
128
- // =============================================================================
129
-
130
- /** Text-capable chat endpoints (excludes image/video/audio-only types) */
131
- const CHAT_ENDPOINT_TYPES = new Set([
132
- "openai",
133
- "openai-response",
134
- "anthropic",
135
- "anthropic-compatible",
136
- "gemini",
137
- ]);
138
-
139
- function isTextChatModel(model: TokenRouterModel): boolean {
140
- const tags = (model.tags ?? "").toLowerCase();
141
- // Exclude models whose only tags are non-text
142
- const nonTextTags = ["image", "video", "audio"];
143
- const hasNonTextTag = nonTextTags.some((t) => tags.includes(t));
144
- const hasTextTag = tags.includes("text");
145
- // If it has a text tag, include it. If only non-text tags, exclude.
146
- if (hasTextTag) return true;
147
- if (hasNonTextTag && !hasTextTag) return false;
148
- // No tags or empty tags: check endpoint types
149
- return model.supported_endpoint_types.some((t) => CHAT_ENDPOINT_TYPES.has(t));
150
- }
151
-
152
- function isTokenRouterMinimaxModel(modelId: string): boolean {
153
- return modelId.toLowerCase().includes("minimax");
154
- }
155
-
156
- export function finalizeTokenRouterModel(
157
- model: ProviderModelConfig,
158
- ): ProviderModelConfig {
159
- if (!isTokenRouterMinimaxModel(model.id)) return model;
160
-
161
- return {
162
- ...model,
163
- reasoning: true,
164
- compat: {
165
- ...MINIMAX_ADAPTIVE_COMPAT,
166
- ...(model.compat ?? {}),
167
- thinkingFormat: "deepseek",
168
- supportsReasoningEffort: true,
169
- },
170
- };
171
- }
172
-
173
- export function normalizeAssistantMessage(
174
- message: AssistantMessage,
175
- ): AssistantMessage {
176
- const newContent: AssistantMessage["content"] = [];
177
- let extractedThinking = "";
178
-
179
- for (const block of message.content) {
180
- if (block.type !== "text") {
181
- newContent.push(block);
182
- continue;
183
- }
184
-
185
- const extracted = extractThinkBlocks(block.text);
186
- if (extracted.thinking) {
187
- extractedThinking = extractedThinking
188
- ? `${extractedThinking}\n\n${extracted.thinking}`
189
- : extracted.thinking;
190
- }
191
- if (extracted.text) {
192
- newContent.push({ ...block, text: extracted.text });
193
- }
194
- }
195
-
196
- if (extractedThinking) {
197
- newContent.push({
198
- type: "thinking",
199
- thinking: extractedThinking,
200
- } as ThinkingContent);
201
- }
202
-
203
- return { ...message, content: newContent };
204
- }
205
-
206
- export function patchTokenRouterMinimaxThinkingPayload(
207
- payload: unknown,
208
- ): unknown {
209
- if (typeof payload !== "object" || payload === null) return payload;
210
- const body = payload as {
211
- model?: unknown;
212
- thinking?: { type?: unknown };
213
- };
214
- if (!isTokenRouterMinimaxModel(String(body.model ?? ""))) return payload;
215
- if (body.thinking?.type !== "enabled") return payload;
216
-
217
- return {
218
- ...body,
219
- thinking: {
220
- ...body.thinking,
221
- type: "adaptive",
222
- },
223
- };
224
- }
225
-
226
- export function mapTokenRouterModel(
227
- model: TokenRouterModel,
228
- ): ProviderModelConfig & {
229
- _pricingKnown?: boolean;
230
- _freeKnown?: boolean;
231
- _isFree?: boolean;
232
- } {
233
- const name = cleanModelName(model.id);
234
- const isMinimax = isTokenRouterMinimaxModel(model.id);
235
- const reasoning = isMinimax || isLikelyReasoningModel({ id: model.id, name });
236
- const isResponseApi =
237
- model.supported_endpoint_types.includes("openai-response");
238
- const isKnownFree = KNOWN_FREE_MODELS.has(model.id);
239
-
240
- return {
241
- id: model.id,
242
- name,
243
- reasoning,
244
- input: ["text"],
245
- cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
246
- contextWindow: 128_000,
247
- maxTokens: 16_384,
248
- compat: {
249
- ...(isMinimax
250
- ? MINIMAX_ADAPTIVE_COMPAT
251
- : getProxyModelCompat({ id: model.id, name })),
252
- // openai-response models use a different API shape
253
- ...(isResponseApi ? { apiType: "openai-response" as const } : {}),
254
- },
255
- // Known-free models bypass pricing detection entirely
256
- _freeKnown: isKnownFree,
257
- _isFree: isKnownFree,
258
- // Non-free models signal no pricing data (name-based detection only)
259
- _pricingKnown: false,
260
- } as ProviderModelConfig & { _pricingKnown?: boolean };
261
- }
262
-
263
- // =============================================================================
264
- // Fetch Models
265
- // =============================================================================
266
-
267
- async function fetchTokenRouterModels(
268
- apiKey: string,
269
- ): Promise<ProviderModelConfig[]> {
270
- _logger.info("[tokenrouter] Fetching models from TokenRouter API...");
271
-
272
- try {
273
- const response = await fetchWithRetry(
274
- `${BASE_URL_TOKENROUTER}/models`,
275
- {
276
- headers: {
277
- Authorization: `Bearer ${apiKey}`,
278
- Accept: "application/json",
279
- "Content-Type": "application/json",
280
- },
281
- },
282
- 3,
283
- 1000,
284
- DEFAULT_FETCH_TIMEOUT_MS,
285
- );
286
-
287
- if (!response.ok) {
288
- throw new Error(`TokenRouter API error: ${response.status}`);
289
- }
290
-
291
- const json = (await response.json()) as { data?: TokenRouterModel[] };
292
- const models = (json.data ?? []).filter(isTextChatModel);
293
-
294
- _logger.info(`[tokenrouter] Fetched ${models.length} text chat models`);
295
- const enriched = await safeEnrichModelsWithModelsDev(
296
- models.map(mapTokenRouterModel),
297
- { providerId: PROVIDER_TOKENROUTER },
298
- );
299
- return applyHidden(
300
- enriched.map(finalizeTokenRouterModel),
301
- PROVIDER_TOKENROUTER,
302
- );
303
- } catch (error) {
304
- _logger.error("[tokenrouter] Failed to fetch models", {
305
- error: error instanceof Error ? error.message : String(error),
306
- });
307
- return [];
308
- }
309
- }
310
-
311
- // =============================================================================
312
- // Extension Entry Point
313
- // =============================================================================
314
-
315
- export default async function tokenRouterProvider(pi: ExtensionAPI) {
316
- const apiKey = getTokenrouterApiKey();
317
-
318
- if (!apiKey) {
319
- _logger.info("[tokenrouter] Skipping — TOKENROUTER_API_KEY not set.");
320
- return;
321
- }
322
-
323
- const allModels = await fetchTokenRouterModels(apiKey);
324
-
325
- if (allModels.length === 0) {
326
- _logger.warn("[tokenrouter] No text chat models available");
327
- return;
328
- }
329
-
330
- const freeModels = allModels.filter((m) =>
331
- isFreeModel({ ...m, provider: PROVIDER_TOKENROUTER }, allModels),
332
- );
333
- const stored = { free: freeModels, all: allModels };
334
-
335
- _logger.info(
336
- `[tokenrouter] Registered ${allModels.length} models (${freeModels.length} free)`,
337
- );
338
-
339
- const reRegister = createReRegister(pi, {
340
- providerId: PROVIDER_TOKENROUTER,
341
- baseUrl: BASE_URL_TOKENROUTER,
342
- apiKey,
343
- });
344
-
345
- registerWithGlobalToggle(PROVIDER_TOKENROUTER, stored, reRegister, true);
346
-
347
- pi.on("before_provider_request", (event) =>
348
- patchTokenRouterMinimaxThinkingPayload(event.payload),
349
- );
350
-
351
- pi.on("message_end", (event, ctx) => {
352
- if (!isTokenRouterModel(ctx.model ?? {})) return;
353
- if (event.message.role !== "assistant") return;
354
- return { message: normalizeAssistantMessage(event.message) };
355
- });
356
-
357
- setupProvider(
358
- pi,
359
- {
360
- providerId: PROVIDER_TOKENROUTER,
361
- initialShowPaid: getTokenrouterShowPaid(),
362
- tosUrl: "https://tokenrouter.com/terms",
363
- reRegister: (models, _stored) => {
364
- if (_stored) {
365
- stored.free = _stored.free;
366
- stored.all = _stored.all;
367
- }
368
- reRegister(models);
369
- },
370
- },
371
- stored,
372
- );
373
-
374
- const showPaid = getTokenrouterShowPaid();
375
- const initialModels =
376
- showPaid && stored.all.length > 0 ? stored.all : freeModels;
377
- reRegister(initialModels);
378
- }
1
+ /**
2
+ * TokenRouter Provider Extension
3
+ *
4
+ * TokenRouter is an OpenAI-compatible API gateway routing to 90+ models
5
+ * across multiple providers (OpenAI, Anthropic, Google, DeepSeek, Qwen, etc.).
6
+ *
7
+ * API: https://api.tokenrouter.com/v1
8
+ * Models: /v1/models
9
+ *
10
+ * Setup:
11
+ * TOKENROUTER_API_KEY=sk-...
12
+ * # or add tokenrouter_api_key to ~/.pi/free.json
13
+ */
14
+
15
+ import type {
16
+ ExtensionAPI,
17
+ ProviderModelConfig,
18
+ } from "@earendil-works/pi-coding-agent";
19
+ import type {
20
+ Api,
21
+ AssistantMessage,
22
+ AssistantMessageEvent,
23
+ AssistantMessageEventStream,
24
+ Context,
25
+ Model,
26
+ SimpleStreamOptions,
27
+ ThinkingContent,
28
+ } from "@earendil-works/pi-ai";
29
+ import {
30
+ createAssistantMessageEventStream,
31
+ streamSimpleOpenAICompletions,
32
+ } from "@earendil-works/pi-ai";
33
+ import {
34
+ getTokenrouterApiKey,
35
+ getTokenrouterShowPaid,
36
+ applyHidden,
37
+ } from "../../config.ts";
38
+ import {
39
+ BASE_URL_TOKENROUTER,
40
+ DEFAULT_FETCH_TIMEOUT_MS,
41
+ PROVIDER_TOKENROUTER,
42
+ } from "../../constants.ts";
43
+ import { createLogger } from "../../lib/logger.ts";
44
+ import { safeEnrichModelsWithModelsDev } from "../../lib/model-metadata.ts";
45
+ import {
46
+ DEEPSEEK_PROXY_COMPAT,
47
+ getProxyModelCompat,
48
+ isLikelyReasoningModel,
49
+ } from "../../lib/provider-compat.ts";
50
+ import { isFreeModel, registerWithGlobalToggle } from "../../lib/registry.ts";
51
+ import { cleanModelName, fetchWithRetry } from "../../lib/util.ts";
52
+ import { enhanceWithCI, setupProvider } from "../../provider-helper.ts";
53
+
54
/** Module-level logger created with the "tokenrouter" name; used for all log output in this file. */
const _logger = createLogger("tokenrouter");
55
+
56
+ // =============================================================================
57
+ // Reasoning cleanup
58
+ // TokenRouter's MiniMax-M3 model sometimes emits DeepSeek-style `<think>`
59
+ // reasoning tags inline in the assistant text. Pi does not strip them, so we
60
+ // extract them into proper ThinkingContent blocks on message_end.
61
+ // =============================================================================
62
+
63
/** Result of separating inline `<think>` reasoning from the remaining assistant text. */
interface ExtractedThinking {
  /** Text found outside of `<think>` blocks. */
  text: string;
  /** Content found inside `<think>` blocks. */
  thinking: string;
}
67
+
68
/**
 * Normalizes whitespace in `text`.
 *
 * CRLF pairs become LF, runs of three or more newlines shrink to exactly two,
 * runs of spaces/tabs shrink to a single space, and the result is trimmed.
 *
 * @param text - Raw text to normalize.
 * @returns The normalized text.
 */
function collapseWhitespace(text: string): string {
  const unixNewlines = text.replace(/\r\n/g, "\n");
  const limitedBlankLines = unixNewlines.replace(/\n{3,}/g, "\n\n");
  const singleSpaced = limitedBlankLines.replace(/[ \t]+/g, " ");
  return singleSpaced.trim();
}
75
+
76
/**
 * Splits `text` into the parts outside and inside `<think>…</think>` tags.
 *
 * Each block runs from `<think>` to the first following `</think>`; an
 * unclosed `<think>` claims everything up to the end of the input. Text
 * segments are concatenated directly, thinking segments are joined with a
 * blank line, and both results are passed through `collapseWhitespace`.
 *
 * @param text - Raw assistant text that may contain inline reasoning tags.
 * @returns The visible text and the extracted reasoning.
 */
function extractThinkBlocks(text: string): ExtractedThinking {
  const reasoningChunks: string[] = [];
  // Lazy body + `(?:</think>|$)` mirrors "first closing tag, else rest of input".
  const thinkBlock = /<think>([\s\S]*?)(?:<\/think>|$)/g;

  const visible = text.replace(thinkBlock, (_match: string, inner: string) => {
    reasoningChunks.push(inner);
    return "";
  });

  return {
    text: collapseWhitespace(visible),
    thinking: collapseWhitespace(reasoningChunks.join("\n\n")),
  };
}
108
+
109
/**
 * Reports whether a model descriptor belongs to this provider.
 *
 * @param model - Object whose optional `provider` field is inspected.
 * @returns `true` when `provider` equals `PROVIDER_TOKENROUTER`.
 */
function isTokenRouterModel(model: { provider?: string }): boolean {
  const { provider } = model;
  return provider === PROVIDER_TOKENROUTER;
}
112
+
113
+ // =============================================================================
114
+ // Known Free Models
115
+ // TokenRouter doesn't expose pricing via /v1/models, so known-free models
116
+ // are hardcoded. Detected via name suffix also catches `:free`-tagged models.
117
+ // =============================================================================
118
+
119
/** Id of the MiniMax model that is hardcoded as free. */
const MINIMAX_M3_ID = "MiniMax-M3";

/** Model ids flagged as free without consulting pricing data. */
const KNOWN_FREE_MODELS = new Set([MINIMAX_M3_ID]);

/** Api identifier passed to `pi.registerProvider` for this provider. */
const TOKENROUTER_OPENAI_API = "tokenrouter-openai-completions" as const;

/** Pause, in milliseconds, before the single retry after a high-load error. */
const TOKENROUTER_HIGH_LOAD_RETRY_DELAY_MS = 30_000;

/** Compat settings for MiniMax models: the DeepSeek proxy defaults with `thinkingFormat` pinned to "deepseek". */
const MINIMAX_ADAPTIVE_COMPAT: NonNullable<ProviderModelConfig["compat"]> = {
  ...DEEPSEEK_PROXY_COMPAT,
  thinkingFormat: "deepseek",
};
127
+
128
+ // =============================================================================
129
+ // Types
130
+ // =============================================================================
131
+
132
/** Shape of one entry in the `data` array returned by the TokenRouter `/models` endpoint, as assumed by this file. */
interface TokenRouterModel {
  /** Model identifier; reused as the registered model id. */
  id: string;
  /** Not read anywhere in this file. */
  object: string;
  /** Not read anywhere in this file. */
  created: number;
  /** Not read anywhere in this file. */
  owned_by: string;
  /** Endpoint kinds the model supports; matched against `CHAT_ENDPOINT_TYPES` and "openai-response". */
  supported_endpoint_types: string[];
  /** Optional tag string; searched case-insensitively for "text", "image", "video" and "audio". */
  tags?: string;
}
140
+
141
+ // =============================================================================
142
+ // Helpers
143
+ // =============================================================================
144
+
145
/**
 * Endpoint types treated as text-capable chat endpoints
 * (image/video/audio-only endpoint types are not listed).
 */
const CHAT_ENDPOINT_TYPES = new Set<string>([
  "openai",
  "openai-response",
  "anthropic",
  "anthropic-compatible",
  "gemini",
]);
153
+
154
/**
 * Decides whether a TokenRouter model entry should be offered as a text chat model.
 *
 * Rules, in order:
 * 1. Tags mentioning "text" → included.
 * 2. Tags mentioning "image", "video" or "audio" (and not "text") → excluded.
 * 3. Otherwise → included only if at least one supported endpoint type is in
 *    `CHAT_ENDPOINT_TYPES`.
 *
 * The entry comes from an unvalidated JSON response, so a non-string `tags`
 * or a missing/non-array `supported_endpoint_types` is tolerated instead of
 * throwing (a throw here would abort filtering of the whole model list).
 *
 * @param model - Entry from the `/models` response.
 * @returns `true` when the model is considered text-chat capable.
 */
function isTextChatModel(model: TokenRouterModel): boolean {
  const rawTags: unknown = model.tags;
  const tags = typeof rawTags === "string" ? rawTags.toLowerCase() : "";

  if (tags.includes("text")) return true;

  const nonTextTags = ["image", "video", "audio"];
  if (nonTextTags.some((tag) => tags.includes(tag))) return false;

  // No decisive tags: fall back to the advertised endpoint types.
  const endpointTypes: unknown = model.supported_endpoint_types;
  if (!Array.isArray(endpointTypes)) return false;
  return endpointTypes.some((kind) => CHAT_ENDPOINT_TYPES.has(kind));
}
166
+
167
/**
 * Reports whether a model id refers to a MiniMax model.
 *
 * @param modelId - Model identifier to inspect.
 * @returns `true` when the id contains "minimax", ignoring case.
 */
function isTokenRouterMinimaxModel(modelId: string): boolean {
  const normalizedId = modelId.toLowerCase();
  return normalizedId.indexOf("minimax") !== -1;
}
170
+
171
/**
 * Applies the final MiniMax-specific overrides to a model config.
 *
 * Non-MiniMax models are returned as the same object. For MiniMax models a
 * copy is returned with `reasoning` set to `true` and `compat` built from
 * `MINIMAX_ADAPTIVE_COMPAT`, then the model's own compat, then the forced
 * `thinkingFormat: "deepseek"` and `supportsReasoningEffort: true`.
 *
 * @param model - Model config to finalize.
 * @returns The original config, or an adjusted copy for MiniMax models.
 */
export function finalizeTokenRouterModel(
  model: ProviderModelConfig,
): ProviderModelConfig {
  if (isTokenRouterMinimaxModel(model.id)) {
    const compat: NonNullable<ProviderModelConfig["compat"]> = {
      ...MINIMAX_ADAPTIVE_COMPAT,
      ...(model.compat ?? {}),
      // Forced last so values in the model's own compat cannot override them.
      thinkingFormat: "deepseek",
      supportsReasoningEffort: true,
    };
    return { ...model, reasoning: true, compat };
  }

  return model;
}
187
+
188
/**
 * Moves inline `<think>…</think>` reasoning out of an assistant message's text
 * blocks and into a single trailing thinking block.
 *
 * Blocks that are not text, and text blocks that contain no `<think>` tag,
 * are passed through untouched. Passing tag-free text through matters because
 * `extractThinkBlocks` whitespace-collapses its output, which would otherwise
 * flatten indentation (e.g. in code blocks) of every ordinary message.
 * Text blocks that do contain a tag are still normalized by `extractThinkBlocks`;
 * a block that is empty after extraction is dropped.
 *
 * @param message - Assistant message to normalize; not mutated.
 * @returns A copy of `message` with the rebuilt content array.
 */
export function normalizeAssistantMessage(
  message: AssistantMessage,
): AssistantMessage {
  const newContent: AssistantMessage["content"] = [];
  const thinkingChunks: string[] = [];

  for (const block of message.content) {
    // Nothing to extract: keep the block exactly as received.
    if (block.type !== "text" || !block.text.includes("<think>")) {
      newContent.push(block);
      continue;
    }

    const extracted = extractThinkBlocks(block.text);
    if (extracted.thinking) thinkingChunks.push(extracted.thinking);
    if (extracted.text) newContent.push({ ...block, text: extracted.text });
  }

  if (thinkingChunks.length > 0) {
    newContent.push({
      type: "thinking",
      thinking: thinkingChunks.join("\n\n"),
    } as ThinkingContent);
  }

  return { ...message, content: newContent };
}
220
+
221
/**
 * Type guard for plain object-like values.
 *
 * @param value - Value to test.
 * @returns `true` for non-null objects that are not arrays.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || typeof value !== "object") return false;
  return !Array.isArray(value);
}
224
+
225
/**
 * Recursively searches a value for a `model` property naming a MiniMax model.
 *
 * Arrays are searched element by element and objects property by property;
 * any other value yields `false`. A `model` property is stringified before it
 * is checked, and its value is also searched recursively.
 *
 * @param value - Arbitrary payload (or payload fragment) to inspect.
 * @returns `true` when a matching `model` property exists at any depth.
 */
function containsTokenRouterMinimaxModel(value: unknown): boolean {
  if (Array.isArray(value)) {
    return value.some((element) => containsTokenRouterMinimaxModel(element));
  }
  if (!isRecord(value)) return false;

  return Object.entries(value).some(
    ([key, child]) =>
      (key === "model" && isTokenRouterMinimaxModel(String(child ?? ""))) ||
      containsTokenRouterMinimaxModel(child),
  );
}
239
+
240
/**
 * Returns a copy of `value` in which every `thinking` object whose `type` is
 * "enabled" has its `type` replaced by "adaptive".
 *
 * The walk is recursive over arrays and plain objects. The input is never
 * mutated, and when nothing needs changing the original reference is returned
 * so callers can detect "no change" by identity or via `changed`.
 *
 * @param value - Payload or payload fragment to patch.
 * @returns The (possibly new) value and whether anything was rewritten.
 */
function patchThinkingType(value: unknown): {
  value: unknown;
  changed: boolean;
} {
  const untouched = { value, changed: false };

  if (Array.isArray(value)) {
    let anyElementChanged = false;
    const rebuilt = value.map((element) => {
      const outcome = patchThinkingType(element);
      if (outcome.changed) anyElementChanged = true;
      return outcome.value;
    });
    return anyElementChanged ? { value: rebuilt, changed: true } : untouched;
  }

  if (!isRecord(value)) return untouched;

  let dirty = false;
  const rebuilt: Record<string, unknown> = {};
  for (const [key, before] of Object.entries(value)) {
    const after = patchThinkingType(before).value;
    if (key === "thinking" && isRecord(after) && after.type === "enabled") {
      rebuilt[key] = { ...after, type: "adaptive" };
      dirty = true;
      continue;
    }
    // A new reference from the recursive call means something below changed.
    if (after !== before) dirty = true;
    rebuilt[key] = after;
  }

  return dirty ? { value: rebuilt, changed: true } : untouched;
}
270
+
271
/**
 * Rewrites `thinking.type: "enabled"` to "adaptive" in a request payload that
 * targets a MiniMax model.
 *
 * Object payloads are patched when `force` is set or when the payload contains
 * a MiniMax `model` property; otherwise they are returned as-is. String
 * payloads are parsed as JSON, patched, and re-serialized only if something
 * changed; strings that fail to parse are returned unchanged.
 *
 * @param payload - Request payload, either an object graph or a JSON string.
 * @param force - Patch even when no MiniMax `model` property is found.
 * @returns The original payload when nothing changed, otherwise the patched one.
 */
export function patchTokenRouterMinimaxThinkingPayload(
  payload: unknown,
  force = false,
): unknown {
  if (typeof payload !== "string") {
    const applies = force || containsTokenRouterMinimaxModel(payload);
    if (!applies) return payload;

    const outcome = patchThinkingType(payload);
    return outcome.changed ? outcome.value : payload;
  }

  try {
    const parsed = JSON.parse(payload) as unknown;
    const patched = patchTokenRouterMinimaxThinkingPayload(parsed, force);
    // Identity means no rewrite happened, so keep the caller's exact string.
    if (patched === parsed) return payload;
    return JSON.stringify(patched);
  } catch {
    return payload;
  }
}
289
+
290
/**
 * Recognizes the provider's "high load" error from an error message.
 *
 * @param message - Error message, possibly undefined.
 * @returns `true` when the message contains "(2064)" or the phrase
 *   "server cluster is currently under high load" (case-insensitive).
 */
function isTokenRouterHighLoadError(message: string | undefined): boolean {
  if (!message) return false;

  const lowered = message.toLowerCase();
  const markers = ["(2064)", "server cluster is currently under high load"];
  return markers.some((marker) => lowered.includes(marker));
}
297
+
298
/** Event types that carry text, thinking or tool-call output. */
const TOKENROUTER_OUTPUT_EVENT_TYPES: ReadonlySet<string> = new Set([
  "text_start",
  "text_delta",
  "text_end",
  "thinking_start",
  "thinking_delta",
  "thinking_end",
  "toolcall_start",
  "toolcall_delta",
  "toolcall_end",
]);

/**
 * Reports whether a stream event carries model output.
 *
 * @param event - Event emitted by an assistant message stream.
 * @returns `true` for the start/delta/end events of text, thinking and tool calls.
 */
function isOutputEvent(event: AssistantMessageEvent): boolean {
  return TOKENROUTER_OUTPUT_EVENT_TYPES.has(event.type);
}
311
+
312
/**
 * Waits `ms` milliseconds, ending early if `signal` aborts.
 *
 * @param ms - Delay in milliseconds.
 * @param signal - Optional abort signal that cancels the wait.
 * @returns A promise that resolves after the delay, or rejects with
 *   `Error("aborted")` if the signal is already aborted or aborts while waiting.
 */
function waitForTokenRouterRetry(
  ms: number,
  signal: AbortSignal | undefined,
): Promise<void> {
  if (signal?.aborted) {
    return Promise.reject(new Error("aborted"));
  }

  return new Promise<void>((resolve, reject) => {
    function handleAbort(): void {
      clearTimeout(timer);
      reject(new Error("aborted"));
    }

    const timer = setTimeout(() => {
      // Timer won the race: detach the listener so it does not linger on the signal.
      signal?.removeEventListener("abort", handleAbort);
      resolve();
    }, ms);

    signal?.addEventListener("abort", handleAbort, { once: true });
  });
}
329
+
330
/**
 * Starts one OpenAI-completions streaming attempt with request payloads
 * patched for MiniMax.
 *
 * The model is passed on with `api` overridden to "openai-completions". The
 * `onPayload` hook patches the payload, hands it to the caller's own
 * `onPayload` (if any), then patches whatever that hook returned, falling
 * back to the first patched payload when it returned nothing.
 *
 * @param model - Model to stream from.
 * @param context - Conversation context forwarded unchanged.
 * @param options - Caller's stream options; spread into the forwarded options.
 * @returns The event stream produced by `streamSimpleOpenAICompletions`.
 */
function createTokenRouterOpenAIStream(
  model: Model<Api>,
  context: Context,
  options: SimpleStreamOptions | undefined,
): AssistantMessageEventStream {
  const forcePatch = isTokenRouterMinimaxModel(model.id);
  const applyPatch = (candidate: unknown): unknown =>
    patchTokenRouterMinimaxThinkingPayload(candidate, forcePatch);

  return streamSimpleOpenAICompletions(
    { ...model, api: "openai-completions" },
    context,
    {
      ...options,
      onPayload: async (payload, payloadModel) => {
        const patchedFirst = applyPatch(payload);
        const fromCaller = await options?.onPayload?.(
          patchedFirst,
          payloadModel,
        );
        // Patch again: the caller's hook may have replaced the payload.
        return applyPatch(fromCaller ?? patchedFirst);
      },
    },
  );
}
358
+
359
/**
 * Builds the empty assistant message attached to an error event raised by the
 * retry wrapper.
 *
 * @param model - Model whose `api`, `provider` and `id` are copied into the message.
 * @param options - Stream options; an aborted `signal` selects stop reason "aborted".
 * @param error - Thrown value; its `message` (or string form) becomes `errorMessage`.
 * @returns An assistant message with empty content, all-zero usage, and the
 *   current time as `timestamp`.
 */
function createTokenRouterRetryErrorMessage(
  model: Model<Api>,
  options: SimpleStreamOptions | undefined,
  error: unknown,
): AssistantMessage {
  const wasAborted = options?.signal?.aborted;
  const errorText = error instanceof Error ? error.message : String(error);
  const zeroCost = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 };

  return {
    role: "assistant",
    content: [],
    api: model.api,
    provider: model.provider,
    model: model.id,
    usage: {
      input: 0,
      output: 0,
      cacheRead: 0,
      cacheWrite: 0,
      totalTokens: 0,
      cost: zeroCost,
    },
    stopReason: wasAborted ? "aborted" : "error",
    errorMessage: errorText,
    timestamp: Date.now(),
  };
}
383
+
384
/**
 * Wraps a streaming attempt so that a TokenRouter "high load" error seen
 * before any output triggers exactly one retry after a fixed delay.
 *
 * First attempt: events that carry no output are buffered until the first
 * output event arrives, at which point the buffer is flushed and everything
 * is forwarded live. If a high-load error arrives before any output, the
 * attempt is abandoned, its buffered events are discarded, and after
 * `TOKENROUTER_HIGH_LOAD_RETRY_DELAY_MS` a second attempt is forwarded
 * verbatim. Any other first attempt is forwarded in full.
 *
 * Anything thrown along the way (including an aborted wait) is reported as a
 * single error event built by `createTokenRouterRetryErrorMessage`.
 *
 * @param model - Model used to label a synthesized error message.
 * @param createAttempt - Factory that starts a fresh streaming attempt.
 * @param options - Stream options; `signal` cancels the retry wait.
 * @returns The output stream that receives the forwarded events.
 */
function streamWithTokenRouterHighLoadRetry(
  model: Model<Api>,
  createAttempt: () => AssistantMessageEventStream,
  options: SimpleStreamOptions | undefined,
): AssistantMessageEventStream {
  const output = createAssistantMessageEventStream();

  void (async () => {
    const buffer: AssistantMessageEvent[] = [];
    let flushed = false;
    let sawOutput = false;

    function flushBuffer(): void {
      if (flushed) return;
      flushed = true;
      for (const event of buffer) output.push(event);
      buffer.length = 0;
    }

    try {
      const first = createAttempt();
      let retryAfterHighLoad = false;
      for await (const event of first) {
        if (isOutputEvent(event)) {
          sawOutput = true;
          flushBuffer();
          output.push(event);
          continue;
        }

        if (
          event.type === "error" &&
          !sawOutput &&
          isTokenRouterHighLoadError(event.error.errorMessage)
        ) {
          retryAfterHighLoad = true;
          break;
        }

        if (flushed) output.push(event);
        else buffer.push(event);
      }

      if (!retryAfterHighLoad) {
        flushBuffer();
        return;
      }

      // The first attempt is abandoned: drop its buffered events so the catch
      // path below cannot emit them after (or instead of) the retry's events.
      buffer.length = 0;
      flushed = true;

      _logger.warn(
        "[tokenrouter] Server cluster high load (2064); retrying once after 30s",
      );
      await waitForTokenRouterRetry(
        TOKENROUTER_HIGH_LOAD_RETRY_DELAY_MS,
        options?.signal,
      );
      for await (const event of createAttempt()) output.push(event);
    } catch (error) {
      flushBuffer();
      const message = createTokenRouterRetryErrorMessage(model, options, error);
      output.push({
        type: "error",
        reason: message.stopReason as "error" | "aborted",
        error: message,
      });
    }
  })();

  return output;
}
453
+
454
/**
 * Streaming entry point registered for the TokenRouter provider: an
 * OpenAI-completions stream wrapped in the high-load retry logic.
 *
 * @param model - Model to stream from.
 * @param context - Conversation context.
 * @param options - Optional stream options, shared by every attempt.
 * @returns The retry-wrapped event stream.
 */
export function streamSimpleTokenRouter(
  model: Model<Api>,
  context: Context,
  options?: SimpleStreamOptions,
): AssistantMessageEventStream {
  const startAttempt = (): AssistantMessageEventStream =>
    createTokenRouterOpenAIStream(model, context, options);

  return streamWithTokenRouterHighLoadRetry(model, startAttempt, options);
}
465
+
466
/** Module internals re-exported under one object (by its name, intended for tests). */
export const __test__ = {
  TOKENROUTER_HIGH_LOAD_RETRY_DELAY_MS,
  isTokenRouterHighLoadError,
  streamWithTokenRouterHighLoadRetry,
  waitForTokenRouterRetry,
};
472
+
473
/**
 * Converts a TokenRouter `/models` entry into a provider model config.
 *
 * - `name` is the cleaned model id.
 * - `reasoning` is true for MiniMax ids or when `isLikelyReasoningModel` says so.
 * - Cost is all zeros and the context/output limits are fixed defaults
 *   (128 000 / 16 384); the endpoint supplies no such data to this function.
 * - `compat` is `MINIMAX_ADAPTIVE_COMPAT` for MiniMax, otherwise the proxy
 *   compat for the model, plus `apiType: "openai-response"` when the entry
 *   lists that endpoint type.
 * - `_freeKnown` / `_isFree` reflect membership in `KNOWN_FREE_MODELS`;
 *   `_pricingKnown` is always `false`.
 *
 * The entry originates from unvalidated JSON, so a missing or non-array
 * `supported_endpoint_types` is treated as "no endpoint types" rather than
 * causing a throw.
 *
 * @param model - Entry from the `/models` response.
 * @returns The model config with the private pricing markers attached.
 */
export function mapTokenRouterModel(
  model: TokenRouterModel,
): ProviderModelConfig & {
  _pricingKnown?: boolean;
  _freeKnown?: boolean;
  _isFree?: boolean;
} {
  const name = cleanModelName(model.id);
  const isMinimax = isTokenRouterMinimaxModel(model.id);
  const reasoning = isMinimax || isLikelyReasoningModel({ id: model.id, name });
  const endpointTypes: unknown = model.supported_endpoint_types;
  const isResponseApi =
    Array.isArray(endpointTypes) && endpointTypes.includes("openai-response");
  const isKnownFree = KNOWN_FREE_MODELS.has(model.id);

  return {
    id: model.id,
    name,
    reasoning,
    input: ["text"],
    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
    contextWindow: 128_000,
    maxTokens: 16_384,
    compat: {
      ...(isMinimax
        ? MINIMAX_ADAPTIVE_COMPAT
        : getProxyModelCompat({ id: model.id, name })),
      // openai-response models use a different API shape
      ...(isResponseApi ? { apiType: "openai-response" as const } : {}),
    },
    // Known-free models bypass pricing detection entirely
    _freeKnown: isKnownFree,
    _isFree: isKnownFree,
    // Non-free models signal no pricing data (name-based detection only)
    _pricingKnown: false,
  } as ProviderModelConfig & { _pricingKnown?: boolean };
}
509
+
510
+ // =============================================================================
511
+ // Fetch Models
512
+ // =============================================================================
513
+
514
/**
 * Downloads the TokenRouter model list and converts it into provider model
 * configs.
 *
 * The response's `data` array is filtered with `isTextChatModel`, mapped with
 * `mapTokenRouterModel`, enriched via `safeEnrichModelsWithModelsDev`,
 * finalized with `finalizeTokenRouterModel`, and passed through `applyHidden`.
 * Any failure (including a non-OK HTTP status) is logged and results in an
 * empty list rather than a thrown error.
 *
 * @param apiKey - TokenRouter API key sent as a bearer token.
 * @returns The prepared model configs, or `[]` on failure.
 */
async function fetchTokenRouterModels(
  apiKey: string,
): Promise<ProviderModelConfig[]> {
  _logger.info("[tokenrouter] Fetching models from TokenRouter API...");

  const requestInit = {
    headers: {
      Authorization: `Bearer ${apiKey}`,
      Accept: "application/json",
      "Content-Type": "application/json",
    },
  };

  try {
    const response = await fetchWithRetry(
      `${BASE_URL_TOKENROUTER}/models`,
      requestInit,
      3,
      1000,
      DEFAULT_FETCH_TIMEOUT_MS,
    );
    if (!response.ok) {
      throw new Error(`TokenRouter API error: ${response.status}`);
    }

    const body = (await response.json()) as { data?: TokenRouterModel[] };
    const chatModels = (body.data ?? []).filter(isTextChatModel);
    _logger.info(`[tokenrouter] Fetched ${chatModels.length} text chat models`);

    const mapped = chatModels.map(mapTokenRouterModel);
    const enriched = await safeEnrichModelsWithModelsDev(mapped, {
      providerId: PROVIDER_TOKENROUTER,
    });
    const finalized = enriched.map(finalizeTokenRouterModel);
    return applyHidden(finalized, PROVIDER_TOKENROUTER);
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    _logger.error("[tokenrouter] Failed to fetch models", { error: reason });
    return [];
  }
}
557
+
558
+ // =============================================================================
559
+ // Extension Entry Point
560
+ // =============================================================================
561
+
562
/**
 * Extension entry point: registers TokenRouter as a provider.
 *
 * Does nothing when no API key is configured or when no models could be
 * fetched. Otherwise it, in order:
 * 1. splits the fetched models into free and all,
 * 2. hands them to `registerWithGlobalToggle`,
 * 3. hooks `before_provider_request` to patch MiniMax thinking payloads,
 * 4. hooks `message_end` to normalize assistant messages of TokenRouter models,
 * 5. calls `setupProvider` with a re-register callback that also refreshes the
 *    stored model lists,
 * 6. performs the initial registration with all models when "show paid" is on
 *    (and the stored list is non-empty), else with the free models only.
 *
 * @param pi - Extension API used to register the provider and event hooks.
 */
export default async function tokenRouterProvider(pi: ExtensionAPI) {
  const apiKey = getTokenrouterApiKey();
  if (!apiKey) {
    _logger.info("[tokenrouter] Skipping — TOKENROUTER_API_KEY not set.");
    return;
  }

  const allModels = await fetchTokenRouterModels(apiKey);
  if (allModels.length === 0) {
    _logger.warn("[tokenrouter] No text chat models available");
    return;
  }

  const freeModels = allModels.filter((candidate) =>
    isFreeModel({ ...candidate, provider: PROVIDER_TOKENROUTER }, allModels),
  );
  const stored = { free: freeModels, all: allModels };

  _logger.info(
    `[tokenrouter] Registered ${allModels.length} models (${freeModels.length} free)`,
  );

  // (Re)registers the provider with the given model list.
  const reRegister = (models: ProviderModelConfig[]) => {
    pi.registerProvider(PROVIDER_TOKENROUTER, {
      baseUrl: BASE_URL_TOKENROUTER,
      apiKey,
      api: TOKENROUTER_OPENAI_API,
      streamSimple: streamSimpleTokenRouter,
      headers: { "User-Agent": "pi-free-providers" },
      models: enhanceWithCI(models, PROVIDER_TOKENROUTER),
    });
  };

  registerWithGlobalToggle(PROVIDER_TOKENROUTER, stored, reRegister, true);

  pi.on("before_provider_request", (event, ctx) => {
    // Force the patch only when the active model is a TokenRouter MiniMax model.
    const force =
      isTokenRouterModel(ctx.model ?? {}) &&
      isTokenRouterMinimaxModel(ctx.model?.id ?? "");
    return patchTokenRouterMinimaxThinkingPayload(event.payload, force);
  });

  pi.on("message_end", (event, ctx) => {
    if (
      !isTokenRouterModel(ctx.model ?? {}) ||
      event.message.role !== "assistant"
    ) {
      return;
    }
    return { message: normalizeAssistantMessage(event.message) };
  });

  setupProvider(
    pi,
    {
      providerId: PROVIDER_TOKENROUTER,
      initialShowPaid: getTokenrouterShowPaid(),
      tosUrl: "https://tokenrouter.com/terms",
      reRegister: (models, _stored) => {
        // Keep the shared `stored` object in sync before re-registering.
        if (_stored) {
          stored.free = _stored.free;
          stored.all = _stored.all;
        }
        reRegister(models);
      },
    },
    stored,
  );

  const showPaid = getTokenrouterShowPaid();
  const usePaidList = showPaid && stored.all.length > 0;
  reRegister(usePaidList ? stored.all : freeModels);
}