pi-model-auto 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +164 -0
- package/package.json +42 -0
- package/src/canonical-models.ts +141 -0
- package/src/index.ts +611 -0
- package/src/quota.ts +306 -0
- package/src/router-core.ts +916 -0
package/src/index.ts
ADDED
|
@@ -0,0 +1,611 @@
|
|
|
1
|
+
import { appendFileSync, existsSync, mkdirSync, readFileSync } from "node:fs";
|
|
2
|
+
import { dirname, join } from "node:path";
|
|
3
|
+
import {
|
|
4
|
+
CONFIG_DIR_NAME,
|
|
5
|
+
getAgentDir,
|
|
6
|
+
type ExtensionAPI,
|
|
7
|
+
type ExtensionContext,
|
|
8
|
+
} from "@earendil-works/pi-coding-agent";
|
|
9
|
+
import {
|
|
10
|
+
clampThinkingLevel,
|
|
11
|
+
createAssistantMessageEventStream,
|
|
12
|
+
streamSimple as aiStreamSimple,
|
|
13
|
+
type Api,
|
|
14
|
+
type AssistantMessage,
|
|
15
|
+
type AssistantMessageEvent,
|
|
16
|
+
type Context,
|
|
17
|
+
type Model,
|
|
18
|
+
type SimpleStreamOptions,
|
|
19
|
+
} from "@earendil-works/pi-ai";
|
|
20
|
+
import {
|
|
21
|
+
AA_WILLINGNESS,
|
|
22
|
+
RAMP_WILLINGNESS,
|
|
23
|
+
DEFAULT_CONFIG,
|
|
24
|
+
axisValue,
|
|
25
|
+
buildAutoPool,
|
|
26
|
+
cacheAwareSelect,
|
|
27
|
+
createRoutingState,
|
|
28
|
+
decide,
|
|
29
|
+
frontierChain,
|
|
30
|
+
matchesModelFilter,
|
|
31
|
+
modelKey,
|
|
32
|
+
recordRoutingUsage,
|
|
33
|
+
repriceForTimeOfDay,
|
|
34
|
+
resolveModel,
|
|
35
|
+
routingTurnKey,
|
|
36
|
+
selectFromPool,
|
|
37
|
+
shouldReuseTurnSelection,
|
|
38
|
+
type CacheReason,
|
|
39
|
+
type Decision,
|
|
40
|
+
type Pool,
|
|
41
|
+
type ResolvedModel,
|
|
42
|
+
type RouterConfig,
|
|
43
|
+
type RoutingState,
|
|
44
|
+
type Selection,
|
|
45
|
+
type Tier,
|
|
46
|
+
} from "./router-core.ts";
|
|
47
|
+
import { QuotaState, buildPlanKey, filterPoolByQuota, type PlanState } from "./quota.ts";
|
|
48
|
+
|
|
49
|
+
/** Routing target forced by the user: either a whole tier or one specific model reference. */
type ForcedRoute =
  | { tier: Tier }
  | { model: string };

/** Shape of a successful credential lookup as consumed by `modelPlanKey` and the forwarded request. */
type ResolvedAuth = {
  ok: true;
  apiKey?: string;
  headers?: Record<string, string>;
  env?: Record<string, string>;
};

/** Per-model lookup (keyed by `modelKey`) of the quota plan key and the raw auth result it was derived from. */
type QuotaPlanLookup = Map<
  string,
  {
    planKey: string;
    auth: Awaited<ReturnType<ExtensionContext["modelRegistry"]["getApiKeyAndHeaders"]>>;
  }
>;
|
|
52
|
+
|
|
53
|
+
/**
 * All-zero usage record attached to router-generated error messages.
 * Consumers clone it (see `structuredClone` in `makeRouterError`) rather than sharing this instance.
 */
const ZERO_USAGE = {
  input: 0,
  output: 0,
  cacheRead: 0,
  cacheWrite: 0,
  totalTokens: 0,
  cost: {
    input: 0,
    output: 0,
    cacheRead: 0,
    cacheWrite: 0,
    total: 0,
  },
};
|
|
61
|
+
|
|
62
|
+
/**
 * A routing `Decision` enriched with what was actually selected for the request.
 * Shown by the `/auto` command and written to the router log.
 */
interface LastDecision extends Decision {
  /** `modelKey` of the model the request was forwarded to. */
  chosen: string;
  /** Quota plan key the request is accounted against. */
  planKey: string;
  /** Canonical key of the selected model, or `null` when it has none. */
  canonical: string | null;
  /** Cost tier of the selected model. */
  costTier: string;
  /** Profile the selection was made under, when known. */
  profile?: string;
  /** Confidence reported on the selected model. */
  confidence: string;
  /** Alternatives reported by the selection. */
  alternatives: string[];
  /** Set only when cache-aware selection ran for this request. */
  cacheReason?: CacheReason;
}
|
|
72
|
+
|
|
73
|
+
/**
 * Pi extension entry point for the model router.
 *
 * Registers the `/auto` command, the session lifecycle handlers, an `input` hook that strips
 * `@cheap` / `@strong` / `@model:<ref>` prefixes, and (on every `session_start`) the virtual
 * `pi-router` provider whose single `auto` model forwards each request to a concrete model.
 *
 * @param pi Extension API used to register the command, event handlers and provider.
 */
export default function modelRouter(pi: ExtensionAPI) {
  // Closure state shared by all handlers; most of it is rebuilt on `session_start`.
  let extCtx: ExtensionContext | undefined;
  let cfg: RouterConfig = DEFAULT_CONFIG;
  let quota: QuotaState = new QuotaState(DEFAULT_CONFIG.quota);
  let pool: Pool = { cheapPool: [], strongPool: [], standardPool: [], unknownPool: [], all: [] };
  let forcedRoute: ForcedRoute | undefined;
  let lastDecision: LastDecision | undefined;
  let turnSelection: { key: string; selection: Selection } | undefined;
  let routingState: RoutingState = createRoutingState();
  let providerRegistered = false;

  pi.registerCommand("auto", {
    description: "Show Pi Model Router pool and last decision",
    handler: async (_args, ctx) => {
      ctx.ui.notify(describeRouter(repriceForTimeOfDay(pool, new Date().getHours()), cfg, lastDecision, quota), "info");
    },
  });

  pi.on("session_start", async (_event, ctx) => {
    extCtx = ctx;
    cfg = loadConfig(ctx);
    quota = new QuotaState(cfg.quota);
    quota.load(quotaStateFile());
    pool = applyConfiguredTiers(buildAutoPool(ctx.modelRegistry.getAvailable(), cfg), cfg, ctx);
    turnSelection = undefined;
    routingState = createRoutingState();

    // A fresh api identifier is generated for every registration of the provider.
    const api = `pi-router-api:${Date.now().toString(36)}-${Math.random().toString(36).slice(2)}`;
    if (providerRegistered) pi.unregisterProvider("pi-router");
    providerRegistered = true;

    pi.registerProvider("pi-router", {
      name: "Pi Router",
      api,
      baseUrl: "https://router.local",
      apiKey: "pi-router-dummy-key",
      models: [
        {
          id: "auto",
          name: "Pi Router (Auto)",
          reasoning: true,
          input: ["text", "image"],
          cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
          contextWindow: 1_000_000,
          maxTokens: 64_000,
        },
      ],
      streamSimple,
    });

    updateRouterStatus(ctx, pool.all.length === 0 ? "🧭 no models" : "🧭 ready");
  });

  pi.on("model_select", async (event, ctx) => {
    if (isRouterModel(event.model)) {
      updateRouterStatus(ctx, lastDecision ? shortStatus(lastDecision, quota) : "🧭 ready");
    } else {
      ctx.ui.setStatus("router", undefined);
    }
  });

  pi.on("session_shutdown", async () => {
    forcedRoute = undefined;
    lastDecision = undefined;
    turnSelection = undefined;
    extCtx = undefined;
  });

  pi.on("input", async (event) => {
    if (event.source === "extension") return { action: "continue" };

    // Every non-extension input replaces the forced route; input without a prefix clears it.
    const parsed = parseForcedRoute(event.text);
    forcedRoute = parsed?.route;
    if (!parsed) return { action: "continue" };

    return { action: "transform", text: parsed.text, images: event.images };
  });

  /**
   * Provider stream implementation: picks a concrete model for the request, forwards the call
   * through `aiStreamSimple`, and relays every event onto the returned stream. Any failure while
   * routing or streaming is reported as a single router error event.
   */
  function streamSimple(routerModel: Model<Api>, context: Context, options?: SimpleStreamOptions) {
    const stream = createAssistantMessageEventStream();

    void (async () => {
      try {
        const ctx = extCtx;
        if (!ctx) throw new Error("Pi Router: extension context not initialized");
        if (pool.all.length === 0) {
          throw new Error("Pi Router: no authenticated models. Run /login or configure an API key, then /reload.");
        }

        // Snapshot the forced route before the first await: the `input` handler may replace the
        // shared variable while quota plans are resolving, and the decision, pool choice and
        // cache-aware check below must all agree on the same value.
        const forced = forcedRoute;
        const decision = decide(context, options, forced, cfg);
        const quotaPlans = cfg.quota.enabled && !forced ? await resolveQuotaPlans(ctx, pool) : new Map();
        const turnKey = routingTurnKey(context);
        const cachedSelection = turnSelection;
        const reuseTurnSelection =
          decision.cls !== "model" && shouldReuseTurnSelection(context) && cachedSelection?.key === turnKey;
        // Re-evaluate time-of-day shadow-price windows once per turn (clock read here, pick reused
        // within the turn), so a window boundary like GLM 14:00–18:00 takes effect without a /reload.
        const selectionPool = decision.cls === "model" || reuseTurnSelection
          ? pool
          : repriceForTimeOfDay(
              forced ? pool : usablePoolForQuota(ctx, cfg, pool, quota, Date.now(), quotaPlans),
              new Date().getHours(),
            );

        let selection: Selection;
        let cacheReason: CacheReason | undefined;
        if (reuseTurnSelection) {
          selection = { ...cachedSelection!.selection, reason: `${cachedSelection!.selection.reason}; reused within user turn` };
        } else {
          const fresh = selectModel(decision, selectionPool, context, options, ctx, cfg);
          // Cache-aware stickiness only applies to auto routing (forced routes are the user's explicit choice).
          if (!forced && decision.cls !== "model") {
            const result = cacheAwareSelect(fresh, routingState, selectionPool, context, cfg);
            selection = result.selection;
            cacheReason = result.cacheReason;
          } else {
            selection = fresh;
          }
        }
        const target = selection.selected.model;

        if (decision.cls !== "model") turnSelection = { key: turnKey, selection };

        // Prefer the auth already fetched while resolving quota plans; otherwise look it up now.
        const selectedAuth = quotaPlans.get(modelKey(target))?.auth;
        const auth = selectedAuth ?? await ctx.modelRegistry.getApiKeyAndHeaders(target);
        if (!auth.ok) throw new Error(auth.error);
        const planKey = quotaPlans.get(modelKey(target))?.planKey ?? modelPlanKey(target, auth);

        // Clamp the requested reasoning level and token budget to what the target supports.
        const requestedReasoning = options?.reasoning ?? "off";
        const clampedReasoning = target.reasoning ? clampThinkingLevel(target, requestedReasoning) : "off";
        const reasoning = clampedReasoning === "off" ? undefined : clampedReasoning;
        const maxTokens = Math.min(options?.maxTokens ?? target.maxTokens, target.maxTokens);

        // Keep a per-request record: `lastDecision` is shared and may be overwritten by another
        // request before this stream finishes, which would mislabel this request's log entries.
        const decisionRecord: LastDecision = {
          ...decision,
          chosen: modelKey(target),
          planKey,
          canonical: selection.selected.canonicalKey,
          costTier: selection.selected.costTier,
          profile: selection.profile,
          confidence: selection.selected.confidence,
          reason: selection.reason,
          alternatives: selection.alternatives,
          cacheReason,
        };
        lastDecision = decisionRecord;

        updateRouterStatus(ctx, shortStatus(decisionRecord, quota));
        logDecision(ctx, cfg, decisionRecord);

        const inner = aiStreamSimple(target, context, {
          ...options,
          apiKey: auth.apiKey,
          headers: auth.headers,
          env: auth.env,
          reasoning,
          maxTokens,
          onResponse: async (response, model) => {
            await options?.onResponse?.(response, model);
            if (!cfg.quota.enabled) return;

            recordQuotaChange(ctx, cfg, quota, planKey, () =>
              quota.recordResponse(planKey, response.status, response.headers, target.provider, Date.now()),
            );
            // The status line always reflects the most recent decision, hence the shared variable.
            if (lastDecision) updateRouterStatus(ctx, shortStatus(lastDecision, quota));
          },
        });

        for await (const event of inner) {
          if (event.type === "error" && looksRateLimited(event.error) && cfg.quota.enabled) {
            recordQuotaChange(ctx, cfg, quota, planKey, () =>
              quota.recordRateLimited(planKey, undefined, undefined, Date.now()),
            );
            if (lastDecision) updateRouterStatus(ctx, shortStatus(lastDecision, quota));
          }

          stream.push(event);
          if (event.type === "done" && cfg.cacheAware.enabled) {
            // Re-establish the warm lease from realized usage so the next turn can weigh staying vs switching.
            recordRoutingUsage(routingState, selection.selected, event.message.usage, context);
          }
          if (event.type === "done" || event.type === "error") {
            logTerminalEvent(ctx, cfg, decisionRecord, event);
          }
        }
        stream.end();
      } catch (error) {
        stream.push(makeRouterError(routerModel, error));
        stream.end();
      }
    })();

    return stream;
  }
}
|
|
267
|
+
|
|
268
|
+
/**
 * Turns a routing decision into a concrete selection.
 *
 * For a `"model"` decision the referenced model is looked up directly and returned with the
 * reason `"forced model"`; every other decision is delegated to `selectFromPool`.
 *
 * @throws Error when the forced model cannot be found (or has no configured auth), or when
 *   `selectFromPool` yields no selection.
 */
function selectModel(
  decision: Decision,
  pool: Pool,
  context: Context,
  options: SimpleStreamOptions | undefined,
  ctx: ExtensionContext,
  cfg: RouterConfig,
) {
  if (decision.cls !== "model") {
    const picked = selectFromPool(decision, pool, context, options, cfg);
    if (picked) return picked;
    throw new Error("Pi Router: model pool is empty");
  }

  const forcedModel = findModelByRef(ctx, decision.chosen);
  if (!forcedModel) {
    throw new Error(`Pi Router: forced model not available or not authenticated: ${decision.chosen}`);
  }

  const selected = resolveModel(forcedModel, cfg);
  // Use the model's first profile, defaulting to "balanced" when it lists none.
  const profile = selected.profiles[0] ?? "balanced";
  return { selected, profile, reason: "forced model", alternatives: [] };
}
|
|
287
|
+
|
|
288
|
+
/**
 * Returns a copy of `pool` in which the models configured under `cfg.tierModels.cheap` and
 * `cfg.tierModels.strong` are moved to the front of their tier pool and of `all`.
 *
 * A configured model that cannot be found (or has no configured auth), or that is rejected by
 * `cfg.modelFilter`, is skipped with a warning notification. The input pool is not mutated.
 */
function applyConfiguredTiers(pool: Pool, cfg: RouterConfig, ctx: ExtensionContext): Pool {
  const next: Pool = {
    cheapPool: Array.from(pool.cheapPool),
    strongPool: Array.from(pool.strongPool),
    standardPool: Array.from(pool.standardPool),
    unknownPool: Array.from(pool.unknownPool),
    all: Array.from(pool.all),
  };

  const tiers = ["cheap", "strong"] as const;
  for (const tier of tiers) {
    const ref = cfg.tierModels[tier];
    if (ref) {
      const model = findModelByRef(ctx, ref);
      if (!model) {
        ctx.ui.notify(`Pi Router: configured ${tier} model not found or unauthenticated: ${ref}`, "warning");
      } else {
        const resolved = resolveModel(model, cfg);
        if (matchesModelFilter(resolved, cfg.modelFilter)) {
          const tierPool = tier === "cheap" ? next.cheapPool : next.strongPool;
          prependUnique(tierPool, resolved);
          prependUnique(next.all, resolved);
        } else {
          ctx.ui.notify(`Pi Router: configured ${tier} model rejected by modelFilter: ${ref}`, "warning");
        }
      }
    }
  }

  return next;
}
|
|
319
|
+
|
|
320
|
+
/**
 * Moves `item` to the front of `items` in place: the first existing entry with the same
 * `modelKey` (if any) is removed, then `item` is inserted at index 0.
 */
function prependUnique(items: ResolvedModel[], item: ResolvedModel) {
  const wanted = modelKey(item.model);
  for (let index = 0; index < items.length; index += 1) {
    if (modelKey(items[index].model) === wanted) {
      items.splice(index, 1);
      break; // only the first match is removed
    }
  }
  items.unshift(item);
}
|
|
326
|
+
|
|
327
|
+
/**
 * Looks up a model by a `provider/id` reference. Everything after the first `/` is the id, so
 * ids may themselves contain slashes.
 *
 * @returns The registry model, or `undefined` when the reference is malformed, the model is
 *   unknown, or it has no configured auth.
 */
function findModelByRef(ctx: ExtensionContext, ref: string): Model<Api> | undefined {
  const slash = ref.indexOf("/");
  // No separator, or nothing before it: there is no provider part.
  if (slash <= 0) return undefined;

  const provider = ref.slice(0, slash);
  const id = ref.slice(slash + 1);
  if (id === "") return undefined;

  const found = ctx.modelRegistry.find(provider, id);
  if (found && ctx.modelRegistry.hasConfiguredAuth(found)) return found;
  return undefined;
}
|
|
336
|
+
|
|
337
|
+
/**
 * Sets the "router" status entry to `text`, or clears it when the context's current model is
 * set and is not the router's own `auto` model.
 */
function updateRouterStatus(ctx: ExtensionContext, text: string) {
  const otherModelActive = Boolean(ctx.model) && !isRouterModel(ctx.model);
  ctx.ui.setStatus("router", otherModelActive ? undefined : text);
}
|
|
344
|
+
|
|
345
|
+
/** True only for the router's virtual model: provider `pi-router`, id `auto`. */
function isRouterModel(model: Model<Api> | undefined): boolean {
  if (model?.provider !== "pi-router") return false;
  return model.id === "auto";
}
|
|
348
|
+
|
|
349
|
+
/**
 * Parses a leading `@cheap`, `@strong` or `@model:<ref>` directive from user input.
 *
 * The directive must start the text and be followed by whitespace; the remainder is returned
 * as the prompt text with the directive removed.
 *
 * @returns The forced route plus the remaining text, or `undefined` when there is no directive.
 */
function parseForcedRoute(text: string): { route: ForcedRoute; text: string } | undefined {
  const parts = /^@(cheap|strong|model:([^\s]+))\s+([\s\S]*)$/.exec(text);
  if (parts === null) return undefined;

  // parts[1] is the whole selector, parts[2] the model ref (model form only), parts[3] the rest.
  const [, selector, modelRef, rest] = parts;
  if (selector === "cheap" || selector === "strong") {
    return { route: { tier: selector }, text: rest };
  }
  return { route: { model: modelRef }, text: rest };
}
|
|
355
|
+
|
|
356
|
+
/**
 * Builds the effective router configuration by layering each existing config file from
 * `configPaths` (in order) over `DEFAULT_CONFIG`.
 *
 * A file may hold the settings at top level or under a `router` key. Nested sections are merged
 * key-by-key rather than replaced, and the aliases `models` (for `tierModels`) and `overrides`
 * (for `modelOverrides`) are accepted. A file that cannot be read, is not valid JSON, or does not
 * contain a JSON object is skipped with a warning notification.
 */
function loadConfig(ctx: ExtensionContext): RouterConfig {
  let cfg = DEFAULT_CONFIG;
  let userWillingness: Partial<RouterConfig["willingness"]> = {};
  for (const file of configPaths(ctx)) {
    if (!existsSync(file)) continue;

    try {
      const parsed = JSON.parse(readFileSync(file, "utf8"));
      const router = parsed?.router ?? parsed;
      // Only a plain object can be merged: spreading an array or string would inject index
      // keys into the config, and `null` would otherwise fail with an opaque TypeError.
      if (typeof router !== "object" || router === null || Array.isArray(router)) {
        throw new Error("expected a JSON object");
      }
      cfg = {
        ...cfg,
        ...router,
        weights: { ...cfg.weights, ...(router.weights ?? {}) },
        tierModels: { ...cfg.tierModels, ...(router.tierModels ?? router.models ?? {}) },
        modelFilter: { ...cfg.modelFilter, ...(router.modelFilter ?? {}) },
        modelOverrides: { ...cfg.modelOverrides, ...(router.modelOverrides ?? router.overrides ?? {}) },
        cacheAware: { ...cfg.cacheAware, ...(router.cacheAware ?? {}) },
        quota: { ...cfg.quota, ...(router.quota ?? {}) },
      };
      if (router.willingness) userWillingness = { ...userWillingness, ...router.willingness };
    } catch (error) {
      ctx.ui.notify(`Pi Router: failed to read ${file}: ${error instanceof Error ? error.message : String(error)}`, "warning");
    }
  }

  // The $/quality-point budgets live on different scales per source (list price vs measured cost-per-task),
  // so the base willingness follows capabilitySource; explicit user values overlay it.
  const baseWillingness = cfg.capabilitySource === "aa" ? AA_WILLINGNESS : RAMP_WILLINGNESS;
  cfg = { ...cfg, willingness: { ...baseWillingness, ...userWillingness } };
  return cfg;
}
|
|
387
|
+
|
|
388
|
+
/**
 * Lists candidate `model-router.json` locations in merge order: the agent directory first,
 * then the project's config directory, which is included only when the project is trusted.
 */
function configPaths(ctx: ExtensionContext): string[] {
  const globalPath = join(getAgentDir(), "model-router.json");
  if (!ctx.isProjectTrusted()) return [globalPath];
  return [globalPath, join(ctx.cwd, CONFIG_DIR_NAME, "model-router.json")];
}
|
|
393
|
+
|
|
394
|
+
/** Path of the persisted quota state file inside the agent directory. */
function quotaStateFile(): string {
  const agentDir = getAgentDir();
  return join(agentDir, "quota-state.json");
}
|
|
397
|
+
|
|
398
|
+
/**
 * Renders the multi-line report shown by the `/auto` command: key configuration flags, the
 * contents of each pool, the per-profile frontier chain, the last routing decision and the
 * known quota plan states.
 */
function describeRouter(
  pool: Pool,
  cfg: RouterConfig,
  lastDecision: LastDecision | undefined,
  quota: QuotaState,
): string {
  // Comma-separated model keys, or "none" for an empty pool.
  const keyList = (items: ResolvedModel[]): string =>
    items.map((entry) => modelKey(entry.model)).join(", ") || "none";

  const included = cfg.modelFilter.include.join(", ") || "*";
  const excluded = cfg.modelFilter.exclude.join(", ") || "none";
  const strongDetails = pool.strongPool
    .map((entry) => `${modelKey(entry.model)}(${entry.canonicalKey ?? "unknown"}/${entry.costTier}/${entry.profiles.join("+")})`)
    .join(", ");

  const lines: string[] = [];
  lines.push("Pi Router");
  lines.push(`capabilitySource: ${cfg.capabilitySource}`);
  lines.push(`cacheAware: ${cfg.cacheAware.enabled}`);
  lines.push(`forceStrongOnHighReasoning: ${cfg.forceStrongOnHighReasoning}`);
  lines.push(`modelFilter: include=[${included}] exclude=[${excluded}]`);
  lines.push(`quota: ${cfg.quota.enabled ? "enabled" : "disabled"}`);
  lines.push(`cheapPool: ${keyList(pool.cheapPool)}`);
  lines.push(`strongPool: ${strongDetails || "none"}`);
  lines.push(`standardPool: ${keyList(pool.standardPool)}`);
  lines.push(`unknownPool: ${keyList(pool.unknownPool)}`);
  lines.push("frontier (auto climbs these cheap→strong by hardness):");

  for (const profile of ["coder", "deep", "balanced"] as const) {
    const points = frontierChain(pool.all, profile)
      .map((entry) => `${entry.canonicalKey ?? modelKey(entry.model)}(${axisValue(entry, profile).toFixed(0)}@$${entry.priceBlended})`)
      .join(" → ");
    lines.push(` ${profile}: ${points || "none"}`);
  }

  if (!lastDecision) {
    lines.push("last: none");
  } else {
    lines.push("last:");
    lines.push(` chosen: ${lastDecision.chosen}`);
    lines.push(` planKey: ${lastDecision.planKey}`);
    lines.push(` canonical: ${lastDecision.canonical ?? "unknown"}`);
    lines.push(` costTier: ${lastDecision.costTier}`);
    lines.push(` profile: ${lastDecision.profile ?? "unknown"}`);
    lines.push(` confidence: ${lastDecision.confidence}`);
    lines.push(` cacheReason: ${lastDecision.cacheReason ?? "n/a"}`);
    lines.push(` reason: ${lastDecision.reason ?? "none"}`);
    lines.push(` alternatives: ${lastDecision.alternatives.join(", ") || "none"}`);
  }

  const plans = quota.snapshots();
  if (plans.length === 0) {
    lines.push("plans: none");
  } else {
    lines.push("plans:", ...plans.map((plan) => ` ${formatPlanState(plan)}`));
  }

  return lines.join("\n");
}
|
|
454
|
+
|
|
455
|
+
/** Appends the decision, stamped with the current ISO time, to the router log when `cfg.log` is on. */
function logDecision(ctx: ExtensionContext, cfg: RouterConfig, decision: LastDecision | undefined) {
  if (cfg.log && decision) {
    const entry = { ts: new Date().toISOString(), ...decision };
    appendJsonLine(ctx, entry);
  }
}
|
|
459
|
+
|
|
460
|
+
/**
 * Logs the outcome of a finished request (chosen model, stop reason and usage) when `cfg.log`
 * is on. Only `done` and `error` events are logged; every other event type is ignored.
 */
function logTerminalEvent(
  ctx: ExtensionContext,
  cfg: RouterConfig,
  decision: LastDecision | undefined,
  event: AssistantMessageEvent,
) {
  if (!cfg.log || !decision) return;

  let finalMessage: AssistantMessage;
  switch (event.type) {
    case "done":
      finalMessage = event.message;
      break;
    case "error":
      finalMessage = event.error;
      break;
    default:
      return;
  }

  appendJsonLine(ctx, {
    ts: new Date().toISOString(),
    chosen: decision.chosen,
    stopReason: finalMessage.stopReason,
    usage: finalMessage.usage,
  });
}
|
|
477
|
+
|
|
478
|
+
/** Set after the first failed log write so the user is warned once, not on every request. */
let routerLogFailureReported = false;

/**
 * Appends `value` as one JSON line to `router.log` in the project's config directory, creating
 * the directory when needed.
 *
 * Logging is best-effort: this is called in the middle of request routing, so a failure to
 * serialize or write (read-only directory, full disk, ...) must not abort the model request.
 * The first failure is reported as a warning notification; later ones are dropped silently.
 */
function appendJsonLine(ctx: ExtensionContext, value: unknown) {
  const file = join(ctx.cwd, CONFIG_DIR_NAME, "router.log");
  try {
    mkdirSync(dirname(file), { recursive: true });
    appendFileSync(file, `${JSON.stringify(value)}\n`, "utf8");
  } catch (error) {
    if (routerLogFailureReported) return;
    routerLogFailureReported = true;
    const detail = error instanceof Error ? error.message : String(error);
    ctx.ui.notify(`Pi Router: failed to write ${file}: ${detail}`, "warning");
  }
}
|
|
483
|
+
|
|
484
|
+
/**
 * Returns the pool filtered through `filterPoolByQuota` at time `now`, or the pool unchanged
 * when quota tracking is disabled.
 *
 * Before filtering, every distinct plan key in the pool is probed with `quota.isAvailable`;
 * any plan whose status, reason or cooldown differs afterwards is logged, and the quota state
 * is persisted once if at least one plan changed.
 */
function usablePoolForQuota(
  ctx: ExtensionContext,
  cfg: RouterConfig,
  pool: Pool,
  quota: QuotaState,
  now: number,
  quotaPlans: QuotaPlanLookup,
): Pool {
  if (!cfg.quota.enabled) return pool;

  const planKeyOf = (item: ResolvedModel): string => quotaPlanKeyFor(item, quotaPlans);
  const distinctPlanKeys = new Set(pool.all.map((item) => planKeyOf(item)));

  let dirty = false;
  for (const planKey of distinctPlanKeys) {
    const before = quota.snapshot(planKey);
    // Result is ignored; the call is made so any state change shows up in the snapshot below.
    quota.isAvailable(planKey, now);
    const after = quota.snapshot(planKey);
    if (!quotaStateChanged(before, after)) continue;
    dirty = true;
    logQuotaChange(ctx, cfg, after);
  }
  if (dirty) quota.persist(quotaStateFile());

  return filterPoolByQuota(pool, quota, now, new Set(), (item) => planKeyOf(item));
}
|
|
508
|
+
|
|
509
|
+
/**
 * Runs a quota state `update` for `planKey`, persists the quota state, and logs the plan when
 * its status, reason or cooldown changed as a result.
 *
 * @returns The plan state produced by `update`.
 */
function recordQuotaChange(
  ctx: ExtensionContext,
  cfg: RouterConfig,
  quota: QuotaState,
  planKey: string,
  update: () => PlanState,
): PlanState {
  const previous = quota.snapshot(planKey);
  const current = update();
  // Persisted unconditionally, even when nothing log-worthy changed.
  quota.persist(quotaStateFile());

  const changed = quotaStateChanged(previous, current);
  if (changed) {
    logQuotaChange(ctx, cfg, current);
  }
  return current;
}
|
|
522
|
+
|
|
523
|
+
/**
 * True when two plan states differ in `status`, `reason` or `cooldownUntil`. A missing state
 * compares as having all three fields `undefined`; other fields are ignored.
 */
function quotaStateChanged(before: PlanState | undefined, after: PlanState | undefined): boolean {
  const comparedFields = ["status", "reason", "cooldownUntil"] as const;
  return comparedFields.some((field) => before?.[field] !== after?.[field]);
}
|
|
530
|
+
|
|
531
|
+
/** Logs a plan's key, status, reason and cooldown with a timestamp when `cfg.log` is on. */
function logQuotaChange(ctx: ExtensionContext, cfg: RouterConfig, state: PlanState | undefined) {
  if (cfg.log && state) {
    const { planKey, status, reason, cooldownUntil } = state;
    appendJsonLine(ctx, { ts: new Date().toISOString(), planKey, status, reason, cooldownUntil });
  }
}
|
|
541
|
+
|
|
542
|
+
/**
 * Heuristically decides whether an error message describes a rate-limit or quota failure, based
 * on its stop reason and error text (case-insensitive).
 *
 * The status code 429 is matched only when it is not part of a longer digit run, so unrelated
 * numbers such as request ids or token counts that merely contain "429" do not trigger it.
 */
function looksRateLimited(message: AssistantMessage): boolean {
  const text = `${message.stopReason ?? ""} ${message.errorMessage ?? ""}`.toLowerCase();
  if (/(?<!\d)429(?!\d)/.test(text)) return true;

  const phrases = ["rate limit", "too many requests", "quota"];
  return phrases.some((phrase) => text.includes(phrase));
}
|
|
546
|
+
|
|
547
|
+
/**
 * Builds the compact status-line text: the part of the chosen model key after its last `/`,
 * the cost tier, and a quota tag for the decision's plan (empty when there is nothing to show).
 */
function shortStatus(decision: LastDecision, quota: QuotaState): string {
  const ref = decision.chosen;
  // With no "/" present, lastIndexOf yields -1 and the whole key is kept.
  const shortName = ref.slice(ref.lastIndexOf("/") + 1);
  const tag = quotaStatusTag(quota.snapshot(decision.planKey), Date.now());
  return `🧭 ${shortName} · ${decision.costTier}${tag}`;
}
|
|
551
|
+
|
|
552
|
+
/**
 * Formats a short quota suffix for a plan state, with a leading space.
 *
 * - In cooldown with a known end: the whole minutes left (rounded up, never negative).
 * - Otherwise, when the last snapshot has `remaining` and a positive `limit`: percent remaining.
 * - Otherwise: an empty string.
 */
function quotaStatusTag(state: PlanState | undefined, now: number): string {
  if (!state) return "";

  const { status, cooldownUntil, lastSnapshot } = state;
  if (status === "cooldown" && cooldownUntil != null) {
    const minutesLeft = Math.ceil((cooldownUntil - now) / 60_000);
    return ` ⏳${Math.max(0, minutesLeft)}m`;
  }

  const remaining = lastSnapshot?.remaining;
  const limit = lastSnapshot?.limit;
  if (remaining == null || limit == null || !(limit > 0)) return "";
  return ` ${Math.round((100 * remaining) / limit)}%`;
}
|
|
564
|
+
|
|
565
|
+
/**
 * Formats one plan state as a single line: `<planKey>: <status>` followed, when present, by the
 * reason, the cooldown end as an ISO timestamp, and the quota tag, each separated by one space.
 */
function formatPlanState(state: PlanState): string {
  const tag = quotaStatusTag(state, Date.now()).trim();

  const segments: string[] = [`${state.planKey}: ${state.status}`];
  if (state.reason) segments.push(`reason=${state.reason}`);
  if (state.cooldownUntil) segments.push(`cooldownUntil=${new Date(state.cooldownUntil).toISOString()}`);
  if (tag) segments.push(tag);
  return segments.join(" ");
}
|
|
570
|
+
|
|
571
|
+
/**
 * Resolves auth for every model in `pool.all` concurrently and returns, per model key, the
 * auth result together with the quota plan key derived from it. When the auth lookup is not
 * `ok`, the plan key is derived from the model alone.
 */
async function resolveQuotaPlans(ctx: ExtensionContext, pool: Pool): Promise<QuotaPlanLookup> {
  const lookups = pool.all.map(async (item) => {
    const auth = await ctx.modelRegistry.getApiKeyAndHeaders(item.model);
    const planKey = modelPlanKey(item.model, auth.ok ? auth : undefined);
    return [modelKey(item.model), { planKey, auth }] as const;
  });
  const entries = await Promise.all(lookups);
  return new Map(entries);
}
|
|
581
|
+
|
|
582
|
+
/** Plan key for a pool entry: the pre-resolved one when present, else one derived from the model alone. */
function quotaPlanKeyFor(item: ResolvedModel, quotaPlans: QuotaPlanLookup): string {
  const known = quotaPlans.get(modelKey(item.model));
  if (known?.planKey != null) return known.planKey;
  return modelPlanKey(item.model);
}
|
|
585
|
+
|
|
586
|
+
/**
 * Derives the quota plan key for a model via `buildPlanKey`, from its provider and base URL
 * plus, when auth is supplied, the API key, headers and env of that auth.
 */
function modelPlanKey(model: Model<Api>, auth?: ResolvedAuth): string {
  const apiKey = auth?.apiKey;
  const headers = auth?.headers;
  const env = auth?.env;
  return buildPlanKey({ provider: model.provider, baseUrl: model.baseUrl, apiKey, headers, env });
}
|
|
595
|
+
|
|
596
|
+
/**
 * Wraps a routing failure in an `error` stream event attributed to the router model. The
 * message carries the error text, zeroed usage and the stop reason `"error"`.
 */
function makeRouterError(model: Model<Api>, error: unknown): AssistantMessageEvent {
  let errorMessage: string;
  if (error instanceof Error) {
    errorMessage = error.message;
  } else {
    errorMessage = String(error);
  }

  const { api, provider, id } = model;
  const failure: AssistantMessage = {
    role: "assistant",
    content: [{ type: "text", text: `Pi Router error: ${errorMessage}` }],
    api,
    provider,
    model: id,
    // Cloned so the shared zero-usage constant is never handed out by reference.
    usage: structuredClone(ZERO_USAGE),
    stopReason: "error",
    errorMessage,
    timestamp: Date.now(),
  };

  return { type: "error", reason: "error", error: failure };
}
|