@kylebrodeur/pi-model-router 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +42 -0
- package/CONTRIBUTING.md +310 -0
- package/LEARNINGS.md +181 -0
- package/LICENSE +21 -0
- package/QUICKSTART.md +111 -0
- package/README.md +195 -0
- package/TESTING.md +374 -0
- package/docs/ARCHITECTURE.md +54 -0
- package/docs/UPSTREAM_ISSUE_scoped_models.md +94 -0
- package/extensions/commands.ts +1068 -0
- package/extensions/config.ts +415 -0
- package/extensions/constants.ts +1 -0
- package/extensions/index.ts +583 -0
- package/extensions/ollama-sync.ts +254 -0
- package/extensions/provider.ts +558 -0
- package/extensions/rate-limit.ts +317 -0
- package/extensions/routing.ts +418 -0
- package/extensions/scope-shim.ts +213 -0
- package/extensions/state.ts +49 -0
- package/extensions/types.ts +148 -0
- package/extensions/ui.ts +130 -0
- package/model-router.agent-bus.json +15 -0
- package/model-router.essential.json +31 -0
- package/model-router.example.json +70 -0
- package/model-router.ledger.json +15 -0
- package/package.json +64 -0
|
@@ -0,0 +1,317 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Rate Limit Fallback Feature
|
|
3
|
+
*
|
|
4
|
+
* Monitors provider responses for rate limiting.
|
|
5
|
+
* NOTE: Requires Pi 0.67+ for after_provider_response event.
|
|
6
|
+
*/
|
|
7
|
+
import type {
|
|
8
|
+
ExtensionAPI,
|
|
9
|
+
ExtensionContext,
|
|
10
|
+
} from '@mariozechner/pi-coding-agent';
|
|
11
|
+
|
|
12
|
+
// ─── Types ──────────────────────────────────────────────────────────────────
|
|
13
|
+
|
|
14
|
+
/**
 * User-tunable settings for the rate-limit fallback feature.
 */
export interface RateLimitConfig {
  /** When false, no event handlers are registered at initialization. */
  enabled: boolean;

  /**
   * Retry-After values (in seconds) below this threshold only produce a
   * "retry after" warning instead of a fallback.
   */
  shortDelayThreshold: number;

  /** Switch models automatically when a trigger status is seen and no fallback is active. */
  autoFallback: boolean;

  /** Copied into the fallback state when a fallback succeeds. */
  autoRestore: boolean;

  /** NOTE(review): not read anywhere in this module; units are not shown here - confirm with the consumer. */
  restoreCheckInterval: number;

  /**
   * Ordered `provider/id` patterns to try. A pattern is either an exact
   * `provider/id` or a prefix ending in `*`.
   */
  fallbackSequence: string[];
}
|
|
22
|
+
|
|
23
|
+
/**
 * One recorded provider response that matched a fallback trigger status.
 */
export interface RateLimitEventEntry {
  /** `Date.now()` value (milliseconds) captured when the entry was recorded. */
  timestamp: number;

  /** Provider of the model that was active when the response arrived. */
  provider: string;

  /** Id of the model that was active when the response arrived. */
  model: string;

  /** Retry-After value reported with the response, when one was supplied. */
  retryAfter?: number;

  /** HTTP status code of the provider response. */
  httpStatus: number;
}
|
|
30
|
+
|
|
31
|
+
/**
 * Snapshot of whether the router is currently running on a fallback model.
 */
export interface FallbackState {
  /** `provider/id` of the model to return to when the fallback is restored. */
  preferredModel?: string;

  /** True after a successful fallback switch, false again after a successful restore. */
  fallbackActive: boolean;

  /** Copy of the config's `autoRestore` flag taken when the fallback happened. */
  autoRestore: boolean;

  /** `Date.now()` value (milliseconds) of the most recent successful fallback. */
  triggeredAt?: number;

  /** Why the most recent fallback was started. */
  triggerReason?: 'rate_limit' | 'budget_exceeded' | 'manual';
}
|
|
38
|
+
|
|
39
|
+
// ─── Config ─────────────────────────────────────────────────────────────────
|
|
40
|
+
|
|
41
|
+
/**
 * Baseline settings used for every key the user configuration leaves unset.
 * Monitoring is on by default; automatic switching and restoring are opt-in.
 */
export const DEFAULT_RATE_LIMIT_CONFIG: RateLimitConfig = {
  enabled: true,
  // Retry-After values shorter than this many seconds only produce a warning.
  shortDelayThreshold: 60,
  autoFallback: false,
  autoRestore: false,
  restoreCheckInterval: 300,
  // Any model whose `provider/id` starts with "ollama/".
  fallbackSequence: ['ollama/*'],
};
|
|
49
|
+
|
|
50
|
+
// ─── Module State ───────────────────────────────────────────────────────────
|
|
51
|
+
|
|
52
|
+
/** Current fallback status for this module; starts with no fallback active. */
let state: FallbackState = { fallbackActive: false, autoRestore: false };

/** Recorded trigger responses, oldest first. */
let history: Array<RateLimitEventEntry> = [];
|
|
58
|
+
|
|
59
|
+
// ─── Helpers ────────────────────────────────────────────────────────────────
|
|
60
|
+
|
|
61
|
+
/**
 * Selects the fallback model for the given pattern sequence.
 *
 * Patterns are tried in order, so earlier patterns take priority; within one
 * pattern the first available model that matches wins. A pattern matches a
 * model's `provider/id` either exactly or, when it ends in `*`, as a prefix.
 *
 * @param ctx - Extension context whose model registry lists the available models.
 * @param sequence - Ordered `provider/id` patterns.
 * @returns The provider and id of the first match, or `undefined` when nothing matches.
 */
const findBestFallbackModel = (
  ctx: ExtensionContext,
  sequence: string[],
): { provider: string; id: string } | undefined => {
  const candidates = ctx.modelRegistry.getAvailable();

  for (const pattern of sequence) {
    // Work out the wildcard prefix once per pattern instead of once per model.
    const wildcardPrefix = pattern.endsWith('*')
      ? pattern.slice(0, -1)
      : undefined;

    for (const candidate of candidates) {
      const qualifiedId = `${candidate.provider}/${candidate.id}`;
      const isExact = qualifiedId === pattern;
      const isPrefixed =
        wildcardPrefix !== undefined && qualifiedId.startsWith(wildcardPrefix);

      if (isExact || isPrefixed) {
        return { provider: candidate.provider, id: candidate.id };
      }
    }
  }

  return undefined;
};
|
|
82
|
+
|
|
83
|
+
// ─── Public API ─────────────────────────────────────────────────────────────
|
|
84
|
+
|
|
85
|
+
/**
 * Switches the session to the first available model matching the configured
 * fallback sequence and records that a fallback is active.
 *
 * The model in use before the switch is remembered as the preferred model
 * (unless its provider is `ollama` or a fallback is already active) so that
 * a later restore can return to it.
 *
 * @param pi - Extension API used to change the active model.
 * @param ctx - Extension context providing the current model, the model
 *   registry and, optionally, the session manager.
 * @param config - Rate-limit settings; an empty `fallbackSequence` falls back
 *   to `['ollama/*']`.
 * @param triggerReason - Why the fallback was requested; stored in the state.
 * @param contextCompressionEnabled - When true, labels the current session
 *   leaf `router-fallback-start` after a successful switch.
 * @returns Whether the switch happened, plus a human-readable message.
 */
export const tryFallback = async (
  pi: ExtensionAPI,
  ctx: ExtensionContext,
  config: RateLimitConfig,
  triggerReason: FallbackState['triggerReason'] = 'manual',
  contextCompressionEnabled: boolean = false,
): Promise<{ success: boolean; message: string }> => {
  const currentModel = ctx.model;

  if (!currentModel) {
    return { success: false, message: 'No current model' };
  }

  // Only remember the model when it is not itself a fallback: skip ollama
  // models and skip everything while a fallback is already active.
  if (currentModel.provider !== 'ollama' && !state.fallbackActive) {
    state.preferredModel = `${currentModel.provider}/${currentModel.id}`;
  }

  const target = findBestFallbackModel(
    ctx,
    config.fallbackSequence.length > 0 ? config.fallbackSequence : ['ollama/*'],
  );

  if (!target) {
    return { success: false, message: 'No fallback models available' };
  }

  const targetLabel = `${target.provider}/${target.id}`;

  // "Switching" to the model that is already in use would mark the fallback
  // as active without changing anything, which blocks further auto-fallback
  // and can leave the state without a model to restore to.
  if (
    target.provider === currentModel.provider &&
    target.id === currentModel.id
  ) {
    return {
      success: false,
      message: `Already using ${targetLabel}; no other fallback model selected`,
    };
  }

  const targetModel = ctx.modelRegistry.find(target.provider, target.id);
  if (!targetModel) {
    return {
      success: false,
      message: `Model ${targetLabel} not in registry. Try /reload.`,
    };
  }

  const success = await pi.setModel(targetModel);
  if (success) {
    state.fallbackActive = true;
    state.autoRestore = config.autoRestore;
    state.triggeredAt = Date.now();
    state.triggerReason = triggerReason;

    // Context Compression Bridge: bookmark the start of the fallback period.
    if (
      contextCompressionEnabled &&
      ctx.sessionManager &&
      'appendLabelChange' in ctx.sessionManager
    ) {
      try {
        // eslint-disable-next-line @typescript-eslint/no-explicit-any -- label API is probed at runtime
        const sm = ctx.sessionManager as any;
        const leafId = sm.getLeafId();
        if (leafId) {
          sm.appendLabelChange(leafId, 'router-fallback-start');
        }
      } catch {
        // Best effort: the bookmark is optional, so a session manager
        // without working label support must not fail the fallback.
      }
    }
  }

  return {
    success,
    message: success
      ? `Switched to ${targetLabel}`
      : `Failed to switch to ${targetLabel}`,
  };
};
|
|
151
|
+
|
|
152
|
+
/**
 * Switches back to the model that was active before the fallback.
 *
 * @param pi - Extension API used to change the active model and, optionally,
 *   to queue the context-compression follow-up message.
 * @param ctx - Extension context providing the model registry.
 * @param contextCompressionEnabled - When true, a hidden follow-up message is
 *   sent after a successful restore asking the model to squash the fallback
 *   period (target `router-fallback-start`).
 * @returns Whether the restore happened, plus a human-readable message.
 */
export const tryRestore = async (
  pi: ExtensionAPI,
  ctx: ExtensionContext,
  contextCompressionEnabled: boolean = false,
): Promise<{ success: boolean; message: string }> => {
  if (!state.fallbackActive || !state.preferredModel) {
    return { success: false, message: 'No preferred model stored' };
  }

  const preferred = state.preferredModel;

  // The stored value is `provider/id`. Split on the FIRST slash only: model
  // ids may themselves contain slashes (e.g. `openrouter/vendor/model`), and
  // a plain split('/') would silently drop everything after the second one.
  const separatorIndex = preferred.indexOf('/');
  const provider =
    separatorIndex === -1 ? preferred : preferred.slice(0, separatorIndex);
  const id = separatorIndex === -1 ? '' : preferred.slice(separatorIndex + 1);

  const model =
    provider && id ? ctx.modelRegistry.find(provider, id) : undefined;

  if (!model) {
    return {
      success: false,
      message: `Model ${preferred} not available`,
    };
  }

  const success = await pi.setModel(model);
  if (success) {
    state.fallbackActive = false;
    state.autoRestore = false;

    // Context Compression Bridge: instruct the model to summarize the fallback period.
    if (contextCompressionEnabled) {
      pi.sendMessage(
        {
          customType: 'router-context-compression',
          content:
            "System Context: You have just been restored to the primary high-tier model after a period of rate-limit fallback. Before continuing the user's task, please use your `context_checkout` tool to squash the previous fallback period into a concise summary. Use the target `router-fallback-start`.",
          display: false,
        },
        { deliverAs: 'followUp' },
      );
    }
  }

  return {
    success,
    message: success ? `Restored ${preferred}` : 'Failed to restore model',
  };
};
|
|
197
|
+
|
|
198
|
+
/**
 * Returns a shallow copy of the current fallback state, so callers cannot
 * modify the module's own state object through the result.
 */
export const getFallbackState = (): FallbackState => Object.assign({}, state);
|
|
201
|
+
|
|
202
|
+
/**
 * Returns a new array holding the recorded rate-limit entries, oldest first.
 * The entries themselves are shared with the module, not cloned.
 */
export const getRateLimitHistory = (): RateLimitEventEntry[] => history.slice();
|
|
205
|
+
|
|
206
|
+
/**
 * Appends one entry to the rate-limit history, stamped with the current time,
 * and trims the history to its 100 most recent entries.
 *
 * @param provider - Provider of the affected model.
 * @param model - Id of the affected model.
 * @param httpStatus - HTTP status code of the provider response.
 * @param retryAfter - Retry-After value reported with the response, if any.
 */
export const recordRateLimit = (
  provider: string,
  model: string,
  httpStatus: number,
  retryAfter?: number,
): void => {
  const entry: RateLimitEventEntry = {
    timestamp: Date.now(),
    provider,
    model,
    retryAfter,
    httpStatus,
  };
  history.push(entry);

  // Keep last 100
  const overflow = history.length - 100;
  if (overflow > 0) {
    history = history.slice(overflow);
  }
};
|
|
222
|
+
|
|
223
|
+
/**
 * Clears the recorded history and returns the fallback state to its initial
 * value (no fallback active, no preferred model stored).
 */
export const resetRateLimitState = (): void => {
  history = [];
  state = { fallbackActive: false, autoRestore: false };
};
|
|
230
|
+
|
|
231
|
+
// ─── Extension Integration ──────────────────────────────────────────────────
|
|
232
|
+
|
|
233
|
+
/**
 * Provider response statuses that count as fallback triggers, mapped to the
 * wording shown to the user.
 *
 * 401/403 are intentionally absent so that bad API keys are not silently
 * masked by a fallback.
 */
const FALLBACK_STATUS_REASONS: ReadonlyMap<number, string> = new Map([
  [402, 'out of credits (402)'], // Payment Required
  [429, 'rate limited (429)'], // Too Many Requests
  [503, 'service unavailable (503)'], // Service Unavailable
  [529, 'provider overloaded (529)'], // Site Overloaded (Anthropic specifically)
]);

/**
 * Converts a Retry-After header value to a whole number of seconds.
 *
 * Accepts both forms the header may take: a delay in seconds, or an HTTP
 * date (converted to the remaining seconds from now). Missing, unparseable
 * or already-elapsed values yield 0.
 */
const parseRetryAfterSeconds = (raw: unknown): number => {
  if (raw === undefined || raw === null) return 0;

  const text = String(raw).trim();
  if (text === '') return 0;

  const seconds = Number.parseInt(text, 10);
  if (Number.isFinite(seconds)) return Math.max(seconds, 0);

  const retryAt = Date.parse(text);
  if (Number.isNaN(retryAt)) return 0;

  return Math.max(Math.ceil((retryAt - Date.now()) / 1000), 0);
};

/**
 * Registers the rate-limit monitoring and status-bar handlers on the
 * extension API.
 *
 * @param pi - Extension API on which the event handlers are registered.
 * @param rawConfig - Untyped user configuration. Keys known to
 *   `DEFAULT_RATE_LIMIT_CONFIG` override the defaults when their value has
 *   the same runtime type as the default (arrays must contain only strings);
 *   other values are ignored and the default is kept.
 * @param contextCompressionEnabled - Forwarded to `tryFallback` on
 *   auto-fallback.
 */
export const initializeRateLimitFallback = (
  pi: ExtensionAPI,
  rawConfig: Record<string, unknown>,
  contextCompressionEnabled: boolean = false,
): void => {
  const config: RateLimitConfig = { ...DEFAULT_RATE_LIMIT_CONFIG };
  for (const key of Object.keys(config) as Array<keyof RateLimitConfig>) {
    const override = rawConfig[key];
    if (override === undefined) continue;

    // Reject overrides of the wrong shape: e.g. a string `fallbackSequence`
    // would otherwise be iterated character by character as patterns.
    const defaultValue = DEFAULT_RATE_LIMIT_CONFIG[key];
    const compatible = Array.isArray(defaultValue)
      ? Array.isArray(override) &&
        override.every((item) => typeof item === 'string')
      : typeof override === typeof defaultValue;

    if (compatible) config[key] = override as never;
  }

  if (!config.enabled) {
    return;
  }

  // Monitor rate limits.
  // NOTE(review): this comment originally said "requires Pi 0.68+" while the
  // file header says 0.67+ for after_provider_response - confirm which is right.
  pi.on('after_provider_response', async (event, ctx) => {
    const statusReason = FALLBACK_STATUS_REASONS.get(event.status);
    if (statusReason === undefined) return;

    const currentModel = ctx.model;
    const retryAfter = parseRetryAfterSeconds(event.headers?.['retry-after']);

    recordRateLimit(
      currentModel?.provider || 'unknown',
      currentModel?.id || 'unknown',
      event.status,
      retryAfter || undefined,
    );

    // Provide transparent UI notifications to the user about why fallback is occurring.
    if (retryAfter > 0 && retryAfter < config.shortDelayThreshold) {
      // Short wait: not worth switching models, just tell the user.
      ctx.ui.notify(
        `[Router] ${statusReason}. Retry after ${retryAfter}s`,
        'warning',
      );
      return;
    }

    if (config.autoFallback && !state.fallbackActive) {
      let result: { success: boolean; message: string };
      try {
        result = await tryFallback(
          pi,
          ctx,
          config,
          'rate_limit',
          contextCompressionEnabled,
        );
      } catch (err: unknown) {
        // Surface the failure instead of leaving a rejected handler promise.
        result = {
          success: false,
          message: err instanceof Error ? err.message : String(err),
        };
      }

      if (result.success) {
        ctx.ui.notify(
          `[Router] Auto-fallback due to ${statusReason}: ${result.message}`,
          'info',
        );
        // Transparent session tracking (for RPC clients)
        pi.appendEntry('router-fallback', { reason: statusReason, result });
      } else {
        // Without this the user would see nothing when auto-fallback cannot switch.
        ctx.ui.notify(
          `[Router] ${statusReason}. Auto-fallback failed: ${result.message}`,
          'warning',
        );
      }
      return;
    }

    ctx.ui.notify(
      `[Router] ${statusReason}. Use /router fallback to switch`,
      'warning',
    );
  });

  // Status bar indicator
  pi.on('model_select', async (_event, ctx) => {
    ctx.ui.setStatus(
      'router-fallback',
      state.fallbackActive ? '\ud83c\udfe0 fallback' : '',
    );
  });
};
|