@kylebrodeur/pi-model-router 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,317 @@
1
+ /**
2
+ * Rate Limit Fallback Feature
3
+ *
4
+ * Monitors provider responses for rate limiting.
5
+ * NOTE: Requires Pi 0.67+ for after_provider_response event.
6
+ */
7
+ import type {
8
+ ExtensionAPI,
9
+ ExtensionContext,
10
+ } from '@mariozechner/pi-coding-agent';
11
+
12
+ // ─── Types ──────────────────────────────────────────────────────────────────
13
+
14
/**
 * Settings that control the rate-limit fallback feature.
 *
 * Defaults live in `DEFAULT_RATE_LIMIT_CONFIG`; individual keys can be
 * overridden through the raw config passed to `initializeRateLimitFallback`.
 */
export interface RateLimitConfig {
  /** Master switch. When false, `initializeRateLimitFallback` registers no event handlers. */
  enabled: boolean;
  /**
   * Upper bound for a "short" delay. A positive `Retry-After` value below this
   * number only produces a "retry after" notice instead of a fallback.
   */
  shortDelayThreshold: number;
  /** When true, a trigger status switches models automatically (if no fallback is active yet). */
  autoFallback: boolean;
  /** Copied into `FallbackState.autoRestore` whenever a fallback succeeds. */
  autoRestore: boolean;
  /**
   * Not read anywhere in this module.
   * NOTE(review): presumably the number of seconds between restore checks — confirm against the consumer.
   */
  restoreCheckInterval: number;
  /**
   * Ordered list of model patterns to fall back to. Each entry is either an
   * exact `provider/id` or a prefix ending in `*` (for example `ollama/*`).
   */
  fallbackSequence: string[];
}
22
+
23
/**
 * One recorded provider response that matched a fallback trigger status.
 * Entries are created by `recordRateLimit` and exposed via `getRateLimitHistory`.
 */
export interface RateLimitEventEntry {
  /** `Date.now()` value captured when the entry was recorded. */
  timestamp: number;
  /** Provider of the model that was active when the response arrived. */
  provider: string;
  /** Id of the model that was active when the response arrived. */
  model: string;
  /** Parsed `Retry-After` value, if the caller supplied one. */
  retryAfter?: number;
  /** HTTP status code of the provider response. */
  httpStatus: number;
}
30
+
31
/**
 * Snapshot of the module's fallback bookkeeping, as returned by `getFallbackState`.
 */
export interface FallbackState {
  /** `provider/id` of the model to return to; written by `tryFallback`, read by `tryRestore`. */
  preferredModel?: string;
  /** True between a successful `tryFallback` and a successful `tryRestore`. */
  fallbackActive: boolean;
  /** Copy of `RateLimitConfig.autoRestore` taken when the fallback was activated. */
  autoRestore: boolean;
  /** `Date.now()` value captured when the fallback was activated. */
  triggeredAt?: number;
  /** Why the fallback was activated. */
  triggerReason?: 'rate_limit' | 'budget_exceeded' | 'manual';
}
38
+
39
+ // ─── Config ─────────────────────────────────────────────────────────────────
40
+
41
/**
 * Baseline configuration. `initializeRateLimitFallback` starts from a shallow
 * copy of this object and overlays any keys present in the raw config.
 */
export const DEFAULT_RATE_LIMIT_CONFIG: RateLimitConfig = {
  // Feature is on by default, but it only notifies: automatic switching and
  // automatic restoring are both opt-in.
  enabled: true,
  autoFallback: false,
  autoRestore: false,

  // Numeric tuning knobs.
  shortDelayThreshold: 60,
  restoreCheckInterval: 300,

  // Any available model whose `provider/id` starts with `ollama/`.
  fallbackSequence: ['ollama/*'],
};
49
+
50
+ // ─── Module State ───────────────────────────────────────────────────────────
51
+
52
/**
 * Current fallback bookkeeping. Module-private and mutable: updated by
 * `tryFallback` / `tryRestore` and replaced wholesale by `resetRateLimitState`.
 */
let state: FallbackState = { fallbackActive: false, autoRestore: false };

/** Recorded trigger responses, oldest first; appended to by `recordRateLimit`. */
let history: RateLimitEventEntry[] = [];
58
+
59
+ // ─── Helpers ────────────────────────────────────────────────────────────────
60
+
61
/**
 * Selects the first available model that matches a fallback pattern.
 *
 * Patterns are tried in order, so an earlier entry in `sequence` always wins
 * over a later one regardless of the order models appear in the registry.
 * A pattern matches either an exact `provider/id` or, when it ends in `*`,
 * any `provider/id` starting with the text before the `*`.
 *
 * @param ctx Extension context; only `ctx.modelRegistry.getAvailable()` is used.
 * @param sequence Ordered fallback patterns.
 * @param excludeRef Optional `provider/id` that must never be returned. Used to
 *   avoid "falling back" to the very model that just failed.
 * @returns The provider/id pair of the first match, or `undefined` if none.
 */
const findBestFallbackModel = (
  ctx: ExtensionContext,
  sequence: string[],
  excludeRef?: string,
): { provider: string; id: string } | undefined => {
  const availableModels = ctx.modelRegistry.getAvailable();

  for (const pattern of sequence) {
    const isPrefixPattern = pattern.endsWith('*');
    const prefix = isPrefixPattern ? pattern.slice(0, -1) : pattern;

    for (const model of availableModels) {
      const ref = `${model.provider}/${model.id}`;
      if (ref === excludeRef) continue;

      if (pattern === ref || (isPrefixPattern && ref.startsWith(prefix))) {
        return { provider: model.provider, id: model.id };
      }
    }
  }

  return undefined;
};

// ─── Public API ─────────────────────────────────────────────────────────────

/**
 * Switches the session to the best available fallback model.
 *
 * On success the module state is marked as "fallback active" and, when context
 * compression is enabled and the session manager exposes `appendLabelChange`,
 * the current leaf is labelled `router-fallback-start`.
 *
 * The model currently in use is never chosen as the fallback target: selecting
 * it again would report a successful switch without changing anything.
 *
 * @param pi Extension API; `pi.setModel` performs the switch.
 * @param ctx Extension context supplying the current model and the registry.
 * @param config Active configuration; an empty `fallbackSequence` is treated as `['ollama/*']`.
 * @param triggerReason Stored in the fallback state on success.
 * @param contextCompressionEnabled Enables the session label bookmark.
 * @returns `success` plus a human-readable `message` describing the outcome.
 */
export const tryFallback = async (
  pi: ExtensionAPI,
  ctx: ExtensionContext,
  config: RateLimitConfig,
  triggerReason: FallbackState['triggerReason'] = 'manual',
  contextCompressionEnabled: boolean = false,
): Promise<{ success: boolean; message: string }> => {
  const currentModel = ctx.model;

  if (!currentModel) {
    return { success: false, message: 'No current model' };
  }

  const currentRef = `${currentModel.provider}/${currentModel.id}`;

  // Remember where to return to. Skipped while a fallback is already active
  // (the stored value is the real preference) and for ollama models, which
  // this module treats as fallback targets rather than preferences.
  if (currentModel.provider !== 'ollama' && !state.fallbackActive) {
    state.preferredModel = currentRef;
  }

  const sequence =
    config.fallbackSequence.length > 0 ? config.fallbackSequence : ['ollama/*'];
  const target = findBestFallbackModel(ctx, sequence, currentRef);

  if (!target) {
    return { success: false, message: 'No fallback models available' };
  }

  const targetModel = ctx.modelRegistry.find(target.provider, target.id);
  if (!targetModel) {
    return {
      success: false,
      message: `Model ${target.provider}/${target.id} not in registry. Try /reload.`,
    };
  }

  const success = await pi.setModel(targetModel);
  if (success) {
    state.fallbackActive = true;
    state.autoRestore = config.autoRestore;
    state.triggeredAt = Date.now();
    state.triggerReason = triggerReason;

    // Context Compression Bridge: bookmark the start of the fallback period.
    if (
      contextCompressionEnabled &&
      ctx.sessionManager &&
      'appendLabelChange' in ctx.sessionManager
    ) {
      try {
        // The label API is feature-detected above and not part of the typed
        // session manager surface used here, hence the untyped access.
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        const sm = ctx.sessionManager as any;
        const leafId = sm.getLeafId();
        if (leafId) {
          sm.appendLabelChange(leafId, 'router-fallback-start');
        }
      } catch {
        // Best effort: a session manager without working label support must
        // not turn a successful model switch into a failure.
      }
    }
  }

  return {
    success,
    message: success
      ? `Switched to ${target.provider}/${target.id}`
      : `Failed to switch to ${target.provider}/${target.id}`,
  };
};
151
+
152
/**
 * Splits a `provider/id` reference at the FIRST slash only.
 *
 * Model ids may themselves contain slashes (for example
 * `openrouter/anthropic/claude-3.5-sonnet`), so everything after the first
 * slash belongs to the id. A reference without a slash yields an empty id.
 */
const splitModelRef = (ref: string): [string, string] => {
  const slashAt = ref.indexOf('/');
  return slashAt === -1
    ? [ref, '']
    : [ref.slice(0, slashAt), ref.slice(slashAt + 1)];
};

/**
 * Switches back to the model that was active before the fallback.
 *
 * Does nothing unless a fallback is active and a preferred model was stored.
 * On success the fallback flags are cleared and, when context compression is
 * enabled, a hidden follow-up message asks the restored model to summarise
 * the fallback period starting at the `router-fallback-start` label.
 *
 * @param pi Extension API; `pi.setModel` performs the switch and
 *   `pi.sendMessage` delivers the compression instruction.
 * @param ctx Extension context; only `ctx.modelRegistry.find` is used.
 * @param contextCompressionEnabled Enables the follow-up compression message.
 * @returns `success` plus a human-readable `message` describing the outcome.
 */
export const tryRestore = async (
  pi: ExtensionAPI,
  ctx: ExtensionContext,
  contextCompressionEnabled: boolean = false,
): Promise<{ success: boolean; message: string }> => {
  const preferred = state.preferredModel;
  if (!state.fallbackActive || !preferred) {
    return { success: false, message: 'No preferred model stored' };
  }

  const [provider, id] = splitModelRef(preferred);
  const model = ctx.modelRegistry.find(provider, id);

  if (!model) {
    return {
      success: false,
      message: `Model ${preferred} not available`,
    };
  }

  const success = await pi.setModel(model);
  if (success) {
    state.fallbackActive = false;
    state.autoRestore = false;

    // Context Compression Bridge: instruct the model to summarize the fallback period.
    if (contextCompressionEnabled) {
      pi.sendMessage(
        {
          customType: 'router-context-compression',
          content:
            "System Context: You have just been restored to the primary high-tier model after a period of rate-limit fallback. Before continuing the user's task, please use your `context_checkout` tool to squash the previous fallback period into a concise summary. Use the target `router-fallback-start`.",
          display: false,
        },
        { deliverAs: 'followUp' },
      );
    }
  }

  return {
    success,
    message: success ? `Restored ${preferred}` : 'Failed to restore model',
  };
};
197
+
198
/**
 * Returns a shallow copy of the current fallback state, so callers cannot
 * mutate the module's own object.
 */
export const getFallbackState = (): FallbackState => Object.assign({}, state);
201
+
202
/**
 * Returns a shallow copy of the recorded trigger responses, oldest first.
 * The array is fresh on every call; the entries themselves are shared.
 */
export const getRateLimitHistory = (): RateLimitEventEntry[] => history.slice();
205
+
206
/**
 * Appends one trigger response to the history, stamped with the current time,
 * and trims the history to its 100 most recent entries.
 *
 * @param provider Provider of the model that received the response.
 * @param model Id of the model that received the response.
 * @param httpStatus HTTP status code of the response.
 * @param retryAfter Parsed `Retry-After` value, when one is known.
 */
export const recordRateLimit = (
  provider: string,
  model: string,
  httpStatus: number,
  retryAfter?: number,
): void => {
  const entry: RateLimitEventEntry = {
    timestamp: Date.now(),
    provider,
    model,
    retryAfter,
    httpStatus,
  };
  history.push(entry);

  // Keep last 100: drop however many entries exceed the cap from the front.
  const surplus = history.length - 100;
  if (surplus > 0) {
    history = history.slice(surplus);
  }
};
222
+
223
/**
 * Restores the module to its initial condition: no active fallback, no stored
 * preferred model, and an empty history. Both module variables are replaced
 * with fresh objects rather than mutated.
 */
export const resetRateLimitState = (): void => {
  history = [];
  state = { fallbackActive: false, autoRestore: false };
};
230
+
231
+ // ─── Extension Integration ──────────────────────────────────────────────────
232
+
233
/**
 * HTTP statuses that start the fallback flow.
 *   402: Payment Required (out of credits)
 *   429: Too Many Requests (rate limit)
 *   503: Service Unavailable
 *   529: Site Overloaded (Anthropic specifically)
 * 401/403 are intentionally absent to avoid silently masking bad API keys.
 */
const FALLBACK_TRIGGER_STATUSES: readonly number[] = [402, 429, 503, 529];

/** User-facing reason per trigger status; anything not listed is reported as a 429 rate limit. */
const FALLBACK_STATUS_REASONS: Record<number, string> = {
  402: 'out of credits (402)',
  503: 'service unavailable (503)',
  529: 'provider overloaded (529)',
};

/**
 * Converts a `Retry-After` header value into a number of seconds.
 *
 * The header may carry either delay-seconds (`"120"`) or an HTTP-date
 * (`"Wed, 21 Oct 2015 07:28:00 GMT"`). Numeric values are returned as parsed;
 * dates are converted to the remaining seconds from `now` (never negative).
 * Missing, empty, or unparseable values yield 0.
 *
 * @param raw Header value as delivered by the provider response.
 * @param now Reference time in epoch milliseconds; defaults to the current time.
 */
const parseRetryAfterSeconds = (raw: unknown, now: number = Date.now()): number => {
  const text = String(raw || '0').trim();

  const seconds = parseInt(text, 10);
  if (!Number.isNaN(seconds)) return seconds;

  const retryAt = Date.parse(text);
  if (Number.isNaN(retryAt)) return 0;

  return Math.max(0, Math.ceil((retryAt - now) / 1000));
};

/**
 * Wires the rate-limit fallback feature into the extension host.
 *
 * Builds the effective configuration from the defaults plus any matching keys
 * in `rawConfig`, then (unless disabled) registers two handlers:
 *  - `after_provider_response`: records trigger responses and either notifies
 *    the user or performs an automatic fallback;
 *  - `model_select`: keeps the `router-fallback` status bar entry in sync.
 *
 * @param pi Extension API used to subscribe to events and append session entries.
 * @param rawConfig Untyped user configuration; only keys that exist in
 *   `DEFAULT_RATE_LIMIT_CONFIG` are read, and values are taken as-is without validation.
 * @param contextCompressionEnabled Forwarded to `tryFallback`.
 */
export const initializeRateLimitFallback = (
  pi: ExtensionAPI,
  rawConfig: Record<string, unknown>,
  contextCompressionEnabled: boolean = false,
): void => {
  const config = { ...DEFAULT_RATE_LIMIT_CONFIG };
  for (const key of Object.keys(config) as Array<keyof typeof config>) {
    const override = rawConfig[key];
    // `never` is the only type assignable to every property of the union key.
    if (override !== undefined) config[key] = override as never;
  }

  if (!config.enabled) {
    return;
  }

  // Monitor rate limits.
  // NOTE(review): the file header says this event needs Pi 0.67+, while this
  // spot previously said 0.68+ — confirm the real minimum version.
  pi.on('after_provider_response', async (event, ctx) => {
    if (!FALLBACK_TRIGGER_STATUSES.includes(event.status)) return;

    const currentModel = ctx.model;
    const retryAfter = parseRetryAfterSeconds(event.headers?.['retry-after']);

    recordRateLimit(
      currentModel?.provider || 'unknown',
      currentModel?.id || 'unknown',
      event.status,
      retryAfter || undefined,
    );

    // Provide transparent UI notifications to the user about why fallback is occurring.
    const statusReason = FALLBACK_STATUS_REASONS[event.status] ?? 'rate limited (429)';

    if (retryAfter > 0 && retryAfter < config.shortDelayThreshold) {
      // Short wait: tell the user and let the normal retry happen.
      ctx.ui.notify(
        `[Router] ${statusReason}. Retry after ${retryAfter}s`,
        'warning',
      );
      return;
    }

    if (!config.autoFallback || state.fallbackActive) {
      ctx.ui.notify(
        `[Router] ${statusReason}. Use /router fallback to switch`,
        'warning',
      );
      return;
    }

    const result = await tryFallback(
      pi,
      ctx,
      config,
      'rate_limit',
      contextCompressionEnabled,
    );

    if (result.success) {
      ctx.ui.notify(
        `[Router] Auto-fallback due to ${statusReason}: ${result.message}`,
        'info',
      );
      // Transparent session tracking (for RPC clients)
      pi.appendEntry('router-fallback', { reason: statusReason, result });
    } else {
      // Surface the failure instead of leaving the user with no feedback.
      ctx.ui.notify(
        `[Router] ${statusReason}. Auto-fallback failed: ${result.message}`,
        'warning',
      );
    }
  });

  // Status bar indicator
  pi.on('model_select', async (_event, ctx) => {
    ctx.ui.setStatus(
      'router-fallback',
      state.fallbackActive ? '\ud83c\udfe0 fallback' : '',
    );
  });
};