@kylebrodeur/pi-model-router 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,7 +25,7 @@ import { registerCommands } from './commands';
25
25
  import { registerRouterProvider } from './provider';
26
26
  // ─── Feature modules (added by fork) ────────────────────────────────────────
27
27
  import { initializeOllamaSync } from './ollama-sync';
28
- import { initializeRateLimitFallback } from './rate-limit';
28
+ import { initializeRateLimitFallback, checkAndRestore } from './rate-limit';
29
29
 
30
30
  // ─── Plugin Detection & Progressive Integration ──────────────────────────
31
31
  interface PluginStatus {
@@ -35,7 +35,7 @@ interface PluginStatus {
35
35
 
36
36
  const detectPlugins = (pi: ExtensionAPI): PluginStatus => {
37
37
  const tools = (pi as any).tools ?? {};
38
- const log = (pi as any).log;
38
+ const log = (pi as any).log || console;
39
39
  return {
40
40
  ledger: typeof tools.append_ledger === 'function',
41
41
  agentBus: typeof tools.link_send === 'function',
@@ -48,7 +48,7 @@ const detectAndIntegratePlugins = (
48
48
  debugEnabled: boolean,
49
49
  ) => {
50
50
  const plugins = detectPlugins(pi);
51
- const log = (pi as any).log;
51
+ const log = (pi as any).log || console;
52
52
 
53
53
  // Ledger integration: log routing decisions to qmd-ledger
54
54
  const shouldIntegrateLedger = features?.ledgerIntegration === true;
@@ -575,6 +575,28 @@ const routerExtension = (pi: ExtensionAPI) => {
575
575
  await setModelInternally(routerModel);
576
576
  }
577
577
  }
578
+
579
+ // Auto-restore from rate-limit fallback
580
+ const rateLimitCfg = (currentConfig.rateLimitFallback ?? {}) as Record<
581
+ string,
582
+ unknown
583
+ >;
584
+ if (rateLimitCfg.autoRestore === true) {
585
+ const result = await checkAndRestore(
586
+ pi,
587
+ ctx,
588
+ currentConfig.features?.contextCompression === true,
589
+ (rateLimitCfg.restoreCheckInterval as number) ?? 300,
590
+ );
591
+ if (result.attempted && result.success) {
592
+ ctx.ui.notify(`[Router] Auto-restored: ${result.message}`, 'info');
593
+ pi.appendEntry('router-auto-restore', {
594
+ restored: true,
595
+ message: result.message,
596
+ });
597
+ }
598
+ }
599
+
578
600
  persistState();
579
601
  actions.updateStatus(ctx);
580
602
  });
@@ -28,12 +28,21 @@ export interface RateLimitEventEntry {
28
28
  httpStatus: number;
29
29
  }
30
30
 
31
+ export interface ModelCapabilities {
32
+ vision: boolean;
33
+ reasoning: boolean;
34
+ contextWindow: number;
35
+ maxTokens: number;
36
+ }
37
+
31
38
  export interface FallbackState {
32
39
  preferredModel?: string;
33
40
  fallbackActive: boolean;
34
41
  autoRestore: boolean;
35
42
  triggeredAt?: number;
36
43
  triggerReason?: 'rate_limit' | 'budget_exceeded' | 'manual';
44
+ lastRestoreAttempt?: number;
45
+ requiredCapabilities?: ModelCapabilities;
37
46
  }
38
47
 
39
48
  // ─── Config ─────────────────────────────────────────────────────────────────
@@ -58,26 +67,72 @@ let history: RateLimitEventEntry[] = [];
58
67
 
59
68
  // ─── Helpers ────────────────────────────────────────────────────────────────
60
69
 
70
+ const getModelCapabilities = (model: {
71
+ input: string[];
72
+ reasoning: boolean;
73
+ contextWindow: number;
74
+ maxTokens: number;
75
+ }): ModelCapabilities => ({
76
+ vision: model.input.includes('image'),
77
+ reasoning: model.reasoning,
78
+ contextWindow: model.contextWindow,
79
+ maxTokens: model.maxTokens,
80
+ });
81
+
82
+ const capabilitiesMatch = (
83
+ required: ModelCapabilities,
84
+ candidate: ModelCapabilities,
85
+ ): { match: boolean; missing: string[] } => {
86
+ const missing: string[] = [];
87
+ if (required.vision && !candidate.vision) missing.push('vision');
88
+ if (required.reasoning && !candidate.reasoning) missing.push('reasoning');
89
+ if (candidate.contextWindow < required.contextWindow)
90
+ missing.push(
91
+ `contextWindow ${candidate.contextWindow} < ${required.contextWindow}`,
92
+ );
93
+ if (candidate.maxTokens < required.maxTokens)
94
+ missing.push(`maxTokens ${candidate.maxTokens} < ${required.maxTokens}`);
95
+ return { match: missing.length === 0, missing };
96
+ };
97
+
61
98
  const findBestFallbackModel = (
62
99
  ctx: ExtensionContext,
63
100
  sequence: string[],
64
- ): { provider: string; id: string } | undefined => {
101
+ required?: ModelCapabilities,
102
+ ): { provider: string; id: string; missing?: string[] } | undefined => {
65
103
  const availableModels = ctx.modelRegistry.getAvailable();
66
104
 
105
+ let bestPartialMatch:
106
+ | { provider: string; id: string; missing: string[] }
107
+ | undefined;
108
+
67
109
  for (const pattern of sequence) {
68
110
  for (const model of availableModels) {
69
111
  const targetId = `${model.provider}/${model.id}`;
70
- if (pattern === targetId)
71
- return { provider: model.provider, id: model.id };
72
- if (pattern.endsWith('*')) {
73
- const prefix = pattern.slice(0, -1);
74
- if (targetId.startsWith(prefix))
112
+ if (
113
+ pattern === targetId ||
114
+ (pattern.endsWith('*') && targetId.startsWith(pattern.slice(0, -1)))
115
+ ) {
116
+ if (required) {
117
+ const caps = getModelCapabilities(model);
118
+ const { match, missing } = capabilitiesMatch(required, caps);
119
+ if (match) {
120
+ return { provider: model.provider, id: model.id };
121
+ } else if (!bestPartialMatch) {
122
+ bestPartialMatch = {
123
+ provider: model.provider,
124
+ id: model.id,
125
+ missing,
126
+ };
127
+ }
128
+ } else {
75
129
  return { provider: model.provider, id: model.id };
130
+ }
76
131
  }
77
132
  }
78
133
  }
79
134
 
80
- return undefined;
135
+ return bestPartialMatch;
81
136
  };
82
137
 
83
138
  // ─── Public API ─────────────────────────────────────────────────────────────
@@ -97,17 +152,26 @@ export const tryFallback = async (
97
152
 
98
153
  if (currentModel.provider !== 'ollama' && !state.fallbackActive) {
99
154
  state.preferredModel = `${currentModel.provider}/${currentModel.id}`;
155
+ state.requiredCapabilities = getModelCapabilities(currentModel);
100
156
  }
101
157
 
102
158
  const target = findBestFallbackModel(
103
159
  ctx,
104
160
  config.fallbackSequence.length > 0 ? config.fallbackSequence : ['ollama/*'],
161
+ state.requiredCapabilities,
105
162
  );
106
163
 
107
164
  if (!target) {
108
165
  return { success: false, message: 'No fallback models available' };
109
166
  }
110
167
 
168
+ if (target.missing) {
169
+ return {
170
+ success: false,
171
+ message: `Fallback model ${target.provider}/${target.id} lacks required capabilities: ${target.missing.join(', ')}`,
172
+ };
173
+ }
174
+
111
175
  const targetModel = ctx.modelRegistry.find(target.provider, target.id);
112
176
  if (!targetModel) {
113
177
  return {
@@ -153,9 +217,13 @@ export const tryRestore = async (
153
217
  pi: ExtensionAPI,
154
218
  ctx: ExtensionContext,
155
219
  contextCompressionEnabled: boolean = false,
156
- ): Promise<{ success: boolean; message: string }> => {
220
+ ): Promise<{ success: boolean; message: string; restored: boolean }> => {
157
221
  if (!state.fallbackActive || !state.preferredModel) {
158
- return { success: false, message: 'No preferred model stored' };
222
+ return {
223
+ success: false,
224
+ message: 'No preferred model stored',
225
+ restored: false,
226
+ };
159
227
  }
160
228
 
161
229
  const [provider, id] = state.preferredModel.split('/');
@@ -165,6 +233,7 @@ export const tryRestore = async (
165
233
  return {
166
234
  success: false,
167
235
  message: `Model ${state.preferredModel} not available`,
236
+ restored: false,
168
237
  };
169
238
  }
170
239
 
@@ -172,8 +241,9 @@ export const tryRestore = async (
172
241
  if (success) {
173
242
  state.fallbackActive = false;
174
243
  state.autoRestore = false;
244
+ state.lastRestoreAttempt = undefined;
245
+ state.requiredCapabilities = undefined;
175
246
 
176
- // Context Compression Bridge: Instruct the model to summarize the fallback period
177
247
  if (contextCompressionEnabled) {
178
248
  pi.sendMessage(
179
249
  {
@@ -189,12 +259,47 @@ export const tryRestore = async (
189
259
 
190
260
  return {
191
261
  success,
262
+ restored: success,
192
263
  message: success
193
264
  ? `Restored ${state.preferredModel}`
194
265
  : 'Failed to restore model',
195
266
  };
196
267
  };
197
268
 
269
+ /**
270
+ * Periodically check if the preferred cloud model is healthy and auto-restore.
271
+ * Call this from turn_end or another periodic hook.
272
+ */
273
+ export const checkAndRestore = async (
274
+ pi: ExtensionAPI,
275
+ ctx: ExtensionContext,
276
+ contextCompressionEnabled: boolean = false,
277
+ restoreCheckIntervalSec: number = 300,
278
+ ): Promise<{ attempted: boolean; success: boolean; message: string }> => {
279
+ if (!state.autoRestore || !state.fallbackActive || !state.preferredModel) {
280
+ return {
281
+ attempted: false,
282
+ success: false,
283
+ message: 'Auto-restore not active',
284
+ };
285
+ }
286
+
287
+ const now = Date.now();
288
+ const intervalMs = restoreCheckIntervalSec * 1000;
289
+
290
+ if (state.lastRestoreAttempt && now - state.lastRestoreAttempt < intervalMs) {
291
+ return {
292
+ attempted: false,
293
+ success: false,
294
+ message: 'Restore throttled',
295
+ };
296
+ }
297
+
298
+ state.lastRestoreAttempt = now;
299
+ const result = await tryRestore(pi, ctx, contextCompressionEnabled);
300
+ return { attempted: true, ...result };
301
+ };
302
+
198
303
  export const getFallbackState = (): FallbackState => {
199
304
  return { ...state };
200
305
  };
@@ -224,6 +329,8 @@ export const resetRateLimitState = (): void => {
224
329
  state = {
225
330
  fallbackActive: false,
226
331
  autoRestore: false,
332
+ lastRestoreAttempt: undefined,
333
+ requiredCapabilities: undefined,
227
334
  };
228
335
  history = [];
229
336
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@kylebrodeur/pi-model-router",
3
- "version": "0.1.2",
3
+ "version": "0.1.3",
4
4
  "type": "module",
5
5
  "description": "Intelligent per-turn model router extension for the pi coding agent (Enhanced Fork)",
6
6
  "keywords": [