@kylebrodeur/pi-model-router 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/extensions/index.ts +25 -3
- package/extensions/rate-limit.ts +117 -10
- package/package.json +1 -1
package/extensions/index.ts
CHANGED
|
@@ -25,7 +25,7 @@ import { registerCommands } from './commands';
|
|
|
25
25
|
import { registerRouterProvider } from './provider';
|
|
26
26
|
// ─── Feature modules (added by fork) ────────────────────────────────────────
|
|
27
27
|
import { initializeOllamaSync } from './ollama-sync';
|
|
28
|
-
import { initializeRateLimitFallback } from './rate-limit';
|
|
28
|
+
import { initializeRateLimitFallback, checkAndRestore } from './rate-limit';
|
|
29
29
|
|
|
30
30
|
// ─── Plugin Detection & Progressive Integration ──────────────────────────
|
|
31
31
|
interface PluginStatus {
|
|
@@ -35,7 +35,7 @@ interface PluginStatus {
|
|
|
35
35
|
|
|
36
36
|
const detectPlugins = (pi: ExtensionAPI): PluginStatus => {
|
|
37
37
|
const tools = (pi as any).tools ?? {};
|
|
38
|
-
const log = (pi as any).log;
|
|
38
|
+
const log = (pi as any).log || console;
|
|
39
39
|
return {
|
|
40
40
|
ledger: typeof tools.append_ledger === 'function',
|
|
41
41
|
agentBus: typeof tools.link_send === 'function',
|
|
@@ -48,7 +48,7 @@ const detectAndIntegratePlugins = (
|
|
|
48
48
|
debugEnabled: boolean,
|
|
49
49
|
) => {
|
|
50
50
|
const plugins = detectPlugins(pi);
|
|
51
|
-
const log = (pi as any).log;
|
|
51
|
+
const log = (pi as any).log || console;
|
|
52
52
|
|
|
53
53
|
// Ledger integration: log routing decisions to qmd-ledger
|
|
54
54
|
const shouldIntegrateLedger = features?.ledgerIntegration === true;
|
|
@@ -575,6 +575,28 @@ const routerExtension = (pi: ExtensionAPI) => {
|
|
|
575
575
|
await setModelInternally(routerModel);
|
|
576
576
|
}
|
|
577
577
|
}
|
|
578
|
+
|
|
579
|
+
// Auto-restore from rate-limit fallback
|
|
580
|
+
const rateLimitCfg = (currentConfig.rateLimitFallback ?? {}) as Record<
|
|
581
|
+
string,
|
|
582
|
+
unknown
|
|
583
|
+
>;
|
|
584
|
+
if (rateLimitCfg.autoRestore === true) {
|
|
585
|
+
const result = await checkAndRestore(
|
|
586
|
+
pi,
|
|
587
|
+
ctx,
|
|
588
|
+
currentConfig.features?.contextCompression === true,
|
|
589
|
+
(rateLimitCfg.restoreCheckInterval as number) ?? 300,
|
|
590
|
+
);
|
|
591
|
+
if (result.attempted && result.success) {
|
|
592
|
+
ctx.ui.notify(`[Router] Auto-restored: ${result.message}`, 'info');
|
|
593
|
+
pi.appendEntry('router-auto-restore', {
|
|
594
|
+
restored: true,
|
|
595
|
+
message: result.message,
|
|
596
|
+
});
|
|
597
|
+
}
|
|
598
|
+
}
|
|
599
|
+
|
|
578
600
|
persistState();
|
|
579
601
|
actions.updateStatus(ctx);
|
|
580
602
|
});
|
package/extensions/rate-limit.ts
CHANGED
|
@@ -28,12 +28,21 @@ export interface RateLimitEventEntry {
|
|
|
28
28
|
httpStatus: number;
|
|
29
29
|
}
|
|
30
30
|
|
|
31
|
+
export interface ModelCapabilities {
|
|
32
|
+
vision: boolean;
|
|
33
|
+
reasoning: boolean;
|
|
34
|
+
contextWindow: number;
|
|
35
|
+
maxTokens: number;
|
|
36
|
+
}
|
|
37
|
+
|
|
31
38
|
export interface FallbackState {
|
|
32
39
|
preferredModel?: string;
|
|
33
40
|
fallbackActive: boolean;
|
|
34
41
|
autoRestore: boolean;
|
|
35
42
|
triggeredAt?: number;
|
|
36
43
|
triggerReason?: 'rate_limit' | 'budget_exceeded' | 'manual';
|
|
44
|
+
lastRestoreAttempt?: number;
|
|
45
|
+
requiredCapabilities?: ModelCapabilities;
|
|
37
46
|
}
|
|
38
47
|
|
|
39
48
|
// ─── Config ─────────────────────────────────────────────────────────────────
|
|
@@ -58,26 +67,72 @@ let history: RateLimitEventEntry[] = [];
|
|
|
58
67
|
|
|
59
68
|
// ─── Helpers ────────────────────────────────────────────────────────────────
|
|
60
69
|
|
|
70
|
+
const getModelCapabilities = (model: {
|
|
71
|
+
input: string[];
|
|
72
|
+
reasoning: boolean;
|
|
73
|
+
contextWindow: number;
|
|
74
|
+
maxTokens: number;
|
|
75
|
+
}): ModelCapabilities => ({
|
|
76
|
+
vision: model.input.includes('image'),
|
|
77
|
+
reasoning: model.reasoning,
|
|
78
|
+
contextWindow: model.contextWindow,
|
|
79
|
+
maxTokens: model.maxTokens,
|
|
80
|
+
});
|
|
81
|
+
|
|
82
|
+
const capabilitiesMatch = (
|
|
83
|
+
required: ModelCapabilities,
|
|
84
|
+
candidate: ModelCapabilities,
|
|
85
|
+
): { match: boolean; missing: string[] } => {
|
|
86
|
+
const missing: string[] = [];
|
|
87
|
+
if (required.vision && !candidate.vision) missing.push('vision');
|
|
88
|
+
if (required.reasoning && !candidate.reasoning) missing.push('reasoning');
|
|
89
|
+
if (candidate.contextWindow < required.contextWindow)
|
|
90
|
+
missing.push(
|
|
91
|
+
`contextWindow ${candidate.contextWindow} < ${required.contextWindow}`,
|
|
92
|
+
);
|
|
93
|
+
if (candidate.maxTokens < required.maxTokens)
|
|
94
|
+
missing.push(`maxTokens ${candidate.maxTokens} < ${required.maxTokens}`);
|
|
95
|
+
return { match: missing.length === 0, missing };
|
|
96
|
+
};
|
|
97
|
+
|
|
61
98
|
const findBestFallbackModel = (
|
|
62
99
|
ctx: ExtensionContext,
|
|
63
100
|
sequence: string[],
|
|
64
|
-
|
|
101
|
+
required?: ModelCapabilities,
|
|
102
|
+
): { provider: string; id: string; missing?: string[] } | undefined => {
|
|
65
103
|
const availableModels = ctx.modelRegistry.getAvailable();
|
|
66
104
|
|
|
105
|
+
let bestPartialMatch:
|
|
106
|
+
| { provider: string; id: string; missing: string[] }
|
|
107
|
+
| undefined;
|
|
108
|
+
|
|
67
109
|
for (const pattern of sequence) {
|
|
68
110
|
for (const model of availableModels) {
|
|
69
111
|
const targetId = `${model.provider}/${model.id}`;
|
|
70
|
-
if (
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
if (
|
|
112
|
+
if (
|
|
113
|
+
pattern === targetId ||
|
|
114
|
+
(pattern.endsWith('*') && targetId.startsWith(pattern.slice(0, -1)))
|
|
115
|
+
) {
|
|
116
|
+
if (required) {
|
|
117
|
+
const caps = getModelCapabilities(model);
|
|
118
|
+
const { match, missing } = capabilitiesMatch(required, caps);
|
|
119
|
+
if (match) {
|
|
120
|
+
return { provider: model.provider, id: model.id };
|
|
121
|
+
} else if (!bestPartialMatch) {
|
|
122
|
+
bestPartialMatch = {
|
|
123
|
+
provider: model.provider,
|
|
124
|
+
id: model.id,
|
|
125
|
+
missing,
|
|
126
|
+
};
|
|
127
|
+
}
|
|
128
|
+
} else {
|
|
75
129
|
return { provider: model.provider, id: model.id };
|
|
130
|
+
}
|
|
76
131
|
}
|
|
77
132
|
}
|
|
78
133
|
}
|
|
79
134
|
|
|
80
|
-
return
|
|
135
|
+
return bestPartialMatch;
|
|
81
136
|
};
|
|
82
137
|
|
|
83
138
|
// ─── Public API ─────────────────────────────────────────────────────────────
|
|
@@ -97,17 +152,26 @@ export const tryFallback = async (
|
|
|
97
152
|
|
|
98
153
|
if (currentModel.provider !== 'ollama' && !state.fallbackActive) {
|
|
99
154
|
state.preferredModel = `${currentModel.provider}/${currentModel.id}`;
|
|
155
|
+
state.requiredCapabilities = getModelCapabilities(currentModel);
|
|
100
156
|
}
|
|
101
157
|
|
|
102
158
|
const target = findBestFallbackModel(
|
|
103
159
|
ctx,
|
|
104
160
|
config.fallbackSequence.length > 0 ? config.fallbackSequence : ['ollama/*'],
|
|
161
|
+
state.requiredCapabilities,
|
|
105
162
|
);
|
|
106
163
|
|
|
107
164
|
if (!target) {
|
|
108
165
|
return { success: false, message: 'No fallback models available' };
|
|
109
166
|
}
|
|
110
167
|
|
|
168
|
+
if (target.missing) {
|
|
169
|
+
return {
|
|
170
|
+
success: false,
|
|
171
|
+
message: `Fallback model ${target.provider}/${target.id} lacks required capabilities: ${target.missing.join(', ')}`,
|
|
172
|
+
};
|
|
173
|
+
}
|
|
174
|
+
|
|
111
175
|
const targetModel = ctx.modelRegistry.find(target.provider, target.id);
|
|
112
176
|
if (!targetModel) {
|
|
113
177
|
return {
|
|
@@ -153,9 +217,13 @@ export const tryRestore = async (
|
|
|
153
217
|
pi: ExtensionAPI,
|
|
154
218
|
ctx: ExtensionContext,
|
|
155
219
|
contextCompressionEnabled: boolean = false,
|
|
156
|
-
): Promise<{ success: boolean; message: string }> => {
|
|
220
|
+
): Promise<{ success: boolean; message: string; restored: boolean }> => {
|
|
157
221
|
if (!state.fallbackActive || !state.preferredModel) {
|
|
158
|
-
return {
|
|
222
|
+
return {
|
|
223
|
+
success: false,
|
|
224
|
+
message: 'No preferred model stored',
|
|
225
|
+
restored: false,
|
|
226
|
+
};
|
|
159
227
|
}
|
|
160
228
|
|
|
161
229
|
const [provider, id] = state.preferredModel.split('/');
|
|
@@ -165,6 +233,7 @@ export const tryRestore = async (
|
|
|
165
233
|
return {
|
|
166
234
|
success: false,
|
|
167
235
|
message: `Model ${state.preferredModel} not available`,
|
|
236
|
+
restored: false,
|
|
168
237
|
};
|
|
169
238
|
}
|
|
170
239
|
|
|
@@ -172,8 +241,9 @@ export const tryRestore = async (
|
|
|
172
241
|
if (success) {
|
|
173
242
|
state.fallbackActive = false;
|
|
174
243
|
state.autoRestore = false;
|
|
244
|
+
state.lastRestoreAttempt = undefined;
|
|
245
|
+
state.requiredCapabilities = undefined;
|
|
175
246
|
|
|
176
|
-
// Context Compression Bridge: Instruct the model to summarize the fallback period
|
|
177
247
|
if (contextCompressionEnabled) {
|
|
178
248
|
pi.sendMessage(
|
|
179
249
|
{
|
|
@@ -189,12 +259,47 @@ export const tryRestore = async (
|
|
|
189
259
|
|
|
190
260
|
return {
|
|
191
261
|
success,
|
|
262
|
+
restored: success,
|
|
192
263
|
message: success
|
|
193
264
|
? `Restored ${state.preferredModel}`
|
|
194
265
|
: 'Failed to restore model',
|
|
195
266
|
};
|
|
196
267
|
};
|
|
197
268
|
|
|
269
|
+
/**
|
|
270
|
+
* Periodically check if the preferred cloud model is healthy and auto-restore.
|
|
271
|
+
* Call this from turn_end or another periodic hook.
|
|
272
|
+
*/
|
|
273
|
+
export const checkAndRestore = async (
|
|
274
|
+
pi: ExtensionAPI,
|
|
275
|
+
ctx: ExtensionContext,
|
|
276
|
+
contextCompressionEnabled: boolean = false,
|
|
277
|
+
restoreCheckIntervalSec: number = 300,
|
|
278
|
+
): Promise<{ attempted: boolean; success: boolean; message: string }> => {
|
|
279
|
+
if (!state.autoRestore || !state.fallbackActive || !state.preferredModel) {
|
|
280
|
+
return {
|
|
281
|
+
attempted: false,
|
|
282
|
+
success: false,
|
|
283
|
+
message: 'Auto-restore not active',
|
|
284
|
+
};
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
const now = Date.now();
|
|
288
|
+
const intervalMs = restoreCheckIntervalSec * 1000;
|
|
289
|
+
|
|
290
|
+
if (state.lastRestoreAttempt && now - state.lastRestoreAttempt < intervalMs) {
|
|
291
|
+
return {
|
|
292
|
+
attempted: false,
|
|
293
|
+
success: false,
|
|
294
|
+
message: 'Restore throttled',
|
|
295
|
+
};
|
|
296
|
+
}
|
|
297
|
+
|
|
298
|
+
state.lastRestoreAttempt = now;
|
|
299
|
+
const result = await tryRestore(pi, ctx, contextCompressionEnabled);
|
|
300
|
+
return { attempted: true, ...result };
|
|
301
|
+
};
|
|
302
|
+
|
|
198
303
|
export const getFallbackState = (): FallbackState => {
|
|
199
304
|
return { ...state };
|
|
200
305
|
};
|
|
@@ -224,6 +329,8 @@ export const resetRateLimitState = (): void => {
|
|
|
224
329
|
state = {
|
|
225
330
|
fallbackActive: false,
|
|
226
331
|
autoRestore: false,
|
|
332
|
+
lastRestoreAttempt: undefined,
|
|
333
|
+
requiredCapabilities: undefined,
|
|
227
334
|
};
|
|
228
335
|
history = [];
|
|
229
336
|
};
|