@kylebrodeur/pi-model-router 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +1 -0
- package/LEARNINGS.md +3 -2
- package/README.md +24 -0
- package/extensions/index.ts +25 -3
- package/extensions/provider.ts +109 -60
- package/extensions/rate-limit.ts +117 -10
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
10
|
### Added
|
|
11
|
+
- Transparent wait and retry interception for string-based rate limit errors (e.g., "quota will reset after X seconds")
|
|
11
12
|
- Ollama auto-sync feature
|
|
12
13
|
- Rate-limit fallback with transparent HTTP error handling (402, 429, 503, 529)
|
|
13
14
|
- Feature toggles in config (`features` object)
|
package/LEARNINGS.md
CHANGED
|
@@ -73,10 +73,11 @@ The fallback mechanism uses a user-configurable sequence of models: `fallbackSeq
|
|
|
73
73
|
* **Key benefit**: Prevents catastrophic failures when a primary model is unavailable.
|
|
74
74
|
|
|
75
75
|
### 3. Graceful Error Handling
|
|
76
|
-
The extension transparently handles errors. For "out of credits" (`402`) or "rate limit" (`429`), it automatically switches to a fallback model and emits a custom session entry (`router-fallback`) for headless tooling to detect.
|
|
76
|
+
The extension transparently handles errors. For "out of credits" (`402`) or "rate limit" (`429`), it automatically switches to a fallback model and emits a custom session entry (`router-fallback`) for headless tooling to detect.
|
|
77
|
+
Additionally, for string-based 429 errors specifying a cooldown (e.g., "quota will reset after 58s"), the router can intercept the stream, pause for the required duration (if it does not exceed `shortDelayThreshold`), and automatically retry the original request once without failing the turn.
|
|
77
78
|
|
|
78
79
|
* **When to use**: For any extension exposed to external API services.
|
|
79
|
-
* **Key insight**: Never mask API errors; provide enough detail (status codes) in UI notifications for users to diagnose.
|
|
80
|
+
* **Key insight**: Never mask API errors; provide enough detail (status codes) in UI notifications for users to diagnose, but handle transient issues (like short rate limits) invisibly where possible.
|
|
80
81
|
|
|
81
82
|
## 🔌 Pi Integration Patterns
|
|
82
83
|
|
package/README.md
CHANGED
|
@@ -121,6 +121,30 @@ Copy the example config to one of:
|
|
|
121
121
|
|
|
122
122
|
**Priority:** Project config `.pi/model-router.json` overrides user config `~/.pi/agent/model-router.json`. Both override defaults.
|
|
123
123
|
|
|
124
|
+
### Rate Limit Interception & Fallback
|
|
125
|
+
|
|
126
|
+
The router can gracefully handle 429 Rate Limit and Quota errors. If the error specifies a wait time (e.g., "reset after 58s") that does not exceed your threshold, the router will pause and automatically retry the prompt once on the same model. If the wait time exceeds the threshold, cannot be parsed, or the retry also fails, it fails over to the next available model in your fallback sequence.
|
|
127
|
+
|
|
128
|
+
```json
|
|
129
|
+
{
|
|
130
|
+
"rateLimitFallback": {
|
|
131
|
+
"enabled": true,
|
|
132
|
+
"shortDelayThreshold": 60,
|
|
133
|
+
"autoFallback": true,
|
|
134
|
+
"autoRestore": true,
|
|
135
|
+
"restoreCheckInterval": 300,
|
|
136
|
+
"fallbackSequence": ["anthropic/claude-3-haiku-20240307", "ollama/*"]
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
| Field | Description |
|
|
142
|
+
|-------|-------------|
|
|
143
|
+
| `shortDelayThreshold` | Maximum time (in seconds) the router will pause and wait before retrying when it encounters a rate limit (default: 60). If the cooldown is longer than this, it triggers a fallback. |
|
|
144
|
+
| `fallbackSequence` | Array of model IDs (or wildcards like `ollama/*`) to try if the primary model fails or the wait time is too long. |
|
|
145
|
+
| `autoFallback` | (Optional) Automatically switch session to the fallback model globally after a hard failure. |
|
|
146
|
+
| `autoRestore` | (Optional) If fallback was triggered, automatically try to restore the original cloud model, attempting at most once every `restoreCheckInterval` seconds (default: 300). |
|
|
147
|
+
|
|
124
148
|
### Progressive Enhancement Configs
|
|
125
149
|
|
|
126
150
|
After installing optional extensions, copy one of these to `.pi/model-router.json`:
|
package/extensions/index.ts
CHANGED
|
@@ -25,7 +25,7 @@ import { registerCommands } from './commands';
|
|
|
25
25
|
import { registerRouterProvider } from './provider';
|
|
26
26
|
// ─── Feature modules (added by fork) ────────────────────────────────────────
|
|
27
27
|
import { initializeOllamaSync } from './ollama-sync';
|
|
28
|
-
import { initializeRateLimitFallback } from './rate-limit';
|
|
28
|
+
import { initializeRateLimitFallback, checkAndRestore } from './rate-limit';
|
|
29
29
|
|
|
30
30
|
// ─── Plugin Detection & Progressive Integration ──────────────────────────
|
|
31
31
|
interface PluginStatus {
|
|
@@ -35,7 +35,7 @@ interface PluginStatus {
|
|
|
35
35
|
|
|
36
36
|
const detectPlugins = (pi: ExtensionAPI): PluginStatus => {
|
|
37
37
|
const tools = (pi as any).tools ?? {};
|
|
38
|
-
const log = (pi as any).log;
|
|
38
|
+
const log = (pi as any).log || console;
|
|
39
39
|
return {
|
|
40
40
|
ledger: typeof tools.append_ledger === 'function',
|
|
41
41
|
agentBus: typeof tools.link_send === 'function',
|
|
@@ -48,7 +48,7 @@ const detectAndIntegratePlugins = (
|
|
|
48
48
|
debugEnabled: boolean,
|
|
49
49
|
) => {
|
|
50
50
|
const plugins = detectPlugins(pi);
|
|
51
|
-
const log = (pi as any).log;
|
|
51
|
+
const log = (pi as any).log || console;
|
|
52
52
|
|
|
53
53
|
// Ledger integration: log routing decisions to qmd-ledger
|
|
54
54
|
const shouldIntegrateLedger = features?.ledgerIntegration === true;
|
|
@@ -575,6 +575,28 @@ const routerExtension = (pi: ExtensionAPI) => {
|
|
|
575
575
|
await setModelInternally(routerModel);
|
|
576
576
|
}
|
|
577
577
|
}
|
|
578
|
+
|
|
579
|
+
// Auto-restore from rate-limit fallback
|
|
580
|
+
const rateLimitCfg = (currentConfig.rateLimitFallback ?? {}) as Record<
|
|
581
|
+
string,
|
|
582
|
+
unknown
|
|
583
|
+
>;
|
|
584
|
+
if (rateLimitCfg.autoRestore === true) {
|
|
585
|
+
const result = await checkAndRestore(
|
|
586
|
+
pi,
|
|
587
|
+
ctx,
|
|
588
|
+
currentConfig.features?.contextCompression === true,
|
|
589
|
+
(rateLimitCfg.restoreCheckInterval as number) ?? 300,
|
|
590
|
+
);
|
|
591
|
+
if (result.attempted && result.success) {
|
|
592
|
+
ctx.ui.notify(`[Router] Auto-restored: ${result.message}`, 'info');
|
|
593
|
+
pi.appendEntry('router-auto-restore', {
|
|
594
|
+
restored: true,
|
|
595
|
+
message: result.message,
|
|
596
|
+
});
|
|
597
|
+
}
|
|
598
|
+
}
|
|
599
|
+
|
|
578
600
|
persistState();
|
|
579
601
|
actions.updateStatus(ctx);
|
|
580
602
|
});
|
package/extensions/provider.ts
CHANGED
|
@@ -30,6 +30,20 @@ import {
|
|
|
30
30
|
hasImageAttachment,
|
|
31
31
|
} from './routing';
|
|
32
32
|
|
|
33
|
+
const rateLimitRegex = /(?:429|rate limit|quota).*?(?:reset after|try again in|wait)\s*(\d+)\s*([smh])/i;
|
|
34
|
+
|
|
35
|
+
function extractWaitTimeMs(errorText: string): number | null {
|
|
36
|
+
const match = errorText.match(rateLimitRegex);
|
|
37
|
+
if (!match) return null;
|
|
38
|
+
const value = parseInt(match[1], 10);
|
|
39
|
+
const unit = match[2].toLowerCase();
|
|
40
|
+
|
|
41
|
+
if (unit === 's') return value * 1000;
|
|
42
|
+
if (unit === 'm') return value * 60000;
|
|
43
|
+
if (unit === 'h') return value * 3600000;
|
|
44
|
+
return null;
|
|
45
|
+
}
|
|
46
|
+
|
|
33
47
|
export const createErrorMessage = (
|
|
34
48
|
model: Model<Api>,
|
|
35
49
|
message: string,
|
|
@@ -457,74 +471,109 @@ export const registerRouterProvider = (
|
|
|
457
471
|
const apiKey = auth.apiKey;
|
|
458
472
|
const headers = auth.headers;
|
|
459
473
|
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
// If the picked model has a smaller context than what we reported, truncate now.
|
|
463
|
-
let effectiveContext = context;
|
|
464
|
-
const targetLimit = targetModel.contextWindow || 128_000;
|
|
465
|
-
if (targetLimit < model.contextWindow!) {
|
|
466
|
-
effectiveContext = truncateContext(context, targetLimit);
|
|
467
|
-
}
|
|
474
|
+
let retryCount = 0;
|
|
475
|
+
let modelSuccess = false;
|
|
468
476
|
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
if (state.lastExtensionContext) {
|
|
480
|
-
if (delegatedReasoning) {
|
|
481
|
-
state.lastExtensionContext.ui.setHiddenThinkingLabel?.(
|
|
482
|
-
`Thinking (${targetProvider}/${targetModelId})...`,
|
|
483
|
-
);
|
|
484
|
-
} else {
|
|
485
|
-
state.lastExtensionContext.ui.setHiddenThinkingLabel?.();
|
|
477
|
+
while (retryCount < 2) {
|
|
478
|
+
let contentReceived = false;
|
|
479
|
+
try {
|
|
480
|
+
// HONESTY CHECK & AUTO-TRUNCATION
|
|
481
|
+
// If the picked model has a smaller context than what we reported, truncate now.
|
|
482
|
+
let effectiveContext = context;
|
|
483
|
+
const targetLimit = targetModel.contextWindow || 128_000;
|
|
484
|
+
if (targetLimit < model.contextWindow!) {
|
|
485
|
+
effectiveContext = truncateContext(context, targetLimit);
|
|
486
486
|
}
|
|
487
|
-
}
|
|
488
487
|
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
488
|
+
const thinkingOverride = actions.getThinkingOverride(
|
|
489
|
+
model.id,
|
|
490
|
+
decision.tier,
|
|
491
|
+
);
|
|
492
|
+
const delegatedReasoning =
|
|
493
|
+
targetModel.reasoning &&
|
|
494
|
+
(thinkingOverride ?? decision.thinking) !== 'off'
|
|
495
|
+
? (thinkingOverride ?? decision.thinking)
|
|
496
|
+
: undefined;
|
|
497
|
+
|
|
498
|
+
if (state.lastExtensionContext) {
|
|
499
|
+
if (delegatedReasoning) {
|
|
500
|
+
state.lastExtensionContext.ui.setHiddenThinkingLabel?.(
|
|
501
|
+
`Thinking (${targetProvider}/${targetModelId})...`,
|
|
502
|
+
);
|
|
503
|
+
} else {
|
|
504
|
+
state.lastExtensionContext.ui.setHiddenThinkingLabel?.();
|
|
505
|
+
}
|
|
506
|
+
}
|
|
501
507
|
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
508
|
+
const delegatedStream = streamSimple(
|
|
509
|
+
targetModel,
|
|
510
|
+
effectiveContext,
|
|
511
|
+
{
|
|
512
|
+
...options,
|
|
513
|
+
apiKey,
|
|
514
|
+
headers,
|
|
515
|
+
...(delegatedReasoning
|
|
516
|
+
? { reasoning: delegatedReasoning }
|
|
517
|
+
: {}),
|
|
518
|
+
},
|
|
519
|
+
);
|
|
520
|
+
|
|
521
|
+
for await (const event of delegatedStream) {
|
|
522
|
+
if (event.type === 'done') {
|
|
523
|
+
const cost = event.message.usage?.cost?.total ?? 0;
|
|
524
|
+
state.accumulatedCost += cost;
|
|
525
|
+
}
|
|
526
|
+
if (event.type === 'error' && !contentReceived) {
|
|
527
|
+
throw new Error(
|
|
528
|
+
(event as any).error?.errorMessage ||
|
|
529
|
+
'Model failed before sending content.',
|
|
530
|
+
);
|
|
531
|
+
}
|
|
532
|
+
const isContent =
|
|
533
|
+
event.type === 'text_delta' ||
|
|
534
|
+
event.type === 'thinking_delta' ||
|
|
535
|
+
event.type === 'toolcall_delta' ||
|
|
536
|
+
event.type === 'toolcall_end';
|
|
537
|
+
if (isContent) contentReceived = true;
|
|
538
|
+
stream.push(event);
|
|
507
539
|
}
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
540
|
+
modelSuccess = true;
|
|
541
|
+
success = true;
|
|
542
|
+
if (i > 0) decision.isFallback = true;
|
|
543
|
+
break; // break the retry loop
|
|
544
|
+
} catch (err) {
|
|
545
|
+
const errMsg = err instanceof Error ? err.message : String(err);
|
|
546
|
+
const waitMs = extractWaitTimeMs(errMsg);
|
|
547
|
+
const maxWaitMs = (state.currentConfig.rateLimitFallback?.shortDelayThreshold ?? 60) * 1000;
|
|
548
|
+
|
|
549
|
+
if (waitMs && waitMs <= maxWaitMs && retryCount === 0 && !contentReceived) {
|
|
550
|
+
const partialMsg = {
|
|
551
|
+
role: 'assistant',
|
|
552
|
+
content: [],
|
|
553
|
+
api: model.api,
|
|
554
|
+
provider: targetProvider,
|
|
555
|
+
model: targetModelId,
|
|
556
|
+
usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } },
|
|
557
|
+
timestamp: Date.now(),
|
|
558
|
+
} as unknown as AssistantMessage;
|
|
559
|
+
|
|
560
|
+
stream.push({
|
|
561
|
+
type: 'text_delta',
|
|
562
|
+
contentIndex: 0,
|
|
563
|
+
delta: `\n_⏳ [Router] Rate limit reached on ${targetProvider}/${targetModelId}. Waiting ${Math.ceil(waitMs/1000)}s before retrying..._\n`,
|
|
564
|
+
partial: partialMsg
|
|
565
|
+
});
|
|
566
|
+
await new Promise(resolve => setTimeout(resolve, waitMs + 1000)); // buffer 1s
|
|
567
|
+
retryCount++;
|
|
568
|
+
continue; // try the same model again
|
|
513
569
|
}
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
event.type === 'toolcall_delta' ||
|
|
518
|
-
event.type === 'toolcall_end';
|
|
519
|
-
if (isContent) contentReceived = true;
|
|
520
|
-
stream.push(event);
|
|
570
|
+
|
|
571
|
+
lastError = err;
|
|
572
|
+
break; // model failed completely, break retry loop to go to next fallback model
|
|
521
573
|
}
|
|
522
|
-
success = true;
|
|
523
|
-
if (i > 0) decision.isFallback = true;
|
|
524
|
-
break;
|
|
525
|
-
} catch (err) {
|
|
526
|
-
lastError = err;
|
|
527
574
|
}
|
|
575
|
+
|
|
576
|
+
if (modelSuccess) break; // break fallback loop
|
|
528
577
|
}
|
|
529
578
|
|
|
530
579
|
if (!success) {
|
package/extensions/rate-limit.ts
CHANGED
|
@@ -28,12 +28,21 @@ export interface RateLimitEventEntry {
|
|
|
28
28
|
httpStatus: number;
|
|
29
29
|
}
|
|
30
30
|
|
|
31
|
+
export interface ModelCapabilities {
|
|
32
|
+
vision: boolean;
|
|
33
|
+
reasoning: boolean;
|
|
34
|
+
contextWindow: number;
|
|
35
|
+
maxTokens: number;
|
|
36
|
+
}
|
|
37
|
+
|
|
31
38
|
export interface FallbackState {
|
|
32
39
|
preferredModel?: string;
|
|
33
40
|
fallbackActive: boolean;
|
|
34
41
|
autoRestore: boolean;
|
|
35
42
|
triggeredAt?: number;
|
|
36
43
|
triggerReason?: 'rate_limit' | 'budget_exceeded' | 'manual';
|
|
44
|
+
lastRestoreAttempt?: number;
|
|
45
|
+
requiredCapabilities?: ModelCapabilities;
|
|
37
46
|
}
|
|
38
47
|
|
|
39
48
|
// ─── Config ─────────────────────────────────────────────────────────────────
|
|
@@ -58,26 +67,72 @@ let history: RateLimitEventEntry[] = [];
|
|
|
58
67
|
|
|
59
68
|
// ─── Helpers ────────────────────────────────────────────────────────────────
|
|
60
69
|
|
|
70
|
+
const getModelCapabilities = (model: {
|
|
71
|
+
input: string[];
|
|
72
|
+
reasoning: boolean;
|
|
73
|
+
contextWindow: number;
|
|
74
|
+
maxTokens: number;
|
|
75
|
+
}): ModelCapabilities => ({
|
|
76
|
+
vision: model.input.includes('image'),
|
|
77
|
+
reasoning: model.reasoning,
|
|
78
|
+
contextWindow: model.contextWindow,
|
|
79
|
+
maxTokens: model.maxTokens,
|
|
80
|
+
});
|
|
81
|
+
|
|
82
|
+
const capabilitiesMatch = (
|
|
83
|
+
required: ModelCapabilities,
|
|
84
|
+
candidate: ModelCapabilities,
|
|
85
|
+
): { match: boolean; missing: string[] } => {
|
|
86
|
+
const missing: string[] = [];
|
|
87
|
+
if (required.vision && !candidate.vision) missing.push('vision');
|
|
88
|
+
if (required.reasoning && !candidate.reasoning) missing.push('reasoning');
|
|
89
|
+
if (candidate.contextWindow < required.contextWindow)
|
|
90
|
+
missing.push(
|
|
91
|
+
`contextWindow ${candidate.contextWindow} < ${required.contextWindow}`,
|
|
92
|
+
);
|
|
93
|
+
if (candidate.maxTokens < required.maxTokens)
|
|
94
|
+
missing.push(`maxTokens ${candidate.maxTokens} < ${required.maxTokens}`);
|
|
95
|
+
return { match: missing.length === 0, missing };
|
|
96
|
+
};
|
|
97
|
+
|
|
61
98
|
const findBestFallbackModel = (
|
|
62
99
|
ctx: ExtensionContext,
|
|
63
100
|
sequence: string[],
|
|
64
|
-
|
|
101
|
+
required?: ModelCapabilities,
|
|
102
|
+
): { provider: string; id: string; missing?: string[] } | undefined => {
|
|
65
103
|
const availableModels = ctx.modelRegistry.getAvailable();
|
|
66
104
|
|
|
105
|
+
let bestPartialMatch:
|
|
106
|
+
| { provider: string; id: string; missing: string[] }
|
|
107
|
+
| undefined;
|
|
108
|
+
|
|
67
109
|
for (const pattern of sequence) {
|
|
68
110
|
for (const model of availableModels) {
|
|
69
111
|
const targetId = `${model.provider}/${model.id}`;
|
|
70
|
-
if (
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
if (
|
|
112
|
+
if (
|
|
113
|
+
pattern === targetId ||
|
|
114
|
+
(pattern.endsWith('*') && targetId.startsWith(pattern.slice(0, -1)))
|
|
115
|
+
) {
|
|
116
|
+
if (required) {
|
|
117
|
+
const caps = getModelCapabilities(model);
|
|
118
|
+
const { match, missing } = capabilitiesMatch(required, caps);
|
|
119
|
+
if (match) {
|
|
120
|
+
return { provider: model.provider, id: model.id };
|
|
121
|
+
} else if (!bestPartialMatch) {
|
|
122
|
+
bestPartialMatch = {
|
|
123
|
+
provider: model.provider,
|
|
124
|
+
id: model.id,
|
|
125
|
+
missing,
|
|
126
|
+
};
|
|
127
|
+
}
|
|
128
|
+
} else {
|
|
75
129
|
return { provider: model.provider, id: model.id };
|
|
130
|
+
}
|
|
76
131
|
}
|
|
77
132
|
}
|
|
78
133
|
}
|
|
79
134
|
|
|
80
|
-
return
|
|
135
|
+
return bestPartialMatch;
|
|
81
136
|
};
|
|
82
137
|
|
|
83
138
|
// ─── Public API ─────────────────────────────────────────────────────────────
|
|
@@ -97,17 +152,26 @@ export const tryFallback = async (
|
|
|
97
152
|
|
|
98
153
|
if (currentModel.provider !== 'ollama' && !state.fallbackActive) {
|
|
99
154
|
state.preferredModel = `${currentModel.provider}/${currentModel.id}`;
|
|
155
|
+
state.requiredCapabilities = getModelCapabilities(currentModel);
|
|
100
156
|
}
|
|
101
157
|
|
|
102
158
|
const target = findBestFallbackModel(
|
|
103
159
|
ctx,
|
|
104
160
|
config.fallbackSequence.length > 0 ? config.fallbackSequence : ['ollama/*'],
|
|
161
|
+
state.requiredCapabilities,
|
|
105
162
|
);
|
|
106
163
|
|
|
107
164
|
if (!target) {
|
|
108
165
|
return { success: false, message: 'No fallback models available' };
|
|
109
166
|
}
|
|
110
167
|
|
|
168
|
+
if (target.missing) {
|
|
169
|
+
return {
|
|
170
|
+
success: false,
|
|
171
|
+
message: `Fallback model ${target.provider}/${target.id} lacks required capabilities: ${target.missing.join(', ')}`,
|
|
172
|
+
};
|
|
173
|
+
}
|
|
174
|
+
|
|
111
175
|
const targetModel = ctx.modelRegistry.find(target.provider, target.id);
|
|
112
176
|
if (!targetModel) {
|
|
113
177
|
return {
|
|
@@ -153,9 +217,13 @@ export const tryRestore = async (
|
|
|
153
217
|
pi: ExtensionAPI,
|
|
154
218
|
ctx: ExtensionContext,
|
|
155
219
|
contextCompressionEnabled: boolean = false,
|
|
156
|
-
): Promise<{ success: boolean; message: string }> => {
|
|
220
|
+
): Promise<{ success: boolean; message: string; restored: boolean }> => {
|
|
157
221
|
if (!state.fallbackActive || !state.preferredModel) {
|
|
158
|
-
return {
|
|
222
|
+
return {
|
|
223
|
+
success: false,
|
|
224
|
+
message: 'No preferred model stored',
|
|
225
|
+
restored: false,
|
|
226
|
+
};
|
|
159
227
|
}
|
|
160
228
|
|
|
161
229
|
const [provider, id] = state.preferredModel.split('/');
|
|
@@ -165,6 +233,7 @@ export const tryRestore = async (
|
|
|
165
233
|
return {
|
|
166
234
|
success: false,
|
|
167
235
|
message: `Model ${state.preferredModel} not available`,
|
|
236
|
+
restored: false,
|
|
168
237
|
};
|
|
169
238
|
}
|
|
170
239
|
|
|
@@ -172,8 +241,9 @@ export const tryRestore = async (
|
|
|
172
241
|
if (success) {
|
|
173
242
|
state.fallbackActive = false;
|
|
174
243
|
state.autoRestore = false;
|
|
244
|
+
state.lastRestoreAttempt = undefined;
|
|
245
|
+
state.requiredCapabilities = undefined;
|
|
175
246
|
|
|
176
|
-
// Context Compression Bridge: Instruct the model to summarize the fallback period
|
|
177
247
|
if (contextCompressionEnabled) {
|
|
178
248
|
pi.sendMessage(
|
|
179
249
|
{
|
|
@@ -189,12 +259,47 @@ export const tryRestore = async (
|
|
|
189
259
|
|
|
190
260
|
return {
|
|
191
261
|
success,
|
|
262
|
+
restored: success,
|
|
192
263
|
message: success
|
|
193
264
|
? `Restored ${state.preferredModel}`
|
|
194
265
|
: 'Failed to restore model',
|
|
195
266
|
};
|
|
196
267
|
};
|
|
197
268
|
|
|
269
|
+
/**
|
|
270
|
+
* Periodically check if the preferred cloud model is healthy and auto-restore.
|
|
271
|
+
* Call this from turn_end or another periodic hook.
|
|
272
|
+
*/
|
|
273
|
+
export const checkAndRestore = async (
|
|
274
|
+
pi: ExtensionAPI,
|
|
275
|
+
ctx: ExtensionContext,
|
|
276
|
+
contextCompressionEnabled: boolean = false,
|
|
277
|
+
restoreCheckIntervalSec: number = 300,
|
|
278
|
+
): Promise<{ attempted: boolean; success: boolean; message: string }> => {
|
|
279
|
+
if (!state.autoRestore || !state.fallbackActive || !state.preferredModel) {
|
|
280
|
+
return {
|
|
281
|
+
attempted: false,
|
|
282
|
+
success: false,
|
|
283
|
+
message: 'Auto-restore not active',
|
|
284
|
+
};
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
const now = Date.now();
|
|
288
|
+
const intervalMs = restoreCheckIntervalSec * 1000;
|
|
289
|
+
|
|
290
|
+
if (state.lastRestoreAttempt && now - state.lastRestoreAttempt < intervalMs) {
|
|
291
|
+
return {
|
|
292
|
+
attempted: false,
|
|
293
|
+
success: false,
|
|
294
|
+
message: 'Restore throttled',
|
|
295
|
+
};
|
|
296
|
+
}
|
|
297
|
+
|
|
298
|
+
state.lastRestoreAttempt = now;
|
|
299
|
+
const result = await tryRestore(pi, ctx, contextCompressionEnabled);
|
|
300
|
+
return { attempted: true, ...result };
|
|
301
|
+
};
|
|
302
|
+
|
|
198
303
|
export const getFallbackState = (): FallbackState => {
|
|
199
304
|
return { ...state };
|
|
200
305
|
};
|
|
@@ -224,6 +329,8 @@ export const resetRateLimitState = (): void => {
|
|
|
224
329
|
state = {
|
|
225
330
|
fallbackActive: false,
|
|
226
331
|
autoRestore: false,
|
|
332
|
+
lastRestoreAttempt: undefined,
|
|
333
|
+
requiredCapabilities: undefined,
|
|
227
334
|
};
|
|
228
335
|
history = [];
|
|
229
336
|
};
|