@crystalfluxay/opencode-rate-limiter 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +105 -0
- package/dist/index.js +779 -0
- package/package.json +38 -0
- package/rate-limit.example.json +53 -0
package/README.md
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# opencode-rate-limit
|
|
2
|
+
|
|
3
|
+
OpenCode plugin for proactive TPM/RPM rate limiting with multi-provider fallback.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- **Proactive TPM/RPM limiting** — counts tokens and requests per minute and blocks requests that would exceed the configured limits before they reach the API
|
|
8
|
+
- **Reactive error detection** — parses response body against user-configured regex rules
|
|
9
|
+
- **Fuzzy error matching** — `bodyPattern` regex matches any text in the response body
|
|
10
|
+
- **Multi-provider support** — different error rules per provider
|
|
11
|
+
- **Fallback model switching** — auto-switch to backup models when rate limited
|
|
12
|
+
- **Circuit breaker** — auto-disconnect failing models
|
|
13
|
+
- **Smart cooldown** — skip rate-limited models for configurable duration
|
|
14
|
+
- **`/rate-limit-status`** — diagnostic command
|
|
15
|
+
|
|
16
|
+
## Install
|
|
17
|
+
|
|
18
|
+
Add to `opencode.json`:
|
|
19
|
+
|
|
20
|
+
```json
|
|
21
|
+
{
|
|
22
|
+
"plugin": ["opencode-rate-limit"]
|
|
23
|
+
}
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## Configure
|
|
27
|
+
|
|
28
|
+
Create `~/.opencode/rate-limit.json` (a project-level `.opencode/rate-limit.json` inside the worktree takes precedence when it exists):
|
|
29
|
+
|
|
30
|
+
```json
|
|
31
|
+
{
|
|
32
|
+
"providers": [
|
|
33
|
+
{
|
|
34
|
+
"provider": "xfyun",
|
|
35
|
+
"tpm": 100000,
|
|
36
|
+
"rpm": 10,
|
|
37
|
+
"errorDetection": [
|
|
38
|
+
{ "bodyPattern": "11210|TPM.*超限", "category": "tpm", "description": "TPM超限", "waitMs": 60000 },
|
|
39
|
+
{ "bodyPattern": "11201|次数超限", "category": "tpm", "description": "次数超限", "waitMs": 60000 },
|
|
40
|
+
{ "statusPattern": "^429$", "category": "tpm", "description": "HTTP 429", "waitMs": 60000 },
|
|
41
|
+
{ "statusPattern": "^5\\d{2}$", "category": "server_error", "description": "HTTP 5xx" },
|
|
42
|
+
{ "statusPattern": "^401$", "category": "fatal", "description": "HTTP 401" }
|
|
43
|
+
]
|
|
44
|
+
}
|
|
45
|
+
],
|
|
46
|
+
"fallbackModels": [
|
|
47
|
+
{ "providerId": "openai", "modelId": "gpt-4o" }
|
|
48
|
+
]
|
|
49
|
+
}
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
### Error categories
|
|
53
|
+
|
|
54
|
+
| Category | Behavior |
|
|
55
|
+
|----------|----------|
|
|
56
|
+
| `tpm` | Wait `waitMs` (default 60s), then retry or fallback |
|
|
57
|
+
| `rpm` | Wait `waitMs`, then retry or fallback |
|
|
58
|
+
| `server_error` | Wait `cooldownMs`, retry with backoff |
|
|
59
|
+
| `retryable` | Wait `waitMs`, retry |
|
|
60
|
+
| `fatal` | No retry, propagate error immediately |
|
|
61
|
+
|
|
62
|
+
### Pattern matching
|
|
63
|
+
|
|
64
|
+
- `statusPattern` — regex against HTTP status code string (e.g. `"^429$"`, `"^5\\d{2}$"`)
|
|
65
|
+
- `bodyPattern` — regex against full response body (case-insensitive, fuzzy match)
|
|
66
|
+
- Each field is optional, but every rule must specify at least one of them
|
|
67
|
+
- When both specified, BOTH must match (AND logic)
|
|
68
|
+
- Rules checked in order, first match wins
|
|
69
|
+
|
|
70
|
+
### Full config reference
|
|
71
|
+
|
|
72
|
+
```json
|
|
73
|
+
{
|
|
74
|
+
"enabled": true,
|
|
75
|
+
"cooldownMs": 60000,
|
|
76
|
+
"fallbackMode": "cycle",
|
|
77
|
+
"fallbackModels": [],
|
|
78
|
+
"retryPolicy": {
|
|
79
|
+
"maxRetries": 3,
|
|
80
|
+
"strategy": "exponential",
|
|
81
|
+
"baseDelayMs": 1000,
|
|
82
|
+
"maxDelayMs": 30000,
|
|
83
|
+
"jitterEnabled": true
|
|
84
|
+
},
|
|
85
|
+
"circuitBreaker": {
|
|
86
|
+
"enabled": true,
|
|
87
|
+
"failureThreshold": 5,
|
|
88
|
+
"recoveryTimeoutMs": 60000
|
|
89
|
+
},
|
|
90
|
+
"providers": [],
|
|
91
|
+
"fetchInterception": true,
|
|
92
|
+
"eventFallback": true,
|
|
93
|
+
"logLevel": "warn"
|
|
94
|
+
}
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## Preset templates
|
|
98
|
+
|
|
99
|
+
Copy from `src/error-codes.ts`:
|
|
100
|
+
- `XUNFEI_PRESET` — 讯飞星辰 MaaS Astron Coding Plan
|
|
101
|
+
- `OPENAI_PRESET` — Standard OpenAI-compatible providers
|
|
102
|
+
|
|
103
|
+
## License
|
|
104
|
+
|
|
105
|
+
MIT
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,779 @@
|
|
|
1
|
+
// src/config.ts
|
|
2
|
+
import * as fs from "node:fs";
|
|
3
|
+
import * as path from "node:path";
|
|
4
|
+
import * as os from "node:os";
|
|
5
|
+
// Retry settings used when the user config does not override `retryPolicy`.
var DEFAULT_RETRY_POLICY = {
  maxRetries: 3,
  strategy: "exponential",
  // Delays are expressed in milliseconds.
  baseDelayMs: 1000,
  maxDelayMs: 30000,
  jitterEnabled: true
};
// Circuit-breaker settings used when the user config does not override `circuitBreaker`.
var DEFAULT_CIRCUIT_BREAKER = {
  enabled: true,
  failureThreshold: 5,
  recoveryTimeoutMs: 60000
};
// Baseline configuration; user-supplied values are merged over this object.
// The nested policy objects are referenced, not copied.
var DEFAULT_CONFIG = {
  enabled: true,
  cooldownMs: 60000,
  fallbackMode: "cycle",
  fallbackModels: [],
  retryPolicy: DEFAULT_RETRY_POLICY,
  circuitBreaker: DEFAULT_CIRCUIT_BREAKER,
  providers: [],
  fetchInterception: true,
  eventFallback: true,
  logLevel: "warn"
};
|
|
29
|
+
/**
 * Resolve which rate-limit config file should be loaded.
 *
 * The project-level file (`<worktree or cwd>/.opencode/rate-limit.json`) wins
 * when it exists; otherwise the home-directory path is returned, whether or
 * not that file exists (callers check existence themselves).
 *
 * @param {string} [worktree] - Project root; falls back to `process.cwd()`.
 * @returns {string} Absolute path of the config file to try.
 */
function resolveConfigPath(worktree) {
  const projectRoot = worktree ? worktree : process.cwd();
  const projectPath = path.join(projectRoot, ".opencode", "rate-limit.json");
  if (fs.existsSync(projectPath)) {
    return projectPath;
  }
  // The home path is the answer either way, so no second existence probe is needed.
  return path.join(os.homedir(), ".opencode", "rate-limit.json");
}
|
|
40
|
+
/**
 * @param {unknown} item
 * @returns {boolean} True for non-null, non-array objects.
 */
function isObject(item) {
  return item !== null && typeof item === "object" && !Array.isArray(item);
}

/**
 * Recursively merge `source` over `target` without mutating either argument.
 *
 * Plain objects are merged key by key; every other defined value (arrays
 * included) replaces the target value by reference. `undefined` source values
 * leave the target value in place.
 *
 * Keys that could alter the prototype chain (`__proto__`, `constructor`,
 * `prototype`) are skipped: `source` typically comes from `JSON.parse` of a
 * user file, where `"__proto__"` is an ordinary own key and assigning it would
 * replace the merged object's prototype.
 *
 * @param {object} target - Base values.
 * @param {object} source - Overrides.
 * @returns {object} A new merged object.
 */
function deepMerge(target, source) {
  const result = { ...target };
  for (const key of Object.keys(source)) {
    if (key === "__proto__" || key === "constructor" || key === "prototype") {
      continue;
    }
    const sourceVal = source[key];
    const targetVal = result[key];
    if (isObject(sourceVal) && isObject(targetVal)) {
      result[key] = deepMerge(targetVal, sourceVal);
    } else if (sourceVal !== undefined) {
      result[key] = sourceVal;
    }
  }
  return result;
}
|
|
56
|
+
/**
 * Validate a merged configuration object.
 *
 * Numeric settings must be finite numbers at or above their minimum; values of
 * the wrong type are reported instead of silently passing the range check.
 * `providers` and `fallbackModels` must be arrays, and malformed entries are
 * reported rather than causing a TypeError.
 *
 * @param {object} config - Merged configuration (defaults + user overrides).
 * @returns {string[]} Human-readable problems; empty when the config is valid.
 */
function validateConfig(config) {
  const errors = [];
  const checkMin = (value, min, label) => {
    if (typeof value !== "number" || !Number.isFinite(value)) {
      errors.push(`${label} must be a finite number`);
    } else if (value < min) {
      errors.push(`${label} must be >= ${min}`);
    }
  };

  checkMin(config.cooldownMs, 0, "cooldownMs");
  checkMin(config.retryPolicy?.maxRetries, 0, "retryPolicy.maxRetries");
  checkMin(config.retryPolicy?.baseDelayMs, 0, "retryPolicy.baseDelayMs");
  checkMin(config.circuitBreaker?.failureThreshold, 1, "circuitBreaker.failureThreshold");

  if (!Array.isArray(config.providers)) {
    errors.push("providers must be an array");
  } else {
    for (const p of config.providers) {
      if (p === null || typeof p !== "object") {
        errors.push("provider entry must be an object");
        continue;
      }
      // tpm/rpm are optional: absent or null means "no limit configured".
      for (const field of ["tpm", "rpm"]) {
        if (p[field] != null) {
          checkMin(p[field], 0, `provider "${p.provider}": ${field}`);
        }
      }
    }
  }

  if (!Array.isArray(config.fallbackModels)) {
    errors.push("fallbackModels must be an array");
  } else {
    for (const fm of config.fallbackModels) {
      if (!fm?.providerId || !fm?.modelId) {
        errors.push(`fallback model: providerId and modelId are required`);
      }
    }
  }
  return errors;
}
|
|
85
|
+
/**
 * Load, merge and validate the rate-limit configuration.
 *
 * The file chosen by `resolveConfigPath` is read when it exists and merged over
 * `DEFAULT_CONFIG`. If the file cannot be read or parsed, or its top-level JSON
 * value is not an object, the defaults are returned together with an error so
 * the caller can decide how to proceed.
 *
 * @param {string} [worktree] - Project root used to locate a project-level config.
 * @returns {{config: object, sourcePath: (string|null), errors: string[]}}
 *   `sourcePath` is null when no config file exists.
 */
function loadConfig(worktree) {
  const configPath = resolveConfigPath(worktree);
  const failure = (message) => ({
    config: DEFAULT_CONFIG,
    sourcePath: configPath,
    errors: [message]
  });

  let userConfig = {};
  let sourcePath = null;
  if (fs.existsSync(configPath)) {
    let parsed;
    try {
      parsed = JSON.parse(fs.readFileSync(configPath, "utf-8"));
    } catch (err) {
      return failure(`Failed to parse config: ${err instanceof Error ? err.message : String(err)}`);
    }
    // A null/array/primitive root would crash or corrupt the merge below.
    if (!isObject(parsed)) {
      return failure("Failed to parse config: top-level JSON value must be an object");
    }
    userConfig = parsed;
    sourcePath = configPath;
  }

  const merged = deepMerge(DEFAULT_CONFIG, userConfig);
  const errors = validateConfig(merged);
  return { config: merged, sourcePath, errors };
}
|
|
106
|
+
/**
 * Shared lookup behind findProviderTPM / findProviderRPM.
 *
 * Prefers the entry whose `provider` and `model` both match; if that entry
 * does not define `field`, falls back to the provider entry without a model.
 * Returns 0 when neither defines it.
 */
function findProviderLimit(config, providerId, modelId, field) {
  const { providers } = config;
  const modelEntry = providers.find((entry) => entry.provider === providerId && entry.model === modelId);
  if (modelEntry?.[field] !== undefined) {
    return modelEntry[field];
  }
  const providerEntry = providers.find((entry) => entry.provider === providerId && !entry.model);
  if (providerEntry?.[field] !== undefined) {
    return providerEntry[field];
  }
  return 0;
}

/**
 * Tokens-per-minute limit configured for a provider/model pair.
 * @returns {number} The configured `tpm`, or 0 when none is configured.
 */
function findProviderTPM(config, providerId, modelId) {
  return findProviderLimit(config, providerId, modelId, "tpm");
}

/**
 * Requests-per-minute limit configured for a provider/model pair.
 * @returns {number} The configured `rpm`, or 0 when none is configured.
 */
function findProviderRPM(config, providerId, modelId) {
  return findProviderLimit(config, providerId, modelId, "rpm");
}
|
|
124
|
+
|
|
125
|
+
// src/token-counter.ts
|
|
126
|
+
// Length of the accounting window in milliseconds (one minute).
var WINDOW_MS = 60000;

/**
 * In-memory usage tracker keyed by an arbitrary string.
 *
 * Tokens are tracked as timestamped entries and summed over the trailing
 * WINDOW_MS. Requests are plain counters that are cleared as a whole every
 * WINDOW_MS once `startResetTimer()` has been called.
 */
class TokenCounter {
  /** key -> array of `{ ts, tokens }` entries. */
  windows = new Map();
  /** key -> request count since the last reset. */
  rpmCounters = new Map();
  /** Interval handles owned by this instance; cleared by `destroy()`. */
  rpmResetTimers = new Map();

  /** Record `tokens` for `key` at the current time and drop expired entries. */
  record(key, tokens) {
    const now = Date.now();
    const cutoff = now - WINDOW_MS;
    const live = (this.windows.get(key) ?? []).filter((e) => e.ts > cutoff);
    live.push({ ts: now, tokens });
    this.windows.set(key, live);
  }

  /** Count one request for `key`. */
  recordRequest(key) {
    this.rpmCounters.set(key, (this.rpmCounters.get(key) ?? 0) + 1);
  }

  /** @returns {number} Tokens recorded for `key` within the trailing window. */
  getTPM(key) {
    const entries = this.windows.get(key);
    if (!entries) {
      return 0;
    }
    const cutoff = Date.now() - WINDOW_MS;
    return entries.filter((e) => e.ts > cutoff).reduce((sum, e) => sum + e.tokens, 0);
  }

  /** @returns {number} Requests counted for `key` since the last reset. */
  getRPM(key) {
    return this.rpmCounters.get(key) ?? 0;
  }

  /** @returns {boolean} Whether adding `incomingTokens` would pass `limit`; a limit <= 0 disables the check. */
  wouldExceedTPM(key, limit, incomingTokens) {
    if (limit <= 0) {
      return false;
    }
    return this.getTPM(key) + incomingTokens > limit;
  }

  /** @returns {boolean} Whether `key` has already reached `limit` requests; a limit <= 0 disables the check. */
  wouldExceedRPM(key, limit) {
    if (limit <= 0) {
      return false;
    }
    return this.getRPM(key) >= limit;
  }

  /** Forget all token windows and request counters. */
  reset() {
    this.windows.clear();
    this.rpmCounters.clear();
  }

  /** Forget the token window and request counter of one key. */
  resetKey(key) {
    this.windows.delete(key);
    this.rpmCounters.delete(key);
  }

  /** @returns {string[]} Keys that have a token window. */
  getKeys() {
    return [...this.windows.keys()];
  }

  /**
   * Start the periodic request-counter reset.
   *
   * The interval handle is stored so `destroy()` can clear it, repeated calls
   * do not stack additional intervals, and the timer is unref'd so it does not
   * by itself keep the Node.js process alive.
   */
  startResetTimer() {
    if (this.rpmResetTimers.has("global")) {
      return;
    }
    const timer = setInterval(() => {
      this.rpmCounters.clear();
    }, WINDOW_MS);
    timer.unref?.();
    this.rpmResetTimers.set("global", timer);
  }

  /** Stop all timers and drop all recorded state. */
  destroy() {
    for (const timer of this.rpmResetTimers.values()) {
      clearInterval(timer);
    }
    this.rpmResetTimers.clear();
    this.windows.clear();
    this.rpmCounters.clear();
  }
}
|
|
192
|
+
/**
 * Read the `x-ratelimit-limit`, `x-ratelimit-remaining` and `x-ratelimit-reset`
 * headers as integers.
 *
 * For limit and remaining only the first comma-separated element is used.
 *
 * @param {{get(name: string): (string|null|undefined)}} headers
 * @returns {{limit: number, remaining: number, reset: number} | null}
 *   Null when any header is missing/empty or does not start with an integer.
 */
function parseRateLimitHeaders(headers) {
  const [limitRaw, remainingRaw, resetRaw] = [
    "x-ratelimit-limit",
    "x-ratelimit-remaining",
    "x-ratelimit-reset"
  ].map((name) => headers.get(name));
  if (!(limitRaw && remainingRaw && resetRaw)) {
    return null;
  }

  const leadingInt = (raw) => Number.parseInt(raw.split(",")[0].trim(), 10);
  const parsed = {
    limit: leadingInt(limitRaw),
    remaining: leadingInt(remainingRaw),
    reset: Number.parseInt(resetRaw.trim(), 10)
  };
  return Object.values(parsed).some((value) => Number.isNaN(value)) ? null : parsed;
}
|
|
210
|
+
var tokenCounter = new TokenCounter;
|
|
211
|
+
|
|
212
|
+
// src/error-codes.ts
|
|
213
|
+
/**
 * Classify a failed response against the configured error rules.
 *
 * Rules are checked in order and the first match wins. A rule may combine any
 * of these criteria (all that are present must hold):
 *   - `statusPattern`  regex tested against the HTTP status as a string
 *   - `bodyPattern`    case-insensitive regex tested against the raw body text
 *   - `httpStatus`     exact status comparison
 *   - `bodyCode`       compared (as strings) with the numeric code found in a JSON body
 *   - `messagePattern` case-insensitive regex tested against the message found
 *                      in a JSON body, or against the raw body when none was found
 * A rule without any criterion never matches, and an invalid regex makes its
 * rule fail to match.
 *
 * When no rule matches, HTTP 429 is treated as "tpm" and HTTP >= 500 as
 * "server_error"; everything else is reported as unmatched.
 *
 * @param {number} httpStatus - Response status (0 when unknown).
 * @param {string} [body] - Raw response body or error text.
 * @param {object[]} rules - Error-detection rules.
 * @param {number} defaultCooldownMs - Wait used when a rule has no `waitMs`.
 * @returns {{matched: boolean, category: string, description: string, waitMs: number, rule: (object|null)}}
 */
function matchError(httpStatus, body, rules, defaultCooldownMs) {
  const parsedBody = parseErrorBody(body);
  const bodyText = typeof body === "string" ? body : body == null ? "" : String(body);

  for (const rule of rules) {
    if (!errorRuleMatches(rule, httpStatus, bodyText, parsedBody)) {
      continue;
    }
    return {
      matched: true,
      category: rule.category,
      description: rule.description,
      waitMs: rule.waitMs ?? defaultCooldownMs,
      rule
    };
  }

  if (httpStatus === 429) {
    return {
      matched: true,
      category: "tpm",
      description: "HTTP 429 (unmatched by rules, treating as TPM)",
      waitMs: defaultCooldownMs,
      rule: null
    };
  }
  if (httpStatus >= 500) {
    return {
      matched: true,
      category: "server_error",
      description: `HTTP ${httpStatus} (unmatched, treating as server error)`,
      waitMs: defaultCooldownMs,
      rule: null
    };
  }
  return { matched: false, category: "fatal", description: "Unknown error", waitMs: 0, rule: null };
}

/**
 * Pull a numeric code and a message out of a JSON body, looking at the top
 * level first and then inside a nested `error` object. Returns an empty object
 * when the body is absent or is not a JSON object.
 */
function parseErrorBody(body) {
  if (!body) {
    return {};
  }
  try {
    const parsed = JSON.parse(body);
    if (parsed === null || typeof parsed !== "object") {
      return {};
    }
    let code = extractNumericField(parsed, ["code", "error_code", "errorCode", "errCode", "ret"]);
    let message = extractStringField(parsed, ["message", "msg", "error", "error_msg", "errorMsg", "errMsg"]);
    if (!code && parsed.error && typeof parsed.error === "object") {
      const nested = parsed.error;
      // Keep the top-level values when the nested object has nothing better.
      code = extractNumericField(nested, ["code", "error_code", "errorCode"]) ?? code;
      message = extractStringField(nested, ["message", "msg"]) ?? message;
    }
    return { code, message };
  } catch {
    // Not JSON: pattern rules can still match the raw text.
    return {};
  }
}

/** Test `text` against a user-supplied pattern; an invalid pattern counts as "no match". */
function testRulePattern(pattern, flags, text) {
  try {
    return new RegExp(pattern, flags).test(text);
  } catch {
    return false;
  }
}

/** Evaluate every criterion present on `rule`; false when none is present. */
function errorRuleMatches(rule, httpStatus, bodyText, parsedBody) {
  let criteria = 0;
  if (rule.httpStatus !== undefined) {
    criteria++;
    if (rule.httpStatus !== httpStatus) {
      return false;
    }
  }
  if (rule.statusPattern != null) {
    criteria++;
    if (!testRulePattern(rule.statusPattern, "", String(httpStatus))) {
      return false;
    }
  }
  if (rule.bodyCode !== undefined) {
    criteria++;
    if (parsedBody.code === undefined || String(rule.bodyCode) !== String(parsedBody.code)) {
      return false;
    }
  }
  if (rule.bodyPattern != null) {
    criteria++;
    if (!testRulePattern(rule.bodyPattern, "i", bodyText)) {
      return false;
    }
  }
  if (rule.messagePattern) {
    criteria++;
    if (!testRulePattern(rule.messagePattern, "i", parsedBody.message || bodyText)) {
      return false;
    }
  }
  return criteria > 0;
}

/**
 * Return the first of `keys` whose value on `obj` is a number, or a string
 * that starts with an integer (parsed base 10). Undefined when none qualifies.
 */
function extractNumericField(obj, keys) {
  for (const key of keys) {
    const val = obj[key];
    if (typeof val === "number") {
      return val;
    }
    if (typeof val === "string") {
      const num = Number.parseInt(val, 10);
      if (!Number.isNaN(num)) {
        return num;
      }
    }
  }
  return undefined;
}

/** Return the first of `keys` whose value on `obj` is a string, else undefined. */
function extractStringField(obj, keys) {
  for (const key of keys) {
    const val = obj[key];
    if (typeof val === "string") {
      return val;
    }
  }
  return undefined;
}
|
|
301
|
+
|
|
302
|
+
// src/fetch-wrapper.ts
|
|
303
|
+
// Per provider/model runtime state, keyed by the string built by stateKey().
var rateLimitStates = new Map();

/** Build the map key for a provider/model pair: `<provider>::<model>`. */
function stateKey(pid, mid) {
  return `${pid}::${mid}`;
}

/**
 * Fetch the mutable state record for key `k`, creating a zeroed record on
 * first use. Repeated calls with the same key return the same object.
 */
function getState(k) {
  const existing = rateLimitStates.get(k);
  if (existing) {
    return existing;
  }
  const fresh = {
    cooldownUntil: 0,
    consecutiveFailures: 0,
    circuitOpen: false,
    circuitRecoveryAt: 0
  };
  rateLimitStates.set(k, fresh);
  return fresh;
}
|
|
315
|
+
/**
 * Rough token estimate for a request body: one token per four characters,
 * rounded up.
 *
 * When the body is JSON with a `messages` array, only message text is counted:
 * string `content`, plus string items and the `text` field of object items
 * when `content` is an array. Any other body (including unparsable JSON) is
 * measured by its full length.
 *
 * @param {string} [body] - Serialized request body.
 * @returns {number} Estimated tokens; 0 for an empty/missing body.
 */
function estimateTokens(body) {
  if (!body) {
    return 0;
  }
  const wholeBodyEstimate = () => Math.ceil(body.length / 4);
  const partLength = (part) => {
    if (typeof part === "string") {
      return part.length;
    }
    if (typeof part === "object" && part !== null && "text" in part) {
      return String(part.text).length;
    }
    return 0;
  };

  try {
    const payload = JSON.parse(body);
    if (!Array.isArray(payload.messages)) {
      return wholeBodyEstimate();
    }
    const chars = payload.messages.reduce((total, message) => {
      const content = message.content;
      if (typeof content === "string") {
        return total + content.length;
      }
      if (Array.isArray(content)) {
        return total + content.reduce((sum, part) => sum + partLength(part), 0);
      }
      return total;
    }, 0);
    return Math.ceil(chars / 4);
  } catch {
    return wholeBodyEstimate();
  }
}
|
|
340
|
+
// Logger used by the fetch wrapper; silent until setLogger() installs one.
var _log = {
  debug() {},
  info() {},
  warn() {},
  error() {}
};

/** Replace the fetch wrapper's logger with `l` (expects debug/info/warn/error methods). */
function setLogger(l) {
  _log = l;
}
|
|
344
|
+
/**
 * Read a response body as text from a clone, leaving the original response
 * unread. Resolves to "" when cloning or reading fails.
 *
 * @param {{clone(): {text(): Promise<string>}}} r - Response-like object.
 * @returns {Promise<string>}
 */
async function peekBody(r) {
  let text;
  try {
    const copy = r.clone();
    text = await copy.text();
  } catch {
    text = "";
  }
  return text;
}
|
|
351
|
+
/**
 * Wrap a fetch implementation with rate-limit gating for one provider/model.
 *
 * Before the request the wrapper rejects (throws) when the circuit is open,
 * when the pair is in cooldown, or — for `/chat/completions` requests with a
 * body — when the estimated tokens or the request count would exceed the
 * configured TPM/RPM. After the request it classifies non-OK responses with
 * `matchError`: fatal results are reported through `onFatalError`; other
 * matched results bump the failure counter, may open the circuit, start a
 * cooldown and are reported through `onRateLimited`. The response itself is
 * always returned to the caller unchanged.
 *
 * @param {Function} baseFetch - The fetch implementation to delegate to.
 * @param {{config: object, providerId: string, modelId: string,
 *          onRateLimited: Function, onFatalError: Function}} opts
 * @returns {Function} An async `(input, init) => Response` function.
 * @throws {Error} From the returned function, when a pre-flight gate blocks the request.
 */
function createWrappedFetch(baseFetch, opts) {
  const { config, providerId, modelId, onRateLimited, onFatalError } = opts;
  const key = stateKey(providerId, modelId);
  const tpmLimit = findProviderTPM(config, providerId, modelId);
  const rpmLimit = findProviderRPM(config, providerId, modelId);
  const provCfg = config.providers.find((p) => p.provider === providerId);
  const errorRules = provCfg?.errorDetection ?? [];

  return async (input, init) => {
    const url = typeof input === "string" ? input : input instanceof URL ? input.href : input.url;
    const isChat = url.includes("/chat/completions");
    const now = Date.now();
    const state = getState(key);

    if (state.circuitOpen) {
      if (now < state.circuitRecoveryAt) {
        _log.warn(`[rl] Circuit OPEN: ${key}`);
        throw new Error(`Circuit breaker open`);
      }
      state.circuitOpen = false;
      _log.info(`[rl] Circuit recovery: ${key}`);
    }

    if (state.cooldownUntil > 0 && now < state.cooldownUntil) {
      const rem = Math.ceil((state.cooldownUntil - now) / 1000);
      _log.warn(`[rl] Cooldown ${key} (${rem}s)`);
      onRateLimited(providerId, modelId, `Cooldown: ${rem}s`);
      throw new Error(`Provider in cooldown`);
    }

    if (isChat && init?.body) {
      // Only string bodies can be inspected; anything else is estimated as 0 tokens.
      const bodyStr = typeof init.body === "string" ? init.body : "";
      const est = estimateTokens(bodyStr);
      if (tpmLimit > 0 && tokenCounter.wouldExceedTPM(key, tpmLimit, est)) {
        const cur = tokenCounter.getTPM(key);
        _log.warn(`[rl] TPM: ${cur}/${tpmLimit} +${est} -> ${key}`);
        onRateLimited(providerId, modelId, `Pre-flight TPM: ${cur}/${tpmLimit}`);
        throw new Error(`TPM limit`);
      }
      if (rpmLimit > 0 && tokenCounter.wouldExceedRPM(key, rpmLimit)) {
        _log.warn(`[rl] RPM: ${rpmLimit} -> ${key}`);
        onRateLimited(providerId, modelId, `Pre-flight RPM: ${rpmLimit}`);
        throw new Error(`RPM limit`);
      }
      tokenCounter.record(key, est);
      tokenCounter.recordRequest(key);
    }

    // Errors thrown by the underlying fetch propagate to the caller untouched.
    const response = await baseFetch(input, init);

    if (isChat) {
      const rlh = parseRateLimitHeaders(response.headers);
      if (rlh) {
        _log.debug(`[rl] quota ${key}: ${rlh.remaining}/${rlh.limit}`);
      }
    }

    if (response.ok) {
      if (state.consecutiveFailures > 0) {
        _log.info(`[rl] ${key} recovered (failures: ${state.consecutiveFailures})`);
        state.consecutiveFailures = 0;
      }
      return response;
    }

    const bodyText = await peekBody(response);
    const result = matchError(response.status, bodyText, errorRules, config.cooldownMs);
    if (!result.matched) {
      _log.warn(`[rl] Unclassified HTTP ${response.status}: ${key}`);
      return response;
    }

    _log.warn(`[rl] [${result.category}] ${result.description} (${key})`);
    if (result.category === "fatal") {
      _log.error(`[rl] FATAL: ${result.description}`);
      onFatalError(providerId, modelId, result.description);
      return response;
    }

    // Deadlines start when the failure is observed, not when the request was
    // sent; otherwise request latency would silently shorten them.
    const failedAt = Date.now();
    state.consecutiveFailures++;
    if (config.circuitBreaker.enabled && state.consecutiveFailures >= config.circuitBreaker.failureThreshold) {
      state.circuitOpen = true;
      state.circuitRecoveryAt = failedAt + config.circuitBreaker.recoveryTimeoutMs;
      _log.error(`[rl] Circuit BREAKER OPEN: ${key}`);
    }
    state.cooldownUntil = failedAt + result.waitMs;
    onRateLimited(providerId, modelId, result.description);
    return response;
  };
}
|
|
433
|
+
/**
 * Snapshot of usage and limiter state for every key that has a token window.
 *
 * @param {object} config - Active configuration (used to look up limits).
 * @returns {object[]} One entry per key with current TPM/RPM, their limits,
 *   remaining cooldown in seconds, circuit flag and failure count.
 */
function getFetchStatus(config) {
  const now = Date.now();
  const rows = [];
  for (const key of tokenCounter.getKeys()) {
    const parts = key.split("::");
    const pid = parts[0];
    const mid = parts[1];
    const s = rateLimitStates.get(key);
    const coolingDown = s && s.cooldownUntil > now;
    rows.push({
      providerId: pid,
      modelId: mid,
      tpm: tokenCounter.getTPM(key),
      tpmLimit: findProviderTPM(config, pid, mid),
      rpm: tokenCounter.getRPM(key),
      rpmLimit: findProviderRPM(config, pid, mid),
      cooldownRemaining: coolingDown ? Math.ceil((s.cooldownUntil - now) / 1000) : 0,
      circuitOpen: s?.circuitOpen ?? false,
      consecutiveFailures: s?.consecutiveFailures ?? 0
    });
  }
  return rows;
}
|
|
451
|
+
|
|
452
|
+
// src/fallback.ts
|
|
453
|
+
// Fallback bookkeeping per session id.
var sessionStates = new Map();

/**
 * Return the fallback state of a session, creating it on first use.
 * `originalModel` is only stored when the state is created; later calls
 * return the existing record untouched.
 */
function getOrCreateSession(sessionId, originalModel) {
  if (!sessionStates.get(sessionId)) {
    sessionStates.set(sessionId, {
      attemptCount: 0,
      triedModels: [],
      originalModel,
      succeeded: false
    });
  }
  return sessionStates.get(sessionId);
}
|
|
467
|
+
// Active fallback cooldowns, keyed by getModelKey().
var cooldowns = new Map();

/** Build the cooldown key for a provider/model pair: `<provider>::<model>`. */
function getModelKey(providerId, modelId) {
  return [providerId, modelId].join("::");
}

/** @returns {boolean} True while the pair has a cooldown entry whose deadline is still in the future. */
function isInCooldown(providerId, modelId) {
  const entry = cooldowns.get(getModelKey(providerId, modelId));
  return entry ? Date.now() < entry.until : false;
}

/**
 * Put a provider/model pair into cooldown for `durationMs` from now,
 * replacing any existing entry. `reason` is stored alongside the deadline.
 */
function setCooldown(providerId, modelId, durationMs, reason) {
  const until = Date.now() + durationMs;
  cooldowns.set(getModelKey(providerId, modelId), { until, reason });
}
|
|
483
|
+
/**
 * Compute the delay in milliseconds for a retry attempt.
 *
 * "immediate" yields 0, "linear" yields `baseDelayMs * attempt`, and any other
 * strategy is treated as exponential: `baseDelayMs * 2^(attempt - 1)`. The
 * result is capped at `maxDelayMs`; with jitter enabled it is then scaled by a
 * random factor between 0.5 and 1, and finally rounded.
 *
 * @param {{strategy: string, baseDelayMs: number, maxDelayMs: number, jitterEnabled: boolean}} policy
 * @param {number} attempt - Attempt number.
 * @returns {number} Delay in whole milliseconds.
 */
function calculateDelay(policy, attempt) {
  const { strategy, baseDelayMs, maxDelayMs } = policy;
  let uncapped;
  if (strategy === "immediate") {
    uncapped = 0;
  } else if (strategy === "linear") {
    uncapped = baseDelayMs * attempt;
  } else {
    uncapped = baseDelayMs * 2 ** (attempt - 1);
  }
  const capped = Math.min(uncapped, maxDelayMs);
  const scale = policy.jitterEnabled ? 0.5 + Math.random() * 0.5 : 1;
  return Math.round(capped * scale);
}
|
|
503
|
+
/**
 * List the configured fallback models that are still usable: not already in
 * `triedModels` and not currently in cooldown, ordered by ascending
 * `priority` (a missing priority counts as 0).
 *
 * @param {object} config - Active configuration.
 * @param {string[]} triedModels - Model keys already attempted.
 * @returns {object[]} A new array of fallback model entries.
 */
function getAvailableFallbacks(config, triedModels) {
  const usable = [];
  for (const candidate of config.fallbackModels) {
    const alreadyTried = triedModels.includes(getModelKey(candidate.providerId, candidate.modelId));
    if (alreadyTried || isInCooldown(candidate.providerId, candidate.modelId)) {
      continue;
    }
    usable.push(candidate);
  }
  const rank = (model) => model.priority ?? 0;
  return usable.sort((a, b) => rank(a) - rank(b));
}
|
|
509
|
+
// Logger for the fallback module; every method is a no-op.
var log = {
  debug() {},
  info() {},
  warn() {},
  error() {}
};

/**
 * Pick the next fallback model for a session after a failure.
 *
 * The failing model is put into cooldown for `config.cooldownMs` and added to
 * the session's tried list. The call fails once the session has used
 * `retryPolicy.maxRetries` attempts or no candidate is left. In "cycle" mode,
 * running out of candidates clears all cooldowns and tries the selection once
 * more. On success the chosen model is recorded as tried and the attempt
 * counter is incremented.
 *
 * The computed retry delay is only logged here; this function does not wait.
 *
 * @param {object} config - Active configuration.
 * @param {string} sessionId
 * @param {string} originalProviderId - Provider of the model that failed.
 * @param {string} originalModelId - Model that failed.
 * @param {string} reason - Why the fallback was triggered.
 * @returns {{model: (object|null), ok: boolean, reason: string}}
 */
function attemptFallback(config, sessionId, originalProviderId, originalModelId, reason) {
  const session = getOrCreateSession(sessionId, originalModelId);
  const claim = (candidate) => {
    session.attemptCount++;
    session.triedModels.push(getModelKey(candidate.providerId, candidate.modelId));
  };
  const noneLeft = () => ({ model: null, ok: false, reason: "No fallback models available" });

  setCooldown(originalProviderId, originalModelId, config.cooldownMs, reason);
  session.triedModels.push(getModelKey(originalProviderId, originalModelId));

  if (session.attemptCount >= config.retryPolicy.maxRetries) {
    log.error(`[rate-limit] Max retries (${config.retryPolicy.maxRetries}) reached for session ${sessionId}`);
    return { model: null, ok: false, reason: "Max retries exceeded" };
  }

  const [next] = getAvailableFallbacks(config, session.triedModels);
  if (next !== undefined) {
    claim(next);
    const delay = calculateDelay(config.retryPolicy, session.attemptCount);
    if (delay > 0) {
      log.info(`[rate-limit] Waiting ${delay}ms before fallback attempt ${session.attemptCount}`);
    }
    log.info(`[rate-limit] Fallback attempt ${session.attemptCount}: ` + `${originalProviderId}/${originalModelId} → ${next.providerId}/${next.modelId} (reason: ${reason})`);
    return {
      model: next,
      ok: true,
      reason: `Switched to ${next.providerId}/${next.modelId}`
    };
  }

  if (config.fallbackMode !== "cycle") {
    return noneLeft();
  }

  log.info("[rate-limit] All models exhausted, clearing cooldowns for cycle mode");
  cooldowns.clear();
  const [recycled] = getAvailableFallbacks(config, session.triedModels);
  if (recycled === undefined) {
    return noneLeft();
  }
  claim(recycled);
  return { model: recycled, ok: true, reason: `Cycling to ${recycled.providerId}/${recycled.modelId}` };
}
|
|
553
|
+
/** Flag a known session as succeeded; unknown session ids are ignored. */
function markFallbackSuccess(sessionId) {
  const session = sessionStates.get(sessionId);
  if (!session) {
    return;
  }
  session.succeeded = true;
}
|
|
559
|
+
/** Drop the fallback state kept for `sessionId`, if any. */
function removeSession(sessionId) {
  if (sessionStates.has(sessionId)) {
    sessionStates.delete(sessionId);
  }
}
|
|
562
|
+
/**
 * Summarize fallback state for diagnostics.
 *
 * @returns {{activeSessions: number, cooldowns: {key: string, remaining: number, reason: string}[]}}
 *   The number of tracked sessions and every cooldown that has not expired,
 *   with its remaining time in whole seconds (rounded up).
 */
function getFallbackStatus() {
  const now = Date.now();
  const active = [...cooldowns.entries()]
    .filter(([, entry]) => now < entry.until)
    .map(([key, entry]) => ({
      key,
      remaining: Math.ceil((entry.until - now) / 1000),
      reason: entry.reason
    }));
  return { activeSessions: sessionStates.size, cooldowns: active };
}
|
|
579
|
+
|
|
580
|
+
// src/event-handler.ts
|
|
581
|
+
// Logger used by the event handler; silent until setEventLogger() installs one.
var _l = {
  debug() {},
  info() {},
  warn() {},
  error() {}
};

/** Replace the event handler's logger with `l` (expects debug/info/warn/error methods). */
function setEventLogger(l) {
  _l = l;
}
|
|
585
|
+
/**
 * Error-detection rules to apply for provider `pid`.
 *
 * Returns the rules of the first provider entry matching `pid` when it has
 * any; otherwise the rules of all configured providers, concatenated in
 * configuration order.
 */
function allRules(config, pid) {
  const own = config.providers.find((entry) => entry.provider === pid)?.errorDetection ?? [];
  if (own.length > 0) {
    return own;
  }
  return config.providers.flatMap((entry) => entry.errorDetection ?? []);
}
|
|
590
|
+
/**
 * React to a host event by triggering a model fallback when the event carries
 * a non-fatal, recognised error.
 *
 * Handled event types:
 *   - "session.error":   classifies the error message/status.
 *   - "message.updated": marks the session successful on a clean completion,
 *                        otherwise classifies the attached error.
 *   - "session.status":  classifies the message of a "retry" status.
 *   - "session.deleted": drops the session's fallback state.
 *
 * @param {{type: string, properties?: object}} e - Event from the host.
 * @param {{config: object, onFb: Function}} ctx - Configuration and the
 *   callback invoked with (providerId, modelId, sessionId) when a fallback
 *   model was selected.
 * @returns {boolean} True when the event was classified as a non-fatal error
 *   (whether or not a fallback model was available); false otherwise.
 */
function handleEvent(e, ctx) {
  if (!ctx.config.eventFallback) {
    return false;
  }

  // Shared tail of the three error paths: classify, log, try to fall back.
  const fallbackOnError = (status, text, label, sessionId, providerId, modelId) => {
    const verdict = matchError(status, text, allRules(ctx.config, providerId), ctx.config.cooldownMs);
    if (!verdict.matched || verdict.category === "fatal") {
      return false;
    }
    _l.warn(`[rl] event ${label}: ${verdict.description}`);
    const fb = attemptFallback(ctx.config, sessionId, providerId, modelId, verdict.description);
    if (fb.ok && fb.model) {
      ctx.onFb(fb.model.providerId, fb.model.modelId, sessionId);
    }
    return true;
  };

  const props = e.properties;
  switch (e.type) {
    case "session.error": {
      if (!props?.sessionID || !props?.error) {
        return false;
      }
      const text = typeof props.error.message === "string" ? props.error.message : JSON.stringify(props.error);
      const status = typeof props.error.statusCode === "number" ? props.error.statusCode : 0;
      return fallbackOnError(status, text, "err", props.sessionID, "", "");
    }
    case "message.updated": {
      const info = props?.info;
      if (!info) {
        return false;
      }
      if (info.status === "completed" && !info.error && info.sessionID) {
        markFallbackSuccess(info.sessionID);
        return false;
      }
      if (!info.error) {
        return false;
      }
      const text = typeof info.error === "string" ? info.error : JSON.stringify(info.error);
      return fallbackOnError(0, text, "msg", info.sessionID ?? "", info.providerID ?? "", info.modelID ?? "");
    }
    case "session.status": {
      if (props?.status?.type === "retry" && props.status.message) {
        return fallbackOnError(0, props.status.message, "retry", props.sessionID ?? "", "", "");
      }
      return false;
    }
    case "session.deleted": {
      if (props?.sessionID) {
        removeSession(props.sessionID);
      }
      return false;
    }
    default:
      return false;
  }
}
|
|
649
|
+
|
|
650
|
+
// src/index.ts
|
|
651
|
+
/**
 * Plugin entry point.
 *
 * Loads the configuration, installs a level-filtered console logger, and —
 * unless the plugin is disabled or the configuration has errors (both cases
 * return an empty hook object) — starts the request-counter reset timer and
 * returns the hooks: `config` (fetch interception), `event` (event-driven
 * fallback), `command` (registers `rate-limit-status`) and `cleanup`.
 *
 * @param {{client: *, directory: *, worktree: string}} input - Host context;
 *   only `worktree` is used.
 * @returns {Promise<object>} Hook object.
 */
var RateLimitPlugin = async ({ client, directory, worktree }) => {
  const loaded = loadConfig(worktree);
  const { config } = loaded;

  const LEVELS = { debug: 0, info: 1, warn: 2, error: 3 };
  const threshold = LEVELS[config.logLevel] ?? 2;
  // Each sink forwards to the console method of the same name when its level passes the threshold.
  const sinkFor = (level) => (m, ...a) => {
    if (threshold <= LEVELS[level]) {
      console[level](`[rate-limit] ${m}`, ...a);
    }
  };
  const logger = {
    debug: sinkFor("debug"),
    info: sinkFor("info"),
    warn: sinkFor("warn"),
    error: sinkFor("error")
  };
  setLogger(logger);
  setEventLogger(logger);

  if (!config.enabled) {
    logger.info("Plugin disabled");
    return {};
  }

  logger.info(loaded.sourcePath ? `Config: ${loaded.sourcePath}` : "No config file, using defaults");

  if (loaded.errors.length > 0) {
    loaded.errors.forEach((problem) => {
      logger.error(`Config error: ${problem}`);
    });
    return {};
  }

  tokenCounter.startResetTimer();

  return {
    async config(cfg) {
      if (!config.fetchInterception) {
        return;
      }
      for (const [pid, pc] of Object.entries(cfg.provider ?? {})) {
        if (!pc || typeof pc !== "object") {
          continue;
        }
        for (const mid of Object.keys(pc.models ?? {})) {
          // NOTE(review): the base fetch is re-read on every iteration, so each
          // model's wrapper wraps the one installed for the previous model of
          // the same provider — confirm this nesting is intended.
          const baseFetch = pc.options?.fetch ?? globalThis.fetch.bind(globalThis);
          const wrapped = createWrappedFetch(baseFetch, {
            config,
            providerId: pid,
            modelId: mid,
            onRateLimited(p, m, reason) {
              logger.warn(`Fallback triggered: ${p}/${m} — ${reason}`);
            },
            onFatalError(p, m, reason) {
              logger.error(`Fatal: ${p}/${m} — ${reason}`);
            }
          });
          if (!pc.options) {
            pc.options = {};
          }
          pc.options.fetch = wrapped;
          logger.info(`Fetch injected: ${pid}/${mid}`);
        }
      }
    },
    event: async ({ event }) => {
      const onFb = (pid, mid, sid) => {
        logger.warn(`Event fallback: ${pid}/${mid} for session ${sid}`);
      };
      handleEvent(event, { config, onFb });
    },
    async command(cfg) {
      cfg.command = cfg.command ?? {};
      // The report text is built once, at registration time.
      cfg.command["rate-limit-status"] = {
        description: "Show rate limit status and statistics",
        template: buildStatusReport(config)
      };
    },
    cleanup: () => {
      tokenCounter.destroy();
    }
  };
};
|
|
740
|
+
/**
 * Builds the Markdown status report used as the `rate-limit-status` template.
 *
 * The report has a per-provider/model table (or a placeholder line when
 * `getFetchStatus` returns nothing), the active-session count, any cooldowns
 * reported by `getFallbackStatus`, and a one-line summary of the config flags.
 *
 * @param {object} config - Plugin configuration; `enabled`,
 *   `fetchInterception` and `eventFallback` are read here, and the object is
 *   passed on to `getFetchStatus`.
 * @returns {string} The report text.
 */
function buildStatusReport(config) {
  const rows = getFetchStatus(config);
  const fallback = getFallbackStatus();

  // Collect fragments in order and concatenate once at the end.
  const parts = ["## Rate Limit Status\n\n"];

  if (rows.length === 0) {
    parts.push("_No active providers tracked._\n");
  } else {
    parts.push("| Provider | Model | TPM | Limit | RPM | Limit | Cooldown | Circuit | Failures |\n");
    parts.push("|----------|-------|-----|-------|-----|-------|----------|---------|----------|\n");
    for (const s of rows) {
      // A falsy limit is rendered as "∞".
      parts.push(
        `| ${s.providerId} | ${s.modelId} | ${s.tpm} | ${s.tpmLimit || "∞"} | ${s.rpm} | ${s.rpmLimit || "∞"} | ${s.cooldownRemaining > 0 ? `${s.cooldownRemaining}s` : "—"} | ${s.circuitOpen ? "⚠ OPEN" : "✅"} | ${s.consecutiveFailures} |\n`
      );
    }
  }

  parts.push(`\n**Active sessions:** ${fallback.activeSessions}\n`);

  if (fallback.cooldowns.length > 0) {
    parts.push("\n**Cooldowns:**\n");
    for (const c of fallback.cooldowns) {
      parts.push(`- ${c.key}: ${c.remaining}s (${c.reason})\n`);
    }
  }

  const enabledText = config.enabled ? "enabled" : "disabled";
  const fetchText = config.fetchInterception ? "on" : "off";
  const eventsText = config.eventFallback ? "on" : "off";
  parts.push(`\n**Config:** ${enabledText} | Fetch: ${fetchText} | Events: ${eventsText}`);

  return parts.join("");
}
|
|
775
|
+
var src_default = RateLimitPlugin;
|
|
776
|
+
export {
|
|
777
|
+
src_default as default,
|
|
778
|
+
RateLimitPlugin
|
|
779
|
+
};
|
package/package.json
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@crystalfluxay/opencode-rate-limiter",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "OpenCode plugin for proactive TPM/RPM rate limiting with multi-provider fallback and fuzzy error matching",
|
|
5
|
+
"author": {
|
|
6
|
+
"name": "crystalfluxay"
|
|
7
|
+
},
|
|
8
|
+
"license": "MIT",
|
|
9
|
+
"keywords": ["opencode", "plugin", "rate-limit", "tpm", "rpm", "fallback", "xunfei", "xfyun"],
|
|
10
|
+
"repository": {
|
|
11
|
+
"type": "git",
|
|
12
|
+
"url": "https://github.com/crystalfluxay/opencode-rate-limiter"
|
|
13
|
+
},
|
|
14
|
+
"type": "module",
|
|
15
|
+
"exports": {
|
|
16
|
+
".": {
|
|
17
|
+
"types": "./dist/index.d.ts",
|
|
18
|
+
"default": "./dist/index.js"
|
|
19
|
+
}
|
|
20
|
+
},
|
|
21
|
+
"files": [
|
|
22
|
+
"dist/index.js",
|
|
23
|
+
"rate-limit.example.json",
|
|
24
|
+
"README.md"
|
|
25
|
+
],
|
|
26
|
+
"dependencies": {
|
|
27
|
+
"@opencode-ai/plugin": "1.0.85"
|
|
28
|
+
},
|
|
29
|
+
"devDependencies": {
|
|
30
|
+
"@types/node": "^20.11.5",
|
|
31
|
+
"bun-types": "latest",
|
|
32
|
+
"typescript": "^5.7.0"
|
|
33
|
+
},
|
|
34
|
+
"scripts": {
|
|
35
|
+
"build": "tsc",
|
|
36
|
+
"typecheck": "tsc --noEmit"
|
|
37
|
+
}
|
|
38
|
+
}
|
package/rate-limit.example.json
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
{
|
|
2
|
+
"enabled": true,
|
|
3
|
+
"cooldownMs": 60000,
|
|
4
|
+
"fallbackMode": "cycle",
|
|
5
|
+
"fallbackModels": [],
|
|
6
|
+
"retryPolicy": {
|
|
7
|
+
"maxRetries": 3,
|
|
8
|
+
"strategy": "exponential",
|
|
9
|
+
"baseDelayMs": 1000,
|
|
10
|
+
"maxDelayMs": 30000,
|
|
11
|
+
"jitterEnabled": true
|
|
12
|
+
},
|
|
13
|
+
"circuitBreaker": {
|
|
14
|
+
"enabled": true,
|
|
15
|
+
"failureThreshold": 5,
|
|
16
|
+
"recoveryTimeoutMs": 60000
|
|
17
|
+
},
|
|
18
|
+
"providers": [
|
|
19
|
+
{
|
|
20
|
+
"provider": "xfyun",
|
|
21
|
+
"tpm": 100000,
|
|
22
|
+
"rpm": 10,
|
|
23
|
+
"errorDetection": [
|
|
24
|
+
{ "bodyPattern": "11210|TPM.*超限|tpm.*limit", "category": "tpm", "description": "TPM超限", "waitMs": 60000 },
|
|
25
|
+
{ "bodyPattern": "11201|次数超限|速率限制|请求.*过快", "category": "tpm", "description": "次数超限", "waitMs": 60000 },
|
|
26
|
+
{ "bodyPattern": "11202|秒级流控", "category": "rpm", "description": "秒级流控超限", "waitMs": 5000 },
|
|
27
|
+
{ "bodyPattern": "11203|并发.*超|并发流控|并发.*限制", "category": "rpm", "description": "并发流控超限", "waitMs": 10000 },
|
|
28
|
+
{ "statusPattern": "^429$", "category": "tpm", "description": "HTTP 429", "waitMs": 60000 },
|
|
29
|
+
{ "bodyPattern": "10008|服务容量不足", "category": "server_error", "description": "服务容量不足" },
|
|
30
|
+
{ "bodyPattern": "10010|引擎.*排队|引擎.*连接失败", "category": "server_error", "description": "引擎排队" },
|
|
31
|
+
{ "bodyPattern": "10012|引擎内部错误", "category": "server_error", "description": "引擎内部错误" },
|
|
32
|
+
{ "bodyPattern": "10110|服务忙|服务繁忙", "category": "server_error", "description": "服务忙" },
|
|
33
|
+
{ "bodyPattern": "10222|引擎网络异常", "category": "server_error", "description": "引擎网络异常" },
|
|
34
|
+
{ "statusPattern": "^5\\d{2}$", "category": "server_error", "description": "HTTP 5xx" },
|
|
35
|
+
{ "bodyPattern": "10907|10910|token.*上限|token.*超", "category": "retryable", "description": "Token超上限", "waitMs": 5000 },
|
|
36
|
+
{ "statusPattern": "^401$", "category": "fatal", "description": "HTTP 401" },
|
|
37
|
+
{ "statusPattern": "^403$", "category": "fatal", "description": "HTTP 403" },
|
|
38
|
+
{ "bodyPattern": "10004|schema.*错误", "category": "fatal", "description": "schema错误" },
|
|
39
|
+
{ "bodyPattern": "10005|参数.*错误", "category": "fatal", "description": "参数错误" },
|
|
40
|
+
{ "bodyPattern": "10013|审核不通过", "category": "fatal", "description": "审核不通过" },
|
|
41
|
+
{ "bodyPattern": "10014|回复.*敏感", "category": "fatal", "description": "回复敏感" },
|
|
42
|
+
{ "bodyPattern": "10015|黑名单", "category": "fatal", "description": "黑名单" },
|
|
43
|
+
{ "bodyPattern": "10016|授权.*错误|未开通", "category": "fatal", "description": "授权错误" },
|
|
44
|
+
{ "bodyPattern": "10019|疑似敏感", "category": "fatal", "description": "疑似敏感" },
|
|
45
|
+
{ "bodyPattern": "11200|授权错误|无.*授权", "category": "fatal", "description": "授权错误" },
|
|
46
|
+
{ "bodyPattern": "11221|套餐.*不支持|模型配置错误", "category": "fatal", "description": "模型配置错误" }
|
|
47
|
+
]
|
|
48
|
+
}
|
|
49
|
+
],
|
|
50
|
+
"fetchInterception": true,
|
|
51
|
+
"eventFallback": true,
|
|
52
|
+
"logLevel": "warn"
|
|
53
|
+
}
|