@crystalfluxay/opencode-rate-limiter 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,105 @@
1
+ # opencode-rate-limiter
2
+
3
+ OpenCode plugin for proactive TPM/RPM rate limiting with multi-provider fallback.
4
+
5
+ ## Features
6
+
7
+ - **Proactive TPM/RPM limiting** — counts tokens per minute, blocks requests before they hit the API
8
+ - **Reactive error detection** — parses response body against user-configured regex rules
9
+ - **Fuzzy error matching** — `bodyPattern` regex matches any text in the response body
10
+ - **Multi-provider support** — different error rules per provider
11
+ - **Fallback model switching** — auto-switch to backup models when rate limited
12
+ - **Circuit breaker** — auto-disconnect failing models
13
+ - **Smart cooldown** — skip rate-limited models for configurable duration
14
+ - **`/rate-limit-status`** — diagnostic command
15
+
16
+ ## Install
17
+
18
+ Add to `opencode.json`:
19
+
20
+ ```json
21
+ {
22
+ "plugin": ["@crystalfluxay/opencode-rate-limiter"]
23
+ }
24
+ ```
25
+
26
+ ## Configure
27
+
28
+ Create `.opencode/rate-limit.json` in your project (checked first) or `~/.opencode/rate-limit.json` in your home directory:
29
+
30
+ ```json
31
+ {
32
+ "providers": [
33
+ {
34
+ "provider": "xfyun",
35
+ "tpm": 100000,
36
+ "rpm": 10,
37
+ "errorDetection": [
38
+ { "bodyPattern": "11210|TPM.*超限", "category": "tpm", "description": "TPM超限", "waitMs": 60000 },
39
+ { "bodyPattern": "11201|次数超限", "category": "tpm", "description": "次数超限", "waitMs": 60000 },
40
+ { "statusPattern": "^429$", "category": "tpm", "description": "HTTP 429", "waitMs": 60000 },
41
+ { "statusPattern": "^5\\d{2}$", "category": "server_error", "description": "HTTP 5xx" },
42
+ { "statusPattern": "^401$", "category": "fatal", "description": "HTTP 401" }
43
+ ]
44
+ }
45
+ ],
46
+ "fallbackModels": [
47
+ { "providerId": "openai", "modelId": "gpt-4o" }
48
+ ]
49
+ }
50
+ ```
51
+
52
+ ### Error categories
53
+
54
+ | Category | Behavior |
55
+ |----------|----------|
56
+ | `tpm` | Wait `waitMs` (defaults to `cooldownMs`, 60s out of the box), then retry or fallback |
57
+ | `rpm` | Wait `waitMs`, then retry or fallback |
58
+ | `server_error` | Wait `cooldownMs`, retry with backoff |
59
+ | `retryable` | Wait `waitMs`, retry |
60
+ | `fatal` | No retry, propagate error immediately |
61
+
62
+ ### Pattern matching
63
+
64
+ - `statusPattern` — regex against HTTP status code string (e.g. `"^429$"`, `"^5\\d{2}$"`)
65
+ - `bodyPattern` — regex against full response body (case-insensitive, fuzzy match)
66
+ - Both optional — at least one required per rule
67
+ - When both specified, BOTH must match (AND logic)
68
+ - Rules checked in order, first match wins
69
+
70
+ ### Full config reference
71
+
72
+ ```json
73
+ {
74
+ "enabled": true,
75
+ "cooldownMs": 60000,
76
+ "fallbackMode": "cycle",
77
+ "fallbackModels": [],
78
+ "retryPolicy": {
79
+ "maxRetries": 3,
80
+ "strategy": "exponential",
81
+ "baseDelayMs": 1000,
82
+ "maxDelayMs": 30000,
83
+ "jitterEnabled": true
84
+ },
85
+ "circuitBreaker": {
86
+ "enabled": true,
87
+ "failureThreshold": 5,
88
+ "recoveryTimeoutMs": 60000
89
+ },
90
+ "providers": [],
91
+ "fetchInterception": true,
92
+ "eventFallback": true,
93
+ "logLevel": "warn"
94
+ }
95
+ ```
96
+
97
+ ## Preset templates
98
+
99
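+ Copy from `src/error-codes.ts` in the source repository (the published package does not ship `src/`; `rate-limit.example.json` contains a ready-made Xunfei rule set):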
100
+ - `XUNFEI_PRESET` — 讯飞星辰 MaaS Astron Coding Plan
101
+ - `OPENAI_PRESET` — Standard OpenAI-compatible providers
102
+
103
+ ## License
104
+
105
+ MIT
package/dist/index.js ADDED
@@ -0,0 +1,779 @@
1
+ // src/config.ts
2
+ import * as fs from "node:fs";
3
+ import * as path from "node:path";
4
+ import * as os from "node:os";
5
/** Retry/backoff settings applied when the user config does not override `retryPolicy`. */
const DEFAULT_RETRY_POLICY = {
  maxRetries: 3,
  strategy: "exponential",
  baseDelayMs: 1000,
  maxDelayMs: 30000,
  jitterEnabled: true,
};

/** Circuit-breaker settings applied when the user config does not override `circuitBreaker`. */
const DEFAULT_CIRCUIT_BREAKER = {
  enabled: true,
  failureThreshold: 5,
  recoveryTimeoutMs: 60000,
};

/**
 * Baseline plugin configuration. User-supplied values are merged on top of
 * this object by loadConfig(); it is also returned as-is when the config file
 * cannot be parsed.
 */
const DEFAULT_CONFIG = {
  enabled: true,
  cooldownMs: 60000,
  fallbackMode: "cycle",
  fallbackModels: [],
  retryPolicy: DEFAULT_RETRY_POLICY,
  circuitBreaker: DEFAULT_CIRCUIT_BREAKER,
  providers: [],
  fetchInterception: true,
  eventFallback: true,
  logLevel: "warn",
};
29
/**
 * Decide which `rate-limit.json` to read.
 *
 * The project-level file (`<worktree or cwd>/.opencode/rate-limit.json`) wins
 * when it exists; otherwise the home-directory path is returned, whether or
 * not that file exists. Callers must check for existence themselves.
 *
 * @param {string} [worktree] Project root; falls back to `process.cwd()`.
 * @returns {string} Path of the config file to try.
 */
function resolveConfigPath(worktree) {
  const configSegments = [".opencode", "rate-limit.json"];
  const projectRoot = worktree ? worktree : process.cwd();
  const projectCandidate = path.join(projectRoot, ...configSegments);
  if (fs.existsSync(projectCandidate)) {
    return projectCandidate;
  }
  return path.join(os.homedir(), ...configSegments);
}
40
/**
 * @param {unknown} item
 * @returns {boolean} True for non-null, non-array objects.
 */
function isObject(item) {
  return item !== null && typeof item === "object" && !Array.isArray(item);
}

// Keys that must never be copied from user-supplied JSON: assigning them
// would replace the merged object's prototype instead of storing a value.
const UNSAFE_MERGE_KEYS = new Set(["__proto__", "constructor", "prototype"]);

/**
 * Recursively merge `source` on top of `target` without mutating either.
 *
 * - Nested plain objects are merged key by key.
 * - Any other defined value (arrays included) replaces the target value.
 * - `undefined` source values are ignored.
 * - A `source` that is not a plain object yields a shallow copy of `target`.
 *
 * @param {object} target Base values.
 * @param {object} source Overrides, typically parsed from the config file.
 * @returns {object} A new merged object.
 */
function deepMerge(target, source) {
  const result = { ...target };
  if (!isObject(source)) {
    return result;
  }
  for (const key of Object.keys(source)) {
    if (UNSAFE_MERGE_KEYS.has(key)) {
      continue;
    }
    const sourceVal = source[key];
    const targetVal = result[key];
    if (isObject(sourceVal) && isObject(targetVal)) {
      result[key] = deepMerge(targetVal, sourceVal);
    } else if (sourceVal !== undefined) {
      result[key] = sourceVal;
    }
  }
  return result;
}
56
/**
 * Validate a merged configuration object.
 *
 * Never throws: malformed sections (wrong type, null entries, uncompilable
 * regex patterns) are reported as error strings instead of crashing.
 *
 * @param {object} config Configuration after defaults have been merged in.
 * @returns {string[]} Human-readable problems; empty when the config is valid.
 */
function validateConfig(config) {
  const errors = [];
  const isPlainObject = (v) => v !== null && typeof v === "object" && !Array.isArray(v);
  // Rejects NaN and non-numbers as well as values below `min`.
  const atLeast = (v, min) => typeof v === "number" && v >= min;

  if (!atLeast(config.cooldownMs, 0)) {
    errors.push("cooldownMs must be >= 0");
  }

  if (!isPlainObject(config.retryPolicy)) {
    errors.push("retryPolicy must be an object");
  } else {
    if (!atLeast(config.retryPolicy.maxRetries, 0)) {
      errors.push("retryPolicy.maxRetries must be >= 0");
    }
    if (!atLeast(config.retryPolicy.baseDelayMs, 0)) {
      errors.push("retryPolicy.baseDelayMs must be >= 0");
    }
    if (!atLeast(config.retryPolicy.maxDelayMs, 0)) {
      errors.push("retryPolicy.maxDelayMs must be >= 0");
    }
  }

  if (!isPlainObject(config.circuitBreaker)) {
    errors.push("circuitBreaker must be an object");
  } else {
    if (!atLeast(config.circuitBreaker.failureThreshold, 1)) {
      errors.push("circuitBreaker.failureThreshold must be >= 1");
    }
    if (!atLeast(config.circuitBreaker.recoveryTimeoutMs, 0)) {
      errors.push("circuitBreaker.recoveryTimeoutMs must be >= 0");
    }
  }

  if (!Array.isArray(config.providers)) {
    errors.push("providers must be an array");
  } else {
    config.providers.forEach((p, index) => {
      if (!isPlainObject(p)) {
        errors.push(`providers[${index}] must be an object`);
        return;
      }
      if (p.tpm != null && !atLeast(p.tpm, 0)) {
        errors.push(`provider "${p.provider}": tpm must be >= 0`);
      }
      if (p.rpm != null && !atLeast(p.rpm, 0)) {
        errors.push(`provider "${p.provider}": rpm must be >= 0`);
      }
      if (p.errorDetection === undefined) {
        return;
      }
      if (!Array.isArray(p.errorDetection)) {
        errors.push(`provider "${p.provider}": errorDetection must be an array`);
        return;
      }
      p.errorDetection.forEach((rule, ruleIndex) => {
        if (!isPlainObject(rule)) {
          errors.push(`provider "${p.provider}": errorDetection[${ruleIndex}] must be an object`);
          return;
        }
        for (const field of ["statusPattern", "bodyPattern", "messagePattern"]) {
          if (rule[field] === undefined) {
            continue;
          }
          let compiles = typeof rule[field] === "string";
          if (compiles) {
            try {
              new RegExp(rule[field]);
            } catch {
              compiles = false;
            }
          }
          if (!compiles) {
            errors.push(`provider "${p.provider}": invalid ${field} in errorDetection[${ruleIndex}]`);
          }
        }
      });
    });
  }

  if (!Array.isArray(config.fallbackModels)) {
    errors.push("fallbackModels must be an array");
  } else {
    for (const fm of config.fallbackModels) {
      if (!isPlainObject(fm) || !fm.providerId || !fm.modelId) {
        errors.push(`fallback model: providerId and modelId are required`);
      }
    }
  }

  return errors;
}
85
/**
 * Load, merge and validate the plugin configuration.
 *
 * Reads the file chosen by resolveConfigPath() when it exists, merges it over
 * DEFAULT_CONFIG and validates the result. If the file cannot be read or
 * parsed, or its top-level JSON value is not an object, DEFAULT_CONFIG is
 * returned together with a single "Failed to parse config" error.
 *
 * @param {string} [worktree] Project root used to locate the project-level file.
 * @returns {{config: object, sourcePath: string|null, errors: string[]}}
 *   `sourcePath` is null when no config file was found.
 */
function loadConfig(worktree) {
  let userConfig = {};
  let sourcePath = null;
  const configPath = resolveConfigPath(worktree);
  if (fs.existsSync(configPath)) {
    try {
      const parsed = JSON.parse(fs.readFileSync(configPath, "utf-8"));
      // Valid JSON such as `null`, `[]` or `42` is not a usable config and
      // would otherwise crash the merge below.
      if (!isObject(parsed)) {
        throw new TypeError("top-level value must be a JSON object");
      }
      userConfig = parsed;
      sourcePath = configPath;
    } catch (err) {
      return {
        config: DEFAULT_CONFIG,
        sourcePath: configPath,
        errors: [`Failed to parse config: ${err instanceof Error ? err.message : String(err)}`]
      };
    }
  }
  const merged = deepMerge(DEFAULT_CONFIG, userConfig);
  const errors = validateConfig(merged);
  return { config: merged, sourcePath, errors };
}
106
/**
 * Look up the tokens-per-minute limit for a provider/model pair.
 *
 * An entry matching both provider and model takes precedence; otherwise the
 * provider-wide entry (one without `model`) is used.
 *
 * @param {{providers: object[]}} config
 * @param {string} providerId
 * @param {string} modelId
 * @returns {number} The configured limit, or 0 when none is configured.
 */
function findProviderTPM(config, providerId, modelId) {
  const { providers } = config;
  const modelEntry = providers.find((entry) => entry.provider === providerId && entry.model === modelId);
  if (modelEntry?.tpm !== undefined) {
    return modelEntry.tpm;
  }
  const providerEntry = providers.find((entry) => entry.provider === providerId && !entry.model);
  return providerEntry?.tpm !== undefined ? providerEntry.tpm : 0;
}
115
/**
 * Look up the requests-per-minute limit for a provider/model pair.
 *
 * An entry matching both provider and model takes precedence; otherwise the
 * provider-wide entry (one without `model`) is used.
 *
 * @param {{providers: object[]}} config
 * @param {string} providerId
 * @param {string} modelId
 * @returns {number} The configured limit, or 0 when none is configured.
 */
function findProviderRPM(config, providerId, modelId) {
  const { providers } = config;
  const modelEntry = providers.find((entry) => entry.provider === providerId && entry.model === modelId);
  if (modelEntry?.rpm !== undefined) {
    return modelEntry.rpm;
  }
  const providerEntry = providers.find((entry) => entry.provider === providerId && !entry.model);
  return providerEntry?.rpm !== undefined ? providerEntry.rpm : 0;
}
124
+
125
+ // src/token-counter.ts
126
/** Length of the accounting window (one minute) in milliseconds. */
const WINDOW_MS = 60000;

/**
 * In-memory usage accounting per key.
 *
 * Token usage is tracked as timestamped entries in a sliding one-minute
 * window. Request counts are plain counters that are zeroed for all keys by
 * the periodic timer started with startResetTimer().
 */
class TokenCounter {
  /** key -> array of `{ ts, tokens }` entries recorded within the window. */
  windows = new Map();
  /** key -> number of requests since the last counter reset. */
  rpmCounters = new Map();
  /** Active reset timers, kept so destroy() can stop them. */
  rpmResetTimers = new Map();

  /**
   * Record `tokens` used by `key` now, dropping entries older than the window.
   * @param {string} key
   * @param {number} tokens
   */
  record(key, tokens) {
    const now = Date.now();
    const cutoff = now - WINDOW_MS;
    const live = (this.windows.get(key) ?? []).filter((e) => e.ts > cutoff);
    live.push({ ts: now, tokens });
    this.windows.set(key, live);
  }

  /** Count one request against `key`. */
  recordRequest(key) {
    this.rpmCounters.set(key, (this.rpmCounters.get(key) ?? 0) + 1);
  }

  /** @returns {number} Tokens recorded for `key` during the last minute. */
  getTPM(key) {
    const entries = this.windows.get(key);
    if (!entries) {
      return 0;
    }
    const cutoff = Date.now() - WINDOW_MS;
    return entries.filter((e) => e.ts > cutoff).reduce((sum, e) => sum + e.tokens, 0);
  }

  /** @returns {number} Requests counted for `key` since the last reset. */
  getRPM(key) {
    return this.rpmCounters.get(key) ?? 0;
  }

  /**
   * @param {string} key
   * @param {number} limit A limit <= 0 disables the check.
   * @param {number} incomingTokens Estimated size of the pending request.
   * @returns {boolean} True if adding the request would go over `limit`.
   */
  wouldExceedTPM(key, limit, incomingTokens) {
    if (limit <= 0) {
      return false;
    }
    return this.getTPM(key) + incomingTokens > limit;
  }

  /**
   * @param {string} key
   * @param {number} limit A limit <= 0 disables the check.
   * @returns {boolean} True if the request count has already reached `limit`.
   */
  wouldExceedRPM(key, limit) {
    if (limit <= 0) {
      return false;
    }
    return this.getRPM(key) >= limit;
  }

  /** Forget all usage for every key (timers keep running). */
  reset() {
    this.windows.clear();
    this.rpmCounters.clear();
  }

  /** Forget all usage for a single key. */
  resetKey(key) {
    this.windows.delete(key);
    this.rpmCounters.delete(key);
  }

  /** @returns {string[]} Keys that have token usage recorded. */
  getKeys() {
    return [...this.windows.keys()];
  }

  /**
   * Start the once-a-minute request-counter reset.
   *
   * Safe to call repeatedly: only one timer is ever active. The handle is
   * stored so destroy() can stop it, and it is unref'ed so it never keeps the
   * process alive on its own.
   */
  startResetTimer() {
    const timerId = "rpm-reset";
    if (this.rpmResetTimers.has(timerId)) {
      return;
    }
    const timer = setInterval(() => {
      this.rpmCounters.clear();
    }, WINDOW_MS);
    timer.unref?.();
    this.rpmResetTimers.set(timerId, timer);
  }

  /** Stop all timers and drop all recorded usage. */
  destroy() {
    for (const timer of this.rpmResetTimers.values()) {
      clearInterval(timer);
    }
    this.rpmResetTimers.clear();
    this.windows.clear();
    this.rpmCounters.clear();
  }
}
192
/**
 * Extract quota information from `x-ratelimit-*` response headers.
 *
 * For `limit` and `remaining` only the first comma-separated value is used;
 * `reset` is parsed as a whole.
 *
 * @param {{get(name: string): string|null}} headers Object exposing `get(name)`.
 * @returns {{limit: number, remaining: number, reset: number}|null}
 *   Null when any of the three headers is missing or not an integer.
 */
function parseRateLimitHeaders(headers) {
  const headerNames = ["x-ratelimit-limit", "x-ratelimit-remaining", "x-ratelimit-reset"];
  const [limitRaw, remainingRaw, resetRaw] = headerNames.map((name) => headers.get(name));
  if (!(limitRaw && remainingRaw && resetRaw)) {
    return null;
  }
  const leadingInt = (raw) => parseInt(raw.split(",")[0]?.trim() ?? "", 10);
  const quota = {
    limit: leadingInt(limitRaw),
    remaining: leadingInt(remainingRaw),
    reset: parseInt(resetRaw.trim(), 10),
  };
  return Object.values(quota).some((value) => isNaN(value)) ? null : quota;
}
210
+ var tokenCounter = new TokenCounter;
211
+
212
+ // src/error-codes.ts
213
/**
 * Classify a failed response (or an error message) against detection rules.
 *
 * Rules are checked in order and the first match wins. A rule may combine any
 * of these criteria, all of which must hold:
 *   - `statusPattern`  regex tested against the status code as a string
 *   - `bodyPattern`    case-insensitive regex tested against the raw body
 *   - `httpStatus`     exact status code (legacy)
 *   - `bodyCode`       exact error code extracted from a JSON body (legacy)
 *   - `messagePattern` case-insensitive regex tested against the extracted
 *                      error message, or the raw body when none was found
 * A rule with no criteria, or with an uncompilable pattern, never matches.
 *
 * When no rule matches, HTTP 429 is classified as "tpm" and any status >= 500
 * as "server_error", both with `defaultCooldownMs`.
 *
 * @param {number} httpStatus HTTP status code (0 when unknown).
 * @param {string} body Raw response body or error text.
 * @param {object[]} rules Detection rules for the provider.
 * @param {number} defaultCooldownMs Wait applied when a rule has no `waitMs`.
 * @returns {{matched: boolean, category: string, description: string, waitMs: number, rule: object|null}}
 */
function matchError(httpStatus, body, rules, defaultCooldownMs) {
  const bodyText = typeof body === "string" ? body : "";
  const { bodyCode, bodyMessage } = extractBodyFields(bodyText);

  for (const rule of rules ?? []) {
    if (ruleMatches(rule, httpStatus, bodyText, bodyCode, bodyMessage)) {
      return {
        matched: true,
        category: rule.category,
        description: rule.description,
        waitMs: rule.waitMs ?? defaultCooldownMs,
        rule
      };
    }
  }

  if (httpStatus === 429) {
    return {
      matched: true,
      category: "tpm",
      description: "HTTP 429 (unmatched by rules, treating as TPM)",
      waitMs: defaultCooldownMs,
      rule: null
    };
  }
  if (httpStatus >= 500) {
    return {
      matched: true,
      category: "server_error",
      description: `HTTP ${httpStatus} (unmatched, treating as server error)`,
      waitMs: defaultCooldownMs,
      rule: null
    };
  }
  return { matched: false, category: "fatal", description: "Unknown error", waitMs: 0, rule: null };
}

/** Pull a numeric code and a message out of a JSON body, looking one level into `error`. */
function extractBodyFields(bodyText) {
  let bodyCode;
  let bodyMessage;
  if (!bodyText) {
    return { bodyCode, bodyMessage };
  }
  try {
    const parsed = JSON.parse(bodyText);
    bodyCode = extractNumericField(parsed, ["code", "error_code", "errorCode", "errCode", "ret"]);
    bodyMessage = extractStringField(parsed, ["message", "msg", "error", "error_msg", "errorMsg", "errMsg"]);
    const nested = parsed?.error;
    if (nested && typeof nested === "object") {
      // Only fill in what the top level did not provide; a legitimate code of
      // 0 at the top level must not be discarded.
      bodyCode ??= extractNumericField(nested, ["code", "error_code", "errorCode"]);
      bodyMessage ??= extractStringField(nested, ["message", "msg"]);
    }
  } catch {
    // Body is not JSON: structured fields stay undefined and only raw-text
    // patterns can match.
  }
  return { bodyCode, bodyMessage };
}

/** Test `text` against a user-supplied pattern; an invalid pattern never matches. */
function testPattern(pattern, text, flags) {
  try {
    return new RegExp(pattern, flags).test(text);
  } catch {
    return false;
  }
}

/** Decide whether every criterion present on `rule` holds for this error. */
function ruleMatches(rule, httpStatus, bodyText, bodyCode, bodyMessage) {
  if (rule === null || typeof rule !== "object") {
    return false;
  }
  let criteria = 0;
  if (rule.httpStatus !== undefined) {
    criteria++;
    if (rule.httpStatus !== httpStatus) return false;
  }
  if (rule.statusPattern !== undefined) {
    criteria++;
    if (!testPattern(rule.statusPattern, String(httpStatus))) return false;
  }
  if (rule.bodyCode !== undefined) {
    criteria++;
    if (bodyCode === undefined || String(rule.bodyCode) !== String(bodyCode)) return false;
  }
  if (rule.bodyPattern !== undefined) {
    criteria++;
    if (!testPattern(rule.bodyPattern, bodyText, "i")) return false;
  }
  if (rule.messagePattern) {
    criteria++;
    if (!testPattern(rule.messagePattern, bodyMessage ?? bodyText, "i")) return false;
  }
  return criteria > 0;
}

/**
 * Return the first of `keys` on `obj` that holds a number or a string that
 * starts with an integer.
 * @returns {number|undefined}
 */
function extractNumericField(obj, keys) {
  if (obj === null || typeof obj !== "object") {
    return undefined;
  }
  for (const key of keys) {
    const val = obj[key];
    if (typeof val === "number") {
      return val;
    }
    if (typeof val === "string") {
      const num = Number.parseInt(val, 10);
      if (!Number.isNaN(num)) {
        return num;
      }
    }
  }
  return undefined;
}

/**
 * Return the first of `keys` on `obj` that holds a string.
 * @returns {string|undefined}
 */
function extractStringField(obj, keys) {
  if (obj === null || typeof obj !== "object") {
    return undefined;
  }
  for (const key of keys) {
    const val = obj[key];
    if (typeof val === "string") {
      return val;
    }
  }
  return undefined;
}
301
+
302
+ // src/fetch-wrapper.ts
303
/** Per provider/model runtime state (cooldown and circuit breaker), keyed by stateKey(). */
const rateLimitStates = new Map();

/**
 * Build the map key for a provider/model pair.
 * @param {string} pid Provider id.
 * @param {string} mid Model id.
 * @returns {string} `"<pid>::<mid>"`.
 */
function stateKey(pid, mid) {
  return "".concat(pid, "::", mid);
}

/**
 * Fetch the state record for `k`, creating a zeroed one on first use.
 * @param {string} k Key produced by stateKey().
 * @returns {{cooldownUntil: number, consecutiveFailures: number, circuitOpen: boolean, circuitRecoveryAt: number}}
 */
function getState(k) {
  const existing = rateLimitStates.get(k);
  if (existing) {
    return existing;
  }
  const fresh = {
    cooldownUntil: 0,
    consecutiveFailures: 0,
    circuitOpen: false,
    circuitRecoveryAt: 0,
  };
  rateLimitStates.set(k, fresh);
  return fresh;
}
315
/**
 * Roughly estimate the token count of a request body at four characters per
 * token.
 *
 * When the body is JSON with a `messages` array, only the text of the
 * messages is counted (string contents, string parts, and parts carrying a
 * `text` property). In every other case, including unparsable bodies, the
 * whole body length is used.
 *
 * @param {string} body Serialized request body.
 * @returns {number} Estimated tokens; 0 for an empty body.
 */
function estimateTokens(body) {
  if (!body) {
    return 0;
  }
  const wholeBodyEstimate = Math.ceil(body.length / 4);
  const partLength = (part) => {
    if (typeof part === "string") {
      return part.length;
    }
    if (typeof part === "object" && part !== null && "text" in part) {
      return String(part.text).length;
    }
    return 0;
  };
  try {
    const { messages } = JSON.parse(body);
    if (!Array.isArray(messages)) {
      return wholeBodyEstimate;
    }
    let chars = 0;
    for (const message of messages) {
      const { content } = message;
      if (typeof content === "string") {
        chars += content.length;
      } else if (Array.isArray(content)) {
        for (const part of content) {
          chars += partLength(part);
        }
      }
    }
    return Math.ceil(chars / 4);
  } catch {
    // Not JSON, or not shaped like a chat request: fall back to raw length.
    return wholeBodyEstimate;
  }
}
340
/** Logger used by the fetch wrapper; every level is a no-op until setLogger() is called. */
let _log = {
  debug() {},
  info() {},
  warn() {},
  error() {},
};

/**
 * Replace the fetch wrapper's logger.
 * @param {{debug: Function, info: Function, warn: Function, error: Function}} l
 */
function setLogger(l) {
  _log = l;
}
344
/**
 * Read a response body as text without consuming the response itself.
 *
 * Works on a clone so the caller can still read `r` afterwards.
 *
 * @param {Response} r
 * @returns {Promise<string>} The body text, or "" if cloning or reading fails.
 */
async function peekBody(r) {
  let text = "";
  try {
    const copy = r.clone();
    text = await copy.text();
  } catch {
    text = "";
  }
  return text;
}
351
/**
 * Wrap a fetch implementation with rate limiting for one provider/model.
 *
 * Before the request the wrapper rejects (throws) when the circuit breaker
 * is open, the key is in cooldown, or, for `/chat/completions` calls with a
 * string body, the estimated tokens or the request count would exceed the
 * configured TPM/RPM limits. After the request, non-OK responses are
 * classified with matchError(): fatal errors are reported via `onFatalError`;
 * any other match increments the failure count, may open the circuit breaker,
 * starts a cooldown and is reported via `onRateLimited`. The response itself
 * is always returned to the caller unmodified.
 *
 * @param {typeof fetch} baseFetch Underlying fetch to delegate to.
 * @param {{config: object, providerId: string, modelId: string,
 *          onRateLimited: Function, onFatalError: Function}} opts
 * @returns {(input: string|URL|Request, init?: object) => Promise<Response>}
 */
function createWrappedFetch(baseFetch, opts) {
  const { config, providerId, modelId, onRateLimited, onFatalError } = opts;
  const key = stateKey(providerId, modelId);
  const tpmLimit = findProviderTPM(config, providerId, modelId);
  const rpmLimit = findProviderRPM(config, providerId, modelId);
  const provCfg = config.providers.find((p) => p.provider === providerId);
  const errorRules = provCfg?.errorDetection ?? [];

  return async (input, init) => {
    const url = typeof input === "string" ? input : input instanceof URL ? input.href : input.url;
    const isChat = url.includes("/chat/completions");
    const now = Date.now();
    const state = getState(key);

    if (state.circuitOpen) {
      if (now < state.circuitRecoveryAt) {
        _log.warn(`[rl] Circuit OPEN: ${key}`);
        throw new Error(`Circuit breaker open`);
      }
      // Recovery time has passed: let this request through as a probe. The
      // failure count is kept, so one more failure re-opens the circuit.
      state.circuitOpen = false;
      _log.info(`[rl] Circuit recovery: ${key}`);
    }

    if (state.cooldownUntil > 0 && now < state.cooldownUntil) {
      const rem = Math.ceil((state.cooldownUntil - now) / 1000);
      _log.warn(`[rl] Cooldown ${key} (${rem}s)`);
      onRateLimited(providerId, modelId, `Cooldown: ${rem}s`);
      throw new Error(`Provider in cooldown`);
    }

    if (isChat && init?.body) {
      // Non-string bodies (streams, buffers) cannot be inspected and are
      // counted as zero tokens.
      const bodyStr = typeof init.body === "string" ? init.body : "";
      const est = estimateTokens(bodyStr);
      if (tpmLimit > 0 && tokenCounter.wouldExceedTPM(key, tpmLimit, est)) {
        const cur = tokenCounter.getTPM(key);
        _log.warn(`[rl] TPM: ${cur}/${tpmLimit} +${est} -> ${key}`);
        onRateLimited(providerId, modelId, `Pre-flight TPM: ${cur}/${tpmLimit}`);
        throw new Error(`TPM limit`);
      }
      if (rpmLimit > 0 && tokenCounter.wouldExceedRPM(key, rpmLimit)) {
        _log.warn(`[rl] RPM: ${rpmLimit} -> ${key}`);
        onRateLimited(providerId, modelId, `Pre-flight RPM: ${rpmLimit}`);
        throw new Error(`RPM limit`);
      }
      tokenCounter.record(key, est);
      tokenCounter.recordRequest(key);
    }

    // Network errors propagate to the caller unchanged.
    const response = await baseFetch(input, init);

    if (isChat) {
      const rlh = parseRateLimitHeaders(response.headers);
      if (rlh) {
        _log.debug(`[rl] quota ${key}: ${rlh.remaining}/${rlh.limit}`);
      }
    }

    if (!response.ok) {
      const bodyText = await peekBody(response);
      const result = matchError(response.status, bodyText, errorRules, config.cooldownMs);
      if (result.matched) {
        _log.warn(`[rl] [${result.category}] ${result.description} (${key})`);
        if (result.category === "fatal") {
          _log.error(`[rl] FATAL: ${result.description}`);
          onFatalError(providerId, modelId, result.description);
        } else {
          // Deadlines are measured from when the failure was observed, not
          // from when the request started; otherwise a slow request would
          // shorten (or entirely consume) the cooldown.
          const failedAt = Date.now();
          state.consecutiveFailures++;
          if (config.circuitBreaker.enabled && state.consecutiveFailures >= config.circuitBreaker.failureThreshold) {
            state.circuitOpen = true;
            state.circuitRecoveryAt = failedAt + config.circuitBreaker.recoveryTimeoutMs;
            _log.error(`[rl] Circuit BREAKER OPEN: ${key}`);
          }
          state.cooldownUntil = failedAt + result.waitMs;
          onRateLimited(providerId, modelId, result.description);
        }
      } else {
        _log.warn(`[rl] Unclassified HTTP ${response.status}: ${key}`);
      }
    } else if (state.consecutiveFailures > 0) {
      _log.info(`[rl] ${key} recovered (failures: ${state.consecutiveFailures})`);
      state.consecutiveFailures = 0;
    }

    return response;
  };
}
433
/**
 * Snapshot current usage and limiter state for every key that has recorded
 * token usage.
 *
 * @param {object} config Plugin configuration (used to look up limits).
 * @returns {Array<{providerId: string, modelId: string, tpm: number, tpmLimit: number,
 *   rpm: number, rpmLimit: number, cooldownRemaining: number, circuitOpen: boolean,
 *   consecutiveFailures: number}>} `cooldownRemaining` is in whole seconds.
 */
function getFetchStatus(config) {
  const now = Date.now();
  const rows = [];
  for (const key of tokenCounter.getKeys()) {
    const [pid, mid] = key.split("::");
    const s = rateLimitStates.get(key);
    const tpm = tokenCounter.getTPM(key);
    const tpmLimit = findProviderTPM(config, pid, mid);
    const rpm = tokenCounter.getRPM(key);
    const rpmLimit = findProviderRPM(config, pid, mid);
    let cooldownRemaining = 0;
    if (s && s.cooldownUntil > now) {
      cooldownRemaining = Math.ceil((s.cooldownUntil - now) / 1000);
    }
    rows.push({
      providerId: pid,
      modelId: mid,
      tpm,
      tpmLimit,
      rpm,
      rpmLimit,
      cooldownRemaining,
      circuitOpen: s?.circuitOpen ?? false,
      consecutiveFailures: s?.consecutiveFailures ?? 0,
    });
  }
  return rows;
}
451
+
452
+ // src/fallback.ts
453
/** Fallback bookkeeping per session id. */
const sessionStates = new Map();

/**
 * Return the fallback state of a session, creating it on first use.
 *
 * @param {string} sessionId
 * @param {string} originalModel Stored on creation only; ignored for existing sessions.
 * @returns {{attemptCount: number, triedModels: string[], originalModel: string, succeeded: boolean}}
 */
function getOrCreateSession(sessionId, originalModel) {
  const known = sessionStates.get(sessionId);
  if (known) {
    return known;
  }
  const created = { attemptCount: 0, triedModels: [], originalModel, succeeded: false };
  sessionStates.set(sessionId, created);
  return created;
}
467
/** Model key -> `{ until, reason }` for models that are temporarily skipped. */
const cooldowns = new Map();

/**
 * @param {string} providerId
 * @param {string} modelId
 * @returns {string} `"<providerId>::<modelId>"`.
 */
function getModelKey(providerId, modelId) {
  return "".concat(providerId, "::", modelId);
}

/**
 * @param {string} providerId
 * @param {string} modelId
 * @returns {boolean} True while the model's cooldown deadline lies in the future.
 */
function isInCooldown(providerId, modelId) {
  const entry = cooldowns.get(getModelKey(providerId, modelId));
  return entry ? Date.now() < entry.until : false;
}

/**
 * Put a model into cooldown, replacing any existing entry for it.
 *
 * @param {string} providerId
 * @param {string} modelId
 * @param {number} durationMs Cooldown length from now, in milliseconds.
 * @param {string} reason Stored for status reporting.
 */
function setCooldown(providerId, modelId, durationMs, reason) {
  const modelKey = getModelKey(providerId, modelId);
  const until = Date.now() + durationMs;
  cooldowns.set(modelKey, { until, reason });
}
483
/**
 * Compute the backoff delay for a retry attempt.
 *
 * "immediate" yields 0, "linear" yields `baseDelayMs * attempt`, and anything
 * else is treated as exponential (`baseDelayMs * 2^(attempt - 1)`). The value
 * is capped at `maxDelayMs`; with jitter enabled it is then scaled by a
 * random factor between 0.5 and 1.
 *
 * @param {{strategy: string, baseDelayMs: number, maxDelayMs: number, jitterEnabled: boolean}} policy
 * @param {number} attempt Attempt number.
 * @returns {number} Delay in milliseconds, rounded to an integer.
 */
function calculateDelay(policy, attempt) {
  const { strategy, baseDelayMs, maxDelayMs, jitterEnabled } = policy;
  let uncapped;
  if (strategy === "immediate") {
    uncapped = 0;
  } else if (strategy === "linear") {
    uncapped = baseDelayMs * attempt;
  } else {
    uncapped = baseDelayMs * 2 ** (attempt - 1);
  }
  const capped = Math.min(uncapped, maxDelayMs);
  const finalDelay = jitterEnabled ? capped * (0.5 + Math.random() * 0.5) : capped;
  return Math.round(finalDelay);
}
503
/**
 * List fallback models that are still usable, lowest `priority` first.
 *
 * A model is usable when its key is not in `triedModels` and it is not
 * currently in cooldown. Models without `priority` sort as priority 0.
 *
 * @param {{fallbackModels: object[]}} config
 * @param {string[]} triedModels Model keys already attempted.
 * @returns {object[]} A new, sorted array of fallback model entries.
 */
function getAvailableFallbacks(config, triedModels) {
  const candidates = [];
  for (const fm of config.fallbackModels) {
    if (triedModels.includes(getModelKey(fm.providerId, fm.modelId))) {
      continue;
    }
    if (isInCooldown(fm.providerId, fm.modelId)) {
      continue;
    }
    candidates.push(fm);
  }
  candidates.sort((left, right) => (left.priority ?? 0) - (right.priority ?? 0));
  return candidates;
}
509
// NOTE(review): nothing visible in this file replaces this no-op logger, so
// the messages below appear to be discarded. Confirm whether it should be
// wired to the plugin logger.
const log = {
  debug: () => {},
  info: () => {},
  warn: () => {},
  error: () => {}
};

/**
 * Pick the next fallback model for a session after `original*` failed.
 *
 * The failed model is put into cooldown and marked as tried. If the session
 * has used up `retryPolicy.maxRetries`, or no candidate remains, the result
 * has `ok: false`. In "cycle" mode, running out of candidates starts a new
 * round: all cooldowns and the session's tried list are reset (the model that
 * just failed stays excluded), so earlier models can be selected again.
 *
 * This function does not wait. The suggested backoff for the attempt is
 * returned as `delayMs` for the caller to apply.
 *
 * @param {object} config Plugin configuration.
 * @param {string} sessionId
 * @param {string} originalProviderId Provider of the model that failed.
 * @param {string} originalModelId Model that failed.
 * @param {string} reason Failure description, stored with the cooldown.
 * @returns {{model: object|null, ok: boolean, reason: string, delayMs?: number}}
 */
function attemptFallback(config, sessionId, originalProviderId, originalModelId, reason) {
  const session = getOrCreateSession(sessionId, originalModelId);
  const failedKey = getModelKey(originalProviderId, originalModelId);
  setCooldown(originalProviderId, originalModelId, config.cooldownMs, reason);
  if (!session.triedModels.includes(failedKey)) {
    session.triedModels.push(failedKey);
  }

  if (session.attemptCount >= config.retryPolicy.maxRetries) {
    log.error(`[rate-limit] Max retries (${config.retryPolicy.maxRetries}) reached for session ${sessionId}`);
    return { model: null, ok: false, reason: "Max retries exceeded" };
  }

  let candidates = getAvailableFallbacks(config, session.triedModels);
  let cycled = false;
  if (candidates.length === 0 && config.fallbackMode === "cycle") {
    log.info("[rate-limit] All models exhausted, clearing cooldowns for cycle mode");
    cooldowns.clear();
    // Keep the model that just failed out of the new round.
    setCooldown(originalProviderId, originalModelId, config.cooldownMs, reason);
    session.triedModels = [failedKey];
    candidates = getAvailableFallbacks(config, session.triedModels);
    cycled = true;
  }
  if (candidates.length === 0) {
    return { model: null, ok: false, reason: "No fallback models available" };
  }

  const next = candidates[0];
  session.attemptCount++;
  session.triedModels.push(getModelKey(next.providerId, next.modelId));
  const delayMs = calculateDelay(config.retryPolicy, session.attemptCount);
  if (delayMs > 0) {
    log.info(`[rate-limit] Suggested backoff of ${delayMs}ms before fallback attempt ${session.attemptCount}`);
  }
  log.info(`[rate-limit] Fallback attempt ${session.attemptCount}: ` + `${originalProviderId}/${originalModelId} → ${next.providerId}/${next.modelId} (reason: ${reason})`);
  return {
    model: next,
    ok: true,
    reason: cycled
      ? `Cycling to ${next.providerId}/${next.modelId}`
      : `Switched to ${next.providerId}/${next.modelId}`,
    delayMs
  };
}
553
/**
 * Flag a known session as having completed successfully; unknown session ids
 * are ignored.
 * @param {string} sessionId
 */
function markFallbackSuccess(sessionId) {
  const session = sessionStates.get(sessionId);
  if (!session) {
    return;
  }
  session.succeeded = true;
}

/**
 * Drop all fallback bookkeeping for a session.
 * @param {string} sessionId
 */
function removeSession(sessionId) {
  sessionStates.delete(sessionId);
}
562
/**
 * Summarize fallback state for status reporting.
 *
 * @returns {{activeSessions: number, cooldowns: Array<{key: string, remaining: number, reason: string}>}}
 *   Only cooldowns that have not yet expired are listed; `remaining` is in
 *   whole seconds.
 */
function getFallbackStatus() {
  const now = Date.now();
  const cooldownEntries = [...cooldowns.entries()]
    .filter(([, entry]) => now < entry.until)
    .map(([key, entry]) => ({
      key,
      remaining: Math.ceil((entry.until - now) / 1000),
      reason: entry.reason,
    }));
  return {
    activeSessions: sessionStates.size,
    cooldowns: cooldownEntries,
  };
}
579
+
580
+ // src/event-handler.ts
581
/** Logger used by the event handler; every level is a no-op until setEventLogger() is called. */
let _l = {
  debug() {},
  info() {},
  warn() {},
  error() {},
};

/**
 * Replace the event handler's logger.
 * @param {{debug: Function, info: Function, warn: Function, error: Function}} l
 */
function setEventLogger(l) {
  _l = l;
}
585
/**
 * Select the error-detection rules to apply for a provider.
 *
 * Uses the rules of the first config entry for `pid`; when that entry is
 * missing or has no rules, the rules of all providers are concatenated.
 *
 * @param {{providers: object[]}} config
 * @param {string} pid Provider id ("" when unknown).
 * @returns {object[]}
 */
function allRules(config, pid) {
  const ownRules = config.providers.find((entry) => entry.provider === pid)?.errorDetection ?? [];
  if (ownRules.length > 0) {
    return ownRules;
  }
  return config.providers.flatMap((entry) => entry.errorDetection ?? []);
}
590
/**
 * React to an OpenCode event by triggering a model fallback when the event
 * carries a non-fatal error recognized by the detection rules.
 *
 * Handled event types:
 *   - "session.error":   classify `properties.error`
 *   - "message.updated": mark success on completion, otherwise classify
 *                        `properties.info.error`
 *   - "session.status":  classify the message of a "retry" status
 *   - "session.deleted": drop the session's fallback state
 *
 * @param {{type: string, properties?: object}} e Event to inspect.
 * @param {{config: object, onFb: Function}} ctx `onFb(providerId, modelId, sessionId)`
 *   is called when a fallback model was selected.
 * @returns {boolean} True when the event was recognized as a non-fatal error
 *   and a fallback was attempted (whether or not a model was available).
 */
function handleEvent(e, ctx) {
  if (!ctx.config.eventFallback) {
    return false;
  }

  // Shared tail of all three error paths: classify, log, attempt fallback.
  const react = (label, status, text, sessionId, providerId, modelId) => {
    const verdict = matchError(status, text, allRules(ctx.config, providerId), ctx.config.cooldownMs);
    if (!verdict.matched || verdict.category === "fatal") {
      return false;
    }
    _l.warn(`[rl] event ${label}: ${verdict.description}`);
    const fb = attemptFallback(ctx.config, sessionId, providerId, modelId, verdict.description);
    if (fb.ok && fb.model) {
      ctx.onFb(fb.model.providerId, fb.model.modelId, sessionId);
    }
    return true;
  };

  switch (e.type) {
    case "session.error": {
      const props = e.properties;
      if (!props?.sessionID || !props?.error) {
        return false;
      }
      const text = typeof props.error.message === "string" ? props.error.message : JSON.stringify(props.error);
      const status = typeof props.error.statusCode === "number" ? props.error.statusCode : 0;
      return react("err", status, text, props.sessionID, "", "");
    }
    case "message.updated": {
      const info = e.properties?.info;
      if (!info) {
        return false;
      }
      if (info.status === "completed" && !info.error && info.sessionID) {
        markFallbackSuccess(info.sessionID);
        return false;
      }
      if (!info.error) {
        return false;
      }
      const text = typeof info.error === "string" ? info.error : JSON.stringify(info.error);
      return react("msg", 0, text, info.sessionID ?? "", info.providerID ?? "", info.modelID ?? "");
    }
    case "session.status": {
      const props = e.properties;
      if (props?.status?.type === "retry" && props.status.message) {
        return react("retry", 0, props.status.message, props.sessionID ?? "", "", "");
      }
      return false;
    }
    case "session.deleted": {
      const props = e.properties;
      if (props?.sessionID) {
        removeSession(props.sessionID);
      }
      return false;
    }
    default:
      return false;
  }
}
649
+
650
+ // src/index.ts
651
/**
 * Read the `model` field from a JSON string request body.
 * @param {object} [init] Fetch init object.
 * @returns {string|undefined} The model id, or undefined when it cannot be determined.
 */
function readRequestModel(init) {
  if (typeof init?.body !== "string") {
    return undefined;
  }
  try {
    const model = JSON.parse(init.body)?.model;
    return typeof model === "string" && model !== "" ? model : undefined;
  } catch {
    // Not JSON: the caller falls back to a default model.
    return undefined;
  }
}

/**
 * OpenCode plugin entry point.
 *
 * Loads the configuration, sets up logging and returns the plugin hooks.
 * Returns an empty hook object when the plugin is disabled or the
 * configuration has errors.
 *
 * @param {{client?: object, directory?: string, worktree?: string}} input
 * @returns {Promise<object>} Plugin hooks (`config`, `event`, `command`, `cleanup`).
 */
var RateLimitPlugin = async ({ client, directory, worktree }) => {
  const { config, sourcePath, errors } = loadConfig(worktree);
  const levels = { debug: 0, info: 1, warn: 2, error: 3 };
  const threshold = levels[config.logLevel] ?? 2;
  const logger = {
    debug: (m, ...a) => {
      if (threshold <= 0)
        console.debug(`[rate-limit] ${m}`, ...a);
    },
    info: (m, ...a) => {
      if (threshold <= 1)
        console.info(`[rate-limit] ${m}`, ...a);
    },
    warn: (m, ...a) => {
      if (threshold <= 2)
        console.warn(`[rate-limit] ${m}`, ...a);
    },
    error: (m, ...a) => {
      if (threshold <= 3)
        console.error(`[rate-limit] ${m}`, ...a);
    }
  };
  setLogger(logger);
  setEventLogger(logger);
  if (!config.enabled) {
    logger.info("Plugin disabled");
    return {};
  }
  if (sourcePath)
    logger.info(`Config: ${sourcePath}`);
  else
    logger.info("No config file, using defaults");
  if (errors.length > 0) {
    for (const e of errors)
      logger.error(`Config error: ${e}`);
    return {};
  }
  tokenCounter.startResetTimer();
  return {
    /**
     * Install one rate-limiting fetch per provider.
     *
     * `options.fetch` is shared by all models of a provider, so a single
     * dispatcher is installed that routes each request to the limiter of the
     * model named in the request body. Wrapping the fetch once per model
     * would stack the limiters, so that every request is throttled and
     * counted against every model.
     */
    async config(cfg) {
      if (!config.fetchInterception)
        return;
      const providers = cfg.provider ?? {};
      for (const [pid, pCfg] of Object.entries(providers)) {
        if (!pCfg || typeof pCfg !== "object")
          continue;
        const pc = pCfg;
        const modelIds = Object.keys(pc.models ?? {});
        if (modelIds.length === 0)
          continue;
        // Capture the provider's own fetch exactly once, before replacing it.
        const baseFetch = pc.options?.fetch ?? globalThis.fetch.bind(globalThis);
        const limiters = new Map();
        const limiterFor = (mid) => {
          let limiter = limiters.get(mid);
          if (!limiter) {
            limiter = createWrappedFetch(baseFetch, {
              config,
              providerId: pid,
              modelId: mid,
              onRateLimited: (p, m, reason) => {
                logger.warn(`Fallback triggered: ${p}/${m} — ${reason}`);
              },
              onFatalError: (p, m, reason) => {
                logger.error(`Fatal: ${p}/${m} — ${reason}`);
              }
            });
            limiters.set(mid, limiter);
          }
          return limiter;
        };
        for (const mid of modelIds) {
          limiterFor(mid);
          logger.info(`Fetch injected: ${pid}/${mid}`);
        }
        if (!pc.options)
          pc.options = {};
        // NOTE(review): assumes the request body's `model` equals the model
        // key used in the OpenCode config. Confirm for providers that remap
        // model ids. Requests without a readable model use the first
        // configured model's limiter.
        pc.options.fetch = (input, init) => limiterFor(readRequestModel(init) ?? modelIds[0])(input, init);
      }
    },
    event: async ({ event }) => {
      handleEvent(event, {
        config,
        onFb: (pid, mid, sid) => {
          logger.warn(`Event fallback: ${pid}/${mid} for session ${sid}`);
        }
      });
    },
    async command(cfg) {
      cfg.command = cfg.command ?? {};
      // NOTE(review): the report is rendered once, when this hook runs, so
      // the command shows a snapshot from registration time rather than live
      // status. Confirm whether the host supports a dynamic template.
      cfg.command["rate-limit-status"] = {
        description: "Show rate limit status and statistics",
        template: buildStatusReport(config)
      };
    },
    cleanup: () => {
      tokenCounter.destroy();
    }
  };
};
740
/**
 * Render the current limiter and fallback state as a Markdown report.
 *
 * @param {object} config Plugin configuration.
 * @returns {string} Markdown text with a per-model table, the active session
 *   count, any active cooldowns and a one-line config summary.
 */
function buildStatusReport(config) {
  const fetchStatus = getFetchStatus(config);
  const fallbackStatus = getFallbackStatus();
  const onOff = (flag) => (flag ? "on" : "off");
  const parts = ["## Rate Limit Status\n\n"];

  if (fetchStatus.length === 0) {
    parts.push("_No active providers tracked._\n");
  } else {
    parts.push("| Provider | Model | TPM | Limit | RPM | Limit | Cooldown | Circuit | Failures |\n");
    parts.push("|----------|-------|-----|-------|-----|-------|----------|---------|----------|\n");
    for (const s of fetchStatus) {
      const cooldown = s.cooldownRemaining > 0 ? `${s.cooldownRemaining}s` : "—";
      const circuit = s.circuitOpen ? "⚠ OPEN" : "✅";
      parts.push(
        `| ${s.providerId} | ${s.modelId} | ${s.tpm} | ${s.tpmLimit || "∞"} | ${s.rpm} | ${s.rpmLimit || "∞"} | ${cooldown} | ${circuit} | ${s.consecutiveFailures} |\n`
      );
    }
  }

  parts.push(`\n**Active sessions:** ${fallbackStatus.activeSessions}\n`);

  if (fallbackStatus.cooldowns.length > 0) {
    parts.push("\n**Cooldowns:**\n");
    for (const c of fallbackStatus.cooldowns) {
      parts.push(`- ${c.key}: ${c.remaining}s (${c.reason})\n`);
    }
  }

  parts.push(
    `\n**Config:** ${config.enabled ? "enabled" : "disabled"} | Fetch: ${onOff(config.fetchInterception)} | Events: ${onOff(config.eventFallback)}`
  );
  return parts.join("");
}
775
+ var src_default = RateLimitPlugin;
776
+ export {
777
+ src_default as default,
778
+ RateLimitPlugin
779
+ };
package/package.json ADDED
@@ -0,0 +1,38 @@
1
+ {
2
+ "name": "@crystalfluxay/opencode-rate-limiter",
3
+ "version": "0.1.0",
4
+ "description": "OpenCode plugin for proactive TPM/RPM rate limiting with multi-provider fallback and fuzzy error matching",
5
+ "author": {
6
+ "name": "crystalfluxay"
7
+ },
8
+ "license": "MIT",
9
+ "keywords": ["opencode", "plugin", "rate-limit", "tpm", "rpm", "fallback", "xunfei", "xfyun"],
10
+ "repository": {
11
+ "type": "git",
12
+ "url": "https://github.com/crystalfluxay/opencode-rate-limiter"
13
+ },
14
+ "type": "module",
15
+ "exports": {
16
+ ".": {
17
+ "types": "./dist/index.d.ts",
18
+ "default": "./dist/index.js"
19
+ }
20
+ },
21
+ "files": [
22
+ "dist/index.js",
23
+ "rate-limit.example.json",
24
+ "README.md"
25
+ ],
26
+ "dependencies": {
27
+ "@opencode-ai/plugin": "1.0.85"
28
+ },
29
+ "devDependencies": {
30
+ "@types/node": "^20.11.5",
31
+ "bun-types": "latest",
32
+ "typescript": "^5.7.0"
33
+ },
34
+ "scripts": {
35
+ "build": "tsc",
36
+ "typecheck": "tsc --noEmit"
37
+ }
38
+ }
@@ -0,0 +1,53 @@
1
+ {
2
+ "enabled": true,
3
+ "cooldownMs": 60000,
4
+ "fallbackMode": "cycle",
5
+ "fallbackModels": [],
6
+ "retryPolicy": {
7
+ "maxRetries": 3,
8
+ "strategy": "exponential",
9
+ "baseDelayMs": 1000,
10
+ "maxDelayMs": 30000,
11
+ "jitterEnabled": true
12
+ },
13
+ "circuitBreaker": {
14
+ "enabled": true,
15
+ "failureThreshold": 5,
16
+ "recoveryTimeoutMs": 60000
17
+ },
18
+ "providers": [
19
+ {
20
+ "provider": "xfyun",
21
+ "tpm": 100000,
22
+ "rpm": 10,
23
+ "errorDetection": [
24
+ { "bodyPattern": "11210|TPM.*超限|tpm.*limit", "category": "tpm", "description": "TPM超限", "waitMs": 60000 },
25
+ { "bodyPattern": "11201|次数超限|速率限制|请求.*过快", "category": "tpm", "description": "次数超限", "waitMs": 60000 },
26
+ { "bodyPattern": "11202|秒级流控", "category": "rpm", "description": "秒级流控超限", "waitMs": 5000 },
27
+ { "bodyPattern": "11203|并发.*超|并发流控|并发.*限制", "category": "rpm", "description": "并发流控超限", "waitMs": 10000 },
28
+ { "statusPattern": "^429$", "category": "tpm", "description": "HTTP 429", "waitMs": 60000 },
29
+ { "bodyPattern": "10008|服务容量不足", "category": "server_error", "description": "服务容量不足" },
30
+ { "bodyPattern": "10010|引擎.*排队|引擎.*连接失败", "category": "server_error", "description": "引擎排队" },
31
+ { "bodyPattern": "10012|引擎内部错误", "category": "server_error", "description": "引擎内部错误" },
32
+ { "bodyPattern": "10110|服务忙|服务繁忙", "category": "server_error", "description": "服务忙" },
33
+ { "bodyPattern": "10222|引擎网络异常", "category": "server_error", "description": "引擎网络异常" },
34
+ { "statusPattern": "^5\\d{2}$", "category": "server_error", "description": "HTTP 5xx" },
35
+ { "bodyPattern": "10907|10910|token.*上限|token.*超", "category": "retryable", "description": "Token超上限", "waitMs": 5000 },
36
+ { "statusPattern": "^401$", "category": "fatal", "description": "HTTP 401" },
37
+ { "statusPattern": "^403$", "category": "fatal", "description": "HTTP 403" },
38
+ { "bodyPattern": "10004|schema.*错误", "category": "fatal", "description": "schema错误" },
39
+ { "bodyPattern": "10005|参数.*错误", "category": "fatal", "description": "参数错误" },
40
+ { "bodyPattern": "10013|审核不通过", "category": "fatal", "description": "审核不通过" },
41
+ { "bodyPattern": "10014|回复.*敏感", "category": "fatal", "description": "回复敏感" },
42
+ { "bodyPattern": "10015|黑名单", "category": "fatal", "description": "黑名单" },
43
+ { "bodyPattern": "10016|授权.*错误|未开通", "category": "fatal", "description": "授权错误" },
44
+ { "bodyPattern": "10019|疑似敏感", "category": "fatal", "description": "疑似敏感" },
45
+ { "bodyPattern": "11200|授权错误|无.*授权", "category": "fatal", "description": "授权错误" },
46
+ { "bodyPattern": "11221|套餐.*不支持|模型配置错误", "category": "fatal", "description": "模型配置错误" }
47
+ ]
48
+ }
49
+ ],
50
+ "fetchInterception": true,
51
+ "eventFallback": true,
52
+ "logLevel": "warn"
53
+ }