clawmoney 0.15.69 → 0.15.70
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/relay-setup.js +34 -20
- package/dist/relay/pricing.js +1 -0
- package/dist/relay/provider.js +80 -1
- package/dist/relay/types.d.ts +1 -0
- package/dist/relay/upstream/antigravity-api.js +1 -0
- package/dist/relay/upstream/claude-api.js +18 -0
- package/dist/relay/upstream/codex-api.js +1 -0
- package/dist/relay/upstream/gemini-api.js +1 -0
- package/dist/relay/upstream/rate-guard.d.ts +20 -2
- package/dist/relay/upstream/rate-guard.js +49 -3
- package/package.json +1 -1
|
@@ -6,7 +6,7 @@ import * as readline from "node:readline";
|
|
|
6
6
|
import { intro, outro, multiselect, select, spinner, isCancel, cancel, log, } from "@clack/prompts";
|
|
7
7
|
import chalk from "chalk";
|
|
8
8
|
import { apiPost } from "../utils/api.js";
|
|
9
|
-
import { loadConfig, requireConfig } from "../utils/config.js";
|
|
9
|
+
import { loadConfig, requireConfig, saveConfig } from "../utils/config.js";
|
|
10
10
|
import { setupCommand } from "./setup.js";
|
|
11
11
|
import { API_PRICES, PLATFORM_FEE } from "../relay/pricing.js";
|
|
12
12
|
import { hasClaudeFingerprint, bootstrapClaudeFingerprint, } from "../relay/upstream/claude-bootstrap.js";
|
|
@@ -31,9 +31,10 @@ import { hasCodexFingerprint, bootstrapCodexFingerprint, } from "../relay/upstre
|
|
|
31
31
|
// falls through to modelsForCli(cli) which returns EVERY priced
|
|
32
32
|
// model in that family.
|
|
33
33
|
const RECOMMENDED_MODELS = {
|
|
34
|
-
// Claude Code /model menu
|
|
35
|
-
//
|
|
36
|
-
|
|
34
|
+
// Claude Code /model menu (post 2026-04-16 Opus 4.7 release):
|
|
35
|
+
// Default(Opus 4.7 1M) / Sonnet 4.6 / Haiku 4.5
|
|
36
|
+
// Opus 4.7 released 2026-04-16 and became the default model.
|
|
37
|
+
claude: ["claude-opus-4-7", "claude-sonnet-4-6", "claude-opus-4-6", "claude-haiku-4-5"],
|
|
37
38
|
// Codex CLI /model menu for ChatGPT sign-in (post 2026-04-14 cleanup):
|
|
38
39
|
// gpt-5.4 — latest frontier agentic coding (current default)
|
|
39
40
|
// gpt-5.4-mini — smaller frontier agentic coding
|
|
@@ -358,37 +359,50 @@ export async function relaySetupCommand() {
|
|
|
358
359
|
// pricing × number of providers; we can't predict that, so we don't
|
|
359
360
|
// pretend to.
|
|
360
361
|
const concurrency = 5;
|
|
361
|
-
const
|
|
362
|
-
message: "
|
|
362
|
+
const quotaShareChoice = await select({
|
|
363
|
+
message: "How much of your 5h session window can relay use?",
|
|
363
364
|
options: [
|
|
364
365
|
{
|
|
365
|
-
value:
|
|
366
|
-
label: "
|
|
366
|
+
value: 25,
|
|
367
|
+
label: "25% · Light",
|
|
367
368
|
hint: "share a quarter, leaves 75% for your personal use",
|
|
368
369
|
},
|
|
369
370
|
{
|
|
370
|
-
value:
|
|
371
|
-
label: "
|
|
372
|
-
hint: "splits
|
|
371
|
+
value: 50,
|
|
372
|
+
label: "50% · Balanced (recommended)",
|
|
373
|
+
hint: "splits your quota evenly between you and the relay",
|
|
373
374
|
},
|
|
374
375
|
{
|
|
375
|
-
value:
|
|
376
|
-
label: "
|
|
376
|
+
value: 75,
|
|
377
|
+
label: "75% · Heavy",
|
|
377
378
|
hint: "most of your subscription goes to relay, 25% reserved for personal use",
|
|
378
379
|
},
|
|
379
380
|
{
|
|
380
|
-
value:
|
|
381
|
-
label: "
|
|
381
|
+
value: 100,
|
|
382
|
+
label: "100% · Full",
|
|
382
383
|
hint: "dedicates your subscription to relay — best for accounts you don't use personally",
|
|
383
384
|
},
|
|
384
385
|
],
|
|
385
|
-
initialValue:
|
|
386
|
+
initialValue: 50,
|
|
386
387
|
});
|
|
387
|
-
if (isCancel(
|
|
388
|
+
if (isCancel(quotaShareChoice)) {
|
|
388
389
|
cancel("Setup cancelled");
|
|
389
390
|
process.exit(0);
|
|
390
391
|
}
|
|
391
|
-
const
|
|
392
|
+
const maxRelayUtilization = quotaShareChoice;
|
|
393
|
+
// daily_limit_usd is kept as a high fallback — the real cap is now
|
|
394
|
+
// maxRelayUtilization enforced by the daemon's rate-guard. Set it
|
|
395
|
+
// generously so it doesn't interfere.
|
|
396
|
+
const dailyLimit = 60;
|
|
397
|
+
// Persist max_relay_utilization into config.yaml so the daemon's
|
|
398
|
+
// rate-guard reads it on startup.
|
|
399
|
+
saveConfig({
|
|
400
|
+
relay: {
|
|
401
|
+
rate_guard: {
|
|
402
|
+
max_relay_utilization: maxRelayUtilization,
|
|
403
|
+
},
|
|
404
|
+
},
|
|
405
|
+
});
|
|
392
406
|
// ── Step 5: register everything under one spinner ──
|
|
393
407
|
//
|
|
394
408
|
// We deliberately skip the old per-model Summary block: pricing is on
|
|
@@ -404,7 +418,7 @@ export async function relaySetupCommand() {
|
|
|
404
418
|
// subscriptions + quota share above; Ctrl-C still aborts, and the
|
|
405
419
|
// backend is idempotent so mid-way aborts are safe to re-run.
|
|
406
420
|
const limitLabel = {
|
|
407
|
-
|
|
421
|
+
25: "25%", 50: "50%", 75: "75%", 100: "100%",
|
|
408
422
|
};
|
|
409
423
|
const earnPct = Math.round((1 - PLATFORM_FEE) * 100);
|
|
410
424
|
// Single batch POST — one round-trip, one DB session, no
|
|
@@ -454,7 +468,7 @@ export async function relaySetupCommand() {
|
|
|
454
468
|
if (failed === 0) {
|
|
455
469
|
const breakdown = cliSummary.length > 0 ? `: ${cliSummary.join(chalk.dim(" · "))}` : "";
|
|
456
470
|
regSpin.stop(`${chalk.green(`✓ Registered${breakdown}`)} ` +
|
|
457
|
-
chalk.dim(`(${limitLabel[
|
|
471
|
+
chalk.dim(`(${limitLabel[maxRelayUtilization] ?? `${maxRelayUtilization}%`} of 5h window · you earn ~${earnPct}%)`));
|
|
458
472
|
}
|
|
459
473
|
else {
|
|
460
474
|
regSpin.stop(`${chalk.yellow(`${succeeded} registered, ${failed} failed`)}`);
|
package/dist/relay/pricing.js
CHANGED
|
@@ -19,6 +19,7 @@ export const API_PRICES = {
|
|
|
19
19
|
// ── Anthropic (Claude) ──
|
|
20
20
|
// Verified against LiteLLM pricing DB. cache_read = 0.1x input,
|
|
21
21
|
// cache_write = 1.25x input (Anthropic ephemeral cache).
|
|
22
|
+
"claude-opus-4-7": { input: 5, output: 25 }, // released 2026-04-16
|
|
22
23
|
"claude-opus-4-6": { input: 5, output: 25 },
|
|
23
24
|
"claude-opus-4-5": { input: 5, output: 25 },
|
|
24
25
|
"claude-sonnet-4-6": { input: 3, output: 15 },
|
package/dist/relay/provider.js
CHANGED
|
@@ -7,7 +7,7 @@ import { callClaudeApi, callClaudeApiPassthrough, preflightClaudeApi, getRateGua
|
|
|
7
7
|
import { callCodexApi, callCodexApiPassthrough, preflightCodexApi, getRateGuardSnapshot as getCodexRateGuardSnapshot, } from "./upstream/codex-api.js";
|
|
8
8
|
import { callGeminiApi, preflightGeminiApi, getGeminiRateGuardSnapshot, } from "./upstream/gemini-api.js";
|
|
9
9
|
import { callAntigravityApi, preflightAntigravityApi, getAntigravityRateGuardSnapshot, } from "./upstream/antigravity-api.js";
|
|
10
|
-
import { apiGet } from "../utils/api.js";
|
|
10
|
+
import { apiGet, apiPost } from "../utils/api.js";
|
|
11
11
|
/**
|
|
12
12
|
* Pick the rate-guard snapshot matching this request's cli_type. Fixes a
|
|
13
13
|
* pre-existing bug where gemini/codex responses were piggy-backing Claude's
|
|
@@ -551,6 +551,85 @@ export function runRelayProvider(cliOverride) {
|
|
|
551
551
|
});
|
|
552
552
|
}
|
|
553
553
|
const activeTasks = new Set();
|
|
554
|
+
/**
 * Best-effort sync between this agent's registered relay providers and the
 * backend's recommended model catalog.
 *
 * Flow:
 *   1. GET /providers/me to learn which cli_types are already registered and
 *      to capture one settings template (concurrency, daily_limit_usd) per
 *      cli_type, taken from the first provider seen for that family.
 *   2. GET /model-catalog.
 *   3. For every cli_type that already has a provider, queue every well-formed
 *      catalog entry for that family.
 *   4. POST the whole batch to /providers/batch.
 *
 * Any failure is logged and swallowed: the whole body runs inside try/catch
 * so a sync problem does not propagate to the caller.
 *
 * NOTE(review): the batch contains already-registered models too, each with
 * the family template settings — confirm the backend upsert is meant to
 * overwrite per-model concurrency / daily limit.
 *
 * @returns {Promise<void>}
 */
async function syncModelCatalog() {
    try {
        // Step 1: existing providers (gives us cli_types + default settings).
        const myResp = await apiGet("/api/v1/relay/providers/me", config.api_key);
        if (!myResp.ok || !Array.isArray(myResp.data)) {
            logger.warn(`[catalog-sync] skipped: /providers/me returned ${myResp.status}`);
            return;
        }
        const existing = myResp.data;
        if (existing.length === 0) {
            logger.info("[catalog-sync] no existing providers yet — skipping auto-sync");
            return;
        }
        // Settings template per cli_type (from any existing provider in that family).
        const settingsByCli = new Map();
        const knownModels = new Set();
        for (const p of existing) {
            knownModels.add(`${p.cli_type}/${p.model}`);
            if (!settingsByCli.has(p.cli_type)) {
                settingsByCli.set(p.cli_type, {
                    concurrency: p.concurrency,
                    daily_limit_usd: p.daily_limit_usd,
                });
            }
        }
        // Step 2: fetch catalog.
        const catalogResp = await apiGet("/api/v1/relay/model-catalog");
        if (!catalogResp.ok || !catalogResp.data?.catalog) {
            logger.warn(`[catalog-sync] skipped: /model-catalog returned ${catalogResp.status}`);
            return;
        }
        const catalog = catalogResp.data.catalog;
        // Step 3: build batch for cli_types the agent has at least one provider for.
        const batch = [];
        const newModels = [];
        for (const [cliType, settings] of settingsByCli) {
            const recommended = catalog[cliType];
            // A family that is missing from the catalog, or whose value is not a
            // list, contributes nothing instead of aborting the whole sync.
            if (!Array.isArray(recommended)) {
                continue;
            }
            for (const entry of recommended) {
                // Skip malformed entries rather than registering a provider
                // without a model name.
                if (typeof entry?.model !== "string" || entry.model === "") {
                    continue;
                }
                if (!knownModels.has(`${cliType}/${entry.model}`)) {
                    newModels.push(`${cliType}/${entry.model}`);
                }
                batch.push({
                    cli_type: cliType,
                    model: entry.model,
                    mode: "chat",
                    concurrency: settings.concurrency,
                    daily_limit_usd: settings.daily_limit_usd,
                    price_input_per_m: entry.input,
                    price_output_per_m: entry.output,
                });
            }
        }
        if (batch.length === 0) {
            return;
        }
        // Step 4: upsert via batch register (already idempotent).
        const regResp = await apiPost("/api/v1/relay/providers/batch", { providers: batch }, config.api_key);
        if (!regResp.ok) {
            logger.warn(`[catalog-sync] batch register failed: ${regResp.status}`);
            return;
        }
        // A successful reply may carry no body; treat that as zero created / failed.
        const created = regResp.data?.created?.length ?? 0;
        const failed = regResp.data?.failed?.length ?? 0;
        const newSuffix = newModels.length > 0 ? ` (new: ${newModels.join(", ")})` : "";
        if (failed > 0) {
            // Failures must never be reported as "OK" or "no changes".
            logger.warn(`[catalog-sync] partial: ${batch.length} entries, ${created} newly created, ${failed} failed${newSuffix}`);
        }
        else if (newModels.length > 0 || created > 0) {
            logger.info(`[catalog-sync] OK: ${batch.length} entries, ${created} newly created, ${failed} failed${newSuffix}`);
        }
        else {
            logger.info(`[catalog-sync] OK: ${batch.length} entries, no changes`);
        }
    }
    catch (err) {
        // Thrown values are not guaranteed to be Error instances.
        logger.warn(`[catalog-sync] error: ${err?.message ?? String(err)}`);
    }
}
|
|
629
|
+
// Initial sync, then every 30 min.
|
|
630
|
+
syncModelCatalog().catch((err) => logger.warn(`[catalog-sync] initial sync failed: ${err.message}`));
|
|
631
|
+
const catalogTimer = setInterval(() => syncModelCatalog().catch((err) => logger.warn(`[catalog-sync] periodic sync failed: ${err.message}`)), 30 * 60 * 1000);
|
|
632
|
+
catalogTimer.unref();
|
|
554
633
|
// Create WS client
|
|
555
634
|
const wsClient = new RelayWsClient(config, (event) => {
|
|
556
635
|
handleEvent(event);
|
package/dist/relay/types.d.ts
CHANGED
|
@@ -507,6 +507,7 @@ export function configureAntigravityRateGuard(config) {
|
|
|
507
507
|
minRequestGapMs: config.min_request_gap_ms,
|
|
508
508
|
jitterMs: config.jitter_ms,
|
|
509
509
|
dailyBudgetUsd: config.daily_budget_usd,
|
|
510
|
+
maxRelayUtilization: config.max_relay_utilization,
|
|
510
511
|
}
|
|
511
512
|
: {};
|
|
512
513
|
const cleaned = Object.fromEntries(Object.entries(mapped).filter(([, v]) => v !== undefined));
|
|
@@ -726,6 +726,7 @@ export function configureRateGuard(config) {
|
|
|
726
726
|
minRequestGapMs: config.min_request_gap_ms,
|
|
727
727
|
jitterMs: config.jitter_ms,
|
|
728
728
|
dailyBudgetUsd: config.daily_budget_usd,
|
|
729
|
+
maxRelayUtilization: config.max_relay_utilization,
|
|
729
730
|
}
|
|
730
731
|
: {};
|
|
731
732
|
// Filter out undefined so defaults apply.
|
|
@@ -896,7 +897,19 @@ async function doCallClaudeApi(opts) {
|
|
|
896
897
|
// account harder and extend the ban. Parse the reset headers, mark
|
|
897
898
|
// cooldown, and fail this request. Subsequent requests will immediately
|
|
898
899
|
// short-circuit via checkCooldown().
|
|
900
|
+
//
|
|
901
|
+
// Exception: "Extra usage is required" is NOT a rate limit — it's a
|
|
902
|
+
// billing/feature gate (e.g. Sonnet 1M context requires Extra usage
|
|
903
|
+
// credits on Claude Max). Triggering a global 5-minute cooldown for
|
|
904
|
+
// this would block ALL subsequent requests (including Opus, Haiku,
|
|
905
|
+
// non-1M Sonnet) even though they don't need Extra usage. Instead,
|
|
906
|
+
// fail only this request and let others through.
|
|
899
907
|
if (resp.status === 429) {
|
|
908
|
+
const isExtraUsage = errText.toLowerCase().includes("extra usage");
|
|
909
|
+
if (isExtraUsage) {
|
|
910
|
+
logger.warn("[claude-api] 429 Extra usage required — skipping cooldown (not a rate limit)");
|
|
911
|
+
throw new Error(`Anthropic 429 extra-usage-required: ${errText.slice(0, 300)}`);
|
|
912
|
+
}
|
|
900
913
|
const cooldown = extractCooldownUntilFromHeaders(resp.headers);
|
|
901
914
|
if (cooldown && rateGuard) {
|
|
902
915
|
rateGuard.triggerCooldown(cooldown.untilMs, cooldown.reason);
|
|
@@ -1307,6 +1320,11 @@ async function doCallClaudeApiPassthrough(opts) {
|
|
|
1307
1320
|
}
|
|
1308
1321
|
const errText = await resp.text();
|
|
1309
1322
|
if (resp.status === 429) {
|
|
1323
|
+
const isExtraUsage = errText.toLowerCase().includes("extra usage");
|
|
1324
|
+
if (isExtraUsage) {
|
|
1325
|
+
logger.warn("[claude-api] 429 Extra usage required (passthrough) — skipping cooldown");
|
|
1326
|
+
throw new Error(`Anthropic 429 extra-usage-required: ${errText.slice(0, 300)}`);
|
|
1327
|
+
}
|
|
1310
1328
|
const cooldown = extractCooldownUntilFromHeaders(resp.headers);
|
|
1311
1329
|
if (cooldown && rateGuard) {
|
|
1312
1330
|
rateGuard.triggerCooldown(cooldown.untilMs, cooldown.reason);
|
|
@@ -371,6 +371,7 @@ export function configureRateGuard(config) {
|
|
|
371
371
|
minRequestGapMs: config.min_request_gap_ms,
|
|
372
372
|
jitterMs: config.jitter_ms,
|
|
373
373
|
dailyBudgetUsd: config.daily_budget_usd,
|
|
374
|
+
maxRelayUtilization: config.max_relay_utilization,
|
|
374
375
|
}
|
|
375
376
|
: {};
|
|
376
377
|
const cleaned = Object.fromEntries(Object.entries(mapped).filter(([, v]) => v !== undefined));
|
|
@@ -243,6 +243,7 @@ export function configureGeminiRateGuard(config) {
|
|
|
243
243
|
minRequestGapMs: config.min_request_gap_ms,
|
|
244
244
|
jitterMs: config.jitter_ms,
|
|
245
245
|
dailyBudgetUsd: config.daily_budget_usd,
|
|
246
|
+
maxRelayUtilization: config.max_relay_utilization,
|
|
246
247
|
}
|
|
247
248
|
: {};
|
|
248
249
|
const cleaned = Object.fromEntries(Object.entries(mapped).filter(([, v]) => v !== undefined));
|
|
@@ -35,11 +35,23 @@ export interface RateGuardConfig {
|
|
|
35
35
|
jitterMs: number;
|
|
36
36
|
/** Hard daily cost cap in USD. Default 15. */
|
|
37
37
|
dailyBudgetUsd: number;
|
|
38
|
+
/**
|
|
39
|
+
* Max relay utilization of the 5h session window (0-100).
|
|
40
|
+
* When relay's own accumulated utilization delta reaches this %,
|
|
41
|
+
* further relay requests are refused until the window resets.
|
|
42
|
+
* Provider's direct usage does NOT count against this budget —
|
|
43
|
+
* only the delta observed across relay requests is tracked.
|
|
44
|
+
* Default 50 (relay can use up to 50% of the 5h window).
|
|
45
|
+
*/
|
|
46
|
+
maxRelayUtilization: number;
|
|
38
47
|
}
|
|
39
48
|
export declare const DEFAULT_RATE_GUARD_CONFIG: RateGuardConfig;
|
|
40
49
|
export declare class RateGuardBudgetExceededError extends Error {
|
|
41
50
|
constructor(spent: number, limit: number);
|
|
42
51
|
}
|
|
52
|
+
export declare class RateGuardRelayUtilizationExceededError extends Error {
|
|
53
|
+
constructor(used: number, limit: number, resetMins: number);
|
|
54
|
+
}
|
|
43
55
|
/**
|
|
44
56
|
* Thrown when the rate-guard is in a hard cooldown after observing a real
|
|
45
57
|
* upstream 429. The `untilMs` field is an absolute UNIX ms timestamp — after
|
|
@@ -71,15 +83,19 @@ export declare class RateGuard {
|
|
|
71
83
|
private cooldownUntilMs;
|
|
72
84
|
private cooldownReason;
|
|
73
85
|
private sessionWindow;
|
|
86
|
+
private relayWindowUsed;
|
|
87
|
+
private relayWindowEndMs;
|
|
88
|
+
private lastSeenUtilization;
|
|
74
89
|
constructor(config?: Partial<RateGuardConfig>);
|
|
75
90
|
/** Record an upstream-imposed cooldown. Called after parsing a real 429. */
|
|
76
91
|
triggerCooldown(untilMs: number, reason: string): void;
|
|
77
|
-
/** Update the 5h session window tracker from parsed upstream headers.
|
|
92
|
+
/** Update the 5h session window tracker from parsed upstream headers.
|
|
93
|
+
* Also accumulates relay's own utilization delta for quota enforcement. */
|
|
78
94
|
setSessionWindow(window: SessionWindow): void;
|
|
79
95
|
getSessionWindow(): SessionWindow | null;
|
|
80
96
|
private currentMaxConcurrency;
|
|
81
97
|
private rotateDailyCounterIfNeeded;
|
|
82
|
-
/** Check whether a new request would exceed the daily budget. */
|
|
98
|
+
/** Check whether a new request would exceed the daily budget or relay utilization cap. */
|
|
83
99
|
checkBudget(): void;
|
|
84
100
|
/** Check upstream-imposed cooldown. Throws RateGuardCooldownError if still cooling. */
|
|
85
101
|
checkCooldown(): void;
|
|
@@ -93,6 +109,8 @@ export declare class RateGuard {
|
|
|
93
109
|
cooldownUntilMs: number;
|
|
94
110
|
cooldownReason: string;
|
|
95
111
|
sessionWindow: SessionWindow | null;
|
|
112
|
+
relayWindowUsed: number;
|
|
113
|
+
maxRelayUtilization: number;
|
|
96
114
|
};
|
|
97
115
|
/**
|
|
98
116
|
* Wrap an upstream call. Blocks until:
|
|
@@ -30,6 +30,7 @@ export const DEFAULT_RATE_GUARD_CONFIG = {
|
|
|
30
30
|
minRequestGapMs: 500,
|
|
31
31
|
jitterMs: 1500,
|
|
32
32
|
dailyBudgetUsd: 15,
|
|
33
|
+
maxRelayUtilization: 50,
|
|
33
34
|
};
|
|
34
35
|
export class RateGuardBudgetExceededError extends Error {
|
|
35
36
|
constructor(spent, limit) {
|
|
@@ -37,6 +38,12 @@ export class RateGuardBudgetExceededError extends Error {
|
|
|
37
38
|
this.name = "RateGuardBudgetExceededError";
|
|
38
39
|
}
|
|
39
40
|
}
|
|
41
|
+
/**
 * Raised by the rate-guard's budget check when the relay's accumulated share
 * of the 5h session window has reached the configured cap.
 *
 * The figures are exposed as fields as well as in the message so callers can
 * act on them without parsing the text.
 */
export class RateGuardRelayUtilizationExceededError extends Error {
    /**
     * @param {number} used - Relay utilization accumulated so far, in percent.
     * @param {number} limit - Configured relay utilization cap, in percent.
     * @param {number} resetMins - Minutes until the tracked window resets.
     */
    constructor(used, limit, resetMins) {
        super(`Relay utilization quota reached: ${used.toFixed(1)}% / ${limit}% of 5h window used by relay (resets in ${resetMins}min)`);
        this.name = "RateGuardRelayUtilizationExceededError";
        this.used = used;
        this.limit = limit;
        this.resetMins = resetMins;
    }
}
|
|
40
47
|
/**
|
|
41
48
|
* Thrown when the rate-guard is in a hard cooldown after observing a real
|
|
42
49
|
* upstream 429. The `untilMs` field is an absolute UNIX ms timestamp — after
|
|
@@ -66,6 +73,12 @@ export class RateGuard {
|
|
|
66
73
|
cooldownReason = "";
|
|
67
74
|
// Rolling 5h session window surfaced by Anthropic headers.
|
|
68
75
|
sessionWindow = null;
|
|
76
|
+
// Relay utilization tracking — accumulated delta of session_window
|
|
77
|
+
// utilization across relay requests within the current 5h window.
|
|
78
|
+
// Resets when the window resets (endMs changes).
|
|
79
|
+
relayWindowUsed = 0; // accumulated relay % (0-100)
|
|
80
|
+
relayWindowEndMs = 0; // which window we're tracking
|
|
81
|
+
lastSeenUtilization = null; // for delta computation
|
|
69
82
|
constructor(config = {}) {
|
|
70
83
|
this.cfg = { ...DEFAULT_RATE_GUARD_CONFIG, ...config };
|
|
71
84
|
}
|
|
@@ -81,11 +94,34 @@ export class RateGuard {
|
|
|
81
94
|
logger.warn(`[rate-guard] cooldown engaged (${reason}): ${seconds}s until reset`);
|
|
82
95
|
}
|
|
83
96
|
}
|
|
84
|
-
/** Update the 5h session window tracker from parsed upstream headers.
|
|
97
|
+
/** Update the 5h session window tracker from parsed upstream headers.
|
|
98
|
+
* Also accumulates relay's own utilization delta for quota enforcement. */
|
|
85
99
|
setSessionWindow(window) {
|
|
100
|
+
// Detect window reset — if endMs changed, we're in a new window.
|
|
101
|
+
if (window.endMs !== this.relayWindowEndMs) {
|
|
102
|
+
if (this.relayWindowEndMs > 0 && this.relayWindowUsed > 0) {
|
|
103
|
+
logger.info(`[rate-guard] relay window reset (previous relay_used=${this.relayWindowUsed.toFixed(1)}%)`);
|
|
104
|
+
}
|
|
105
|
+
this.relayWindowUsed = 0;
|
|
106
|
+
this.relayWindowEndMs = window.endMs;
|
|
107
|
+
this.lastSeenUtilization = null;
|
|
108
|
+
}
|
|
109
|
+
// Compute relay delta: how much utilization increased since last observation.
|
|
110
|
+
// This is called AFTER each relay request, so the delta is (approximately)
|
|
111
|
+
// the utilization cost of that one relay request. If the provider was also
|
|
112
|
+
// using the account directly during this request, the delta includes their
|
|
113
|
+
// usage too — accepted trade-off (see design discussion).
|
|
114
|
+
if (typeof window.utilization === "number" &&
|
|
115
|
+
this.lastSeenUtilization !== null) {
|
|
116
|
+
const delta = window.utilization - this.lastSeenUtilization;
|
|
117
|
+
if (delta > 0) {
|
|
118
|
+
this.relayWindowUsed += delta;
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
this.lastSeenUtilization = window.utilization ?? null;
|
|
86
122
|
this.sessionWindow = window;
|
|
87
123
|
const mins = Math.round((window.endMs - Date.now()) / 60_000);
|
|
88
|
-
logger.info(`[rate-guard] session window: ${window.utilization ?? "?"}% used, resets in ${mins}min (status=${window.status ?? "unknown"})`);
|
|
124
|
+
logger.info(`[rate-guard] session window: ${window.utilization ?? "?"}% used (relay_used=${this.relayWindowUsed.toFixed(1)}%/${this.cfg.maxRelayUtilization}%), resets in ${mins}min (status=${window.status ?? "unknown"})`);
|
|
89
125
|
}
|
|
90
126
|
getSessionWindow() {
|
|
91
127
|
if (!this.sessionWindow)
|
|
@@ -114,12 +150,20 @@ export class RateGuard {
|
|
|
114
150
|
this.dailySpentUsd = 0;
|
|
115
151
|
}
|
|
116
152
|
}
|
|
117
|
-
/** Check whether a new request would exceed the daily budget. */
|
|
153
|
+
/** Check whether a new request would exceed the daily budget or relay utilization cap. */
|
|
118
154
|
checkBudget() {
|
|
119
155
|
this.rotateDailyCounterIfNeeded();
|
|
120
156
|
if (this.dailySpentUsd >= this.cfg.dailyBudgetUsd) {
|
|
121
157
|
throw new RateGuardBudgetExceededError(this.dailySpentUsd, this.cfg.dailyBudgetUsd);
|
|
122
158
|
}
|
|
159
|
+
// Check relay utilization cap against 5h window.
|
|
160
|
+
// Only enforce if we've seen at least one session window update
|
|
161
|
+
// (otherwise we don't know the utilization yet — fail open).
|
|
162
|
+
if (this.relayWindowEndMs > 0 &&
|
|
163
|
+
this.relayWindowUsed >= this.cfg.maxRelayUtilization) {
|
|
164
|
+
const resetMins = Math.max(0, Math.round((this.relayWindowEndMs - Date.now()) / 60_000));
|
|
165
|
+
throw new RateGuardRelayUtilizationExceededError(this.relayWindowUsed, this.cfg.maxRelayUtilization, resetMins);
|
|
166
|
+
}
|
|
123
167
|
}
|
|
124
168
|
/** Check upstream-imposed cooldown. Throws RateGuardCooldownError if still cooling. */
|
|
125
169
|
checkCooldown() {
|
|
@@ -147,6 +191,8 @@ export class RateGuard {
|
|
|
147
191
|
cooldownUntilMs: this.cooldownUntilMs,
|
|
148
192
|
cooldownReason: this.cooldownReason,
|
|
149
193
|
sessionWindow: this.getSessionWindow(),
|
|
194
|
+
relayWindowUsed: this.relayWindowUsed,
|
|
195
|
+
maxRelayUtilization: this.cfg.maxRelayUtilization,
|
|
150
196
|
};
|
|
151
197
|
}
|
|
152
198
|
/**
|