@kill-switch/agent-guard 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +51 -1
- package/dist/alert.d.ts +12 -2
- package/dist/alert.js +11 -0
- package/dist/cli.js +46 -15
- package/dist/config.d.ts +25 -0
- package/dist/config.js +22 -0
- package/dist/estimate.d.ts +44 -0
- package/dist/estimate.js +71 -0
- package/dist/hook.js +35 -1
- package/dist/index.d.ts +7 -4
- package/dist/index.js +6 -3
- package/dist/limits.d.ts +83 -0
- package/dist/limits.js +133 -0
- package/dist/ops.d.ts +12 -0
- package/dist/ops.js +27 -1
- package/dist/pacing.d.ts +53 -0
- package/dist/pacing.js +127 -0
- package/dist/proxy.js +62 -1
- package/dist/report.d.ts +25 -0
- package/dist/report.js +90 -0
- package/package.json +3 -2
package/README.md
CHANGED
|
@@ -107,6 +107,55 @@ AGENT_GUARD_SESSION_HARD=10 claude # one-off $10 ceiling
|
|
|
107
107
|
|
|
108
108
|
A cap of `0` disables that check.
|
|
109
109
|
|
|
110
|
+
## Subscription limits (Claude Code Pro / Max)
|
|
111
|
+
|
|
112
|
+
Dollar caps are the wrong currency for a **Pro/Max subscription**: you pay a flat fee, so the
|
|
113
|
+
scarce resource isn't dollars — it's your plan's rate-limit quota, in two rolling windows:
|
|
114
|
+
|
|
115
|
+
- a **5-hour** window (burst protection), and
|
|
116
|
+
- a **weekly** (7-day) window — the real lockout risk, "resets a couple times a month".
|
|
117
|
+
|
|
118
|
+
Anthropic reports exactly where you stand on every response via `anthropic-ratelimit-unified-*`
|
|
119
|
+
headers. Run Claude Code **through the proxy** and agent-guard reads them — no estimation:
|
|
120
|
+
|
|
121
|
+
```sh
|
|
122
|
+
agent-guard proxy # meters Anthropic + reads limit headers
|
|
123
|
+
ANTHROPIC_BASE_URL=http://localhost:8787 claude
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
Once those headers are seen, the session is in **subscription mode**: alert-only. agent-guard
|
|
127
|
+
**never blocks** a flat-fee plan (you already paid; Anthropic's own limit is the real wall) —
|
|
128
|
+
instead it *paces* you. For each window it computes burn-rate vs. a sustainable pace and
|
|
129
|
+
projects whether you'll exhaust the window **before it resets**, then warns in-session and via
|
|
130
|
+
your alert channels:
|
|
131
|
+
|
|
132
|
+
```
|
|
133
|
+
🟥 Claude Code plan limits · observed just now
|
|
134
|
+
[████████████░░░░░░░░] weekly limit 62% used, resets Sat 6:00 PM, burning 3.1× pace,
|
|
135
|
+
→ lockout in ~14h (5.1d before reset)
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
`status` shows it; the hook injects it into the session even when only the hook is running
|
|
139
|
+
(it reads the snapshot the proxy persisted). No proxy and want a rough read? Pin your tier and
|
|
140
|
+
agent-guard *estimates* from the ledger (clearly labelled, never blocks):
|
|
141
|
+
|
|
142
|
+
```sh
|
|
143
|
+
agent-guard config --plan max5 # auto | pro | max5 | max20
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
Tune the thresholds (0–1 utilization) if the defaults are too eager:
|
|
147
|
+
|
|
148
|
+
| Setting | Meaning | Default |
|
|
149
|
+
|---|---|---|
|
|
150
|
+
| `--plan` (`AGENT_GUARD_PLAN`) | `auto` (headers only) or a tier for estimation | `auto` |
|
|
151
|
+
| `--weekly-soft` / `--weekly-danger` | weekly warn / danger utilization | 0.6 / 0.85 |
|
|
152
|
+
| `--5h-soft` / `--5h-danger` | 5-hour warn / danger utilization | 0.7 / 0.9 |
|
|
153
|
+
| `--burn-ratio` | pace multiplier that triggers a warning | 1.5 |
|
|
154
|
+
|
|
155
|
+
> Because subscription mode is alert-only, the "don't run both hook *and* proxy" caveat below
|
|
156
|
+
> doesn't bite here — running Claude Code through the proxy is exactly what feeds the limit
|
|
157
|
+
> headers, and dollars no longer gate anything.
|
|
158
|
+
|
|
110
159
|
## Alerts
|
|
111
160
|
|
|
112
161
|
On the first soft/hard trip per scope, agent-guard:
|
|
@@ -132,8 +181,9 @@ rates so the guard never *under*-counts. Override any model in
|
|
|
132
181
|
```
|
|
133
182
|
agent-guard install [--global] [--command <cmd>] wire the Claude Code hook
|
|
134
183
|
agent-guard proxy [--port 8787] [--flavor anthropic|openai] [--upstream URL]
|
|
135
|
-
agent-guard status [--json] spend vs budget
|
|
184
|
+
agent-guard status [--json] spend vs budget + plan limits
|
|
136
185
|
agent-guard config [--session-hard N ...] view/set caps
|
|
186
|
+
agent-guard config [--plan max5 --weekly-soft 0.6 ...] view/set plan limits
|
|
137
187
|
agent-guard reset [--all|--today|--session <id>] clear the ledger
|
|
138
188
|
agent-guard hook (internal) Claude Code entrypoint
|
|
139
189
|
```
|
package/dist/alert.d.ts
CHANGED
|
@@ -11,17 +11,27 @@
|
|
|
11
11
|
* delay (or crash) the agent's tool call.
|
|
12
12
|
*/
|
|
13
13
|
import { type GuardConfig } from "./config.js";
|
|
14
|
-
|
|
14
|
+
/** Spend verdicts are ok/warn/block; pacing assessments are ok/warn/danger. */
|
|
15
|
+
export type AlertLevel = "ok" | "warn" | "block" | "danger";
|
|
15
16
|
export interface AlertEvent {
|
|
16
17
|
ts: number;
|
|
17
18
|
source: "hook" | "proxy";
|
|
19
|
+
/** "spend" = dollar budget trip (default); "limit" = subscription pacing alert. */
|
|
20
|
+
kind?: "spend" | "limit";
|
|
18
21
|
sessionId: string;
|
|
19
|
-
level:
|
|
22
|
+
level: AlertLevel;
|
|
20
23
|
sessionUSD: number;
|
|
21
24
|
dailyUSD: number;
|
|
22
25
|
reasons: string[];
|
|
23
26
|
action: string;
|
|
24
27
|
cwd?: string;
|
|
28
|
+
/** For kind:"limit" — per-window utilization summary (0–1) for the payload. */
|
|
29
|
+
limits?: Array<{
|
|
30
|
+
window: string;
|
|
31
|
+
utilization: number;
|
|
32
|
+
resetAt: number | null;
|
|
33
|
+
level: string;
|
|
34
|
+
}>;
|
|
25
35
|
}
|
|
26
36
|
/** Dispatch an alert across all configured channels. Resolves once all attempts settle. */
|
|
27
37
|
export declare function dispatchAlert(cfg: GuardConfig, evt: AlertEvent): Promise<void>;
|
package/dist/alert.js
CHANGED
|
@@ -43,6 +43,17 @@ function writeLocal(evt) {
|
|
|
43
43
|
}
|
|
44
44
|
}
|
|
45
45
|
function slackText(evt) {
|
|
46
|
+
if (evt.kind === "limit") {
|
|
47
|
+
const icon = evt.level === "danger" ? "🟥" : "🟡";
|
|
48
|
+
return [
|
|
49
|
+
`${icon} *Kill Switch — Claude Code subscription pacing*`,
|
|
50
|
+
`• Status: ${evt.action}`,
|
|
51
|
+
evt.cwd ? `• Project: \`${evt.cwd}\`` : "",
|
|
52
|
+
...evt.reasons.map((r) => `• ${r}`),
|
|
53
|
+
]
|
|
54
|
+
.filter(Boolean)
|
|
55
|
+
.join("\n");
|
|
56
|
+
}
|
|
46
57
|
const icon = evt.level === "block" ? "🛑" : "⚠️";
|
|
47
58
|
const verb = evt.level === "block" ? "BLOCKED a coding agent" : "warning on a coding agent";
|
|
48
59
|
return [
|
package/dist/cli.js
CHANGED
|
@@ -17,7 +17,8 @@ import { loadConfig, configPath, isPaused, pauseExpiry, writePause, clearPause,
|
|
|
17
17
|
import { loadLedger, rollingDailyCost } from "./ledger.js";
|
|
18
18
|
import { evaluate } from "./budget.js";
|
|
19
19
|
import { fmtUSD } from "./cost.js";
|
|
20
|
-
import { installHook, setBudget, resetLedger } from "./ops.js";
|
|
20
|
+
import { installHook, setBudget, setLimits, resetLedger } from "./ops.js";
|
|
21
|
+
import { buildStatusReport, formatLimitsLines } from "./report.js";
|
|
21
22
|
const program = new Command();
|
|
22
23
|
program
|
|
23
24
|
.name("agent-guard")
|
|
@@ -77,6 +78,7 @@ program
|
|
|
77
78
|
verdict: verdict.level,
|
|
78
79
|
reasons: verdict.reasons,
|
|
79
80
|
sessions: sessions.map(([id, s]) => ({ id, ...s })),
|
|
81
|
+
limits: buildStatusReport(now).limits,
|
|
80
82
|
}, null, 2));
|
|
81
83
|
return;
|
|
82
84
|
}
|
|
@@ -109,6 +111,13 @@ program
|
|
|
109
111
|
for (const r of verdict.reasons)
|
|
110
112
|
console.log(` • ${r}`);
|
|
111
113
|
}
|
|
114
|
+
// Subscription rate-limit pacing (Claude Code Pro/Max).
|
|
115
|
+
const limitLines = formatLimitsLines(buildStatusReport(now).limits, now);
|
|
116
|
+
if (limitLines.length) {
|
|
117
|
+
console.log("");
|
|
118
|
+
for (const line of limitLines)
|
|
119
|
+
console.log(line);
|
|
120
|
+
}
|
|
112
121
|
});
|
|
113
122
|
// ── pause / resume (escape hatch) ────────────────────────────────────────────
|
|
114
123
|
program
|
|
@@ -153,31 +162,53 @@ program
|
|
|
153
162
|
// ── config ───────────────────────────────────────────────────────────────────
|
|
154
163
|
program
|
|
155
164
|
.command("config")
|
|
156
|
-
.description("View or set budget caps (written to ~/.kill-switch/agent-guard/config.json)")
|
|
165
|
+
.description("View or set budget caps + Claude Code plan limits (written to ~/.kill-switch/agent-guard/config.json)")
|
|
157
166
|
.option("--session-soft <usd>", "Per-session soft cap (warn)")
|
|
158
167
|
.option("--session-hard <usd>", "Per-session hard cap (block)")
|
|
159
168
|
.option("--daily-soft <usd>", "Daily rolling soft cap (warn)")
|
|
160
169
|
.option("--daily-hard <usd>", "Daily rolling hard cap (block)")
|
|
161
170
|
.option("--slack-webhook <url>", "Slack incoming-webhook for breach alerts")
|
|
171
|
+
.option("--plan <tier>", "Claude Code plan: auto | pro | max5 | max20 (subscription limit awareness)")
|
|
172
|
+
.option("--weekly-soft <pct>", "Weekly limit soft threshold, 0–1 (warn)")
|
|
173
|
+
.option("--weekly-danger <pct>", "Weekly limit danger threshold, 0–1")
|
|
174
|
+
.option("--5h-soft <pct>", "5-hour limit soft threshold, 0–1 (warn)")
|
|
175
|
+
.option("--5h-danger <pct>", "5-hour limit danger threshold, 0–1")
|
|
176
|
+
.option("--burn-ratio <n>", "Burn-rate multiplier that triggers a pacing warning")
|
|
162
177
|
.action((opts) => {
|
|
163
|
-
const
|
|
164
|
-
|
|
165
|
-
|
|
178
|
+
const budgetKeys = ["sessionSoft", "sessionHard", "dailySoft", "dailyHard", "slackWebhook"];
|
|
179
|
+
const limitKeys = ["plan", "weeklySoft", "weeklyDanger", "5hSoft", "5hDanger", "burnRatio"];
|
|
180
|
+
const anyBudget = budgetKeys.some((k) => opts[k] !== undefined);
|
|
181
|
+
const anyLimit = limitKeys.some((k) => opts[k] !== undefined);
|
|
182
|
+
if (!anyBudget && !anyLimit) {
|
|
166
183
|
const cfg = loadConfig();
|
|
167
|
-
console.log(JSON.stringify({ budget: cfg.budget, slackWebhook: cfg.slackWebhook ? "(set)" : undefined }, null, 2));
|
|
184
|
+
console.log(JSON.stringify({ budget: cfg.budget, limits: cfg.limits, slackWebhook: cfg.slackWebhook ? "(set)" : undefined }, null, 2));
|
|
168
185
|
console.log(`\nConfig file: ${configPath()}`);
|
|
169
186
|
return;
|
|
170
187
|
}
|
|
171
188
|
const num = (v) => (v !== undefined ? Number(v) : undefined);
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
189
|
+
if (anyBudget) {
|
|
190
|
+
const budget = setBudget({
|
|
191
|
+
sessionSoftUSD: num(opts.sessionSoft),
|
|
192
|
+
sessionHardUSD: num(opts.sessionHard),
|
|
193
|
+
dailySoftUSD: num(opts.dailySoft),
|
|
194
|
+
dailyHardUSD: num(opts.dailyHard),
|
|
195
|
+
slackWebhook: opts.slackWebhook,
|
|
196
|
+
});
|
|
197
|
+
console.log(`✅ Budget saved → ${configPath()}`);
|
|
198
|
+
console.log(JSON.stringify(budget, null, 2));
|
|
199
|
+
}
|
|
200
|
+
if (anyLimit) {
|
|
201
|
+
const limits = setLimits({
|
|
202
|
+
plan: opts.plan,
|
|
203
|
+
weeklySoftPct: num(opts.weeklySoft),
|
|
204
|
+
weeklyDangerPct: num(opts.weeklyDanger),
|
|
205
|
+
fiveHourSoftPct: num(opts["5hSoft"]),
|
|
206
|
+
fiveHourDangerPct: num(opts["5hDanger"]),
|
|
207
|
+
burnRatioWarn: num(opts.burnRatio),
|
|
208
|
+
});
|
|
209
|
+
console.log(`✅ Plan limits saved → ${configPath()}`);
|
|
210
|
+
console.log(JSON.stringify(limits, null, 2));
|
|
211
|
+
}
|
|
181
212
|
});
|
|
182
213
|
// ── reset ────────────────────────────────────────────────────────────────────
|
|
183
214
|
program
|
package/dist/config.d.ts
CHANGED
|
@@ -8,8 +8,30 @@
|
|
|
8
8
|
*/
|
|
9
9
|
import type { Budget } from "./budget.js";
|
|
10
10
|
import type { ModelPricing } from "./pricing.js";
|
|
11
|
+
/**
|
|
12
|
+
* Subscription rate-limit config. Separate from the dollar {@link Budget}
|
|
13
|
+
* because a Claude Code Pro/Max session pays a flat fee — the scarce resource is
|
|
14
|
+
* the plan's 5-hour and weekly quota, not dollars. This is alert-only: the guard
|
|
15
|
+
* never blocks on these (you already paid), it just warns before you lock out.
|
|
16
|
+
*/
|
|
17
|
+
export interface LimitsConfig {
|
|
18
|
+
/**
|
|
19
|
+
* Plan tier. "auto" = derive everything from observed `unified-*` headers
|
|
20
|
+
* (proxy path); a pinned tier additionally enables hook-only estimation when
|
|
21
|
+
* no fresh header snapshot exists. See estimate.ts.
|
|
22
|
+
*/
|
|
23
|
+
plan: "auto" | "pro" | "max5" | "max20";
|
|
24
|
+
/** Per-window soft (warn) / danger thresholds, as 0–1 utilization fractions. */
|
|
25
|
+
fiveHourSoftPct: number;
|
|
26
|
+
fiveHourDangerPct: number;
|
|
27
|
+
weeklySoftPct: number;
|
|
28
|
+
weeklyDangerPct: number;
|
|
29
|
+
/** Burn ratio (actual/expected pace) above which we escalate on pacing alone. */
|
|
30
|
+
burnRatioWarn: number;
|
|
31
|
+
}
|
|
11
32
|
export interface GuardConfig {
|
|
12
33
|
budget: Budget;
|
|
34
|
+
limits: LimitsConfig;
|
|
13
35
|
/** Optional pricing overrides merged onto the built-in table. */
|
|
14
36
|
pricingOverrides?: Record<string, ModelPricing>;
|
|
15
37
|
/** Kill Switch API key (ks_live_…) for reporting kill events to Guardian. */
|
|
@@ -20,6 +42,7 @@ export interface GuardConfig {
|
|
|
20
42
|
slackWebhook?: string;
|
|
21
43
|
}
|
|
22
44
|
export declare const DEFAULT_BUDGET: Budget;
|
|
45
|
+
export declare const DEFAULT_LIMITS: LimitsConfig;
|
|
23
46
|
/** ~/.kill-switch/agent-guard — created on demand. */
|
|
24
47
|
export declare function guardDir(): string;
|
|
25
48
|
export declare function ensureGuardDir(): string;
|
|
@@ -27,6 +50,8 @@ export declare const ledgerPath: () => string;
|
|
|
27
50
|
export declare const configPath: () => string;
|
|
28
51
|
export declare const pricingPath: () => string;
|
|
29
52
|
export declare const eventsPath: () => string;
|
|
53
|
+
/** Subscription rate-limit state (latest unified-header snapshot + dedup). */
|
|
54
|
+
export declare const limitsPath: () => string;
|
|
30
55
|
/**
|
|
31
56
|
* Escape hatch. The hook/proxy fail OPEN while this sentinel exists, so a human
|
|
32
57
|
* can always disable enforcement from outside the agent loop — even with zero
|
package/dist/config.js
CHANGED
|
@@ -15,6 +15,14 @@ export const DEFAULT_BUDGET = {
|
|
|
15
15
|
dailySoftUSD: 25,
|
|
16
16
|
dailyHardUSD: 100,
|
|
17
17
|
};
|
|
18
|
+
export const DEFAULT_LIMITS = {
|
|
19
|
+
plan: "auto",
|
|
20
|
+
fiveHourSoftPct: 0.7,
|
|
21
|
+
fiveHourDangerPct: 0.9,
|
|
22
|
+
weeklySoftPct: 0.6,
|
|
23
|
+
weeklyDangerPct: 0.85,
|
|
24
|
+
burnRatioWarn: 1.5,
|
|
25
|
+
};
|
|
18
26
|
/** ~/.kill-switch/agent-guard — created on demand. */
|
|
19
27
|
export function guardDir() {
|
|
20
28
|
return join(homedir(), ".kill-switch", "agent-guard");
|
|
@@ -28,6 +36,8 @@ export const ledgerPath = () => join(guardDir(), "ledger.json");
|
|
|
28
36
|
export const configPath = () => join(guardDir(), "config.json");
|
|
29
37
|
export const pricingPath = () => join(guardDir(), "pricing.json");
|
|
30
38
|
export const eventsPath = () => join(guardDir(), "events.jsonl");
|
|
39
|
+
/** Subscription rate-limit state (latest unified-header snapshot + dedup). */
|
|
40
|
+
export const limitsPath = () => join(guardDir(), "limits.json");
|
|
31
41
|
/**
|
|
32
42
|
* Escape hatch. The hook/proxy fail OPEN while this sentinel exists, so a human
|
|
33
43
|
* can always disable enforcement from outside the agent loop — even with zero
|
|
@@ -95,8 +105,20 @@ export function loadConfig() {
|
|
|
95
105
|
dailySoftUSD: num(process.env.AGENT_GUARD_DAILY_SOFT, fileBudget.dailySoftUSD ?? DEFAULT_BUDGET.dailySoftUSD),
|
|
96
106
|
dailyHardUSD: num(process.env.AGENT_GUARD_DAILY_HARD, fileBudget.dailyHardUSD ?? DEFAULT_BUDGET.dailyHardUSD),
|
|
97
107
|
};
|
|
108
|
+
const fileLimits = fileCfg.limits ?? {};
|
|
109
|
+
const envPlan = process.env.AGENT_GUARD_PLAN;
|
|
110
|
+
const validPlan = (p) => p === "auto" || p === "pro" || p === "max5" || p === "max20";
|
|
111
|
+
const limits = {
|
|
112
|
+
plan: validPlan(envPlan) ? envPlan : validPlan(fileLimits.plan) ? fileLimits.plan : DEFAULT_LIMITS.plan,
|
|
113
|
+
fiveHourSoftPct: fileLimits.fiveHourSoftPct ?? DEFAULT_LIMITS.fiveHourSoftPct,
|
|
114
|
+
fiveHourDangerPct: fileLimits.fiveHourDangerPct ?? DEFAULT_LIMITS.fiveHourDangerPct,
|
|
115
|
+
weeklySoftPct: fileLimits.weeklySoftPct ?? DEFAULT_LIMITS.weeklySoftPct,
|
|
116
|
+
weeklyDangerPct: fileLimits.weeklyDangerPct ?? DEFAULT_LIMITS.weeklyDangerPct,
|
|
117
|
+
burnRatioWarn: fileLimits.burnRatioWarn ?? DEFAULT_LIMITS.burnRatioWarn,
|
|
118
|
+
};
|
|
98
119
|
return {
|
|
99
120
|
budget,
|
|
121
|
+
limits,
|
|
100
122
|
pricingOverrides: { ...(fileCfg.pricingOverrides ?? {}), ...(filePricing ?? {}) },
|
|
101
123
|
apiKey: process.env.KILL_SWITCH_API_KEY ?? fileCfg.apiKey,
|
|
102
124
|
apiUrl: process.env.KILL_SWITCH_API_URL ?? fileCfg.apiUrl ?? "https://api.kill-switch.net",
|
package/dist/estimate.d.ts
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Hook-only fallback estimate of subscription utilization.
|
|
3
|
+
*
|
|
4
|
+
* Ground truth for plan limits lives in the `anthropic-ratelimit-unified-*`
|
|
5
|
+
* response headers, which only the proxy sees. A user running just the Claude
|
|
6
|
+
* Code hook (the common, zero-config setup) never sees those headers — so when
|
|
7
|
+
* they've told us their plan tier, we *estimate* where they stand by summing the
|
|
8
|
+
* tokens the ledger recorded inside each rolling window and dividing by a
|
|
9
|
+
* per-tier token budget.
|
|
10
|
+
*
|
|
11
|
+
* This is deliberately approximate and always labelled as such:
|
|
12
|
+
* - Anthropic meters opaque "prompts" / "active hours", not tokens, so the
|
|
13
|
+
* token budgets below are calibrated rough equivalents, not contractual.
|
|
14
|
+
* - The ledger stores a session's cumulative tokens against a single
|
|
15
|
+
* `lastAt`, not a time series, so a long session is counted wholesale into
|
|
16
|
+
* whichever window its last activity falls in.
|
|
17
|
+
*
|
|
18
|
+
* It exists to give hook-only users *a* signal and to nudge them toward
|
|
19
|
+
* `ks guard proxy` for exact numbers — never to block (subscription mode is
|
|
20
|
+
* alert-only). When in doubt it under-claims utilization so it won't cry wolf.
|
|
21
|
+
*/
|
|
22
|
+
import { type LimitSnapshot } from "./limits.js";
|
|
23
|
+
import type { Ledger } from "./ledger.js";
|
|
24
|
+
export type PlanTier = "pro" | "max5" | "max20";
|
|
25
|
+
/**
|
|
26
|
+
* Rough per-tier token-equivalent budgets per window. Pro is the published
|
|
27
|
+
* baseline; Max 5x / 20x scale the 5-hour burst ~linearly with the multiplier,
|
|
28
|
+
* while the weekly cap scales more conservatively (Anthropic's weekly multiplier
|
|
29
|
+
* is smaller than the per-session one). Pass a custom `budgets` table to {@link estimateSnapshot} if your mileage differs.
|
|
30
|
+
*/
|
|
31
|
+
export interface TierBudget {
|
|
32
|
+
fiveHourTokens: number;
|
|
33
|
+
weeklyTokens: number;
|
|
34
|
+
}
|
|
35
|
+
export declare const TIER_BUDGETS: Record<PlanTier, TierBudget>;
|
|
36
|
+
/**
|
|
37
|
+
* Build an estimated {@link LimitSnapshot} from the ledger for a known tier.
|
|
38
|
+
* Reset times are derived from the rolling window assumption (oldest in-window
|
|
39
|
+
* activity + window length is unknowable here, so we report the window end from
|
|
40
|
+
* `now` as a conservative upper bound on time remaining).
|
|
41
|
+
*/
|
|
42
|
+
export declare function estimateSnapshot(ledger: Ledger, tier: PlanTier, now: number, budgets?: Record<PlanTier, TierBudget>): LimitSnapshot;
|
|
43
|
+
/** True when a snapshot came from {@link estimateSnapshot} rather than real headers. */
|
|
44
|
+
export declare function isEstimated(snap: LimitSnapshot | null): boolean;
|
package/dist/estimate.js
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Hook-only fallback estimate of subscription utilization.
|
|
3
|
+
*
|
|
4
|
+
* Ground truth for plan limits lives in the `anthropic-ratelimit-unified-*`
|
|
5
|
+
* response headers, which only the proxy sees. A user running just the Claude
|
|
6
|
+
* Code hook (the common, zero-config setup) never sees those headers — so when
|
|
7
|
+
* they've told us their plan tier, we *estimate* where they stand by summing the
|
|
8
|
+
* tokens the ledger recorded inside each rolling window and dividing by a
|
|
9
|
+
* per-tier token budget.
|
|
10
|
+
*
|
|
11
|
+
* This is deliberately approximate and always labelled as such:
|
|
12
|
+
* - Anthropic meters opaque "prompts" / "active hours", not tokens, so the
|
|
13
|
+
* token budgets below are calibrated rough equivalents, not contractual.
|
|
14
|
+
* - The ledger stores a session's cumulative tokens against a single
|
|
15
|
+
* `lastAt`, not a time series, so a long session is counted wholesale into
|
|
16
|
+
* whichever window its last activity falls in.
|
|
17
|
+
*
|
|
18
|
+
* It exists to give hook-only users *a* signal and to nudge them toward
|
|
19
|
+
* `ks guard proxy` for exact numbers — never to block (subscription mode is
|
|
20
|
+
* alert-only). When in doubt it under-claims utilization so it won't cry wolf.
|
|
21
|
+
*/
|
|
22
|
+
import { WINDOW_MS } from "./limits.js";
|
|
23
|
+
export const TIER_BUDGETS = {
|
|
24
|
+
// Calibrated rough equivalents — Pro ≈ 45 prompts / 5h, modest weekly cap.
|
|
25
|
+
pro: { fiveHourTokens: 8_000_000, weeklyTokens: 120_000_000 },
|
|
26
|
+
max5: { fiveHourTokens: 40_000_000, weeklyTokens: 480_000_000 },
|
|
27
|
+
max20: { fiveHourTokens: 160_000_000, weeklyTokens: 1_400_000_000 },
|
|
28
|
+
};
|
|
29
|
+
const FIVE_HOUR_MS = WINDOW_MS["5h"];
|
|
30
|
+
const WEEK_MS = WINDOW_MS.weekly;
|
|
31
|
+
/** Sum tokens (input+output) across sessions whose last activity is within `windowMs`. */
|
|
32
|
+
function tokensInWindow(ledger, now, windowMs) {
|
|
33
|
+
let total = 0;
|
|
34
|
+
for (const s of Object.values(ledger.sessions)) {
|
|
35
|
+
if (now - s.lastAt < windowMs)
|
|
36
|
+
total += (s.inputTokens || 0) + (s.outputTokens || 0);
|
|
37
|
+
}
|
|
38
|
+
return total;
|
|
39
|
+
}
|
|
40
|
+
/**
|
|
41
|
+
* Build an estimated {@link LimitSnapshot} from the ledger for a known tier.
|
|
42
|
+
* Reset times are derived from the rolling window assumption (oldest in-window
|
|
43
|
+
* activity + window length is unknowable here, so we report the window end from
|
|
44
|
+
* `now` as a conservative upper bound on time remaining).
|
|
45
|
+
*/
|
|
46
|
+
export function estimateSnapshot(ledger, tier, now, budgets = TIER_BUDGETS) {
|
|
47
|
+
const b = budgets[tier];
|
|
48
|
+
const fiveTokens = tokensInWindow(ledger, now, FIVE_HOUR_MS);
|
|
49
|
+
const weekTokens = tokensInWindow(ledger, now, WEEK_MS);
|
|
50
|
+
const clamp = (n) => Math.max(0, Math.min(1, n));
|
|
51
|
+
return {
|
|
52
|
+
fiveHour: {
|
|
53
|
+
utilization: clamp(fiveTokens / b.fiveHourTokens),
|
|
54
|
+
// Without a per-event time series we can't know the true rolling reset;
|
|
55
|
+
// report a full window from now as a conservative (latest-possible) reset.
|
|
56
|
+
resetAt: now + FIVE_HOUR_MS,
|
|
57
|
+
status: "estimated",
|
|
58
|
+
},
|
|
59
|
+
weekly: {
|
|
60
|
+
utilization: clamp(weekTokens / b.weeklyTokens),
|
|
61
|
+
resetAt: now + WEEK_MS,
|
|
62
|
+
status: "estimated",
|
|
63
|
+
},
|
|
64
|
+
status: "estimated",
|
|
65
|
+
observedAt: now,
|
|
66
|
+
};
|
|
67
|
+
}
|
|
68
|
+
/** True when a snapshot came from {@link estimateSnapshot} rather than real headers. */
|
|
69
|
+
export function isEstimated(snap) {
|
|
70
|
+
return !!snap && snap.status === "estimated";
|
|
71
|
+
}
|
package/dist/hook.js
CHANGED
|
@@ -24,6 +24,7 @@ import { parseTranscript } from "./transcript.js";
|
|
|
24
24
|
import { loadLedger, saveLedger, setSessionCost, rollingDailyCost, prune, } from "./ledger.js";
|
|
25
25
|
import { evaluate, warnKey } from "./budget.js";
|
|
26
26
|
import { dispatchAlert } from "./alert.js";
|
|
27
|
+
import { buildStatusReport } from "./report.js";
|
|
27
28
|
function readStdin() {
|
|
28
29
|
return new Promise((resolve) => {
|
|
29
30
|
let data = "";
|
|
@@ -146,20 +147,53 @@ export async function runHook() {
|
|
|
146
147
|
emit(blockDecision(event, reason, `🛑 Kill Switch stopped this agent — ${verdict.reasons[0] ?? "budget exceeded"}.`));
|
|
147
148
|
process.exit(0);
|
|
148
149
|
}
|
|
150
|
+
// Subscription rate-limit pacing — alert-only, surfaced in-session. Reads the
|
|
151
|
+
// snapshot the proxy persisted from Anthropic's headers (or a tier estimate),
|
|
152
|
+
// so even a hook-only session learns when it's about to lock out. Deduped per
|
|
153
|
+
// window+level so it doesn't repeat every tool call.
|
|
154
|
+
const limitMsg = limitNudge(rec, ledger, now);
|
|
149
155
|
// Surface the warn nudge only on the first trip per scope (shouldAlert), not
|
|
150
156
|
// on every subsequent tool call — otherwise the agent's context fills with
|
|
151
157
|
// duplicate notices. After that, warnings stay silent until the hard cap.
|
|
152
158
|
if (verdict.level === "warn" && shouldAlert) {
|
|
153
|
-
const ctx = renderWarnContext(verdict);
|
|
159
|
+
const ctx = limitMsg ? `${renderWarnContext(verdict)} ${limitMsg}` : renderWarnContext(verdict);
|
|
154
160
|
emit(warnDecision(event, ctx, `⚠️ Kill Switch: ${verdict.reasons[0] ?? "approaching budget"}.`));
|
|
155
161
|
process.exit(0);
|
|
156
162
|
}
|
|
163
|
+
if (limitMsg) {
|
|
164
|
+
emit(warnDecision(event, `Kill Switch — Claude Code plan pacing (informational, you may continue): ${limitMsg}`, `⚠️ Kill Switch: ${limitMsg}`));
|
|
165
|
+
process.exit(0);
|
|
166
|
+
}
|
|
157
167
|
process.exit(0);
|
|
158
168
|
}
|
|
159
169
|
catch {
|
|
160
170
|
process.exit(0); // fail open on any unexpected error
|
|
161
171
|
}
|
|
162
172
|
}
|
|
173
|
+
/**
|
|
174
|
+
* Most-urgent subscription-window nudge, fired once per window+level. Mutates the
|
|
175
|
+
* session's notified map (and persists it) so the same warning doesn't repeat on
|
|
176
|
+
* every tool call. Returns null when there's nothing to surface.
|
|
177
|
+
*/
|
|
178
|
+
function limitNudge(rec, ledger, now) {
|
|
179
|
+
try {
|
|
180
|
+
const limits = buildStatusReport(now).limits;
|
|
181
|
+
if (!limits.windows.length)
|
|
182
|
+
return null;
|
|
183
|
+
const urgent = limits.windows.find((w) => w.level === "danger") ?? limits.windows.find((w) => w.level === "warn");
|
|
184
|
+
if (!urgent)
|
|
185
|
+
return null;
|
|
186
|
+
const key = `limit:${urgent.window}:${urgent.level}`;
|
|
187
|
+
if (rec.notified[key])
|
|
188
|
+
return null;
|
|
189
|
+
rec.notified[key] = true;
|
|
190
|
+
saveLedger(ledger);
|
|
191
|
+
return urgent.message;
|
|
192
|
+
}
|
|
193
|
+
catch {
|
|
194
|
+
return null;
|
|
195
|
+
}
|
|
196
|
+
}
|
|
163
197
|
/** Absolute path to this CLI, so recovery commands work without PATH / npm-link. */
|
|
164
198
|
function selfCmd() {
|
|
165
199
|
try {
|
package/dist/index.d.ts
CHANGED
|
@@ -12,9 +12,12 @@ export { costForUsage, totalTokens, fmtUSD, type TokenUsage } from "./cost.js";
|
|
|
12
12
|
export { evaluate, warnKey, type Budget, type Verdict, type Spend, type VerdictLevel } from "./budget.js";
|
|
13
13
|
export { loadLedger, saveLedger, setSessionCost, addSessionCost, rollingDailyCost, prune, emptyLedger, type Ledger, type SessionRecord, } from "./ledger.js";
|
|
14
14
|
export { parseTranscript, type TranscriptTotals } from "./transcript.js";
|
|
15
|
-
export { loadConfig, DEFAULT_BUDGET, guardDir, ensureGuardDir, configPath, pausePath, isPaused, pauseExpiry, writePause, clearPause, type GuardConfig, } from "./config.js";
|
|
16
|
-
export { dispatchAlert, type AlertEvent } from "./alert.js";
|
|
15
|
+
export { loadConfig, DEFAULT_BUDGET, DEFAULT_LIMITS, guardDir, ensureGuardDir, configPath, limitsPath, pausePath, isPaused, pauseExpiry, writePause, clearPause, type GuardConfig, type LimitsConfig, } from "./config.js";
|
|
16
|
+
export { dispatchAlert, type AlertEvent, type AlertLevel } from "./alert.js";
|
|
17
17
|
export { startProxy, resolveUpstream, type ProxyOptions } from "./proxy.js";
|
|
18
18
|
export { runHook } from "./hook.js";
|
|
19
|
-
export { buildStatusReport, type StatusReport } from "./report.js";
|
|
20
|
-
export {
|
|
19
|
+
export { buildStatusReport, formatLimitsLines, type StatusReport, type LimitsReport } from "./report.js";
|
|
20
|
+
export { parseUnifiedHeaders, parseUtilization, parseReset, recordHeaders, loadLimitsState, saveLimitsState, emptyLimitsState, limitNotifyKey, WINDOW_MS, type LimitSnapshot, type WindowState, type LimitsState, type LimitWindow, type HeaderGetter, } from "./limits.js";
|
|
21
|
+
export { assessWindow, assessSnapshot, worstLevel, type PacingAssessment, type PacingLevel, type PacingThresholds, } from "./pacing.js";
|
|
22
|
+
export { estimateSnapshot, isEstimated, TIER_BUDGETS, type PlanTier, type TierBudget, } from "./estimate.js";
|
|
23
|
+
export { installHook, setBudget, setLimits, resetLedger, type InstallOptions, type InstallResult, type BudgetPatch, type LimitsPatch, } from "./ops.js";
|
package/dist/index.js
CHANGED
|
@@ -12,9 +12,12 @@ export { costForUsage, totalTokens, fmtUSD } from "./cost.js";
|
|
|
12
12
|
export { evaluate, warnKey } from "./budget.js";
|
|
13
13
|
export { loadLedger, saveLedger, setSessionCost, addSessionCost, rollingDailyCost, prune, emptyLedger, } from "./ledger.js";
|
|
14
14
|
export { parseTranscript } from "./transcript.js";
|
|
15
|
-
export { loadConfig, DEFAULT_BUDGET, guardDir, ensureGuardDir, configPath, pausePath, isPaused, pauseExpiry, writePause, clearPause, } from "./config.js";
|
|
15
|
+
export { loadConfig, DEFAULT_BUDGET, DEFAULT_LIMITS, guardDir, ensureGuardDir, configPath, limitsPath, pausePath, isPaused, pauseExpiry, writePause, clearPause, } from "./config.js";
|
|
16
16
|
export { dispatchAlert } from "./alert.js";
|
|
17
17
|
export { startProxy, resolveUpstream } from "./proxy.js";
|
|
18
18
|
export { runHook } from "./hook.js";
|
|
19
|
-
export { buildStatusReport } from "./report.js";
|
|
20
|
-
export {
|
|
19
|
+
export { buildStatusReport, formatLimitsLines } from "./report.js";
|
|
20
|
+
export { parseUnifiedHeaders, parseUtilization, parseReset, recordHeaders, loadLimitsState, saveLimitsState, emptyLimitsState, limitNotifyKey, WINDOW_MS, } from "./limits.js";
|
|
21
|
+
export { assessWindow, assessSnapshot, worstLevel, } from "./pacing.js";
|
|
22
|
+
export { estimateSnapshot, isEstimated, TIER_BUDGETS, } from "./estimate.js";
|
|
23
|
+
export { installHook, setBudget, setLimits, resetLedger, } from "./ops.js";
|
package/dist/limits.d.ts
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Subscription rate-limit awareness — the "how much of my Claude Code plan have
|
|
3
|
+
* I burned" half of the guard, complementary to the dollar ledger.
|
|
4
|
+
*
|
|
5
|
+
* Claude Code on a Pro/Max subscription is NOT billed per token — the scarce
|
|
6
|
+
* resource is the plan's rate-limit quota, measured in two rolling windows:
|
|
7
|
+
* - a 5-hour window (burst protection), and
|
|
8
|
+
* - a 7-day window (the real lockout risk, "resets a couple times a month").
|
|
9
|
+
*
|
|
10
|
+
* Anthropic reports exactly where you stand in both windows on every API
|
|
11
|
+
* response, via `anthropic-ratelimit-unified-*` headers. The proxy already sees
|
|
12
|
+
* every response, so it can read these and know the *real* remaining quota and
|
|
13
|
+
* the *real* reset times — no estimation, no guessing when limits reset.
|
|
14
|
+
*
|
|
15
|
+
* This module owns: parsing those headers into a {@link LimitSnapshot}, and the
|
|
16
|
+
* small global state file (`limits.json`) that persists the latest snapshot plus
|
|
17
|
+
* whether we've ever seen subscription headers (so the rest of the guard can
|
|
18
|
+
* switch into alert-only subscription mode).
|
|
19
|
+
*
|
|
20
|
+
* Header formats are owned by Anthropic, not us, and aren't fully contract-
|
|
21
|
+
* documented, so parsing is deliberately defensive: utilization is accepted as
|
|
22
|
+
* either a 0–1 fraction or a 0–100 percent; reset is accepted as an ISO 8601
|
|
23
|
+
* timestamp, an epoch (s or ms), or a relative seconds-until-reset.
|
|
24
|
+
*/
|
|
25
|
+
export type LimitWindow = "5h" | "weekly";
|
|
26
|
+
/** Standing of a single rolling rate-limit window. */
export interface WindowState {
    /** Share of the window already used, as a fraction between 0 and 1. */
    utilization: number;
    /** Reset instant in epoch milliseconds; null when the header carried none. */
    resetAt: number | null;
    /** Per-window status string as sent by Anthropic (e.g. "allowed" / "warning"), when present. */
    status?: string;
}
|
|
35
|
+
/** One observation of the account's subscription rate-limit standing. */
export interface LimitSnapshot {
    /** 5-hour window, or null when no header for it was seen. */
    fiveHour: WindowState | null;
    /** Weekly (7-day) window, or null when no header for it was seen. */
    weekly: WindowState | null;
    /** Overall `anthropic-ratelimit-unified-status` value, verbatim, when present. */
    status: string | null;
    /** Observation time in epoch milliseconds. */
    observedAt: number;
}
|
|
44
|
+
/** Account-wide state kept on disk (shared by all sessions). */
export interface LimitsState {
    version: 1;
    /** Latches to true the first time unified subscription headers are observed. */
    subscriptionDetected: boolean;
    /** Most recent snapshot, or null before anything has been observed. */
    snapshot: LimitSnapshot | null;
    /** Alert dedup flags: one entry per window/level/reset combination already alerted. */
    notified: Record<string, boolean>;
}
|
|
53
|
+
/** Nominal window durations, used for pacing math when a reset time is unknown. */
|
|
54
|
+
export declare const WINDOW_MS: Record<LimitWindow, number>;
|
|
55
|
+
export declare function emptyLimitsState(): LimitsState;
|
|
56
|
+
export declare function loadLimitsState(): LimitsState;
|
|
57
|
+
export declare function saveLimitsState(state: LimitsState): void;
|
|
58
|
+
/** Minimal read-only header lookup, satisfied by a fetch `Headers` object or a wrapped plain record. */
export interface HeaderGetter {
    /** Look up a header by name; absent headers yield null or undefined. */
    get(name: string): string | null | undefined;
}
|
|
62
|
+
/** Wrap a plain `Record<string,string>` so it satisfies {@link HeaderGetter}. */
|
|
63
|
+
export declare function recordHeaders(rec: Record<string, string | string[] | undefined>): HeaderGetter;
|
|
64
|
+
/**
|
|
65
|
+
* Parse a utilization header value into a 0–1 fraction.
|
|
66
|
+
* Accepts "0.62", "62", "62%". Values >1.5 are treated as percentages.
|
|
67
|
+
*/
|
|
68
|
+
export declare function parseUtilization(raw: string | null | undefined): number | null;
|
|
69
|
+
/**
|
|
70
|
+
* Parse a reset header value into an absolute epoch-ms timestamp.
|
|
71
|
+
* Accepts ISO 8601 ("2026-06-13T18:00:00Z"), epoch seconds, epoch ms, or a
|
|
72
|
+
* relative seconds-until-reset (small numbers). `now` anchors the relative case.
|
|
73
|
+
*/
|
|
74
|
+
export declare function parseReset(raw: string | null | undefined, now: number): number | null;
|
|
75
|
+
/**
|
|
76
|
+
* Read the `anthropic-ratelimit-unified-*` family into a {@link LimitSnapshot}.
|
|
77
|
+
* Returns null when no unified headers are present (i.e. not a subscription
|
|
78
|
+
* session, or an endpoint that doesn't emit them) — callers use that null to
|
|
79
|
+
* mean "stay in dollar mode for this response".
|
|
80
|
+
*/
|
|
81
|
+
export declare function parseUnifiedHeaders(h: HeaderGetter, now: number): LimitSnapshot | null;
|
|
82
|
+
/** Stable dedup key for a pacing alert: re-alerts when the window resets. */
|
|
83
|
+
export declare function limitNotifyKey(window: LimitWindow, level: string, resetAt: number | null): string;
|
package/dist/limits.js
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Subscription rate-limit awareness — the "how much of my Claude Code plan have
|
|
3
|
+
* I burned" half of the guard, complementary to the dollar ledger.
|
|
4
|
+
*
|
|
5
|
+
* Claude Code on a Pro/Max subscription is NOT billed per token — the scarce
|
|
6
|
+
* resource is the plan's rate-limit quota, measured in two rolling windows:
|
|
7
|
+
* - a 5-hour window (burst protection), and
|
|
8
|
+
* - a 7-day window (the real lockout risk, "resets a couple times a month").
|
|
9
|
+
*
|
|
10
|
+
* Anthropic reports exactly where you stand in both windows on every API
|
|
11
|
+
* response, via `anthropic-ratelimit-unified-*` headers. The proxy already sees
|
|
12
|
+
* every response, so it can read these and know the *real* remaining quota and
|
|
13
|
+
* the *real* reset times — no estimation, no guessing when limits reset.
|
|
14
|
+
*
|
|
15
|
+
* This module owns: parsing those headers into a {@link LimitSnapshot}, and the
|
|
16
|
+
* small global state file (`limits.json`) that persists the latest snapshot plus
|
|
17
|
+
* whether we've ever seen subscription headers (so the rest of the guard can
|
|
18
|
+
* switch into alert-only subscription mode).
|
|
19
|
+
*
|
|
20
|
+
* Header formats are owned by Anthropic, not us, and aren't fully contract-
|
|
21
|
+
* documented, so parsing is deliberately defensive: utilization is accepted as
|
|
22
|
+
* either a 0–1 fraction or a 0–100 percent; reset is accepted as an ISO 8601
|
|
23
|
+
* timestamp, an epoch (s or ms), or a relative seconds-until-reset.
|
|
24
|
+
*/
|
|
25
|
+
import { readFileSync, writeFileSync, renameSync } from "node:fs";
|
|
26
|
+
import { limitsPath, ensureGuardDir } from "./config.js";
|
|
27
|
+
/** Nominal duration of each rolling window in milliseconds, consumed by the pacing math. */
export const WINDOW_MS = {
    "5h": 5 * 3_600_000,
    weekly: 7 * 86_400_000,
};
|
|
32
|
+
/** Build a pristine limits state: nothing detected, no snapshot, no alerts recorded. */
export function emptyLimitsState() {
    const blank = {
        version: 1,
        subscriptionDetected: false,
        snapshot: null,
        notified: {},
    };
    return blank;
}
|
|
35
|
+
/**
 * Read the persisted limits state from `limitsPath()`.
 *
 * Any failure to read or parse the file, or a payload whose `version` is not 1,
 * yields a fresh empty state instead of an error. Missing fields on a version-1
 * payload are filled with their empty defaults.
 */
export function loadLimitsState() {
    let parsed;
    try {
        parsed = JSON.parse(readFileSync(limitsPath(), "utf8"));
    }
    catch {
        // Missing or unparsable file: behave as if nothing was ever stored.
        return emptyLimitsState();
    }
    if (!parsed || parsed.version !== 1)
        return emptyLimitsState();
    const subscriptionDetected = parsed.subscriptionDetected ?? false;
    const snapshot = parsed.snapshot ?? null;
    const notified = parsed.notified ?? {};
    return { version: 1, subscriptionDetected, snapshot, notified };
}
|
|
52
|
+
/**
 * Persist the limits state as pretty-printed JSON.
 *
 * The payload is first written to a pid-suffixed temp file next to the target
 * and then renamed over `limitsPath()`.
 */
export function saveLimitsState(state) {
    ensureGuardDir();
    const target = limitsPath();
    const scratch = target + "." + process.pid + ".tmp";
    const payload = JSON.stringify(state, null, 2);
    writeFileSync(scratch, payload);
    renameSync(scratch, target);
}
|
|
59
|
+
/**
 * Wrap a plain header record so it satisfies {@link HeaderGetter}.
 *
 * Header names are matched case-insensitively, array values are joined with
 * ", ", and `undefined` entries are skipped. Lookups for headers that are not
 * present return null.
 *
 * The lookup table is a `Map` rather than a plain object so that names such as
 * "constructor" or "toString" cannot resolve to inherited `Object.prototype`
 * members and leak a non-string value to the caller.
 */
export function recordHeaders(rec) {
    const byName = new Map();
    for (const [name, value] of Object.entries(rec)) {
        if (value === undefined)
            continue;
        byName.set(name.toLowerCase(), Array.isArray(value) ? value.join(", ") : value);
    }
    return { get: (name) => byName.get(name.toLowerCase()) ?? null };
}
|
|
69
|
+
/**
 * Parse a utilization header value into a 0–1 fraction.
 *
 * Accepted forms:
 *  - a bare fraction ("0.62"),
 *  - a bare percentage ("62") — bare values above 1.5 are read as percentages,
 *  - an explicit percentage ("62%", "1%", "0.5%") — a trailing `%` always means
 *    percent, regardless of magnitude.
 *
 * Surrounding whitespace is ignored. The result is clamped to [0, 1].
 *
 * @returns the fraction, or null when the value is missing, blank, negative,
 *          or not a finite number.
 */
export function parseUtilization(raw) {
    if (raw == null)
        return null;
    const text = String(raw).trim();
    const isPercent = text.endsWith("%");
    const digits = isPercent ? text.slice(0, -1).trim() : text;
    // Number("") is 0, which would turn a blank header into "0% used"; treat blank as absent.
    if (!digits)
        return null;
    const n = Number(digits);
    if (!Number.isFinite(n) || n < 0)
        return null;
    const frac = isPercent || n > 1.5 ? n / 100 : n;
    return Math.max(0, Math.min(1, frac));
}
|
|
82
|
+
/**
 * Turn a reset header value into an absolute epoch-millisecond timestamp.
 *
 * Plain non-negative numbers are classified by magnitude:
 *  - above 1e12: already epoch milliseconds;
 *  - above 1e9:  epoch seconds;
 *  - otherwise:  seconds from `now` until the reset.
 * Anything else is handed to `Date.parse` (e.g. "2026-06-13T18:00:00Z").
 *
 * @param raw header value, possibly missing
 * @param now epoch ms used as the anchor for the relative-seconds form
 * @returns epoch ms, or null when the value is missing, blank, or unparsable
 */
export function parseReset(raw, now) {
    const text = raw == null ? "" : String(raw).trim();
    if (text === "")
        return null;
    const looksNumeric = /^\d+(\.\d+)?$/.test(text);
    if (!looksNumeric) {
        const parsed = Date.parse(text);
        return Number.isNaN(parsed) ? null : parsed;
    }
    const value = Number(text);
    if (!Number.isFinite(value))
        return null;
    let epochMs;
    if (value > 1e12)
        epochMs = value; // epoch ms
    else if (value > 1e9)
        epochMs = value * 1000; // epoch seconds
    else
        epochMs = now + value * 1000; // relative seconds-until-reset
    return Math.round(epochMs);
}
|
|
107
|
+
/**
 * Read the utilization / reset / status headers of one window.
 *
 * The "5h" window maps to the `-5h-` header family, any other key to `-7d-`.
 * Returns null when none of the three headers yields a value; otherwise a
 * window state whose utilization defaults to 0 when that header was unusable.
 */
function parseWindow(h, prefix, key, now) {
    const stem = `${prefix}-${key === "5h" ? "5h" : "7d"}`;
    const utilization = parseUtilization(h.get(`${stem}-utilization`));
    const resetAt = parseReset(h.get(`${stem}-reset`), now);
    const status = h.get(`${stem}-status`) || undefined;
    const nothingSeen = utilization == null && resetAt == null && !status;
    return nothingSeen ? null : { utilization: utilization ?? 0, resetAt, status };
}
|
|
115
|
+
/**
 * Collect the `anthropic-ratelimit-unified-*` headers into a {@link LimitSnapshot}.
 *
 * Yields null when neither window nor the overall status header is present —
 * callers treat that as "stay in dollar mode for this response".
 *
 * @param h   header lookup
 * @param now epoch ms; recorded as `observedAt` and used to anchor relative resets
 */
export function parseUnifiedHeaders(h, now) {
    const prefix = "anthropic-ratelimit-unified";
    const [fiveHour, weekly] = ["5h", "weekly"].map((key) => parseWindow(h, prefix, key, now));
    const status = h.get(`${prefix}-status`) || null;
    const sawAnything = Boolean(fiveHour || weekly || status);
    if (!sawAnything)
        return null;
    return { fiveHour, weekly, status, observedAt: now };
}
|
|
130
|
+
/**
 * Dedup key for a pacing alert, shaped `window:level:resetAt`.
 * Because the reset time is part of the key, a new reset produces a new key
 * and therefore a new alert; an unknown reset is encoded as 0.
 */
export function limitNotifyKey(window, level, resetAt) {
    const resetPart = resetAt ?? 0;
    return `${window}:${level}:${resetPart}`;
}
|
package/dist/ops.d.ts
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
* subcommands, so both drive the same logic instead of duplicating it (or
|
|
4
4
|
* shelling out). Pure side-effecting helpers over config + Claude Code settings.
|
|
5
5
|
*/
|
|
6
|
+
import { type LimitsConfig } from "./config.js";
|
|
6
7
|
import type { Budget } from "./budget.js";
|
|
7
8
|
export interface InstallOptions {
|
|
8
9
|
/** Install into ~/.claude/settings.json instead of ./.claude/settings.json */
|
|
@@ -33,6 +34,17 @@ export interface BudgetPatch {
|
|
|
33
34
|
}
|
|
34
35
|
/** Write budget/webhook overrides to the config file. Returns the saved budget. */
|
|
35
36
|
export declare function setBudget(patch: BudgetPatch): Budget;
|
|
37
|
+
/** Partial update of the subscription-limit settings; omitted fields keep their stored values. */
export interface LimitsPatch {
    /** Plan tier to pin. */
    plan?: LimitsConfig["plan"];
    /** Soft threshold for the 5-hour window. */
    fiveHourSoftPct?: number;
    /** Danger threshold for the 5-hour window. */
    fiveHourDangerPct?: number;
    /** Soft threshold for the weekly window. */
    weeklySoftPct?: number;
    /** Danger threshold for the weekly window. */
    weeklyDangerPct?: number;
    /** Burn ratio at which pacing alone raises a warning. */
    burnRatioWarn?: number;
}
|
|
46
|
+
/** Write subscription-limit overrides to the config file. Returns the saved limits. */
|
|
47
|
+
export declare function setLimits(patch: LimitsPatch): LimitsConfig;
|
|
36
48
|
/** Clear the spend ledger. Scope: all | a single session | today's sessions. */
|
|
37
49
|
export declare function resetLedger(opts: {
|
|
38
50
|
all?: boolean;
|
package/dist/ops.js
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
import { readFileSync, writeFileSync, mkdirSync } from "node:fs";
|
|
7
7
|
import { join, dirname } from "node:path";
|
|
8
8
|
import { homedir } from "node:os";
|
|
9
|
-
import { configPath, ensureGuardDir, DEFAULT_BUDGET } from "./config.js";
|
|
9
|
+
import { configPath, ensureGuardDir, DEFAULT_BUDGET, DEFAULT_LIMITS } from "./config.js";
|
|
10
10
|
import { loadLedger, saveLedger, emptyLedger } from "./ledger.js";
|
|
11
11
|
/**
|
|
12
12
|
* Wire the agent-guard hook into Claude Code settings for PreToolUse,
|
|
@@ -73,6 +73,32 @@ export function setBudget(patch) {
|
|
|
73
73
|
writeFileSync(configPath(), JSON.stringify(file, null, 2) + "\n");
|
|
74
74
|
return budget;
|
|
75
75
|
}
|
|
76
|
+
/**
 * Write subscription-limit overrides to the config file and return the saved limits.
 *
 * The stored `limits` section is layered over `DEFAULT_LIMITS`, then the patch
 * is applied on top:
 *  - `plan` is accepted only when it is one of "auto" | "pro" | "max5" | "max20";
 *  - numeric fields are accepted only when they are finite numbers.
 * Everything else in the config file is written back untouched.
 *
 * A missing, unreadable, or unparsable config file is treated as empty. So is a
 * file whose JSON is not a plain object (`null`, an array, a scalar): such a
 * value cannot carry a `limits` section, and using it as-is would either throw
 * or silently drop the update when serialized.
 */
export function setLimits(patch) {
    let file = {};
    try {
        const parsed = JSON.parse(readFileSync(configPath(), "utf8"));
        if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed))
            file = parsed;
    }
    catch {
        /* new */
    }
    const limits = { ...DEFAULT_LIMITS, ...(file.limits ?? {}) };
    if (patch.plan && ["auto", "pro", "max5", "max20"].includes(patch.plan))
        limits.plan = patch.plan;
    // Copy a numeric field from the patch only when it was supplied and is finite.
    const setPct = (k, v) => {
        if (v !== undefined && Number.isFinite(v))
            limits[k] = v;
    };
    setPct("fiveHourSoftPct", patch.fiveHourSoftPct);
    setPct("fiveHourDangerPct", patch.fiveHourDangerPct);
    setPct("weeklySoftPct", patch.weeklySoftPct);
    setPct("weeklyDangerPct", patch.weeklyDangerPct);
    setPct("burnRatioWarn", patch.burnRatioWarn);
    file.limits = limits;
    ensureGuardDir();
    writeFileSync(configPath(), JSON.stringify(file, null, 2) + "\n");
    return limits;
}
|
|
76
102
|
/** Clear the spend ledger. Scope: all | a single session | today's sessions. */
|
|
77
103
|
export function resetLedger(opts) {
|
|
78
104
|
if (opts.all) {
|
package/dist/pacing.d.ts
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pacing engine — the "intelligent" half the user asked for.
|
|
3
|
+
*
|
|
4
|
+
* Blocking at "90% of weekly" is dumb: 90% on day 6 is fine, but 60% on day 2
|
|
5
|
+
* means you'll be locked out mid-week. The resource you're spending is a budget
|
|
6
|
+
* that should last until the window resets, so the real question isn't "how much
|
|
7
|
+
* is left" — it's "at this burn rate, will I run out before the window resets?".
|
|
8
|
+
*
|
|
9
|
+
* For each window we compute:
|
|
10
|
+
* - expected utilization = fraction of the window already elapsed
|
|
11
|
+
* - burn ratio = actual / expected (1.0 = perfectly on pace)
|
|
12
|
+
* - projected exhaustion = when utilization hits 1.0 at the current rate
|
|
13
|
+
* - will-lock-out-before-reset = exhaustion lands before the reset
|
|
14
|
+
*
|
|
15
|
+
* The level (ok / warn / danger) is the worse of two signals: absolute
|
|
16
|
+
* utilization against soft/danger thresholds, and pacing (burning fast enough to
|
|
17
|
+
* lock out before reset). In subscription mode the guard never blocks on this —
|
|
18
|
+
* it surfaces the assessment as a warning so the human can ease off or switch to
|
|
19
|
+
* a cheaper model before Anthropic's own limit stops them mid-task.
|
|
20
|
+
*/
|
|
21
|
+
import { type LimitSnapshot, type LimitWindow, type WindowState } from "./limits.js";
|
|
22
|
+
/** Thresholds that drive the pacing level of each window. */
export interface PacingThresholds {
    /** Soft utilization threshold for the 5-hour window (0–1). */
    fiveHourSoftPct: number;
    /** Danger utilization threshold for the 5-hour window (0–1). */
    fiveHourDangerPct: number;
    /** Soft utilization threshold for the weekly window (0–1). */
    weeklySoftPct: number;
    /** Danger utilization threshold for the weekly window (0–1). */
    weeklyDangerPct: number;
    /** Burn ratio above which pacing alone escalates (given meaningful utilization). */
    burnRatioWarn: number;
}
|
|
31
|
+
export type PacingLevel = "ok" | "warn" | "danger";
|
|
32
|
+
/** Result of assessing one rate-limit window. */
export interface PacingAssessment {
    window: LimitWindow;
    /** Consumed share of the window, 0–1. */
    utilization: number;
    /** Reset time in epoch ms; null when unknown. */
    resetAt: number | null;
    /** Actual utilization divided by expected utilization; null when elapsed time is unknown (no reset time). */
    burnRatio: number | null;
    /** Epoch ms at which utilization is projected to reach 100% at the current rate, or null. */
    projectedExhaustionAt: number | null;
    /** Whether the projected exhaustion falls before the window resets. */
    willLockOutBeforeReset: boolean;
    level: PacingLevel;
    /** Single-line summary for humans. */
    message: string;
}
|
|
48
|
+
/** Assess a single window's pacing. `now` is epoch ms. */
|
|
49
|
+
export declare function assessWindow(window: LimitWindow, state: WindowState, thresholds: PacingThresholds, now: number): PacingAssessment;
|
|
50
|
+
/** Assess every window present in a snapshot. */
|
|
51
|
+
export declare function assessSnapshot(snap: LimitSnapshot, thresholds: PacingThresholds, now: number): PacingAssessment[];
|
|
52
|
+
/** Worst level across a set of assessments. */
|
|
53
|
+
export declare function worstLevel(assessments: PacingAssessment[]): PacingLevel;
|
package/dist/pacing.js
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pacing engine — the "intelligent" half the user asked for.
|
|
3
|
+
*
|
|
4
|
+
* Blocking at "90% of weekly" is dumb: 90% on day 6 is fine, but 60% on day 2
|
|
5
|
+
* means you'll be locked out mid-week. The resource you're spending is a budget
|
|
6
|
+
* that should last until the window resets, so the real question isn't "how much
|
|
7
|
+
* is left" — it's "at this burn rate, will I run out before the window resets?".
|
|
8
|
+
*
|
|
9
|
+
* For each window we compute:
|
|
10
|
+
* - expected utilization = fraction of the window already elapsed
|
|
11
|
+
* - burn ratio = actual / expected (1.0 = perfectly on pace)
|
|
12
|
+
* - projected exhaustion = when utilization hits 1.0 at the current rate
|
|
13
|
+
* - will-lock-out-before-reset = exhaustion lands before the reset
|
|
14
|
+
*
|
|
15
|
+
* The level (ok / warn / danger) is the worse of two signals: absolute
|
|
16
|
+
* utilization against soft/danger thresholds, and pacing (burning fast enough to
|
|
17
|
+
* lock out before reset). In subscription mode the guard never blocks on this —
|
|
18
|
+
* it surfaces the assessment as a warning so the human can ease off or switch to
|
|
19
|
+
* a cheaper model before Anthropic's own limit stops them mid-task.
|
|
20
|
+
*/
|
|
21
|
+
import { WINDOW_MS } from "./limits.js";
|
|
22
|
+
/** Human-readable name of a window: "5-hour" for "5h", "weekly" for anything else. */
function windowLabel(w) {
    if (w === "5h")
        return "5-hour";
    return "weekly";
}
|
|
25
|
+
/**
 * Format a timestamp for display relative to `now`, using the runtime's default locale.
 * A timestamp on the same calendar day as `now` shows time only; any other day
 * is prefixed with the short weekday, which reads naturally for the multi-day
 * weekly window.
 */
function fmtClock(epochMs, now) {
    const when = new Date(epochMs);
    const today = new Date(now);
    if (today.toDateString() === when.toDateString()) {
        return when.toLocaleTimeString([], { hour: "numeric", minute: "2-digit" });
    }
    return when.toLocaleString([], { weekday: "short", hour: "numeric", minute: "2-digit" });
}
|
|
33
|
+
/**
 * Compact rendering of a duration given in milliseconds:
 * "now" for zero or negative, whole minutes under an hour, hours under a day
 * (one decimal below 10h, none from 10h up), otherwise days with one decimal.
 */
function fmtDuration(ms) {
    if (ms <= 0)
        return "now";
    const hours = ms / 3_600_000;
    if (hours < 1) {
        const minutes = Math.round(ms / 60000);
        return minutes + "m";
    }
    if (hours < 24) {
        const decimals = hours < 10 ? 1 : 0;
        return hours.toFixed(decimals) + "h";
    }
    return (hours / 24).toFixed(1) + "d";
}
|
|
43
|
+
/**
 * Assess the pacing of one window.
 *
 * @param window     "5h" or "weekly"; selects the threshold pair and nominal duration
 * @param state      the window's utilization and reset time
 * @param thresholds soft / danger thresholds and the burn-ratio warning level
 * @param now        current time, epoch ms
 * @returns utilization (clamped to 0–1), burn ratio, projected exhaustion,
 *          lockout flag, level and a one-line message
 */
export function assessWindow(window, state, thresholds, now) {
    const isFiveHour = window === "5h";
    const softLimit = isFiveHour ? thresholds.fiveHourSoftPct : thresholds.weeklySoftPct;
    const dangerLimit = isFiveHour ? thresholds.fiveHourDangerPct : thresholds.weeklyDangerPct;
    const windowMs = WINDOW_MS[window];
    const used = Math.max(0, Math.min(1, state.utilization));
    const resetAt = state.resetAt;
    // Time already spent in the window, derived from the time left until reset;
    // stays unknown when no reset time is available.
    const elapsedMs = resetAt != null
        ? Math.max(0, Math.min(windowMs, windowMs - (resetAt - now)))
        : null;
    let burnRatio = null;
    let projectedExhaustionAt = null;
    let willLockOutBeforeReset = false;
    if (elapsedMs != null && elapsedMs > 0) {
        const expectedUse = elapsedMs / windowMs;
        burnRatio = expectedUse > 0 ? used / expectedUse : null;
        if (used >= 1) {
            // Already exhausted.
            projectedExhaustionAt = now;
            willLockOutBeforeReset = true;
        }
        else if (used > 0) {
            // Linear extrapolation of the average rate so far.
            const ratePerMs = used / elapsedMs;
            projectedExhaustionAt = now + (1 - used) / ratePerMs;
            willLockOutBeforeReset = projectedExhaustionAt < resetAt;
        }
    }
    // A projected lockout (or a hot burn ratio) only counts once at least half the
    // soft threshold is used; below that, small deviations from a linear burn
    // would escalate far too early.
    const lockoutFloor = softLimit * 0.5;
    const lockoutMatters = willLockOutBeforeReset && used >= lockoutFloor;
    const burningHot = burnRatio != null && burnRatio >= thresholds.burnRatioWarn && used >= lockoutFloor;
    let level;
    if (used >= dangerLimit || (lockoutMatters && used >= softLimit))
        level = "danger";
    else if (used >= softLimit || lockoutMatters || burningHot)
        level = "warn";
    else
        level = "ok";
    const pct = Math.round(used * 100);
    const parts = [`${windowLabel(window)} limit ${pct}% used`];
    if (resetAt != null)
        parts.push(`resets ${fmtClock(resetAt, now)}`);
    if (burnRatio != null && burnRatio >= 1.2 && level !== "ok")
        parts.push(`burning ${burnRatio.toFixed(1)}× pace`);
    // The projection is mentioned only when it actually influences the level.
    if (lockoutMatters && projectedExhaustionAt != null && resetAt != null) {
        const untilLockout = fmtDuration(projectedExhaustionAt - now);
        const beforeReset = fmtDuration(resetAt - projectedExhaustionAt);
        parts.push(`→ lockout in ~${untilLockout} (${beforeReset} before reset)`);
    }
    return {
        window,
        utilization: used,
        resetAt: state.resetAt,
        burnRatio,
        projectedExhaustionAt,
        willLockOutBeforeReset,
        level,
        message: parts.join(", "),
    };
}
|
|
111
|
+
/** Run {@link assessWindow} for each window the snapshot contains, 5-hour first, then weekly. */
export function assessSnapshot(snap, thresholds, now) {
    const candidates = [
        ["5h", snap.fiveHour],
        ["weekly", snap.weekly],
    ];
    return candidates
        .filter(([, windowState]) => windowState)
        .map(([name, windowState]) => assessWindow(name, windowState, thresholds, now));
}
|
|
120
|
+
/** Most severe level among the assessments: "danger" beats "warn" beats "ok" ("ok" for an empty list). */
export function worstLevel(assessments) {
    const seen = new Set(assessments.map((a) => a.level));
    if (seen.has("danger"))
        return "danger";
    return seen.has("warn") ? "warn" : "ok";
}
|
package/dist/proxy.js
CHANGED
|
@@ -24,6 +24,8 @@ import { loadLedger, saveLedger, addSessionCost, rollingDailyCost, prune, } from
|
|
|
24
24
|
import { evaluate } from "./budget.js";
|
|
25
25
|
import { dispatchAlert } from "./alert.js";
|
|
26
26
|
import { assertSafeEndpoint, warnIfUnexpectedHost } from "./net.js";
|
|
27
|
+
import { parseUnifiedHeaders, loadLimitsState, saveLimitsState, limitNotifyKey, } from "./limits.js";
|
|
28
|
+
import { assessSnapshot, worstLevel } from "./pacing.js";
|
|
27
29
|
const UPSTREAMS = {
|
|
28
30
|
anthropic: "https://api.anthropic.com",
|
|
29
31
|
openai: "https://api.openai.com",
|
|
@@ -134,6 +136,49 @@ function meter(cfg, ledger, sessionId, parsed, now) {
|
|
|
134
136
|
prune(ledger, now);
|
|
135
137
|
saveLedger(ledger);
|
|
136
138
|
}
|
|
139
|
+
/**
 * Read Anthropic's `unified-*` rate-limit headers off a response, persist the
 * snapshot, latch subscription mode on, and fire a deduped pacing alert when a
 * window is at warn/danger and has not yet been alerted for that level and reset.
 *
 * Alert-only by design — this never blocks (a subscription session already paid
 * a flat fee; the scarce resource is quota, and Anthropic's own limit is the
 * real wall).
 *
 * Dedup flags are keyed by window, level and reset time, so each window reset
 * introduces new keys. Flags whose reset time has already passed can never
 * match again and are dropped here, which keeps the persisted state from
 * growing without bound.
 *
 * @returns true if subscription headers were seen, false otherwise
 */
function captureLimits(cfg, headers, sessionId, now) {
    const snap = parseUnifiedHeaders(headers, now);
    if (!snap)
        return false;
    const state = loadLimitsState();
    state.subscriptionDetected = true;
    state.snapshot = snap;
    const assessments = assessSnapshot(snap, cfg.limits, now);
    // Prune expired dedup flags. The key's last ":"-separated segment is the reset
    // time (0 = unknown, kept). Flags for a reset time still being reported are
    // kept even if that time is in the past, so a stale header cannot re-alert
    // on every response.
    const liveResets = new Set(assessments.map((a) => a.resetAt ?? 0));
    for (const key of Object.keys(state.notified)) {
        const keyReset = Number(key.slice(key.lastIndexOf(":") + 1));
        if (Number.isFinite(keyReset) && keyReset > 0 && keyReset < now && !liveResets.has(keyReset))
            delete state.notified[key];
    }
    // Collect assessments that need an alert and mark them as notified.
    const fresh = [];
    for (const a of assessments) {
        if (a.level === "ok")
            continue;
        const key = limitNotifyKey(a.window, a.level, a.resetAt);
        if (state.notified[key])
            continue;
        state.notified[key] = true;
        fresh.push(a);
    }
    saveLimitsState(state);
    if (fresh.length) {
        const level = worstLevel(fresh);
        // Fire-and-forget: a failed alert must not affect the proxied response.
        dispatchAlert(cfg, {
            ts: now,
            source: "proxy",
            kind: "limit",
            sessionId,
            level: level === "danger" ? "danger" : "warn",
            sessionUSD: 0,
            dailyUSD: 0,
            reasons: fresh.map((a) => a.message),
            action: level === "danger" ? "on pace to lock out before reset" : "approaching plan limit",
            limits: fresh.map((a) => ({ window: a.window, utilization: a.utilization, resetAt: a.resetAt, level: a.level })),
        }).catch(() => { });
    }
    return true;
}
|
|
137
182
|
export function startProxy(opts) {
|
|
138
183
|
const cfg = loadConfig();
|
|
139
184
|
const upstreamOrigin = assertSafeEndpoint(opts.upstream, "upstream").replace(/\/$/, "");
|
|
@@ -143,11 +188,15 @@ export function startProxy(opts) {
|
|
|
143
188
|
const sessionId = req.headers["x-agent-guard-session"] || `proxy:${todayKey(now)}`;
|
|
144
189
|
// 1) Pre-flight budget check — block before spending anything.
|
|
145
190
|
// Escape hatch: while a human has paused enforcement, never block (but still meter).
|
|
191
|
+
// Subscription mode is ALERT-ONLY: once we've seen Anthropic's unified
|
|
192
|
+
// rate-limit headers, the session is on a flat-fee plan where dollars are
|
|
193
|
+
// meaningless, so we never 402 it — we only pace + warn.
|
|
194
|
+
const subscriptionMode = loadLimitsState().subscriptionDetected;
|
|
146
195
|
const ledger = loadLedger();
|
|
147
196
|
const sessionUSD = ledger.sessions[sessionId]?.costUSD ?? 0;
|
|
148
197
|
const dailyUSD = rollingDailyCost(ledger, now);
|
|
149
198
|
const verdict = evaluate({ sessionUSD, dailyUSD }, cfg.budget);
|
|
150
|
-
if (verdict.level === "block" && !isPaused(now)) {
|
|
199
|
+
if (verdict.level === "block" && !isPaused(now) && !subscriptionMode) {
|
|
151
200
|
if (!blockedNotified[sessionId]) {
|
|
152
201
|
blockedNotified[sessionId] = true;
|
|
153
202
|
dispatchAlert(cfg, {
|
|
@@ -190,6 +239,15 @@ export function startProxy(opts) {
|
|
|
190
239
|
res.end(JSON.stringify({ error: "kill-switch proxy: upstream fetch failed", detail: String(err) }));
|
|
191
240
|
return;
|
|
192
241
|
}
|
|
242
|
+
// 2.5) Read Anthropic's subscription rate-limit headers (alert-only).
|
|
243
|
+
if (opts.flavor === "anthropic") {
|
|
244
|
+
try {
|
|
245
|
+
captureLimits(cfg, upstream.headers, sessionId, Date.now());
|
|
246
|
+
}
|
|
247
|
+
catch {
|
|
248
|
+
/* limit capture must never break the proxied response */
|
|
249
|
+
}
|
|
250
|
+
}
|
|
193
251
|
// 3) Relay status + headers.
|
|
194
252
|
const respHeaders = {};
|
|
195
253
|
upstream.headers.forEach((v, k) => {
|
|
@@ -248,6 +306,9 @@ export function startProxy(opts) {
|
|
|
248
306
|
server.listen(opts.port, "127.0.0.1", () => {
|
|
249
307
|
process.stdout.write(`🛡 agent-guard proxy on http://localhost:${opts.port} → ${upstreamOrigin} (${opts.flavor})\n` +
|
|
250
308
|
` Caps: session hard ${fmtUSD(cfg.budget.sessionHardUSD)}, daily hard ${fmtUSD(cfg.budget.dailyHardUSD)}\n` +
|
|
309
|
+
(opts.flavor === "anthropic"
|
|
310
|
+
? ` Subscription mode: reads Anthropic rate-limit headers → paces your Pro/Max plan (alert-only)\n`
|
|
311
|
+
: "") +
|
|
251
312
|
` Point your agent at it, e.g.:\n` +
|
|
252
313
|
(opts.flavor === "anthropic"
|
|
253
314
|
? ` ANTHROPIC_BASE_URL=http://localhost:${opts.port} claude\n`
|
package/dist/report.d.ts
CHANGED
|
@@ -1,9 +1,26 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Shared status report — the single computation behind `agent-guard status` and
|
|
3
3
|
* `ks guard status`, so both emit an identical JSON shape and never drift.
|
|
4
|
+
*
|
|
5
|
+
* Two halves:
|
|
6
|
+
* - the dollar budget (session + daily-rolling), always present; and
|
|
7
|
+
* - the subscription rate-limit standing (5-hour + weekly pacing), present
|
|
8
|
+
* once we've seen Anthropic's unified headers via the proxy, or estimated
|
|
9
|
+
* when the user has pinned a plan tier. Alert-only — never blocks.
|
|
4
10
|
*/
|
|
5
11
|
import { type SessionRecord } from "./ledger.js";
|
|
6
12
|
import { type Budget, type VerdictLevel } from "./budget.js";
|
|
13
|
+
import { type PacingAssessment, type PacingLevel } from "./pacing.js";
|
|
14
|
+
/** Subscription rate-limit section of the status report. */
export interface LimitsReport {
    /** Origin of the numbers; "none" means no data and no pinned plan to estimate from. */
    source: "headers" | "estimated" | "none";
    plan: string;
    subscriptionDetected: boolean;
    /** Epoch ms at which the snapshot was observed (headers) or computed (estimated). */
    observedAt: number | null;
    windows: PacingAssessment[];
    level: PacingLevel;
}
|
|
7
24
|
export interface StatusReport {
|
|
8
25
|
budget: Budget;
|
|
9
26
|
dailyUSD: number;
|
|
@@ -15,6 +32,14 @@ export interface StatusReport {
|
|
|
15
32
|
sessions: Array<{
|
|
16
33
|
id: string;
|
|
17
34
|
} & SessionRecord>;
|
|
35
|
+
/** Subscription rate-limit pacing — present whenever we have data to show. */
|
|
36
|
+
limits: LimitsReport;
|
|
18
37
|
}
|
|
38
|
+
/**
|
|
39
|
+
* Render the subscription rate-limit section as plain text lines (no color), so
|
|
40
|
+
* both the `agent-guard` and `ks guard` status views stay identical. Returns an
|
|
41
|
+
* empty array when there's nothing useful to show.
|
|
42
|
+
*/
|
|
43
|
+
export declare function formatLimitsLines(limits: LimitsReport, now?: number): string[];
|
|
19
44
|
/** Build the current status report from the on-disk config + ledger. */
|
|
20
45
|
export declare function buildStatusReport(now?: number): StatusReport;
|
package/dist/report.js
CHANGED
|
@@ -1,12 +1,101 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Shared status report — the single computation behind `agent-guard status` and
|
|
3
3
|
* `ks guard status`, so both emit an identical JSON shape and never drift.
|
|
4
|
+
*
|
|
5
|
+
* Two halves:
|
|
6
|
+
* - the dollar budget (session + daily-rolling), always present; and
|
|
7
|
+
* - the subscription rate-limit standing (5-hour + weekly pacing), present
|
|
8
|
+
* once we've seen Anthropic's unified headers via the proxy, or estimated
|
|
9
|
+
* when the user has pinned a plan tier. Alert-only — never blocks.
|
|
4
10
|
*/
|
|
5
11
|
import { loadConfig } from "./config.js";
|
|
6
12
|
import { isPaused, pauseExpiry } from "./config.js";
|
|
7
13
|
import { loadLedger, rollingDailyCost } from "./ledger.js";
|
|
8
14
|
import { evaluate } from "./budget.js";
|
|
15
|
+
import { loadLimitsState } from "./limits.js";
|
|
16
|
+
import { assessSnapshot, worstLevel } from "./pacing.js";
|
|
17
|
+
import { estimateSnapshot } from "./estimate.js";
|
|
9
18
|
const DAY_MS = 24 * 60 * 60 * 1000;
|
|
19
|
+
/**
 * Assemble the subscription rate-limit half of the status report.
 *
 * Data sources are tried in this order:
 *   1. "headers"   - the state returned by `loadLimitsState()` carries a snapshot;
 *   2. "estimated" - no snapshot, but `cfg.limits.plan` is "pro", "max5" or
 *                    "max20", so `estimateSnapshot()` derives one from the ledger;
 *   3. "none"      - neither applies: empty `windows`, null `observedAt`, level "ok".
 *
 * @param cfg    Loaded config; `cfg.limits` is passed to `assessSnapshot()` as the
 *               thresholds and also supplies `plan`.
 * @param ledger Ledger handed to `estimateSnapshot()` (estimated path only).
 * @param now    Timestamp forwarded to the assessment / estimation helpers.
 * @returns An object with `source`, `plan`, `subscriptionDetected`, `observedAt`,
 *          `windows` and `level`, in that key order.
 */
function buildLimitsReport(cfg, ledger, now) {
    const limitsState = loadLimitsState();
    // NOTE(review): assumes loadConfig() always populates `cfg.limits` - confirm.
    const thresholds = cfg.limits;
    const plan = thresholds.plan;
    // Shared tail for both data-bearing sources: assess each window, then take
    // the worst per-window level as the overall level.
    const fromSnapshot = (source, snapshot) => {
        const windows = assessSnapshot(snapshot, thresholds, now);
        return {
            source,
            plan,
            subscriptionDetected: limitsState.subscriptionDetected,
            observedAt: snapshot.observedAt,
            windows,
            level: worstLevel(windows),
        };
    };
    // Real header data wins whenever it exists.
    if (limitsState.snapshot) {
        return fromSnapshot("headers", limitsState.snapshot);
    }
    switch (plan) {
        // Estimation is opt-in: only when the user pinned one of these tiers.
        case "pro":
        case "max5":
        case "max20":
            return fromSnapshot("estimated", estimateSnapshot(ledger, plan, now));
        default:
            return {
                source: "none",
                plan,
                subscriptionDetected: limitsState.subscriptionDetected,
                observedAt: null,
                windows: [],
                level: "ok",
            };
    }
}
|
|
57
|
+
/**
 * Render a fixed-width, 20-cell text progress bar such as `[█████░░░░░░░░░░░░░░░]`.
 *
 * @param frac Utilization as a fraction (0.5 means 50%). Values outside 0..1 are
 *             clamped. A value that is not a number (NaN, undefined) is drawn as
 *             an empty bar.
 * @returns The bar, always 22 characters: 20 cells plus the two brackets.
 */
function bar(frac) {
    const scaled = Math.round(frac * 100);
    // Without this guard NaN reaches both repeat() calls, which coerce it to 0
    // and collapse the bar to "[]", breaking the column alignment.
    const pct = Number.isNaN(scaled) ? 0 : Math.max(0, Math.min(100, scaled));
    // Each cell stands for 5 percentage points.
    const filled = Math.round(pct / 5);
    return `[${"█".repeat(filled)}${"░".repeat(20 - filled)}]`;
}
|
|
62
|
+
/**
 * Describe how long ago `observedAt` was, relative to `now`, as a short label:
 * "just now", "<n>m ago", "<n>h ago" or "<n>d ago".
 *
 * The unit is picked from the rounded value rather than the raw millisecond
 * count, so a reading that rounds up to a full unit is promoted: 59.5 minutes
 * reads "1h ago" rather than "60m ago", and 23.5 hours reads "1d ago" rather
 * than "24h ago".
 *
 * @param observedAt Epoch milliseconds of the observation.
 * @param now        Epoch milliseconds to measure against.
 * @returns The age label. Anything under one minute, including a timestamp in
 *          the future, is "just now".
 */
function ageString(observedAt, now) {
    const ms = now - observedAt;
    if (ms < 60_000)
        return "just now";
    const minutes = Math.round(ms / 60_000);
    if (minutes < 60)
        return `${minutes}m ago`;
    const hours = Math.round(ms / 3_600_000);
    if (hours < 24)
        return `${hours}h ago`;
    return `${Math.round(ms / 86_400_000)}d ago`;
}
|
|
72
|
+
/**
 * Turn a limits report into uncoloured text lines for the status view, so the
 * `agent-guard` and `ks guard` front-ends can print the same section.
 *
 * - `source === "none"`: returns a three-line hint when no subscription has
 *   been detected, otherwise an empty array.
 * - Any other source: returns a header line (level icon, an "estimated" tag
 *   when applicable, age of the observation) followed by one line per window
 *   made of its progress bar and its message.
 *
 * @param limits The limits report to render.
 * @param now    Epoch milliseconds used for the "observed ... ago" label;
 *               defaults to the current time.
 * @returns The rendered lines (possibly empty).
 */
export function formatLimitsLines(limits, now = Date.now()) {
    if (limits.source === "none") {
        // Stay quiet once a subscription has been detected; otherwise nudge
        // the user towards one of the two ways of getting data.
        return limits.subscriptionDetected
            ? []
            : [
                "Claude Code plan limits: unknown.",
                " Run `ks guard proxy` and point Claude Code at it for exact 5-hour + weekly usage,",
                " or set your tier (`ks guard config --plan max5`) for an estimate.",
            ];
    }
    let icon = "🟢";
    if (limits.level === "danger") {
        icon = "🟥";
    }
    else if (limits.level === "warn") {
        icon = "🟡";
    }
    let tag = "";
    if (limits.source === "estimated") {
        tag = " (estimated — run the proxy for exact)";
    }
    const age = limits.observedAt ? ageString(limits.observedAt, now) : "—";
    const out = [`${icon} Claude Code plan limits${tag} · observed ${age}`];
    for (const win of limits.windows) {
        // The message already carries the percentage and pacing text; the bar
        // only adds the visual.
        const gauge = bar(win.utilization);
        out.push(` ${gauge} ${win.message}`);
    }
    return out;
}
|
|
10
99
|
/** Build the current status report from the on-disk config + ledger. */
|
|
11
100
|
export function buildStatusReport(now = Date.now()) {
|
|
12
101
|
const cfg = loadConfig();
|
|
@@ -26,5 +115,6 @@ export function buildStatusReport(now = Date.now()) {
|
|
|
26
115
|
paused: isPaused(now),
|
|
27
116
|
pauseUntil: pauseExpiry(),
|
|
28
117
|
sessions,
|
|
118
|
+
limits: buildLimitsReport(cfg, ledger, now),
|
|
29
119
|
};
|
|
30
120
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@kill-switch/agent-guard",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.2",
|
|
4
4
|
"description": "Kill Switch for coding agents — stop runaway Claude Code / Cursor / Aider sessions from racking up an LLM bill. Native hook + token-metering proxy with per-session and daily-rolling budgets.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -12,7 +12,8 @@
|
|
|
12
12
|
"scripts": {
|
|
13
13
|
"build": "tsc",
|
|
14
14
|
"dev": "tsx src/cli.ts",
|
|
15
|
-
"test": "vitest run"
|
|
15
|
+
"test": "vitest run",
|
|
16
|
+
"e2e": "tsc && node scripts/e2e-subscription.mjs"
|
|
16
17
|
},
|
|
17
18
|
"dependencies": {
|
|
18
19
|
"commander": "^12.1.0"
|