@kill-switch/agent-guard 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -107,6 +107,59 @@ AGENT_GUARD_SESSION_HARD=10 claude # one-off $10 ceiling
107
107
 
108
108
  A cap of `0` disables that check.
109
109
 
110
+ ## Subscription limits (Claude Code Pro / Max)
111
+
112
+ Dollar caps are the wrong currency for a **Pro/Max subscription**: you pay a flat fee, so the
113
+ scarce resource isn't dollars — it's your plan's rate-limit quota, in two rolling windows:
114
+
115
+ - a **5-hour** window (burst protection), and
116
+ - a **weekly** (7-day) window — the real lockout risk, "resets a couple times a month".
117
+
118
+ Anthropic reports exactly where you stand on every response via `anthropic-ratelimit-unified-*`
119
+ headers. Run Claude Code **through the proxy** and agent-guard reads them — no estimation:
120
+
121
+ ```sh
122
+ agent-guard proxy # meters Anthropic + reads limit headers
123
+ ANTHROPIC_BASE_URL=http://localhost:8787 claude
124
+ ```
125
+
126
+ Once those headers are seen, the session is in **subscription mode**: alert-only. agent-guard
127
+ **never blocks** a flat-fee plan (you already paid; Anthropic's own limit is the real wall) —
128
+ instead it *paces* you. For each window it computes burn-rate vs. a sustainable pace and
129
+ projects whether you'll exhaust the window **before it resets**, then warns in-session and via
130
+ your alert channels:
131
+
132
+ ```
133
+ 🟥 Claude Code plan limits · observed just now
134
+ [████████████░░░░░░░░] weekly limit 62% used, resets Sat 6:00 PM, burning 3.1× pace,
135
+ → lockout in ~14h (5.1d before reset)
136
+ ```
137
+
138
+ `status` shows it; the hook injects it into the session even when only the hook is running
139
+ (it reads the snapshot the proxy persisted). No proxy and want a rough read? Pin your tier and
140
+ agent-guard *estimates* from the ledger (clearly labelled, never blocks):
141
+
142
+ ```sh
143
+ agent-guard config --plan max5 # auto | pro | max5 | max20
144
+ ```
145
+
146
+ Tune the alert thresholds if the defaults are too eager (utilization values are 0–1 fractions; `--burn-ratio` is a pace multiplier):
147
+
148
+ | Setting | Meaning | Default |
149
+ |---|---|---|
150
+ | `--plan` (`AGENT_GUARD_PLAN`) | `auto` (headers only) or a tier for estimation | `auto` |
151
+ | `--weekly-soft` / `--weekly-danger` | weekly warn / danger utilization | 0.6 / 0.85 |
152
+ | `--5h-soft` / `--5h-danger` | 5-hour warn / danger utilization | 0.7 / 0.9 |
153
+ | `--burn-ratio` | pace multiplier that triggers a warning | 1.5 |
154
+
155
+ The first time the proxy sees the `unified-*` headers it writes the raw values once to
156
+ `~/.kill-switch/agent-guard/events.jsonl` (`kind: "unified-headers-observed"`) — so you can
157
+ confirm Anthropic's exact value formats with a single `cat`.
158
+
159
+ > Because subscription mode is alert-only, the "don't run both hook *and* proxy" caveat below
160
+ > doesn't bite here — running Claude Code through the proxy is exactly what feeds the limit
161
+ > headers, and dollars no longer gate anything.
162
+
110
163
  ## Alerts
111
164
 
112
165
  On the first soft/hard trip per scope, agent-guard:
@@ -132,8 +185,9 @@ rates so the guard never *under*-counts. Override any model in
132
185
  ```
133
186
  agent-guard install [--global] [--command <cmd>] wire the Claude Code hook
134
187
  agent-guard proxy [--port 8787] [--flavor anthropic|openai] [--upstream URL]
135
- agent-guard status [--json] spend vs budget
188
+ agent-guard status [--json] spend vs budget + plan limits
136
189
  agent-guard config [--session-hard N ...] view/set caps
190
+ agent-guard config [--plan max5 --weekly-soft 0.6 ...] view/set plan limits
137
191
  agent-guard reset [--all|--today|--session <id>] clear the ledger
138
192
  agent-guard hook (internal) Claude Code entrypoint
139
193
  ```
package/dist/alert.d.ts CHANGED
@@ -11,17 +11,27 @@
11
11
  * delay (or crash) the agent's tool call.
12
12
  */
13
13
  import { type GuardConfig } from "./config.js";
14
- import type { Verdict } from "./budget.js";
14
+ /** Spend verdicts are ok/warn/block; pacing assessments are ok/warn/danger. */
15
+ export type AlertLevel = "ok" | "warn" | "block" | "danger";
15
16
  export interface AlertEvent {
16
17
  ts: number;
17
18
  source: "hook" | "proxy";
19
+ /** "spend" = dollar budget trip (default); "limit" = subscription pacing alert. */
20
+ kind?: "spend" | "limit";
18
21
  sessionId: string;
19
- level: Verdict["level"];
22
+ level: AlertLevel;
20
23
  sessionUSD: number;
21
24
  dailyUSD: number;
22
25
  reasons: string[];
23
26
  action: string;
24
27
  cwd?: string;
28
+ /** For kind:"limit" — per-window utilization summary (0–1) for the payload. */
29
+ limits?: Array<{
30
+ window: string;
31
+ utilization: number;
32
+ resetAt: number | null;
33
+ level: string;
34
+ }>;
25
35
  }
26
36
  /** Dispatch an alert across all configured channels. Resolves once all attempts settle. */
27
37
  export declare function dispatchAlert(cfg: GuardConfig, evt: AlertEvent): Promise<void>;
package/dist/alert.js CHANGED
@@ -43,6 +43,17 @@ function writeLocal(evt) {
43
43
  }
44
44
  }
45
45
  function slackText(evt) {
46
+ if (evt.kind === "limit") {
47
+ const icon = evt.level === "danger" ? "🟥" : "🟡";
48
+ return [
49
+ `${icon} *Kill Switch — Claude Code subscription pacing*`,
50
+ `• Status: ${evt.action}`,
51
+ evt.cwd ? `• Project: \`${evt.cwd}\`` : "",
52
+ ...evt.reasons.map((r) => `• ${r}`),
53
+ ]
54
+ .filter(Boolean)
55
+ .join("\n");
56
+ }
46
57
  const icon = evt.level === "block" ? "🛑" : "⚠️";
47
58
  const verb = evt.level === "block" ? "BLOCKED a coding agent" : "warning on a coding agent";
48
59
  return [
package/dist/cli.js CHANGED
@@ -17,7 +17,8 @@ import { loadConfig, configPath, isPaused, pauseExpiry, writePause, clearPause,
17
17
  import { loadLedger, rollingDailyCost } from "./ledger.js";
18
18
  import { evaluate } from "./budget.js";
19
19
  import { fmtUSD } from "./cost.js";
20
- import { installHook, setBudget, resetLedger } from "./ops.js";
20
+ import { installHook, setBudget, setLimits, resetLedger } from "./ops.js";
21
+ import { buildStatusReport, formatLimitsLines } from "./report.js";
21
22
  const program = new Command();
22
23
  program
23
24
  .name("agent-guard")
@@ -77,6 +78,7 @@ program
77
78
  verdict: verdict.level,
78
79
  reasons: verdict.reasons,
79
80
  sessions: sessions.map(([id, s]) => ({ id, ...s })),
81
+ limits: buildStatusReport(now).limits,
80
82
  }, null, 2));
81
83
  return;
82
84
  }
@@ -109,6 +111,13 @@ program
109
111
  for (const r of verdict.reasons)
110
112
  console.log(` • ${r}`);
111
113
  }
114
+ // Subscription rate-limit pacing (Claude Code Pro/Max).
115
+ const limitLines = formatLimitsLines(buildStatusReport(now).limits, now);
116
+ if (limitLines.length) {
117
+ console.log("");
118
+ for (const line of limitLines)
119
+ console.log(line);
120
+ }
112
121
  });
113
122
  // ── pause / resume (escape hatch) ────────────────────────────────────────────
114
123
  program
@@ -153,31 +162,53 @@ program
153
162
  // ── config ───────────────────────────────────────────────────────────────────
154
163
  program
155
164
  .command("config")
156
- .description("View or set budget caps (written to ~/.kill-switch/agent-guard/config.json)")
165
+ .description("View or set budget caps + Claude Code plan limits (written to ~/.kill-switch/agent-guard/config.json)")
157
166
  .option("--session-soft <usd>", "Per-session soft cap (warn)")
158
167
  .option("--session-hard <usd>", "Per-session hard cap (block)")
159
168
  .option("--daily-soft <usd>", "Daily rolling soft cap (warn)")
160
169
  .option("--daily-hard <usd>", "Daily rolling hard cap (block)")
161
170
  .option("--slack-webhook <url>", "Slack incoming-webhook for breach alerts")
171
+ .option("--plan <tier>", "Claude Code plan: auto | pro | max5 | max20 (subscription limit awareness)")
172
+ .option("--weekly-soft <pct>", "Weekly limit soft threshold, 0–1 (warn)")
173
+ .option("--weekly-danger <pct>", "Weekly limit danger threshold, 0–1")
174
+ .option("--5h-soft <pct>", "5-hour limit soft threshold, 0–1 (warn)")
175
+ .option("--5h-danger <pct>", "5-hour limit danger threshold, 0–1")
176
+ .option("--burn-ratio <n>", "Burn-rate multiplier that triggers a pacing warning")
162
177
  .action((opts) => {
163
- const anySet = ["sessionSoft", "sessionHard", "dailySoft", "dailyHard", "slackWebhook"]
164
- .some((k) => opts[k] !== undefined);
165
- if (!anySet) {
178
+ const budgetKeys = ["sessionSoft", "sessionHard", "dailySoft", "dailyHard", "slackWebhook"];
179
+ const limitKeys = ["plan", "weeklySoft", "weeklyDanger", "5hSoft", "5hDanger", "burnRatio"];
180
+ const anyBudget = budgetKeys.some((k) => opts[k] !== undefined);
181
+ const anyLimit = limitKeys.some((k) => opts[k] !== undefined);
182
+ if (!anyBudget && !anyLimit) {
166
183
  const cfg = loadConfig();
167
- console.log(JSON.stringify({ budget: cfg.budget, slackWebhook: cfg.slackWebhook ? "(set)" : undefined }, null, 2));
184
+ console.log(JSON.stringify({ budget: cfg.budget, limits: cfg.limits, slackWebhook: cfg.slackWebhook ? "(set)" : undefined }, null, 2));
168
185
  console.log(`\nConfig file: ${configPath()}`);
169
186
  return;
170
187
  }
171
188
  const num = (v) => (v !== undefined ? Number(v) : undefined);
172
- const budget = setBudget({
173
- sessionSoftUSD: num(opts.sessionSoft),
174
- sessionHardUSD: num(opts.sessionHard),
175
- dailySoftUSD: num(opts.dailySoft),
176
- dailyHardUSD: num(opts.dailyHard),
177
- slackWebhook: opts.slackWebhook,
178
- });
179
- console.log(`✅ Saved → ${configPath()}`);
180
- console.log(JSON.stringify(budget, null, 2));
189
+ if (anyBudget) {
190
+ const budget = setBudget({
191
+ sessionSoftUSD: num(opts.sessionSoft),
192
+ sessionHardUSD: num(opts.sessionHard),
193
+ dailySoftUSD: num(opts.dailySoft),
194
+ dailyHardUSD: num(opts.dailyHard),
195
+ slackWebhook: opts.slackWebhook,
196
+ });
197
+ console.log(`✅ Budget saved → ${configPath()}`);
198
+ console.log(JSON.stringify(budget, null, 2));
199
+ }
200
+ if (anyLimit) {
201
+ const limits = setLimits({
202
+ plan: opts.plan,
203
+ weeklySoftPct: num(opts.weeklySoft),
204
+ weeklyDangerPct: num(opts.weeklyDanger),
205
+ fiveHourSoftPct: num(opts["5hSoft"]),
206
+ fiveHourDangerPct: num(opts["5hDanger"]),
207
+ burnRatioWarn: num(opts.burnRatio),
208
+ });
209
+ console.log(`✅ Plan limits saved → ${configPath()}`);
210
+ console.log(JSON.stringify(limits, null, 2));
211
+ }
181
212
  });
182
213
  // ── reset ────────────────────────────────────────────────────────────────────
183
214
  program
package/dist/config.d.ts CHANGED
@@ -8,8 +8,30 @@
8
8
  */
9
9
  import type { Budget } from "./budget.js";
10
10
  import type { ModelPricing } from "./pricing.js";
11
+ /**
12
+ * Subscription rate-limit config. Separate from the dollar {@link Budget}
13
+ * because a Claude Code Pro/Max session pays a flat fee — the scarce resource is
14
+ * the plan's 5-hour and weekly quota, not dollars. This is alert-only: the guard
15
+ * never blocks on these (you already paid), it just warns before you lock out.
16
+ */
17
+ export interface LimitsConfig {
18
+ /**
19
+ * Plan tier. "auto" = derive everything from observed `unified-*` headers
20
+ * (proxy path); a pinned tier additionally enables hook-only estimation when
21
+ * no fresh header snapshot exists. See estimate.ts.
22
+ */
23
+ plan: "auto" | "pro" | "max5" | "max20";
24
+ /** Per-window soft (warn) / danger thresholds, as 0–1 utilization fractions. */
25
+ fiveHourSoftPct: number;
26
+ fiveHourDangerPct: number;
27
+ weeklySoftPct: number;
28
+ weeklyDangerPct: number;
29
+ /** Burn ratio (actual/expected pace) above which we escalate on pacing alone. */
30
+ burnRatioWarn: number;
31
+ }
11
32
  export interface GuardConfig {
12
33
  budget: Budget;
34
+ limits: LimitsConfig;
13
35
  /** Optional pricing overrides merged onto the built-in table. */
14
36
  pricingOverrides?: Record<string, ModelPricing>;
15
37
  /** Kill Switch API key (ks_live_…) for reporting kill events to Guardian. */
@@ -20,6 +42,7 @@ export interface GuardConfig {
20
42
  slackWebhook?: string;
21
43
  }
22
44
  export declare const DEFAULT_BUDGET: Budget;
45
+ export declare const DEFAULT_LIMITS: LimitsConfig;
23
46
  /** ~/.kill-switch/agent-guard — created on demand. */
24
47
  export declare function guardDir(): string;
25
48
  export declare function ensureGuardDir(): string;
@@ -27,6 +50,8 @@ export declare const ledgerPath: () => string;
27
50
  export declare const configPath: () => string;
28
51
  export declare const pricingPath: () => string;
29
52
  export declare const eventsPath: () => string;
53
+ /** Subscription rate-limit state (latest unified-header snapshot + dedup). */
54
+ export declare const limitsPath: () => string;
30
55
  /**
31
56
  * Escape hatch. The hook/proxy fail OPEN while this sentinel exists, so a human
32
57
  * can always disable enforcement from outside the agent loop — even with zero
package/dist/config.js CHANGED
@@ -15,6 +15,14 @@ export const DEFAULT_BUDGET = {
15
15
  dailySoftUSD: 25,
16
16
  dailyHardUSD: 100,
17
17
  };
18
+ export const DEFAULT_LIMITS = {
19
+ plan: "auto",
20
+ fiveHourSoftPct: 0.7,
21
+ fiveHourDangerPct: 0.9,
22
+ weeklySoftPct: 0.6,
23
+ weeklyDangerPct: 0.85,
24
+ burnRatioWarn: 1.5,
25
+ };
18
26
  /** ~/.kill-switch/agent-guard — created on demand. */
19
27
  export function guardDir() {
20
28
  return join(homedir(), ".kill-switch", "agent-guard");
@@ -28,6 +36,8 @@ export const ledgerPath = () => join(guardDir(), "ledger.json");
28
36
  export const configPath = () => join(guardDir(), "config.json");
29
37
  export const pricingPath = () => join(guardDir(), "pricing.json");
30
38
  export const eventsPath = () => join(guardDir(), "events.jsonl");
39
+ /** Subscription rate-limit state (latest unified-header snapshot + dedup). */
40
+ export const limitsPath = () => join(guardDir(), "limits.json");
31
41
  /**
32
42
  * Escape hatch. The hook/proxy fail OPEN while this sentinel exists, so a human
33
43
  * can always disable enforcement from outside the agent loop — even with zero
@@ -95,8 +105,20 @@ export function loadConfig() {
95
105
  dailySoftUSD: num(process.env.AGENT_GUARD_DAILY_SOFT, fileBudget.dailySoftUSD ?? DEFAULT_BUDGET.dailySoftUSD),
96
106
  dailyHardUSD: num(process.env.AGENT_GUARD_DAILY_HARD, fileBudget.dailyHardUSD ?? DEFAULT_BUDGET.dailyHardUSD),
97
107
  };
108
+ const fileLimits = fileCfg.limits ?? {};
109
+ const envPlan = process.env.AGENT_GUARD_PLAN;
110
+ const validPlan = (p) => p === "auto" || p === "pro" || p === "max5" || p === "max20";
111
+ const limits = {
112
+ plan: validPlan(envPlan) ? envPlan : validPlan(fileLimits.plan) ? fileLimits.plan : DEFAULT_LIMITS.plan,
113
+ fiveHourSoftPct: fileLimits.fiveHourSoftPct ?? DEFAULT_LIMITS.fiveHourSoftPct,
114
+ fiveHourDangerPct: fileLimits.fiveHourDangerPct ?? DEFAULT_LIMITS.fiveHourDangerPct,
115
+ weeklySoftPct: fileLimits.weeklySoftPct ?? DEFAULT_LIMITS.weeklySoftPct,
116
+ weeklyDangerPct: fileLimits.weeklyDangerPct ?? DEFAULT_LIMITS.weeklyDangerPct,
117
+ burnRatioWarn: fileLimits.burnRatioWarn ?? DEFAULT_LIMITS.burnRatioWarn,
118
+ };
98
119
  return {
99
120
  budget,
121
+ limits,
100
122
  pricingOverrides: { ...(fileCfg.pricingOverrides ?? {}), ...(filePricing ?? {}) },
101
123
  apiKey: process.env.KILL_SWITCH_API_KEY ?? fileCfg.apiKey,
102
124
  apiUrl: process.env.KILL_SWITCH_API_URL ?? fileCfg.apiUrl ?? "https://api.kill-switch.net",
@@ -0,0 +1,44 @@
1
+ /**
2
+ * Hook-only fallback estimate of subscription utilization.
3
+ *
4
+ * Ground truth for plan limits lives in the `anthropic-ratelimit-unified-*`
5
+ * response headers, which only the proxy sees. A user running just the Claude
6
+ * Code hook (the common, zero-config setup) never sees those headers — so when
7
+ * they've told us their plan tier, we *estimate* where they stand by summing the
8
+ * tokens the ledger recorded inside each rolling window and dividing by a
9
+ * per-tier token budget.
10
+ *
11
+ * This is deliberately approximate and always labelled as such:
12
+ * - Anthropic meters opaque "prompts" / "active hours", not tokens, so the
13
+ * token budgets below are calibrated rough equivalents, not contractual.
14
+ * - The ledger stores a session's cumulative tokens against a single
15
+ * `lastAt`, not a time series, so a long session is counted wholesale into
16
+ * whichever window its last activity falls in.
17
+ *
18
+ * It exists to give hook-only users *a* signal and to nudge them toward
19
+ * `ks guard proxy` for exact numbers — never to block (subscription mode is
20
+ * alert-only). When in doubt it under-claims utilization so it won't cry wolf.
21
+ */
22
+ import { type LimitSnapshot } from "./limits.js";
23
+ import type { Ledger } from "./ledger.js";
24
+ export type PlanTier = "pro" | "max5" | "max20";
25
+ /**
26
+ * Rough per-tier token-equivalent budgets per window. Pro is the published
27
+ * baseline; Max 5x / 20x scale the 5-hour burst ~linearly with the multiplier,
28
+ * while the weekly cap scales more conservatively (Anthropic's weekly multiplier
29
+ * is smaller than the per-session one). Tune via config if your mileage differs.
30
+ */
31
+ export interface TierBudget {
32
+ fiveHourTokens: number;
33
+ weeklyTokens: number;
34
+ }
35
+ export declare const TIER_BUDGETS: Record<PlanTier, TierBudget>;
36
+ /**
37
+ * Build an estimated {@link LimitSnapshot} from the ledger for a known tier.
38
+ * Reset times are derived from the rolling window assumption (oldest in-window
39
+ * activity + window length is unknowable here, so we report the window end from
40
+ * `now` as a conservative upper bound on time remaining).
41
+ */
42
+ export declare function estimateSnapshot(ledger: Ledger, tier: PlanTier, now: number, budgets?: Record<PlanTier, TierBudget>): LimitSnapshot;
43
+ /** True when a snapshot came from {@link estimateSnapshot} rather than real headers. */
44
+ export declare function isEstimated(snap: LimitSnapshot | null): boolean;
@@ -0,0 +1,71 @@
1
+ /**
2
+ * Hook-only fallback estimate of subscription utilization.
3
+ *
4
+ * Ground truth for plan limits lives in the `anthropic-ratelimit-unified-*`
5
+ * response headers, which only the proxy sees. A user running just the Claude
6
+ * Code hook (the common, zero-config setup) never sees those headers — so when
7
+ * they've told us their plan tier, we *estimate* where they stand by summing the
8
+ * tokens the ledger recorded inside each rolling window and dividing by a
9
+ * per-tier token budget.
10
+ *
11
+ * This is deliberately approximate and always labelled as such:
12
+ * - Anthropic meters opaque "prompts" / "active hours", not tokens, so the
13
+ * token budgets below are calibrated rough equivalents, not contractual.
14
+ * - The ledger stores a session's cumulative tokens against a single
15
+ * `lastAt`, not a time series, so a long session is counted wholesale into
16
+ * whichever window its last activity falls in.
17
+ *
18
+ * It exists to give hook-only users *a* signal and to nudge them toward
19
+ * `ks guard proxy` for exact numbers — never to block (subscription mode is
20
+ * alert-only). When in doubt it under-claims utilization so it won't cry wolf.
21
+ */
22
+ import { WINDOW_MS } from "./limits.js";
23
+ export const TIER_BUDGETS = {
24
+ // Calibrated rough equivalents — Pro ≈ 45 prompts / 5h, modest weekly cap.
25
+ pro: { fiveHourTokens: 8_000_000, weeklyTokens: 120_000_000 },
26
+ max5: { fiveHourTokens: 40_000_000, weeklyTokens: 480_000_000 },
27
+ max20: { fiveHourTokens: 160_000_000, weeklyTokens: 1_400_000_000 },
28
+ };
29
+ const FIVE_HOUR_MS = WINDOW_MS["5h"];
30
+ const WEEK_MS = WINDOW_MS.weekly;
31
/**
 * Sum tokens (input + output) across sessions whose last activity is within
 * `windowMs` of `now`.
 *
 * Membership is decided by each session's single `lastAt` stamp, so a session
 * is counted in full or not at all. A session exactly `windowMs` old is
 * outside the window (strict `<`).
 *
 * @param ledger   Ledger whose `sessions` map is scanned; a missing map is
 *                 treated as empty.
 * @param now      Reference time, in the same unit as `lastAt` (presumably
 *                 epoch ms — confirm against the ledger writer).
 * @param windowMs Window length in milliseconds.
 * @returns The token total, always a finite number.
 */
function tokensInWindow(ledger, now, windowMs) {
    // The ledger is read back from disk, so a count may arrive as a string or
    // be missing. Coerce to a finite number: otherwise `+` would concatenate
    // strings, or a NaN would poison the whole sum.
    const asCount = (value) => {
        const n = Number(value);
        return Number.isFinite(n) ? n : 0;
    };
    let total = 0;
    for (const s of Object.values(ledger?.sessions ?? {})) {
        if (!s)
            continue; // skip null/empty records instead of throwing
        if (now - s.lastAt < windowMs)
            total += asCount(s.inputTokens) + asCount(s.outputTokens);
    }
    return total;
}
40
/**
 * Build an estimated {@link LimitSnapshot} from the ledger for a known tier.
 *
 * Utilization for each window is the ledger's in-window token total divided by
 * that tier's token budget, clamped to 0–1. The ledger has no per-event time
 * series, so the true rolling reset cannot be known; each window's `resetAt`
 * is therefore reported as one full window length from `now`, the latest
 * possible reset.
 *
 * Every `status` field is set to "estimated" so consumers can tell this
 * snapshot apart from one parsed from real headers.
 *
 * @param ledger  Ledger to read token totals from.
 * @param tier    Plan tier used to look up the budget.
 * @param now     Reference time in epoch ms; also recorded as `observedAt`.
 * @param budgets Per-tier budget table; defaults to {@link TIER_BUDGETS}.
 * @returns The estimated snapshot.
 */
export function estimateSnapshot(ledger, tier, now, budgets = TIER_BUDGETS) {
    const b = budgets[tier];
    const fiveTokens = tokensInWindow(ledger, now, FIVE_HOUR_MS);
    const weekTokens = tokensInWindow(ledger, now, WEEK_MS);
    const clamp = (n) => Math.max(0, Math.min(1, n));
    // Math.min/Math.max pass NaN straight through, so zero usage against a
    // zero budget (0/0) would otherwise surface as `utilization: NaN`. Report
    // 0 in that case, in line with this module's stated policy of
    // under-claiming when in doubt.
    const utilizationOf = (used, cap) => {
        const ratio = used / cap;
        return Number.isNaN(ratio) ? 0 : clamp(ratio);
    };
    return {
        fiveHour: {
            utilization: utilizationOf(fiveTokens, b.fiveHourTokens),
            // Without a per-event time series we can't know the true rolling reset;
            // report a full window from now as a conservative (latest-possible) reset.
            resetAt: now + FIVE_HOUR_MS,
            status: "estimated",
        },
        weekly: {
            utilization: utilizationOf(weekTokens, b.weeklyTokens),
            resetAt: now + WEEK_MS,
            status: "estimated",
        },
        status: "estimated",
        observedAt: now,
    };
}
68
/**
 * Report whether a snapshot is a ledger-based estimate (its `status` is
 * "estimated", as set by {@link estimateSnapshot}) rather than a read of real
 * headers. A null or otherwise falsy snapshot is never an estimate.
 */
export function isEstimated(snap) {
    if (!snap) {
        return false;
    }
    return snap.status === "estimated";
}
package/dist/hook.js CHANGED
@@ -24,6 +24,7 @@ import { parseTranscript } from "./transcript.js";
24
24
  import { loadLedger, saveLedger, setSessionCost, rollingDailyCost, prune, } from "./ledger.js";
25
25
  import { evaluate, warnKey } from "./budget.js";
26
26
  import { dispatchAlert } from "./alert.js";
27
+ import { buildStatusReport } from "./report.js";
27
28
  function readStdin() {
28
29
  return new Promise((resolve) => {
29
30
  let data = "";
@@ -146,20 +147,53 @@ export async function runHook() {
146
147
  emit(blockDecision(event, reason, `🛑 Kill Switch stopped this agent — ${verdict.reasons[0] ?? "budget exceeded"}.`));
147
148
  process.exit(0);
148
149
  }
150
+ // Subscription rate-limit pacing — alert-only, surfaced in-session. Reads the
151
+ // snapshot the proxy persisted from Anthropic's headers (or a tier estimate),
152
+ // so even a hook-only session learns when it's about to lock out. Deduped per
153
+ // window+level so it doesn't repeat every tool call.
154
+ const limitMsg = limitNudge(rec, ledger, now);
149
155
  // Surface the warn nudge only on the first trip per scope (shouldAlert), not
150
156
  // on every subsequent tool call — otherwise the agent's context fills with
151
157
  // duplicate notices. After that, warnings stay silent until the hard cap.
152
158
  if (verdict.level === "warn" && shouldAlert) {
153
- const ctx = renderWarnContext(verdict);
159
+ const ctx = limitMsg ? `${renderWarnContext(verdict)} ${limitMsg}` : renderWarnContext(verdict);
154
160
  emit(warnDecision(event, ctx, `⚠️ Kill Switch: ${verdict.reasons[0] ?? "approaching budget"}.`));
155
161
  process.exit(0);
156
162
  }
163
+ if (limitMsg) {
164
+ emit(warnDecision(event, `Kill Switch — Claude Code plan pacing (informational, you may continue): ${limitMsg}`, `⚠️ Kill Switch: ${limitMsg}`));
165
+ process.exit(0);
166
+ }
157
167
  process.exit(0);
158
168
  }
159
169
  catch {
160
170
  process.exit(0); // fail open on any unexpected error
161
171
  }
162
172
  }
173
/**
 * Pick the most urgent subscription-window message to surface, at most once
 * per window+level for this session.
 *
 * A window at "danger" takes priority over one at "warn". When a message is
 * chosen, its dedup flag is set on `rec.notified` and the ledger is saved
 * before returning, so the same nudge is not repeated on later tool calls.
 *
 * @param rec    Session record whose `notified` map holds the dedup flags.
 * @param ledger Ledger that is persisted after the flag is set.
 * @param now    Reference time handed to `buildStatusReport`.
 * @returns The message to show, or null when there is nothing new to surface
 *          (no windows, none at warn/danger, already notified, or any error).
 */
function limitNudge(rec, ledger, now) {
    try {
        const { windows } = buildStatusReport(now).limits;
        if (!windows.length) {
            return null;
        }
        // Highest severity first: the first level with a matching window wins.
        let urgent;
        for (const severity of ["danger", "warn"]) {
            urgent = windows.find((w) => w.level === severity);
            if (urgent) {
                break;
            }
        }
        if (!urgent) {
            return null;
        }
        const dedupKey = `limit:${urgent.window}:${urgent.level}`;
        if (rec.notified[dedupKey]) {
            return null;
        }
        rec.notified[dedupKey] = true;
        saveLedger(ledger);
        return urgent.message;
    }
    catch {
        // Pacing is informational only; a failure here must never break the hook.
        return null;
    }
}
163
197
  /** Absolute path to this CLI, so recovery commands work without PATH / npm-link. */
164
198
  function selfCmd() {
165
199
  try {
package/dist/index.d.ts CHANGED
@@ -12,9 +12,12 @@ export { costForUsage, totalTokens, fmtUSD, type TokenUsage } from "./cost.js";
12
12
  export { evaluate, warnKey, type Budget, type Verdict, type Spend, type VerdictLevel } from "./budget.js";
13
13
  export { loadLedger, saveLedger, setSessionCost, addSessionCost, rollingDailyCost, prune, emptyLedger, type Ledger, type SessionRecord, } from "./ledger.js";
14
14
  export { parseTranscript, type TranscriptTotals } from "./transcript.js";
15
- export { loadConfig, DEFAULT_BUDGET, guardDir, ensureGuardDir, configPath, pausePath, isPaused, pauseExpiry, writePause, clearPause, type GuardConfig, } from "./config.js";
16
- export { dispatchAlert, type AlertEvent } from "./alert.js";
15
+ export { loadConfig, DEFAULT_BUDGET, DEFAULT_LIMITS, guardDir, ensureGuardDir, configPath, limitsPath, pausePath, isPaused, pauseExpiry, writePause, clearPause, type GuardConfig, type LimitsConfig, } from "./config.js";
16
+ export { dispatchAlert, type AlertEvent, type AlertLevel } from "./alert.js";
17
17
  export { startProxy, resolveUpstream, type ProxyOptions } from "./proxy.js";
18
18
  export { runHook } from "./hook.js";
19
- export { buildStatusReport, type StatusReport } from "./report.js";
20
- export { installHook, setBudget, resetLedger, type InstallOptions, type InstallResult, type BudgetPatch, } from "./ops.js";
19
+ export { buildStatusReport, formatLimitsLines, type StatusReport, type LimitsReport } from "./report.js";
20
+ export { parseUnifiedHeaders, parseUtilization, parseReset, recordHeaders, loadLimitsState, saveLimitsState, emptyLimitsState, limitNotifyKey, unifiedHeaderDump, logUnifiedHeaders, WINDOW_MS, type LimitSnapshot, type WindowState, type LimitsState, type LimitWindow, type HeaderGetter, } from "./limits.js";
21
+ export { assessWindow, assessSnapshot, worstLevel, type PacingAssessment, type PacingLevel, type PacingThresholds, } from "./pacing.js";
22
+ export { estimateSnapshot, isEstimated, TIER_BUDGETS, type PlanTier, type TierBudget, } from "./estimate.js";
23
+ export { installHook, setBudget, setLimits, resetLedger, type InstallOptions, type InstallResult, type BudgetPatch, type LimitsPatch, } from "./ops.js";
package/dist/index.js CHANGED
@@ -12,9 +12,12 @@ export { costForUsage, totalTokens, fmtUSD } from "./cost.js";
12
12
  export { evaluate, warnKey } from "./budget.js";
13
13
  export { loadLedger, saveLedger, setSessionCost, addSessionCost, rollingDailyCost, prune, emptyLedger, } from "./ledger.js";
14
14
  export { parseTranscript } from "./transcript.js";
15
- export { loadConfig, DEFAULT_BUDGET, guardDir, ensureGuardDir, configPath, pausePath, isPaused, pauseExpiry, writePause, clearPause, } from "./config.js";
15
+ export { loadConfig, DEFAULT_BUDGET, DEFAULT_LIMITS, guardDir, ensureGuardDir, configPath, limitsPath, pausePath, isPaused, pauseExpiry, writePause, clearPause, } from "./config.js";
16
16
  export { dispatchAlert } from "./alert.js";
17
17
  export { startProxy, resolveUpstream } from "./proxy.js";
18
18
  export { runHook } from "./hook.js";
19
- export { buildStatusReport } from "./report.js";
20
- export { installHook, setBudget, resetLedger, } from "./ops.js";
19
+ export { buildStatusReport, formatLimitsLines } from "./report.js";
20
+ export { parseUnifiedHeaders, parseUtilization, parseReset, recordHeaders, loadLimitsState, saveLimitsState, emptyLimitsState, limitNotifyKey, unifiedHeaderDump, logUnifiedHeaders, WINDOW_MS, } from "./limits.js";
21
+ export { assessWindow, assessSnapshot, worstLevel, } from "./pacing.js";
22
+ export { estimateSnapshot, isEstimated, TIER_BUDGETS, } from "./estimate.js";
23
+ export { installHook, setBudget, setLimits, resetLedger, } from "./ops.js";
@@ -0,0 +1,94 @@
1
+ /**
2
+ * Subscription rate-limit awareness — the "how much of my Claude Code plan have
3
+ * I burned" half of the guard, complementary to the dollar ledger.
4
+ *
5
+ * Claude Code on a Pro/Max subscription is NOT billed per token — the scarce
6
+ * resource is the plan's rate-limit quota, measured in two rolling windows:
7
+ * - a 5-hour window (burst protection), and
8
+ * - a 7-day window (the real lockout risk, "resets a couple times a month").
9
+ *
10
+ * Anthropic reports exactly where you stand in both windows on every API
11
+ * response, via `anthropic-ratelimit-unified-*` headers. The proxy already sees
12
+ * every response, so it can read these and know the *real* remaining quota and
13
+ * the *real* reset times — no estimation, no guessing when limits reset.
14
+ *
15
+ * This module owns: parsing those headers into a {@link LimitSnapshot}, and the
16
+ * small global state file (`limits.json`) that persists the latest snapshot plus
17
+ * whether we've ever seen subscription headers (so the rest of the guard can
18
+ * switch into alert-only subscription mode).
19
+ *
20
+ * Header formats are owned by Anthropic, not us, and aren't fully contract-
21
+ * documented, so parsing is deliberately defensive: utilization is accepted as
22
+ * either a 0–1 fraction or a 0–100 percent; reset is accepted as an ISO 8601
23
+ * timestamp, an epoch (s or ms), or a relative seconds-until-reset.
24
+ */
25
+ export type LimitWindow = "5h" | "weekly";
26
+ /** State of one rolling rate-limit window. */
27
+ export interface WindowState {
28
+ /** Fraction of the window consumed, 0–1. */
29
+ utilization: number;
30
+ /** Epoch ms when this window resets, or null if the header didn't say. */
31
+ resetAt: number | null;
32
+ /** Raw per-window status string from Anthropic (e.g. "allowed" / "warning"), if any. */
33
+ status?: string;
34
+ }
35
+ /** A point-in-time read of the account's subscription rate-limit standing. */
36
+ export interface LimitSnapshot {
37
+ fiveHour: WindowState | null;
38
+ weekly: WindowState | null;
39
+ /** Raw overall `anthropic-ratelimit-unified-status`, if present. */
40
+ status: string | null;
41
+ /** Epoch ms when this snapshot was observed. */
42
+ observedAt: number;
43
+ }
44
+ /** Persisted global state (account-wide, not per-session). */
45
+ export interface LimitsState {
46
+ version: 1;
47
+ /** True once we've ever observed unified subscription headers. Latches on. */
48
+ subscriptionDetected: boolean;
49
+ snapshot: LimitSnapshot | null;
50
+ /** Dedup flags so a given window/level/reset only alerts once. */
51
+ notified: Record<string, boolean>;
52
+ /** Epoch ms we first logged the raw unified-* headers (write-once diagnostic). */
53
+ headersLoggedAt?: number;
54
+ }
55
+ /** Nominal window durations, used for pacing math when a reset time is unknown. */
56
+ export declare const WINDOW_MS: Record<LimitWindow, number>;
57
+ export declare function emptyLimitsState(): LimitsState;
58
+ export declare function loadLimitsState(): LimitsState;
59
+ export declare function saveLimitsState(state: LimitsState): void;
60
+ /** A header bag that works for both a fetch `Headers` and a plain record. */
61
+ export interface HeaderGetter {
62
+ get(name: string): string | null | undefined;
63
+ }
64
+ /** Wrap a plain `Record<string,string>` so it satisfies {@link HeaderGetter}. */
65
+ export declare function recordHeaders(rec: Record<string, string | string[] | undefined>): HeaderGetter;
66
+ /**
67
+ * Parse a utilization header value into a 0–1 fraction.
68
+ * Accepts "0.62", "62", "62%". Values >1.5 are treated as percentages.
69
+ */
70
+ export declare function parseUtilization(raw: string | null | undefined): number | null;
71
+ /**
72
+ * Parse a reset header value into an absolute epoch-ms timestamp.
73
+ * Accepts ISO 8601 ("2026-06-13T18:00:00Z"), epoch seconds, epoch ms, or a
74
+ * relative seconds-until-reset (small numbers). `now` anchors the relative case.
75
+ */
76
+ export declare function parseReset(raw: string | null | undefined, now: number): number | null;
77
+ /**
78
+ * Read the `anthropic-ratelimit-unified-*` family into a {@link LimitSnapshot}.
79
+ * Returns null when no unified headers are present (i.e. not a subscription
80
+ * session, or an endpoint that doesn't emit them) — callers use that null to
81
+ * mean "stay in dollar mode for this response".
82
+ */
83
+ export declare function parseUnifiedHeaders(h: HeaderGetter, now: number): LimitSnapshot | null;
84
+ /** Stable dedup key for a pacing alert: re-alerts when the window resets. */
85
+ export declare function limitNotifyKey(window: LimitWindow, level: string, resetAt: number | null): string;
86
+ /**
87
+ * Pull every `anthropic-ratelimit-unified-*` header out of a raw record, verbatim.
88
+ * Used for the write-once diagnostic — Anthropic's value *formats* (fraction vs.
89
+ * percent, ISO vs. epoch reset) aren't fully documented, so capturing the raw
90
+ * strings the first time we see them makes verification a single `cat` away.
91
+ */
92
+ export declare function unifiedHeaderDump(rec: Record<string, string | string[] | undefined>): Record<string, string>;
93
+ /** Append a one-time raw-header diagnostic to events.jsonl. Best-effort, never throws. */
94
+ export declare function logUnifiedHeaders(dump: Record<string, string>, now: number): void;