@kill-switch/agent-guard 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/limits.js ADDED
@@ -0,0 +1,161 @@
1
+ /**
2
+ * Subscription rate-limit awareness — the "how much of my Claude Code plan have
3
+ * I burned" half of the guard, complementary to the dollar ledger.
4
+ *
5
+ * Claude Code on a Pro/Max subscription is NOT billed per token — the scarce
6
+ * resource is the plan's rate-limit quota, measured in two rolling windows:
7
+ * - a 5-hour window (burst protection), and
8
+ * - a 7-day window (the real lockout risk, "resets a couple times a month").
9
+ *
10
+ * Anthropic reports exactly where you stand in both windows on every API
11
+ * response, via `anthropic-ratelimit-unified-*` headers. The proxy already sees
12
+ * every response, so it can read these and know the *real* remaining quota and
13
+ * the *real* reset times — no estimation, no guessing when limits reset.
14
+ *
15
+ * This module owns: parsing those headers into a {@link LimitSnapshot}, and the
16
+ * small global state file (`limits.json`) that persists the latest snapshot plus
17
+ * whether we've ever seen subscription headers (so the rest of the guard can
18
+ * switch into alert-only subscription mode).
19
+ *
20
+ * Header formats are owned by Anthropic, not us, and aren't fully contract-
21
+ * documented, so parsing is deliberately defensive: utilization is accepted as
22
+ * either a 0–1 fraction or a 0–100 percent; reset is accepted as an ISO 8601
23
+ * timestamp, an epoch (s or ms), or a relative seconds-until-reset.
24
+ */
25
+ import { readFileSync, writeFileSync, renameSync, appendFileSync } from "node:fs";
26
+ import { limitsPath, eventsPath, ensureGuardDir } from "./config.js";
/**
 * Nominal length of each rate-limit window, in milliseconds, keyed by window
 * name. Used as the window duration in pacing math.
 */
export const WINDOW_MS = {
    "5h": 18_000_000, // 5 hours
    weekly: 604_800_000, // 7 days
};
/**
 * Build a fresh limits state: schema version 1, no subscription detected, no
 * snapshot, and an empty notification-dedup map. A new object is returned on
 * every call.
 */
export function emptyLimitsState() {
    const blank = {
        version: 1,
        subscriptionDetected: false,
        snapshot: null,
        notified: {},
    };
    return blank;
}
/**
 * Load the persisted limits state from `limitsPath()`.
 *
 * Any read or parse failure, or a payload whose `version` is not 1, yields a
 * fresh empty state instead of throwing. Missing fields fall back to their
 * empty-state values; `headersLoggedAt` is passed through as stored.
 */
export function loadLimitsState() {
    let stored;
    try {
        stored = JSON.parse(readFileSync(limitsPath(), "utf8"));
    }
    catch {
        // Missing or unreadable file: start from scratch.
        return emptyLimitsState();
    }
    if (!stored || stored.version !== 1) {
        return emptyLimitsState();
    }
    return {
        version: 1,
        subscriptionDetected: stored.subscriptionDetected ?? false,
        snapshot: stored.snapshot ?? null,
        notified: stored.notified ?? {},
        headersLoggedAt: stored.headersLoggedAt,
    };
}
/**
 * Persist the limits state as pretty-printed JSON.
 *
 * The payload is written to a per-process temp file next to the target and
 * then renamed over it, so the target is replaced in a single rename step.
 */
export function saveLimitsState(state) {
    ensureGuardDir();
    const target = limitsPath();
    const scratch = `${target}.${process.pid}.tmp`;
    const payload = JSON.stringify(state, null, 2);
    writeFileSync(scratch, payload);
    renameSync(scratch, target);
}
/**
 * Wrap a plain `Record<string,string>` so it satisfies {@link HeaderGetter}.
 *
 * Header names are matched case-insensitively. Array values are joined with
 * ", "; `undefined` values are skipped. Unknown names yield `null`.
 *
 * The lookup table is a `Map` rather than an object literal so that names
 * colliding with `Object.prototype` members ("constructor", "__proto__", …)
 * are treated as ordinary header names instead of resolving to inherited
 * properties.
 */
export function recordHeaders(rec) {
    const lower = new Map();
    for (const [k, v] of Object.entries(rec)) {
        if (v === undefined)
            continue;
        lower.set(k.toLowerCase(), Array.isArray(v) ? v.join(", ") : v);
    }
    return { get: (name) => lower.get(name.toLowerCase()) ?? null };
}
/**
 * Parse a utilization header value into a 0–1 fraction.
 *
 * Accepts "0.62", "62", "62%". A value carrying an explicit "%" suffix is
 * always read as a percentage (so "1%" is 0.01, not 1.0). A bare number is
 * read as a percentage only when it exceeds 1.5, otherwise as a fraction.
 * The result is clamped to [0, 1].
 *
 * Returns null for null/undefined, empty or whitespace-only input, negative
 * numbers, and anything that is not a finite number.
 */
export function parseUtilization(raw) {
    if (raw == null)
        return null;
    // Trim first so surrounding whitespace cannot hide the "%" suffix.
    const text = String(raw).trim();
    const isPercent = text.endsWith("%");
    const digits = isPercent ? text.slice(0, -1).trim() : text;
    // Number("") is 0, which would report a missing value as 0% used.
    if (digits === "")
        return null;
    const n = Number(digits);
    if (!Number.isFinite(n) || n < 0)
        return null;
    const frac = isPercent || n > 1.5 ? n / 100 : n;
    return Math.max(0, Math.min(1, frac));
}
/**
 * Turn a reset header value into an absolute epoch-millisecond timestamp.
 *
 * Plain non-negative decimal numbers are classified by magnitude:
 *   - above 1e12: already epoch milliseconds;
 *   - above 1e9:  epoch seconds;
 *   - otherwise:  seconds remaining until reset, added to `now`.
 * Anything else is handed to `Date.parse` (e.g. "2026-06-13T18:00:00Z").
 *
 * Returns null for null/undefined, blank input, or an unparseable value.
 */
export function parseReset(raw, now) {
    if (raw == null)
        return null;
    const text = String(raw).trim();
    if (text === "")
        return null;
    const looksNumeric = /^\d+(\.\d+)?$/.test(text);
    if (!looksNumeric) {
        const parsed = Date.parse(text);
        return Number.isNaN(parsed) ? null : parsed;
    }
    const value = Number(text);
    if (!Number.isFinite(value))
        return null;
    let epochMs;
    if (value > 1e12) {
        epochMs = value; // epoch ms
    }
    else if (value > 1e9) {
        epochMs = value * 1000; // epoch seconds
    }
    else {
        epochMs = now + value * 1000; // relative seconds-until-reset
    }
    return Math.round(epochMs);
}
/**
 * Parse one window's `-utilization`, `-reset` and `-status` headers.
 *
 * The "5h" key maps to the `5h` header segment; any other key maps to `7d`.
 * Returns null when none of the three headers yields a value. When a window
 * is returned, a missing utilization is reported as 0.
 */
function parseWindow(h, prefix, key, now) {
    const base = `${prefix}-${key === "5h" ? "5h" : "7d"}`;
    const utilization = parseUtilization(h.get(`${base}-utilization`));
    const resetAt = parseReset(h.get(`${base}-reset`), now);
    const status = h.get(`${base}-status`) || undefined;
    const nothingSeen = utilization == null && resetAt == null && !status;
    if (nothingSeen) {
        return null;
    }
    return { utilization: utilization ?? 0, resetAt, status };
}
/**
 * Build a {@link LimitSnapshot} from the `anthropic-ratelimit-unified-*`
 * header family.
 *
 * The snapshot carries the parsed 5-hour and weekly windows (each possibly
 * null), the overall `-status` header (or null) and `now` as `observedAt`.
 * Returns null when neither window nor the overall status is present.
 */
export function parseUnifiedHeaders(h, now) {
    const prefix = "anthropic-ratelimit-unified";
    const [fiveHour, weekly] = ["5h", "weekly"].map((key) => parseWindow(h, prefix, key, now));
    const status = h.get(`${prefix}-status`) || null;
    const sawAnything = Boolean(fiveHour || weekly || status);
    if (!sawAnything) {
        return null;
    }
    return { fiveHour, weekly, status, observedAt: now };
}
/**
 * Dedup key for a pacing alert, formed as `window:level:resetAt`.
 * A null/undefined reset time is encoded as 0. Because the reset time is part
 * of the key, a new reset time produces a new key.
 */
export function limitNotifyKey(window, level, resetAt) {
    const resetPart = resetAt ?? 0;
    return `${window}:${level}:${resetPart}`;
}
/**
 * Collect every header whose lowercased name starts with
 * `anthropic-ratelimit-unified` from a raw record, keeping the values
 * verbatim (array values are joined with ", "). Null/undefined values are
 * skipped. Keys in the result are lowercased.
 */
export function unifiedHeaderDump(rec) {
    const wanted = "anthropic-ratelimit-unified";
    return Object.entries(rec).reduce((dump, [name, value]) => {
        if (value == null) {
            return dump;
        }
        const lowered = name.toLowerCase();
        if (lowered.startsWith(wanted)) {
            dump[lowered] = Array.isArray(value) ? value.join(", ") : value;
        }
        return dump;
    }, {});
}
/**
 * Append one `unified-headers-observed` event (timestamp + raw header dump)
 * as a JSON line to the events file. Failures are swallowed on purpose: this
 * is a diagnostic and must never throw.
 */
export function logUnifiedHeaders(dump, now) {
    const event = { ts: now, kind: "unified-headers-observed", headers: dump };
    try {
        ensureGuardDir();
        appendFileSync(eventsPath(), `${JSON.stringify(event)}\n`);
    }
    catch {
        /* diagnostic only */
    }
}
package/dist/ops.d.ts CHANGED
@@ -3,6 +3,7 @@
3
3
  * subcommands, so both drive the same logic instead of duplicating it (or
4
4
  * shelling out). Pure side-effecting helpers over config + Claude Code settings.
5
5
  */
6
+ import { type LimitsConfig } from "./config.js";
6
7
  import type { Budget } from "./budget.js";
7
8
  export interface InstallOptions {
8
9
  /** Install into ~/.claude/settings.json instead of ./.claude/settings.json */
@@ -33,6 +34,17 @@ export interface BudgetPatch {
33
34
  }
34
35
  /** Write budget/webhook overrides to the config file. Returns the saved budget. */
35
36
  export declare function setBudget(patch: BudgetPatch): Budget;
37
+ /** Partial subscription-limits update. Merges onto the existing config file. Every field is optional; fields left out keep their current (or default) value. */
38
+ export interface LimitsPatch {
39
+ plan?: LimitsConfig["plan"]; // setLimits applies this only when it is one of "auto" | "pro" | "max5" | "max20"
40
+ fiveHourSoftPct?: number; // setLimits applies numeric fields only when they are finite numbers
41
+ fiveHourDangerPct?: number; // NOTE(review): presumably a 0–1 utilization threshold, as in PacingThresholds — confirm
42
+ weeklySoftPct?: number; // weekly-window counterpart of fiveHourSoftPct
43
+ weeklyDangerPct?: number; // weekly-window counterpart of fiveHourDangerPct
44
+ burnRatioWarn?: number; // a ratio (actual / expected utilization), not a percentage
45
+ }
46
+ /** Write subscription-limit overrides to the config file. Returns the saved limits. */
47
+ export declare function setLimits(patch: LimitsPatch): LimitsConfig;
36
48
  /** Clear the spend ledger. Scope: all | a single session | today's sessions. */
37
49
  export declare function resetLedger(opts: {
38
50
  all?: boolean;
package/dist/ops.js CHANGED
@@ -6,7 +6,7 @@
6
6
  import { readFileSync, writeFileSync, mkdirSync } from "node:fs";
7
7
  import { join, dirname } from "node:path";
8
8
  import { homedir } from "node:os";
9
- import { configPath, ensureGuardDir, DEFAULT_BUDGET } from "./config.js";
9
+ import { configPath, ensureGuardDir, DEFAULT_BUDGET, DEFAULT_LIMITS } from "./config.js";
10
10
  import { loadLedger, saveLedger, emptyLedger } from "./ledger.js";
11
11
  /**
12
12
  * Wire the agent-guard hook into Claude Code settings for PreToolUse,
@@ -73,6 +73,32 @@ export function setBudget(patch) {
73
73
  writeFileSync(configPath(), JSON.stringify(file, null, 2) + "\n");
74
74
  return budget;
75
75
  }
/**
 * Merge subscription-limit overrides into the config file and return the
 * resulting limits.
 *
 * The current limits are `DEFAULT_LIMITS` overlaid with whatever the config
 * file already holds under `limits`. `patch.plan` is applied only when it is
 * one of the known plan names; each numeric field is applied only when it is
 * a finite number. A config file that cannot be read or parsed is treated as
 * empty and is overwritten by the save.
 */
export function setLimits(patch) {
    let file = {};
    try {
        file = JSON.parse(readFileSync(configPath(), "utf8"));
    }
    catch {
        /* new */
    }
    const limits = { ...DEFAULT_LIMITS, ...(file.limits ?? {}) };
    const knownPlans = ["auto", "pro", "max5", "max20"];
    if (patch.plan && knownPlans.includes(patch.plan)) {
        limits.plan = patch.plan;
    }
    const numericKeys = [
        "fiveHourSoftPct",
        "fiveHourDangerPct",
        "weeklySoftPct",
        "weeklyDangerPct",
        "burnRatioWarn",
    ];
    for (const key of numericKeys) {
        const value = patch[key];
        // Skip absent values and anything that is not a finite number.
        if (value !== undefined && Number.isFinite(value)) {
            limits[key] = value;
        }
    }
    file.limits = limits;
    ensureGuardDir();
    writeFileSync(configPath(), JSON.stringify(file, null, 2) + "\n");
    return limits;
}
76
102
  /** Clear the spend ledger. Scope: all | a single session | today's sessions. */
77
103
  export function resetLedger(opts) {
78
104
  if (opts.all) {
package/dist/pacing.d.ts ADDED
@@ -0,0 +1,53 @@
1
+ /**
2
+ * Pacing engine — the "intelligent" half the user asked for.
3
+ *
4
+ * Blocking at "90% of weekly" is dumb: 90% on day 6 is fine, but 60% on day 2
5
+ * means you'll be locked out mid-week. The resource you're spending is a budget
6
+ * that should last until the window resets, so the real question isn't "how much
7
+ * is left" — it's "at this burn rate, will I run out before the window resets?".
8
+ *
9
+ * For each window we compute:
10
+ * - expected utilization = fraction of the window already elapsed
11
+ * - burn ratio = actual / expected (1.0 = perfectly on pace)
12
+ * - projected exhaustion = when utilization hits 1.0 at the current rate
13
+ * - will-lock-out-before-reset = exhaustion lands before the reset
14
+ *
15
+ * The level (ok / warn / danger) is the worse of two signals: absolute
16
+ * utilization against soft/danger thresholds, and pacing (burning fast enough to
17
+ * lock out before reset). In subscription mode the guard never blocks on this —
18
+ * it surfaces the assessment as a warning so the human can ease off or switch to
19
+ * a cheaper model before Anthropic's own limit stops them mid-task.
20
+ */
21
+ import { type LimitSnapshot, type LimitWindow, type WindowState } from "./limits.js";
22
+ export interface PacingThresholds { // thresholds consumed by assessWindow / assessSnapshot
23
+ /** Per-window soft / danger utilization thresholds (0–1). */
24
+ fiveHourSoftPct: number; // "5h" window: utilization at or above this is at least "warn"
25
+ fiveHourDangerPct: number; // "5h" window: utilization at or above this is "danger"
26
+ weeklySoftPct: number; // weekly window: utilization at or above this is at least "warn"
27
+ weeklyDangerPct: number; // weekly window: utilization at or above this is "danger"
28
+ /** Burn ratio above which pacing alone escalates (with meaningful utilization). */
29
+ burnRatioWarn: number; // only escalates once utilization reaches half the window's soft threshold
30
+ }
31
+ export type PacingLevel = "ok" | "warn" | "danger";
32
+ export interface PacingAssessment { // result of assessing one rate-limit window
33
+ window: LimitWindow; // which window this assessment describes
34
+ /** 0–1 fraction of the window consumed. */
35
+ utilization: number;
36
+ /** Epoch ms the window resets, or null if unknown. */
37
+ resetAt: number | null;
38
+ /** actual / expected utilization; null when elapsed is unknown (no reset time). */
39
+ burnRatio: number | null;
40
+ /** Epoch ms we project utilization hits 100% at the current rate, or null. */
41
+ projectedExhaustionAt: number | null;
42
+ /** True when projected exhaustion lands before the window resets. */
43
+ willLockOutBeforeReset: boolean;
44
+ level: PacingLevel; // worse of the absolute-utilization and pacing signals
45
+ /** One-line human summary. */
46
+ message: string;
47
+ }
48
+ /** Assess a single window's pacing. `now` is epoch ms. */
49
+ export declare function assessWindow(window: LimitWindow, state: WindowState, thresholds: PacingThresholds, now: number): PacingAssessment;
50
+ /** Assess every window present in a snapshot. */
51
+ export declare function assessSnapshot(snap: LimitSnapshot, thresholds: PacingThresholds, now: number): PacingAssessment[];
52
+ /** Worst level across a set of assessments. */
53
+ export declare function worstLevel(assessments: PacingAssessment[]): PacingLevel;
package/dist/pacing.js ADDED
@@ -0,0 +1,127 @@
1
+ /**
2
+ * Pacing engine — the "intelligent" half the user asked for.
3
+ *
4
+ * Blocking at "90% of weekly" is dumb: 90% on day 6 is fine, but 60% on day 2
5
+ * means you'll be locked out mid-week. The resource you're spending is a budget
6
+ * that should last until the window resets, so the real question isn't "how much
7
+ * is left" — it's "at this burn rate, will I run out before the window resets?".
8
+ *
9
+ * For each window we compute:
10
+ * - expected utilization = fraction of the window already elapsed
11
+ * - burn ratio = actual / expected (1.0 = perfectly on pace)
12
+ * - projected exhaustion = when utilization hits 1.0 at the current rate
13
+ * - will-lock-out-before-reset = exhaustion lands before the reset
14
+ *
15
+ * The level (ok / warn / danger) is the worse of two signals: absolute
16
+ * utilization against soft/danger thresholds, and pacing (burning fast enough to
17
+ * lock out before reset). In subscription mode the guard never blocks on this —
18
+ * it surfaces the assessment as a warning so the human can ease off or switch to
19
+ * a cheaper model before Anthropic's own limit stops them mid-task.
20
+ */
21
+ import { WINDOW_MS } from "./limits.js";
/** Human-readable name for a window key: "5-hour" for "5h", otherwise "weekly". */
function windowLabel(w) {
    if (w === "5h") {
        return "5-hour";
    }
    return "weekly";
}
/**
 * Format an epoch-ms instant for display in the runtime's default locale.
 * When it falls on the same calendar day as `now`, only the time is shown;
 * otherwise the short weekday is included, which reads naturally for a
 * multi-day weekly window.
 */
function fmtClock(epochMs, now) {
    const target = new Date(epochMs);
    const reference = new Date(now);
    const timeOnly = { hour: "numeric", minute: "2-digit" };
    if (reference.toDateString() === target.toDateString()) {
        return target.toLocaleTimeString([], timeOnly);
    }
    return target.toLocaleString([], { weekday: "short", ...timeOnly });
}
/**
 * Render a millisecond span compactly: "now" for zero or negative spans,
 * whole minutes below one hour, hours below one day (one decimal under 10h,
 * none from 10h up), and days with one decimal beyond that.
 */
function fmtDuration(ms) {
    if (ms <= 0) {
        return "now";
    }
    const hours = ms / (60 * 60 * 1000);
    if (hours < 1) {
        const minutes = Math.round(ms / 60000);
        return `${minutes}m`;
    }
    if (hours < 24) {
        const decimals = hours < 10 ? 1 : 0;
        return `${hours.toFixed(decimals)}h`;
    }
    const days = hours / 24;
    return `${days.toFixed(1)}d`;
}
/**
 * Assess one window's pacing. `now` is epoch ms.
 *
 * Steps:
 *  1. Clamp utilization to [0, 1] and pick the soft/danger thresholds for the
 *     window ("5h" uses the five-hour pair, anything else the weekly pair).
 *  2. When a reset time is known, derive elapsed time as
 *     `duration - timeUntilReset`, clamped to [0, duration].
 *  3. With positive elapsed time, compute the burn ratio (actual / expected
 *     utilization) and project linearly when utilization reaches 100%.
 *  4. Level is the worse of absolute utilization and pacing. A projected
 *     lockout only counts once utilization reaches half the soft threshold,
 *     so low-utilization projection noise does not escalate.
 *  5. Build a one-line summary from the same facts.
 */
export function assessWindow(window, state, thresholds, now) {
    const isFiveHour = window === "5h";
    const util = Math.max(0, Math.min(1, state.utilization));
    const soft = isFiveHour ? thresholds.fiveHourSoftPct : thresholds.weeklySoftPct;
    const danger = isFiveHour ? thresholds.fiveHourDangerPct : thresholds.weeklyDangerPct;
    const duration = WINDOW_MS[window];
    const resetAt = state.resetAt;
    const hasReset = resetAt != null;
    // elapsed = duration - timeUntilReset; only known when we have a reset time.
    const elapsed = hasReset
        ? Math.max(0, Math.min(duration, duration - (resetAt - now)))
        : null;
    let burnRatio = null;
    let projectedExhaustionAt = null;
    let willLockOut = false;
    if (elapsed != null && elapsed > 0) {
        const expected = elapsed / duration;
        burnRatio = expected > 0 ? util / expected : null;
        if (util >= 1) {
            // Already exhausted: the lockout is now.
            projectedExhaustionAt = now;
            willLockOut = true;
        }
        else if (util > 0) {
            const ratePerMs = util / elapsed; // utilization per ms so far
            const msToFull = (1 - util) / ratePerMs;
            projectedExhaustionAt = now + msToFull;
            // A known elapsed time implies a known reset time.
            willLockOut = projectedExhaustionAt < resetAt;
        }
    }
    // Gate the lockout signal at half the soft threshold.
    const lockoutFloor = soft * 0.5;
    const lockoutMatters = willLockOut && util >= lockoutFloor;
    const fastBurn = burnRatio != null && burnRatio >= thresholds.burnRatioWarn && util >= lockoutFloor;
    const isDanger = util >= danger || (lockoutMatters && util >= soft);
    const isWarn = util >= soft || lockoutMatters || fastBurn;
    let level = "ok";
    if (isDanger) {
        level = "danger";
    }
    else if (isWarn) {
        level = "warn";
    }
    const pct = Math.round(util * 100);
    const parts = [`${windowLabel(window)} limit ${pct}% used`];
    if (hasReset) {
        parts.push(`resets ${fmtClock(resetAt, now)}`);
    }
    if (burnRatio != null && burnRatio >= 1.2 && level !== "ok") {
        parts.push(`burning ${burnRatio.toFixed(1)}× pace`);
    }
    // Only surface the lockout projection once it actually drives the level.
    if (lockoutMatters && projectedExhaustionAt != null && hasReset) {
        const untilLockout = fmtDuration(projectedExhaustionAt - now);
        const beforeReset = fmtDuration(resetAt - projectedExhaustionAt);
        parts.push(`→ lockout in ~${untilLockout} (${beforeReset} before reset)`);
    }
    return {
        window,
        utilization: util,
        resetAt: state.resetAt,
        burnRatio,
        projectedExhaustionAt,
        willLockOutBeforeReset: willLockOut,
        level,
        message: parts.join(", "),
    };
}
/**
 * Assess every window present in a snapshot, five-hour first and weekly
 * second. Windows that are missing (falsy) are skipped.
 */
export function assessSnapshot(snap, thresholds, now) {
    const candidates = [
        ["5h", snap.fiveHour],
        ["weekly", snap.weekly],
    ];
    return candidates
        .filter(([, state]) => state)
        .map(([name, state]) => assessWindow(name, state, thresholds, now));
}
/**
 * Most severe level found in a list of assessments: "danger" beats "warn",
 * which beats "ok". An empty list is "ok".
 */
export function worstLevel(assessments) {
    const seen = new Set(assessments.map((a) => a.level));
    if (seen.has("danger")) {
        return "danger";
    }
    return seen.has("warn") ? "warn" : "ok";
}
package/dist/proxy.js CHANGED
@@ -24,6 +24,8 @@ import { loadLedger, saveLedger, addSessionCost, rollingDailyCost, prune, } from
24
24
  import { evaluate } from "./budget.js";
25
25
  import { dispatchAlert } from "./alert.js";
26
26
  import { assertSafeEndpoint, warnIfUnexpectedHost } from "./net.js";
27
+ import { parseUnifiedHeaders, recordHeaders, unifiedHeaderDump, logUnifiedHeaders, loadLimitsState, saveLimitsState, limitNotifyKey, } from "./limits.js";
28
+ import { assessSnapshot, worstLevel } from "./pacing.js";
27
29
  const UPSTREAMS = {
28
30
  anthropic: "https://api.anthropic.com",
29
31
  openai: "https://api.openai.com",
@@ -134,6 +136,59 @@ function meter(cfg, ledger, sessionId, parsed, now) {
134
136
  prune(ledger, now);
135
137
  saveLedger(ledger);
136
138
  }
/**
 * Read Anthropic's `unified-*` rate-limit headers off a response and act on
 * them. Returns false when no unified headers are present, true otherwise.
 *
 * When headers are present this:
 *  - logs the raw header values once (the first time any are seen);
 *  - marks subscription mode as detected and stores the new snapshot;
 *  - assesses each window and keeps only warn/danger assessments whose dedup
 *    key has not been notified yet, marking them as notified;
 *  - saves the state, then dispatches a single alert covering the fresh
 *    assessments (dispatch failures are ignored).
 *
 * Alert-only by design: this function never blocks a request.
 */
function captureLimits(cfg, headers, sessionId, now) {
    // Flatten to a lowercased record so we can both parse and dump the raw values.
    const rec = {};
    headers.forEach((value, name) => {
        rec[name.toLowerCase()] = value;
    });
    const snap = parseUnifiedHeaders(recordHeaders(rec), now);
    if (!snap) {
        return false;
    }
    const state = loadLimitsState();
    // Write-once raw-header diagnostic for format verification (`cat events.jsonl`).
    if (!state.headersLoggedAt) {
        logUnifiedHeaders(unifiedHeaderDump(rec), now);
        state.headersLoggedAt = now;
    }
    state.subscriptionDetected = true;
    state.snapshot = snap;
    const fresh = [];
    for (const assessment of assessSnapshot(snap, cfg.limits, now)) {
        if (assessment.level === "ok") {
            continue;
        }
        const key = limitNotifyKey(assessment.window, assessment.level, assessment.resetAt);
        if (state.notified[key]) {
            continue;
        }
        state.notified[key] = true;
        fresh.push(assessment);
    }
    saveLimitsState(state);
    if (fresh.length) {
        const isDanger = worstLevel(fresh) === "danger";
        const alert = {
            ts: now,
            source: "proxy",
            kind: "limit",
            sessionId,
            level: isDanger ? "danger" : "warn",
            sessionUSD: 0,
            dailyUSD: 0,
            reasons: fresh.map((a) => a.message),
            action: isDanger ? "on pace to lock out before reset" : "approaching plan limit",
            limits: fresh.map((a) => ({ window: a.window, utilization: a.utilization, resetAt: a.resetAt, level: a.level })),
        };
        dispatchAlert(cfg, alert).catch(() => { });
    }
    return true;
}
137
192
  export function startProxy(opts) {
138
193
  const cfg = loadConfig();
139
194
  const upstreamOrigin = assertSafeEndpoint(opts.upstream, "upstream").replace(/\/$/, "");
@@ -143,11 +198,15 @@ export function startProxy(opts) {
143
198
  const sessionId = req.headers["x-agent-guard-session"] || `proxy:${todayKey(now)}`;
144
199
  // 1) Pre-flight budget check — block before spending anything.
145
200
  // Escape hatch: while a human has paused enforcement, never block (but still meter).
201
+ // Subscription mode is ALERT-ONLY: once we've seen Anthropic's unified
202
+ // rate-limit headers, the session is on a flat-fee plan where dollars are
203
+ // meaningless, so we never 402 it — we only pace + warn.
204
+ const subscriptionMode = loadLimitsState().subscriptionDetected;
146
205
  const ledger = loadLedger();
147
206
  const sessionUSD = ledger.sessions[sessionId]?.costUSD ?? 0;
148
207
  const dailyUSD = rollingDailyCost(ledger, now);
149
208
  const verdict = evaluate({ sessionUSD, dailyUSD }, cfg.budget);
150
- if (verdict.level === "block" && !isPaused(now)) {
209
+ if (verdict.level === "block" && !isPaused(now) && !subscriptionMode) {
151
210
  if (!blockedNotified[sessionId]) {
152
211
  blockedNotified[sessionId] = true;
153
212
  dispatchAlert(cfg, {
@@ -190,6 +249,15 @@ export function startProxy(opts) {
190
249
  res.end(JSON.stringify({ error: "kill-switch proxy: upstream fetch failed", detail: String(err) }));
191
250
  return;
192
251
  }
252
+ // 2.5) Read Anthropic's subscription rate-limit headers (alert-only).
253
+ if (opts.flavor === "anthropic") {
254
+ try {
255
+ captureLimits(cfg, upstream.headers, sessionId, Date.now());
256
+ }
257
+ catch {
258
+ /* limit capture must never break the proxied response */
259
+ }
260
+ }
193
261
  // 3) Relay status + headers.
194
262
  const respHeaders = {};
195
263
  upstream.headers.forEach((v, k) => {
@@ -248,6 +316,9 @@ export function startProxy(opts) {
248
316
  server.listen(opts.port, "127.0.0.1", () => {
249
317
  process.stdout.write(`🛡 agent-guard proxy on http://localhost:${opts.port} → ${upstreamOrigin} (${opts.flavor})\n` +
250
318
  ` Caps: session hard ${fmtUSD(cfg.budget.sessionHardUSD)}, daily hard ${fmtUSD(cfg.budget.dailyHardUSD)}\n` +
319
+ (opts.flavor === "anthropic"
320
+ ? ` Subscription mode: reads Anthropic rate-limit headers → paces your Pro/Max plan (alert-only)\n`
321
+ : "") +
251
322
  ` Point your agent at it, e.g.:\n` +
252
323
  (opts.flavor === "anthropic"
253
324
  ? ` ANTHROPIC_BASE_URL=http://localhost:${opts.port} claude\n`
package/dist/report.d.ts CHANGED
@@ -1,9 +1,26 @@
1
1
  /**
2
2
  * Shared status report — the single computation behind `agent-guard status` and
3
3
  * `ks guard status`, so both emit an identical JSON shape and never drift.
4
+ *
5
+ * Two halves:
6
+ * - the dollar budget (session + daily-rolling), always present; and
7
+ * - the subscription rate-limit standing (5-hour + weekly pacing), present
8
+ * once we've seen Anthropic's unified headers via the proxy, or estimated
9
+ * when the user has pinned a plan tier. Alert-only — never blocks.
4
10
  */
5
11
  import { type SessionRecord } from "./ledger.js";
6
12
  import { type Budget, type VerdictLevel } from "./budget.js";
13
+ import { type PacingAssessment, type PacingLevel } from "./pacing.js";
14
+ export interface LimitsReport { // subscription rate-limit half of the status report
15
+ /** Where the numbers came from. "none" = no data and no pinned plan to estimate from. */
16
+ source: "headers" | "estimated" | "none";
17
+ plan: string; // configured plan tier, copied from the limits config
18
+ subscriptionDetected: boolean; // copied from the persisted limits state
19
+ /** Epoch ms the snapshot was observed (headers) or computed (estimated). */
20
+ observedAt: number | null;
21
+ windows: PacingAssessment[]; // one assessment per window present; empty when source is "none"
22
+ level: PacingLevel; // worst level across `windows`; "ok" when source is "none"
23
+ }
7
24
  export interface StatusReport {
8
25
  budget: Budget;
9
26
  dailyUSD: number;
@@ -15,6 +32,14 @@ export interface StatusReport {
15
32
  sessions: Array<{
16
33
  id: string;
17
34
  } & SessionRecord>;
35
+ /** Subscription rate-limit pacing — present whenever we have data to show. */
36
+ limits: LimitsReport;
18
37
  }
38
+ /**
39
+ * Render the subscription rate-limit section as plain text lines (no color), so
40
+ * both the `agent-guard` and `ks guard` status views stay identical. Returns an
41
+ * empty array when there's nothing useful to show.
42
+ */
43
+ export declare function formatLimitsLines(limits: LimitsReport, now?: number): string[];
19
44
  /** Build the current status report from the on-disk config + ledger. */
20
45
  export declare function buildStatusReport(now?: number): StatusReport;
package/dist/report.js CHANGED
@@ -1,12 +1,101 @@
1
1
  /**
2
2
  * Shared status report — the single computation behind `agent-guard status` and
3
3
  * `ks guard status`, so both emit an identical JSON shape and never drift.
4
+ *
5
+ * Two halves:
6
+ * - the dollar budget (session + daily-rolling), always present; and
7
+ * - the subscription rate-limit standing (5-hour + weekly pacing), present
8
+ * once we've seen Anthropic's unified headers via the proxy, or estimated
9
+ * when the user has pinned a plan tier. Alert-only — never blocks.
4
10
  */
5
11
  import { loadConfig } from "./config.js";
6
12
  import { isPaused, pauseExpiry } from "./config.js";
7
13
  import { loadLedger, rollingDailyCost } from "./ledger.js";
8
14
  import { evaluate } from "./budget.js";
15
+ import { loadLimitsState } from "./limits.js";
16
+ import { assessSnapshot, worstLevel } from "./pacing.js";
17
+ import { estimateSnapshot } from "./estimate.js";
9
18
  const DAY_MS = 24 * 60 * 60 * 1000;
/**
 * Build the subscription rate-limit section of the status report.
 *
 * Precedence:
 *  1. a persisted header snapshot, when one exists (source "headers");
 *  2. an estimate from the ledger, only when the configured plan is one of
 *     "pro" / "max5" / "max20" (source "estimated");
 *  3. otherwise an empty report (source "none", level "ok").
 */
function buildLimitsReport(cfg, ledger, now) {
    const state = loadLimitsState();
    const thresholds = cfg.limits;
    const plan = cfg.limits.plan;
    const { subscriptionDetected } = state;
    // Prefer real header data when we have it.
    if (state.snapshot) {
        const windows = assessSnapshot(state.snapshot, thresholds, now);
        return {
            source: "headers",
            plan,
            subscriptionDetected,
            observedAt: state.snapshot.observedAt,
            windows,
            level: worstLevel(windows),
        };
    }
    // Otherwise estimate, but only when the user pinned a tier (opt-in, fuzzy).
    const planIsPinned = plan === "pro" || plan === "max5" || plan === "max20";
    if (!planIsPinned) {
        return {
            source: "none",
            plan,
            subscriptionDetected,
            observedAt: null,
            windows: [],
            level: "ok",
        };
    }
    const estimated = estimateSnapshot(ledger, plan, now);
    const windows = assessSnapshot(estimated, thresholds, now);
    return {
        source: "estimated",
        plan,
        subscriptionDetected,
        observedAt: estimated.observedAt,
        windows,
        level: worstLevel(windows),
    };
}
/**
 * Draw a 20-cell text progress bar for a 0–1 fraction. The fraction is
 * converted to a whole percentage clamped to 0–100, and each cell stands for
 * five percentage points.
 */
function bar(frac) {
    const cells = 20;
    const pct = Math.max(0, Math.min(100, Math.round(frac * 100)));
    const filled = Math.round(pct / 5);
    const solid = "█".repeat(filled);
    const hollow = "░".repeat(cells - filled);
    return `[${solid}${hollow}]`;
}
/**
 * Describe how long ago `observedAt` was, relative to `now` (both epoch ms):
 * "just now" under a minute, then rounded minutes, hours, or days.
 */
function ageString(observedAt, now) {
    const minute = 60_000;
    const hour = 3_600_000;
    const day = 86_400_000;
    const ms = now - observedAt;
    if (ms < minute) {
        return "just now";
    }
    if (ms < hour) {
        return `${Math.round(ms / minute)}m ago`;
    }
    if (ms < day) {
        return `${Math.round(ms / hour)}h ago`;
    }
    return `${Math.round(ms / day)}d ago`;
}
/**
 * Render the subscription rate-limit section as plain, uncolored text lines.
 *
 * With no data (source "none") the result is a three-line setup hint, or an
 * empty array when a subscription has already been detected. Otherwise the
 * first line is a header with a level icon, an "estimated" tag where
 * applicable, and the snapshot age, followed by one bar-plus-message line per
 * window.
 */
export function formatLimitsLines(limits, now = Date.now()) {
    if (limits.source === "none") {
        // Only nudge if they haven't opted into either path.
        return limits.subscriptionDetected
            ? []
            : [
                "Claude Code plan limits: unknown.",
                " Run `ks guard proxy` and point Claude Code at it for exact 5-hour + weekly usage,",
                " or set your tier (`ks guard config --plan max5`) for an estimate.",
            ];
    }
    let icon = "🟢";
    if (limits.level === "danger") {
        icon = "🟥";
    }
    else if (limits.level === "warn") {
        icon = "🟡";
    }
    const tag = limits.source === "estimated" ? " (estimated — run the proxy for exact)" : "";
    const age = limits.observedAt ? ageString(limits.observedAt, now) : "—";
    const header = `${icon} Claude Code plan limits${tag} · observed ${age}`;
    // Each window message already leads with "<window> limit NN% used, …", so
    // the bar carries the visual and the message carries the numbers.
    const rows = limits.windows.map((w) => ` ${bar(w.utilization)} ${w.message}`);
    return [header, ...rows];
}
10
99
  /** Build the current status report from the on-disk config + ledger. */
11
100
  export function buildStatusReport(now = Date.now()) {
12
101
  const cfg = loadConfig();
@@ -26,5 +115,6 @@ export function buildStatusReport(now = Date.now()) {
26
115
  paused: isPaused(now),
27
116
  pauseUntil: pauseExpiry(),
28
117
  sessions,
118
+ limits: buildLimitsReport(cfg, ledger, now),
29
119
  };
30
120
  }