npm - @kill-switch/agent-guard - Versions diffs - 0.1.3 → 0.1.5 - Mend

@kill-switch/agent-guard 0.1.3 → 0.1.5

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (13)

package/README.md CHANGED Viewed

@@ -154,7 +154,10 @@ Tune the thresholds (0–1 utilization) if the defaults are too eager:
 The first time the proxy sees the `unified-*` headers it writes the raw values once to
 `~/.kill-switch/agent-guard/events.jsonl` (`kind: "unified-headers-observed"`) — so you can
-confirm Anthropic's exact value formats with a single `cat`.
+confirm Anthropic's exact value formats with a single `cat`. Only `unified-*` headers are
+captured (an explicit allowlist — never `Authorization` / `x-api-key` / cookies), values are
+length-capped, and the dump stays local. In `auto` mode the dollar-wall suppression trusts the
+upstream's headers; pin `--plan` if you'd rather it not depend on what the upstream reports.
 > Because subscription mode is alert-only, the "don't run both hook *and* proxy" caveat below
 > doesn't bite here — running Claude Code through the proxy is exactly what feeds the limit
@@ -188,7 +191,7 @@ agent-guard proxy   [--port 8787] [--flavor anthropic|openai] [--upstream URL]
 agent-guard status  [--json]                        spend vs budget + plan limits
 agent-guard config  [--session-hard N ...]          view/set caps
 agent-guard config  [--plan max5 --weekly-soft 0.6 ...]   view/set plan limits
-agent-guard reset   [--all|--today|--session <id>]  clear the ledger
+agent-guard reset   [--all|--limits|--today|--session <id>]  clear the ledger / subscription-limit state
 agent-guard hook                                    (internal) Claude Code entrypoint
 ```

package/dist/cli.js CHANGED Viewed

@@ -213,11 +213,12 @@ program
 // ── reset ────────────────────────────────────────────────────────────────────
 program
     .command("reset")
-    .description("Clear the spend ledger")
-    .option("--all", "Wipe all sessions")
+    .description("Clear the spend ledger and/or subscription-limit state")
+    .option("--all", "Wipe all sessions + subscription-limit state")
+    .option("--limits", "Clear subscription detection latch + snapshot only")
     .option("--session <id>", "Clear a single session")
     .option("--today", "Clear sessions active today")
     .action((opts) => {
-    console.log(`✅ ${resetLedger({ all: opts.all, session: opts.session, today: opts.today })}`);
+    console.log(`✅ ${resetLedger({ all: opts.all, limits: opts.limits, session: opts.session, today: opts.today })}`);
 });
 program.parseAsync();

package/dist/estimate.d.ts CHANGED Viewed

@@ -4,40 +4,46 @@
  * Ground truth for plan limits lives in the `anthropic-ratelimit-unified-*`
  * response headers, which only the proxy sees. A user running just the Claude
  * Code hook (the common, zero-config setup) never sees those headers — so when
- * they've told us their plan tier, we *estimate* where they stand by summing the
- * tokens the ledger recorded inside each rolling window and dividing by a
- * per-tier token budget.
+ * they've told us their plan tier, we *estimate* where they stand.
  *
- * This is deliberately approximate and always labelled as such:
- *   - Anthropic meters opaque "prompts" / "active hours", not tokens, so the
- *     token budgets below are calibrated rough equivalents, not contractual.
- *   - The ledger stores a session's cumulative tokens against a single
- *     `lastAt`, not a time series, so a long session is counted wholesale into
- *     whichever window its last activity falls in.
+ * We estimate from the ledger's **cost** (`costUSD`), not its token counts. That
+ * matters: a coding agent's volume is dominated by cache reads/writes, and the
+ * ledger only stores non-cache input/output token counts — so a token-based
+ * estimate undercounts real throughput (and thus rate-limit consumption) by a
+ * large factor. `costUSD` is priced from the *full* usage including cache, so it
+ * tracks actual consumption far better. We compare it against a rough per-tier
+ * **API-equivalent** dollar ceiling for each window.
  *
- * It exists to give hook-only users *a* signal and to nudge them toward
- * `ks guard proxy` for exact numbers — never to block (subscription mode is
- * alert-only). When in doubt it under-claims utilization so it won't cry wolf.
+ * This is still deliberately approximate and always labelled as such:
+ *   - Anthropic meters opaque "prompts" / "active hours", and the dollar ceilings
+ *     below are rough API-equivalent calibrations, not contractual.
+ *   - The ledger stores a session's cumulative cost against a single `lastAt`,
+ *     not a time series, so a long session is counted wholesale into whichever
+ *     window its last activity falls in.
+ *
+ * It exists to give hook-only users *a* directional signal and to nudge them
+ * toward `ks guard proxy` for exact numbers — never to block (subscription mode
+ * is alert-only).
  */
 import { type LimitSnapshot } from "./limits.js";
 import type { Ledger } from "./ledger.js";
 export type PlanTier = "pro" | "max5" | "max20";
 /**
- * Rough per-tier token-equivalent budgets per window. Pro is the published
- * baseline; Max 5x / 20x scale the 5-hour burst ~linearly with the multiplier,
- * while the weekly cap scales more conservatively (Anthropic's weekly multiplier
- * is smaller than the per-session one). Tune via config if your mileage differs.
+ * Rough per-tier **API-equivalent USD** ceilings per window — what the plan's
+ * rate limit lets you consume before lock-out, expressed in the same list-price
+ * dollars the ledger meters. Scaled by plan: Pro is the baseline, Max 5x/20x lift
+ * the burst (5h) roughly with the multiplier and the weekly cap more
+ * conservatively. These are estimates; the proxy's real headers override them.
  */
 export interface TierBudget {
-    fiveHourTokens: number;
-    weeklyTokens: number;
+    fiveHourUSD: number;
+    weeklyUSD: number;
 }
 export declare const TIER_BUDGETS: Record<PlanTier, TierBudget>;
 /**
  * Build an estimated {@link LimitSnapshot} from the ledger for a known tier.
- * Reset times are derived from the rolling window assumption (oldest in-window
- * activity + window length is unknowable here, so we report the window end from
- * `now` as a conservative upper bound on time remaining).
+ * `resetAt` is null (the true rolling reset is unknowable without a per-event
+ * time series), so pacing reports utilization only — no fabricated reset/lockout.
  */
 export declare function estimateSnapshot(ledger: Ledger, tier: PlanTier, now: number, budgets?: Record<PlanTier, TierBudget>): LimitSnapshot;
 /** True when a snapshot came from {@link estimateSnapshot} rather than real headers. */

package/dist/estimate.js CHANGED Viewed

@@ -4,63 +4,57 @@
  * Ground truth for plan limits lives in the `anthropic-ratelimit-unified-*`
  * response headers, which only the proxy sees. A user running just the Claude
  * Code hook (the common, zero-config setup) never sees those headers — so when
- * they've told us their plan tier, we *estimate* where they stand by summing the
- * tokens the ledger recorded inside each rolling window and dividing by a
- * per-tier token budget.
+ * they've told us their plan tier, we *estimate* where they stand.
  *
- * This is deliberately approximate and always labelled as such:
- *   - Anthropic meters opaque "prompts" / "active hours", not tokens, so the
- *     token budgets below are calibrated rough equivalents, not contractual.
- *   - The ledger stores a session's cumulative tokens against a single
- *     `lastAt`, not a time series, so a long session is counted wholesale into
- *     whichever window its last activity falls in.
+ * We estimate from the ledger's **cost** (`costUSD`), not its token counts. That
+ * matters: a coding agent's volume is dominated by cache reads/writes, and the
+ * ledger only stores non-cache input/output token counts — so a token-based
+ * estimate undercounts real throughput (and thus rate-limit consumption) by a
+ * large factor. `costUSD` is priced from the *full* usage including cache, so it
+ * tracks actual consumption far better. We compare it against a rough per-tier
+ * **API-equivalent** dollar ceiling for each window.
  *
- * It exists to give hook-only users *a* signal and to nudge them toward
- * `ks guard proxy` for exact numbers — never to block (subscription mode is
- * alert-only). When in doubt it under-claims utilization so it won't cry wolf.
+ * This is still deliberately approximate and always labelled as such:
+ *   - Anthropic meters opaque "prompts" / "active hours", and the dollar ceilings
+ *     below are rough API-equivalent calibrations, not contractual.
+ *   - The ledger stores a session's cumulative cost against a single `lastAt`,
+ *     not a time series, so a long session is counted wholesale into whichever
+ *     window its last activity falls in.
+ *
+ * It exists to give hook-only users *a* directional signal and to nudge them
+ * toward `ks guard proxy` for exact numbers — never to block (subscription mode
+ * is alert-only).
  */
 import { WINDOW_MS } from "./limits.js";
 export const TIER_BUDGETS = {
-    // Calibrated rough equivalents — Pro ≈ 45 prompts / 5h, modest weekly cap.
-    pro: { fiveHourTokens: 8_000_000, weeklyTokens: 120_000_000 },
-    max5: { fiveHourTokens: 40_000_000, weeklyTokens: 480_000_000 },
-    max20: { fiveHourTokens: 160_000_000, weeklyTokens: 1_400_000_000 },
+    pro: { fiveHourUSD: 16, weeklyUSD: 100 },
+    max5: { fiveHourUSD: 80, weeklyUSD: 500 },
+    max20: { fiveHourUSD: 300, weeklyUSD: 2000 },
 };
 const FIVE_HOUR_MS = WINDOW_MS["5h"];
 const WEEK_MS = WINDOW_MS.weekly;
-/** Sum tokens (input+output) across sessions whose last activity is within `windowMs`. */
-function tokensInWindow(ledger, now, windowMs) {
+/** Sum metered cost across sessions whose last activity is within `windowMs`. */
+function costInWindow(ledger, now, windowMs) {
     let total = 0;
     for (const s of Object.values(ledger.sessions)) {
         if (now - s.lastAt < windowMs)
-            total += (s.inputTokens || 0) + (s.outputTokens || 0);
+            total += s.costUSD || 0;
     }
     return total;
 }
 /**
  * Build an estimated {@link LimitSnapshot} from the ledger for a known tier.
- * Reset times are derived from the rolling window assumption (oldest in-window
- * activity + window length is unknowable here, so we report the window end from
- * `now` as a conservative upper bound on time remaining).
+ * `resetAt` is null (the true rolling reset is unknowable without a per-event
+ * time series), so pacing reports utilization only — no fabricated reset/lockout.
  */
 export function estimateSnapshot(ledger, tier, now, budgets = TIER_BUDGETS) {
     const b = budgets[tier];
-    const fiveTokens = tokensInWindow(ledger, now, FIVE_HOUR_MS);
-    const weekTokens = tokensInWindow(ledger, now, WEEK_MS);
+    const fiveUSD = costInWindow(ledger, now, FIVE_HOUR_MS);
+    const weekUSD = costInWindow(ledger, now, WEEK_MS);
     const clamp = (n) => Math.max(0, Math.min(1, n));
     return {
-        fiveHour: {
-            utilization: clamp(fiveTokens / b.fiveHourTokens),
-            // Without a per-event time series we can't know the true rolling reset;
-            // report a full window from now as a conservative (latest-possible) reset.
-            resetAt: now + FIVE_HOUR_MS,
-            status: "estimated",
-        },
-        weekly: {
-            utilization: clamp(weekTokens / b.weeklyTokens),
-            resetAt: now + WEEK_MS,
-            status: "estimated",
-        },
+        fiveHour: { utilization: clamp(fiveUSD / b.fiveHourUSD), resetAt: null, status: "estimated" },
+        weekly: { utilization: clamp(weekUSD / b.weeklyUSD), resetAt: null, status: "estimated" },
         status: "estimated",
         observedAt: now,
     };

package/dist/hook.js CHANGED Viewed

@@ -24,7 +24,7 @@ import { parseTranscript } from "./transcript.js";
 import { loadLedger, saveLedger, setSessionCost, rollingDailyCost, prune, } from "./ledger.js";
 import { evaluate, warnKey } from "./budget.js";
 import { dispatchAlert } from "./alert.js";
-import { buildStatusReport } from "./report.js";
+import { buildLimitsReport } from "./report.js";
 function readStdin() {
     return new Promise((resolve) => {
         let data = "";
@@ -151,7 +151,7 @@ export async function runHook() {
         // snapshot the proxy persisted from Anthropic's headers (or a tier estimate),
         // so even a hook-only session learns when it's about to lock out. Deduped per
         // window+level so it doesn't repeat every tool call.
-        const limitMsg = limitNudge(rec, ledger, now);
+        const limitMsg = limitNudge(cfg, rec, ledger, now);
         // Surface the warn nudge only on the first trip per scope (shouldAlert), not
         // on every subsequent tool call — otherwise the agent's context fills with
         // duplicate notices. After that, warnings stay silent until the hard cap.
@@ -175,9 +175,9 @@ export async function runHook() {
  * session's notified map (and persists it) so the same warning doesn't repeat on
  * every tool call. Returns null when there's nothing to surface.
  */
-function limitNudge(rec, ledger, now) {
+function limitNudge(cfg, rec, ledger, now) {
     try {
-        const limits = buildStatusReport(now).limits;
+        const limits = buildLimitsReport(cfg, ledger, now);
         if (!limits.windows.length)
             return null;
         const urgent = limits.windows.find((w) => w.level === "danger") ?? limits.windows.find((w) => w.level === "warn");

package/dist/limits.d.ts CHANGED Viewed

@@ -83,12 +83,6 @@ export declare function parseReset(raw: string | null | undefined, now: number):
 export declare function parseUnifiedHeaders(h: HeaderGetter, now: number): LimitSnapshot | null;
 /** Stable dedup key for a pacing alert: re-alerts when the window resets. */
 export declare function limitNotifyKey(window: LimitWindow, level: string, resetAt: number | null): string;
-/**
- * Pull every `anthropic-ratelimit-unified-*` header out of a raw record, verbatim.
- * Used for the write-once diagnostic — Anthropic's value *formats* (fraction vs.
- * percent, ISO vs. epoch reset) aren't fully documented, so capturing the raw
- * strings the first time we see them makes verification a single `cat` away.
- */
 export declare function unifiedHeaderDump(rec: Record<string, string | string[] | undefined>): Record<string, string>;
 /** Append a one-time raw-header diagnostic to events.jsonl. Best-effort, never throws. */
 export declare function logUnifiedHeaders(dump: Record<string, string>, now: number): void;

package/dist/limits.js CHANGED Viewed

@@ -137,15 +137,23 @@ export function limitNotifyKey(window, level, resetAt) {
  * Used for the write-once diagnostic — Anthropic's value *formats* (fraction vs.
  * percent, ISO vs. epoch reset) aren't fully documented, so capturing the raw
  * strings the first time we see them makes verification a single `cat` away.
+ *
+ * Security: this is an explicit **allowlist** by the `anthropic-ratelimit-unified`
+ * prefix — credential headers (Authorization, x-api-key, cookies) are never
+ * captured, even though the caller hands us the full response header set. Values
+ * are length-capped so a hostile/compromised upstream can't bloat the log.
  */
+const MAX_DUMP_VALUE = 256;
 export function unifiedHeaderDump(rec) {
     const out = {};
     for (const [k, v] of Object.entries(rec)) {
         if (v == null)
             continue;
         const key = k.toLowerCase();
-        if (key.startsWith("anthropic-ratelimit-unified"))
-            out[key] = Array.isArray(v) ? v.join(", ") : v;
+        if (!key.startsWith("anthropic-ratelimit-unified"))
+            continue;
+        const val = Array.isArray(v) ? v.join(", ") : v;
+        out[key] = val.length > MAX_DUMP_VALUE ? val.slice(0, MAX_DUMP_VALUE) + "…[truncated]" : val;
     }
     return out;
 }

package/dist/ops.d.ts CHANGED Viewed

@@ -45,9 +45,15 @@ export interface LimitsPatch {
 }
 /** Write subscription-limit overrides to the config file. Returns the saved limits. */
 export declare function setLimits(patch: LimitsPatch): LimitsConfig;
-/** Clear the spend ledger. Scope: all | a single session | today's sessions. */
+/**
+ * Clear guard state. Scope: all (ledger + limits) | limits only | a single
+ * session | today's sessions. The `limits` scope clears the subscription
+ * detection latch + last snapshot — useful when you stop using a Pro/Max plan
+ * and want the dollar wall fully re-armed.
+ */
 export declare function resetLedger(opts: {
     all?: boolean;
+    limits?: boolean;
     session?: string;
     today?: boolean;
 }): string;

package/dist/ops.js CHANGED Viewed

@@ -8,6 +8,7 @@ import { join, dirname } from "node:path";
 import { homedir } from "node:os";
 import { configPath, ensureGuardDir, DEFAULT_BUDGET, DEFAULT_LIMITS } from "./config.js";
 import { loadLedger, saveLedger, emptyLedger } from "./ledger.js";
+import { saveLimitsState, emptyLimitsState } from "./limits.js";
 /**
  * Wire the agent-guard hook into Claude Code settings for PreToolUse,
  * UserPromptSubmit, and Stop. Idempotent: re-running adds nothing if the hook
@@ -99,11 +100,21 @@ export function setLimits(patch) {
     writeFileSync(configPath(), JSON.stringify(file, null, 2) + "\n");
     return limits;
 }
-/** Clear the spend ledger. Scope: all | a single session | today's sessions. */
+/**
+ * Clear guard state. Scope: all (ledger + limits) | limits only | a single
+ * session | today's sessions. The `limits` scope clears the subscription
+ * detection latch + last snapshot — useful when you stop using a Pro/Max plan
+ * and want the dollar wall fully re-armed.
+ */
 export function resetLedger(opts) {
     if (opts.all) {
         saveLedger(emptyLedger());
-        return "Ledger wiped.";
+        saveLimitsState(emptyLimitsState());
+        return "Ledger + subscription-limit state wiped.";
+    }
+    if (opts.limits) {
+        saveLimitsState(emptyLimitsState());
+        return "Subscription-limit state cleared (detection latch + snapshot).";
     }
     const ledger = loadLedger();
     if (opts.session) {
@@ -120,5 +131,5 @@ export function resetLedger(opts) {
         saveLedger(ledger);
         return "Cleared today's sessions.";
     }
-    return "Specify all, session <id>, or today.";
+    return "Specify all, limits, session <id>, or today.";
 }

package/dist/proxy.js CHANGED Viewed

@@ -24,7 +24,7 @@ import { loadLedger, saveLedger, addSessionCost, rollingDailyCost, prune, } from
 import { evaluate } from "./budget.js";
 import { dispatchAlert } from "./alert.js";
 import { assertSafeEndpoint, warnIfUnexpectedHost } from "./net.js";
-import { parseUnifiedHeaders, recordHeaders, unifiedHeaderDump, logUnifiedHeaders, loadLimitsState, saveLimitsState, limitNotifyKey, } from "./limits.js";
+import { parseUnifiedHeaders, recordHeaders, unifiedHeaderDump, logUnifiedHeaders, loadLimitsState, saveLimitsState, limitNotifyKey, WINDOW_MS, } from "./limits.js";
 import { assessSnapshot, worstLevel } from "./pacing.js";
 const UPSTREAMS = {
     anthropic: "https://api.anthropic.com",
@@ -159,19 +159,34 @@ function captureLimits(cfg, headers, sessionId, now) {
         logUnifiedHeaders(unifiedHeaderDump(rec), now);
         state.headersLoggedAt = now;
     }
-    state.subscriptionDetected = true;
-    state.snapshot = snap;
+    // Which windows newly cross into warn/danger (dedup vs. what we've alerted).
     const assessments = assessSnapshot(snap, cfg.limits, now);
+    const newlyNotified = [];
     const fresh = assessments.filter((a) => {
         if (a.level === "ok")
             return false;
         const key = limitNotifyKey(a.window, a.level, a.resetAt);
         if (state.notified[key])
             return false;
-        state.notified[key] = true;
+        newlyNotified.push(key);
         return true;
     });
-    saveLimitsState(state);
+    // Re-read at write time to mitigate read-modify-write races: the file write is
+    // atomic (no corruption), but a concurrent response could otherwise clobber a
+    // newer snapshot or a just-set notified flag. Keep the newest snapshot by
+    // observedAt; union the notified flags.
+    const onDisk = loadLimitsState();
+    const keepNewer = onDisk.snapshot && onDisk.snapshot.observedAt > snap.observedAt;
+    const merged = {
+        version: 1,
+        subscriptionDetected: true,
+        snapshot: keepNewer ? onDisk.snapshot : snap,
+        notified: { ...onDisk.notified, ...state.notified },
+        headersLoggedAt: onDisk.headersLoggedAt ?? state.headersLoggedAt,
+    };
+    for (const key of newlyNotified)
+        merged.notified[key] = true;
+    saveLimitsState(merged);
     if (fresh.length) {
         const level = worstLevel(fresh);
         dispatchAlert(cfg, {
@@ -189,6 +204,40 @@ function captureLimits(cfg, headers, sessionId, now) {
     }
     return true;
 }
+function planIsSubscription(plan) {
+    return plan === "pro" || plan === "max5" || plan === "max20";
+}
+/**
+ * Should the dollar hard-cap 402 be suppressed for THIS proxy/request?
+ *
+ * Only for the **Anthropic** flavor — an OpenAI / other-API agent is billed per
+ * token and must keep its wall, even if a *different* (Claude Code) session once
+ * latched subscription mode on the shared `limits.json`. And only when we have a
+ * live reason to believe this is a flat-fee plan: either the operator pinned a
+ * subscription tier (`--plan`), or we saw real `unified-*` headers **recently**
+ * (within the 5-hour window). A stale, months-old detection must never disarm
+ * the wall — that's the bug this replaces (a permanent global latch).
+ *
+ * Residual edge: an Anthropic-flavor *API-key* agent run within 5h of a Claude
+ * Code subscription session (or under a pinned `--plan`) would also be
+ * suppressed. That's a narrow, opt-in-ish overlap; the common dual-use case
+ * (Claude Code + an OpenAI-flavor agent) is fully covered by the flavor gate.
+ *
+ * Trust model: in `auto` mode this trusts the upstream's `unified-*` headers, so
+ * a malicious/compromised Anthropic-compatible gateway could disarm the dollar
+ * wall by emitting fake subscription headers. That upstream already holds your
+ * API key (you pointed the proxy at it), and `net.ts` enforces https + warns on
+ * an unexpected host — so this isn't a new trust boundary. Pin `--plan` if you
+ * want suppression to be an explicit, upstream-independent choice.
+ */
+function dollarWallSuppressed(cfg, flavor, state, now) {
+    if (flavor !== "anthropic")
+        return false;
+    if (planIsSubscription(cfg.limits.plan))
+        return true;
+    const snap = state.snapshot;
+    return !!snap && now - snap.observedAt < WINDOW_MS["5h"] && !!(snap.fiveHour || snap.weekly);
+}
 export function startProxy(opts) {
     const cfg = loadConfig();
     const upstreamOrigin = assertSafeEndpoint(opts.upstream, "upstream").replace(/\/$/, "");
@@ -198,10 +247,10 @@ export function startProxy(opts) {
         const sessionId = req.headers["x-agent-guard-session"] || `proxy:${todayKey(now)}`;
         // 1) Pre-flight budget check — block before spending anything.
         // Escape hatch: while a human has paused enforcement, never block (but still meter).
-        // Subscription mode is ALERT-ONLY: once we've seen Anthropic's unified
-        // rate-limit headers, the session is on a flat-fee plan where dollars are
-        // meaningless, so we never 402 it — we only pace + warn.
-        const subscriptionMode = loadLimitsState().subscriptionDetected;
+        // Subscription mode is ALERT-ONLY: a flat-fee Pro/Max session is paced, not
+        // dollar-gated. Scope that suppression tightly (flavor + pinned plan / fresh
+        // headers) so it never disarms the wall for a genuinely-billed agent.
+        let subscriptionMode = dollarWallSuppressed(cfg, opts.flavor, loadLimitsState(), now);
         const ledger = loadLedger();
         const sessionUSD = ledger.sessions[sessionId]?.costUSD ?? 0;
         const dailyUSD = rollingDailyCost(ledger, now);
@@ -249,10 +298,13 @@ export function startProxy(opts) {
             res.end(JSON.stringify({ error: "kill-switch proxy: upstream fetch failed", detail: String(err) }));
             return;
         }
-        // 2.5) Read Anthropic's subscription rate-limit headers (alert-only).
+        // 2.5) Read Anthropic's subscription rate-limit headers (alert-only). If this
+        // response carried them, treat the session as subscription for alert purposes
+        // too — even if the pre-flight check (run before we'd seen any headers) didn't.
         if (opts.flavor === "anthropic") {
             try {
-                captureLimits(cfg, upstream.headers, sessionId, Date.now());
+                if (captureLimits(cfg, upstream.headers, sessionId, Date.now()))
+                    subscriptionMode = true;
             }
             catch {
                 /* limit capture must never break the proxied response */
@@ -294,11 +346,12 @@ export function startProxy(opts) {
                 // Re-load ledger (the request may have been concurrent) and meter.
                 const fresh = loadLedger();
                 meter(cfg, fresh, sessionId, parsed, Date.now());
-                // Post-meter soft-cap alert (once).
+                // Post-meter soft-cap alert (once). Skipped in subscription mode — the
+                // dollars are meaningless on a flat-fee plan, so a USD warn is just noise.
                 const after = fresh.sessions[sessionId]?.costUSD ?? 0;
                 const afterDaily = rollingDailyCost(fresh, Date.now());
                 const v2 = evaluate({ sessionUSD: after, dailyUSD: afterDaily }, cfg.budget);
-                if (v2.level === "warn" && !blockedNotified[`warn:${sessionId}`]) {
+                if (v2.level === "warn" && !subscriptionMode && !blockedNotified[`warn:${sessionId}`]) {
                     blockedNotified[`warn:${sessionId}`] = true;
                     dispatchAlert(cfg, {
                         ts: Date.now(), source: "proxy", sessionId, level: "warn",

package/dist/report.d.ts CHANGED Viewed

@@ -11,6 +11,8 @@
 import { type SessionRecord } from "./ledger.js";
 import { type Budget, type VerdictLevel } from "./budget.js";
 import { type PacingAssessment, type PacingLevel } from "./pacing.js";
+import type { GuardConfig } from "./config.js";
+import type { Ledger } from "./ledger.js";
 export interface LimitsReport {
     /** Where the numbers came from. "none" = no data and no pinned plan to estimate from. */
     source: "headers" | "estimated" | "none";
@@ -35,6 +37,12 @@ export interface StatusReport {
     /** Subscription rate-limit pacing — present whenever we have data to show. */
     limits: LimitsReport;
 }
+/**
+ * Compute the subscription rate-limit section. Exported so the Claude Code hook
+ * can reuse its already-loaded cfg + ledger instead of paying for a second
+ * loadConfig/loadLedger on every tool call.
+ */
+export declare function buildLimitsReport(cfg: GuardConfig, ledger: Ledger, now: number): LimitsReport;
 /**
  * Render the subscription rate-limit section as plain text lines (no color), so
  * both the `agent-guard` and `ks guard` status views stay identical. Returns an

package/dist/report.js CHANGED Viewed

@@ -12,25 +12,38 @@ import { loadConfig } from "./config.js";
 import { isPaused, pauseExpiry } from "./config.js";
 import { loadLedger, rollingDailyCost } from "./ledger.js";
 import { evaluate } from "./budget.js";
-import { loadLimitsState } from "./limits.js";
+import { loadLimitsState, WINDOW_MS } from "./limits.js";
 import { assessSnapshot, worstLevel } from "./pacing.js";
 import { estimateSnapshot } from "./estimate.js";
 const DAY_MS = 24 * 60 * 60 * 1000;
-function buildLimitsReport(cfg, ledger, now) {
+/**
+ * Compute the subscription rate-limit section. Exported so the Claude Code hook
+ * can reuse its already-loaded cfg + ledger instead of paying for a second
+ * loadConfig/loadLedger on every tool call.
+ */
+export function buildLimitsReport(cfg, ledger, now) {
     const state = loadLimitsState();
     const thresholds = cfg.limits;
     const plan = cfg.limits.plan;
-    // Prefer real header data when we have it.
-    if (state.snapshot) {
-        const windows = assessSnapshot(state.snapshot, thresholds, now);
-        return {
-            source: "headers",
-            plan,
-            subscriptionDetected: state.subscriptionDetected,
-            observedAt: state.snapshot.observedAt,
-            windows,
-            level: worstLevel(windows),
-        };
+    // Prefer real header data — but only while it's still usable. A snapshot older
+    // than the weekly window is too stale to trust at all; and any single window
+    // whose reset time has already passed has since rolled over (its utilization is
+    // from a prior window), so we drop it rather than present expired numbers — and
+    // a reset time in the past — as if they were live. If nothing usable remains we
+    // fall through to the estimate (or "none").
+    const snap = state.snapshot;
+    if (snap && now - snap.observedAt < WINDOW_MS.weekly) {
+        const windows = assessSnapshot(snap, thresholds, now).filter((w) => !(w.resetAt != null && w.resetAt <= now));
+        if (windows.length) {
+            return {
+                source: "headers",
+                plan,
+                subscriptionDetected: state.subscriptionDetected,
+                observedAt: snap.observedAt,
+                windows,
+                level: worstLevel(windows),
+            };
+        }
     }
     // Otherwise estimate, but only when the user pinned a tier (opt-in, fuzzy).
     if (plan === "pro" || plan === "max5" || plan === "max20") {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@kill-switch/agent-guard",
-  "version": "0.1.3",
+  "version": "0.1.5",
   "description": "Kill Switch for coding agents — stop runaway Claude Code / Cursor / Aider sessions from racking up an LLM bill. Native hook + token-metering proxy with per-session and daily-rolling budgets.",
   "type": "module",
   "bin": {