@kill-switch/agent-guard 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -1
- package/dist/cli.js +4 -3
- package/dist/estimate.js +6 -12
- package/dist/hook.js +4 -4
- package/dist/index.d.ts +1 -1
- package/dist/index.js +1 -1
- package/dist/limits.d.ts +5 -0
- package/dist/limits.js +38 -2
- package/dist/ops.d.ts +7 -1
- package/dist/ops.js +14 -3
- package/dist/proxy.js +77 -14
- package/dist/report.d.ts +8 -0
- package/dist/report.js +26 -13
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -152,6 +152,13 @@ Tune the thresholds (0–1 utilization) if the defaults are too eager:
|
|
|
152
152
|
| `--5h-soft` / `--5h-danger` | 5-hour warn / danger utilization | 0.7 / 0.9 |
|
|
153
153
|
| `--burn-ratio` | pace multiplier that triggers a warning | 1.5 |
|
|
154
154
|
|
|
155
|
+
The first time the proxy sees the `unified-*` headers it writes the raw values once to
|
|
156
|
+
`~/.kill-switch/agent-guard/events.jsonl` (`kind: "unified-headers-observed"`) — so you can
|
|
157
|
+
confirm Anthropic's exact value formats with a single `cat`. Only `unified-*` headers are
|
|
158
|
+
captured (an explicit allowlist — never `Authorization` / `x-api-key` / cookies), values are
|
|
159
|
+
length-capped, and the dump stays local. In `auto` mode the dollar-wall suppression trusts the
|
|
160
|
+
upstream's headers; pin `--plan` if you'd rather it not depend on what the upstream reports.
|
|
161
|
+
|
|
155
162
|
> Because subscription mode is alert-only, the "don't run both hook *and* proxy" caveat below
|
|
156
163
|
> doesn't bite here — running Claude Code through the proxy is exactly what feeds the limit
|
|
157
164
|
> headers, and dollars no longer gate anything.
|
|
@@ -184,7 +191,7 @@ agent-guard proxy [--port 8787] [--flavor anthropic|openai] [--upstream URL]
|
|
|
184
191
|
agent-guard status [--json] spend vs budget + plan limits
|
|
185
192
|
agent-guard config [--session-hard N ...] view/set caps
|
|
186
193
|
agent-guard config [--plan max5 --weekly-soft 0.6 ...] view/set plan limits
|
|
187
|
-
agent-guard reset [--all|--today|--session <id>] clear the ledger
|
|
194
|
+
agent-guard reset [--all|--limits|--today|--session <id>] clear the ledger / subscription-limit state
|
|
188
195
|
agent-guard hook (internal) Claude Code entrypoint
|
|
189
196
|
```
|
|
190
197
|
|
package/dist/cli.js
CHANGED
|
@@ -213,11 +213,12 @@ program
|
|
|
213
213
|
// ── reset ────────────────────────────────────────────────────────────────────
|
|
214
214
|
program
|
|
215
215
|
.command("reset")
|
|
216
|
-
.description("Clear the spend ledger")
|
|
217
|
-
.option("--all", "Wipe all sessions")
|
|
216
|
+
.description("Clear the spend ledger and/or subscription-limit state")
|
|
217
|
+
.option("--all", "Wipe all sessions + subscription-limit state")
|
|
218
|
+
.option("--limits", "Clear subscription detection latch + snapshot only")
|
|
218
219
|
.option("--session <id>", "Clear a single session")
|
|
219
220
|
.option("--today", "Clear sessions active today")
|
|
220
221
|
.action((opts) => {
|
|
221
|
-
console.log(`✅ ${resetLedger({ all: opts.all, session: opts.session, today: opts.today })}`);
|
|
222
|
+
console.log(`✅ ${resetLedger({ all: opts.all, limits: opts.limits, session: opts.session, today: opts.today })}`);
|
|
222
223
|
});
|
|
223
224
|
program.parseAsync();
|
package/dist/estimate.js
CHANGED
|
@@ -48,19 +48,13 @@ export function estimateSnapshot(ledger, tier, now, budgets = TIER_BUDGETS) {
|
|
|
48
48
|
const fiveTokens = tokensInWindow(ledger, now, FIVE_HOUR_MS);
|
|
49
49
|
const weekTokens = tokensInWindow(ledger, now, WEEK_MS);
|
|
50
50
|
const clamp = (n) => Math.max(0, Math.min(1, n));
|
|
51
|
+
// resetAt is null, not fabricated: we have no per-event time series, so the
|
|
52
|
+
// true rolling reset is unknowable. A null reset means pacing reports
|
|
53
|
+
// utilization only (no burn-rate, no lockout projection, no bogus reset time) —
|
|
54
|
+
// the honest behaviour for an estimate.
|
|
51
55
|
return {
|
|
52
|
-
fiveHour: {
|
|
53
|
-
|
|
54
|
-
// Without a per-event time series we can't know the true rolling reset;
|
|
55
|
-
// report a full window from now as a conservative (latest-possible) reset.
|
|
56
|
-
resetAt: now + FIVE_HOUR_MS,
|
|
57
|
-
status: "estimated",
|
|
58
|
-
},
|
|
59
|
-
weekly: {
|
|
60
|
-
utilization: clamp(weekTokens / b.weeklyTokens),
|
|
61
|
-
resetAt: now + WEEK_MS,
|
|
62
|
-
status: "estimated",
|
|
63
|
-
},
|
|
56
|
+
fiveHour: { utilization: clamp(fiveTokens / b.fiveHourTokens), resetAt: null, status: "estimated" },
|
|
57
|
+
weekly: { utilization: clamp(weekTokens / b.weeklyTokens), resetAt: null, status: "estimated" },
|
|
64
58
|
status: "estimated",
|
|
65
59
|
observedAt: now,
|
|
66
60
|
};
|
package/dist/hook.js
CHANGED
|
@@ -24,7 +24,7 @@ import { parseTranscript } from "./transcript.js";
|
|
|
24
24
|
import { loadLedger, saveLedger, setSessionCost, rollingDailyCost, prune, } from "./ledger.js";
|
|
25
25
|
import { evaluate, warnKey } from "./budget.js";
|
|
26
26
|
import { dispatchAlert } from "./alert.js";
|
|
27
|
-
import {
|
|
27
|
+
import { buildLimitsReport } from "./report.js";
|
|
28
28
|
function readStdin() {
|
|
29
29
|
return new Promise((resolve) => {
|
|
30
30
|
let data = "";
|
|
@@ -151,7 +151,7 @@ export async function runHook() {
|
|
|
151
151
|
// snapshot the proxy persisted from Anthropic's headers (or a tier estimate),
|
|
152
152
|
// so even a hook-only session learns when it's about to lock out. Deduped per
|
|
153
153
|
// window+level so it doesn't repeat every tool call.
|
|
154
|
-
const limitMsg = limitNudge(rec, ledger, now);
|
|
154
|
+
const limitMsg = limitNudge(cfg, rec, ledger, now);
|
|
155
155
|
// Surface the warn nudge only on the first trip per scope (shouldAlert), not
|
|
156
156
|
// on every subsequent tool call — otherwise the agent's context fills with
|
|
157
157
|
// duplicate notices. After that, warnings stay silent until the hard cap.
|
|
@@ -175,9 +175,9 @@ export async function runHook() {
|
|
|
175
175
|
* session's notified map (and persists it) so the same warning doesn't repeat on
|
|
176
176
|
* every tool call. Returns null when there's nothing to surface.
|
|
177
177
|
*/
|
|
178
|
-
function limitNudge(rec, ledger, now) {
|
|
178
|
+
function limitNudge(cfg, rec, ledger, now) {
|
|
179
179
|
try {
|
|
180
|
-
const limits =
|
|
180
|
+
const limits = buildLimitsReport(cfg, ledger, now);
|
|
181
181
|
if (!limits.windows.length)
|
|
182
182
|
return null;
|
|
183
183
|
const urgent = limits.windows.find((w) => w.level === "danger") ?? limits.windows.find((w) => w.level === "warn");
|
package/dist/index.d.ts
CHANGED
|
@@ -17,7 +17,7 @@ export { dispatchAlert, type AlertEvent, type AlertLevel } from "./alert.js";
|
|
|
17
17
|
export { startProxy, resolveUpstream, type ProxyOptions } from "./proxy.js";
|
|
18
18
|
export { runHook } from "./hook.js";
|
|
19
19
|
export { buildStatusReport, formatLimitsLines, type StatusReport, type LimitsReport } from "./report.js";
|
|
20
|
-
export { parseUnifiedHeaders, parseUtilization, parseReset, recordHeaders, loadLimitsState, saveLimitsState, emptyLimitsState, limitNotifyKey, WINDOW_MS, type LimitSnapshot, type WindowState, type LimitsState, type LimitWindow, type HeaderGetter, } from "./limits.js";
|
|
20
|
+
export { parseUnifiedHeaders, parseUtilization, parseReset, recordHeaders, loadLimitsState, saveLimitsState, emptyLimitsState, limitNotifyKey, unifiedHeaderDump, logUnifiedHeaders, WINDOW_MS, type LimitSnapshot, type WindowState, type LimitsState, type LimitWindow, type HeaderGetter, } from "./limits.js";
|
|
21
21
|
export { assessWindow, assessSnapshot, worstLevel, type PacingAssessment, type PacingLevel, type PacingThresholds, } from "./pacing.js";
|
|
22
22
|
export { estimateSnapshot, isEstimated, TIER_BUDGETS, type PlanTier, type TierBudget, } from "./estimate.js";
|
|
23
23
|
export { installHook, setBudget, setLimits, resetLedger, type InstallOptions, type InstallResult, type BudgetPatch, type LimitsPatch, } from "./ops.js";
|
package/dist/index.js
CHANGED
|
@@ -17,7 +17,7 @@ export { dispatchAlert } from "./alert.js";
|
|
|
17
17
|
export { startProxy, resolveUpstream } from "./proxy.js";
|
|
18
18
|
export { runHook } from "./hook.js";
|
|
19
19
|
export { buildStatusReport, formatLimitsLines } from "./report.js";
|
|
20
|
-
export { parseUnifiedHeaders, parseUtilization, parseReset, recordHeaders, loadLimitsState, saveLimitsState, emptyLimitsState, limitNotifyKey, WINDOW_MS, } from "./limits.js";
|
|
20
|
+
export { parseUnifiedHeaders, parseUtilization, parseReset, recordHeaders, loadLimitsState, saveLimitsState, emptyLimitsState, limitNotifyKey, unifiedHeaderDump, logUnifiedHeaders, WINDOW_MS, } from "./limits.js";
|
|
21
21
|
export { assessWindow, assessSnapshot, worstLevel, } from "./pacing.js";
|
|
22
22
|
export { estimateSnapshot, isEstimated, TIER_BUDGETS, } from "./estimate.js";
|
|
23
23
|
export { installHook, setBudget, setLimits, resetLedger, } from "./ops.js";
|
package/dist/limits.d.ts
CHANGED
|
@@ -49,6 +49,8 @@ export interface LimitsState {
|
|
|
49
49
|
snapshot: LimitSnapshot | null;
|
|
50
50
|
/** Dedup flags so a given window/level/reset only alerts once. */
|
|
51
51
|
notified: Record<string, boolean>;
|
|
52
|
+
/** Epoch ms we first logged the raw unified-* headers (write-once diagnostic). */
|
|
53
|
+
headersLoggedAt?: number;
|
|
52
54
|
}
|
|
53
55
|
/** Nominal window durations, used for pacing math when a reset time is unknown. */
|
|
54
56
|
export declare const WINDOW_MS: Record<LimitWindow, number>;
|
|
@@ -81,3 +83,6 @@ export declare function parseReset(raw: string | null | undefined, now: number):
|
|
|
81
83
|
export declare function parseUnifiedHeaders(h: HeaderGetter, now: number): LimitSnapshot | null;
|
|
82
84
|
/** Stable dedup key for a pacing alert: re-alerts when the window resets. */
|
|
83
85
|
export declare function limitNotifyKey(window: LimitWindow, level: string, resetAt: number | null): string;
|
|
86
|
+
export declare function unifiedHeaderDump(rec: Record<string, string | string[] | undefined>): Record<string, string>;
|
|
87
|
+
/** Append a one-time raw-header diagnostic to events.jsonl. Best-effort, never throws. */
|
|
88
|
+
export declare function logUnifiedHeaders(dump: Record<string, string>, now: number): void;
|
package/dist/limits.js
CHANGED
|
@@ -22,8 +22,8 @@
|
|
|
22
22
|
* either a 0–1 fraction or a 0–100 percent; reset is accepted as an ISO 8601
|
|
23
23
|
* timestamp, an epoch (s or ms), or a relative seconds-until-reset.
|
|
24
24
|
*/
|
|
25
|
-
import { readFileSync, writeFileSync, renameSync } from "node:fs";
|
|
26
|
-
import { limitsPath, ensureGuardDir } from "./config.js";
|
|
25
|
+
import { readFileSync, writeFileSync, renameSync, appendFileSync } from "node:fs";
|
|
26
|
+
import { limitsPath, eventsPath, ensureGuardDir } from "./config.js";
|
|
27
27
|
/** Nominal window durations, used for pacing math when a reset time is unknown. */
|
|
28
28
|
export const WINDOW_MS = {
|
|
29
29
|
"5h": 5 * 60 * 60 * 1000,
|
|
@@ -41,6 +41,7 @@ export function loadLimitsState() {
|
|
|
41
41
|
subscriptionDetected: data.subscriptionDetected ?? false,
|
|
42
42
|
snapshot: data.snapshot ?? null,
|
|
43
43
|
notified: data.notified ?? {},
|
|
44
|
+
headersLoggedAt: data.headersLoggedAt,
|
|
44
45
|
};
|
|
45
46
|
}
|
|
46
47
|
}
|
|
@@ -131,3 +132,38 @@ export function parseUnifiedHeaders(h, now) {
|
|
|
131
132
|
export function limitNotifyKey(window, level, resetAt) {
|
|
132
133
|
return `${window}:${level}:${resetAt ?? 0}`;
|
|
133
134
|
}
|
|
135
|
+
/**
|
|
136
|
+
* Pull every `anthropic-ratelimit-unified-*` header out of a raw record, verbatim.
|
|
137
|
+
* Used for the write-once diagnostic — Anthropic's value *formats* (fraction vs.
|
|
138
|
+
* percent, ISO vs. epoch reset) aren't fully documented, so capturing the raw
|
|
139
|
+
* strings the first time we see them makes verification a single `cat` away.
|
|
140
|
+
*
|
|
141
|
+
* Security: this is an explicit **allowlist** by the `anthropic-ratelimit-unified`
|
|
142
|
+
* prefix — credential headers (Authorization, x-api-key, cookies) are never
|
|
143
|
+
* captured, even though the caller hands us the full response header set. Values
|
|
144
|
+
* are length-capped so a hostile/compromised upstream can't bloat the log.
|
|
145
|
+
*/
|
|
146
|
+
const MAX_DUMP_VALUE = 256;
|
|
147
|
+
export function unifiedHeaderDump(rec) {
|
|
148
|
+
const out = {};
|
|
149
|
+
for (const [k, v] of Object.entries(rec)) {
|
|
150
|
+
if (v == null)
|
|
151
|
+
continue;
|
|
152
|
+
const key = k.toLowerCase();
|
|
153
|
+
if (!key.startsWith("anthropic-ratelimit-unified"))
|
|
154
|
+
continue;
|
|
155
|
+
const val = Array.isArray(v) ? v.join(", ") : v;
|
|
156
|
+
out[key] = val.length > MAX_DUMP_VALUE ? val.slice(0, MAX_DUMP_VALUE) + "…[truncated]" : val;
|
|
157
|
+
}
|
|
158
|
+
return out;
|
|
159
|
+
}
|
|
160
|
+
/** Append a one-time raw-header diagnostic to events.jsonl. Best-effort, never throws. */
|
|
161
|
+
export function logUnifiedHeaders(dump, now) {
|
|
162
|
+
try {
|
|
163
|
+
ensureGuardDir();
|
|
164
|
+
appendFileSync(eventsPath(), JSON.stringify({ ts: now, kind: "unified-headers-observed", headers: dump }) + "\n");
|
|
165
|
+
}
|
|
166
|
+
catch {
|
|
167
|
+
/* diagnostic only */
|
|
168
|
+
}
|
|
169
|
+
}
|
package/dist/ops.d.ts
CHANGED
|
@@ -45,9 +45,15 @@ export interface LimitsPatch {
|
|
|
45
45
|
}
|
|
46
46
|
/** Write subscription-limit overrides to the config file. Returns the saved limits. */
|
|
47
47
|
export declare function setLimits(patch: LimitsPatch): LimitsConfig;
|
|
48
|
-
/**
|
|
48
|
+
/**
|
|
49
|
+
* Clear guard state. Scope: all (ledger + limits) | limits only | a single
|
|
50
|
+
* session | today's sessions. The `limits` scope clears the subscription
|
|
51
|
+
* detection latch + last snapshot — useful when you stop using a Pro/Max plan
|
|
52
|
+
* and want the dollar wall fully re-armed.
|
|
53
|
+
*/
|
|
49
54
|
export declare function resetLedger(opts: {
|
|
50
55
|
all?: boolean;
|
|
56
|
+
limits?: boolean;
|
|
51
57
|
session?: string;
|
|
52
58
|
today?: boolean;
|
|
53
59
|
}): string;
|
package/dist/ops.js
CHANGED
|
@@ -8,6 +8,7 @@ import { join, dirname } from "node:path";
|
|
|
8
8
|
import { homedir } from "node:os";
|
|
9
9
|
import { configPath, ensureGuardDir, DEFAULT_BUDGET, DEFAULT_LIMITS } from "./config.js";
|
|
10
10
|
import { loadLedger, saveLedger, emptyLedger } from "./ledger.js";
|
|
11
|
+
import { saveLimitsState, emptyLimitsState } from "./limits.js";
|
|
11
12
|
/**
|
|
12
13
|
* Wire the agent-guard hook into Claude Code settings for PreToolUse,
|
|
13
14
|
* UserPromptSubmit, and Stop. Idempotent: re-running adds nothing if the hook
|
|
@@ -99,11 +100,21 @@ export function setLimits(patch) {
|
|
|
99
100
|
writeFileSync(configPath(), JSON.stringify(file, null, 2) + "\n");
|
|
100
101
|
return limits;
|
|
101
102
|
}
|
|
102
|
-
/**
|
|
103
|
+
/**
|
|
104
|
+
* Clear guard state. Scope: all (ledger + limits) | limits only | a single
|
|
105
|
+
* session | today's sessions. The `limits` scope clears the subscription
|
|
106
|
+
* detection latch + last snapshot — useful when you stop using a Pro/Max plan
|
|
107
|
+
* and want the dollar wall fully re-armed.
|
|
108
|
+
*/
|
|
103
109
|
export function resetLedger(opts) {
|
|
104
110
|
if (opts.all) {
|
|
105
111
|
saveLedger(emptyLedger());
|
|
106
|
-
|
|
112
|
+
saveLimitsState(emptyLimitsState());
|
|
113
|
+
return "Ledger + subscription-limit state wiped.";
|
|
114
|
+
}
|
|
115
|
+
if (opts.limits) {
|
|
116
|
+
saveLimitsState(emptyLimitsState());
|
|
117
|
+
return "Subscription-limit state cleared (detection latch + snapshot).";
|
|
107
118
|
}
|
|
108
119
|
const ledger = loadLedger();
|
|
109
120
|
if (opts.session) {
|
|
@@ -120,5 +131,5 @@ export function resetLedger(opts) {
|
|
|
120
131
|
saveLedger(ledger);
|
|
121
132
|
return "Cleared today's sessions.";
|
|
122
133
|
}
|
|
123
|
-
return "Specify all, session <id>, or today.";
|
|
134
|
+
return "Specify all, limits, session <id>, or today.";
|
|
124
135
|
}
|
package/dist/proxy.js
CHANGED
|
@@ -24,7 +24,7 @@ import { loadLedger, saveLedger, addSessionCost, rollingDailyCost, prune, } from
|
|
|
24
24
|
import { evaluate } from "./budget.js";
|
|
25
25
|
import { dispatchAlert } from "./alert.js";
|
|
26
26
|
import { assertSafeEndpoint, warnIfUnexpectedHost } from "./net.js";
|
|
27
|
-
import { parseUnifiedHeaders, loadLimitsState, saveLimitsState, limitNotifyKey, } from "./limits.js";
|
|
27
|
+
import { parseUnifiedHeaders, recordHeaders, unifiedHeaderDump, logUnifiedHeaders, loadLimitsState, saveLimitsState, limitNotifyKey, WINDOW_MS, } from "./limits.js";
|
|
28
28
|
import { assessSnapshot, worstLevel } from "./pacing.js";
|
|
29
29
|
const UPSTREAMS = {
|
|
30
30
|
anthropic: "https://api.anthropic.com",
|
|
@@ -145,23 +145,48 @@ function meter(cfg, ledger, sessionId, parsed, now) {
|
|
|
145
145
|
* the real wall).
|
|
146
146
|
*/
|
|
147
147
|
function captureLimits(cfg, headers, sessionId, now) {
|
|
148
|
-
|
|
148
|
+
// Flatten to a lowercased record so we can both parse and dump the raw values.
|
|
149
|
+
const rec = {};
|
|
150
|
+
headers.forEach((v, k) => {
|
|
151
|
+
rec[k.toLowerCase()] = v;
|
|
152
|
+
});
|
|
153
|
+
const snap = parseUnifiedHeaders(recordHeaders(rec), now);
|
|
149
154
|
if (!snap)
|
|
150
155
|
return false;
|
|
151
156
|
const state = loadLimitsState();
|
|
152
|
-
|
|
153
|
-
state.
|
|
157
|
+
// Write-once raw-header diagnostic for format verification (`cat events.jsonl`).
|
|
158
|
+
if (!state.headersLoggedAt) {
|
|
159
|
+
logUnifiedHeaders(unifiedHeaderDump(rec), now);
|
|
160
|
+
state.headersLoggedAt = now;
|
|
161
|
+
}
|
|
162
|
+
// Which windows newly cross into warn/danger (dedup vs. what we've alerted).
|
|
154
163
|
const assessments = assessSnapshot(snap, cfg.limits, now);
|
|
164
|
+
const newlyNotified = [];
|
|
155
165
|
const fresh = assessments.filter((a) => {
|
|
156
166
|
if (a.level === "ok")
|
|
157
167
|
return false;
|
|
158
168
|
const key = limitNotifyKey(a.window, a.level, a.resetAt);
|
|
159
169
|
if (state.notified[key])
|
|
160
170
|
return false;
|
|
161
|
-
|
|
171
|
+
newlyNotified.push(key);
|
|
162
172
|
return true;
|
|
163
173
|
});
|
|
164
|
-
|
|
174
|
+
// Re-read at write time to mitigate read-modify-write races: the file write is
|
|
175
|
+
// atomic (no corruption), but a concurrent response could otherwise clobber a
|
|
176
|
+
// newer snapshot or a just-set notified flag. Keep the newest snapshot by
|
|
177
|
+
// observedAt; union the notified flags.
|
|
178
|
+
const onDisk = loadLimitsState();
|
|
179
|
+
const keepNewer = onDisk.snapshot && onDisk.snapshot.observedAt > snap.observedAt;
|
|
180
|
+
const merged = {
|
|
181
|
+
version: 1,
|
|
182
|
+
subscriptionDetected: true,
|
|
183
|
+
snapshot: keepNewer ? onDisk.snapshot : snap,
|
|
184
|
+
notified: { ...onDisk.notified, ...state.notified },
|
|
185
|
+
headersLoggedAt: onDisk.headersLoggedAt ?? state.headersLoggedAt,
|
|
186
|
+
};
|
|
187
|
+
for (const key of newlyNotified)
|
|
188
|
+
merged.notified[key] = true;
|
|
189
|
+
saveLimitsState(merged);
|
|
165
190
|
if (fresh.length) {
|
|
166
191
|
const level = worstLevel(fresh);
|
|
167
192
|
dispatchAlert(cfg, {
|
|
@@ -179,6 +204,40 @@ function captureLimits(cfg, headers, sessionId, now) {
|
|
|
179
204
|
}
|
|
180
205
|
return true;
|
|
181
206
|
}
|
|
207
|
+
function planIsSubscription(plan) {
|
|
208
|
+
return plan === "pro" || plan === "max5" || plan === "max20";
|
|
209
|
+
}
|
|
210
|
+
/**
|
|
211
|
+
* Should the dollar hard-cap 402 be suppressed for THIS proxy/request?
|
|
212
|
+
*
|
|
213
|
+
* Only for the **Anthropic** flavor — an OpenAI / other-API agent is billed per
|
|
214
|
+
* token and must keep its wall, even if a *different* (Claude Code) session once
|
|
215
|
+
* latched subscription mode on the shared `limits.json`. And only when we have a
|
|
216
|
+
* live reason to believe this is a flat-fee plan: either the operator pinned a
|
|
217
|
+
* subscription tier (`--plan`), or we saw real `unified-*` headers **recently**
|
|
218
|
+
* (within the 5-hour window). A stale, months-old detection must never disarm
|
|
219
|
+
* the wall — that's the bug this replaces (a permanent global latch).
|
|
220
|
+
*
|
|
221
|
+
* Residual edge: an Anthropic-flavor *API-key* agent run within 5h of a Claude
|
|
222
|
+
* Code subscription session (or under a pinned `--plan`) would also be
|
|
223
|
+
* suppressed. That's a narrow, opt-in-ish overlap; the common dual-use case
|
|
224
|
+
* (Claude Code + an OpenAI-flavor agent) is fully covered by the flavor gate.
|
|
225
|
+
*
|
|
226
|
+
* Trust model: in `auto` mode this trusts the upstream's `unified-*` headers, so
|
|
227
|
+
* a malicious/compromised Anthropic-compatible gateway could disarm the dollar
|
|
228
|
+
* wall by emitting fake subscription headers. That upstream already holds your
|
|
229
|
+
* API key (you pointed the proxy at it), and `net.ts` enforces https + warns on
|
|
230
|
+
* an unexpected host — so this isn't a new trust boundary. Pin `--plan` if you
|
|
231
|
+
* want suppression to be an explicit, upstream-independent choice.
|
|
232
|
+
*/
|
|
233
|
+
function dollarWallSuppressed(cfg, flavor, state, now) {
|
|
234
|
+
if (flavor !== "anthropic")
|
|
235
|
+
return false;
|
|
236
|
+
if (planIsSubscription(cfg.limits.plan))
|
|
237
|
+
return true;
|
|
238
|
+
const snap = state.snapshot;
|
|
239
|
+
return !!snap && now - snap.observedAt < WINDOW_MS["5h"] && !!(snap.fiveHour || snap.weekly);
|
|
240
|
+
}
|
|
182
241
|
export function startProxy(opts) {
|
|
183
242
|
const cfg = loadConfig();
|
|
184
243
|
const upstreamOrigin = assertSafeEndpoint(opts.upstream, "upstream").replace(/\/$/, "");
|
|
@@ -188,10 +247,10 @@ export function startProxy(opts) {
|
|
|
188
247
|
const sessionId = req.headers["x-agent-guard-session"] || `proxy:${todayKey(now)}`;
|
|
189
248
|
// 1) Pre-flight budget check — block before spending anything.
|
|
190
249
|
// Escape hatch: while a human has paused enforcement, never block (but still meter).
|
|
191
|
-
// Subscription mode is ALERT-ONLY:
|
|
192
|
-
//
|
|
193
|
-
//
|
|
194
|
-
|
|
250
|
+
// Subscription mode is ALERT-ONLY: a flat-fee Pro/Max session is paced, not
|
|
251
|
+
// dollar-gated. Scope that suppression tightly (flavor + pinned plan / fresh
|
|
252
|
+
// headers) so it never disarms the wall for a genuinely-billed agent.
|
|
253
|
+
let subscriptionMode = dollarWallSuppressed(cfg, opts.flavor, loadLimitsState(), now);
|
|
195
254
|
const ledger = loadLedger();
|
|
196
255
|
const sessionUSD = ledger.sessions[sessionId]?.costUSD ?? 0;
|
|
197
256
|
const dailyUSD = rollingDailyCost(ledger, now);
|
|
@@ -239,10 +298,13 @@ export function startProxy(opts) {
|
|
|
239
298
|
res.end(JSON.stringify({ error: "kill-switch proxy: upstream fetch failed", detail: String(err) }));
|
|
240
299
|
return;
|
|
241
300
|
}
|
|
242
|
-
// 2.5) Read Anthropic's subscription rate-limit headers (alert-only).
|
|
301
|
+
// 2.5) Read Anthropic's subscription rate-limit headers (alert-only). If this
|
|
302
|
+
// response carried them, treat the session as subscription for alert purposes
|
|
303
|
+
// too — even if the pre-flight check (run before we'd seen any headers) didn't.
|
|
243
304
|
if (opts.flavor === "anthropic") {
|
|
244
305
|
try {
|
|
245
|
-
captureLimits(cfg, upstream.headers, sessionId, Date.now())
|
|
306
|
+
if (captureLimits(cfg, upstream.headers, sessionId, Date.now()))
|
|
307
|
+
subscriptionMode = true;
|
|
246
308
|
}
|
|
247
309
|
catch {
|
|
248
310
|
/* limit capture must never break the proxied response */
|
|
@@ -284,11 +346,12 @@ export function startProxy(opts) {
|
|
|
284
346
|
// Re-load ledger (the request may have been concurrent) and meter.
|
|
285
347
|
const fresh = loadLedger();
|
|
286
348
|
meter(cfg, fresh, sessionId, parsed, Date.now());
|
|
287
|
-
// Post-meter soft-cap alert (once).
|
|
349
|
+
// Post-meter soft-cap alert (once). Skipped in subscription mode — the
|
|
350
|
+
// dollars are meaningless on a flat-fee plan, so a USD warn is just noise.
|
|
288
351
|
const after = fresh.sessions[sessionId]?.costUSD ?? 0;
|
|
289
352
|
const afterDaily = rollingDailyCost(fresh, Date.now());
|
|
290
353
|
const v2 = evaluate({ sessionUSD: after, dailyUSD: afterDaily }, cfg.budget);
|
|
291
|
-
if (v2.level === "warn" && !blockedNotified[`warn:${sessionId}`]) {
|
|
354
|
+
if (v2.level === "warn" && !subscriptionMode && !blockedNotified[`warn:${sessionId}`]) {
|
|
292
355
|
blockedNotified[`warn:${sessionId}`] = true;
|
|
293
356
|
dispatchAlert(cfg, {
|
|
294
357
|
ts: Date.now(), source: "proxy", sessionId, level: "warn",
|
package/dist/report.d.ts
CHANGED
|
@@ -11,6 +11,8 @@
|
|
|
11
11
|
import { type SessionRecord } from "./ledger.js";
|
|
12
12
|
import { type Budget, type VerdictLevel } from "./budget.js";
|
|
13
13
|
import { type PacingAssessment, type PacingLevel } from "./pacing.js";
|
|
14
|
+
import type { GuardConfig } from "./config.js";
|
|
15
|
+
import type { Ledger } from "./ledger.js";
|
|
14
16
|
export interface LimitsReport {
|
|
15
17
|
/** Where the numbers came from. "none" = no data and no pinned plan to estimate from. */
|
|
16
18
|
source: "headers" | "estimated" | "none";
|
|
@@ -35,6 +37,12 @@ export interface StatusReport {
|
|
|
35
37
|
/** Subscription rate-limit pacing — present whenever we have data to show. */
|
|
36
38
|
limits: LimitsReport;
|
|
37
39
|
}
|
|
40
|
+
/**
|
|
41
|
+
* Compute the subscription rate-limit section. Exported so the Claude Code hook
|
|
42
|
+
* can reuse its already-loaded cfg + ledger instead of paying for a second
|
|
43
|
+
* loadConfig/loadLedger on every tool call.
|
|
44
|
+
*/
|
|
45
|
+
export declare function buildLimitsReport(cfg: GuardConfig, ledger: Ledger, now: number): LimitsReport;
|
|
38
46
|
/**
|
|
39
47
|
* Render the subscription rate-limit section as plain text lines (no color), so
|
|
40
48
|
* both the `agent-guard` and `ks guard` status views stay identical. Returns an
|
package/dist/report.js
CHANGED
|
@@ -12,25 +12,38 @@ import { loadConfig } from "./config.js";
|
|
|
12
12
|
import { isPaused, pauseExpiry } from "./config.js";
|
|
13
13
|
import { loadLedger, rollingDailyCost } from "./ledger.js";
|
|
14
14
|
import { evaluate } from "./budget.js";
|
|
15
|
-
import { loadLimitsState } from "./limits.js";
|
|
15
|
+
import { loadLimitsState, WINDOW_MS } from "./limits.js";
|
|
16
16
|
import { assessSnapshot, worstLevel } from "./pacing.js";
|
|
17
17
|
import { estimateSnapshot } from "./estimate.js";
|
|
18
18
|
const DAY_MS = 24 * 60 * 60 * 1000;
|
|
19
|
-
|
|
19
|
+
/**
|
|
20
|
+
* Compute the subscription rate-limit section. Exported so the Claude Code hook
|
|
21
|
+
* can reuse its already-loaded cfg + ledger instead of paying for a second
|
|
22
|
+
* loadConfig/loadLedger on every tool call.
|
|
23
|
+
*/
|
|
24
|
+
export function buildLimitsReport(cfg, ledger, now) {
|
|
20
25
|
const state = loadLimitsState();
|
|
21
26
|
const thresholds = cfg.limits;
|
|
22
27
|
const plan = cfg.limits.plan;
|
|
23
|
-
// Prefer real header data
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
28
|
+
// Prefer real header data — but only while it's still usable. A snapshot older
|
|
29
|
+
// than the weekly window is too stale to trust at all; and any single window
|
|
30
|
+
// whose reset time has already passed has since rolled over (its utilization is
|
|
31
|
+
// from a prior window), so we drop it rather than present expired numbers — and
|
|
32
|
+
// a reset time in the past — as if they were live. If nothing usable remains we
|
|
33
|
+
// fall through to the estimate (or "none").
|
|
34
|
+
const snap = state.snapshot;
|
|
35
|
+
if (snap && now - snap.observedAt < WINDOW_MS.weekly) {
|
|
36
|
+
const windows = assessSnapshot(snap, thresholds, now).filter((w) => !(w.resetAt != null && w.resetAt <= now));
|
|
37
|
+
if (windows.length) {
|
|
38
|
+
return {
|
|
39
|
+
source: "headers",
|
|
40
|
+
plan,
|
|
41
|
+
subscriptionDetected: state.subscriptionDetected,
|
|
42
|
+
observedAt: snap.observedAt,
|
|
43
|
+
windows,
|
|
44
|
+
level: worstLevel(windows),
|
|
45
|
+
};
|
|
46
|
+
}
|
|
34
47
|
}
|
|
35
48
|
// Otherwise estimate, but only when the user pinned a tier (opt-in, fuzzy).
|
|
36
49
|
if (plan === "pro" || plan === "max5" || plan === "max20") {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@kill-switch/agent-guard",
|
|
3
|
-
"version": "0.1.2",
|
|
3
|
+
"version": "0.1.4",
|
|
4
4
|
"description": "Kill Switch for coding agents — stop runaway Claude Code / Cursor / Aider sessions from racking up an LLM bill. Native hook + token-metering proxy with per-session and daily-rolling budgets.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|