ai-shield-core 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/audit/logger.d.ts.map +1 -1
- package/dist/audit/logger.js +13 -14
- package/dist/audit/types.js +1 -2
- package/dist/cache/lru.js +1 -5
- package/dist/canary/memory.d.ts +75 -0
- package/dist/canary/memory.d.ts.map +1 -0
- package/dist/canary/memory.js +194 -0
- package/dist/context/wrap-context.d.ts +169 -0
- package/dist/context/wrap-context.d.ts.map +1 -0
- package/dist/context/wrap-context.js +278 -0
- package/dist/cost/anomaly.js +1 -4
- package/dist/cost/pricing.d.ts.map +1 -1
- package/dist/cost/pricing.js +26 -19
- package/dist/cost/tracker.d.ts +19 -1
- package/dist/cost/tracker.d.ts.map +1 -1
- package/dist/cost/tracker.js +27 -10
- package/dist/index.d.ts +34 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +55 -37
- package/dist/judge/async-judge.d.ts +85 -0
- package/dist/judge/async-judge.d.ts.map +1 -0
- package/dist/judge/async-judge.js +146 -0
- package/dist/policy/circuit-breaker.d.ts +70 -0
- package/dist/policy/circuit-breaker.d.ts.map +1 -0
- package/dist/policy/circuit-breaker.js +376 -0
- package/dist/policy/engine.js +1 -5
- package/dist/policy/tools.js +4 -8
- package/dist/scanner/canary.js +4 -8
- package/dist/scanner/chain.js +1 -5
- package/dist/scanner/heuristic.d.ts +27 -0
- package/dist/scanner/heuristic.d.ts.map +1 -1
- package/dist/scanner/heuristic.js +118 -7
- package/dist/scanner/ingestion.d.ts +147 -0
- package/dist/scanner/ingestion.d.ts.map +1 -0
- package/dist/scanner/ingestion.js +520 -0
- package/dist/scanner/output.d.ts +73 -0
- package/dist/scanner/output.d.ts.map +1 -0
- package/dist/scanner/output.js +297 -0
- package/dist/scanner/pii.d.ts.map +1 -1
- package/dist/scanner/pii.js +24 -12
- package/dist/shield.d.ts.map +1 -1
- package/dist/shield.js +34 -26
- package/dist/types.d.ts +156 -2
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +1 -2
- package/package.json +4 -3
- package/src/audit/logger.ts +6 -1
- package/src/canary/memory.ts +259 -0
- package/src/context/wrap-context.ts +475 -0
- package/src/cost/pricing.ts +21 -9
- package/src/cost/tracker.ts +35 -1
- package/src/index.ts +113 -2
- package/src/judge/async-judge.ts +254 -0
- package/src/policy/circuit-breaker.ts +449 -0
- package/src/scanner/heuristic.ts +125 -2
- package/src/scanner/ingestion.ts +624 -0
- package/src/scanner/output.ts +386 -0
- package/src/scanner/pii.ts +21 -7
- package/src/shield.ts +15 -2
- package/src/types.ts +194 -2
- package/tsconfig.json +2 -1
- package/dist/audit/logger.js.map +0 -1
- package/dist/audit/types.js.map +0 -1
- package/dist/cache/lru.js.map +0 -1
- package/dist/cost/anomaly.js.map +0 -1
- package/dist/cost/pricing.js.map +0 -1
- package/dist/cost/tracker.js.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/policy/engine.js.map +0 -1
- package/dist/policy/tools.js.map +0 -1
- package/dist/scanner/canary.js.map +0 -1
- package/dist/scanner/chain.js.map +0 -1
- package/dist/scanner/heuristic.js.map +0 -1
- package/dist/scanner/pii.js.map +0 -1
- package/dist/shield.js.map +0 -1
- package/dist/types.js.map +0 -1
|
@@ -0,0 +1,449 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
CircuitBreakerConfig,
|
|
3
|
+
CircuitBreakerDecision,
|
|
4
|
+
CircuitState,
|
|
5
|
+
CounterStoreLike,
|
|
6
|
+
ScanContext,
|
|
7
|
+
ToolCall,
|
|
8
|
+
ViolationType,
|
|
9
|
+
} from "../types.js";
|
|
10
|
+
|
|
11
|
+
// ============================================================
|
|
12
|
+
// Circuit Breaker — Tool-Policy Runtime Guard
|
|
13
|
+
//
|
|
14
|
+
// The existing `ToolPolicyScanner` (policy/tools.ts) is a *static*
|
|
15
|
+
// gate: allow / deny lists, manifest pin, dangerous patterns. It
|
|
16
|
+
// runs once per call.
|
|
17
|
+
//
|
|
18
|
+
// The circuit breaker layers *runtime* defense on top:
|
|
19
|
+
// - Rate limit per (tool, scope) within a rolling window.
|
|
20
|
+
// - "Blast radius" cap: max writes per window (for destructive ops).
|
|
21
|
+
// - Trip + cooldown: after N anomalies the tool is blocked for a
|
|
22
|
+
// period regardless of static policy.
|
|
23
|
+
// - Optional Human-In-The-Loop hook for destructive operations
|
|
24
|
+
// ("type the tool name to confirm").
|
|
25
|
+
//
|
|
26
|
+
// Counters can live in-process (default) or in any `ioredis`-shaped
|
|
27
|
+
// store so the breaker tracks state across replicas.
|
|
28
|
+
// ============================================================
|
|
29
|
+
|
|
30
|
+
/**
 * Lower-case tool-name prefixes that mark a tool as destructive when the
 * caller did not set `isDestructive` explicitly. Matched with
 * `startsWith` against the lower-cased tool name.
 */
const DESTRUCTIVE_DEFAULTS: readonly string[] = [
  // Data removal
  "delete_", "remove_", "drop_", "destroy_", "wipe_",
  // Service / storage teardown
  "shutdown_", "purge_", "truncate_",
  // Outbound side effects (mail, money)
  "send_email", "transfer_", "payment_",
];
|
|
43
|
+
|
|
44
|
+
/**
 * Fallback values applied by `configure()` when a breaker config leaves
 * the corresponding field unset.
 */
const DEFAULTS: Required<
  Pick<CircuitBreakerConfig, "failureThreshold" | "windowMs" | "cooldownMs">
> = {
  // Failures inside one window before the breaker trips.
  failureThreshold: 5,
  // Rolling window length: one minute, in milliseconds.
  windowMs: 60 * 1_000,
  // How long a tripped breaker stays open: one minute, in milliseconds.
  cooldownMs: 60 * 1_000,
};
|
|
54
|
+
|
|
55
|
+
/**
 * Mutable runtime state tracked for one (tool, scope) breaker.
 * All timestamp arrays hold `Date.now()` values and are trimmed to the
 * configured window by `prune()`.
 */
interface InternalState {
  // Current breaker position (initialised to "closed").
  state: CircuitState;
  // `Date.now()` at the moment the breaker was last opened; 0 until then.
  openedAt: number;
  failures: number[]; // timestamps within current window
  calls: number[]; // timestamps within current window
  writes: number[]; // timestamps within current window
}
|
|
62
|
+
|
|
63
|
+
/**
 * In-process stand-in for an `ioredis`-shaped counter store.
 *
 * Values are kept as strings (mirroring Redis replies) together with an
 * optional absolute expiry timestamp in milliseconds. Expired entries are
 * purged lazily, the next time the key is touched by any method.
 */
class InMemoryCounter implements CounterStoreLike {
  private data = new Map<string, { value: string; expiresAt?: number }>();

  /**
   * Return the entry for `key` if it exists and has not expired.
   * An expired entry is deleted and reported as missing.
   */
  private liveEntry(
    key: string,
  ): { value: string; expiresAt?: number } | undefined {
    const entry = this.data.get(key);
    if (!entry) return undefined;
    if (entry.expiresAt !== undefined && Date.now() > entry.expiresAt) {
      this.data.delete(key);
      return undefined;
    }
    return entry;
  }

  /** Resolve to the stored string, or `null` when missing or expired. */
  async get(key: string): Promise<string | null> {
    return this.liveEntry(key)?.value ?? null;
  }

  /**
   * Add `increment` to the numeric value at `key` (missing → 0) and
   * resolve to the new value as a string. An existing expiry is kept.
   *
   * The read-modify-write runs without an intervening `await`: yielding
   * between the read and the write lets two concurrent callers observe
   * the same starting value and lose one of the increments.
   */
  async incrbyfloat(key: string, increment: number): Promise<string> {
    const entry = this.liveEntry(key);
    const next = (parseFloat(entry?.value ?? "0") + increment).toString();
    this.data.set(key, { value: next, expiresAt: entry?.expiresAt });
    return next;
  }

  /**
   * Set the key to expire `seconds` from now.
   *
   * @returns 1 when the timeout was set, 0 when the key does not exist.
   * A key whose previous expiry has already passed counts as missing, so
   * a stale value is never brought back to life by re-arming its timer.
   */
  async expire(key: string, seconds: number): Promise<number> {
    const entry = this.liveEntry(key);
    if (!entry) return 0;
    entry.expiresAt = Date.now() + seconds * 1000;
    return 1;
  }
}
|
|
89
|
+
|
|
90
|
+
/** Constructor options for `CircuitBreakerRegistry`. */
export interface CircuitBreakerOptions {
  /**
   * Optional distributed counter store (ioredis-compatible).
   * When omitted the registry creates an in-process counter.
   */
  counterStore?: CounterStoreLike;
  /**
   * Cap on the number of (tool, scope) pairs tracked in-process.
   * Prevents unbounded growth in long-lived runtimes. Default: 5_000.
   * When this option is not set, a positive numeric value in env
   * `AI_SHIELD_CIRCUIT_MAX_KEYS` is used instead of the default; an
   * explicit option always takes precedence over the env variable.
   */
  maxKeys?: number;
}
|
|
100
|
+
|
|
101
|
+
/**
 * Registry of circuit breakers, one per (tool, scope) pair.
 *
 * Entries are keyed by `keyFor(tool, scope)`. The registry owns both the
 * resolved configuration and the runtime state; per-(tool, scope) state
 * is created lazily on first use and evicted least-recently-used first
 * once `maxKeys` entries exist.
 */
export class CircuitBreakerRegistry {
  /** Fully-defaulted configuration per breaker key. */
  private configs = new Map<string, Required<CircuitBreakerConfig>>();
  /** Runtime state per breaker key; Map insertion order doubles as LRU order. */
  private states = new Map<string, InternalState>();
  /**
   * Reserved for distributed-counter mode (e.g. cross-replica state).
   * The in-process path is the supported v0.2 surface; the store is
   * accepted so callers wiring up an `ioredis`-shaped backend get a
   * stable constructor option, and downstream releases can swap the
   * internal accounting to use it without breaking the API.
   */
  protected readonly store: CounterStoreLike;
  /** Maximum number of (tool, scope) state entries kept in memory. */
  private readonly maxKeys: number;

  /**
   * @param configs Breaker configurations to register up front.
   * @param options Counter store and state-map size cap.
   */
  constructor(
    configs: CircuitBreakerConfig[] = [],
    options: CircuitBreakerOptions = {},
  ) {
    this.store = options.counterStore ?? new InMemoryCounter();
    // `process` does not exist in browser / edge runtimes; touching it
    // unguarded would throw a ReferenceError during construction.
    const envCap = Number(
      typeof process !== "undefined"
        ? process.env?.AI_SHIELD_CIRCUIT_MAX_KEYS
        : undefined,
    );
    this.maxKeys =
      options.maxKeys ??
      (Number.isFinite(envCap) && envCap > 0 ? envCap : 5_000);
    for (const cfg of configs) {
      this.configure(cfg);
    }
  }

  /**
   * Configure (or re-configure) a breaker. Idempotent.
   *
   * Unset fields fall back to `DEFAULTS`, unlimited call/write caps, an
   * auto-approving HITL hook, and a name-prefix guess for `isDestructive`.
   */
  configure(config: CircuitBreakerConfig): void {
    const key = keyFor(config.tool, config.scope);
    this.configs.set(key, {
      tool: config.tool,
      scope: config.scope ?? "",
      failureThreshold:
        config.failureThreshold ?? DEFAULTS.failureThreshold,
      windowMs: config.windowMs ?? DEFAULTS.windowMs,
      cooldownMs: config.cooldownMs ?? DEFAULTS.cooldownMs,
      maxCallsPerWindow: config.maxCallsPerWindow ?? Infinity,
      maxWritesPerWindow: config.maxWritesPerWindow ?? Infinity,
      onDestructive: config.onDestructive ?? (() => true),
      isDestructive:
        config.isDestructive ?? isLikelyDestructive(config.tool),
    });
  }

  /**
   * Check whether a tool call is allowed.
   *
   * An allowed attempt is counted against the call (and, for destructive
   * tools, write) window; a denied attempt is not counted. Callers must
   * invoke `recordSuccess()`/`recordFailure()` AFTER the actual call so
   * anomaly counts stay honest.
   *
   * Config lookup tries the scoped key first and then the tool-wide
   * (empty-scope) config; state is always tracked under the scoped key.
   *
   * @param tool    The tool call being attempted.
   * @param context Scan context; `agentId` / `sessionId` select the scope.
   * @returns The decision, including a `reason` whenever `allowed` is false.
   */
  async check(
    tool: ToolCall,
    context: ScanContext = {},
  ): Promise<CircuitBreakerDecision> {
    const scope = scopeFor(context);
    const key = keyFor(tool.name, scope);
    const config =
      this.configs.get(key) ?? this.configs.get(keyFor(tool.name, ""));

    // No config → no breaker → allow. The caller may still use
    // the static ToolPolicyScanner for default deny.
    if (!config) {
      return { allowed: true, state: "closed" };
    }

    const state = this.getOrInitState(key);
    const now = Date.now();
    prune(state, now, config.windowMs);

    // 1. Open / half-open transitions.
    if (state.state === "open") {
      if (now - state.openedAt >= config.cooldownMs) {
        state.state = "half-open";
      } else {
        return {
          allowed: false,
          state: "open",
          reason: "circuit_open",
          retryAfterMs: config.cooldownMs - (now - state.openedAt),
          message: `Circuit OPEN for ${tool.name}${scope ? `@${scope}` : ""}`,
        };
      }
    }

    // 2. Rate-limit cap.
    if (state.calls.length >= config.maxCallsPerWindow) {
      return {
        allowed: false,
        state: state.state,
        reason: "rate_limit",
        retryAfterMs: config.windowMs,
        message: `Rate limit ${config.maxCallsPerWindow}/${config.windowMs}ms exceeded for ${tool.name}`,
      };
    }

    // 3. Blast-radius cap for destructive tools.
    if (
      config.isDestructive &&
      state.writes.length >= config.maxWritesPerWindow
    ) {
      return {
        allowed: false,
        state: state.state,
        reason: "blast_radius_exceeded",
        retryAfterMs: config.windowMs,
        message: `Blast-radius cap ${config.maxWritesPerWindow}/${config.windowMs}ms hit for ${tool.name}`,
      };
    }

    // 4. HITL gate for destructive ops.
    //
    // Record the call/write OPTIMISTICALLY first, BEFORE awaiting the
    // HITL hook. Two concurrent destructive calls otherwise both see
    // `state.writes.length === 0` and both get past the blast-radius
    // gate (TOCTOU on shared mutable state).
    //
    // Rolling back via `pop()` is unsafe: another call can push while the
    // hook is awaited, so `pop()` could remove the wrong entry. Capture
    // the SENTINEL value we pushed and remove that exact value on
    // rollback. Two concurrent rollbacks with identical timestamps could
    // still touch each other's entry, but at worst they remove a sibling
    // rather than letting a counter run away — semantically equivalent
    // for rate-limit purposes.
    const callSentinel: number = now;
    state.calls.push(callSentinel);
    let writeSentinel: number | null = null;
    if (config.isDestructive) {
      writeSentinel = now;
      state.writes.push(writeSentinel);
    }
    const rollbackOptimisticRecord = (): void => {
      // Remove the LAST occurrence of the sentinel (the one we pushed)
      // so concurrent rollbacks don't touch each other's entries.
      const callIdx = state.calls.lastIndexOf(callSentinel);
      if (callIdx >= 0) state.calls.splice(callIdx, 1);
      if (writeSentinel !== null) {
        const writeIdx = state.writes.lastIndexOf(writeSentinel);
        if (writeIdx >= 0) state.writes.splice(writeIdx, 1);
      }
    };

    if (config.isDestructive) {
      let rawResult: unknown;
      try {
        rawResult = await Promise.resolve(
          config.onDestructive({
            tool: tool.name,
            scope: config.scope,
            context,
          }),
        );
      } catch (err) {
        rollbackOptimisticRecord();
        // A hook may throw anything, not just Error instances; avoid
        // reporting "undefined" for strings / plain objects.
        const detail = err instanceof Error ? err.message : String(err);
        return {
          allowed: false,
          state: state.state,
          reason: "hitl_denied",
          message: `HITL hook threw: ${detail}`,
        };
      }
      // A hook that returns `undefined` (async function without explicit
      // `return`) or any non-boolean value is the most common HITL
      // footgun. Fail safe AND surface the programming error rather than
      // silently coerce.
      if (typeof rawResult !== "boolean") {
        rollbackOptimisticRecord();
        return {
          allowed: false,
          state: state.state,
          reason: "hitl_denied",
          message: `HITL hook for '${tool.name}' returned non-boolean (${typeof rawResult}); treating as denial`,
        };
      }
      if (!rawResult) {
        rollbackOptimisticRecord();
        return {
          allowed: false,
          state: state.state,
          reason: "hitl_denied",
          message: `Human-in-the-loop denied ${tool.name}`,
        };
      }
    }

    return { allowed: true, state: state.state };
  }

  /** Record a successful tool invocation. Closes a half-open breaker. */
  recordSuccess(toolName: string, context: ScanContext = {}): void {
    const scope = scopeFor(context);
    const key = keyFor(toolName, scope);
    const state = this.states.get(key);
    if (!state) return;
    if (state.state === "half-open") {
      state.state = "closed";
      state.failures = [];
    }
  }

  /**
   * Record a failed tool invocation. Trips the breaker once
   * `failureThreshold` failures accumulate within the window.
   *
   * A failure while the breaker is half-open re-opens it immediately:
   * the probe call after cooldown failed, so the tool is not yet healthy.
   * Without this, earlier failures that aged out of the window would
   * leave a failed probe with no effect and the tool fully available.
   */
  recordFailure(toolName: string, context: ScanContext = {}): void {
    const scope = scopeFor(context);
    const key = keyFor(toolName, scope);
    const config =
      this.configs.get(key) ?? this.configs.get(keyFor(toolName, ""));
    if (!config) return;
    const state = this.getOrInitState(key);
    const now = Date.now();
    prune(state, now, config.windowMs);
    state.failures.push(now);

    if (
      state.state === "half-open" ||
      state.failures.length >= config.failureThreshold
    ) {
      state.state = "open";
      state.openedAt = now;
    }
  }

  /**
   * Manually force a breaker open — useful for tests / ops.
   * `scope` should be built with `makeBreakerScope()`.
   */
  trip(toolName: string, scope?: string): void {
    const key = keyFor(toolName, scope ?? "");
    const state = this.getOrInitState(key);
    state.state = "open";
    state.openedAt = Date.now();
  }

  /** Drop all runtime state for a breaker; it starts over as closed. */
  reset(toolName: string, scope?: string): void {
    const key = keyFor(toolName, scope ?? "");
    this.states.delete(key);
  }

  /**
   * Inspect current state — for dashboards / audit.
   *
   * @returns A snapshot of the pruned counters, or `null` when the
   * breaker has no state yet or no configuration applies to it.
   */
  inspect(toolName: string, scope?: string): {
    state: CircuitState;
    callsInWindow: number;
    writesInWindow: number;
    failuresInWindow: number;
  } | null {
    const key = keyFor(toolName, scope ?? "");
    const state = this.states.get(key);
    const config =
      this.configs.get(key) ?? this.configs.get(keyFor(toolName, ""));
    if (!state || !config) return null;
    const now = Date.now();
    prune(state, now, config.windowMs);
    return {
      state: state.state,
      callsInWindow: state.calls.length,
      writesInWindow: state.writes.length,
      failuresInWindow: state.failures.length,
    };
  }

  /** Suggested ViolationType for a denied decision — useful in audit logs. */
  static violationType(decision: CircuitBreakerDecision): ViolationType {
    if (decision.reason === "circuit_open") return "circuit_breaker_open";
    if (decision.reason === "blast_radius_exceeded")
      return "blast_radius_exceeded";
    if (decision.reason === "rate_limit") return "tool_rate_limit";
    return "tool_denied";
  }

  // --- internal ---

  /**
   * Fetch the state for `key`, creating a fresh closed state when absent.
   * Every access moves the entry to the most-recently-used position.
   */
  private getOrInitState(key: string): InternalState {
    let state = this.states.get(key);
    if (state) {
      // Touch — promote to MRU. JS Map preserves insertion order;
      // delete + set moves the entry to the tail.
      this.states.delete(key);
      this.states.set(key, state);
      return state;
    }
    // LRU eviction: the oldest key (head of the Map) is dropped first.
    // Combined with the touch-on-access above, recently used breakers
    // survive while idle ones are evicted.
    if (this.states.size >= this.maxKeys) {
      const oldestKey = this.states.keys().next().value;
      // Compare against undefined explicitly so the guard depends only
      // on "the map had an entry", never on the key's truthiness.
      if (oldestKey !== undefined) this.states.delete(oldestKey);
    }
    state = {
      state: "closed",
      openedAt: 0,
      failures: [],
      calls: [],
      writes: [],
    };
    this.states.set(key, state);
    return state;
  }
}
|
|
397
|
+
|
|
398
|
+
// --- helpers ---
|
|
399
|
+
|
|
400
|
+
// Separator used when building breaker keys and scopes. A NUL byte cannot
// appear in valid tool names or agent/session IDs.
//
// `keyFor` places TWO NULs between tool and scope, while
// `makeBreakerScope` places ONE NUL between agentId and sessionId. The
// two-NUL boundary keeps the tool name distinguishable from the scope
// payload even when the scope itself contains a single NUL.
//
// Callers MUST build scopes with `makeBreakerScope()` instead of
// handcrafting them; a scope string containing `\x00\x00` would alias the
// boundary marker.
const KEY_SEP = "\x00";

/** Build the map key for a (tool, scope) pair; a missing scope means "". */
function keyFor(tool: string, scope?: string): string {
  const scopePart = scope ?? "";
  return `${tool}${KEY_SEP.repeat(2)}${scopePart}`;
}
|
|
413
|
+
|
|
414
|
+
/** Derive the breaker scope string from a scan context's agent/session IDs. */
function scopeFor(context: ScanContext): string {
  const { agentId, sessionId } = context;
  return makeBreakerScope(agentId, sessionId);
}
|
|
417
|
+
|
|
418
|
+
/**
 * Build the scope string the circuit breaker uses internally for a
 * given (agentId, sessionId) pair. Exposed so callers of `inspect()`,
 * `trip()`, and `reset()` don't have to know the separator convention.
 *
 * Empty strings are treated the same as missing IDs, so an empty
 * `agentId` never hides a valid `sessionId`.
 *
 * @param agentId   Agent identifier, if any.
 * @param sessionId Session identifier, if any.
 * @returns `agentId` + separator + `sessionId` when both are non-empty,
 * otherwise whichever one is non-empty, otherwise `""`.
 *
 * @example
 * ```ts
 * const scope = makeBreakerScope("agent-a", "session-1");
 * const snap = registry.inspect("delete_user", scope);
 * ```
 */
export function makeBreakerScope(
  agentId?: string,
  sessionId?: string,
): string {
  if (agentId && sessionId) {
    return `${agentId}${KEY_SEP}${sessionId}`;
  }
  // `||` rather than `??`: with `??`, an empty-string agentId won the
  // fallback chain and silently discarded a non-empty sessionId.
  return agentId || sessionId || "";
}
|
|
438
|
+
|
|
439
|
+
/**
 * Drop every recorded timestamp older than `now - windowMs` from the
 * failure, call and write lists of `state`. Each list is replaced by a
 * new filtered array; timestamps exactly on the cutoff are kept.
 */
function prune(state: InternalState, now: number, windowMs: number): void {
  const oldestAllowed = now - windowMs;
  const stillInWindow = (stamp: number): boolean => stamp >= oldestAllowed;
  for (const bucket of ["failures", "calls", "writes"] as const) {
    state[bucket] = state[bucket].filter(stillInWindow);
  }
}
|
|
445
|
+
|
|
446
|
+
/**
 * Guess whether a tool is destructive from its name: true when the
 * lower-cased name begins with any entry of `DESTRUCTIVE_DEFAULTS`.
 */
function isLikelyDestructive(toolName: string): boolean {
  const lowered = toolName.toLowerCase();
  for (const prefix of DESTRUCTIVE_DEFAULTS) {
    if (lowered.startsWith(prefix)) return true;
  }
  return false;
}
|
package/src/scanner/heuristic.ts
CHANGED
|
@@ -3,8 +3,77 @@ import type { Scanner, ScannerResult, ScanContext, Violation } from "../types.js
|
|
|
3
3
|
// ============================================================
|
|
4
4
|
// Heuristic Prompt Injection Scanner
|
|
5
5
|
// Score-based: multiple matches = higher confidence
|
|
6
|
+
// Unicode-normalizes input before pattern matching so that
|
|
7
|
+
// homoglyph/zero-width/fullwidth evasion attempts still hit.
|
|
6
8
|
// ============================================================
|
|
7
9
|
|
|
10
|
+
// Common Cyrillic/Greek Latin-lookalikes mapped to ASCII.
// Keep minimal — false-mappings in real content are worse than
// false-negatives in an attack attempt.
const HOMOGLYPH_MAP: Record<string, string> = {
  // Cyrillic
  "а": "a", "е": "e", "і": "i", "ј": "j", "о": "o", "р": "p", "с": "c", "ѕ": "s",
  "у": "y", "х": "x", "ԁ": "d", "һ": "h", "ӏ": "l", "ո": "n", "А": "A", "В": "B",
  "Е": "E", "І": "I", "К": "K", "М": "M", "Н": "H", "О": "O", "Р": "P", "С": "C",
  "Т": "T", "Х": "X", "Ѕ": "S", "Ј": "J", "Ү": "Y", "Ԛ": "Q", "Ԝ": "W", "Ғ": "F",
  // Greek
  "α": "a", "ο": "o", "ρ": "p", "ε": "e", "υ": "y", "χ": "x", "ν": "v", "ι": "i",
  "κ": "k", "Α": "A", "Β": "B", "Ε": "E", "Ζ": "Z", "Η": "H", "Ι": "I", "Κ": "K",
  "Μ": "M", "Ν": "N", "Ο": "O", "Ρ": "P", "Τ": "T", "Υ": "Y", "Χ": "X",
  // Armenian / Cherokee / other look-alikes occasionally used in evasion
  "օ": "o", "ѵ": "v",
};

// Alternation of every mapped look-alike character.
const HOMOGLYPH_RE = new RegExp(Object.keys(HOMOGLYPH_MAP).join("|"), "g");
// Zero-width chars + BOM — used to split words like "ig<ZWSP>nore" across
// the pattern boundary (U+200B..U+200D, U+2060, U+FEFF). Written with
// explicit escapes: the literal characters are invisible, so an editor,
// formatter or copy/paste can drop them and leave a class that matches a
// bare hyphen instead.
const ZERO_WIDTH_RE = /[\u200B-\u200D\u2060\uFEFF]/g;
// Combining marks (diacritics) left behind by NFKD decomposition
// (U+0300..U+036F). Escaped for the same reason as above.
const COMBINING_RE = /[\u0300-\u036F]/g;

/**
 * Normalize input for pattern matching. Returns the canonicalized string
 * used only for scan decisions; the sanitized output passed to callers
 * is still the original input.
 *
 * Order matters:
 *   1. NFKD folds compatibility forms (fullwidth → ASCII, ligatures) AND
 *      decomposes precomposed accented letters into base + combining mark.
 *   2. Strip zero-width chars so "ig<ZWSP>nore" collapses to "ignore".
 *   3. Strip combining marks (diacritics) left behind by NFKD.
 *   4. Map remaining Cyrillic/Greek look-alikes to Latin.
 *
 * @param input Raw text to canonicalize.
 * @returns The canonical form; visible ASCII punctuation such as hyphens
 * is left untouched.
 */
export function normalizeForInjectionScan(input: string): string {
  const nfkd = input.normalize("NFKD");
  const noZW = nfkd.replace(ZERO_WIDTH_RE, "");
  const noCombining = noZW.replace(COMBINING_RE, "");
  return noCombining.replace(HOMOGLYPH_RE, (ch) => HOMOGLYPH_MAP[ch] ?? ch);
}
|
|
52
|
+
|
|
53
|
+
/**
 * Collapse letter-splitting evasion: an attacker writes `i g n o r e` or
 * `i.g.n.o.r.e` or `i-g-n-o-r-e` to break the literal token "ignore" across
 * separators so the regex never matches. This produces an ADDITIONAL view
 * where any run of `single-letter + separator` (≥4 letters) has its
 * separators removed, so the spaced form collapses back to "ignore".
 *
 * Run as a second pass IN ADDITION to the normal normalized text — never
 * as a replacement — because collapsing is lossy (a long enough run of
 * genuine single letters, e.g. "a b c d", is fused too). Only
 * single-letter groups separated by one space / tab / dot / dash /
 * underscore are collapsed; multi-letter words are left intact, which
 * keeps benign prose untouched.
 *
 * @param input Text to scan for spaced-out letter runs.
 * @returns `input` with the separators removed inside each qualifying run.
 */
export function collapseSpacedLetters(input: string): string {
  // Match ≥3 "<letter><sep>" groups closed by a final lone letter.
  //
  // The leading `(?<![A-Za-z])` makes sure the run starts on a lone
  // letter; without it the LAST letter of a preceding word is absorbed
  // ("please i g n o r e" → "pleaseignore"), which hides the token from
  // word-bounded rules. The trailing `(?![A-Za-z])` stops the greedy match
  // from swallowing the first letter of the next real word
  // ("i g n o r e all" must collapse to "ignore all", not "ignorea ll").
  // Bounded, linear — no nested quantifier.
  return input.replace(
    /(?<![A-Za-z])(?:[A-Za-z][ \t._-]){3,}[A-Za-z](?![A-Za-z])/g,
    (run) => run.replace(/[ \t._-]/g, ""),
  );
}
|
|
76
|
+
|
|
8
77
|
interface PatternRule {
|
|
9
78
|
id: string;
|
|
10
79
|
category: InjectionCategory;
|
|
@@ -357,8 +426,26 @@ export class HeuristicScanner implements Scanner {
|
|
|
357
426
|
const violations: Violation[] = [];
|
|
358
427
|
let totalScore = 0;
|
|
359
428
|
|
|
429
|
+
// Normalize once — pattern matching runs against the canonical form so
|
|
430
|
+
// homoglyph/zero-width evasion doesn't bypass the rules. The caller
|
|
431
|
+
// still sees the original input in `sanitized`.
|
|
432
|
+
const normalized = normalizeForInjectionScan(input);
|
|
433
|
+
// Second view that un-splits letter-splitting evasion ("i g n o r e").
|
|
434
|
+
// Only computed when it actually differs (cheap guard), and only the
|
|
435
|
+
// high-value override/role/extraction/tool categories are re-tested
|
|
436
|
+
// against it — collapsing is lossy and the low-value framing rules
|
|
437
|
+
// would false-positive on collapsed prose.
|
|
438
|
+
const collapsed = collapseSpacedLetters(normalized);
|
|
439
|
+
const collapsedDiffers = collapsed !== normalized;
|
|
440
|
+
const SPLIT_SENSITIVE: ReadonlySet<InjectionCategory> = new Set([
|
|
441
|
+
"instruction_override",
|
|
442
|
+
"role_manipulation",
|
|
443
|
+
"system_prompt_extraction",
|
|
444
|
+
"tool_abuse",
|
|
445
|
+
]);
|
|
446
|
+
|
|
360
447
|
for (const rule of this.patterns) {
|
|
361
|
-
if (rule.pattern.test(
|
|
448
|
+
if (rule.pattern.test(normalized)) {
|
|
362
449
|
totalScore += rule.weight;
|
|
363
450
|
violations.push({
|
|
364
451
|
type: "prompt_injection",
|
|
@@ -368,10 +455,27 @@ export class HeuristicScanner implements Scanner {
|
|
|
368
455
|
message: rule.description,
|
|
369
456
|
detail: `Rule ${rule.id} (${rule.category})`,
|
|
370
457
|
});
|
|
458
|
+
} else if (
|
|
459
|
+
collapsedDiffers &&
|
|
460
|
+
SPLIT_SENSITIVE.has(rule.category) &&
|
|
461
|
+
rule.pattern.test(collapsed)
|
|
462
|
+
) {
|
|
463
|
+
// Matched only after un-splitting → letter-splitting evasion.
|
|
464
|
+
totalScore += rule.weight;
|
|
465
|
+
violations.push({
|
|
466
|
+
type: "prompt_injection",
|
|
467
|
+
scanner: this.name,
|
|
468
|
+
score: rule.weight,
|
|
469
|
+
threshold: this.threshold,
|
|
470
|
+
message: rule.description,
|
|
471
|
+
detail: `Rule ${rule.id} (${rule.category}, letter-splitting evasion)`,
|
|
472
|
+
});
|
|
371
473
|
}
|
|
372
474
|
}
|
|
373
475
|
|
|
374
|
-
// Structural signals (cumulative)
|
|
476
|
+
// Structural signals (cumulative) — intentionally run on the original
|
|
477
|
+
// input so real structural attacks (many newlines, long paddings) can
|
|
478
|
+
// still trip even when the textual patterns were evaded.
|
|
375
479
|
const structuralScore = this.checkStructuralSignals(input);
|
|
376
480
|
totalScore += structuralScore;
|
|
377
481
|
|
|
@@ -412,6 +516,25 @@ export class HeuristicScanner implements Scanner {
|
|
|
412
516
|
// Very long input (potential padding attack)
|
|
413
517
|
if (input.length > 5000) score += 0.05;
|
|
414
518
|
|
|
519
|
+
// Adversarial suffix (GCG-style): a long whitespace-free token packed
|
|
520
|
+
// with mixed punctuation/symbols, typically appended after the readable
|
|
521
|
+
// request. Conservative — needs ≥25 chars and ≥6 distinct punctuation
|
|
522
|
+
// marks so ordinary URLs, hashes and code tokens don't trip it.
|
|
523
|
+
const ADV_TOKEN_RE = /\S{25,}/g;
|
|
524
|
+
let advMatch: RegExpExecArray | null;
|
|
525
|
+
let advCount = 0;
|
|
526
|
+
while ((advMatch = ADV_TOKEN_RE.exec(input)) !== null && advCount < 32) {
|
|
527
|
+
advCount += 1;
|
|
528
|
+
const tok = advMatch[0];
|
|
529
|
+
const distinctPunct = new Set(
|
|
530
|
+
(tok.match(/[!-/:-@[-`{-~]/g) ?? []),
|
|
531
|
+
).size;
|
|
532
|
+
if (distinctPunct >= 6) {
|
|
533
|
+
score += 0.05;
|
|
534
|
+
break;
|
|
535
|
+
}
|
|
536
|
+
}
|
|
537
|
+
|
|
415
538
|
return score;
|
|
416
539
|
}
|
|
417
540
|
|