@panguard-ai/panguard-mcp-proxy 1.6.0 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/evaluator.d.ts +25 -0
- package/dist/evaluator.js +49 -7
- package/dist/proxy.js +40 -10
- package/package.json +17 -6
package/dist/evaluator.d.ts
CHANGED
|
@@ -13,6 +13,30 @@ export interface EvalResult {
|
|
|
13
13
|
readonly confidence: number;
|
|
14
14
|
readonly durationMs: number;
|
|
15
15
|
}
|
|
16
|
+
/** Minimal rule shape the deny policy needs. */
|
|
17
|
+
interface RuleLike {
|
|
18
|
+
readonly severity: string;
|
|
19
|
+
readonly maturity?: string;
|
|
20
|
+
readonly confirm?: string;
|
|
21
|
+
}
|
|
22
|
+
/**
|
|
23
|
+
* Whether a single rule match is strong enough to HARD-DENY a live tool call
|
|
24
|
+
* (vs. degrade to 'ask'). This is the proxy's false-positive control point: the
|
|
25
|
+
* detection engine runs the full 'hunt' lane so nothing is missed, but we only
|
|
26
|
+
* auto-break the agent on a signal we trust.
|
|
27
|
+
*
|
|
28
|
+
* - confirm:embedding rules (the broad workhorses ATR-2026-00001/00002) need
|
|
29
|
+
* async semantic confirmation this proxy can't run and are the top FP
|
|
30
|
+
* sources -> never hard-deny unconfirmed (the caller degrades to 'ask').
|
|
31
|
+
* - critical severity hard-stops even on a younger rule (security-first:
|
|
32
|
+
* credential exfil / RCE / data destruction are specific, not broad).
|
|
33
|
+
* - high severity hard-stops only when proven (maturity=stable).
|
|
34
|
+
* - everything else (high-test, experimental, medium, low) -> not blockable.
|
|
35
|
+
*
|
|
36
|
+
* Pure + exported so the policy is unit-tested independently of which live rule
|
|
37
|
+
* happens to match (the rule corpus changes daily; this policy must not).
|
|
38
|
+
*/
|
|
39
|
+
export declare function shouldHardDeny(rule: RuleLike): boolean;
|
|
16
40
|
export declare class ProxyEvaluator {
|
|
17
41
|
private readonly engine;
|
|
18
42
|
private rulesLoaded;
|
|
@@ -34,3 +58,4 @@ export declare class ProxyEvaluator {
|
|
|
34
58
|
evaluateToolResponse(toolName: string, response: string): Promise<EvalResult>;
|
|
35
59
|
private evaluate;
|
|
36
60
|
}
|
|
61
|
+
export {};
|
package/dist/evaluator.js
CHANGED
|
@@ -28,6 +28,30 @@ function findRulesDir() {
|
|
|
28
28
|
}
|
|
29
29
|
throw new Error('Cannot find ATR rules directory. Install agent-threat-rules.');
|
|
30
30
|
}
|
|
31
|
+
/**
|
|
32
|
+
* Whether a single rule match is strong enough to HARD-DENY a live tool call
|
|
33
|
+
* (vs. degrade to 'ask'). This is the proxy's false-positive control point: the
|
|
34
|
+
* detection engine runs the full 'hunt' lane so nothing is missed, but we only
|
|
35
|
+
* auto-break the agent on a signal we trust.
|
|
36
|
+
*
|
|
37
|
+
* - confirm:embedding rules (the broad workhorses ATR-2026-00001/00002) need
|
|
38
|
+
* async semantic confirmation this proxy can't run and are the top FP
|
|
39
|
+
* sources -> never hard-deny unconfirmed (the caller degrades to 'ask').
|
|
40
|
+
* - critical severity hard-stops even on a younger rule (security-first:
|
|
41
|
+
* credential exfil / RCE / data destruction are specific, not broad).
|
|
42
|
+
* - high severity hard-stops only when proven (maturity=stable).
|
|
43
|
+
* - everything else (high-test, experimental, medium, low) -> not blockable.
|
|
44
|
+
*
|
|
45
|
+
* Pure + exported so the policy is unit-tested independently of which live rule
|
|
46
|
+
* happens to match (the rule corpus changes daily; this policy must not).
|
|
47
|
+
*/
|
|
48
|
+
export function shouldHardDeny(rule) {
|
|
49
|
+
if (rule.confirm === 'embedding')
|
|
50
|
+
return false;
|
|
51
|
+
if (rule.severity === 'critical')
|
|
52
|
+
return true;
|
|
53
|
+
return rule.severity === 'high' && rule.maturity === 'stable';
|
|
54
|
+
}
|
|
31
55
|
export class ProxyEvaluator {
|
|
32
56
|
engine;
|
|
33
57
|
rulesLoaded = false;
|
|
@@ -38,6 +62,12 @@ export class ProxyEvaluator {
|
|
|
38
62
|
blocklistSize = 0;
|
|
39
63
|
constructor() {
|
|
40
64
|
const rulesDir = findRulesDir();
|
|
65
|
+
// 'hunt' detection (every rule) so we never MISS an attack in a tool call —
|
|
66
|
+
// incl. the broad workhorse rule ATR-2026-00001 (stable but confirm:embedding,
|
|
67
|
+
// which an 'enforce' sync lane would silently drop since this proxy ships no
|
|
68
|
+
// embedding model). FP-safety is enforced at the DENY gate instead: a match
|
|
69
|
+
// only HARD-denies a live tool call when it is a proven rule (see evaluate());
|
|
70
|
+
// unproven matches degrade to 'ask' rather than breaking the agent.
|
|
41
71
|
this.engine = new ATREngine({ rulesDir });
|
|
42
72
|
this.blocklistPath = join(homedir(), '.panguard-guard', 'blocked-tools.json');
|
|
43
73
|
this.refreshBlocklist();
|
|
@@ -66,6 +96,18 @@ export class ProxyEvaluator {
|
|
|
66
96
|
return this.ruleCount;
|
|
67
97
|
this.ruleCount = await this.engine.loadRules();
|
|
68
98
|
this.rulesLoaded = true;
|
|
99
|
+
// 0 rules => the detection engine matches nothing, so every tool call gets
|
|
100
|
+
// an 'allow' verdict (evaluate() returns allow when matches.length === 0).
|
|
101
|
+
// That is protection silently OFF — never let it pass unannounced. We do NOT
|
|
102
|
+
// hard-fail (a blocklist-only proxy with no ATR rules is still a degraded but
|
|
103
|
+
// usable mode, and proxy fail-CLOSED on evaluation *crashes* is unchanged in
|
|
104
|
+
// evaluate()); we surface it loudly so the operator and the dashboard can see
|
|
105
|
+
// Layer A is degraded. See computeLayers() Layer A: ruleCount === 0 => 'degraded'.
|
|
106
|
+
if (this.ruleCount === 0) {
|
|
107
|
+
process.stderr.write('[panguard-proxy] WARNING: 0 ATR rules loaded — pattern detection (Layer A) is DEGRADED. ' +
|
|
108
|
+
'Tool calls will only be checked against the Guard blocklist; no rule-based threats will be caught. ' +
|
|
109
|
+
'Verify the agent-threat-rules package is installed and the rules directory is populated.\n');
|
|
110
|
+
}
|
|
69
111
|
return this.ruleCount;
|
|
70
112
|
}
|
|
71
113
|
getRuleCount() {
|
|
@@ -134,13 +176,13 @@ export class ProxyEvaluator {
|
|
|
134
176
|
durationMs,
|
|
135
177
|
};
|
|
136
178
|
}
|
|
137
|
-
//
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
const outcome =
|
|
143
|
-
const topMatch = matches[0];
|
|
179
|
+
// Hard-DENY only on a trusted match (see shouldHardDeny); every other
|
|
180
|
+
// match is still surfaced as 'ask' (user-in-the-loop), never silently
|
|
181
|
+
// allowed. This is the proxy's false-positive control point — the engine
|
|
182
|
+
// runs full 'hunt' detection so nothing is missed.
|
|
183
|
+
const blockMatch = matches.find((m) => shouldHardDeny(m.rule));
|
|
184
|
+
const outcome = blockMatch ? 'deny' : 'ask';
|
|
185
|
+
const topMatch = blockMatch ?? matches[0];
|
|
144
186
|
return {
|
|
145
187
|
outcome,
|
|
146
188
|
reason: `${topMatch.rule.title} (${topMatch.rule.severity})`,
|
package/dist/proxy.js
CHANGED
|
@@ -51,17 +51,22 @@ export class MCPProxy {
|
|
|
51
51
|
constructor(config, deps = {}) {
|
|
52
52
|
this.config = config;
|
|
53
53
|
this.evaluator = deps.evaluator ?? new ProxyEvaluator();
|
|
54
|
-
// Fail-
|
|
55
|
-
//
|
|
56
|
-
//
|
|
57
|
-
// (
|
|
58
|
-
//
|
|
59
|
-
//
|
|
60
|
-
|
|
54
|
+
// Fail-CLOSED by default (security-first): if the async evaluator errors or
|
|
55
|
+
// times out, DENY the call rather than forward it unprotected. A security
|
|
56
|
+
// tool whose default failure mode is "allow" can be defeated by simply making
|
|
57
|
+
// it fail (slow/ReDoS payload). Rules are awaited in connect() BEFORE the
|
|
58
|
+
// proxy accepts any call, so there is no cold-start window where a timeout is
|
|
59
|
+
// expected — a timeout means a genuine problem, where denying is correct.
|
|
60
|
+
// Availability-first deployments can opt back to fail-open via config or the
|
|
61
|
+
// PANGUARD_PROXY_FAIL_MODE=open env var.
|
|
62
|
+
const envFailMode = process.env['PANGUARD_PROXY_FAIL_MODE'];
|
|
63
|
+
this.failMode =
|
|
64
|
+
config.failMode ??
|
|
65
|
+
(envFailMode === 'open' || envFailMode === 'closed' ? envFailMode : 'closed');
|
|
61
66
|
this.evalTimeout = config.evalTimeout ?? 5000;
|
|
62
67
|
// Sync sub-ms pre-check. Runs in front of the async evaluator so the worst
|
|
63
|
-
// payloads (and any session the brain flags) are blocked instantly —
|
|
64
|
-
//
|
|
68
|
+
// payloads (and any session the brain flags) are blocked instantly — and,
|
|
69
|
+
// with fail-closed as the default, an unavailable async evaluator denies.
|
|
65
70
|
this.riskStore = new InMemoryRiskStore();
|
|
66
71
|
this.guard = new GuardGate({
|
|
67
72
|
gate: new InlineGate(),
|
|
@@ -103,7 +108,19 @@ export class MCPProxy {
|
|
|
103
108
|
this.server = new Server({ name: 'panguard-mcp-proxy', version: '0.1.0' }, { capabilities: { tools: {}, resources: {}, prompts: {} } });
|
|
104
109
|
this.registerHandlers();
|
|
105
110
|
await this.server.connect(agentTransport);
|
|
106
|
-
|
|
111
|
+
// With 0 rules, Layer A catches nothing — say so plainly instead of the
|
|
112
|
+
// misleading "0 rules protecting all tool calls" (the loud warning was
|
|
113
|
+
// already emitted by evaluator.loadRules()).
|
|
114
|
+
if (ruleCount === 0) {
|
|
115
|
+
process.stderr.write('[panguard-proxy] Proxy active in DEGRADED mode — 0 ATR rules loaded; only the Guard blocklist is enforced.\n');
|
|
116
|
+
}
|
|
117
|
+
else {
|
|
118
|
+
process.stderr.write(`[panguard-proxy] Proxy active. ${ruleCount} rules protecting all tool calls.\n`);
|
|
119
|
+
}
|
|
120
|
+
// MCP has no user-in-the-loop channel, so an 'ask' verdict cannot pause for
|
|
121
|
+
// approval — it is logged and forwarded, and surfaced on stderr as
|
|
122
|
+
// "FLAGGED (ask)". Document it here so operators know flagged calls still run.
|
|
123
|
+
process.stderr.write("[panguard-proxy] Note: 'ask' verdicts are logged-and-forwarded (no MCP user prompt); watch stderr for FLAGGED (ask) lines.\n");
|
|
107
124
|
}
|
|
108
125
|
/**
|
|
109
126
|
* Run the Layer 1 inline gate for a tool call (sync, sub-ms): build the
|
|
@@ -205,6 +222,13 @@ export class MCPProxy {
|
|
|
205
222
|
],
|
|
206
223
|
};
|
|
207
224
|
}
|
|
225
|
+
// An 'ask' verdict cannot pause for a human here — MCP has no
|
|
226
|
+
// user-in-the-loop channel — so the call is forwarded. Surface it loudly
|
|
227
|
+
// on stderr so a flagged-but-not-denied call is never silent (the journal
|
|
228
|
+
// entry above is easy to miss). Semantics are unchanged: logged + forwarded.
|
|
229
|
+
if (preResult.outcome === 'ask') {
|
|
230
|
+
process.stderr.write(`[panguard-proxy] FLAGGED (ask): ${name} — ${preResult.reason}\n`);
|
|
231
|
+
}
|
|
208
232
|
// Forward to upstream
|
|
209
233
|
const result = await client.callTool({ name, arguments: toolArgs });
|
|
210
234
|
// PostToolUse: evaluate the response
|
|
@@ -250,6 +274,12 @@ export class MCPProxy {
|
|
|
250
274
|
],
|
|
251
275
|
};
|
|
252
276
|
}
|
|
277
|
+
// Same as the pre-call path: an 'ask' response verdict is logged +
|
|
278
|
+
// forwarded (no human-in-the-loop in MCP) — surface it loudly so it is
|
|
279
|
+
// never silent.
|
|
280
|
+
if (postResult.outcome === 'ask') {
|
|
281
|
+
process.stderr.write(`[panguard-proxy] FLAGGED response (ask): ${name} — ${postResult.reason}\n`);
|
|
282
|
+
}
|
|
253
283
|
}
|
|
254
284
|
return result;
|
|
255
285
|
});
|
package/package.json
CHANGED
|
@@ -1,26 +1,37 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@panguard-ai/panguard-mcp-proxy",
|
|
3
|
-
"version": "1.6.0",
|
|
3
|
+
"version": "1.7.0",
|
|
4
4
|
"description": "MCP Proxy — runtime interception for AI agent tool calls using ATR rules",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|
|
7
7
|
"types": "./dist/index.d.ts",
|
|
8
|
+
"exports": {
|
|
9
|
+
".": {
|
|
10
|
+
"import": "./dist/index.js",
|
|
11
|
+
"types": "./dist/index.d.ts"
|
|
12
|
+
},
|
|
13
|
+
"./evaluator": {
|
|
14
|
+
"import": "./dist/evaluator.js",
|
|
15
|
+
"types": "./dist/evaluator.d.ts"
|
|
16
|
+
}
|
|
17
|
+
},
|
|
8
18
|
"bin": {
|
|
9
19
|
"panguard-mcp-proxy": "./dist/index.js"
|
|
10
20
|
},
|
|
11
21
|
"dependencies": {
|
|
12
22
|
"@modelcontextprotocol/sdk": "^1.12.0",
|
|
13
|
-
"agent-threat-rules": "^3.
|
|
14
|
-
"@panguard-ai/
|
|
15
|
-
"@panguard-ai/
|
|
23
|
+
"agent-threat-rules": "^3.5.0",
|
|
24
|
+
"@panguard-ai/containment": "0.1.0",
|
|
25
|
+
"@panguard-ai/atr": "1.7.0"
|
|
16
26
|
},
|
|
17
27
|
"peerDependencies": {
|
|
18
|
-
"@panguard-ai/atr": "1.
|
|
28
|
+
"@panguard-ai/atr": "1.7.0"
|
|
19
29
|
},
|
|
20
30
|
"files": [
|
|
21
31
|
"dist",
|
|
22
32
|
"package.json",
|
|
23
|
-
"README.md"
|
|
33
|
+
"README.md",
|
|
34
|
+
"LICENSE"
|
|
24
35
|
],
|
|
25
36
|
"publishConfig": {
|
|
26
37
|
"access": "public"
|