npm - @panguard-ai/panguard-mcp-proxy - Version diff - 1.6.1 → 1.7.0 - Mend

@panguard-ai/panguard-mcp-proxy 1.6.1 → 1.7.0

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (4)

package/dist/evaluator.d.ts CHANGED Viewed

@@ -13,6 +13,30 @@ export interface EvalResult {
     readonly confidence: number;
     readonly durationMs: number;
 }
+/** Minimal rule shape the deny policy needs. */
+interface RuleLike {
+    readonly severity: string;
+    readonly maturity?: string;
+    readonly confirm?: string;
+}
+/**
+ * Whether a single rule match is strong enough to HARD-DENY a live tool call
+ * (vs. degrade to 'ask'). This is the proxy's false-positive control point: the
+ * detection engine runs the full 'hunt' lane so nothing is missed, but we only
+ * auto-break the agent on a signal we trust.
+ *
+ *   - confirm:embedding rules (the broad workhorses ATR-2026-00001/00002) need
+ *     async semantic confirmation this proxy can't run and are the top FP
+ *     sources -> never hard-deny unconfirmed (the caller degrades to 'ask').
+ *   - critical severity hard-stops even on a younger rule (security-first:
+ *     credential exfil / RCE / data destruction are specific, not broad).
+ *   - high severity hard-stops only when proven (maturity=stable).
+ *   - everything else (high-test, experimental, medium, low) -> not blockable.
+ *
+ * Pure + exported so the policy is unit-tested independently of which live rule
+ * happens to match (the rule corpus changes daily; this policy must not).
+ */
+export declare function shouldHardDeny(rule: RuleLike): boolean;
 export declare class ProxyEvaluator {
     private readonly engine;
     private rulesLoaded;
@@ -34,3 +58,4 @@ export declare class ProxyEvaluator {
     evaluateToolResponse(toolName: string, response: string): Promise<EvalResult>;
     private evaluate;
 }
+export {};

package/dist/evaluator.js CHANGED Viewed

@@ -28,6 +28,30 @@ function findRulesDir() {
     }
     throw new Error('Cannot find ATR rules directory. Install agent-threat-rules.');
 }
+/**
+ * Whether a single rule match is strong enough to HARD-DENY a live tool call
+ * (vs. degrade to 'ask'). This is the proxy's false-positive control point: the
+ * detection engine runs the full 'hunt' lane so nothing is missed, but we only
+ * auto-break the agent on a signal we trust.
+ *
+ *   - confirm:embedding rules (the broad workhorses ATR-2026-00001/00002) need
+ *     async semantic confirmation this proxy can't run and are the top FP
+ *     sources -> never hard-deny unconfirmed (the caller degrades to 'ask').
+ *   - critical severity hard-stops even on a younger rule (security-first:
+ *     credential exfil / RCE / data destruction are specific, not broad).
+ *   - high severity hard-stops only when proven (maturity=stable).
+ *   - everything else (high-test, experimental, medium, low) -> not blockable.
+ *
+ * Pure + exported so the policy is unit-tested independently of which live rule
+ * happens to match (the rule corpus changes daily; this policy must not).
+ */
+export function shouldHardDeny(rule) {
+    if (rule.confirm === 'embedding')
+        return false;
+    if (rule.severity === 'critical')
+        return true;
+    return rule.severity === 'high' && rule.maturity === 'stable';
+}
 export class ProxyEvaluator {
     engine;
     rulesLoaded = false;
@@ -38,6 +62,12 @@ export class ProxyEvaluator {
     blocklistSize = 0;
     constructor() {
         const rulesDir = findRulesDir();
+        // 'hunt' detection (every rule) so we never MISS an attack in a tool call —
+        // incl. the broad workhorse rule ATR-2026-00001 (stable but confirm:embedding,
+        // which an 'enforce' sync lane would silently drop since this proxy ships no
+        // embedding model). FP-safety is enforced at the DENY gate instead: a match
+        // only HARD-denies a live tool call when it is a proven rule (see evaluate());
+        // unproven matches degrade to 'ask' rather than breaking the agent.
         this.engine = new ATREngine({ rulesDir });
         this.blocklistPath = join(homedir(), '.panguard-guard', 'blocked-tools.json');
         this.refreshBlocklist();
@@ -66,6 +96,18 @@ export class ProxyEvaluator {
             return this.ruleCount;
         this.ruleCount = await this.engine.loadRules();
         this.rulesLoaded = true;
+        // 0 rules => the detection engine matches nothing, so every tool call gets
+        // an 'allow' verdict (evaluate() returns allow when matches.length === 0).
+        // That is protection silently OFF — never let it pass unannounced. We do NOT
+        // hard-fail (a blocklist-only proxy with no ATR rules is still a degraded but
+        // usable mode, and proxy fail-CLOSED on evaluation *crashes* is unchanged in
+        // evaluate()); we surface it loudly so the operator and the dashboard can see
+        // Layer A is degraded. See computeLayers() Layer A: ruleCount === 0 => 'degraded'.
+        if (this.ruleCount === 0) {
+            process.stderr.write('[panguard-proxy] WARNING: 0 ATR rules loaded — pattern detection (Layer A) is DEGRADED. ' +
+                'Tool calls will only be checked against the Guard blocklist; no rule-based threats will be caught. ' +
+                'Verify the agent-threat-rules package is installed and the rules directory is populated.\n');
+        }
         return this.ruleCount;
     }
     getRuleCount() {
@@ -134,13 +176,13 @@ export class ProxyEvaluator {
                     durationMs,
                 };
             }
-            // Check highest severity match
-            const maxSeverity = matches.reduce((max, m) => {
-                const order = ['informational', 'low', 'medium', 'high', 'critical'];
-                return order.indexOf(m.rule.severity) > order.indexOf(max) ? m.rule.severity : max;
-            }, 'informational');
-            const outcome = maxSeverity === 'critical' || maxSeverity === 'high' ? 'deny' : 'ask';
-            const topMatch = matches[0];
+            // Hard-DENY only on a trusted match (see shouldHardDeny); every other
+            // match is still surfaced as 'ask' (user-in-the-loop), never silently
+            // allowed. This is the proxy's false-positive control point — the engine
+            // runs full 'hunt' detection so nothing is missed.
+            const blockMatch = matches.find((m) => shouldHardDeny(m.rule));
+            const outcome = blockMatch ? 'deny' : 'ask';
+            const topMatch = blockMatch ?? matches[0];
             return {
                 outcome,
                 reason: `${topMatch.rule.title} (${topMatch.rule.severity})`,

package/dist/proxy.js CHANGED Viewed

@@ -51,17 +51,22 @@ export class MCPProxy {
     constructor(config, deps = {}) {
         this.config = config;
         this.evaluator = deps.evaluator ?? new ProxyEvaluator();
-        // Fail-OPEN by default: PanGuard must never become the failure point in the
-        // agent's hot path. If the async evaluator times out or errors (e.g. rules
-        // still loading on cold start), the tool call proceeds — the sync pre-check
-        // (GuardGate, below) still blocks the worst payloads instantly regardless of
-        // this mode. Opt into 'closed' only for high-assurance deployments that
-        // accept blocking the agent when the evaluator is unavailable.
-        this.failMode = config.failMode ?? 'open';
+        // Fail-CLOSED by default (security-first): if the async evaluator errors or
+        // times out, DENY the call rather than forward it unprotected. A security
+        // tool whose default failure mode is "allow" can be defeated by simply making
+        // it fail (slow/ReDoS payload). Rules are awaited in connect() BEFORE the
+        // proxy accepts any call, so there is no cold-start window where a timeout is
+        // expected — a timeout means a genuine problem, where denying is correct.
+        // Availability-first deployments can opt back to fail-open via config or the
+        // PANGUARD_PROXY_FAIL_MODE=open env var.
+        const envFailMode = process.env['PANGUARD_PROXY_FAIL_MODE'];
+        this.failMode =
+            config.failMode ??
+                (envFailMode === 'open' || envFailMode === 'closed' ? envFailMode : 'closed');
         this.evalTimeout = config.evalTimeout ?? 5000;
         // Sync sub-ms pre-check. Runs in front of the async evaluator so the worst
-        // payloads (and any session the brain flags) are blocked instantly — even
-        // if the async evaluator times out fail-open.
+        // payloads (and any session the brain flags) are blocked instantly — and,
+        // with fail-closed as the default, an unavailable async evaluator denies.
         this.riskStore = new InMemoryRiskStore();
         this.guard = new GuardGate({
             gate: new InlineGate(),
@@ -103,7 +108,19 @@ export class MCPProxy {
         this.server = new Server({ name: 'panguard-mcp-proxy', version: '0.1.0' }, { capabilities: { tools: {}, resources: {}, prompts: {} } });
         this.registerHandlers();
         await this.server.connect(agentTransport);
-        process.stderr.write(`[panguard-proxy] Proxy active. ${ruleCount} rules protecting all tool calls.\n`);
+        // With 0 rules, Layer A catches nothing — say so plainly instead of the
+        // misleading "0 rules protecting all tool calls" (the loud warning was
+        // already emitted by evaluator.loadRules()).
+        if (ruleCount === 0) {
+            process.stderr.write('[panguard-proxy] Proxy active in DEGRADED mode — 0 ATR rules loaded; only the Guard blocklist is enforced.\n');
+        }
+        else {
+            process.stderr.write(`[panguard-proxy] Proxy active. ${ruleCount} rules protecting all tool calls.\n`);
+        }
+        // MCP has no user-in-the-loop channel, so an 'ask' verdict cannot pause for
+        // approval — it is logged and forwarded, and surfaced on stderr as
+        // "FLAGGED (ask)". Document it here so operators know flagged calls still run.
+        process.stderr.write("[panguard-proxy] Note: 'ask' verdicts are logged-and-forwarded (no MCP user prompt); watch stderr for FLAGGED (ask) lines.\n");
     }
     /**
      * Run the Layer 1 inline gate for a tool call (sync, sub-ms): build the
@@ -205,6 +222,13 @@ export class MCPProxy {
                     ],
                 };
             }
+            // An 'ask' verdict cannot pause for a human here — MCP has no
+            // user-in-the-loop channel — so the call is forwarded. Surface it loudly
+            // on stderr so a flagged-but-not-denied call is never silent (the journal
+            // entry above is easy to miss). Semantics are unchanged: logged + forwarded.
+            if (preResult.outcome === 'ask') {
+                process.stderr.write(`[panguard-proxy] FLAGGED (ask): ${name} — ${preResult.reason}\n`);
+            }
             // Forward to upstream
             const result = await client.callTool({ name, arguments: toolArgs });
             // PostToolUse: evaluate the response
@@ -250,6 +274,12 @@ export class MCPProxy {
                         ],
                     };
                 }
+                // Same as the pre-call path: an 'ask' response verdict is logged +
+                // forwarded (no human-in-the-loop in MCP) — surface it loudly so it is
+                // never silent.
+                if (postResult.outcome === 'ask') {
+                    process.stderr.write(`[panguard-proxy] FLAGGED response (ask): ${name} — ${postResult.reason}\n`);
+                }
             }
             return result;
         });

package/package.json CHANGED Viewed

@@ -1,26 +1,37 @@
 {
   "name": "@panguard-ai/panguard-mcp-proxy",
-  "version": "1.6.1",
+  "version": "1.7.0",
   "description": "MCP Proxy — runtime interception for AI agent tool calls using ATR rules",
   "type": "module",
   "main": "./dist/index.js",
   "types": "./dist/index.d.ts",
+  "exports": {
+    ".": {
+      "import": "./dist/index.js",
+      "types": "./dist/index.d.ts"
+    },
+    "./evaluator": {
+      "import": "./dist/evaluator.js",
+      "types": "./dist/evaluator.d.ts"
+    }
+  },
   "bin": {
     "panguard-mcp-proxy": "./dist/index.js"
   },
   "dependencies": {
     "@modelcontextprotocol/sdk": "^1.12.0",
-    "agent-threat-rules": "^3.4.0",
+    "agent-threat-rules": "^3.5.0",
     "@panguard-ai/containment": "0.1.0",
-    "@panguard-ai/atr": "1.6.1"
+    "@panguard-ai/atr": "1.7.0"
   },
   "peerDependencies": {
-    "@panguard-ai/atr": "1.6.1"
+    "@panguard-ai/atr": "1.7.0"
   },
   "files": [
     "dist",
     "package.json",
-    "README.md"
+    "README.md",
+    "LICENSE"
   ],
   "publishConfig": {
     "access": "public"