npm - @panguard-ai/threat-cloud - Versions diffs - 1.4.2 → 1.5.6 - Mend

@panguard-ai/threat-cloud 1.4.2 → 1.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

package/LICENSE +21 -0
package/dist/audit-logger.d.ts +1 -1
package/dist/audit-logger.d.ts.map +1 -1
package/dist/audit-logger.js.map +1 -1
package/dist/cli.js +1 -1
package/dist/cli.js.map +1 -1
package/dist/database.d.ts +236 -2
package/dist/database.d.ts.map +1 -1
package/dist/database.js +603 -51
package/dist/database.js.map +1 -1
package/dist/llm-reviewer-tools.d.ts +110 -0
package/dist/llm-reviewer-tools.d.ts.map +1 -0
package/dist/llm-reviewer-tools.js +446 -0
package/dist/llm-reviewer-tools.js.map +1 -0
package/dist/llm-reviewer.d.ts +54 -0
package/dist/llm-reviewer.d.ts.map +1 -1
package/dist/llm-reviewer.js +708 -64
package/dist/llm-reviewer.js.map +1 -1
package/dist/migrations.d.ts.map +1 -1
package/dist/migrations.js +215 -0
package/dist/migrations.js.map +1 -1
package/dist/migrator-crystallization.d.ts +80 -0
package/dist/migrator-crystallization.d.ts.map +1 -0
package/dist/migrator-crystallization.js +108 -0
package/dist/migrator-crystallization.js.map +1 -0
package/dist/server.d.ts +69 -2
package/dist/server.d.ts.map +1 -1
package/dist/server.js +1093 -91
package/dist/server.js.map +1 -1
package/dist/types.d.ts +31 -0
package/dist/types.d.ts.map +1 -1
package/package.json +15 -12

package/dist/llm-reviewer.js CHANGED Viewed

@@ -9,10 +9,138 @@
  * @module @panguard-ai/threat-cloud/llm-reviewer
  */
 import * as https from 'node:https';
+import { createHash } from 'node:crypto';
+import { load as parseYaml } from 'js-yaml';
+import { parseATRRule, validateRuleMeetsStandard, } from '@panguard-ai/atr/quality';
+import { TC_DRAFTER_TOOLS, executeToolCall } from './llm-reviewer-tools.js';
+/**
+ * Run a rule's embedded test cases against its own regex conditions.
+ * This is the first-principles quality check: if a rule cannot match its
+ * own claimed TPs or falsely matches its own claimed TNs, the regex is
+ * broken regardless of how good the metadata looks.
+ *
+ * Returns `passed: true` only if there is at least one TP and one TN,
+ * ALL TPs match, AND zero TNs match. Malformed rules (bad YAML, missing or
+ * mistyped `detection.conditions` / `test_cases`) yield `passed: false`
+ * with a reason instead of throwing.
+ *
+ * @param {string} ruleContent - Rule YAML text.
+ * @returns {{passed: boolean, tpTotal: number, tpMatched: number,
+ *   tnTotal: number, tnMatched: number, failureReasons: string[]}}
+ *   `tnMatched` is the number of true negatives that were (wrongly) matched.
+ */
+function selfTestRule(ruleContent) {
+    // Result shape used when the rule fails before any test case is evaluated.
+    const earlyFailure = (reason) => ({
+        passed: false,
+        tpTotal: 0,
+        tpMatched: 0,
+        tnTotal: 0,
+        tnMatched: 0,
+        failureReasons: [reason],
+    });
+    // Arrays pass through; a keyed object contributes its values; anything
+    // else (scalar, null, undefined) is treated as empty rather than throwing.
+    const toList = (v) => {
+        if (Array.isArray(v))
+            return v;
+        return v !== null && typeof v === 'object' ? Object.values(v) : [];
+    };
+    let parsed;
+    try {
+        // NOTE(review): the rule text is LLM-authored. js-yaml v4 `load` uses a
+        // safe schema by default; confirm the installed major version.
+        parsed = parseYaml(ruleContent);
+    }
+    catch (e) {
+        return earlyFailure(`YAML parse error: ${e instanceof Error ? e.message : String(e)}`);
+    }
+    const regexes = [];
+    for (const c of toList(parsed?.detection?.conditions)) {
+        // Only non-empty string values can be compiled; YAML may yield numbers,
+        // booleans or nested objects here.
+        if (typeof c?.value !== 'string' || c.value === '')
+            continue;
+        // Strip (?i) prefix — JS uses /pattern/i flag
+        const pattern = c.value.replace(/^\(\?i\)/, '');
+        try {
+            // NOTE(review): patterns come from untrusted rule text and are run
+            // unbounded against the test inputs below (potential ReDoS).
+            regexes.push(new RegExp(pattern, 'i'));
+        }
+        catch {
+            // Invalid regex — skip this condition. Other conditions may still work.
+        }
+    }
+    if (regexes.length === 0) {
+        return earlyFailure('no compilable regex conditions');
+    }
+    const matchesAny = (text) => regexes.some((r) => r.test(text));
+    // Test cases carry their payload in `input` (or legacy `tool_response`);
+    // coerce to string so the preview slice below cannot throw.
+    const textOf = (testCase) => {
+        const raw = testCase?.input ?? testCase?.tool_response ?? '';
+        return typeof raw === 'string' ? raw : String(raw);
+    };
+    const tps = toList(parsed?.test_cases?.true_positives);
+    const tns = toList(parsed?.test_cases?.true_negatives);
+    const failureReasons = [];
+    if (tps.length === 0) {
+        failureReasons.push('no true_positives test cases');
+    }
+    if (tns.length === 0) {
+        failureReasons.push('no true_negatives test cases');
+    }
+    let tpMatched = 0;
+    tps.forEach((testCase, i) => {
+        const input = textOf(testCase);
+        if (matchesAny(input)) {
+            tpMatched++;
+        }
+        else {
+            failureReasons.push(`TP ${i + 1} not caught: "${input.slice(0, 80)}..."`);
+        }
+    });
+    let tnPassed = 0;
+    tns.forEach((testCase, i) => {
+        const input = textOf(testCase);
+        if (!matchesAny(input)) {
+            tnPassed++;
+        }
+        else {
+            failureReasons.push(`TN ${i + 1} false positive: "${input.slice(0, 80)}..."`);
+        }
+    });
+    // A rule passes self-test if all TPs match AND zero TNs match, and both
+    // lists are non-empty (an untested rule never passes).
+    const passed = tpMatched === tps.length && tnPassed === tns.length && tps.length > 0 && tns.length > 0;
+    return {
+        passed,
+        tpTotal: tps.length,
+        tpMatched,
+        tnTotal: tns.length,
+        tnMatched: tns.length - tnPassed, // FP count
+        failureReasons,
+    };
+}
 /** Timeout for LLM API calls in milliseconds */
 const LLM_TIMEOUT_MS = 60_000;
-/** Default model for review */
-const DEFAULT_MODEL = 'claude-sonnet-4-20250514';
+/**
+ * Drafter model — used for bulk rule generation from attack payloads
+ * (garak pipe, skill scans). Defaults to Haiku for cost efficiency.
+ * Override via TC_DRAFTER_MODEL env var.
+ *
+ * Cost profile (per 1M tokens):
+ *   - Haiku 3.5:   $0.80 in / $4.00 out
+ *   - Haiku 4.5:   $1.00 in / $5.00 out   (90% of Sonnet capability per CLAUDE.md)
+ *   - Sonnet 4:    $3.00 in / $15.00 out  (3x Haiku 4.5, 3.75x Haiku 3.5)
+ *
+ * Haiku is sufficient for rule drafting — the RFC-001 quality gate + self-test
+ * catch output defects regardless of model. Sonnet adds ~3-5% quality at 3-4x
+ * the cost, which is not worth it for the bulk drafter.
+ */
+const DEFAULT_DRAFTER_MODEL = process.env['TC_DRAFTER_MODEL'] ?? 'claude-haiku-4-5-20251001';
+/**
+ * Reviewer model — used for the second-opinion review pass after a proposal
+ * is drafted (reviewProposal). Quality-critical, stays on Sonnet.
+ * Override via TC_REVIEWER_MODEL env var.
+ */
+const DEFAULT_REVIEWER_MODEL = process.env['TC_REVIEWER_MODEL'] ?? 'claude-sonnet-4-20250514';
+/** Legacy alias — kept so existing call sites compile during refactor. */
+const _DEFAULT_MODEL = DEFAULT_REVIEWER_MODEL;
+/**
+ * Canonicalize a payload before it is hashed into a fingerprint.
+ *
+ * Steps, in order: lowercase; replace every character that is not a Unicode
+ * letter, a Unicode digit or whitespace with a space; collapse whitespace
+ * runs to a single space; trim; keep at most the first 2000 characters.
+ *
+ * Example: "Ignore previous instructions" and "IGNORE   previous
+ * instructions!!!" both normalize to "ignore previous instructions".
+ */
+function normalizePayloadForFingerprint(payload) {
+    const MAX_CHARS = 2000;
+    const lowered = payload.toLowerCase();
+    // Punctuation and symbols become spaces so they still separate words.
+    const depunctuated = lowered.replace(/[^\p{L}\p{N}\s]/gu, ' ');
+    const collapsed = depunctuated.replace(/\s+/g, ' ').trim();
+    return collapsed.slice(0, MAX_CHARS);
+}
+/**
+ * Derive a fingerprint for a payload: the first 16 hex characters of the
+ * SHA-256 digest of its normalized form.
+ */
+function payloadFingerprint(payload) {
+    const FINGERPRINT_HEX_CHARS = 16;
+    const hasher = createHash('sha256');
+    hasher.update(normalizePayloadForFingerprint(payload));
+    const hexDigest = hasher.digest('hex');
+    return hexDigest.slice(0, FINGERPRINT_HEX_CHARS);
+}
 /**
  * LLM Reviewer for ATR rule proposals
  * ATR 規則提案 LLM 審查器
@@ -20,11 +148,25 @@ const DEFAULT_MODEL = 'claude-sonnet-4-20250514';
 export class LLMReviewer {
     apiKey;
     db;
+    /**
+     * Primary model used throughout this class. Historically one model served
+     * both drafting and reviewing; we now split to `drafterModel` (Haiku, cheap)
+     * and `reviewerModel` (Sonnet, quality-critical). `this.model` retains the
+     * reviewer value for backward compat with existing `reviewProposal` callers.
+     */
     model;
+    /** Model used for rule drafting (Haiku by default — 4x cheaper than Sonnet). */
+    drafterModel;
+    /** Model used for second-opinion review (Sonnet by default). */
+    reviewerModel;
     constructor(apiKey, db, model) {
         this.apiKey = apiKey;
         this.db = db;
-        this.model = model ?? DEFAULT_MODEL;
+        // Honor legacy `model` constructor arg for backward compat; when set it
+        // overrides BOTH drafter and reviewer. New code should prefer env vars.
+        this.reviewerModel = model ?? DEFAULT_REVIEWER_MODEL;
+        this.drafterModel = model ?? DEFAULT_DRAFTER_MODEL;
+        this.model = this.reviewerModel;
     }
     /** Check if the reviewer is available (API key is set) / 檢查審查器是否可用 */
     isAvailable() {
@@ -51,10 +193,11 @@ export class LLMReviewer {
                 console.error(`  -> Transient error, keeping proposal pending for retry`);
                 return { verdict: '', approved: false };
             }
-            // Permanent errors: store failure verdict
+            // Non-transient errors: store failure but keep proposal pending for retry
+            // Do NOT auto-reject — API errors are not evidence of bad rules
             const failVerdict = JSON.stringify({
                 approved: false,
-                falsePositiveRisk: 'high',
+                falsePositiveRisk: 'medium',
                 coverageScore: 0,
                 reasoning: `LLM review failed: ${msg}`,
             });
@@ -66,8 +209,14 @@ export class LLMReviewer {
         const verdictJson = JSON.stringify(verdict);
         // Store verdict in database
         this.db.updateATRProposalLLMReview(patternHash, verdictJson);
-        // If high false positive risk AND not approved, reject the proposal
-        if (!verdict.approved && verdict.falsePositiveRisk === 'high') {
+        // Terminal state transition on any legitimate rejection.
+        // Transient errors are handled earlier (they return without reaching this
+        // code path), so if we got a parsed verdict with approved=false, the LLM
+        // has made a reasoned decision — move the proposal to 'rejected' so the
+        // retry cron stops picking it up. Previously only high-FP rejections were
+        // marked terminal, which left low/medium-FP rejections in an infinite
+        // retry loop burning LLM API quota.
+        if (!verdict.approved) {
             this.db.rejectATRProposal(patternHash);
         }
         return { verdict: verdictJson, approved: verdict.approved };
@@ -119,9 +268,13 @@ Output ONLY valid JSON (no markdown, no explanation outside the JSON):
      */
     callAnthropicAPI(prompt) {
         return new Promise((resolve, reject) => {
+            // 4096 tokens is needed because the ATR drafter prompt requires
+            // a full rule YAML with 3+ conditions, 3+ TP, 3+ TN, 3+ evasion tests,
+            // MITRE + OWASP references, and descriptions. 1024 was cutting off
+            // mid-YAML and the regex extractor dropped the truncated block.
             const requestBody = JSON.stringify({
                 model: this.model,
-                max_tokens: 1024,
+                max_tokens: 4096,
                 messages: [{ role: 'user', content: prompt }],
             });
             const options = {
@@ -174,81 +327,323 @@ Output ONLY valid JSON (no markdown, no explanation outside the JSON):
             req.end();
         });
     }
+    /**
+     * Low-level Anthropic messages call that accepts a prepared request body.
+     * Used by the tool-use loop so we can pass full message histories.
+     *
+     * POSTs `body` as JSON to api.anthropic.com/v1/messages.
+     *
+     * @param {object} body - Request payload, serialized with JSON.stringify.
+     * @returns {Promise<{content: Array, stop_reason: *}>} The response's
+     *   `content` (defaulting to `[]` when absent) and its `stop_reason`.
+     *   Rejects with an Error on a non-200 status, an unparseable body, a
+     *   timeout after LLM_TIMEOUT_MS, or a request/response stream error.
+     */
+    callAnthropicRaw(body) {
+        return new Promise((resolve, reject) => {
+            const requestBody = JSON.stringify(body);
+            const options = {
+                hostname: 'api.anthropic.com',
+                port: 443,
+                path: '/v1/messages',
+                method: 'POST',
+                headers: {
+                    'x-api-key': this.apiKey,
+                    'anthropic-version': '2023-06-01',
+                    'content-type': 'application/json',
+                    'content-length': Buffer.byteLength(requestBody),
+                },
+                timeout: LLM_TIMEOUT_MS,
+            };
+            const req = https.request(options, (res) => {
+                const chunks = [];
+                res.on('data', (chunk) => chunks.push(chunk));
+                // A response that fails mid-body does not emit 'end'; reject
+                // here so the promise cannot be left pending.
+                res.on('error', (err) => reject(new Error(`Anthropic API error: ${err.message}`)));
+                res.on('end', () => {
+                    const bodyText = Buffer.concat(chunks).toString('utf-8');
+                    if (res.statusCode !== 200) {
+                        // Include a bounded slice of the body for diagnostics.
+                        reject(new Error(`Anthropic API status ${res.statusCode}: ${bodyText.slice(0, 500)}`));
+                        return;
+                    }
+                    try {
+                        const parsed = JSON.parse(bodyText);
+                        resolve({ content: parsed.content ?? [], stop_reason: parsed.stop_reason });
+                    }
+                    catch (err) {
+                        reject(new Error(`Anthropic API parse: ${err instanceof Error ? err.message : String(err)}`));
+                    }
+                });
+            });
+            req.on('timeout', () => {
+                // The 'timeout' event only signals idleness; the request must be
+                // torn down explicitly.
+                req.destroy();
+                reject(new Error(`Anthropic API timeout after ${LLM_TIMEOUT_MS}ms`));
+            });
+            req.on('error', (err) => reject(new Error(`Anthropic API error: ${err.message}`)));
+            req.write(requestBody);
+            req.end();
+        });
+    }
+    /**
+     * Tool-use loop for TC v2 drafter. Runs a multi-turn conversation with
+     * Claude where it can call the tools in TC_DRAFTER_TOOLS (dispatched via
+     * executeToolCall) before emitting its final answer.
+     *
+     * Returns the concatenated text of Claude's final assistant turn (the
+     * first turn whose stop_reason is not "tool_use"), plus the number of
+     * tool calls executed.
+     *
+     * Max 6 tool-use rounds per call to bound latency and cost. If Claude is
+     * still requesting tools after the 6th round, one extra request is sent
+     * with tool use disabled and an explicit instruction to finalize, so the
+     * caller receives an answer instead of an empty string.
+     *
+     * @param {string} systemPrompt - System prompt for every request.
+     * @param {string} userMessage - Initial user turn.
+     * @param {{model?: string}} [options] - Optional model override.
+     * @returns {Promise<{finalText: string, toolCalls: number}>}
+     */
+    async callAnthropicWithTools(systemPrompt, userMessage, options) {
+        const MAX_ROUNDS = 6;
+        // NOTE(review): defaults to `this.model`; confirm whether drafter call
+        // sites that pass no options are meant to use a drafter-specific model.
+        const modelToUse = options?.model ?? this.model;
+        const messages = [
+            { role: 'user', content: userMessage },
+        ];
+        let toolCalls = 0;
+        let finalText = '';
+        // Tool results sent in the latest user turn that Claude has not yet
+        // answered; non-null after the loop means the round budget ran out.
+        let unansweredToolResults = null;
+        for (let round = 0; round < MAX_ROUNDS; round++) {
+            const body = {
+                model: modelToUse,
+                max_tokens: 4096,
+                system: systemPrompt,
+                tools: TC_DRAFTER_TOOLS,
+                messages,
+            };
+            const response = await this.callAnthropicRaw(body);
+            unansweredToolResults = null;
+            // Collect assistant response as content blocks
+            const assistantBlocks = [];
+            const toolUses = [];
+            for (const block of response.content) {
+                if (block.type === 'text' && typeof block.text === 'string') {
+                    assistantBlocks.push({ type: 'text', text: block.text });
+                    finalText += (finalText ? '\n' : '') + block.text;
+                }
+                else if (block.type === 'tool_use' && block.id && block.name) {
+                    assistantBlocks.push({
+                        type: 'tool_use',
+                        id: block.id,
+                        name: block.name,
+                        input: block.input ?? {},
+                    });
+                    toolUses.push({ id: block.id, name: block.name, input: block.input ?? {} });
+                }
+            }
+            messages.push({ role: 'assistant', content: assistantBlocks });
+            if (response.stop_reason !== 'tool_use' || toolUses.length === 0) {
+                // Claude is done — return its final text
+                break;
+            }
+            // Execute each requested tool and build a tool_result turn
+            const toolResults = [];
+            for (const tu of toolUses) {
+                toolCalls++;
+                console.log(`[tc-v2] round ${round + 1}: tool ${tu.name}(${JSON.stringify(tu.input).slice(0, 120)})`);
+                const result = await executeToolCall(tu.name, tu.input);
+                toolResults.push({
+                    type: 'tool_result',
+                    tool_use_id: tu.id,
+                    content: result.content.slice(0, 8000), // cap per-tool-result size
+                    is_error: result.isError,
+                });
+            }
+            messages.push({ role: 'user', content: toolResults });
+            unansweredToolResults = toolResults;
+            // Reset finalText — we only want the LAST assistant turn's text
+            // (which contains the YAML rule output), not the interim narration
+            finalText = '';
+        }
+        if (unansweredToolResults !== null) {
+            // Round budget exhausted while Claude was still calling tools.
+            // Append the instruction after the tool_result blocks of the pending
+            // user turn and disable tool use for one last request.
+            console.log(`[tc-v2] tool budget exhausted after ${MAX_ROUNDS} rounds; requesting final answer`);
+            unansweredToolResults.push({
+                type: 'text',
+                text: 'Tool budget exhausted. Do not call any more tools. Output your final answer now.',
+            });
+            const response = await this.callAnthropicRaw({
+                model: modelToUse,
+                max_tokens: 4096,
+                system: systemPrompt,
+                // Tool definitions stay in the request because the history
+                // contains tool_use / tool_result blocks.
+                tools: TC_DRAFTER_TOOLS,
+                tool_choice: { type: 'none' },
+                messages,
+            });
+            for (const block of response.content) {
+                if (block.type === 'text' && typeof block.text === 'string') {
+                    finalText += (finalText ? '\n' : '') + block.text;
+                }
+            }
+        }
+        return { finalText, toolCalls };
+    }
     // -------------------------------------------------------------------------
     // Skill Analysis — POST /api/analyze-skills
     // 技能分析 — 接收掃描結果，用 LLM 找 regex 漏掉的 semantic threats
     // -------------------------------------------------------------------------
-    static ATR_DRAFTER_PROMPT = `You are a senior AI security rule engineer for the ATR (Agent Threat Rules) standard.
+    /** Prompt for skill/tool analysis (both MCP and SKILL.md) */
+    static ATR_DRAFTER_PROMPT = `You are a senior AI security rule engineer for ATR (Agent Threat Rules). Cisco AI Defense merged 34 ATR rules into production. Your output must meet that quality bar AND the RFC-001 v1.0 quality gate (5+ TP, 5+ TN, 3+ evasion_tests, OWASP LLM + OWASP Agentic + MITRE ATLAS required).
+You have three tools: grep_existing_rules, read_rule, fetch_research. Use them.
+PROTOCOL — you MUST follow this order:
+STEP 0 — De-duplication check (required, non-negotiable):
+  a) Call grep_existing_rules with 2-4 keywords from the attack you are considering. Example keywords: ["prompt injection", "IMPORTANT tag"], ["credential exfil", "ssh key"], ["tool poisoning", "cross-tool"], ["hidden instruction", "system override"].
+  b) Read the results. If any matching rules look topically similar, call read_rule on the 1-3 most relevant to inspect their regex patterns and test cases.
+  c) Decide:
+     - If the attack is ALREADY covered by an existing rule (same patterns, same category) → output NO_THREATS_FOUND and stop. Do not duplicate existing work.
+     - If the attack is a NOVEL VARIANT that slips past existing regex → draft a new rule explicitly referencing what it catches that existing rules miss.
+     - If the attack is a GENUINELY NEW CLASS → draft a new rule from scratch.
+STEP 1 — Research grounding (strongly recommended):
+  If you're drafting a new rule, call fetch_research on at least one reputable source that documents the attack class. Suggested sources: invariantlabs.ai/blog, elastic.co/security-labs, snyk.io/articles, arxiv.org, atlas.mitre.org, unit42.paloaltonetworks.com, genai.owasp.org. Cite the source in the rule's \`references.research\` field.
+STEP 2 — Draft the rule (only after steps 0 and 1):
-You will receive MCP tool descriptions from a skill. Your job is to write PRODUCTION-QUALITY detection rules for SPECIFIC, CONCRETE attack patterns — not vague risk categories.
+You will receive MCP tool descriptions from a skill. Write a PRODUCTION-QUALITY detection rule ONLY if you find a SPECIFIC, CONCRETE attack pattern AND have verified it is not already covered.
-STRICT REQUIREMENTS — rules that violate these will be REJECTED:
+QUALITY BAR (Cisco-merge level + RFC-001 v1.0):
-1. REGEX MUST BE HIGHLY SPECIFIC — require multi-word phrase matching.
-   GOOD: "without\\s+(?:asking|requiring)\\s+(?:user|human)\\s+(?:confirmation|approval)"
-   BAD:  "without asking" or "directly to \\w+" (too generic, matches normal docs).
-   Each regex must require 3+ specific words in sequence to avoid false positives.
+1. REGEX — SINGLE-QUOTED YAML, compound patterns, 3+ word sequences:
+   GOOD: '(curl|wget)\\s+[^\\n]*\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}[^|]*\\|\\s*(bash|sh)'
+   BAD:  "config.*base64.*import" (too broad, double quotes cause YAML escape bugs)
+   CRITICAL: Always use SINGLE QUOTES for regex values in YAML. Double quotes break \\s, \\d, \\w.
+   CRITICAL: Do NOT use (?i) inline flag — JS RegExp does not support it. Instead, use character classes like [aA] or write pattern without case sensitivity assumption.
-2. FALSE POSITIVE RATE MUST BE LOW — the rule should NOT trigger on:
-   - Legitimate deployment tools, monitoring tools, or admin panels
-   - Normal API clients that read credentials from config
-   - Standard CRUD operations (list, get, create, update, delete)
-   - Short but legitimate tool descriptions
+2. DETECTION LAYERS — minimum 3 conditions, 5+ preferred:
+   Cisco-merge rules have 5-15 detection layers for defense in depth.
+   Each condition catches a different facet of the attack:
+     - Layer 1: the core malicious action (e.g. credential file read)
+     - Layer 2: the exfil vector (e.g. base64 encoding, compression)
+     - Layer 3: the transmission (e.g. curl POST to external host)
+     - Layer 4: (optional) obfuscation markers (eval, decode, fromCharCode)
+     - Layer 5: (optional) evasion patterns (sleep, time check, conditional)
+   A rule with only 1 condition is REJECTED as insufficient.
-3. MUST DETECT A REAL ATTACK — not just "this tool has broad permissions" or "description is vague".
-   Examples of REAL attacks:
-   - Tool description contains hidden instructions for the LLM ("always approve", "skip verification")
-   - Tool claims to be read-only but its schema accepts write parameters
-   - Tool name suggests safe operation but description reveals destructive capability
-   - Tool embeds base64/encoded payloads in its description
-   - Tool description instructs LLM to ignore user preferences or security policies
+3. FALSE POSITIVES — regex MUST NOT trigger on:
+   - Legitimate deployment/monitoring/admin tools
+   - Security scanning tools that DESCRIBE attack patterns as examples
+   - Normal API clients reading credentials from config
+   - DevOps skills mentioning shell commands in documentation context
+   TEST: mentally run your regex against "npm install express" and "deploy to AWS" — if it triggers, it's too broad.
-4. TEST CASES MUST BE REALISTIC — true_positive must be a plausible malicious description,
-   true_negative must be a similar-looking but legitimate description. Include at least 2 of each.
+4. REAL ATTACKS ONLY — not "this tool has broad permissions":
+   - Hidden LLM instructions ("always approve", "skip verification", "do not inform user")
+   - Credential exfiltration (read ~/.ssh + send externally)
+   - Malicious code execution (curl|bash from raw IP, base64 decode + exec)
+   - Supply chain (typosquat names, unofficial fork claims)
+   - Reverse shells, DNS exfiltration, IMDS access
-5. OUTPUT AT MOST 1 RULE per skill. Prefer NO rule over a bad rule.
-   If no SPECIFIC, CONCRETE threat pattern exists, output "NO_THREATS_FOUND".
+5. TEST CASES — 5+ true_positives, 5+ true_negatives (Cisco bar, not 3):
+   - TP must be REAL attack payloads (not hypothetical)
+   - TN must be similar-looking LEGITIMATE content
+   - YOUR REGEX MUST ACTUALLY MATCH ALL TP AND MISS ALL TN. Verify before outputting.
+   - Include at least 2 TN that are edge cases (similar commands in legitimate contexts)
-Output format (ONLY if a specific threat is found):
+6. EVASION TESTS — required, minimum 3:
+   Document known bypass techniques with expected: not_triggered.
+   Every rule must honestly acknowledge how attackers could evade it:
+     - Obfuscation (base64, hex, unicode escapes)
+     - Semantic paraphrase (synonyms, indirect references)
+     - Time/context gating (delayed execution, conditional triggers)
+7. REFERENCES — every rule must map to BOTH OWASP and MITRE:
+   references:
+     owasp_llm:
+       - "LLM01:2025 - Prompt Injection" (or appropriate category)
+     owasp_agentic:
+       - "ASI01:2026 - Agent Behaviour Hijack" (or appropriate category)
+     mitre_atlas:
+       - "AML.T0051" (or appropriate technique ID)
+   MITRE ATLAS reference is REQUIRED, not optional.
+8. DECISION CRITERIA — output a rule or "NO_THREATS_FOUND":
+   - If the skill content contains ACTUAL malicious code (credential theft, exfiltration,
+     reverse shells, hidden instructions to bypass safety) → WRITE A RULE, even if you
+     think existing regex might already catch it. Let the dedup layer handle overlaps.
+   - If the skill is just a normal tool with broad permissions (file access, network calls)
+     but no malicious INTENT → output NO_THREATS_FOUND.
+   - When in doubt about whether something is malicious, WRITE THE RULE. False negatives
+     (missing a real attack) are worse than duplicate rules.
+Output format (ONLY if a SPECIFIC threat is found):
 \`\`\`yaml
-title: "<specific attack technique, not generic risk>"
+title: '<specific attack technique>'
 id: ATR-2026-DRAFT-<8char-hex>
-status: draft
-description: |
-  <what SPECIFIC attack this detects, with concrete example from the analyzed skill>
-author: "Threat Cloud LLM Analyzer"
+rule_version: 1
+status: experimental
+description: >
+  <what SPECIFIC attack this detects, referencing the analyzed skill content>
+author: "ATR Threat Cloud Crystallization"
 date: "${new Date().toISOString().slice(0, 10).replace(/-/g, '/')}"
 schema_version: "0.1"
-detection_tier: semantic
+detection_tier: pattern
 maturity: experimental
-severity: <critical|high|medium|low>
+severity: <critical|high|medium>
+references:
+  owasp_llm:
+    - "<most relevant LLM Top 10 category>"
+  owasp_agentic:
+    - "<most relevant Agentic Top 10 category>"
+  mitre_atlas:
+    - "<AML.Txxxx technique ID — REQUIRED>"
 tags:
-  category: <tool-poisoning|prompt-injection|data-exfiltration|privilege-escalation>
+  category: <skill-compromise|tool-poisoning|prompt-injection|context-exfiltration|privilege-escalation>
   subcategory: <specific-technique>
-  confidence: medium
+  scan_target: <mcp|skill|both>
+  confidence: <high|medium>
+agent_source:
+  type: mcp_exchange
+  framework: [any]
+  provider: [any]
 detection:
   conditions:
-    - field: tool_description
+    - field: content
+      operator: regex
+      value: '<LAYER 1: core malicious action regex>'
+      description: '<what layer 1 matches>'
+    - field: content
       operator: regex
-      value: "<SPECIFIC regex with word boundaries and context>"
-      description: "<exactly what malicious pattern this matches>"
+      value: '<LAYER 2: exfil/encoding vector regex>'
+      description: '<what layer 2 matches>'
+    - field: content
+      operator: regex
+      value: '<LAYER 3: transmission/execution regex>'
+      description: '<what layer 3 matches>'
   condition: any
+  false_positives:
+    - '<edge case 1 — legitimate content that looks similar>'
+    - '<edge case 2 — common benign pattern>'
+    - '<edge case 3 — dev/admin tool context>'
 response:
-  actions: [alert, snapshot]
+  actions: [alert, block_tool]
+  message_template: >
+    [ATR-2026-DRAFT] <one-line description of what was detected>
 test_cases:
   true_positives:
-    - tool_description: "<realistic malicious tool description that should trigger>"
+    - input: '<real attack payload 1>'
+      expected: triggered
+    - input: '<real attack payload 2>'
+      expected: triggered
+    - input: '<real attack payload 3>'
+      expected: triggered
+    - input: '<real attack payload 4>'
       expected: triggered
-    - tool_description: "<another variant>"
+    - input: '<real attack payload 5>'
       expected: triggered
   true_negatives:
-    - tool_description: "<similar but legitimate tool description>"
+    - input: '<similar but safe content 1>'
       expected: not_triggered
-    - tool_description: "<another legitimate example>"
+      reason: '<why this is safe>'
+    - input: '<similar but safe content 2>'
       expected: not_triggered
+      reason: '<why this is safe>'
+    - input: '<similar but safe content 3>'
+      expected: not_triggered
+      reason: '<why this is safe>'
+    - input: '<edge case 4 — common legitimate usage>'
+      expected: not_triggered
+      reason: '<why this is safe>'
+    - input: '<edge case 5 — devops/admin tool context>'
+      expected: not_triggered
+      reason: '<why this is safe>'
+evasion_tests:
+  - input: '<bypass 1 — obfuscation variant>'
+    expected: not_triggered
+    bypass_technique: '<technique name>'
+    notes: '<how attacker could evade>'
+  - input: '<bypass 2 — semantic paraphrase>'
+    expected: not_triggered
+    bypass_technique: '<technique name>'
+    notes: '<why this bypasses the regex>'
+  - input: '<bypass 3 — time-gated or conditional>'
+    expected: not_triggered
+    bypass_technique: '<technique name>'
+    notes: '<explanation>'
 \`\`\`
-REMEMBER: Output "NO_THREATS_FOUND" for 90%+ of skills. Only flag genuinely suspicious patterns.`;
+BEFORE OUTPUTTING — reject your own output if any check fails:
+- [ ] At least 3 detection conditions (NOT 1)
+- [ ] At least 5 true_positives + 5 true_negatives (Cisco bar, not 3)
+- [ ] At least 3 evasion_tests documenting known bypasses
+- [ ] MITRE ATLAS reference present (REQUIRED)
+- [ ] OWASP LLM + OWASP Agentic references present
+- [ ] No (?i) inline flag — JS does not support it
+- [ ] Single-quoted regex values
+- [ ] Every condition has a description field
+- [ ] Your regex matches ALL true_positives AND misses ALL true_negatives
+If you cannot meet this bar, output NO_THREATS_FOUND instead of a weak rule.`;
     /**
      * Analyze skill scan results for semantic threats regex missed
      * 分析技能掃描結果，找出 regex 漏掉的語義威脅
@@ -256,16 +651,19 @@ REMEMBER: Output "NO_THREATS_FOUND" for 90%+ of skills. Only flag genuinely susp
     async analyzeSkills(skills) {
         const results = [];
         for (const skill of skills) {
-            if (!skill.tools || skill.tools.length < 2)
+            if (!skill.tools || skill.tools.length === 0)
                 continue;
             const toolSummary = skill.tools
                 .slice(0, 30) // Limit to avoid token overflow
                 .map((t) => `- ${t.name}: ${t.description}`)
                 .join('\n');
-            const userMessage = `Analyze these MCP tools from "${skill.package}" for threats that regex scanning missed:\n\n${toolSummary}`;
+            const userMessage = `Analyze this skill content from "${skill.package}" for threats that regex scanning missed. Before drafting a rule, call grep_existing_rules to verify the attack class is not already covered. If a similar rule exists, call read_rule to inspect it and either propose a narrowly-scoped new variant or emit NO_THREATS_FOUND. Ground novel attack claims in research via fetch_research when possible.\n\nSkill content:\n\n${toolSummary}`;
             try {
-                const responseText = await this.callAnthropicAPI(LLMReviewer.ATR_DRAFTER_PROMPT + '\n\n' + userMessage);
+                const { finalText: responseText, toolCalls } = await this.callAnthropicWithTools(LLMReviewer.ATR_DRAFTER_PROMPT, userMessage);
+                console.log(`[LLM] analyzeSkills (tc-v2) for "${skill.package}": ${responseText.length} chars, ${toolCalls} tool calls`);
+                console.log(`[LLM] First 500 chars: ${responseText.slice(0, 500)}`);
                 if (responseText.includes('NO_THREATS_FOUND')) {
+                    console.log(`[LLM] Verdict: NO_THREATS_FOUND for "${skill.package}"`);
                     results.push({
                         package: skill.package,
                         threatsFound: false,
@@ -275,8 +673,18 @@ REMEMBER: Output "NO_THREATS_FOUND" for 90%+ of skills. Only flag genuinely susp
                     continue;
                 }
                 // Extract YAML blocks
-                const yamlBlocks = responseText.match(/```yaml\n([\s\S]*?)```/g);
+                // Primary: properly-closed ```yaml\n...```
+                // Fallback: opening ```yaml\n...<end of string> (truncation safety net)
+                let yamlBlocks = responseText.match(/```yaml\n([\s\S]*?)```/g);
                 if (!yamlBlocks || yamlBlocks.length === 0) {
+                    const unclosed = responseText.match(/```yaml\n([\s\S]*?)$/);
+                    if (unclosed) {
+                        console.log(`[LLM] Recovered unclosed YAML block (max_tokens likely hit) for "${skill.package}"`);
+                        yamlBlocks = [unclosed[0] + '\n```'];
+                    }
+                }
+                if (!yamlBlocks || yamlBlocks.length === 0) {
+                    console.log(`[LLM] No YAML blocks found in response for "${skill.package}". Response starts with: ${responseText.slice(0, 200)}`);
                     results.push({
                         package: skill.package,
                         threatsFound: false,
@@ -285,26 +693,70 @@ REMEMBER: Output "NO_THREATS_FOUND" for 90%+ of skills. Only flag genuinely susp
                     });
                     continue;
                 }
+                console.log(`[LLM] Found ${yamlBlocks.length} YAML block(s) for "${skill.package}"`);
                 const proposals = [];
                 const { createHash } = await import('node:crypto');
                 for (const block of yamlBlocks) {
-                    const ruleContent = block
+                    let ruleContent = block
                         .replace(/```yaml\n?/, '')
                         .replace(/```$/, '')
                         .trim();
                     // Validate: must have required ATR fields
-                    if (!ruleContent.includes('title:') || !ruleContent.includes('detection:'))
+                    if (!ruleContent.includes('title:') || !ruleContent.includes('detection:')) {
+                        console.log(`[LLM] YAML block skipped — missing title: (${ruleContent.includes('title:')}) or detection: (${ruleContent.includes('detection:')}). First 200 chars: ${ruleContent.slice(0, 200)}`);
                         continue;
-                    // Validate regex in the rule
-                    const regexMatch = ruleContent.match(/value:\s*"([^"]+)"/);
+                    }
+                    // Validate regex in the rule (match both single and double quoted values)
+                    const regexMatch = ruleContent.match(/value:\s*(['"])((?:(?!\1).)+)\1/);
                     if (regexMatch) {
+                        // Strip (?i) prefix — JS uses /pattern/i flag instead of PCRE inline (?i)
+                        const rawPattern = regexMatch[2];
+                        const jsPattern = rawPattern.replace(/^\(\?i\)/g, '');
                         try {
-                            new RegExp(regexMatch[1], 'i');
+                            new RegExp(jsPattern, 'i');
                         }
-                        catch {
+                        catch (regexErr) {
+                            console.log(`[LLM] YAML block skipped — invalid regex: ${rawPattern.slice(0, 100)}. Error: ${regexErr instanceof Error ? regexErr.message : String(regexErr)}`);
                             continue; // Skip rules with invalid regex
                         }
+                        // If we stripped (?i), also fix it in the rule content so downstream consumers don't hit the same issue
+                        if (rawPattern !== jsPattern) {
+                            ruleContent = ruleContent.replace(rawPattern, jsPattern);
+                            console.log(`[LLM] Stripped (?i) prefix from regex for JS compatibility`);
+                        }
+                    }
+                    // ATR Quality Gate — use the canonical library from agent-threat-rules/quality
+                    // Reject rules that don't meet the experimental quality bar (3+ conditions,
+                    // 3 TP + 3 TN, OWASP + MITRE, FP docs). See RFC-001 §3.
+                    let gateResult;
+                    try {
+                        const metadata = parseATRRule(ruleContent);
+                        // Mark as LLM-generated so downstream consumers know provenance
+                        const enriched = { ...metadata, llmGenerated: true };
+                        gateResult = validateRuleMeetsStandard(enriched, 'experimental');
+                    }
+                    catch (parseErr) {
+                        console.log(`[LLM] Rule rejected — failed to parse YAML for quality gate: ${parseErr instanceof Error ? parseErr.message : String(parseErr)}`);
+                        continue;
                     }
+                    if (!gateResult.passed) {
+                        console.log(`[LLM] Rule rejected by ATR Quality Gate: ${gateResult.issues.join('; ')}`);
+                        continue;
+                    }
+                    if (gateResult.warnings.length > 0) {
+                        console.log(`[LLM] Rule passed gate with warnings: ${gateResult.warnings.join('; ')}`);
+                    }
+                    // Self-test: run the rule's own test_cases against its own regex.
+                    // This is the first-principles quality check — if LLM-produced regex
+                    // can't match its own TPs or incorrectly matches its own TNs, the
+                    // rule is broken regardless of how good the metadata looks.
+                    const selfTest = selfTestRule(ruleContent);
+                    if (!selfTest.passed) {
+                        console.log(`[LLM] Rule rejected by self-test: TP ${selfTest.tpMatched}/${selfTest.tpTotal}, TN FP ${selfTest.tnMatched}/${selfTest.tnTotal}. ` +
+                            `Reasons: ${selfTest.failureReasons.slice(0, 3).join(' | ')}`);
+                        continue;
+                    }
+                    console.log(`[LLM] Rule passed self-test: ${selfTest.tpMatched}/${selfTest.tpTotal} TP caught, ${selfTest.tnTotal - selfTest.tnMatched}/${selfTest.tnTotal} TN clean`);
                     const patternHash = createHash('sha256').update(ruleContent).digest('hex').slice(0, 16);
                     // Submit as proposal + auto-review
                     this.db.insertATRProposal({
@@ -316,6 +768,8 @@ REMEMBER: Output "NO_THREATS_FOUND" for 90%+ of skills. Only flag genuinely susp
                             approved: true,
                             source: 'skill-analysis',
                             package: skill.package,
+                            provenance: 'llm-generated',
+                            gateWarnings: gateResult.warnings,
                         }),
                     });
                     // Fire-and-forget: review the proposal we just created
@@ -345,6 +799,195 @@ REMEMBER: Output "NO_THREATS_FOUND" for 90%+ of skills. Only flag genuinely susp
         }
         return results;
     }
+    /**
+     * Draft a full ATR YAML rule from a raw attack payload supplied by an
+     * external red-team source (e.g. NVIDIA garak). Same drafter pipeline as
+     * analyzeSkills but the input is the attack prompt itself rather than a
+     * skill's tool descriptions. Returns the drafted proposal or null when
+     * the drafter declined to write a rule (NO_THREATS_FOUND, duplicate of
+     * existing coverage, failed quality gate, failed self-test).
+     *
+     * Callers:
+     *  - POST /api/atr-proposals/from-payload (partner / admin auth)
+     *
+     * Always inserts the resulting proposal into atr_proposals so the normal
+     * canary → auto-merge → npm publish pipeline can take over.
+     */
+    async draftRuleFromPayload(payload, meta) {
+        // Bound input to keep the drafter prompt under token budget. Longer prompts
+        // than this are unusual for prompt-injection payloads and just waste tokens.
+        const boundedPayload = payload.slice(0, 8000);
+        const probe = meta.probe || 'unknown-probe';
+        const detector = meta.detector || 'unknown-detector';
+        const targetModel = meta.targetModel || 'unspecified-model';
+        const partner = meta.partnerName || 'external-red-team';
+        // ------------------------------------------------------------------
+        // FAST PATH: payload fingerprint dedup.
+        //
+        // Normalize the payload (lowercase, strip punctuation, collapse whitespace)
+        // and hash it. If we've seen this fingerprint before, we already know what
+        // the LLM would say — return the cached verdict without calling the API.
+        //
+        // Empirically ~90% of garak corpus submissions hit this cache on a second
+        // or subsequent run. Eliminating those API calls is the single biggest
+        // cost reduction in the drafter pipeline.
+        // ------------------------------------------------------------------
+        const fingerprint = payloadFingerprint(boundedPayload);
+        const cached = this.db.getPayloadFingerprint(fingerprint);
+        if (cached) {
+            // Bump hit count so we can see cache effectiveness in the stats
+            this.db.recordPayloadFingerprint(fingerprint, cached.result);
+            if (cached.result === 'novel' && cached.patternHash) {
+                // Previous call generated a rule — find it and return
+                const existing = this.db.getATRProposalByHash(cached.patternHash);
+                if (existing) {
+                    console.log(`[draftRuleFromPayload] fingerprint cache hit (novel): ${fingerprint} → ${cached.patternHash}`);
+                    // We don't have the ruleContent in getATRProposalByHash's return shape,
+                    // but that's OK — callers only use ruleContent for logging; returning a
+                    // minimal placeholder with the real patternHash is sufficient for the
+                    // dedup path (the original proposal row in atr_proposals is what
+                    // downstream canary → promote pipelines actually consume).
+                    return {
+                        patternHash: cached.patternHash,
+                        ruleContent: `# cached — see atr_proposals.pattern_hash=${cached.patternHash}`,
+                        toolCalls: 0,
+                    };
+                }
+                // Cached entry points at a proposal that was later deleted; fall through
+                // to re-draft rather than fail silently.
+            }
+            else {
+                // Previously judged duplicate or rejected — don't re-spend on LLM
+                console.log(`[draftRuleFromPayload] fingerprint cache hit (${cached.result}): ${fingerprint} — skipping LLM call`);
+                return null;
+            }
+        }
+        const userMessage = [
+            `Red-team finding from ${partner}. The attack prompt below bypassed ${targetModel} via garak probe ${probe} (detector: ${detector}).`,
+            '',
+            'Draft an ATR rule that detects this attack class at the agent-layer boundary (tool call args, skill content, user_input field, MCP tool descriptions). The rule should generalise beyond the literal prompt — target the technique, not the specific words.',
+            '',
+            'Follow STEP 0 (de-duplication via grep_existing_rules) before drafting. If an existing rule already covers this attack class with equivalent patterns, output NO_THREATS_FOUND.',
+            '',
+            `Attack prompt (${boundedPayload.length} chars):`,
+            '```',
+            boundedPayload,
+            '```',
+            '',
+            `Severity hint from partner: ${meta.severity || 'high'}. Use your judgement — downgrade to medium if the payload is a low-impact jailbreak that does not request sensitive operations.`,
+        ].join('\n');
+        let finalText = '';
+        let toolCalls = 0;
+        try {
+            // Bulk drafter runs on Haiku (4x cheaper than Sonnet). Quality gate +
+            // self-test + safety gate (benign corpus FP check) catch any output
+            // regressions regardless of model.
+            const result = await this.callAnthropicWithTools(LLMReviewer.ATR_DRAFTER_PROMPT, userMessage, { model: this.drafterModel });
+            finalText = result.finalText;
+            toolCalls = result.toolCalls;
+        }
+        catch (err) {
+            console.error(`[draftRuleFromPayload] LLM call failed: ${err instanceof Error ? err.message : String(err)}`);
+            // Do NOT cache LLM errors — transient failures should retry.
+            return null;
+        }
+        if (!finalText || /NO_THREATS_FOUND/.test(finalText)) {
+            console.log(`[draftRuleFromPayload] NO_THREATS_FOUND or empty response (probe=${probe})`);
+            // Cache the 'duplicate' verdict so repeat submissions of the same payload
+            // don't spend money to re-confirm "already covered".
+            this.db.recordPayloadFingerprint(fingerprint, 'duplicate');
+            return null;
+        }
+        // Extract YAML block (with unclosed-fence fallback — same as analyzeSkills)
+        let yamlBlocks = finalText.match(/```yaml\n([\s\S]*?)```/g);
+        if (!yamlBlocks || yamlBlocks.length === 0) {
+            const unclosed = finalText.match(/```yaml\n([\s\S]*?)$/);
+            if (unclosed)
+                yamlBlocks = [unclosed[0] + '\n```'];
+        }
+        if (!yamlBlocks || yamlBlocks.length === 0) {
+            console.log(`[draftRuleFromPayload] No YAML block in response. First 200 chars: ${finalText.slice(0, 200)}`);
+            this.db.recordPayloadFingerprint(fingerprint, 'rejected');
+            return null;
+        }
+        let ruleContent = yamlBlocks[0]
+            .replace(/^```yaml\n/, '')
+            .replace(/```$/, '')
+            .trim();
+        // Rewrite author line so the partner/source is visible on the shipped rule.
+        // The drafter prompt hardcodes 'ATR Threat Cloud Crystallization' as the
+        // author; replace it with the actual submitter so downstream consumers
+        // (npm, Cisco, etc.) can see who contributed each rule. Sanitise the partner
+        // string against YAML-breaking characters.
+        const safePartner = partner.replace(/[\r\n"'\\]/g, '').slice(0, 80);
+        const attributedAuthor = safePartner === 'external-red-team' || !safePartner
+            ? 'ATR Community (via garak pipe)'
+            : `${safePartner} (via ATR garak pipe)`;
+        ruleContent = ruleContent.replace(/^author:\s*["']ATR Threat Cloud Crystallization["']\s*$/m, `author: "${attributedAuthor}"`);
+        // Strip any (?i) prefix the LLM may have sneaked in despite the prompt
+        const regexFieldMatch = ruleContent.match(/value:\s*'(\(\?i\))([^']*)'/);
+        if (regexFieldMatch) {
+            const rawPattern = `'(?i)${regexFieldMatch[2]}'`;
+            const jsPattern = `'${regexFieldMatch[2]}'`;
+            ruleContent = ruleContent.replace(rawPattern, jsPattern);
+        }
+        // RFC-001 quality gate
+        let gateResult;
+        try {
+            const metadata = parseATRRule(ruleContent);
+            const enriched = { ...metadata, llmGenerated: true };
+            gateResult = validateRuleMeetsStandard(enriched, 'experimental');
+        }
+        catch (parseErr) {
+            console.log(`[draftRuleFromPayload] Rule rejected — cannot parse: ${parseErr instanceof Error ? parseErr.message : String(parseErr)}`);
+            this.db.recordPayloadFingerprint(fingerprint, 'rejected');
+            return null;
+        }
+        if (!gateResult.passed) {
+            console.log(`[draftRuleFromPayload] Rejected by quality gate: ${gateResult.issues.slice(0, 3).join('; ')}`);
+            this.db.recordPayloadFingerprint(fingerprint, 'rejected');
+            return null;
+        }
+        // Self-test: rule's own regex must catch its own TPs and miss its own TNs.
+        const selfTest = selfTestRule(ruleContent);
+        if (!selfTest.passed) {
+            console.log(`[draftRuleFromPayload] Rejected by self-test: TP ${selfTest.tpMatched}/${selfTest.tpTotal}, TN FP ${selfTest.tnMatched}/${selfTest.tnTotal}. ${selfTest.failureReasons.slice(0, 2).join(' | ')}`);
+            this.db.recordPayloadFingerprint(fingerprint, 'rejected');
+            return null;
+        }
+        const patternHash = createHash('sha256').update(ruleContent).digest('hex').slice(0, 16);
+        // Idempotent: if a previous submission produced the same YAML, skip insert.
+        if (this.db.getATRProposalByHash(patternHash)) {
+            // Still record the fingerprint so future dup payloads bypass LLM.
+            this.db.recordPayloadFingerprint(fingerprint, 'novel', { patternHash });
+            return { patternHash, ruleContent, toolCalls };
+        }
+        this.db.insertATRProposal({
+            patternHash,
+            ruleContent,
+            llmProvider: 'garak-drafter',
+            llmModel: this.drafterModel,
+            selfReviewVerdict: JSON.stringify({
+                approved: true,
+                source: 'external-red-team',
+                partner,
+                probe,
+                detector,
+                targetModel,
+                toolCalls,
+                provenance: 'llm-generated-from-payload',
+                gateWarnings: gateResult.warnings,
+            }),
+        });
+        // Record novel fingerprint so repeat submissions reuse this rule without LLM.
+        this.db.recordPayloadFingerprint(fingerprint, 'novel', { patternHash });
+        // Fire-and-forget second-opinion review (same as analyzeSkills)
+        void this.reviewProposal(patternHash, ruleContent).catch((err) => {
+            console.error(`[draftRuleFromPayload] review failed for ${patternHash}:`, err instanceof Error ? err.message : String(err));
+        });
+        console.log(`[draftRuleFromPayload] OK: patternHash=${patternHash} partner=${partner} probe=${probe} toolCalls=${toolCalls}`);
+        return { patternHash, ruleContent, toolCalls };
+    }
     /**
      * Parse the LLM response into a structured verdict
      * 解析 LLM 回應為結構化裁決
@@ -352,7 +995,7 @@ REMEMBER: Output "NO_THREATS_FOUND" for 90%+ of skills. Only flag genuinely susp
     parseVerdict(responseText) {
         const defaultVerdict = {
             approved: false,
-            falsePositiveRisk: 'high',
+            falsePositiveRisk: 'medium',
             coverageScore: 0,
             reasoning: 'Failed to parse LLM response',
         };
@@ -366,9 +1009,10 @@ REMEMBER: Output "NO_THREATS_FOUND" for 90%+ of skills. Only flag genuinely susp
             // Validate and normalize fields
             const approved = parsed.approved === true;
             const validRisks = ['low', 'medium', 'high'];
-            const falsePositiveRisk = validRisks.includes(parsed.falsePositiveRisk)
-                ? parsed.falsePositiveRisk
-                : 'high';
+            const normalizedRisk = (parsed.falsePositiveRisk ?? '').toString().toLowerCase().trim();
+            const falsePositiveRisk = validRisks.includes(normalizedRisk)
+                ? normalizedRisk
+                : 'medium';
             const coverageScore = typeof parsed.coverageScore === 'number'
                 ? Math.max(0, Math.min(100, Math.round(parsed.coverageScore)))
                 : 0;