npm - ai-shield-core - Versions diffs - 0.1.0 → 0.3.0 - Mend

ai-shield-core 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (76) hide show

package/dist/audit/logger.d.ts.map +1 -1
package/dist/audit/logger.js +13 -14
package/dist/audit/types.js +1 -2
package/dist/cache/lru.js +1 -5
package/dist/canary/memory.d.ts +75 -0
package/dist/canary/memory.d.ts.map +1 -0
package/dist/canary/memory.js +194 -0
package/dist/context/wrap-context.d.ts +169 -0
package/dist/context/wrap-context.d.ts.map +1 -0
package/dist/context/wrap-context.js +278 -0
package/dist/cost/anomaly.js +1 -4
package/dist/cost/pricing.d.ts.map +1 -1
package/dist/cost/pricing.js +26 -19
package/dist/cost/tracker.d.ts +19 -1
package/dist/cost/tracker.d.ts.map +1 -1
package/dist/cost/tracker.js +27 -10
package/dist/index.d.ts +34 -3
package/dist/index.d.ts.map +1 -1
package/dist/index.js +55 -37
package/dist/judge/async-judge.d.ts +85 -0
package/dist/judge/async-judge.d.ts.map +1 -0
package/dist/judge/async-judge.js +146 -0
package/dist/policy/circuit-breaker.d.ts +70 -0
package/dist/policy/circuit-breaker.d.ts.map +1 -0
package/dist/policy/circuit-breaker.js +376 -0
package/dist/policy/engine.js +1 -5
package/dist/policy/tools.js +4 -8
package/dist/scanner/canary.js +4 -8
package/dist/scanner/chain.js +1 -5
package/dist/scanner/heuristic.d.ts +27 -0
package/dist/scanner/heuristic.d.ts.map +1 -1
package/dist/scanner/heuristic.js +118 -7
package/dist/scanner/ingestion.d.ts +147 -0
package/dist/scanner/ingestion.d.ts.map +1 -0
package/dist/scanner/ingestion.js +520 -0
package/dist/scanner/output.d.ts +73 -0
package/dist/scanner/output.d.ts.map +1 -0
package/dist/scanner/output.js +297 -0
package/dist/scanner/pii.d.ts.map +1 -1
package/dist/scanner/pii.js +24 -12
package/dist/shield.d.ts.map +1 -1
package/dist/shield.js +34 -26
package/dist/types.d.ts +156 -2
package/dist/types.d.ts.map +1 -1
package/dist/types.js +1 -2
package/package.json +4 -3
package/src/audit/logger.ts +6 -1
package/src/canary/memory.ts +259 -0
package/src/context/wrap-context.ts +475 -0
package/src/cost/pricing.ts +21 -9
package/src/cost/tracker.ts +35 -1
package/src/index.ts +113 -2
package/src/judge/async-judge.ts +254 -0
package/src/policy/circuit-breaker.ts +449 -0
package/src/scanner/heuristic.ts +125 -2
package/src/scanner/ingestion.ts +624 -0
package/src/scanner/output.ts +386 -0
package/src/scanner/pii.ts +21 -7
package/src/shield.ts +15 -2
package/src/types.ts +194 -2
package/tsconfig.json +2 -1
package/dist/audit/logger.js.map +0 -1
package/dist/audit/types.js.map +0 -1
package/dist/cache/lru.js.map +0 -1
package/dist/cost/anomaly.js.map +0 -1
package/dist/cost/pricing.js.map +0 -1
package/dist/cost/tracker.js.map +0 -1
package/dist/index.js.map +0 -1
package/dist/policy/engine.js.map +0 -1
package/dist/policy/tools.js.map +0 -1
package/dist/scanner/canary.js.map +0 -1
package/dist/scanner/chain.js.map +0 -1
package/dist/scanner/heuristic.js.map +0 -1
package/dist/scanner/pii.js.map +0 -1
package/dist/shield.js.map +0 -1
package/dist/types.js.map +0 -1

package/dist/scanner/heuristic.js CHANGED Viewed

@@ -1,6 +1,69 @@
-"use strict";
-Object.defineProperty(exports, "__esModule", { value: true });
-exports.HeuristicScanner = void 0;
+// ============================================================
+// Heuristic Prompt Injection Scanner
+// Score-based: multiple matches = higher confidence
+// Unicode-normalizes input before pattern matching so that
+// homoglyph/zero-width/fullwidth evasion attempts still hit.
+// ============================================================
+// Common Cyrillic/Greek Latin-lookalikes mapped to ASCII.
+// Keep minimal — false-mappings in real content are worse than
+// false-negatives in an attack attempt.
+const HOMOGLYPH_MAP = {
+    // Cyrillic
+    "а": "a", "е": "e", "і": "i", "ј": "j", "о": "o", "р": "p", "с": "c", "ѕ": "s",
+    "у": "y", "х": "x", "ԁ": "d", "һ": "h", "ӏ": "l", "ո": "n", "А": "A", "В": "B",
+    "Е": "E", "І": "I", "К": "K", "М": "M", "Н": "H", "О": "O", "Р": "P", "С": "C",
+    "Т": "T", "Х": "X", "Ѕ": "S", "Ј": "J", "Ү": "Y", "Ԛ": "Q", "Ԝ": "W", "Ғ": "F",
+    // Greek
+    "α": "a", "ο": "o", "ρ": "p", "ε": "e", "υ": "y", "χ": "x", "ν": "v", "ι": "i",
+    "κ": "k", "Α": "A", "Β": "B", "Ε": "E", "Ζ": "Z", "Η": "H", "Ι": "I", "Κ": "K",
+    "Μ": "M", "Ν": "N", "Ο": "O", "Ρ": "P", "Τ": "T", "Υ": "Y", "Χ": "X",
+    // Armenian / Cherokee / other look-alikes occasionally used in evasion
+    "օ": "o", "ѵ": "v",
+};
+const HOMOGLYPH_RE = new RegExp(Object.keys(HOMOGLYPH_MAP).join("|"), "g");
+// Zero-width chars + BOM — used to split words like "ig<ZWSP>nore" across
+// the pattern boundary (U+200B..U+200D, U+2060, U+FEFF).
+const ZERO_WIDTH_RE = /[\u200B-\u200D\u2060\uFEFF]/g;
+// Combining marks (diacritics) left behind by NFKD decomposition (U+0300..U+036F).
+const COMBINING_RE = /[̀-ͯ]/g;
+/**
+ * Normalize input for pattern matching. Returns the canonicalized string
+ * used only for scan decisions; the sanitized output passed to callers
+ * is still the original input.
+ *
+ * Order matters:
+ * 1. NFKD folds compatibility forms (fullwidth → ASCII, ligatures) AND
+ *    decomposes precomposed accented letters into base + combining mark.
+ * 2. Strip zero-width chars so "ig<ZWSP>nore" collapses to "ignore".
+ * 3. Strip combining marks (diacritics) left behind by NFKD.
+ * 4. Map remaining Cyrillic/Greek look-alikes to Latin.
+ */
+export function normalizeForInjectionScan(input) {
+    const nfkd = input.normalize("NFKD");
+    const noZW = nfkd.replace(ZERO_WIDTH_RE, "");
+    const noCombining = noZW.replace(COMBINING_RE, "");
+    return noCombining.replace(HOMOGLYPH_RE, (ch) => HOMOGLYPH_MAP[ch] ?? ch);
+}
+/**
+ * Collapse letter-splitting evasion: an attacker writes `i g n o r e` or
+ * `i.g.n.o.r.e` or `i-g-n-o-r-e` to break the literal token "ignore" across
+ * separators so the regex never matches. This produces an ADDITIONAL view
+ * where any run of `single-letter + separator` (≥4 letters) has its
+ * separators removed, so the spaced form collapses back to "ignore".
+ *
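+ * Run as a second pass IN ADDITION to the normal normalized text — never
+ * as a replacement — because collapsing is lossy (it would also fuse a
+ * legitimate single-letter list such as "a b c d"). Only single-letter
+ * groups separated by one space / tab / dot / dash / underscore are
+ * collapsed; the interior of multi-letter words is left intact.
+ * NOTE(review): the pattern has no leading letter boundary, so a run can
+ * start on the last letter of the preceding word ("please i g n o r e"
+ * collapses to "pleaseignore") — confirm whether that is intended.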
+ */
+export function collapseSpacedLetters(input) {
+    // Match ≥3 "<letter><sep>" groups closed by a final lone letter. The
+    // trailing `(?![A-Za-z])` stops the greedy match from swallowing the
+    // first letter of the next real word ("i g n o r e all" must collapse to
+    // "ignore all", not "ignorea ll"). Bounded, linear — no nested quantifier.
+    return input.replace(/(?:[A-Za-z][ \t._-]){3,}[A-Za-z](?![A-Za-z])/g, (run) => run.replace(/[ \t._-]/g, ""));
+}
 const PATTERNS = [
     // --- Instruction Override (weight: 0.25 each) ---
     {
@@ -304,7 +367,7 @@ const THRESHOLDS = {
     medium: 0.3,
     high: 0.15,
 };
-class HeuristicScanner {
+export class HeuristicScanner {
     name = "heuristic";
     patterns;
     threshold;
@@ -317,8 +380,25 @@ class HeuristicScanner {
         const start = performance.now();
         const violations = [];
         let totalScore = 0;
+        // Normalize once — pattern matching runs against the canonical form so
+        // homoglyph/zero-width evasion doesn't bypass the rules. The caller
+        // still sees the original input in `sanitized`.
+        const normalized = normalizeForInjectionScan(input);
+        // Second view that un-splits letter-splitting evasion ("i g n o r e").
+        // Only computed when it actually differs (cheap guard), and only the
+        // high-value override/role/extraction/tool categories are re-tested
+        // against it — collapsing is lossy and the low-value framing rules
+        // would false-positive on collapsed prose.
+        const collapsed = collapseSpacedLetters(normalized);
+        const collapsedDiffers = collapsed !== normalized;
+        const SPLIT_SENSITIVE = new Set([
+            "instruction_override",
+            "role_manipulation",
+            "system_prompt_extraction",
+            "tool_abuse",
+        ]);
         for (const rule of this.patterns) {
-            if (rule.pattern.test(input)) {
+            if (rule.pattern.test(normalized)) {
                 totalScore += rule.weight;
                 violations.push({
                     type: "prompt_injection",
@@ -329,8 +409,24 @@ class HeuristicScanner {
                     detail: `Rule ${rule.id} (${rule.category})`,
                 });
             }
+            else if (collapsedDiffers &&
+                SPLIT_SENSITIVE.has(rule.category) &&
+                rule.pattern.test(collapsed)) {
+                // Matched only after un-splitting → letter-splitting evasion.
+                totalScore += rule.weight;
+                violations.push({
+                    type: "prompt_injection",
+                    scanner: this.name,
+                    score: rule.weight,
+                    threshold: this.threshold,
+                    message: rule.description,
+                    detail: `Rule ${rule.id} (${rule.category}, letter-splitting evasion)`,
+                });
+            }
         }
-        // Structural signals (cumulative)
+        // Structural signals (cumulative) — intentionally run on the original
+        // input so real structural attacks (many newlines, long paddings) can
+        // still trip even when the textual patterns were evaded.
         const structuralScore = this.checkStructuralSignals(input);
         totalScore += structuralScore;
         // Cap at 1.0
@@ -360,6 +456,22 @@ class HeuristicScanner {
         // Very long input (potential padding attack)
         if (input.length > 5000)
             score += 0.05;
+        // Adversarial suffix (GCG-style): a long whitespace-free token packed
+        // with mixed punctuation/symbols, typically appended after the readable
+        // request. Conservative — needs ≥25 chars and ≥6 distinct punctuation
+        // marks so ordinary URLs, hashes and code tokens don't trip it.
+        const ADV_TOKEN_RE = /\S{25,}/g;
+        let advMatch;
+        let advCount = 0;
+        while ((advMatch = ADV_TOKEN_RE.exec(input)) !== null && advCount < 32) {
+            advCount += 1;
+            const tok = advMatch[0];
+            const distinctPunct = new Set((tok.match(/[!-/:-@[-`{-~]/g) ?? [])).size;
+            if (distinctPunct >= 6) {
+                score += 0.05;
+                break;
+            }
+        }
         return score;
     }
     /** Get all registered pattern IDs for testing */
@@ -371,5 +483,4 @@ class HeuristicScanner {
         return this.patterns.length;
     }
 }
-exports.HeuristicScanner = HeuristicScanner;
 //# sourceMappingURL=heuristic.js.map

package/dist/scanner/ingestion.d.ts ADDED Viewed

@@ -0,0 +1,147 @@
+import type { Scanner, ScannerResult, ScanContext, Violation, IngestionSource, TrustTier } from "../types.js";
+/**
+ * Default trust-tier inferred from source.
+ * `user` is still untrusted in this library's threat model — a user can
+ * inject too — but `system` is reserved for content the developer
+ * controls and labels via `wrapContext()`. Every ingestion source
+ * (including `user`) therefore returns `"untrusted"` by default; the
+ * parameter is kept on the signature so future per-source overrides
+ * (e.g. an installer marking a specific source as trusted) don't
+ * require a breaking API change.
+ */
+export declare function trustTierForSource(_source: IngestionSource): TrustTier;
+/**
+ * Result of `scanIngested()`.
+ *
+ * Shape parallels `ScanResult` from `chain.ts` so callers can treat
+ * both interchangeably.
+ */
+export interface IngestionScanResult {
+    safe: boolean;
+    decision: "allow" | "warn" | "block";
+    /**
+     * Sanitized output. When `decision === "block"` this is the empty
+     * string — the original content was deemed unsafe and the field name
+     * "sanitized" would otherwise mislead callers into using poisoned
+     * content. Use the source `content` argument if you need the raw input
+     * for logging or quarantine.
+     */
+    sanitized: string;
+    violations: Violation[];
+    source: IngestionSource;
+    meta: {
+        scanDurationMs: number;
+        scannersRun: string[];
+        /** Number of extra source-specific patterns that fired. */
+        sourceSpecificHits: number;
+        /**
+         * Always `false` from `scanIngested()` — ingestion scans don't go
+         * through the LRU cache. Field is present so callers can write a
+         * single result-handler for both `ScanResult` and `IngestionScanResult`.
+         */
+        cached: boolean;
+    };
+}
+export interface IngestionScannerConfig {
+    /** Override the per-source threshold lookup. */
+    threshold?: number;
+    /**
+     * Additional custom patterns to merge with the source profile's
+     * `extraPatterns`. Useful for org-specific markers.
+     */
+    customPatterns?: RegExp[];
+    /**
+     * Force the underlying heuristic scanner to a different strictness
+     * (default "high" because ingestion is always tighter than user input).
+     */
+    strictness?: "low" | "medium" | "high";
+}
+/**
+ * Scanner implementation. Composable into a `ScannerChain` when the
+ * caller wants ingestion to participate in the main scan flow rather
+ * than be invoked via the standalone `scanIngested()` helper.
+ *
+ * The scanner reads the `source` from `ScanContext` (or treats input
+ * as `"user"` when missing) and applies the source-specific profile.
+ */
+export declare class IngestionScanner implements Scanner {
+    readonly name = "ingestion";
+    private readonly threshold;
+    private readonly customPatterns;
+    private readonly heuristic;
+    constructor(config?: IngestionScannerConfig);
+    scan(input: string, context: ScanContext): Promise<ScannerResult>;
+}
+/**
+ * One-shot helper. Scans `content` against the source-specific profile
+ * and returns a result without needing an `AIShield` instance.
+ *
+ * Use when you want a quick gate at the ingestion boundary, e.g.
+ * before storing a chunk into a vector DB or before passing a tool
+ * description into the model's context.
+ *
+ * @example
+ * ```ts
+ * import { scanIngested } from "ai-shield-core";
+ *
+ * const ragChunk = "...retrieved document text...";
+ * const result = await scanIngested(ragChunk, "rag");
+ * if (!result.safe) {
+ *   // reject the chunk OR strip it before assembly
+ *   logger.warn("IPI candidate", result.violations);
+ * }
+ * ```
+ */
+export declare function scanIngested(content: string, source: IngestionSource, config?: IngestionScannerConfig): Promise<IngestionScanResult>;
+/**
+ * Scan the runtime *result* of a tool call before it re-enters the model
+ * context. The dominant indirect-injection channel in agentic loops: a
+ * search tool surfaces a poisoned page, an MCP server returns attacker-
+ * controlled data, a compromised upstream API embeds instructions in its
+ * response. PoisonedRAG (USENIX Security 2025) showed 5 planted documents
+ * reach a 90% attack-success rate in million-document knowledge bases —
+ * the payload arrives here, not in the user prompt.
+ *
+ * Thin wrapper over `scanIngested(content, "tool-output")` that also
+ * stamps the originating `toolName` into every violation detail, so an
+ * audit log can answer "which tool returned the poisoned content?".
+ *
+ * Pair with `CircuitBreakerRegistry` when you also want to rate-limit or
+ * trip the tool after repeated poisoned results:
+ *
+ * @example
+ * ```ts
+ * import { scanToolOutput } from "ai-shield-core";
+ *
+ * const result = await searchTool.call(query);          // untrusted
+ * const scan = await scanToolOutput("web_search", result);
+ * if (!scan.safe) {
+ *   // drop the result OR strip it before the next model turn
+ *   audit.warn("poisoned tool output", { tool: "web_search", v: scan.violations });
+ *   return; // do not feed `result` back into the model
+ * }
+ * model.continue(result);
+ * ```
+ */
+export declare function scanToolOutput(toolName: string, content: string, config?: IngestionScannerConfig): Promise<IngestionScanResult>;
+/**
+ * Try to decode common obfuscation layers an attacker uses to smuggle
+ * an injection past pattern matchers. Returns the decoded payload when
+ * it looks like a successful decode, else `null`.
+ *
+ * The function deliberately runs at most ONE decode layer to avoid
+ * decoding amplification (a chain of `base64(base64(...))` would force
+ * us into deep recursion); a single-layer decode is enough to catch
+ * the vast majority of in-the-wild bypasses while keeping execution
+ * cost bounded.
+ *
+ * Heuristics:
+ *  - Base64: contiguous run of 40+ Base64 chars, decodes to mostly
+ *    printable ASCII or the `\u00..` C0 range stays empty.
+ *  - Hex: 80+ hex chars in a row.
+ *  - Percent-encoding: more than 5 `%XX` sequences.
+ *
+ * Returns the longest decoded payload when multiple candidates fire.
+ */
+export declare function tryDecodeObfuscation(input: string): string | null;
+//# sourceMappingURL=ingestion.d.ts.map

package/dist/scanner/ingestion.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"ingestion.d.ts","sourceRoot":"","sources":["../../src/scanner/ingestion.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,OAAO,EACP,aAAa,EACb,WAAW,EACX,SAAS,EACT,eAAe,EACf,SAAS,EACV,MAAM,aAAa,CAAC;AAoIrB;;;;;;;;;GASG;AACH,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,eAAe,GAAG,SAAS,CAEtE;AAoFD;;;;;GAKG;AACH,MAAM,WAAW,mBAAmB;IAClC,IAAI,EAAE,OAAO,CAAC;IACd,QAAQ,EAAE,OAAO,GAAG,MAAM,GAAG,OAAO,CAAC;IACrC;;;;;;OAMG;IACH,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,SAAS,EAAE,CAAC;IACxB,MAAM,EAAE,eAAe,CAAC;IACxB,IAAI,EAAE;QACJ,cAAc,EAAE,MAAM,CAAC;QACvB,WAAW,EAAE,MAAM,EAAE,CAAC;QACtB,2DAA2D;QAC3D,kBAAkB,EAAE,MAAM,CAAC;QAC3B;;;;WAIG;QACH,MAAM,EAAE,OAAO,CAAC;KACjB,CAAC;CACH;AAED,MAAM,WAAW,sBAAsB;IACrC,gDAAgD;IAChD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;OAGG;IACH,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;IAC1B;;;OAGG;IACH,UAAU,CAAC,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;CACxC;AAED;;;;;;;GAOG;AACH,qBAAa,gBAAiB,YAAW,OAAO;IAC9C,QAAQ,CAAC,IAAI,eAAe;IAC5B,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAqB;IAC/C,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAW;IAC1C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAmB;gBAEjC,MAAM,GAAE,sBAA2B;IAQzC,IAAI,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,WAAW,GAAG,OAAO,CAAC,aAAa,CAAC;CAmHxE;AAED;;;;;;;;;;;;;;;;;;;GAmBG;AACH,wBAAsB,YAAY,CAChC,OAAO,EAAE,MAAM,EACf,MAAM,EAAE,eAAe,EACvB,MAAM,GAAE,sBAA2B,GAClC,OAAO,CAAC,mBAAmB,CAAC,CA0B9B;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6BG;AACH,wBAAsB,cAAc,CAClC,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,MAAM,EACf,MAAM,GAAE,sBAA2B,GAClC,OAAO,CAAC,mBAAmB,CAAC,CAa9B;AAQD;;;;;;;;;;;;;;;;;;GAkBG;AACH,wBAAgB,oBAAoB,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CA2EjE"}