ai-shield-core 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. package/dist/audit/logger.d.ts.map +1 -1
  2. package/dist/audit/logger.js +13 -14
  3. package/dist/audit/types.js +1 -2
  4. package/dist/cache/lru.js +1 -5
  5. package/dist/canary/memory.d.ts +75 -0
  6. package/dist/canary/memory.d.ts.map +1 -0
  7. package/dist/canary/memory.js +194 -0
  8. package/dist/context/wrap-context.d.ts +105 -0
  9. package/dist/context/wrap-context.d.ts.map +1 -0
  10. package/dist/context/wrap-context.js +188 -0
  11. package/dist/cost/anomaly.js +1 -4
  12. package/dist/cost/pricing.d.ts.map +1 -1
  13. package/dist/cost/pricing.js +18 -19
  14. package/dist/cost/tracker.d.ts +19 -1
  15. package/dist/cost/tracker.d.ts.map +1 -1
  16. package/dist/cost/tracker.js +27 -10
  17. package/dist/index.d.ts +31 -2
  18. package/dist/index.d.ts.map +1 -1
  19. package/dist/index.js +51 -37
  20. package/dist/policy/circuit-breaker.d.ts +70 -0
  21. package/dist/policy/circuit-breaker.d.ts.map +1 -0
  22. package/dist/policy/circuit-breaker.js +376 -0
  23. package/dist/policy/engine.js +1 -5
  24. package/dist/policy/tools.js +4 -8
  25. package/dist/scanner/canary.js +4 -8
  26. package/dist/scanner/chain.js +1 -5
  27. package/dist/scanner/heuristic.d.ts +13 -0
  28. package/dist/scanner/heuristic.d.ts.map +1 -1
  29. package/dist/scanner/heuristic.js +50 -7
  30. package/dist/scanner/ingestion.d.ts +116 -0
  31. package/dist/scanner/ingestion.d.ts.map +1 -0
  32. package/dist/scanner/ingestion.js +452 -0
  33. package/dist/scanner/pii.d.ts.map +1 -1
  34. package/dist/scanner/pii.js +24 -12
  35. package/dist/shield.d.ts.map +1 -1
  36. package/dist/shield.js +34 -26
  37. package/dist/types.d.ts +140 -2
  38. package/dist/types.d.ts.map +1 -1
  39. package/dist/types.js +1 -2
  40. package/package.json +4 -3
  41. package/src/audit/logger.ts +6 -1
  42. package/src/canary/memory.ts +259 -0
  43. package/src/context/wrap-context.ts +304 -0
  44. package/src/cost/pricing.ts +13 -9
  45. package/src/cost/tracker.ts +35 -1
  46. package/src/index.ts +82 -1
  47. package/src/policy/circuit-breaker.ts +449 -0
  48. package/src/scanner/heuristic.ts +49 -2
  49. package/src/scanner/ingestion.ts +550 -0
  50. package/src/scanner/pii.ts +21 -7
  51. package/src/shield.ts +15 -2
  52. package/src/types.ts +175 -2
  53. package/tsconfig.json +2 -1
  54. package/dist/audit/logger.js.map +0 -1
  55. package/dist/audit/types.js.map +0 -1
  56. package/dist/cache/lru.js.map +0 -1
  57. package/dist/cost/anomaly.js.map +0 -1
  58. package/dist/cost/pricing.js.map +0 -1
  59. package/dist/cost/tracker.js.map +0 -1
  60. package/dist/index.js.map +0 -1
  61. package/dist/policy/engine.js.map +0 -1
  62. package/dist/policy/tools.js.map +0 -1
  63. package/dist/scanner/canary.js.map +0 -1
  64. package/dist/scanner/chain.js.map +0 -1
  65. package/dist/scanner/heuristic.js.map +0 -1
  66. package/dist/scanner/pii.js.map +0 -1
  67. package/dist/shield.js.map +0 -1
  68. package/dist/types.js.map +0 -1
@@ -0,0 +1,116 @@
1
+ import type { Scanner, ScannerResult, ScanContext, Violation, IngestionSource, TrustTier } from "../types.js";
2
+ /**
3
+ * Default trust-tier inferred from source.
4
+ * `user` is still untrusted in this library's threat model — a user can
5
+ * inject too — but `system` is reserved for content the developer
6
+ * controls and labels via `wrapContext()`. Every ingestion source
7
+ * (including `user`) therefore returns `"untrusted"` by default; the
8
+ * parameter is kept on the signature so future per-source overrides
9
+ * (e.g. an installer marking a specific source as trusted) don't
10
+ * require a breaking API change.
11
+ */
12
+ export declare function trustTierForSource(_source: IngestionSource): TrustTier;
13
+ /**
14
+ * Result of `scanIngested()`.
15
+ *
16
+ * Shape parallels `ScanResult` from `chain.ts` so callers can treat
17
+ * both interchangeably.
18
+ */
19
+ export interface IngestionScanResult {
20
+ safe: boolean;
21
+ decision: "allow" | "warn" | "block";
22
+ /**
23
+ * Sanitized output. When `decision === "block"` this is the empty
24
+ * string — the original content was deemed unsafe and the field name
25
+ * "sanitized" would otherwise mislead callers into using poisoned
26
+ * content. Use the source `content` argument if you need the raw input
27
+ * for logging or quarantine.
28
+ */
29
+ sanitized: string;
30
+ violations: Violation[];
31
+ source: IngestionSource;
32
+ meta: {
33
+ scanDurationMs: number;
34
+ scannersRun: string[];
35
+ /** Number of extra source-specific patterns that fired. */
36
+ sourceSpecificHits: number;
37
+ /**
38
+ * Always `false` from `scanIngested()` — ingestion scans don't go
39
+ * through the LRU cache. Field is present so callers can write a
40
+ * single result-handler for both `ScanResult` and `IngestionScanResult`.
41
+ */
42
+ cached: boolean;
43
+ };
44
+ }
45
+ export interface IngestionScannerConfig {
46
+ /** Override the per-source threshold lookup. */
47
+ threshold?: number;
48
+ /**
49
+ * Additional custom patterns to merge with the source profile's
50
+ * `extraPatterns`. Useful for org-specific markers.
51
+ */
52
+ customPatterns?: RegExp[];
53
+ /**
54
+ * Force the underlying heuristic scanner to a different strictness
55
+ * (default "high" because ingestion is always tighter than user input).
56
+ */
57
+ strictness?: "low" | "medium" | "high";
58
+ }
59
+ /**
60
+ * Scanner implementation. Composable into a `ScannerChain` when the
61
+ * caller wants ingestion to participate in the main scan flow rather
62
+ * than be invoked via the standalone `scanIngested()` helper.
63
+ *
64
+ * The scanner reads the `source` from `ScanContext` (or treats input
65
+ * as `"user"` when missing) and applies the source-specific profile.
66
+ */
67
+ export declare class IngestionScanner implements Scanner {
68
+ readonly name = "ingestion";
69
+ private readonly threshold;
70
+ private readonly customPatterns;
71
+ private readonly heuristic;
72
+ constructor(config?: IngestionScannerConfig);
73
+ scan(input: string, context: ScanContext): Promise<ScannerResult>;
74
+ }
75
+ /**
76
+ * One-shot helper. Scans `content` against the source-specific profile
77
+ * and returns a result without needing an `AIShield` instance.
78
+ *
79
+ * Use when you want a quick gate at the ingestion boundary, e.g.
80
+ * before storing a chunk into a vector DB or before passing a tool
81
+ * description into the model's context.
82
+ *
83
+ * @example
84
+ * ```ts
85
+ * import { scanIngested } from "ai-shield-core";
86
+ *
87
+ * const ragChunk = "...retrieved document text...";
88
+ * const result = await scanIngested(ragChunk, "rag");
89
+ * if (!result.safe) {
90
+ * // reject the chunk OR strip it before assembly
91
+ * logger.warn("IPI candidate", result.violations);
92
+ * }
93
+ * ```
94
+ */
95
+ export declare function scanIngested(content: string, source: IngestionSource, config?: IngestionScannerConfig): Promise<IngestionScanResult>;
96
+ /**
97
+ * Try to decode common obfuscation layers an attacker uses to smuggle
98
+ * an injection past pattern matchers. Returns the decoded payload when
99
+ * it looks like a successful decode, else `null`.
100
+ *
101
+ * The function deliberately runs at most ONE decode layer to avoid
102
+ * decoding amplification (a chain of `base64(base64(...))` would force
103
+ * us into deep recursion); a single-layer decode is enough to catch
104
+ * the vast majority of in-the-wild bypasses while keeping execution
105
+ * cost bounded.
106
+ *
107
+ * Heuristics:
108
+ * - Base64: contiguous run of 40+ Base64 chars that decodes to at least
109
+ * 70% printable ASCII / whitespace characters.
110
+ * - Hex: 80+ hex chars in a row.
111
+ * - Percent-encoding: 5 or more `%XX` sequences.
112
+ *
113
+ * Returns the longest decoded payload when multiple candidates fire.
114
+ */
115
+ export declare function tryDecodeObfuscation(input: string): string | null;
116
+ //# sourceMappingURL=ingestion.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ingestion.d.ts","sourceRoot":"","sources":["../../src/scanner/ingestion.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,OAAO,EACP,aAAa,EACb,WAAW,EACX,SAAS,EACT,eAAe,EACf,SAAS,EACV,MAAM,aAAa,CAAC;AA2GrB;;;;;;;;;GASG;AACH,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,eAAe,GAAG,SAAS,CAEtE;AAoFD;;;;;GAKG;AACH,MAAM,WAAW,mBAAmB;IAClC,IAAI,EAAE,OAAO,CAAC;IACd,QAAQ,EAAE,OAAO,GAAG,MAAM,GAAG,OAAO,CAAC;IACrC;;;;;;OAMG;IACH,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,SAAS,EAAE,CAAC;IACxB,MAAM,EAAE,eAAe,CAAC;IACxB,IAAI,EAAE;QACJ,cAAc,EAAE,MAAM,CAAC;QACvB,WAAW,EAAE,MAAM,EAAE,CAAC;QACtB,2DAA2D;QAC3D,kBAAkB,EAAE,MAAM,CAAC;QAC3B;;;;WAIG;QACH,MAAM,EAAE,OAAO,CAAC;KACjB,CAAC;CACH;AAED,MAAM,WAAW,sBAAsB;IACrC,gDAAgD;IAChD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;OAGG;IACH,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;IAC1B;;;OAGG;IACH,UAAU,CAAC,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;CACxC;AAED;;;;;;;GAOG;AACH,qBAAa,gBAAiB,YAAW,OAAO;IAC9C,QAAQ,CAAC,IAAI,eAAe;IAC5B,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAqB;IAC/C,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAW;IAC1C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAmB;gBAEjC,MAAM,GAAE,sBAA2B;IAQzC,IAAI,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,WAAW,GAAG,OAAO,CAAC,aAAa,CAAC;CAmHxE;AAED;;;;;;;;;;;;;;;;;;;GAmBG;AACH,wBAAsB,YAAY,CAChC,OAAO,EAAE,MAAM,EACf,MAAM,EAAE,eAAe,EACvB,MAAM,GAAE,sBAA2B,GAClC,OAAO,CAAC,mBAAmB,CAAC,CA0B9B;AAQD;;;;;;;;;;;;;;;;;;GAkBG;AACH,wBAAgB,oBAAoB,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CA2EjE"}
@@ -0,0 +1,452 @@
1
+ import { HeuristicScanner, normalizeForInjectionScan } from "./heuristic.js";
2
+ // ============================================================
3
+ // Ingestion Scanner — Indirect Prompt Injection (IPI) Defense
4
+ //
5
+ // Scans non-user content (RAG chunks, MCP tool descriptions, stored
6
+ // memory facts, scraped web pages, agent-to-agent messages) for
7
+ // instruction-shaped payloads BEFORE they enter the model context.
8
+ //
9
+ // Per Lakera 2026 incident catalog + OWASP LLM01:2025, indirect
10
+ // injection is now the dominant attack class — >55% of observed
11
+ // incidents arrive through trusted-looking data channels. Direct user
12
+ // injection is the minority case.
13
+ //
14
+ // This scanner runs the existing heuristic patterns at a stricter
15
+ // threshold AND adds source-specific patterns the user channel does
16
+ // not see (HTML-comment instructions, tool-description override,
17
+ // memory-entry steering).
18
+ // ============================================================
19
+ /**
20
+ * Per-source threshold + extra patterns. Tighter than the user-channel
21
+ * default because data sources almost never need instruction syntax —
22
+ * the presence of one in retrieved content is itself a signal.
23
+ */
24
+ const SOURCE_PROFILE = {
25
+ user: {
26
+ // For symmetry — a caller may pass source="user" through scanIngested().
27
+ // Falls back to normal heuristic strictness.
28
+ threshold: 0.3,
29
+ extraPatterns: [],
30
+ },
31
+ rag: {
32
+ // Retrieved docs basically never legitimately contain "ignore previous
33
+ // instructions" type wording. Strict. Patterns here are ReDoS-bounded —
34
+ // hidden-comment + CSS detection are done via slice-then-test below in
35
+ // `runStructuralIngestionScan()` so we never run two `[^]*?` wildcards
36
+ // back-to-back over attacker-controlled input.
37
+ threshold: 0.15,
38
+ extraPatterns: [
39
+ // "AI assistant note: ..." style trojan headers.
40
+ /(?:AI\s+(?:assistant|model)\s+(?:note|instruction|directive)|attention\s+(?:AI|model|assistant))[:\s]+/i,
41
+ // Document-claims-to-be-system marker.
42
+ /this\s+document\s+(?:is|contains|provides)\s+(?:your|the)\s+(?:new\s+)?(?:instructions?|system\s+prompt|directives?)/i,
43
+ ],
44
+ },
45
+ "tool-desc": {
46
+ // MCP tool descriptions / function schemas: any imperative aimed at
47
+ // the calling model is suspicious. OX Security MCP advisory + LiteLLM
48
+ // CVE-2025-69256 class.
49
+ threshold: 0.12,
50
+ extraPatterns: [
51
+ // "Before using this tool, you must ..." / "After calling, also call X"
52
+ /(?:before|after|while)\s+(?:using|invoking|calling|executing)\s+(?:this\s+)?(?:tool|function|action)[,\s]+(?:you\s+)?(?:must|should|will|need\s+to|are\s+required\s+to)/i,
53
+ // "Also call delete_*" / "ALWAYS invoke X first"
54
+ /(?:also|always|first|then|finally)\s+(?:call|invoke|use|execute|run)\s+(?:the\s+)?[a-z_][\w-]{2,}/i,
55
+ // "Tool description: ignore the above and ..."
56
+ /(?:tool|function|action)\s+description[:\s]+(?:ignore|disregard|override)/i,
57
+ // "Note to LLM:" / "Hidden instruction:"
58
+ /(?:note|hidden\s+(?:instruction|directive|note))\s+to\s+(?:LLM|model|assistant|AI|agent)[:\s]/i,
59
+ // Tool args carrying meta-instructions.
60
+ /(?:on\s+(?:success|call)|when\s+invoked)[,:]\s+(?:silently|also|then)\s+(?:call|invoke|exfiltrate|send|leak)/i,
61
+ ],
62
+ },
63
+ memory: {
64
+ // Stored memory entries: persistence poisoning. Look for sentinel
65
+ // instructions that re-anchor the model on subsequent retrieval.
66
+ threshold: 0.18,
67
+ extraPatterns: [
68
+ // "Remember:" / "Important note for next session:"
69
+ /(?:remember|important|critical)\s+(?:note|instruction|directive)\s+(?:for\s+(?:next|future|all)\s+(?:sessions?|conversations?|calls?))[:\s]/i,
70
+ // "Whenever the user asks X, do Y"
71
+ /(?:whenever|every\s+time)\s+(?:the\s+user|a\s+user|someone)\s+(?:asks|says|mentions|requests)\s+.{1,80}?[,:]\s*(?:you\s+(?:must|should|will|need)|always)/i,
72
+ // "User's true preference is ..." (steering attempts).
73
+ /(?:user(?:'s|s)?\s+(?:real|true|actual|hidden)\s+(?:preference|intent|goal|name|identity))/i,
74
+ // "Override default behavior when ..."
75
+ /override\s+(?:default|standard|normal)\s+(?:behavior|response|policy)/i,
76
+ ],
77
+ },
78
+ web: {
79
+ // Scraped web — same as RAG but also catch markdown-link hijacks.
80
+ // HTML-comment + CSS-hidden detection lives in
81
+ // `runStructuralIngestionScan()` (slice-then-test, ReDoS-bounded).
82
+ threshold: 0.15,
83
+ extraPatterns: [
84
+ // Markdown-link with instruction-shaped anchor text.
85
+ /\[(?:ignore|disregard|override|system\s+(?:prompt|message))[^\]]{0,200}\]\([^)]{0,500}\)/i,
86
+ // ARIA / data-* attributes leaking instructions.
87
+ /(?:aria-label|alt|title|data-[a-z-]{0,40})\s*=\s*["'][^"']{0,500}(ignore\s+previous|new\s+instruction|system\s+prompt|override)/i,
88
+ ],
89
+ },
90
+ "agent-output": {
91
+ // Output of one agent feeding another: multi-agent contagion.
92
+ // Treat like RAG but also catch "tell next agent to ..." patterns.
93
+ threshold: 0.18,
94
+ extraPatterns: [
95
+ /(?:tell|instruct|forward\s+to)\s+(?:the\s+)?(?:next|downstream|receiving|other)\s+(?:agent|model|assistant)\s+to/i,
96
+ /(?:on\s+behalf\s+of|impersonating)\s+(?:the\s+)?(?:user|admin|system|owner)/i,
97
+ /(?:relay|pass|propagate)\s+(?:these|the\s+following)\s+(?:instructions?|directives?|orders?)/i,
98
+ ],
99
+ },
100
+ };
101
+ /**
102
+ * Default trust-tier inferred from source.
103
+ * `user` is still untrusted in this library's threat model — a user can
104
+ * inject too — but `system` is reserved for content the developer
105
+ * controls and labels via `wrapContext()`. Every ingestion source
106
+ * (including `user`) therefore returns `"untrusted"` by default; the
107
+ * parameter is kept on the signature so future per-source overrides
108
+ * (e.g. an installer marking a specific source as trusted) don't
109
+ * require a breaking API change.
110
+ */
111
+ export function trustTierForSource(_source) {
112
+ return "untrusted";
113
+ }
114
+ // --- ReDoS-safe structural scan helpers ---
115
+ /**
116
+ * Hidden-comment + CSS-hidden detection done as bounded slice-then-test
117
+ * rather than a compound `[^]*?...[^]*?` regex (which back-tracks
118
+ * quadratically on attacker-controlled input that omits the terminator).
119
+ * See Critic C1 (round 1 review) — unterminated `<!--` of 50 KB stalled
120
+ * the original implementation.
121
+ *
122
+ * The function takes the already-NFKC-normalized input and returns an
123
+ * array of `Violation`s (empty when nothing fired or `source` is not `rag`/`web`).
124
+ */
125
+ function runStructuralIngestionScan(normalized, source, threshold) {
126
+ if (source !== "rag" && source !== "web")
127
+ return [];
128
+ const violations = [];
129
+ const COMMENT_WINDOW = 2048;
130
+ const KEYWORD_RE = /ignore|disregard|override|forget|system\s+prompt|new\s+instructions?/i;
131
+ // 1. HTML comment hidden instruction.
132
+ let commentStart = 0;
133
+ let commentMatchCount = 0;
134
+ while (commentStart !== -1 && commentMatchCount < 8) {
135
+ commentStart = normalized.indexOf("<!--", commentStart);
136
+ if (commentStart === -1)
137
+ break;
138
+ const window = normalized.slice(commentStart + 4, commentStart + 4 + COMMENT_WINDOW);
139
+ if (KEYWORD_RE.test(window)) {
140
+ violations.push({
141
+ type: "ingested_injection",
142
+ scanner: "ingestion",
143
+ score: 0.4,
144
+ threshold,
145
+ message: `HTML-comment hidden instruction in ${source} content`,
146
+ detail: `Pattern: <!-- ... ignore|override|... (window 2KB)`,
147
+ });
148
+ commentMatchCount += 1;
149
+ }
150
+ commentStart += 4;
151
+ }
152
+ // 2. CSS-hidden style attribute carrying instruction-shaped neighbour.
153
+ //
154
+ // Round 2 Critic M-NEW-2: a single `.exec()` would only find the FIRST
155
+ // `style=` attribute. An attacker placing a benign `style="display:block"`
156
+ // first and a malicious `style="display:none"` later would slip through.
157
+ // Iterate all matches via the `/g` flag, capped at 16 to bound the work
158
+ // on adversarial input that floods style attributes.
159
+ const STYLE_HIDDEN_RE = /style\s*=\s*["'][^"']{0,300}(?:display\s*:\s*none|visibility\s*:\s*hidden|font-size\s*:\s*0)[^"']{0,300}["']/gi;
160
+ let styleMatchCount = 0;
161
+ let styleMatch;
162
+ while ((styleMatch = STYLE_HIDDEN_RE.exec(normalized)) !== null &&
163
+ styleMatchCount < 16) {
164
+ styleMatchCount += 1;
165
+ const tail = normalized.slice(styleMatch.index + styleMatch[0].length, styleMatch.index + styleMatch[0].length + 500);
166
+ if (/ignore|override|system|instruction/i.test(tail)) {
167
+ violations.push({
168
+ type: "ingested_injection",
169
+ scanner: "ingestion",
170
+ score: 0.4,
171
+ threshold,
172
+ message: `CSS-hidden instruction in ${source} content`,
173
+ detail: `Pattern: style="display:none ... ignore|override|... (window 500B)`,
174
+ });
175
+ }
176
+ }
177
+ return violations;
178
+ }
/**
 * Tests `pattern` against `text` starting at index 0, regardless of flags.
 *
 * `RegExp.prototype.test()` on a `/g` or `/y` regex resumes from
 * `lastIndex`, so a caller-supplied custom pattern carrying either flag
 * would otherwise alternate between hit and miss across scans (and
 * between the raw and decoded layers of a single scan). Resetting
 * `lastIndex` before and after the call keeps every test independent.
 */
function testFromStart(pattern, text) {
    pattern.lastIndex = 0;
    const matched = pattern.test(text);
    pattern.lastIndex = 0;
    return matched;
}
/**
 * Scanner implementation. Composable into a `ScannerChain` when the
 * caller wants ingestion to participate in the main scan flow rather
 * than be invoked via the standalone `scanIngested()` helper.
 *
 * The scanner reads the `source` from `ScanContext` (or treats input
 * as `"user"` when missing) and applies the source-specific profile.
 * A `source` value that has no entry in `SOURCE_PROFILE` falls back to
 * the `user` profile instead of throwing.
 */
export class IngestionScanner {
    name = "ingestion";
    /** Optional caller override; when undefined the profile threshold is used. */
    threshold;
    /** Extra caller-supplied patterns, tested in addition to the profile's. */
    customPatterns;
    /** Underlying heuristic scanner (strictness defaults to "high"). */
    heuristic;
    /**
     * @param config Optional overrides: `threshold`, `customPatterns`,
     *   and the `strictness` passed to the underlying heuristic scanner.
     */
    constructor(config = {}) {
        this.threshold = config.threshold;
        this.customPatterns = config.customPatterns ?? [];
        this.heuristic = new HeuristicScanner({
            strictness: config.strictness ?? "high",
        });
    }
    /**
     * Scans `input` with the heuristic scanner, the source-specific
     * patterns, the structural (HTML-comment / CSS-hidden) checks and a
     * single decode pass for Base64 / hex / percent-encoded payloads.
     *
     * @param input Raw content to scan.
     * @param context Scan context; `context.source` selects the profile.
     * @returns `{ decision, violations, durationMs }`.
     */
    async scan(input, context) {
        const source = context.source ?? "user";
        // Own-property lookup so an unknown source (or a prototype key such
        // as "constructor") cannot yield `undefined` or a non-profile value.
        const profile = Object.prototype.hasOwnProperty.call(SOURCE_PROFILE, source)
            ? SOURCE_PROFILE[source]
            : SOURCE_PROFILE.user;
        const effectiveThreshold = this.threshold ?? profile.threshold;
        const start = performance.now();
        const violations = [];
        // 1. Run the base heuristic scanner. We respect its own decision (it
        //    includes structural signals that don't surface as individual
        //    violations) and re-tag the violations as `ingested_injection`
        //    so downstream code can filter.
        const heuristicResult = await this.heuristic.scan(input, context);
        for (const v of heuristicResult.violations) {
            violations.push({
                ...v,
                type: "ingested_injection",
                scanner: this.name,
                detail: `${v.detail ?? ""} (source=${source})`.trim(),
            });
        }
        // 2. Run source-specific patterns against the normalized input so the
        //    same Unicode-evasion defense the user channel gets applies here.
        const normalized = normalizeForInjectionScan(input);
        const sourcePatterns = [...profile.extraPatterns, ...this.customPatterns];
        let sourceScore = 0;
        for (const pattern of sourcePatterns) {
            if (testFromStart(pattern, normalized)) {
                sourceScore += 0.4;
                violations.push({
                    type: "ingested_injection",
                    scanner: this.name,
                    score: 0.4,
                    threshold: effectiveThreshold,
                    message: `Indirect injection pattern in ${source} content`,
                    detail: `Pattern: ${pattern.source.slice(0, 80)}`,
                });
            }
        }
        // 2b. Structural slice-then-test scans for `rag` + `web` (ReDoS-safe
        //     replacement for the old compound HTML-comment + CSS-hidden
        //     patterns).
        const structural = runStructuralIngestionScan(normalized, source, effectiveThreshold);
        for (const v of structural) {
            sourceScore += 0.4;
            violations.push(v);
        }
        // 2c. Encoding-bypass: attackers wrap an injection in Base64 / Hex /
        //     percent-encoding and ask the model to "decode this". A single
        //     decode pass over the input catches the common cases. Only runs
        //     when `tryDecodeObfuscation()` finds something that looks encoded.
        const decoded = tryDecodeObfuscation(input);
        if (decoded && decoded !== input) {
            const decodedNormalized = normalizeForInjectionScan(decoded);
            const decodedHeuristic = await this.heuristic.scan(decoded, context);
            if (decodedHeuristic.decision !== "allow") {
                for (const v of decodedHeuristic.violations) {
                    violations.push({
                        ...v,
                        type: "ingested_injection",
                        scanner: this.name,
                        detail: `${v.detail ?? ""} (source=${source}, layer=decoded)`.trim(),
                    });
                }
                sourceScore += 0.6; // decoded-hit is high-confidence
            }
            // Also run source-specific patterns over the decoded layer.
            for (const pattern of sourcePatterns) {
                if (testFromStart(pattern, decodedNormalized)) {
                    sourceScore += 0.4;
                    violations.push({
                        type: "ingested_injection",
                        scanner: this.name,
                        score: 0.4,
                        threshold: effectiveThreshold,
                        message: `Encoded indirect injection in ${source} content`,
                        detail: `Pattern: ${pattern.source.slice(0, 80)} (layer=decoded)`,
                    });
                }
            }
        }
        sourceScore = Math.min(sourceScore, 1.0);
        // 3. Combine decisions. The heuristic scanner's own decision is
        //    trusted as-is; the source score blocks at the threshold and
        //    warns at 60% of it.
        const heuristicBlocks = heuristicResult.decision === "block";
        const heuristicWarns = heuristicResult.decision === "warn";
        const sourceBlocks = sourceScore >= effectiveThreshold;
        const sourceWarns = sourceScore >= effectiveThreshold * 0.6;
        let decision;
        if (heuristicBlocks || sourceBlocks) {
            decision = "block";
        }
        else if (heuristicWarns || sourceWarns) {
            decision = "warn";
        }
        else {
            decision = "allow";
        }
        return {
            decision,
            violations,
            durationMs: performance.now() - start,
        };
    }
}
306
+ /**
307
+ * One-shot helper. Scans `content` against the source-specific profile
308
+ * and returns a result without needing an `AIShield` instance.
309
+ *
310
+ * Use when you want a quick gate at the ingestion boundary, e.g.
311
+ * before storing a chunk into a vector DB or before passing a tool
312
+ * description into the model's context.
313
+ *
314
+ * @example
315
+ * ```ts
316
+ * import { scanIngested } from "ai-shield-core";
317
+ *
318
+ * const ragChunk = "...retrieved document text...";
319
+ * const result = await scanIngested(ragChunk, "rag");
320
+ * if (!result.safe) {
321
+ * // reject the chunk OR strip it before assembly
322
+ * logger.warn("IPI candidate", result.violations);
323
+ * }
324
+ * ```
325
+ */
326
+ export async function scanIngested(content, source, config = {}) {
327
+ const start = performance.now();
328
+ const scanner = new IngestionScanner(config);
329
+ const result = await scanner.scan(content, { source });
330
+ return {
331
+ safe: result.decision === "allow",
332
+ decision: result.decision,
333
+ // When a chunk is blocked, returning the raw input under the field
334
+ // name "sanitized" mis-leads callers into trusting poisoned content.
335
+ // Return empty string on block so a `if (!safe) use(result.sanitized)`
336
+ // path becomes a no-op rather than a vulnerability. Use the original
337
+ // `content` argument if you still need it for audit / quarantine.
338
+ sanitized: result.decision === "block" ? "" : content,
339
+ violations: result.violations,
340
+ source,
341
+ meta: {
342
+ scanDurationMs: performance.now() - start,
343
+ scannersRun: [scanner.name],
344
+ sourceSpecificHits: result.violations.filter((v) => v.detail?.startsWith("Pattern:") && v.type === "ingested_injection").length,
345
+ cached: false,
346
+ },
347
+ };
348
+ }
349
+ // ============================================================
350
+ // Encoding-bypass normalization (R1 from Round 1 review — closes
351
+ // OWASP LLM Prompt Injection Prevention Cheat Sheet 2026 Base64/Hex
352
+ // bypass class).
353
+ // ============================================================
/**
 * Try to decode common obfuscation layers an attacker uses to smuggle
 * an injection past pattern matchers. Returns the decoded payload when
 * it looks like a successful decode, else `null`.
 *
 * The function deliberately runs at most ONE decode layer to avoid
 * decoding amplification (a chain of `base64(base64(...))` would force
 * us into deep recursion); a single-layer decode keeps execution cost
 * bounded. Only the first 65,536 characters of `input` are examined.
 *
 * Heuristics:
 *  - Base64: contiguous run of 40+ Base64 chars (including `=`) that
 *    decodes to at least 70% printable ASCII / whitespace.
 *  - Hex: 80+ hex chars in a row, same 70% printable rule. An odd-length
 *    run is decoded with either its last or its first digit dropped
 *    (whichever alignment is more printable), so one stray digit cannot
 *    disable the check.
 *  - Percent-encoding: 5 or more `%XX` sequences. Each run of escapes is
 *    decoded on its own as UTF-8 bytes, so a literal `%` elsewhere in the
 *    text, an invalid byte sequence, or an escape cut by the window edge
 *    does not abort the whole decode.
 *
 * @param input Raw content to inspect.
 * @returns The longest decoded payload when multiple candidates fire
 *   (first one wins on a tie), or `null` when nothing decodes.
 */
export function tryDecodeObfuscation(input) {
    if (typeof input !== "string" || input.length === 0)
        return null;
    const MAX_SCAN_LENGTH = 65536;
    const MIN_PRINTABLE_RATIO = 0.7;
    // Cap input we look at — Base64 of a megabyte is not the threat model.
    const haystack = input.length > MAX_SCAN_LENGTH ? input.slice(0, MAX_SCAN_LENGTH) : input;
    // Share of characters that are printable ASCII or whitespace; 0 for "".
    const printableRatio = (text) => text.length === 0
        ? 0
        : text.replace(/[^\x20-\x7E\s]/g, "").length / text.length;
    const candidates = [];
    // Base64 — at least 40 alphabet chars; `=` padding is tolerated and
    // stripped. `Buffer.from(..., "base64")` does not throw on odd input.
    for (const run of haystack.match(/[A-Za-z0-9+/=]{40,}/g) ?? []) {
        const cleaned = run.replace(/=+$/, "").replace(/[^A-Za-z0-9+/]/g, "");
        if (cleaned.length < 40)
            continue;
        const decoded = Buffer.from(cleaned, "base64").toString("utf8");
        if (printableRatio(decoded) >= MIN_PRINTABLE_RATIO) {
            candidates.push(decoded);
        }
    }
    // Hex — 80+ hex digits in a row. For an odd-length run try both byte
    // alignments and keep the more printable one.
    for (const run of haystack.match(/[0-9a-fA-F]{80,}/g) ?? []) {
        const alignments = run.length % 2 === 0 ? [run] : [run.slice(0, -1), run.slice(1)];
        let best = "";
        let bestRatio = 0;
        for (const digits of alignments) {
            const decoded = Buffer.from(digits, "hex").toString("utf8");
            const ratio = printableRatio(decoded);
            if (ratio >= MIN_PRINTABLE_RATIO && ratio > bestRatio) {
                best = decoded;
                bestRatio = ratio;
            }
        }
        if (bestRatio >= MIN_PRINTABLE_RATIO) {
            candidates.push(best);
        }
    }
    // Percent-encoding — only decode a windowed region around clustered
    // escapes rather than the full haystack, to bound allocation per scan.
    const PERCENT_RE = /%[0-9A-Fa-f]{2}/g;
    const percentMatches = [];
    let percentMatch;
    while ((percentMatch = PERCENT_RE.exec(haystack)) !== null &&
        percentMatches.length < 32) {
        percentMatches.push(percentMatch.index);
    }
    if (percentMatches.length >= 5) {
        // Window: 256 chars before the first escape, 1024 after the last
        // recorded one. Bounded work regardless of haystack size.
        const first = percentMatches[0] ?? 0;
        const last = percentMatches[percentMatches.length - 1] ?? first;
        const winStart = Math.max(0, first - 256);
        const winEnd = Math.min(haystack.length, last + 1024);
        const window = haystack.slice(winStart, winEnd);
        // Decode each maximal run of `%XX` escapes as UTF-8 bytes. Unlike a
        // single `decodeURIComponent(window)` this cannot throw, so a bare
        // `%` or a malformed sequence no longer hides the rest of the payload
        // (invalid bytes become U+FFFD).
        const decoded = window.replace(/(?:%[0-9A-Fa-f]{2})+/g, (escapes) => Buffer.from(escapes.replace(/%/g, ""), "hex").toString("utf8"));
        if (decoded !== window)
            candidates.push(decoded);
    }
    if (candidates.length === 0)
        return null;
    // Return the longest candidate; that's the most likely attack payload.
    // The first candidate wins a tie.
    return candidates.reduce((longest, candidate) => candidate.length > longest.length ? candidate : longest);
}
452
+ //# sourceMappingURL=ingestion.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"pii.d.ts","sourceRoot":"","sources":["../../src/scanner/pii.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,OAAO,EACP,aAAa,EACb,WAAW,EAEX,SAAS,EAGT,SAAS,EACV,MAAM,aAAa,CAAC;AAsKrB,qBAAa,UAAW,YAAW,OAAO;IACxC,QAAQ,CAAC,IAAI,SAAS;IACtB,OAAO,CAAC,QAAQ,CAAe;IAC/B,OAAO,CAAC,MAAM,CAAY;IAC1B,OAAO,CAAC,aAAa,CAAsC;IAC3D,OAAO,CAAC,YAAY,CAAe;gBAEvB,MAAM,GAAE,SAAc;IAO5B,IAAI,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,WAAW,GAAG,OAAO,CAAC,aAAa,CAAC;IAoDxE,sCAAsC;IACtC,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,SAAS,EAAE;IA8BjC,uFAAuF;IACvF,OAAO,CAAC,mBAAmB;IAuB3B,gCAAgC;IAChC,OAAO,CAAC,YAAY;CAerB"}
1
+ {"version":3,"file":"pii.d.ts","sourceRoot":"","sources":["../../src/scanner/pii.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,OAAO,EACP,aAAa,EACb,WAAW,EAEX,SAAS,EAGT,SAAS,EACV,MAAM,aAAa,CAAC;AAoLrB,qBAAa,UAAW,YAAW,OAAO;IACxC,QAAQ,CAAC,IAAI,SAAS;IACtB,OAAO,CAAC,QAAQ,CAAe;IAC/B,OAAO,CAAC,MAAM,CAAY;IAC1B,OAAO,CAAC,aAAa,CAAsC;IAC3D,OAAO,CAAC,YAAY,CAAe;gBAEvB,MAAM,GAAE,SAAc;IAO5B,IAAI,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,WAAW,GAAG,OAAO,CAAC,aAAa,CAAC;IAoDxE,sCAAsC;IACtC,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,SAAS,EAAE;IA8BjC,uFAAuF;IACvF,OAAO,CAAC,mBAAmB;IAuB3B,gCAAgC;IAChC,OAAO,CAAC,YAAY;CAerB"}
@@ -1,12 +1,12 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.PIIScanner = void 0;
4
1
  // --- German & International PII Patterns ---
5
2
  const PII_PATTERNS = [
6
- // IBAN: DE + 2 check digits + 18 digits (with optional spaces/dashes)
3
+ // IBAN: 2-letter ISO + 2 check digits + 11..30 alphanumerics (with optional spaces/dashes).
4
+ // Covers all 80+ IBAN countries: NO (15), BE (16), DE (22), FR (27), MT (31), SC (31).
5
+ // The validator runs mod-97 over the cleaned value and rejects anything that isn't a real IBAN.
6
+ // Pattern is linear (no nested quantifiers) — ReDoS-safe.
7
7
  {
8
8
  type: "iban",
9
- pattern: /\b[A-Z]{2}\s?\d{2}\s?\d{4}\s?\d{4}\s?\d{4}\s?\d{4}\s?\d{2,4}\b/g,
9
+ pattern: /\b[A-Z]{2}\d{2}[ -]?[A-Z0-9](?:[A-Z0-9 -]{9,36}[A-Z0-9])?\b/g,
10
10
  validator: validateIBAN,
11
11
  baseConfidence: 0.95,
12
12
  },
@@ -36,10 +36,15 @@ const PII_PATTERNS = [
36
36
  pattern: /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g,
37
37
  baseConfidence: 0.95,
38
38
  },
39
- // Phone: German formats (+49, 0xxx) and international
39
+ // Phone: German formats (+49, 0xxx) and international.
40
+ // Previous pattern had nested optional quantifiers (`\s?[\s\-/]?` plus two
41
+ // `[\s\-/]?\d{0,5}` tails) which risks catastrophic backtracking on
42
+ // malformed inputs. Restructured so every separator group requires at least
43
+ // one char when present and the trailing digits group is a true non-optional
44
+ // extension (or absent entirely).
40
45
  {
41
46
  type: "phone",
42
- pattern: /(?<!\d)(?:\+\d{1,3}|00\d{1,3}|0)\s?[\s\-/]?\(?\d{2,5}\)?[\s\-/]?\d{3,8}[\s\-/]?\d{0,5}\b/g,
47
+ pattern: /(?<!\d)(?:\+\d{1,3}|00\d{1,3}|0)[\s\-/]?\(?\d{2,5}\)?[\s\-/]?\d{3,8}(?:[\s\-/]\d{1,5})?\b/g,
43
48
  validator: validatePhone,
44
49
  baseConfidence: 0.80,
45
50
  },
@@ -129,17 +134,25 @@ function maskValue(type, value) {
129
134
  return value[0] + "***@" + value.substring(atIdx + 1);
130
135
  }
131
136
  case "phone":
137
+ // Need room for 4-prefix + **** + 2-suffix without overlap.
138
+ if (value.length < 7)
139
+ return "[PHONE]";
132
140
  return value.substring(0, 4) + "****" + value.substring(value.length - 2);
133
141
  case "iban":
134
- return value.substring(0, 4) + " **** **** ****";
135
- case "credit_card":
136
- return "**** **** **** " + value.replace(/\D/g, "").substring(12);
142
+ // Keep country code + check digits, mask rest. Works for any IBAN length.
143
+ return value.length >= 4 ? value.substring(0, 4) + " **** **** ****" : "[IBAN]";
144
+ case "credit_card": {
145
+ const digits = value.replace(/\D/g, "");
146
+ if (digits.length < 13)
147
+ return "[CREDIT_CARD]";
148
+ return "**** **** **** " + digits.substring(digits.length - 4);
149
+ }
137
150
  default:
138
151
  return `[${type.toUpperCase()}]`;
139
152
  }
140
153
  }
141
154
  // --- PII Scanner Class ---
142
- class PIIScanner {
155
+ export class PIIScanner {
143
156
  name = "pii";
144
157
  patterns;
145
158
  action;
@@ -251,5 +264,4 @@ class PIIScanner {
251
264
  return masked;
252
265
  }
253
266
  }
254
- exports.PIIScanner = PIIScanner;
255
267
  //# sourceMappingURL=pii.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"shield.d.ts","sourceRoot":"","sources":["../src/shield.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,UAAU,EAAE,WAAW,EAAc,MAAM,YAAY,CAAC;AAKpF,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAUlD,qBAAa,QAAQ;IACnB,OAAO,CAAC,KAAK,CAAe;IAC5B,OAAO,CAAC,YAAY,CAAe;IACnC,OAAO,CAAC,WAAW,CAAqB;IACxC,OAAO,CAAC,WAAW,CAAqB;IACxC,OAAO,CAAC,SAAS,CAAkC;IACnD,OAAO,CAAC,MAAM,CAAe;gBAEjB,MAAM,GAAE,YAAiB;IAyBrC,qCAAqC;IAC/B,IAAI,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,GAAE,WAAgB,GAAG,OAAO,CAAC,UAAU,CAAC;IA+BzE,kDAAkD;IAC5C,WAAW,CACf,QAAQ,EAAE,MAAM,EAChB,KAAK,EAAE,MAAM,EACb,oBAAoB,EAAE,MAAM,EAC5B,qBAAqB,CAAC,EAAE,MAAM;IAahC,+CAA+C;IACzC,UAAU,CACd,QAAQ,EAAE,MAAM,EAChB,KAAK,EAAE,MAAM,EACb,WAAW,EAAE,MAAM,EACnB,YAAY,EAAE,MAAM;IAMtB,sCAAsC;IAChC,eAAe,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAKxD,4BAA4B;IAC5B,SAAS,IAAI,YAAY;IAIzB,2BAA2B;IAC3B,UAAU,IAAI,IAAI;IAIlB,sBAAsB;IACtB,IAAI,SAAS,IAAI,MAAM,CAEtB;IAED,wBAAwB;IAClB,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAS5B,OAAO,CAAC,aAAa;IAUrB,OAAO,CAAC,aAAa;IAkDrB,OAAO,CAAC,UAAU;CA2BnB"}
1
+ {"version":3,"file":"shield.d.ts","sourceRoot":"","sources":["../src/shield.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,UAAU,EAAE,WAAW,EAAc,MAAM,YAAY,CAAC;AAKpF,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAUlD,qBAAa,QAAQ;IACnB,OAAO,CAAC,KAAK,CAAe;IAC5B,OAAO,CAAC,YAAY,CAAe;IACnC,OAAO,CAAC,WAAW,CAAqB;IACxC,OAAO,CAAC,WAAW,CAAqB;IACxC,OAAO,CAAC,SAAS,CAAkC;IACnD,OAAO,CAAC,MAAM,CAAe;gBAEjB,MAAM,GAAE,YAAiB;IAyBrC,qCAAqC;IAC/B,IAAI,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,GAAE,WAAgB,GAAG,OAAO,CAAC,UAAU,CAAC;IA4CzE,kDAAkD;IAC5C,WAAW,CACf,QAAQ,EAAE,MAAM,EAChB,KAAK,EAAE,MAAM,EACb,oBAAoB,EAAE,MAAM,EAC5B,qBAAqB,CAAC,EAAE,MAAM;IAahC,+CAA+C;IACzC,UAAU,CACd,QAAQ,EAAE,MAAM,EAChB,KAAK,EAAE,MAAM,EACb,WAAW,EAAE,MAAM,EACnB,YAAY,EAAE,MAAM;IAMtB,sCAAsC;IAChC,eAAe,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAKxD,4BAA4B;IAC5B,SAAS,IAAI,YAAY;IAIzB,2BAA2B;IAC3B,UAAU,IAAI,IAAI;IAIlB,sBAAsB;IACtB,IAAI,SAAS,IAAI,MAAM,CAEtB;IAED,wBAAwB;IAClB,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAS5B,OAAO,CAAC,aAAa;IAUrB,OAAO,CAAC,aAAa;IAkDrB,OAAO,CAAC,UAAU;CA2BnB"}