@nekzus/liop 1.2.0 → 2.0.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,8 +10,9 @@ import { zodToJsonSchema } from "zod-to-json-schema";
10
10
  import { MeshNode } from "../mesh/node.js";
11
11
  import { LiopRpcServer } from "../rpc/server.js";
12
12
  import { log } from "../utils/logger.js";
13
+ import { NerScanner } from "./ner-scanner.js";
13
14
  import { PII_PATTERNS, PII_PRESETS, PiiScanner } from "./pii.js";
14
- export { PII_PATTERNS, PII_PRESETS, PiiScanner };
15
+ export { NerScanner, PII_PATTERNS, PII_PRESETS, PiiScanner };
15
16
  /**
16
17
  * When enabled, `payload` tools that are not LIOP v1 envelopes are passed through to the
17
18
  * registered handler unchanged (no worker extraction). Default off for strict protocol tests.
@@ -29,6 +30,10 @@ export class LiopServer {
29
30
  CACHE_TTL_MS = 24 * 60 * 60 * 1000; // 24 hours
30
31
  THROTTLE_THRESHOLD = 5;
31
32
  THROTTLE_COOLDOWN_MS = 60 * 1000; // 60 seconds
33
+ // [OWASP-A01] Sliding window rate limiter — prevents micro-query exfiltration
34
+ toolCallWindows = new Map();
35
+ toolCallMaxPerWindow;
36
+ toolCallWindowMs;
32
37
  tools = new Map();
33
38
  resources = new Map();
34
39
  prompts = new Map();
@@ -67,8 +72,10 @@ export class LiopServer {
67
72
  const compact = logic.replace(/\s+/g, " ");
68
73
  if (policy.enforceAggregationFirst) {
69
74
  const rowExtractionPatterns = [
70
- /return\s+env\.records\b/i,
71
- /return\s*\{[\s\S]*\b(accounts|patients|rows|records)\s*:\s*env\.records/i,
75
+ // Block raw record dumps but allow safe aggregation chains
76
+ // (.reduce, .length, .filter().length, .every, .some)
77
+ /return\s+env\.records(?!\s*\.\s*(?:reduce|length|filter|every|some|find)\b)/i,
78
+ /return\s*\{[\s\S]*\b(accounts|patients|rows|records)\s*:\s*env\.records(?!\s*\.\s*(?:reduce|length|filter)\b)/i,
72
79
  ];
73
80
  if (rowExtractionPatterns.some((p) => p.test(compact))) {
74
81
  return "Preflight policy rejected: potential row-level export pattern detected.";
@@ -84,15 +91,29 @@ export class LiopServer {
84
91
  return null;
85
92
  const parsed = this.parseUnknownJson(output);
86
93
  if (policy.outputSchema) {
87
- const schemaResult = policy.outputSchema.safeParse(parsed);
94
+ // SEC-HARDENING: Force strict mode on ZodObject schemas to prevent
95
+ // key aliasing bypasses via .passthrough(). However, respect schemas
96
+ // that explicitly use .catchall() — calling .strict() would override
97
+ // the catchall with ZodNever, destroying the developer's intent.
98
+ const effectiveSchema = (() => {
99
+ if (!(policy.outputSchema instanceof z.ZodObject)) {
100
+ return policy.outputSchema;
101
+ }
102
+ const obj = policy.outputSchema;
103
+ // If schema has an explicit catchall (not ZodNever), respect it
104
+ if (!(obj._def.catchall instanceof z.ZodNever)) {
105
+ return obj;
106
+ }
107
+ // Otherwise force strict to block unrecognized keys by default
108
+ return obj.strict();
109
+ })();
110
+ const schemaResult = effectiveSchema.safeParse(parsed);
88
111
  if (!schemaResult.success) {
89
- // Include a truncated preview of the rejected value so the LLM can self-correct
90
- const preview = typeof parsed === "string"
91
- ? parsed.slice(0, 200)
92
- : JSON.stringify(parsed).slice(0, 200);
112
+ // SEC-CRITICAL: Never expose rejected data in error messages.
113
+ // Only report the structural violation (unrecognized keys, type mismatches).
93
114
  return `[LIOP] Output schema violation for ${toolName}: ${schemaResult.error.issues
94
115
  .map((i) => `${i.path.join(".") || "<root>"} ${i.message}`)
95
- .join("; ")}. Rejected value: ${preview}. HINT: Use 'env.records' to access the dataset inside your logic.`;
116
+ .join("; ")}. HINT: Your output must conform to the declared schema. Use 'env.records' to access the dataset and return only allowed fields.`;
96
117
  }
97
118
  }
98
119
  if (policy.enforceAggregationFirst &&
@@ -143,6 +164,14 @@ export class LiopServer {
143
164
  return this.unwrapForAggregationPolicyScan(joined);
144
165
  }
145
166
  violatesAggregationFirstPolicy(input, policyObj) {
167
+ const maxRows = typeof policyObj === "object" &&
168
+ typeof policyObj.maxOutputRows === "number"
169
+ ? policyObj.maxOutputRows
170
+ : 10;
171
+ const allowPrimitives = typeof policyObj === "object" &&
172
+ typeof policyObj.allowPrimitiveArrays === "boolean"
173
+ ? policyObj.allowPrimitiveArrays
174
+ : true;
146
175
  if (typeof input === "string") {
147
176
  const trimmed = input.trim();
148
177
  if ((trimmed.startsWith("{") && trimmed.endsWith("}")) ||
@@ -157,14 +186,6 @@ export class LiopServer {
157
186
  return false;
158
187
  }
159
188
  if (Array.isArray(input)) {
160
- const maxRows = typeof policyObj === "object" &&
161
- typeof policyObj.maxOutputRows === "number"
162
- ? policyObj.maxOutputRows
163
- : 10;
164
- const allowPrimitives = typeof policyObj === "object" &&
165
- typeof policyObj.allowPrimitiveArrays === "boolean"
166
- ? policyObj.allowPrimitiveArrays
167
- : true;
168
189
  if (input.length > 0 &&
169
190
  input.every((item) => typeof item === "object" && item !== null)) {
170
191
  // Treat tabular row export as non-aggregated leakage risk if above threshold.
@@ -182,6 +203,11 @@ export class LiopServer {
182
203
  return input.some((item) => this.violatesAggregationFirstPolicy(item, policyObj));
183
204
  }
184
205
  if (input && typeof input === "object") {
206
+ const keys = Object.keys(input);
207
+ // Treat flat dictionary with too many keys as non-aggregated leakage risk (Dynamic Key Bypass).
208
+ if (keys.length > maxRows) {
209
+ return true;
210
+ }
185
211
  return Object.values(input).some((value) => this.violatesAggregationFirstPolicy(value, policyObj));
186
212
  }
187
213
  return false;
@@ -189,6 +215,9 @@ export class LiopServer {
189
215
  constructor(serverInfo, config) {
190
216
  this.serverInfo = serverInfo;
191
217
  this.config = config;
218
+ const nerScanner = this.config?.security?.enableNerScanning
219
+ ? new NerScanner()
220
+ : null;
192
221
  this.piiScanner = new PiiScanner(this.config?.security?.piiPatterns ?? PII_PRESETS.GLOBAL_STRICT, this.config?.security?.forbiddenKeys ?? [
193
222
  "id",
194
223
  "name",
@@ -210,7 +239,15 @@ export class LiopServer {
210
239
  "token",
211
240
  "secret",
212
241
  "privateKey",
213
- ]);
242
+ ], nerScanner);
243
+ // [OWASP-A01] Rate limit: config > env > default (30 calls/min)
244
+ const rlConfig = this.config?.security?.rateLimit;
245
+ this.toolCallWindowMs =
246
+ rlConfig?.windowMs ??
247
+ Number.parseInt(process.env.LIOP_RATE_LIMIT_WINDOW_MS ?? "60000", 10);
248
+ this.toolCallMaxPerWindow =
249
+ rlConfig?.maxPerWindow ??
250
+ Number.parseInt(process.env.LIOP_RATE_LIMIT_MAX ?? "30", 10);
214
251
  // Initialize Zero-Blocking Worker Pool for Heavy Cryptography & Sandboxing
215
252
  const isTS = import.meta.url.endsWith(".ts");
216
253
  const workerExt = isTS ? ".ts" : ".js";
@@ -239,6 +276,12 @@ export class LiopServer {
239
276
  maxQueue: "auto",
240
277
  taskQueue: new FixedQueue(),
241
278
  execArgv,
279
+ // [DoS Defense] Enforce hard memory ceiling per worker thread.
280
+ // Workers exceeding this limit are terminated by Node.js runtime.
281
+ resourceLimits: {
282
+ maxOldGenerationSizeMb: this.config?.workerPool?.maxHeapMb ??
283
+ Number.parseInt(process.env.LIOP_WORKER_MAX_HEAP_MB ?? "64", 10),
284
+ },
242
285
  });
243
286
  // [Token Economy] Auto-register LIOP protocol spec as a single Resource.
244
287
  // This centralizes the envelope documentation that was previously
@@ -568,6 +611,37 @@ Protocol Adherence is mandatory for successful execution.`,
568
611
  this.logicCache.clear();
569
612
  log.info("[LIOP-SDK] AST Security Cache cleared by Admin.");
570
613
  }
614
+ /**
615
+ * Sliding window rate limiter for tool call frequency.
616
+ * Prevents micro-query exfiltration attacks where an attacker
617
+ * makes hundreds of individually-legitimate calls to reconstruct
618
+ * the full dataset field by field. (OWASP A01)
619
+ */
620
+ checkToolCallRateLimit(toolName) {
621
+ const now = Date.now();
622
+ const windowMs = this.toolCallWindowMs;
623
+ const maxPerWindow = this.toolCallMaxPerWindow;
624
+ const window = this.toolCallWindows.get(toolName) || [];
625
+ // Evict expired timestamps outside the sliding window
626
+ const active = window.filter((t) => now - t < windowMs);
627
+ if (active.length >= maxPerWindow) {
628
+ const retryAfterSec = Math.ceil((active[0] + windowMs - now) / 1000);
629
+ return {
630
+ content: [
631
+ {
632
+ type: "text",
633
+ text: `LIOP_RATE_LIMITED: Too many calls to ${toolName}. ` +
634
+ `Max ${maxPerWindow} per ${windowMs / 1000}s window. ` +
635
+ `Retry after ${retryAfterSec}s.`,
636
+ },
637
+ ],
638
+ isError: true,
639
+ };
640
+ }
641
+ active.push(now);
642
+ this.toolCallWindows.set(toolName, active);
643
+ return null;
644
+ }
571
645
  /**
572
646
  * Emulates calling a tool (used locally or via LIOPMcpBridge)
573
647
  */
@@ -576,6 +650,10 @@ Protocol Adherence is mandatory for successful execution.`,
576
650
  if (!entry) {
577
651
  throw new Error(`Tool not found: ${request.name}`);
578
652
  }
653
+ // [OWASP-A01] Rate limiting: prevent micro-query exfiltration
654
+ const rateLimitResult = this.checkToolCallRateLimit(request.name);
655
+ if (rateLimitResult)
656
+ return rateLimitResult;
579
657
  try {
580
658
  // Validate inputs natively with Zod before execution
581
659
  const parsedArgs = entry.schema.parse(request.arguments || {});
@@ -813,10 +891,11 @@ Protocol Adherence is mandatory for successful execution.`,
813
891
  ]);
814
892
  const aggregationViolation = this.violatesAggregationFirstPolicy(this.unwrapForAggregationPolicyScan(finalOutput));
815
893
  if (violation || aggregationViolation) {
816
- const reason = violation ||
817
- "Aggregation-First Policy Violation: row-level export blocked. HINT: Use .reduce() to produce a flat {key:value} object. Do NOT use .map() to create arrays of objects.";
818
- log.info(`[LIOP-RPC] Secure egress blocked in gRPC stream: ${reason}`);
819
- response.semantic_evidence = `[LIOP] Egress Security Violation. Output blocked due to policy enforcement (${reason}).`;
894
+ // SEC-CRITICAL: Log details server-side, never expose to caller
895
+ const internalReason = violation || "Aggregation-First Policy Violation";
896
+ log.info(`[LIOP-RPC] Secure egress blocked in gRPC stream: ${internalReason}`);
897
+ response.semantic_evidence =
898
+ "[LIOP] Egress Security Violation. Output blocked due to policy enforcement.";
820
899
  response.is_error = true;
821
900
  }
822
901
  call.write(response, () => {
@@ -825,10 +904,16 @@ Protocol Adherence is mandatory for successful execution.`,
825
904
  }
826
905
  catch (error) {
827
906
  const e = error;
828
- log.error(`[LIOP-RPC] Execution Error: ${e.message}`);
907
+ const isDev = process.env.NODE_ENV === "development" ||
908
+ process.env.NODE_ENV === "test";
909
+ const detail = e.message || String(error);
910
+ log.error(`[LIOP-RPC] Execution Error: ${detail}`);
911
+ const errorMessage = isDev
912
+ ? `Execution Error: ${detail}`
913
+ : "[LIOP] Execution Failed. The injected logic violated runtime constraints or encountered a fatal error.";
829
914
  // Send error response before closing, avoiding "stream closed without results"
830
915
  const errorResponse = {
831
- semantic_evidence: `Execution Error: ${e.message}`,
916
+ semantic_evidence: errorMessage,
832
917
  cryptographic_proof: Buffer.from(""),
833
918
  zk_receipt: Buffer.from(""),
834
919
  is_error: true,
@@ -881,9 +966,20 @@ Protocol Adherence is mandatory for successful execution.`,
881
966
  : undefined;
882
967
  const policyViolation = this.validateOutputPolicy(toolName || "unknown_tool", workerResponse.output, toolPolicy);
883
968
  if (policyViolation) {
969
+ // SEC-CRITICAL: Log details server-side, never expose to caller in Production
884
970
  log.info(`[LIOP-SDK] Output policy blocked for ${toolName || "unknown_tool"}: ${policyViolation}`);
971
+ const isDev = process.env.NODE_ENV === "development" ||
972
+ process.env.NODE_ENV === "test";
973
+ const errorMessage = isDev
974
+ ? policyViolation
975
+ : "[LIOP] Egress Security Violation. Output blocked due to policy enforcement. HINT: Return only aggregated, non-PII results using .reduce() to produce a flat {key:value} object with allowed schema fields.";
885
976
  return {
886
- content: [{ type: "text", text: `[LIOP] ${policyViolation}` }],
977
+ content: [
978
+ {
979
+ type: "text",
980
+ text: errorMessage,
981
+ },
982
+ ],
887
983
  isError: true,
888
984
  };
889
985
  }
@@ -891,14 +987,21 @@ Protocol Adherence is mandatory for successful execution.`,
891
987
  const violation = this.piiScanner.scan(content);
892
988
  const aggregationViolation = this.violatesAggregationFirstPolicy(workerResponse.output);
893
989
  if (violation || aggregationViolation) {
894
- const reason = violation ||
895
- "Aggregation-First Policy Violation: row-level export blocked. HINT: Use .reduce() to produce a flat {key:value} object. Do NOT use .map() to create arrays of objects.";
896
- log.info(`[LIOP-SDK] Secure egress blocked in local execution: ${reason}`);
990
+ // SEC-CRITICAL: Log the specific violation reason server-side only.
991
+ // Never expose detection details (entity names, matched values) to the caller in Production.
992
+ const internalReason = violation ||
993
+ "Aggregation-First Policy Violation: Output blocked due to dynamic flat-key policy enforcement.";
994
+ log.info(`[LIOP-SDK] Secure egress blocked in local execution: ${internalReason}`);
995
+ const isDev = process.env.NODE_ENV === "development" ||
996
+ process.env.NODE_ENV === "test";
997
+ const errorMessage = isDev
998
+ ? `[LIOP] Egress Security Violation: ${internalReason}`
999
+ : "[LIOP] Egress Security Violation. Output blocked due to policy enforcement. HINT: Return only aggregated, non-PII results using .reduce() to produce a flat {key:value} object with allowed schema fields.";
897
1000
  return {
898
1001
  content: [
899
1002
  {
900
1003
  type: "text",
901
- text: `[LIOP] Egress Security Violation. Output blocked due to policy enforcement (${reason}).`,
1004
+ text: errorMessage,
902
1005
  },
903
1006
  ],
904
1007
  isError: true,
@@ -908,11 +1011,18 @@ Protocol Adherence is mandatory for successful execution.`,
908
1011
  }
909
1012
  catch (error) {
910
1013
  const e = error;
1014
+ const isDev = process.env.NODE_ENV === "development" ||
1015
+ process.env.NODE_ENV === "test";
1016
+ const detail = e.message || String(error);
1017
+ log.error(`[LIOP-SDK] WorkerPool Execution Fault: ${detail}`);
1018
+ const errorMessage = isDev
1019
+ ? `WorkerPoolError: ${detail}`
1020
+ : "[LIOP] Execution Failed. The injected logic violated runtime constraints or encountered a fatal error.";
911
1021
  return {
912
1022
  content: [
913
1023
  {
914
1024
  type: "text",
915
- text: `WorkerPoolError: ${e.message || String(error)}`,
1025
+ text: errorMessage,
916
1026
  },
917
1027
  ],
918
1028
  isError: true,
@@ -0,0 +1,29 @@
1
+ /** A single named entity found in scanned text: its category plus the trimmed matched text. */
2
+ export interface NerEntity {
3
+ type: "person" | "place" | "organization"; // entity category assigned by the scanner
4
+ text: string; // matched text with surrounding whitespace trimmed
5
+ }
6
+ /** Result of an NER scan operation; `detected` is true exactly when `entities` is non-empty. */
7
+ export interface NerScanResult {
8
+ detected: boolean;
9
+ entities: NerEntity[];
10
+ }
11
+ /**
12
+ * Scans text content for named entities that may represent PII.
13
+ * Uses `compromise/three` for person, place, and organization detection.
14
+ *
15
+ * Designed for egress filtering — optimized for recall over precision
16
+ * to ensure sensitive data does not leak through aliased output keys.
17
+ */
18
+ export declare class NerScanner {
19
+ /**
20
+ * Scans a single string value for named entities.
21
+ * Returns the person, place and organization entities found (empty when none are recognized).
22
+ */
23
+ scan(text: string): NerScanResult;
24
+ /**
25
+ * Recursively scans all string values within an object/array.
26
+ * Returns early only once a person entity is found; `seen` tracks visited objects so cycles are skipped.
27
+ */
28
+ scanDeep(input: unknown, seen?: WeakSet<object>): NerScanResult;
29
+ }
@@ -0,0 +1,141 @@
1
+ /**
2
+ * LIOP NER Content Scanner (The Shield V3 — Named Entity Recognition Layer)
3
+ *
4
+ * Lightweight NER scanner using `compromise` NLP for detecting
5
+ * person names, places, and organizations in free-text output values.
6
+ *
7
+ * This layer operates AFTER the regex-based PII scanner and
8
+ * catches entities that lack a deterministic format pattern
9
+ * (e.g., "Evelyn Reed" cannot be detected by regex).
10
+ *
11
+ * Architecture: opt-in per-server via `enableNerScanning: true`.
12
+ * Performance: ~10ms for typical SDK output sizes (< 10KB).
13
+ *
14
+ * @see https://github.com/spencermountain/compromise
15
+ */
16
+ import nlp from "compromise/three";
/**
 * Medical/pharmaceutical vocabulary safelist.
 *
 * Maps each lower-case term to the tag it is registered under: drug names
 * are tagged "Medication" and generic clinical terms "Condition". The table
 * is handed to the NLP library so that these words are not mistaken for
 * person or organization names. Extend the two lists below as new false
 * positives are discovered.
 */
const MEDICATION_TERMS = [
    "aspirin",
    "lisinopril",
    "metformin",
    "amlodipine",
    "atorvastatin",
    "omeprazole",
    "losartan",
    "simvastatin",
    "levothyroxine",
    "ibuprofen",
    "acetaminophen",
    "amoxicillin",
    "ciprofloxacin",
    "prednisone",
    "warfarin",
    "insulin",
    "hydrochlorothiazide",
    "gabapentin",
    "albuterol",
    "pantoprazole",
];
// Generic clinical terms
const CONDITION_TERMS = [
    "hypertension",
    "diabetes",
    "bronchitis",
    "pneumonia",
    "asthma",
];
const MEDICAL_VOCABULARY = Object.fromEntries([
    ...MEDICATION_TERMS.map((term) => [term, "Medication"]),
    ...CONDITION_TERMS.map((term) => [term, "Condition"]),
]);
51
+ // Register the medical vocabulary at module load, BEFORE any scan operations.
52
+ // Intent (per the original author): compromise's addWords() overrides the default classification,
53
+ // preventing these terms from being tagged as #Person or #Organization.
54
+ nlp.addWords(MEDICAL_VOCABULARY);
55
+ // Minimum length, in characters, to attempt NER analysis. Applied both to the input text
56
+ // and to each trimmed entity match; anything shorter is ignored.
57
+ const MIN_TEXT_LENGTH = 4;
58
+ // Matches strings made up solely of digits, whitespace and punctuation/symbols; scan() skips them.
59
+ const NON_TEXT_PATTERN = /^[\d\s.,:;!?()[\]{}<>@#$%^&*+=|\\/"'`~_-]+$/;
/**
 * Scans text content for named entities that may represent PII.
 * Uses `compromise/three` for person, place, and organization detection.
 *
 * Designed for egress filtering — optimized for recall over precision
 * to ensure sensitive data does not leak through aliased output keys.
 */
export class NerScanner {
    /**
     * Scans a single string value for named entities.
     *
     * Input that is not a string, is shorter than MIN_TEXT_LENGTH, or is
     * made only of digits/punctuation is skipped and reported as clean.
     *
     * @param text The string to analyse.
     * @returns `{ detected, entities }` where `entities` lists people first,
     *   then places, then organizations; `detected` is true when the list
     *   is non-empty.
     */
    scan(text) {
        // Guard against non-string input: reading `.length` on null/undefined
        // would otherwise throw inside the egress filter.
        if (typeof text !== "string" ||
            text.length < MIN_TEXT_LENGTH ||
            NON_TEXT_PATTERN.test(text)) {
            return { detected: false, entities: [] };
        }
        const doc = nlp(text);
        const entities = [];
        // Trim each match and keep it only when it is long enough to be meaningful.
        const collect = (type, matches) => {
            for (const match of matches) {
                const trimmed = match.trim();
                if (trimmed.length >= MIN_TEXT_LENGTH) {
                    entities.push({ type, text: trimmed });
                }
            }
        };
        collect("person", doc.people().out("array"));
        collect("place", doc.places().out("array"));
        collect("organization", doc.organizations().out("array"));
        return {
            detected: entities.length > 0,
            entities,
        };
    }
    /**
     * Recursively scans all string values within an object/array.
     *
     * Entities from every visited string are accumulated. The walk returns
     * early only once a person entity has been found; place and organization
     * hits alone do not stop the traversal. Non-string primitives are ignored.
     *
     * @param input Value to inspect (string, array, plain object, or other).
     * @param seen Objects already visited; prevents infinite recursion on
     *   cyclic structures. Callers normally omit it.
     * @returns Aggregated scan result for the whole structure.
     */
    scanDeep(input, seen = new WeakSet()) {
        if (input === null || input === undefined) {
            return { detected: false, entities: [] };
        }
        if (typeof input === "string") {
            return this.scan(input);
        }
        if (typeof input === "object") {
            if (seen.has(input)) {
                return { detected: false, entities: [] };
            }
            seen.add(input);
            const values = Array.isArray(input)
                ? input
                : Object.values(input);
            const allEntities = [];
            for (const value of values) {
                const result = this.scanDeep(value, seen);
                if (result.detected) {
                    allEntities.push(...result.entities);
                    // Fail-fast: return immediately on first person detection
                    if (result.entities.some((e) => e.type === "person")) {
                        return { detected: true, entities: allEntities };
                    }
                }
            }
            return {
                detected: allEntities.length > 0,
                entities: allEntities,
            };
        }
        return { detected: false, entities: [] };
    }
}
@@ -30,11 +30,37 @@ export declare const PII_PRESETS: {
30
30
  export declare class PiiScanner {
31
31
  private patterns;
32
32
  private forbiddenKeysSet;
33
- constructor(patterns?: PiiRule[], forbiddenKeys?: string[]);
33
+ private nerScanner;
34
+ /**
35
+ * Safelist of keys that contain forbidden substrings but are NOT PII.
36
+ * Prevents false positives from fuzzy matching (e.g., "grid" contains "id").
37
+ */
38
+ private static readonly KEY_SAFELIST;
39
+ /**
40
+ * Short forbidden tokens (< 4 chars) that require boundary-aware matching.
41
+ * Uses regex boundary detection to avoid false positives.
42
+ */
43
+ private shortTokenBoundaryPatterns;
44
+ /**
45
+ * Long forbidden tokens (>= 4 chars) that use substring containment.
46
+ */
47
+ private longForbiddenTokens;
48
+ constructor(patterns?: PiiRule[], forbiddenKeys?: string[], nerScanner?: import("./ner-scanner.js").NerScanner | null);
34
49
  /**
35
50
  * Scans any input (string, object, array) for PII violations.
36
51
  * Returns the pattern/rule name that triggered the violation, or null if safe.
52
+ *
53
+ * Detection pipeline (fail-fast):
54
+ * 1. Exact key match (O(1) Set lookup)
55
+ * 2. Fuzzy key match (boundary detection for short tokens, substring for long)
56
+ * 3. Regex/algorithmic pattern match on string values
57
+ * 4. NER content scan on string values (if enabled)
37
58
  */
38
59
  scan(input: unknown, seen?: WeakSet<object>): string | null;
60
+ /**
61
+ * Checks a key against fuzzy matching rules.
62
+ * Short tokens use boundary-aware regex; long tokens use substring containment.
63
+ */
64
+ private checkKeyFuzzy;
39
65
  private checkString;
40
66
  }