shroud-privacy 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/LICENSE +190 -0
  2. package/NOTICE +7 -0
  3. package/README.md +369 -0
  4. package/dist/audit.d.ts +46 -0
  5. package/dist/audit.js +127 -0
  6. package/dist/canary.d.ts +31 -0
  7. package/dist/canary.js +73 -0
  8. package/dist/config.d.ts +27 -0
  9. package/dist/config.js +123 -0
  10. package/dist/detectors/base.d.ts +8 -0
  11. package/dist/detectors/base.js +2 -0
  12. package/dist/detectors/code.d.ts +25 -0
  13. package/dist/detectors/code.js +144 -0
  14. package/dist/detectors/context.d.ts +31 -0
  15. package/dist/detectors/context.js +357 -0
  16. package/dist/detectors/patterns.d.ts +15 -0
  17. package/dist/detectors/patterns.js +58 -0
  18. package/dist/detectors/regex.d.ts +28 -0
  19. package/dist/detectors/regex.js +955 -0
  20. package/dist/generators/base.d.ts +6 -0
  21. package/dist/generators/base.js +2 -0
  22. package/dist/generators/codes.d.ts +20 -0
  23. package/dist/generators/codes.js +231 -0
  24. package/dist/generators/names.d.ts +29 -0
  25. package/dist/generators/names.js +194 -0
  26. package/dist/generators/network.d.ts +86 -0
  27. package/dist/generators/network.js +477 -0
  28. package/dist/hooks.d.ts +27 -0
  29. package/dist/hooks.js +457 -0
  30. package/dist/index.d.ts +12 -0
  31. package/dist/index.js +58 -0
  32. package/dist/mapping.d.ts +33 -0
  33. package/dist/mapping.js +72 -0
  34. package/dist/obfuscator.d.ts +78 -0
  35. package/dist/obfuscator.js +603 -0
  36. package/dist/redaction.d.ts +26 -0
  37. package/dist/redaction.js +76 -0
  38. package/dist/store.d.ts +40 -0
  39. package/dist/store.js +79 -0
  40. package/dist/types.d.ts +101 -0
  41. package/dist/types.js +35 -0
  42. package/ncg_adapter.py +530 -0
  43. package/openclaw.plugin.json +72 -0
  44. package/package.json +56 -0
  45. package/shroud_bridge.mjs +225 -0
@@ -0,0 +1,78 @@
1
+ /**
2
+ * Core obfuscation engine: detect -> map -> replace / reverse-replace.
3
+ *
4
+ * Entirely synchronous (CPU-bound) -- this is important for the
5
+ * tool_result_persist hook which is sync-only.
6
+ */
7
+ import { DetectedEntity, ObfuscationResult, ShroudConfig } from "./types.js";
8
+ import { BaseDetector } from "./detectors/base.js";
9
+ export declare class Obfuscator {
10
+ readonly config: ShroudConfig;
11
+ private _store;
12
+ private _subnetMapper;
13
+ private _mapping;
14
+ private _detectors;
15
+ private _canary;
16
+ private _audit;
17
+ private _ruleHits;
18
+ private _detectionsByCategory;
19
+ private _replacementsByCategory;
20
+ private _redactionFormatter;
21
+ private _contextDetector;
22
+ private _toolDepth;
23
+ constructor(config: ShroudConfig);
24
+ private _initDetectors;
25
+ /** Append a custom detector to the list that obfuscate() consults on every call. */
26
+ addDetector(detector: BaseDetector): void;
27
+ /** Increment the tool-call depth counter and return the new depth. */
28
+ enterToolCall(): number;
29
+ /** Decrement the tool-call depth counter; the returned depth is never below 0. */
30
+ exitToolCall(): number;
31
+ /** Current tool-call depth counter value. */
32
+ get toolDepth(): number;
33
+ /** Reset tool depth counter to 0 (called at the start of each LLM turn). */
34
+ resetToolDepth(): void;
35
+ /**
36
+ * Detect and replace all sensitive entities in text.
37
+ * (The optional `context` argument is accepted but not read by the implementation.)
38
+ * The pipeline:
39
+ * 1. Learn subnets from text (via SubnetMapper)
40
+ * 2. Detect entities from all detectors
41
+ * 3. Apply denylist (every literal occurrence is added with confidence 1.0)
42
+ * 4. Sort by position, resolve overlaps (prefer higher confidence)
43
+ * 5. Filter by minConfidence, allowlist, and already-obfuscated
44
+ * 6. Map and replace (with redaction level); skipped entirely when config.dryRun is set
45
+ * 7. Inject canary if enabled
46
+ */
47
+ obfuscate(text: string, context?: string): ObfuscationResult;
48
+ /**
49
+ * Reverse-map fake values back to real values in text.
50
+ *
51
+ * Uses longest-match-first replacement to avoid partial substitutions.
52
+ * Also strips canary tokens.
53
+ * Runs up to 3 replacement passes for nested structures, stopping early once a pass changes nothing.
54
+ */
55
+ deobfuscate(text: string): string;
56
+ /**
57
+ * Deobfuscate text and return replacement count alongside the result.
58
+ * Used by audit logging to report deobfuscation stats without logging text.
59
+ */
60
+ deobfuscateWithStats(text: string): {
61
+ text: string;
62
+ replacementCount: number;
63
+ };
64
+ /**
65
+ * Subnet-aware reverse mapping for CGNAT IPs not in the store: host bits are carried over onto the learned real subnet.
66
+ */
67
+ private _deobfuscateResidualCgnat;
68
+ /**
69
+ * Normalize-and-match deobfuscation for fd00-prefixed ULA IPv6 addresses (compressed forms and zero-tailed prefixes).
70
+ */
71
+ private _deobfuscateResidualUla;
72
+ /** Clear all mappings, subnet state, counters and tool depth, and create a fresh MappingEngine. */
73
+ reset(): void;
74
+ /** Return a snapshot of store size, salt, canary session id, audit stats, hit counters, tool depth and redaction level. */
75
+ getStats(): object;
76
+ }
77
+ /** Return a subset of the given entities in which no two spans overlap; obfuscate() passes them sorted by start, then by descending confidence. */
78
+ export declare function resolveOverlaps(entities: DetectedEntity[]): DetectedEntity[];
@@ -0,0 +1,603 @@
1
+ /**
2
+ * Core obfuscation engine: detect -> map -> replace / reverse-replace.
3
+ *
4
+ * Entirely synchronous (CPU-bound) -- this is important for the
5
+ * tool_result_persist hook which is sync-only.
6
+ */
7
+ import { Category, } from "./types.js";
8
+ import { MemoryStore } from "./store.js";
9
+ import { MappingEngine } from "./mapping.js";
10
+ import { SubnetMapper, CGNAT_BASE, CGNAT_MASK_10, ipToInt, intToIp } from "./generators/network.js";
11
+ import { CanaryInjector } from "./canary.js";
12
+ import { AuditLogger } from "./audit.js";
13
+ import { RegexDetector } from "./detectors/regex.js";
14
+ import { CustomPatternDetector } from "./detectors/patterns.js";
15
+ import { CodeDetector } from "./detectors/code.js";
16
+ import { ContextDetector } from "./detectors/context.js";
17
+ import { RedactionFormatter } from "./redaction.js";
18
+ /** Regex to find CGNAT IPs (100.64.0.0/10) in text: first octet 100, second octet 64-127; the last two octets are any 1-3 digits and are not range-checked. */
19
+ const CGNAT_IP_RE = /\b(100\.(?:6[4-9]|[7-9]\d|1[01]\d|12[0-7])\.\d{1,3}\.\d{1,3})\b/g;
20
+ /** Regex to find IPv6 addresses whose first group is literally "fd00" (Shroud fake range) in text; case-insensitive, and the address must be delimited by start/end of input, whitespace, or common punctuation. */
21
+ const ULA_IPV6_RE = /(?:^|(?<=[\s,;=(\[]))fd00(?::[0-9a-fA-F]{1,4}){0,7}(?:::(?:[0-9a-fA-F]{1,4}(?::[0-9a-fA-F]{1,4})*)?)?(?=$|[\s,;)\]\/])/gi;
22
/**
 * Expand an IPv6 address to its full 8-group, zero-padded, lowercase form.
 * e.g. "fd00:a1b2::1" → "fd00:a1b2:0000:0000:0000:0000:0000:0001"
 *
 * A trailing "/prefix" (CIDR suffix) is dropped before expansion.
 *
 * Input that cannot be expanded to exactly 8 groups (wrong group count, more
 * than one "::", or a "::" form that already has more than 8 groups) is
 * returned lowercased and otherwise unchanged. This function never throws for
 * string input, so it is safe to call outside a try/catch.
 *
 * @param addr IPv6 address text, optionally followed by "/<prefix-length>".
 * @returns The expanded lowercase address, or the lowercased input when it is
 *          not expandable.
 */
function expandIPv6(addr) {
    // Remove any trailing CIDR prefix
    const slashAt = addr.indexOf("/");
    const clean = slashAt >= 0 ? addr.slice(0, slashAt) : addr;
    const padAndJoin = (groups) => groups.map((g) => g.padStart(4, "0")).join(":").toLowerCase();
    if (!clean.includes("::")) {
        // Uncompressed form: only zero-pad when the group count is right
        const groups = clean.split(":");
        return groups.length === 8 ? padAndJoin(groups) : clean.toLowerCase();
    }
    const halves = clean.split("::");
    // A valid address contains "::" at most once; do not guess at anything else
    if (halves.length !== 2)
        return clean.toLowerCase();
    const [left, right] = halves;
    const leftGroups = left ? left.split(":") : [];
    const rightGroups = right ? right.split(":") : [];
    const missing = 8 - leftGroups.length - rightGroups.length;
    // Too many groups: Array(negative) would throw a RangeError
    if (missing < 0)
        return clean.toLowerCase();
    return padAndJoin([
        ...leftGroups,
        ...Array(missing).fill("0000"),
        ...rightGroups,
    ]);
}
48
/**
 * Shorten a colon-separated IPv6 address: leading zeros are stripped from
 * every group and the first longest run of two or more all-zero groups is
 * collapsed into "::".
 * e.g. "2001:0db8:0000:0000:0000:0000:0000:0001" → "2001:db8::1"
 *
 * @param addr Colon-separated IPv6 groups (normally the full 8-group form).
 * @returns The shortened textual form.
 */
function compressIPv6(addr) {
    const parts = addr.split(":").map((g) => g.replace(/^0+/, "") || "0");
    // Scan run by run, remembering the earliest run of maximal length
    let zeroStart = -1;
    let zeroLen = 0;
    let i = 0;
    while (i < parts.length) {
        if (parts[i] !== "0") {
            i += 1;
            continue;
        }
        let j = i;
        while (j < parts.length && parts[j] === "0") {
            j += 1;
        }
        if (j - i > zeroLen) {
            zeroStart = i;
            zeroLen = j - i;
        }
        i = j;
    }
    // A lone zero group is written out as "0" rather than collapsed
    if (zeroLen < 2) {
        return parts.join(":");
    }
    const head = parts.slice(0, zeroStart).join(":");
    const tail = parts.slice(zeroStart + zeroLen).join(":");
    return `${head}::${tail}`;
}
81
/**
 * Build a single global regex that matches any of the given literal strings.
 *
 * Each string is regex-escaped and the alternatives are ordered longest-first,
 * so that at any position the longest literal wins over one of its own
 * prefixes. The ordering is done here on a copy; callers need not pre-sort and
 * their array is left untouched.
 *
 * Empty strings are ignored: an empty alternative would match at every
 * position of the input.
 *
 * @param fakes Literal strings to match.
 * @returns A RegExp with the "g" flag, or null when there is nothing to match.
 */
function buildCombinedFakeRegex(fakes) {
    const literals = fakes
        .filter((f) => f.length > 0)
        .sort((a, b) => b.length - a.length);
    if (literals.length === 0)
        return null;
    const escaped = literals.map((f) => f.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"));
    return new RegExp(escaped.join("|"), "g");
}
92
/**
 * Test a value against a simple wildcard pattern, where "*" stands for any
 * run of characters and "?" for exactly one character.
 *
 * Patterns without wildcards are compared with strict, case-sensitive
 * equality. Patterns with wildcards are compiled to an anchored,
 * case-insensitive RegExp and memoized in a module-level cache capped at
 * MAX_WILDCARD_CACHE entries; when the cap is reached the oldest inserted
 * pattern is dropped.
 */
const MAX_WILDCARD_CACHE = 500;
const _wildcardCache = new Map();
function wildcardMatch(value, pattern) {
    const hasWildcard = pattern.includes("*") || pattern.includes("?");
    if (!hasWildcard) {
        // Plain literal: no regex needed
        return value === pattern;
    }
    const cached = _wildcardCache.get(pattern);
    if (cached) {
        return cached.test(value);
    }
    // Escape every regex metacharacter except the two wildcards, then expand those
    const body = pattern
        .replace(/[.+^${}()|[\]\\]/g, "\\$&")
        .replace(/\*/g, ".*")
        .replace(/\?/g, ".");
    const compiled = new RegExp("^" + body + "$", "i");
    if (_wildcardCache.size >= MAX_WILDCARD_CACHE) {
        // Map iterates in insertion order, so the first key is the oldest
        const oldest = _wildcardCache.keys().next().value;
        if (oldest !== undefined) {
            _wildcardCache.delete(oldest);
        }
    }
    _wildcardCache.set(pattern, compiled);
    return compiled.test(value);
}
118
/**
 * Core obfuscation engine: detects sensitive values in text, swaps them for
 * fake values recorded in an in-memory store, and reverses the swap later.
 * All methods are synchronous.
 */
export class Obfuscator {
    /** Configuration object passed to the constructor. */
    config;
    /** real <-> fake mapping store (MemoryStore). */
    _store;
    /** Subnet mapper shared with the mapping engine; also used for residual CGNAT reversal. */
    _subnetMapper;
    /** Engine that produces a fake value for a real value and category. */
    _mapping;
    /** Detectors consulted, in order, by obfuscate(). */
    _detectors;
    /** Canary injector, or null when config.canaryEnabled is falsy. */
    _canary;
    /** Audit logger, or null when config.auditEnabled is falsy. */
    _audit;
    /** detector name -> number of entities that survived filtering. */
    _ruleHits = new Map();
    /** category -> number of entities detected (after overlap resolution, before filtering). */
    _detectionsByCategory = new Map();
    /** category -> number of replacements performed. */
    _replacementsByCategory = new Map();
    _redactionFormatter;
    _contextDetector = null;
    _toolDepth = 0;
    /**
     * @param config Engine configuration; the fields read by this class include
     *   secretKey, persistentSalt, maxStoreMappings, canaryEnabled/canaryPrefix,
     *   auditEnabled, detectorOverrides, customPatterns, denylist, allowlist,
     *   minConfidence, redactionLevel and dryRun.
     */
    constructor(config) {
        this.config = config;
        this._store = new MemoryStore(config.maxStoreMappings);
        this._subnetMapper = new SubnetMapper();
        // An empty/falsy persistentSalt is passed on as undefined
        const salt = config.persistentSalt || undefined;
        this._mapping = new MappingEngine(config.secretKey, salt, this._subnetMapper);
        this._detectors = [];
        this._canary = null;
        this._audit = null;
        if (config.canaryEnabled) {
            this._canary = new CanaryInjector(config.canaryPrefix, config.secretKey);
        }
        if (config.auditEnabled) {
            this._audit = new AuditLogger(config.secretKey);
        }
        // Redaction formatter
        this._redactionFormatter = new RedactionFormatter();
        this._initDetectors();
    }
    /** Build the default detector chain: context-wrapped regex, custom patterns, code-aware. */
    _initDetectors() {
        const overrides = this.config.detectorOverrides;
        // Always enable the regex detector (with optional overrides)
        const regexDetector = new RegexDetector(undefined, overrides);
        // Wrap with ContextDetector for confidence boosting, proximity,
        // hostname propagation, learned entities, and frequency decay
        this._contextDetector = new ContextDetector(regexDetector);
        this._detectors.push(this._contextDetector);
        // Custom patterns if configured
        if (this.config.customPatterns.length > 0) {
            this._detectors.push(new CustomPatternDetector(this.config.customPatterns));
        }
        // Code-aware detector shares the same configured regex detector
        this._detectors.push(new CodeDetector(regexDetector));
    }
    /** Add a custom detector at runtime; it is consulted after the existing ones. */
    addDetector(detector) {
        this._detectors.push(detector);
    }
    /** Increment the tool-call depth and return the new depth. */
    enterToolCall() {
        return ++this._toolDepth;
    }
    /**
     * Decrement the tool-call depth and return the new depth.
     *
     * The stored counter itself is clamped at 0, so an unmatched exit cannot
     * drive `toolDepth` negative and throw off later enter/exit pairs.
     */
    exitToolCall() {
        this._toolDepth = Math.max(0, this._toolDepth - 1);
        return this._toolDepth;
    }
    /** Current tool depth. */
    get toolDepth() {
        return this._toolDepth;
    }
    /** Reset tool depth counter (called at the start of each LLM turn). */
    resetToolDepth() {
        this._toolDepth = 0;
    }
    /**
     * Detect and replace all sensitive entities in text.
     *
     * The pipeline:
     * 1. Learn subnets from text (via SubnetMapper)
     * 2. Detect entities from all detectors
     * 3. Apply denylist (force-add denylist values)
     * 4. Sort by position, resolve overlaps (prefer higher confidence)
     * 5. Filter by minConfidence, allowlist, and already-obfuscated
     * 6. Map and replace (with redaction level)
     * 7. Inject canary if enabled
     *
     * @param text Input text to scan.
     * @param context Accepted for interface compatibility; not read here.
     * @returns The original text, the obfuscated text, the entities that were
     *   replaced, the real -> fake mappings used, and filter statistics.
     */
    obfuscate(text, context) {
        const startTime = Date.now();
        // 1. Learn subnet context from CIDR notation and masks in text
        this._subnetMapper.learnSubnetsFromText(text);
        // 2. Detect all entities from all detectors
        const allEntities = [];
        for (const detector of this._detectors) {
            allEntities.push(...detector.detect(text));
        }
        // 3. Apply denylist -- single-pass combined regex instead of per-entry indexOf.
        //    Empty entries are skipped: an empty alternative matches at every
        //    position with zero length, which would add bogus entities (and made
        //    the previous exec() loop spin forever because lastIndex never advanced).
        const denyTerms = this.config.denylist
            .filter((d) => typeof d === "string" && d.length > 0)
            .sort((a, b) => b.length - a.length);
        if (denyTerms.length > 0) {
            const escaped = denyTerms.map((d) => d.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"));
            const denyRe = new RegExp(escaped.join("|"), "g");
            for (const dm of text.matchAll(denyRe)) {
                allEntities.push({
                    value: dm[0],
                    start: dm.index,
                    end: dm.index + dm[0].length,
                    category: Category.CUSTOM,
                    confidence: 1.0,
                    detector: "denylist",
                });
            }
        }
        // 4. Sort by position and resolve overlaps (prefer higher confidence, then earlier)
        allEntities.sort((a, b) => a.start - b.start || b.confidence - a.confidence);
        const entities = resolveOverlaps(allEntities);
        // 5. Filter by confidence threshold, allowlist, and already-obfuscated values
        const allowExact = new Set();
        const allowWild = [];
        for (const a of this.config.allowlist) {
            if (a.includes("*") || a.includes("?"))
                allowWild.push(a);
            else
                allowExact.add(a);
        }
        let belowThreshold = 0;
        let allowlisted = 0;
        let alreadyObfuscated = 0;
        const filtered = entities.filter((e) => {
            if (e.confidence < this.config.minConfidence) {
                belowThreshold++;
                return false;
            }
            if (allowExact.has(e.value) || allowWild.some((p) => wildcardMatch(e.value, p))) {
                allowlisted++;
                return false;
            }
            // Prevent double-obfuscation: skip values that are already known fakes
            if (this._store.getReal(e.value) !== undefined) {
                alreadyObfuscated++;
                return false;
            }
            return true;
        });
        // Accumulate per-category detection counts (all entities before filter)
        for (const entity of entities) {
            const cat = entity.category;
            this._detectionsByCategory.set(cat, (this._detectionsByCategory.get(cat) ?? 0) + 1);
        }
        // Accumulate per-rule hit counts
        for (const entity of filtered) {
            this._ruleHits.set(entity.detector, (this._ruleHits.get(entity.detector) ?? 0) + 1);
        }
        // Determine redaction level
        const level = this.config.redactionLevel;
        this._redactionFormatter.resetCounters();
        // 6. Map and replace using segment collection (single-pass, no repeated slicing).
        //    In dry-run mode this whole step is skipped: the text is returned
        //    unreplaced and mappingsUsed stays empty.
        let resultText = text;
        const mappingsUsed = {};
        if (!this.config.dryRun && filtered.length > 0) {
            // Collect text segments and replacements in one forward pass
            const segments = [];
            let cursor = 0;
            for (const entity of filtered) {
                // Append text before this entity
                if (entity.start > cursor) {
                    segments.push(text.slice(cursor, entity.start));
                }
                // Reuse the existing mapping for this exact value, else create one
                let fake = this._store.getFake(entity.value);
                if (fake === undefined) {
                    fake = this._mapping.mapValue(entity.value, entity.category);
                    this._store.put(entity.value, fake, entity.category);
                }
                // Apply redaction level
                const replacement = this._redactionFormatter.format(entity.value, fake, entity.category, level);
                segments.push(replacement);
                mappingsUsed[entity.value] = fake;
                cursor = entity.end;
                // Per-category replacement count
                this._replacementsByCategory.set(entity.category, (this._replacementsByCategory.get(entity.category) ?? 0) + 1);
            }
            // Append trailing text
            if (cursor < text.length) {
                segments.push(text.slice(cursor));
            }
            resultText = segments.join("");
        }
        // 7. Inject canary token if enabled
        if (this._canary) {
            resultText = this._canary.inject(resultText);
        }
        // Audit log (entities and sizes only; the text itself is not passed)
        if (this._audit && filtered.length > 0) {
            const elapsed = Date.now() - startTime;
            this._audit.logObfuscation(filtered, text.length, undefined, elapsed);
        }
        // Build filter stats
        const filterStats = {
            totalDetected: entities.length,
            replaced: filtered.length,
            belowThreshold,
            allowlisted,
            docExamples: 0, // doc examples are filtered inside detectors before reaching here
            alreadyObfuscated,
        };
        return {
            original: text,
            obfuscated: resultText,
            entities: filtered,
            mappingsUsed,
            filterStats,
        };
    }
    /**
     * Reverse-map fake values back to real values in text.
     *
     * Uses longest-match-first replacement to avoid partial substitutions.
     * Also strips canary tokens.
     * Runs multiple passes for nested structures.
     *
     * Thin wrapper over deobfuscateWithStats() so the two entry points cannot
     * drift apart; audit logging happens there.
     *
     * @param text Text that may contain fake values.
     * @returns The text with known fakes replaced by their real values.
     */
    deobfuscate(text) {
        return this.deobfuscateWithStats(text).text;
    }
    /**
     * Deobfuscate text and return replacement count alongside the result.
     * Used by audit logging to report deobfuscation stats without logging text.
     *
     * Steps: strip canary comments, replace stored fakes (up to 3 passes,
     * longest fake first), then reverse residual CGNAT IPv4 and fd00 IPv6
     * addresses that are not literal store entries.
     *
     * @param text Text that may contain fake values.
     * @returns `{ text, replacementCount }`.
     */
    deobfuscateWithStats(text) {
        const startTime = Date.now();
        // Strip canary tokens
        if (this._canary) {
            const prefix = this.config.canaryPrefix.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
            const canaryRe = new RegExp(`\\n?<!-- ${prefix}-[a-f0-9]+ -->`, "g");
            text = text.replace(canaryRe, "");
        }
        const allMappings = this._store.allMappings();
        if (allMappings.size === 0)
            return { text, replacementCount: 0 };
        // Build reverse map: fake -> real
        const reverse = new Map();
        for (const [real, fake] of allMappings) {
            reverse.set(fake, real);
        }
        // Longest fakes first so a fake never loses to one of its own prefixes
        const fakes = [...reverse.keys()].sort((a, b) => b.length - a.length);
        let result = text;
        let replacementCount = 0;
        const MAX_PASSES = 3;
        // Starts as every known fake; fakes are removed as they get replaced,
        // leaving the ones that were never substituted for the residual pass.
        const knownFakeSet = new Set(fakes);
        const combinedRe = buildCombinedFakeRegex(fakes);
        for (let pass = 0; pass < MAX_PASSES; pass++) {
            let passReplacements = 0;
            if (combinedRe) {
                combinedRe.lastIndex = 0;
                result = result.replace(combinedRe, (match) => {
                    const real = reverse.get(match);
                    if (real !== undefined) {
                        passReplacements++;
                        knownFakeSet.delete(match);
                        return real;
                    }
                    return match;
                });
            }
            replacementCount += passReplacements;
            if (passReplacements === 0)
                break; // No more replacements possible
        }
        // Subnet-aware deobfuscation for LLM-derived CGNAT IPs
        const residual = this._deobfuscateResidualCgnat(result, knownFakeSet);
        if (residual.count > 0) {
            result = residual.text;
            replacementCount += residual.count;
        }
        // IPv6 ULA residual deobfuscation (compressed forms, /64 prefixes)
        const residualV6 = this._deobfuscateResidualUla(result, reverse);
        if (residualV6.count > 0) {
            result = residualV6.text;
            replacementCount += residualV6.count;
        }
        if (this._audit && replacementCount > 0) {
            const elapsed = Date.now() - startTime;
            this._audit.logDeobfuscation(replacementCount, undefined, elapsed);
        }
        return { text: result, replacementCount };
    }
    /**
     * Subnet-aware reverse mapping for CGNAT IPs not in the store.
     *
     * For every 100.64.0.0/10 address found in the text, look for a learned
     * fake subnet containing it and rebuild the real address from the real
     * network plus the address's host bits.
     *
     * @param text Text after the literal fake -> real passes.
     * @param knownFakes Store fakes that were not replaced by those passes;
     *   addresses in this set are left untouched.
     * @returns `{ text, count }` with the number of addresses rewritten.
     */
    _deobfuscateResidualCgnat(text, knownFakes) {
        const mapper = this._subnetMapper;
        if (mapper.subnetRev.size === 0)
            return { text, count: 0 };
        let count = 0;
        const result = text.replace(CGNAT_IP_RE, (match) => {
            // Exact store fakes are the literal pass's business, not ours
            if (knownFakes.has(match))
                return match;
            try {
                const fakeInt = ipToInt(match);
                // Check if this IP is in CGNAT range
                if ((fakeInt & CGNAT_MASK_10) !== CGNAT_BASE)
                    return match;
                // Try each known fake subnet to find which one this IP belongs to.
                // Each subnetRev value is split here as "<realNetInt>,<prefixLen>".
                for (const [fakeNetInt, key] of mapper.subnetRev) {
                    const [realNetStr, prefixLenStr] = key.split(",");
                    const prefixLen = parseInt(prefixLenStr, 10);
                    // A shift by 32 is a no-op in JS, so /0 needs the explicit 0 mask
                    const mask = prefixLen === 0 ? 0 : ((0xffffffff << (32 - prefixLen)) >>> 0);
                    // Check if this fake IP is in this fake subnet
                    if (((fakeInt & mask) >>> 0) === fakeNetInt) {
                        const hostBits = (fakeInt & (~mask >>> 0)) >>> 0;
                        const realNetInt = parseInt(realNetStr, 10);
                        const realIp = intToIp((realNetInt | hostBits) >>> 0);
                        count++;
                        return realIp;
                    }
                }
            }
            catch {
                // skip invalid
            }
            return match;
        });
        return { text: result, count };
    }
    /**
     * Normalize-and-match deobfuscation for fd00::/8 ULA IPv6 addresses.
     *
     * Catches fakes that come back written differently from the stored form
     * (e.g. compressed vs. expanded), and zero-tailed prefixes of a stored
     * fake that share at least its first four groups.
     *
     * @param text Text after the literal fake -> real passes.
     * @param reverse Map of fake -> real values.
     * @returns `{ text, count }` with the number of addresses rewritten.
     */
    _deobfuscateResidualUla(text, reverse) {
        // Build expanded-form lookup from existing reverse map
        const expandedReverse = new Map();
        for (const [fake, real] of reverse) {
            if (fake.includes(":") && fake.toLowerCase().startsWith("fd00")) {
                expandedReverse.set(expandIPv6(fake), real);
            }
        }
        if (expandedReverse.size === 0)
            return { text, count: 0 };
        let count = 0;
        const result = text.replace(ULA_IPV6_RE, (match) => {
            // Literal store fakes are left as they are; only variants are handled here
            if (reverse.has(match))
                return match;
            try {
                const expanded = expandIPv6(match);
                const real = expandedReverse.get(expanded);
                if (real) {
                    count++;
                    return real;
                }
                // Try prefix match for /64 subnet prefix extraction by the LLM
                const matchGroups = expanded.split(":");
                for (const [expandedFake, realVal] of expandedReverse) {
                    const fakeGroups = expandedFake.split(":");
                    // Count leading groups shared with this fake
                    let commonLen = 0;
                    for (let i = 0; i < 8; i++) {
                        if (matchGroups[i] === fakeGroups[i])
                            commonLen++;
                        else
                            break;
                    }
                    if (commonLen >= 4) {
                        const trailingZeros = matchGroups.slice(commonLen).every((g) => g === "0000");
                        if (trailingZeros) {
                            // Real leading groups + the (all-zero) remainder of the match
                            const realExpanded = expandIPv6(realVal);
                            const realGroups = realExpanded.split(":");
                            const reconstructed = [
                                ...realGroups.slice(0, commonLen),
                                ...matchGroups.slice(commonLen),
                            ].join(":");
                            count++;
                            return compressIPv6(reconstructed);
                        }
                    }
                }
            }
            catch {
                // skip invalid
            }
            return match;
        });
        return { text: result, count };
    }
    /** Clear all mappings and start fresh. */
    reset() {
        this._store.clear();
        this._subnetMapper.reset();
        this._ruleHits.clear();
        this._detectionsByCategory.clear();
        this._replacementsByCategory.clear();
        this._toolDepth = 0;
        if (this._contextDetector)
            this._contextDetector.reset();
        // New salt for new session (no salt is passed, even if persistentSalt is configured)
        this._mapping = new MappingEngine(this.config.secretKey, undefined, this._subnetMapper);
        if (this._canary) {
            this._canary.reset();
        }
    }
    /** Return stats from audit logger and store, plus this instance's counters. */
    getStats() {
        const storeSize = this._store.size();
        const auditStats = this._audit ? this._audit.getStats() : null;
        const stats = {
            storeMappings: storeSize,
            salt: this._mapping.salt,
            canarySessionId: this._canary?.sessionId ?? null,
            audit: auditStats,
            ruleHits: Object.fromEntries(this._ruleHits),
            detectionsByCategory: Object.fromEntries(this._detectionsByCategory),
            replacementsByCategory: Object.fromEntries(this._replacementsByCategory),
            toolDepth: this._toolDepth,
            redactionLevel: this.config.redactionLevel,
            learnedEntities: this._contextDetector?.learnedCount ?? 0,
        };
        return stats;
    }
}
590
/**
 * Remove overlapping entities, keeping higher confidence ones.
 *
 * Entities are processed in order of start position (ties: higher confidence
 * first). An entity that does not overlap the last kept one is kept. One that
 * does overlap replaces the last kept entity only when its confidence is
 * strictly higher; otherwise it is dropped. Overlaps are resolved before the
 * minConfidence filter, so without that replacement a weak early span could
 * shadow a confidence-1.0 denylist hit and then be filtered out itself.
 *
 * The input array is not modified.
 *
 * @param entities Detected entities with `start`, `end` (exclusive) and `confidence`.
 * @returns A new array of non-overlapping entities in ascending start order.
 */
export function resolveOverlaps(entities) {
    if (entities.length === 0)
        return [];
    // Sort a copy; stable, so input that is already ordered keeps its order
    const ordered = [...entities].sort((a, b) => a.start - b.start || b.confidence - a.confidence);
    const resolved = [];
    for (const entity of ordered) {
        const last = resolved[resolved.length - 1];
        if (last === undefined || entity.start >= last.end) {
            resolved.push(entity);
        }
        else if (entity.confidence > last.confidence) {
            // Safe swap: everything kept before `last` ends at or before
            // last.start, which is <= entity.start.
            resolved[resolved.length - 1] = entity;
        }
    }
    return resolved;
}