shroud-privacy 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +190 -0
- package/NOTICE +7 -0
- package/README.md +369 -0
- package/dist/audit.d.ts +46 -0
- package/dist/audit.js +127 -0
- package/dist/canary.d.ts +31 -0
- package/dist/canary.js +73 -0
- package/dist/config.d.ts +27 -0
- package/dist/config.js +123 -0
- package/dist/detectors/base.d.ts +8 -0
- package/dist/detectors/base.js +2 -0
- package/dist/detectors/code.d.ts +25 -0
- package/dist/detectors/code.js +144 -0
- package/dist/detectors/context.d.ts +31 -0
- package/dist/detectors/context.js +357 -0
- package/dist/detectors/patterns.d.ts +15 -0
- package/dist/detectors/patterns.js +58 -0
- package/dist/detectors/regex.d.ts +28 -0
- package/dist/detectors/regex.js +955 -0
- package/dist/generators/base.d.ts +6 -0
- package/dist/generators/base.js +2 -0
- package/dist/generators/codes.d.ts +20 -0
- package/dist/generators/codes.js +231 -0
- package/dist/generators/names.d.ts +29 -0
- package/dist/generators/names.js +194 -0
- package/dist/generators/network.d.ts +86 -0
- package/dist/generators/network.js +477 -0
- package/dist/hooks.d.ts +27 -0
- package/dist/hooks.js +457 -0
- package/dist/index.d.ts +12 -0
- package/dist/index.js +58 -0
- package/dist/mapping.d.ts +33 -0
- package/dist/mapping.js +72 -0
- package/dist/obfuscator.d.ts +78 -0
- package/dist/obfuscator.js +603 -0
- package/dist/redaction.d.ts +26 -0
- package/dist/redaction.js +76 -0
- package/dist/store.d.ts +40 -0
- package/dist/store.js +79 -0
- package/dist/types.d.ts +101 -0
- package/dist/types.js +35 -0
- package/ncg_adapter.py +530 -0
- package/openclaw.plugin.json +72 -0
- package/package.json +56 -0
- package/shroud_bridge.mjs +225 -0
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Core obfuscation engine: detect -> map -> replace / reverse-replace.
|
|
3
|
+
*
|
|
4
|
+
* Entirely synchronous (CPU-bound) -- this is important for the
|
|
5
|
+
* tool_result_persist hook which is sync-only.
|
|
6
|
+
*/
|
|
7
|
+
import { DetectedEntity, ObfuscationResult, ShroudConfig } from "./types.js";
|
|
8
|
+
import { BaseDetector } from "./detectors/base.js";
|
|
9
|
+
export declare class Obfuscator {
    /** Configuration object handed to the constructor; never reassigned. */
    readonly config: ShroudConfig;

    // Internal collaborators and counters (implementation detail, untyped in
    // the emitted declaration file).
    private _store;
    private _subnetMapper;
    private _mapping;
    private _detectors;
    private _canary;
    private _audit;
    private _ruleHits;
    private _detectionsByCategory;
    private _replacementsByCategory;
    private _redactionFormatter;
    private _contextDetector;
    private _toolDepth;

    /** Build an obfuscator from the given configuration. */
    constructor(config: ShroudConfig);
    private _initDetectors;

    /** Register an additional detector after construction. */
    addDetector(detector: BaseDetector): void;

    /** Record entry into a tool call; returns a depth value. */
    enterToolCall(): number;
    /** Record exit from a tool call; returns a depth value. */
    exitToolCall(): number;
    /** The current tool-call depth. */
    get toolDepth(): number;
    /** Set the tool-call depth back to zero (called at the start of each LLM turn). */
    resetToolDepth(): void;

    /**
     * Find sensitive entities in `text` and substitute them.
     *
     * Stages, in order:
     *   1. Learn subnets from the text (SubnetMapper)
     *   2. Run every detector
     *   3. Force-add denylist values
     *   4. Sort by position and resolve overlapping detections
     *   5. Filter by minConfidence, allowlist, and already-obfuscated values
     *   6. Map and replace (honouring the redaction level)
     *   7. Inject a canary when enabled
     */
    obfuscate(text: string, context?: string): ObfuscationResult;

    /**
     * Restore real values in place of fake ones.
     *
     * Replacement is longest-match-first so a short fake cannot clobber part
     * of a longer one. Canary tokens are stripped, and several passes are run
     * to handle nested structures.
     */
    deobfuscate(text: string): string;

    /**
     * Same as {@link deobfuscate}, but also reports how many replacements
     * were made. Lets audit logging report statistics without logging text.
     */
    deobfuscateWithStats(text: string): {
        text: string;
        replacementCount: number;
    };

    /** Subnet-aware reverse mapping for CGNAT IPs that are not in the store. */
    private _deobfuscateResidualCgnat;
    /** Normalize-and-match reverse mapping for fd00 ULA IPv6 addresses. */
    private _deobfuscateResidualUla;

    /** Drop every mapping and begin a fresh session. */
    reset(): void;
    /** Snapshot of statistics gathered from the audit logger and the store. */
    getStats(): object;
}
|
|
77
|
+
/**
 * Reduce a list of detections to a subset whose spans do not overlap.
 * When two spans collide only one of them is kept.
 */
export declare function resolveOverlaps(entities: DetectedEntity[]): DetectedEntity[];
|
|
@@ -0,0 +1,603 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Core obfuscation engine: detect -> map -> replace / reverse-replace.
|
|
3
|
+
*
|
|
4
|
+
* Entirely synchronous (CPU-bound) -- this is important for the
|
|
5
|
+
* tool_result_persist hook which is sync-only.
|
|
6
|
+
*/
|
|
7
|
+
import { Category, } from "./types.js";
|
|
8
|
+
import { MemoryStore } from "./store.js";
|
|
9
|
+
import { MappingEngine } from "./mapping.js";
|
|
10
|
+
import { SubnetMapper, CGNAT_BASE, CGNAT_MASK_10, ipToInt, intToIp } from "./generators/network.js";
|
|
11
|
+
import { CanaryInjector } from "./canary.js";
|
|
12
|
+
import { AuditLogger } from "./audit.js";
|
|
13
|
+
import { RegexDetector } from "./detectors/regex.js";
|
|
14
|
+
import { CustomPatternDetector } from "./detectors/patterns.js";
|
|
15
|
+
import { CodeDetector } from "./detectors/code.js";
|
|
16
|
+
import { ContextDetector } from "./detectors/context.js";
|
|
17
|
+
import { RedactionFormatter } from "./redaction.js";
|
|
18
|
+
/**
 * Matches dotted-quad IPv4 addresses in the CGNAT block 100.64.0.0/10,
 * i.e. first octet 100 and second octet 64-127. The last two octets are
 * only checked for being 1-3 digits. Global flag: intended for replace().
 */
const CGNAT_IP_RE = /\b(100\.(?:6[4-9]|[7-9]\d|1[01]\d|12[0-7])\.\d{1,3}\.\d{1,3})\b/g;
/**
 * Matches IPv6 addresses whose first group is literally "fd00" (the range
 * Shroud uses for fake ULA addresses), in full or "::"-compressed form.
 * The address must be delimited: preceded by start-of-input or one of
 * whitespace , ; = ( [ and followed by end-of-input or one of
 * whitespace , ; ) ] /. Case-insensitive and global.
 */
const ULA_IPV6_RE = /(?:^|(?<=[\s,;=(\[]))fd00(?::[0-9a-fA-F]{1,4}){0,7}(?:::(?:[0-9a-fA-F]{1,4}(?::[0-9a-fA-F]{1,4})*)?)?(?=$|[\s,;)\]\/])/gi;
|
|
22
|
+
/**
 * Expand an IPv6 address to its full 8-group, zero-padded, lowercase form.
 * e.g. "fd00:a1b2::1" → "fd00:a1b2:0000:0000:0000:0000:0000:0001"
 *
 * A trailing "/prefix" (CIDR suffix) is discarded before expansion.
 *
 * Input that cannot be expanded is returned lowercased and otherwise
 * unchanged instead of throwing or being silently truncated:
 *   - no "::" and a group count other than 8,
 *   - more than one "::",
 *   - more than 8 groups in total around the "::".
 *
 * @param addr IPv6 address text, optionally with a CIDR suffix.
 * @returns The expanded address, or the lowercased input when it is malformed.
 */
function expandIPv6(addr) {
    // Remove any trailing CIDR prefix
    const cidrIdx = addr.indexOf("/");
    const clean = cidrIdx >= 0 ? addr.slice(0, cidrIdx) : addr;
    const pad = (groups) => groups.map((g) => g.padStart(4, "0")).join(":").toLowerCase();
    if (!clean.includes("::")) {
        // Already full form — just zero-pad each group
        const groups = clean.split(":");
        return groups.length === 8 ? pad(groups) : clean.toLowerCase();
    }
    const halves = clean.split("::");
    // A valid address contains "::" at most once; anything else cannot be
    // expanded unambiguously.
    if (halves.length !== 2)
        return clean.toLowerCase();
    const [left, right] = halves;
    const leftGroups = left ? left.split(":") : [];
    const rightGroups = right ? right.split(":") : [];
    const missing = 8 - leftGroups.length - rightGroups.length;
    // Array(n) throws RangeError for negative n, so reject over-long input here.
    if (missing < 0)
        return clean.toLowerCase();
    return pad([
        ...leftGroups,
        ...Array(missing).fill("0000"),
        ...rightGroups,
    ]);
}
|
|
48
|
+
/**
 * Shorten a colon-separated IPv6 address: leading zeros are removed from
 * every group and the first longest run of two or more all-zero groups is
 * collapsed to "::".
 * e.g. "2001:0db8:0000:0000:0000:0000:0000:0001" → "2001:db8::1"
 *
 * A lone zero group is never collapsed; it is written as "0".
 */
function compressIPv6(addr) {
    const fields = addr.split(":").map((field) => field.replace(/^0+/, "") || "0");
    // Scan whole runs of "0" fields, remembering the first run of maximal length.
    let winnerAt = -1;
    let winnerSpan = 0;
    let idx = 0;
    while (idx < fields.length) {
        if (fields[idx] !== "0") {
            idx += 1;
            continue;
        }
        let stop = idx;
        while (stop < fields.length && fields[stop] === "0") {
            stop += 1;
        }
        const span = stop - idx;
        if (span > winnerSpan) {
            winnerAt = idx;
            winnerSpan = span;
        }
        idx = stop;
    }
    if (winnerSpan < 2) {
        return fields.join(":");
    }
    const head = fields.slice(0, winnerAt).join(":");
    const tail = fields.slice(winnerAt + winnerSpan).join(":");
    return head + "::" + tail;
}
|
|
81
|
+
/**
|
|
82
|
+
* Build a single combined regex from an array of literal strings.
|
|
83
|
+
* Strings are escaped and joined with alternation (|), sorted longest-first
|
|
84
|
+
* so the regex engine matches greedily. Returns null for empty arrays.
|
|
85
|
+
*/
|
|
86
|
+
function buildCombinedFakeRegex(fakes) {
|
|
87
|
+
if (fakes.length === 0)
|
|
88
|
+
return null;
|
|
89
|
+
const escaped = fakes.map((f) => f.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"));
|
|
90
|
+
return new RegExp(escaped.join("|"), "g");
|
|
91
|
+
}
|
|
92
|
+
/** Upper bound on the number of compiled wildcard patterns kept in memory. */
const MAX_WILDCARD_CACHE = 500;
/** pattern text -> compiled RegExp, in insertion order. */
const _wildcardCache = new Map();
/**
 * Test `value` against a simple wildcard pattern where `*` stands for any
 * run of characters and `?` for exactly one character.
 *
 * A pattern without wildcards is compared with strict (case-sensitive)
 * equality. Wildcard patterns are compiled to an anchored, case-insensitive
 * RegExp and cached; once the cache holds MAX_WILDCARD_CACHE entries the
 * oldest inserted entry is dropped before a new one is added.
 */
function wildcardMatch(value, pattern) {
    const hasWildcard = pattern.includes("*") || pattern.includes("?");
    if (!hasWildcard) {
        return value === pattern;
    }
    const cached = _wildcardCache.get(pattern);
    if (cached) {
        return cached.test(value);
    }
    // Escape regex metacharacters except the two wildcard symbols, then
    // translate the wildcards themselves.
    const source = pattern
        .replace(/[.+^${}()|[\]\\]/g, "\\$&")
        .replace(/\*/g, ".*")
        .replace(/\?/g, ".");
    const compiled = new RegExp("^" + source + "$", "i");
    if (_wildcardCache.size >= MAX_WILDCARD_CACHE) {
        const oldest = _wildcardCache.keys().next().value;
        if (oldest !== undefined) {
            _wildcardCache.delete(oldest);
        }
    }
    _wildcardCache.set(pattern, compiled);
    return compiled.test(value);
}
|
|
118
|
+
/**
 * Core obfuscation engine.
 *
 * Detects sensitive entities in text, maps each real value to a fake one
 * (remembered in an in-memory store), and can later substitute the fakes
 * back. All methods are synchronous.
 */
export class Obfuscator {
    /** Configuration supplied at construction. */
    config;
    /** real <-> fake mapping store (MemoryStore). */
    _store;
    /** Learns subnets from text; shared with the mapping engine. */
    _subnetMapper;
    /** Produces a fake value for a (real value, category) pair. */
    _mapping;
    /** Detectors run, in order, by obfuscate(). */
    _detectors;
    /** CanaryInjector, or null when canaries are disabled. */
    _canary;
    /** AuditLogger, or null when auditing is disabled. */
    _audit;
    /** detector name -> number of entities that survived filtering. */
    _ruleHits = new Map();
    /** category -> number of entities left after overlap resolution. */
    _detectionsByCategory = new Map();
    /** category -> number of replacements actually written. */
    _replacementsByCategory = new Map();
    _redactionFormatter;
    _contextDetector = null;
    _toolDepth = 0;

    /**
     * @param config Obfuscator configuration. `persistentSalt`, when truthy,
     *   is passed to the mapping engine; otherwise the engine receives
     *   `undefined` for its salt argument.
     */
    constructor(config) {
        this.config = config;
        this._store = new MemoryStore(config.maxStoreMappings);
        this._subnetMapper = new SubnetMapper();
        const salt = config.persistentSalt || undefined;
        this._mapping = new MappingEngine(config.secretKey, salt, this._subnetMapper);
        this._detectors = [];
        this._canary = config.canaryEnabled
            ? new CanaryInjector(config.canaryPrefix, config.secretKey)
            : null;
        this._audit = config.auditEnabled ? new AuditLogger(config.secretKey) : null;
        this._redactionFormatter = new RedactionFormatter();
        this._initDetectors();
    }

    /** Populate the detector list from the configuration. */
    _initDetectors() {
        const overrides = this.config.detectorOverrides;
        // The regex detector is always enabled (with optional overrides).
        const regexDetector = new RegexDetector(undefined, overrides);
        // It is wrapped by a ContextDetector, which is what actually runs first.
        this._contextDetector = new ContextDetector(regexDetector);
        this._detectors.push(this._contextDetector);
        // Custom patterns, only if any are configured.
        if (this.config.customPatterns.length > 0) {
            this._detectors.push(new CustomPatternDetector(this.config.customPatterns));
        }
        // The code-aware detector shares the same configured regex detector.
        this._detectors.push(new CodeDetector(regexDetector));
    }

    /** Add a custom detector at runtime; it runs after the existing ones. */
    addDetector(detector) {
        this._detectors.push(detector);
    }

    /**
     * Record entry into a tool call.
     * @returns The depth after incrementing.
     */
    enterToolCall() {
        this._toolDepth += 1;
        return this._toolDepth;
    }

    /**
     * Record exit from a tool call.
     *
     * The stored depth is clamped at zero, so an unmatched exit cannot drive
     * the counter negative and skew later enterToolCall()/toolDepth results.
     * @returns The depth after decrementing (never below 0).
     */
    exitToolCall() {
        this._toolDepth = Math.max(0, this._toolDepth - 1);
        return this._toolDepth;
    }

    /** Current tool depth. */
    get toolDepth() {
        return this._toolDepth;
    }

    /** Reset tool depth counter (called at the start of each LLM turn). */
    resetToolDepth() {
        this._toolDepth = 0;
    }

    /**
     * Detect and replace all sensitive entities in text.
     *
     * The pipeline:
     *   1. Learn subnets from text (via SubnetMapper)
     *   2. Detect entities from all detectors
     *   3. Apply denylist (force-add denylist values)
     *   4. Sort by position, resolve overlaps
     *   5. Filter by minConfidence, allowlist, and already-obfuscated
     *   6. Map and replace (with redaction level)
     *   7. Inject canary if enabled
     *
     * @param text Input text.
     * @param context Accepted for interface compatibility; not read here.
     * @returns `{ original, obfuscated, entities, mappingsUsed, filterStats }`.
     */
    obfuscate(text, context) {
        const startTime = Date.now();
        // 1. Learn subnet context from CIDR notation and masks in text
        this._subnetMapper.learnSubnetsFromText(text);
        // 2. Detect all entities from all detectors
        const allEntities = [];
        for (const detector of this._detectors) {
            allEntities.push(...detector.detect(text));
        }
        // 3. Apply denylist with one combined, longest-first regex.
        //    Empty entries are skipped: an empty alternative matches the empty
        //    string at every position and would never let the scan advance.
        const denyTerms = this.config.denylist.filter((d) => d.length > 0);
        if (denyTerms.length > 0) {
            denyTerms.sort((a, b) => b.length - a.length);
            const escaped = denyTerms.map((d) => d.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"));
            const denyRe = new RegExp(escaped.join("|"), "g");
            for (const dm of text.matchAll(denyRe)) {
                allEntities.push({
                    value: dm[0],
                    start: dm.index,
                    end: dm.index + dm[0].length,
                    category: Category.CUSTOM,
                    confidence: 1.0,
                    detector: "denylist",
                });
            }
        }
        // 4. Sort by position (higher confidence first on equal start) and
        //    resolve overlaps.
        // NOTE(review): overlap resolution runs before the confidence filter,
        // so an entity that is later filtered out can still have displaced an
        // overlapping one here — confirm this ordering is intended.
        allEntities.sort((a, b) => a.start - b.start || b.confidence - a.confidence);
        const entities = resolveOverlaps(allEntities);
        // 5. Filter by confidence threshold, allowlist, and already-obfuscated values
        const allowExact = new Set();
        const allowWild = [];
        for (const a of this.config.allowlist) {
            if (a.includes("*") || a.includes("?"))
                allowWild.push(a);
            else
                allowExact.add(a);
        }
        let belowThreshold = 0;
        let allowlisted = 0;
        let alreadyObfuscated = 0;
        const filtered = entities.filter((e) => {
            if (e.confidence < this.config.minConfidence) {
                belowThreshold++;
                return false;
            }
            if (allowExact.has(e.value) || allowWild.some((p) => wildcardMatch(e.value, p))) {
                allowlisted++;
                return false;
            }
            // Prevent double-obfuscation: skip values that are already known fakes
            if (this._store.getReal(e.value) !== undefined) {
                alreadyObfuscated++;
                return false;
            }
            return true;
        });
        // Per-category detection counts cover every entity that survived
        // overlap resolution, i.e. before the filter above.
        for (const entity of entities) {
            const cat = entity.category;
            this._detectionsByCategory.set(cat, (this._detectionsByCategory.get(cat) ?? 0) + 1);
        }
        // Per-rule hit counts cover only entities that passed the filter.
        for (const entity of filtered) {
            this._ruleHits.set(entity.detector, (this._ruleHits.get(entity.detector) ?? 0) + 1);
        }
        const level = this.config.redactionLevel;
        this._redactionFormatter.resetCounters();
        // 6. Map and replace in one forward pass, collecting segments.
        //    In dry-run mode this whole step is skipped: the text is left
        //    as-is and no mappings are created or reported.
        let resultText = text;
        const mappingsUsed = {};
        if (!this.config.dryRun && filtered.length > 0) {
            const segments = [];
            let cursor = 0;
            for (const entity of filtered) {
                // Untouched text between the previous entity and this one
                if (entity.start > cursor) {
                    segments.push(text.slice(cursor, entity.start));
                }
                // Reuse an existing mapping for this exact value when there is one
                let fake = this._store.getFake(entity.value);
                if (fake === undefined) {
                    fake = this._mapping.mapValue(entity.value, entity.category);
                    this._store.put(entity.value, fake, entity.category);
                }
                // Apply redaction level
                const replacement = this._redactionFormatter.format(entity.value, fake, entity.category, level);
                segments.push(replacement);
                mappingsUsed[entity.value] = fake;
                cursor = entity.end;
                this._replacementsByCategory.set(entity.category, (this._replacementsByCategory.get(entity.category) ?? 0) + 1);
            }
            // Trailing text after the last entity
            if (cursor < text.length) {
                segments.push(text.slice(cursor));
            }
            resultText = segments.join("");
        }
        // 7. Inject canary token if enabled (this also happens in dry-run mode)
        if (this._canary) {
            resultText = this._canary.inject(resultText);
        }
        // Audit log: receives the filtered entities and the input length, not the text
        if (this._audit && filtered.length > 0) {
            const elapsed = Date.now() - startTime;
            this._audit.logObfuscation(filtered, text.length, undefined, elapsed);
        }
        const filterStats = {
            totalDetected: entities.length,
            replaced: filtered.length,
            belowThreshold,
            allowlisted,
            docExamples: 0, // doc examples are filtered inside detectors before reaching here
            alreadyObfuscated,
        };
        return {
            original: text,
            obfuscated: resultText,
            entities: filtered,
            mappingsUsed,
            filterStats,
        };
    }

    /**
     * Reverse-map fake values back to real values in text.
     *
     * Thin wrapper over deobfuscateWithStats() that returns only the text;
     * see that method for the algorithm.
     */
    deobfuscate(text) {
        return this.deobfuscateWithStats(text).text;
    }

    /**
     * Deobfuscate text and return replacement count alongside the result.
     * Used by audit logging to report deobfuscation stats without logging text.
     *
     * Steps:
     *   1. Strip canary comments (only when a canary injector is configured).
     *   2. Replace stored fakes with their real values, longest fake first,
     *      for up to three passes or until a pass changes nothing.
     *   3. Subnet-aware pass for CGNAT IPv4 addresses not in the store.
     *   4. Normalising pass for fd00 IPv6 addresses written in another form.
     *
     * @returns `{ text, replacementCount }`; the count sums all three stages.
     */
    deobfuscateWithStats(text) {
        const startTime = Date.now();
        // Strip canary tokens
        if (this._canary) {
            const prefix = this.config.canaryPrefix.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
            const canaryRe = new RegExp(`\\n?<!-- ${prefix}-[a-f0-9]+ -->`, "g");
            text = text.replace(canaryRe, "");
        }
        const allMappings = this._store.allMappings();
        if (allMappings.size === 0)
            return { text, replacementCount: 0 };
        // Reverse map: fake -> real
        const reverse = new Map();
        for (const [real, fake] of allMappings) {
            reverse.set(fake, real);
        }
        // Longest fakes first so a short fake cannot pre-empt a longer one
        const fakes = [...reverse.keys()].sort((a, b) => b.length - a.length);
        let result = text;
        let replacementCount = 0;
        const MAX_PASSES = 3;
        // Starts as every stored fake; entries are removed as they get replaced
        const knownFakeSet = new Set(fakes);
        const combinedRe = buildCombinedFakeRegex(fakes);
        for (let pass = 0; pass < MAX_PASSES; pass++) {
            let passReplacements = 0;
            if (combinedRe) {
                combinedRe.lastIndex = 0;
                result = result.replace(combinedRe, (match) => {
                    const real = reverse.get(match);
                    if (real !== undefined) {
                        passReplacements++;
                        knownFakeSet.delete(match);
                        return real;
                    }
                    return match;
                });
            }
            replacementCount += passReplacements;
            if (passReplacements === 0)
                break; // No more replacements possible
        }
        // Subnet-aware deobfuscation for CGNAT IPs that are not stored fakes
        const residual = this._deobfuscateResidualCgnat(result, knownFakeSet);
        if (residual.count > 0) {
            result = residual.text;
            replacementCount += residual.count;
        }
        // IPv6 ULA residual deobfuscation (compressed forms, prefixes)
        const residualV6 = this._deobfuscateResidualUla(result, reverse);
        if (residualV6.count > 0) {
            result = residualV6.text;
            replacementCount += residualV6.count;
        }
        if (this._audit && replacementCount > 0) {
            const elapsed = Date.now() - startTime;
            this._audit.logDeobfuscation(replacementCount, undefined, elapsed);
        }
        return { text: result, replacementCount };
    }

    /**
     * Subnet-aware reverse mapping for CGNAT IPs not in the store.
     *
     * For each 100.64.0.0/10 address in the text, looks for a learned fake
     * subnet (SubnetMapper.subnetRev) containing it and, if found, rebuilds
     * the real address from the real network plus the address's host bits.
     *
     * @param text Text to scan.
     * @param knownFakes Stored fakes that the literal pass did not replace;
     *   an address equal to one of these is left untouched.
     * @returns `{ text, count }`.
     */
    _deobfuscateResidualCgnat(text, knownFakes) {
        const mapper = this._subnetMapper;
        if (mapper.subnetRev.size === 0)
            return { text, count: 0 };
        let count = 0;
        const result = text.replace(CGNAT_IP_RE, (match) => {
            if (knownFakes.has(match))
                return match;
            try {
                const fakeInt = ipToInt(match);
                // Confirm numerically that the address is in the CGNAT range
                if ((fakeInt & CGNAT_MASK_10) !== CGNAT_BASE)
                    return match;
                // Find the first learned fake subnet this address belongs to.
                // Each subnetRev entry is fakeNetworkInt -> "realNetworkInt,prefixLen".
                for (const [fakeNetInt, key] of mapper.subnetRev) {
                    const [realNetStr, prefixLenStr] = key.split(",");
                    const prefixLen = parseInt(prefixLenStr, 10);
                    // A 32-bit shift by 32 is a no-op in JS, hence the /0 special case
                    const mask = prefixLen === 0 ? 0 : ((0xffffffff << (32 - prefixLen)) >>> 0);
                    if (((fakeInt & mask) >>> 0) === fakeNetInt) {
                        const hostBits = (fakeInt & (~mask >>> 0)) >>> 0;
                        const realNetInt = parseInt(realNetStr, 10);
                        const realIp = intToIp((realNetInt | hostBits) >>> 0);
                        count++;
                        return realIp;
                    }
                }
            }
            catch {
                // Best effort: anything ipToInt/intToIp rejects stays as written
            }
            return match;
        });
        return { text: result, count };
    }

    /**
     * Normalize-and-match deobfuscation for fd00 ULA IPv6 addresses.
     *
     * Handles fakes that appear in the text in a different textual form than
     * the stored one (e.g. compressed vs. expanded), and addresses that share
     * at least the first four groups with a stored fake and are all-zero
     * after the shared part (a network prefix derived from the fake).
     *
     * @param text Text to scan.
     * @param reverse Map of fake -> real values.
     * @returns `{ text, count }`.
     */
    _deobfuscateResidualUla(text, reverse) {
        // Expanded-form lookup built from the fd00 fakes in the reverse map
        const expandedReverse = new Map();
        for (const [fake, real] of reverse) {
            if (fake.includes(":") && fake.toLowerCase().startsWith("fd00")) {
                expandedReverse.set(expandIPv6(fake), real);
            }
        }
        if (expandedReverse.size === 0)
            return { text, count: 0 };
        let count = 0;
        const result = text.replace(ULA_IPV6_RE, (match) => {
            // A literal stored fake is the literal pass's job; leave it alone here
            if (reverse.has(match))
                return match;
            try {
                const expanded = expandIPv6(match);
                const real = expandedReverse.get(expanded);
                if (real) {
                    count++;
                    return real;
                }
                // Prefix match: the text holds a network prefix of a stored fake
                const matchGroups = expanded.split(":");
                for (const [expandedFake, realVal] of expandedReverse) {
                    const fakeGroups = expandedFake.split(":");
                    let commonLen = 0;
                    while (commonLen < 8 && matchGroups[commonLen] === fakeGroups[commonLen]) {
                        commonLen++;
                    }
                    if (commonLen < 4)
                        continue;
                    const trailingZeros = matchGroups.slice(commonLen).every((g) => g === "0000");
                    if (!trailingZeros)
                        continue;
                    // Real address's leading groups + the (all-zero) remainder
                    const realGroups = expandIPv6(realVal).split(":");
                    const reconstructed = [
                        ...realGroups.slice(0, commonLen),
                        ...matchGroups.slice(commonLen),
                    ].join(":");
                    count++;
                    return compressIPv6(reconstructed);
                }
            }
            catch {
                // Best effort: malformed addresses stay as written
            }
            return match;
        });
        return { text: result, count };
    }

    /**
     * Clear all mappings and start fresh.
     *
     * Also clears learned subnets, all counters, the tool depth, the context
     * detector and the canary, and replaces the mapping engine with a new one
     * constructed without a salt argument (the configured persistentSalt is
     * not reused here).
     */
    reset() {
        this._store.clear();
        this._subnetMapper.reset();
        this._ruleHits.clear();
        this._detectionsByCategory.clear();
        this._replacementsByCategory.clear();
        this._toolDepth = 0;
        if (this._contextDetector)
            this._contextDetector.reset();
        // New salt for new session
        this._mapping = new MappingEngine(this.config.secretKey, undefined, this._subnetMapper);
        if (this._canary) {
            this._canary.reset();
        }
    }

    /** Return stats from audit logger and store, plus this instance's counters. */
    getStats() {
        return {
            storeMappings: this._store.size(),
            salt: this._mapping.salt,
            canarySessionId: this._canary?.sessionId ?? null,
            audit: this._audit ? this._audit.getStats() : null,
            ruleHits: Object.fromEntries(this._ruleHits),
            detectionsByCategory: Object.fromEntries(this._detectionsByCategory),
            replacementsByCategory: Object.fromEntries(this._replacementsByCategory),
            toolDepth: this._toolDepth,
            redactionLevel: this.config.redactionLevel,
            learnedEntities: this._contextDetector?.learnedCount ?? 0,
        };
    }
}
|
|
590
|
+
/**
 * Remove overlapping entities.
 *
 * Entities are considered in order of start offset (higher confidence first
 * when two start at the same offset). An entity is kept only if it starts at
 * or after the end of the previously kept one, so among overlapping spans the
 * earliest-starting wins and confidence only breaks ties on equal start.
 *
 * The input does not need to be pre-sorted and is not mutated.
 *
 * @param entities Detected entities with `start`, `end` and `confidence`.
 * @returns A new array of non-overlapping entities in position order.
 */
export function resolveOverlaps(entities) {
    if (entities.length === 0)
        return [];
    // Array.prototype.sort is stable, so input that is already in this order
    // keeps its exact sequence.
    const ordered = [...entities].sort((a, b) => a.start - b.start || b.confidence - a.confidence);
    const resolved = [];
    let lastEnd = -1;
    for (const entity of ordered) {
        if (entity.start >= lastEnd) {
            resolved.push(entity);
            lastEnd = entity.end;
        }
    }
    return resolved;
}
|