shroud-privacy 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +190 -0
- package/NOTICE +7 -0
- package/README.md +369 -0
- package/dist/audit.d.ts +46 -0
- package/dist/audit.js +127 -0
- package/dist/canary.d.ts +31 -0
- package/dist/canary.js +73 -0
- package/dist/config.d.ts +27 -0
- package/dist/config.js +123 -0
- package/dist/detectors/base.d.ts +8 -0
- package/dist/detectors/base.js +2 -0
- package/dist/detectors/code.d.ts +25 -0
- package/dist/detectors/code.js +144 -0
- package/dist/detectors/context.d.ts +31 -0
- package/dist/detectors/context.js +357 -0
- package/dist/detectors/patterns.d.ts +15 -0
- package/dist/detectors/patterns.js +58 -0
- package/dist/detectors/regex.d.ts +28 -0
- package/dist/detectors/regex.js +955 -0
- package/dist/generators/base.d.ts +6 -0
- package/dist/generators/base.js +2 -0
- package/dist/generators/codes.d.ts +20 -0
- package/dist/generators/codes.js +231 -0
- package/dist/generators/names.d.ts +29 -0
- package/dist/generators/names.js +194 -0
- package/dist/generators/network.d.ts +86 -0
- package/dist/generators/network.js +477 -0
- package/dist/hooks.d.ts +27 -0
- package/dist/hooks.js +457 -0
- package/dist/index.d.ts +12 -0
- package/dist/index.js +58 -0
- package/dist/mapping.d.ts +33 -0
- package/dist/mapping.js +72 -0
- package/dist/obfuscator.d.ts +78 -0
- package/dist/obfuscator.js +603 -0
- package/dist/redaction.d.ts +26 -0
- package/dist/redaction.js +76 -0
- package/dist/store.d.ts +40 -0
- package/dist/store.js +79 -0
- package/dist/types.d.ts +101 -0
- package/dist/types.js +35 -0
- package/ncg_adapter.py +530 -0
- package/openclaw.plugin.json +72 -0
- package/package.json +56 -0
- package/shroud_bridge.mjs +225 -0
|
@@ -0,0 +1,357 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Context-aware detection enhancements.
|
|
3
|
+
*
|
|
4
|
+
* Wraps another detector and applies post-detection intelligence:
|
|
5
|
+
* 1. Context-aware confidence boosting (config keyword density)
|
|
6
|
+
* 3. Proximity-based PII clustering (nearby entities boost each other)
|
|
7
|
+
* 4. Config-block hostname extraction (hostname X -> detect bare X)
|
|
8
|
+
* 9. Learned entity propagation (cross-invocation memory)
|
|
9
|
+
* 10. Confidence decay by frequency (common words lose confidence)
|
|
10
|
+
*/
|
|
11
|
+
import { Category } from "../types.js";
|
|
12
|
+
/**
|
|
13
|
+
* Single-pass multi-string scanner using a combined regex.
|
|
14
|
+
* Replaces per-string indexOf loops with one regex alternation pass — O(M)
|
|
15
|
+
* instead of O(S*M) where S = number of strings, M = text length.
|
|
16
|
+
*/
|
|
17
|
+
function scanMultiplePatterns(text, values, covered, category, confidence, detector) {
|
|
18
|
+
if (values.length === 0)
|
|
19
|
+
return [];
|
|
20
|
+
// Sort longest-first so regex matches greedily
|
|
21
|
+
const sorted = values.slice().sort((a, b) => b.length - a.length);
|
|
22
|
+
const escaped = sorted.map((v) => v.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"));
|
|
23
|
+
const re = new RegExp(escaped.join("|"), "g");
|
|
24
|
+
const results = [];
|
|
25
|
+
let m;
|
|
26
|
+
while ((m = re.exec(text)) !== null) {
|
|
27
|
+
const pos = m.index;
|
|
28
|
+
const val = m[0];
|
|
29
|
+
const key = `${pos}:${pos + val.length}`;
|
|
30
|
+
if (!covered.has(key)) {
|
|
31
|
+
covered.add(key);
|
|
32
|
+
results.push({
|
|
33
|
+
value: val,
|
|
34
|
+
start: pos,
|
|
35
|
+
end: pos + val.length,
|
|
36
|
+
category,
|
|
37
|
+
confidence,
|
|
38
|
+
detector,
|
|
39
|
+
});
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
return results;
|
|
43
|
+
}
|
|
44
|
+
// ---------------------------------------------------------------------------
// Config keyword sets for context boosting (#1)
// ---------------------------------------------------------------------------
// Lower-case command prefixes. Each entry keeps its trailing space, so a hit
// requires the keyword to be followed by a space in the scanned text.
const CONFIG_KEYWORDS = [
    "interface ",
    "router ",
    "ip route ",
    "hostname ",
    "switchport ",
    "vlan ",
    "access-list ",
    "route-map ",
    "ip address ",
    "description ",
    "ntp ",
    "snmp-server ",
    "logging ",
    "banner ",
    "crypto ",
    "line ",
    // "set ..." style commands
    "set address ",
    "set zone ",
    "set security ",
    "set interfaces ",
    "set protocols ",
];
// ---------------------------------------------------------------------------
// PII cluster groups for proximity boosting (#3)
// ---------------------------------------------------------------------------
// Categories listed in the same group are treated as peers of one another.
// A category may belong to more than one group (PERSON_NAME does).
const PERSONAL_CLUSTER = [Category.PERSON_NAME, Category.EMAIL, Category.PHONE, Category.SSN];
const NETWORK_CLUSTER = [Category.IP_ADDRESS, Category.HOSTNAME, Category.MAC_ADDRESS];
const PAYMENT_CLUSTER = [Category.CREDIT_CARD, Category.PERSON_NAME];
const CLUSTER_GROUPS = [PERSONAL_CLUSTER, NETWORK_CLUSTER, PAYMENT_CLUSTER];
|
|
63
|
+
/**
 * Collect every category that shares at least one cluster group with
 * `category`, excluding `category` itself.
 *
 * @param category - Category to look up.
 * @returns {Set} Peer categories; empty when `category` is in no group.
 */
function getClusterPeers(category) {
    const members = CLUSTER_GROUPS
        .filter((group) => group.includes(category))
        .flatMap((group) => group);
    return new Set(members.filter((member) => member !== category));
}
|
|
75
|
+
// ---------------------------------------------------------------------------
|
|
76
|
+
// Common words that should decay in confidence (#10)
|
|
77
|
+
// ---------------------------------------------------------------------------
|
|
78
|
+
const COMMON_WORDS = new Set([
|
|
79
|
+
"permit", "deny", "default", "service", "system",
|
|
80
|
+
"access", "network", "global", "local", "public",
|
|
81
|
+
"private", "standard", "extended", "input", "output",
|
|
82
|
+
"inside", "outside", "trust", "untrust", "management",
|
|
83
|
+
"control", "data", "voice", "video", "wireless",
|
|
84
|
+
"primary", "secondary", "backup", "active", "standby",
|
|
85
|
+
]);
|
|
86
|
+
// ---------------------------------------------------------------------------
|
|
87
|
+
// Patterns for hostname extraction (#4)
|
|
88
|
+
// ---------------------------------------------------------------------------
|
|
89
|
+
const HOSTNAME_CMD_RE = /(?:^|\n)\s*hostname\s+(\S+)/gi;
|
|
90
|
+
const SWITCHNAME_CMD_RE = /(?:^|\n)\s*switchname\s+(\S+)/gi;
|
|
91
|
+
// ---------------------------------------------------------------------------
|
|
92
|
+
// ContextDetector
|
|
93
|
+
// ---------------------------------------------------------------------------
|
|
94
|
+
/** Proximity window in characters for PII clustering. */
|
|
95
|
+
const PROXIMITY_WINDOW = 200;
|
|
96
|
+
/** Confidence boost for context (config block). */
|
|
97
|
+
const CONTEXT_BOOST = 0.10;
|
|
98
|
+
/** Confidence boost for proximity clustering. */
|
|
99
|
+
const PROXIMITY_BOOST = 0.08;
|
|
100
|
+
export class ContextDetector {
|
|
101
|
+
name = "context";
|
|
102
|
+
_inner;
|
|
103
|
+
/** Feature 9: Learned entities from previous invocations. */
|
|
104
|
+
_learnedEntities = new Map();
|
|
105
|
+
constructor(inner) {
|
|
106
|
+
this._inner = inner;
|
|
107
|
+
}
|
|
108
|
+
detect(text) {
|
|
109
|
+
// Run inner detector
|
|
110
|
+
let entities = this._inner.detect(text);
|
|
111
|
+
// #9: Inject learned entities (from previous invocations)
|
|
112
|
+
entities = this._injectLearnedEntities(text, entities);
|
|
113
|
+
// #4: Extract hostnames from config lines and find bare occurrences
|
|
114
|
+
entities = this._extractAndPropagateHostnames(text, entities);
|
|
115
|
+
// #1: Context-aware confidence boosting
|
|
116
|
+
entities = this._boostFromContext(text, entities);
|
|
117
|
+
// #3: Proximity-based PII clustering
|
|
118
|
+
entities = this._boostByProximity(entities);
|
|
119
|
+
// #10: Confidence decay for common words
|
|
120
|
+
entities = this._decayCommonWords(entities);
|
|
121
|
+
// #9: Learn from this invocation for next time
|
|
122
|
+
this._learnEntities(entities);
|
|
123
|
+
return entities;
|
|
124
|
+
}
|
|
125
|
+
/** Reset learned entities (called on Obfuscator.reset()). */
|
|
126
|
+
reset() {
|
|
127
|
+
this._learnedEntities.clear();
|
|
128
|
+
}
|
|
129
|
+
/** Get count of learned entities. */
|
|
130
|
+
get learnedCount() {
|
|
131
|
+
return this._learnedEntities.size;
|
|
132
|
+
}
|
|
133
|
+
// -------------------------------------------------------------------------
|
|
134
|
+
// #1: Context-aware confidence boosting
|
|
135
|
+
// -------------------------------------------------------------------------
|
|
136
|
+
_boostFromContext(text, entities) {
|
|
137
|
+
if (entities.length === 0)
|
|
138
|
+
return entities;
|
|
139
|
+
// Split text into blocks (paragraphs or ~20 line chunks)
|
|
140
|
+
const blocks = this._splitBlocks(text);
|
|
141
|
+
// Score each block for config keyword density
|
|
142
|
+
const blockScores = [];
|
|
143
|
+
for (const block of blocks) {
|
|
144
|
+
let score = 0;
|
|
145
|
+
const lower = block.text.toLowerCase();
|
|
146
|
+
for (const kw of CONFIG_KEYWORDS) {
|
|
147
|
+
if (lower.includes(kw.toLowerCase())) {
|
|
148
|
+
score++;
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
blockScores.push({ start: block.start, end: block.end, score });
|
|
152
|
+
}
|
|
153
|
+
// Boost entities in high-scoring blocks.
|
|
154
|
+
// Blocks are sorted by start position, so use binary search — O(log B) per entity.
|
|
155
|
+
return entities.map((e) => {
|
|
156
|
+
// Binary search for block containing entity
|
|
157
|
+
let lo = 0, hi = blockScores.length - 1;
|
|
158
|
+
let block = null;
|
|
159
|
+
while (lo <= hi) {
|
|
160
|
+
const mid = (lo + hi) >>> 1;
|
|
161
|
+
const b = blockScores[mid];
|
|
162
|
+
if (e.start >= b.start && e.end <= b.end) {
|
|
163
|
+
block = b;
|
|
164
|
+
break;
|
|
165
|
+
}
|
|
166
|
+
if (e.start < b.start)
|
|
167
|
+
hi = mid - 1;
|
|
168
|
+
else
|
|
169
|
+
lo = mid + 1;
|
|
170
|
+
}
|
|
171
|
+
if (block && block.score >= 2) {
|
|
172
|
+
return {
|
|
173
|
+
...e,
|
|
174
|
+
confidence: Math.min(1.0, e.confidence + CONTEXT_BOOST),
|
|
175
|
+
};
|
|
176
|
+
}
|
|
177
|
+
return e;
|
|
178
|
+
});
|
|
179
|
+
}
|
|
180
|
+
_splitBlocks(text) {
|
|
181
|
+
const blocks = [];
|
|
182
|
+
// Use matchAll to find paragraph separators and derive block positions
|
|
183
|
+
// without re-scanning the text with indexOf.
|
|
184
|
+
const sepRe = /\n\s*\n/g;
|
|
185
|
+
let lastEnd = 0;
|
|
186
|
+
let m;
|
|
187
|
+
while ((m = sepRe.exec(text)) !== null) {
|
|
188
|
+
if (m.index > lastEnd) {
|
|
189
|
+
blocks.push({ text: text.slice(lastEnd, m.index), start: lastEnd, end: m.index });
|
|
190
|
+
}
|
|
191
|
+
lastEnd = m.index + m[0].length;
|
|
192
|
+
}
|
|
193
|
+
// Trailing block
|
|
194
|
+
if (lastEnd < text.length) {
|
|
195
|
+
blocks.push({ text: text.slice(lastEnd), start: lastEnd, end: text.length });
|
|
196
|
+
}
|
|
197
|
+
// If no paragraph breaks, treat whole text as one block
|
|
198
|
+
if (blocks.length <= 1) {
|
|
199
|
+
blocks.length = 0;
|
|
200
|
+
blocks.push({ text, start: 0, end: text.length });
|
|
201
|
+
}
|
|
202
|
+
return blocks;
|
|
203
|
+
}
|
|
204
|
+
// -------------------------------------------------------------------------
|
|
205
|
+
// #3: Proximity-based PII clustering
|
|
206
|
+
// -------------------------------------------------------------------------
|
|
207
|
+
_boostByProximity(entities) {
|
|
208
|
+
if (entities.length < 2)
|
|
209
|
+
return entities;
|
|
210
|
+
// Sort by start position for two-pointer window scan — O(n log n)
|
|
211
|
+
const sorted = entities.slice().sort((a, b) => a.start - b.start);
|
|
212
|
+
// For each entity, count cluster peers within PROXIMITY_WINDOW using
|
|
213
|
+
// a sliding window instead of O(n²) pairwise comparison.
|
|
214
|
+
const nearbyCounts = new Map();
|
|
215
|
+
for (let i = 0; i < sorted.length; i++) {
|
|
216
|
+
const e = sorted[i];
|
|
217
|
+
const peers = getClusterPeers(e.category);
|
|
218
|
+
if (peers.size === 0)
|
|
219
|
+
continue;
|
|
220
|
+
let count = 0;
|
|
221
|
+
// Scan forward within window
|
|
222
|
+
for (let j = i + 1; j < sorted.length; j++) {
|
|
223
|
+
if (sorted[j].start - e.end > PROXIMITY_WINDOW)
|
|
224
|
+
break;
|
|
225
|
+
if (peers.has(sorted[j].category))
|
|
226
|
+
count++;
|
|
227
|
+
}
|
|
228
|
+
// Scan backward within window
|
|
229
|
+
for (let j = i - 1; j >= 0; j--) {
|
|
230
|
+
if (e.start - sorted[j].end > PROXIMITY_WINDOW)
|
|
231
|
+
break;
|
|
232
|
+
if (peers.has(sorted[j].category))
|
|
233
|
+
count++;
|
|
234
|
+
}
|
|
235
|
+
if (count > 0)
|
|
236
|
+
nearbyCounts.set(e, count);
|
|
237
|
+
}
|
|
238
|
+
if (nearbyCounts.size === 0)
|
|
239
|
+
return entities;
|
|
240
|
+
return entities.map((e) => {
|
|
241
|
+
const count = nearbyCounts.get(e);
|
|
242
|
+
if (count) {
|
|
243
|
+
return {
|
|
244
|
+
...e,
|
|
245
|
+
confidence: Math.min(1.0, e.confidence + PROXIMITY_BOOST * count),
|
|
246
|
+
};
|
|
247
|
+
}
|
|
248
|
+
return e;
|
|
249
|
+
});
|
|
250
|
+
}
|
|
251
|
+
// -------------------------------------------------------------------------
|
|
252
|
+
// #4: Config-block hostname extraction
|
|
253
|
+
// -------------------------------------------------------------------------
|
|
254
|
+
_extractAndPropagateHostnames(text, entities) {
|
|
255
|
+
// Find hostname values from cisco_hostname pattern matches
|
|
256
|
+
const hostnames = new Set();
|
|
257
|
+
for (const e of entities) {
|
|
258
|
+
if (e.detector === "regex:cisco_hostname" ||
|
|
259
|
+
e.detector.endsWith(":cisco_hostname")) {
|
|
260
|
+
hostnames.add(e.value);
|
|
261
|
+
}
|
|
262
|
+
}
|
|
263
|
+
// Also scan with our own regex for hostname/switchname commands
|
|
264
|
+
for (const re of [HOSTNAME_CMD_RE, SWITCHNAME_CMD_RE]) {
|
|
265
|
+
re.lastIndex = 0;
|
|
266
|
+
for (const m of text.matchAll(re)) {
|
|
267
|
+
if (m[1])
|
|
268
|
+
hostnames.add(m[1]);
|
|
269
|
+
}
|
|
270
|
+
}
|
|
271
|
+
if (hostnames.size === 0)
|
|
272
|
+
return entities;
|
|
273
|
+
// Track existing entity positions
|
|
274
|
+
const covered = new Set(entities.map((e) => `${e.start}:${e.end}`));
|
|
275
|
+
// Single-pass combined regex for all hostnames instead of per-hostname indexOf
|
|
276
|
+
const additional = scanMultiplePatterns(text, [...hostnames], covered, Category.HOSTNAME, 0.85, "context:hostname_propagation");
|
|
277
|
+
if (additional.length === 0)
|
|
278
|
+
return entities;
|
|
279
|
+
return [...entities, ...additional].sort((a, b) => a.start - b.start);
|
|
280
|
+
}
|
|
281
|
+
// -------------------------------------------------------------------------
|
|
282
|
+
// #9: Learned entity propagation
|
|
283
|
+
// -------------------------------------------------------------------------
|
|
284
|
+
_injectLearnedEntities(text, entities) {
|
|
285
|
+
if (this._learnedEntities.size === 0)
|
|
286
|
+
return entities;
|
|
287
|
+
const covered = new Set(entities.map((e) => `${e.start}:${e.end}`));
|
|
288
|
+
// Group learned entities by category for batch scanning
|
|
289
|
+
const byCat = new Map();
|
|
290
|
+
for (const [value, category] of this._learnedEntities) {
|
|
291
|
+
let arr = byCat.get(category);
|
|
292
|
+
if (!arr) {
|
|
293
|
+
arr = [];
|
|
294
|
+
byCat.set(category, arr);
|
|
295
|
+
}
|
|
296
|
+
arr.push(value);
|
|
297
|
+
}
|
|
298
|
+
const additional = [];
|
|
299
|
+
for (const [category, values] of byCat) {
|
|
300
|
+
const hits = scanMultiplePatterns(text, values, covered, category, 0.80, "context:learned_entity");
|
|
301
|
+
additional.push(...hits);
|
|
302
|
+
}
|
|
303
|
+
if (additional.length === 0)
|
|
304
|
+
return entities;
|
|
305
|
+
return [...entities, ...additional].sort((a, b) => a.start - b.start);
|
|
306
|
+
}
|
|
307
|
+
_learnEntities(entities) {
|
|
308
|
+
// Learn high-confidence entities from config-context patterns
|
|
309
|
+
const learnableDetectors = new Set([
|
|
310
|
+
"regex:cisco_hostname",
|
|
311
|
+
"regex:route_map_name",
|
|
312
|
+
"regex:acl_name",
|
|
313
|
+
"regex:prefix_list_name",
|
|
314
|
+
"regex:vlan_name",
|
|
315
|
+
"regex:interface_description",
|
|
316
|
+
"regex:device_name_dotted",
|
|
317
|
+
"regex:device_name_short",
|
|
318
|
+
"context:hostname_propagation",
|
|
319
|
+
]);
|
|
320
|
+
for (const e of entities) {
|
|
321
|
+
if (e.confidence >= 0.80 &&
|
|
322
|
+
(learnableDetectors.has(e.detector) ||
|
|
323
|
+
e.category === Category.HOSTNAME)) {
|
|
324
|
+
// Only learn values that look like identifiers (not too short, not common words)
|
|
325
|
+
if (e.value.length >= 3 && !COMMON_WORDS.has(e.value.toLowerCase())) {
|
|
326
|
+
this._learnedEntities.set(e.value, e.category);
|
|
327
|
+
}
|
|
328
|
+
}
|
|
329
|
+
}
|
|
330
|
+
// Cap learned entities to prevent unbounded growth.
|
|
331
|
+
// Delete oldest entries (Map preserves insertion order) without rebuilding.
|
|
332
|
+
if (this._learnedEntities.size > 1000) {
|
|
333
|
+
const toDelete = this._learnedEntities.size - 500;
|
|
334
|
+
let deleted = 0;
|
|
335
|
+
for (const key of this._learnedEntities.keys()) {
|
|
336
|
+
if (deleted >= toDelete)
|
|
337
|
+
break;
|
|
338
|
+
this._learnedEntities.delete(key);
|
|
339
|
+
deleted++;
|
|
340
|
+
}
|
|
341
|
+
}
|
|
342
|
+
}
|
|
343
|
+
// -------------------------------------------------------------------------
|
|
344
|
+
// #10: Confidence decay for common words
|
|
345
|
+
// -------------------------------------------------------------------------
|
|
346
|
+
_decayCommonWords(entities) {
|
|
347
|
+
return entities.map((e) => {
|
|
348
|
+
if (COMMON_WORDS.has(e.value.toLowerCase())) {
|
|
349
|
+
return {
|
|
350
|
+
...e,
|
|
351
|
+
confidence: e.confidence * 0.5,
|
|
352
|
+
};
|
|
353
|
+
}
|
|
354
|
+
return e;
|
|
355
|
+
});
|
|
356
|
+
}
|
|
357
|
+
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/** User-defined custom pattern detector. */
|
|
2
|
+
import { DetectedEntity } from "../types.js";
|
|
3
|
+
import { BaseDetector } from "./base.js";
|
|
4
|
+
export interface CustomPatternDef {
|
|
5
|
+
/** Pattern label; the detector reports each match with detector `custom:<name>`. */
name: string;
|
|
6
|
+
/** Regular-expression source text; the detector compiles it with the global (`g`) flag. */
pattern: string;
|
|
7
|
+
/** Optional category string; when omitted or not one of the `Category` values, the detector uses `Category.CUSTOM`. */
category?: string;
|
|
8
|
+
}
|
|
9
|
+
/**
 * Detector that uses user-defined regex patterns from config.
 *
 * Every match is reported with confidence 0.9 and detector label
 * `custom:<pattern name>`; results are sorted by start offset.
 */
|
|
10
|
+
export declare class CustomPatternDetector implements BaseDetector {
|
|
11
|
+
/** Detector identifier. */
readonly name = "patterns";
|
|
12
|
+
/** Compiled patterns (name, global regex, resolved category). */
private _patterns;
|
|
13
|
+
/**
 * @param patterns - Pattern definitions to compile. An invalid `pattern`
 *   string makes `new RegExp` throw from the constructor.
 */
constructor(patterns: CustomPatternDef[]);
|
|
14
|
+
/**
 * Scan `text` with every pattern, in definition order.
 * @param text - Text to scan.
 * @returns Detected entities sorted by `start`.
 */
detect(text: string): DetectedEntity[];
|
|
15
|
+
}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/** User-defined custom pattern detector. */
|
|
2
|
+
import { Category } from "../types.js";
|
|
3
|
+
/** Detector that uses user-defined regex patterns from config. */
|
|
4
|
+
export class CustomPatternDetector {
|
|
5
|
+
name = "patterns";
|
|
6
|
+
_patterns;
|
|
7
|
+
constructor(patterns) {
|
|
8
|
+
this._patterns = patterns.map((p) => {
|
|
9
|
+
let cat;
|
|
10
|
+
const catStr = p.category ?? "custom";
|
|
11
|
+
if (Object.values(Category).includes(catStr)) {
|
|
12
|
+
cat = catStr;
|
|
13
|
+
}
|
|
14
|
+
else {
|
|
15
|
+
cat = Category.CUSTOM;
|
|
16
|
+
}
|
|
17
|
+
return {
|
|
18
|
+
name: p.name,
|
|
19
|
+
regex: new RegExp(p.pattern, "g"),
|
|
20
|
+
category: cat,
|
|
21
|
+
};
|
|
22
|
+
});
|
|
23
|
+
}
|
|
24
|
+
detect(text) {
|
|
25
|
+
const entities = [];
|
|
26
|
+
const seenSpans = [];
|
|
27
|
+
for (const { name, regex, category } of this._patterns) {
|
|
28
|
+
regex.lastIndex = 0;
|
|
29
|
+
for (const match of text.matchAll(regex)) {
|
|
30
|
+
const start = match.index;
|
|
31
|
+
const end = start + match[0].length;
|
|
32
|
+
const span = [start, end];
|
|
33
|
+
// Check for overlap with existing spans
|
|
34
|
+
let overlaps = false;
|
|
35
|
+
for (const [s, e] of seenSpans) {
|
|
36
|
+
if ((s <= span[0] && span[0] < e) || (s < span[1] && span[1] <= e)) {
|
|
37
|
+
overlaps = true;
|
|
38
|
+
break;
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
if (overlaps) {
|
|
42
|
+
continue;
|
|
43
|
+
}
|
|
44
|
+
seenSpans.push(span);
|
|
45
|
+
entities.push({
|
|
46
|
+
value: match[0],
|
|
47
|
+
start,
|
|
48
|
+
end,
|
|
49
|
+
category,
|
|
50
|
+
confidence: 0.9,
|
|
51
|
+
detector: `custom:${name}`,
|
|
52
|
+
});
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
entities.sort((a, b) => a.start - b.start);
|
|
56
|
+
return entities;
|
|
57
|
+
}
|
|
58
|
+
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/** Regex-based detectors for structured sensitive data. */
|
|
2
|
+
import { Category, DetectedEntity } from "../types.js";
|
|
3
|
+
import { BaseDetector } from "./base.js";
|
|
4
|
+
/**
 * Check if a value is a well-known documentation/example/placeholder.
 *
 * @param value - Candidate entity text.
 * @param category - Category the value was detected as.
 * @returns `true` when the value is a known example/placeholder.
 * NOTE(review): which values count is decided in regex.js, not visible
 * here; presumably the check depends on `category` — confirm.
 */
|
|
5
|
+
export declare function isDocExample(value: string, category: Category): boolean;
|
|
6
|
+
/**
 * Heuristic: return true for subnet masks and wildcard masks.
 *
 * @param ip - Address-like string to test (presumably dotted-quad IPv4 — confirm in regex.js).
 */
|
|
7
|
+
export declare function isMask(ip: string): boolean;
|
|
8
|
+
/** A named regex pattern with its category. */
|
|
9
|
+
export interface PatternDef {
|
|
10
|
+
/**
 * Rule name.
 * NOTE(review): presumably surfaces in entity `detector` labels as
 * `regex:<name>` (context.js tests for `regex:cisco_hostname`) — confirm.
 */
name: string;
|
|
11
|
+
/** Regular expression that finds the entity. */
pattern: RegExp;
|
|
12
|
+
/** Category assigned to matches of this pattern. */
category: Category;
|
|
13
|
+
/** Confidence assigned to matches (presumably in the 0–1 range — confirm). */
confidence: number;
|
|
14
|
+
}
|
|
15
|
+
/** All built-in patterns (defined in regex.js). */
|
|
16
|
+
export declare const BUILTIN_PATTERNS: PatternDef[];
|
|
17
|
+
/**
 * Override config for individual rules: disable or change confidence.
 * NOTE(review): the record key is presumably the rule's `PatternDef.name` — confirm in regex.js.
 */
|
|
18
|
+
export type DetectorOverrides = Record<string, {
|
|
19
|
+
/** Turns the rule on or off (behaviour when omitted is set in regex.js — TODO confirm). */
enabled?: boolean;
|
|
20
|
+
/** Replacement confidence for the rule. */
confidence?: number;
|
|
21
|
+
}>;
|
|
22
|
+
/** Detects sensitive entities using regex patterns. */
|
|
23
|
+
export declare class RegexDetector implements BaseDetector {
|
|
24
|
+
/** Detector identifier. */
readonly name = "regex";
|
|
25
|
+
/** Active pattern list (built in the constructor; details are in regex.js). */
private patterns;
|
|
26
|
+
/**
 * @param extraPatterns - Optional additional patterns (presumably used
 *   alongside `BUILTIN_PATTERNS` — confirm).
 * @param overrides - Optional per-rule enable/confidence overrides.
 */
constructor(extraPatterns?: PatternDef[], overrides?: DetectorOverrides);
|
|
27
|
+
/**
 * @param text - Text to scan.
 * @returns Entities detected in `text`.
 */
detect(text: string): DetectedEntity[];
|
|
28
|
+
}
|