shroud-privacy 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45):
  1. package/LICENSE +190 -0
  2. package/NOTICE +7 -0
  3. package/README.md +369 -0
  4. package/dist/audit.d.ts +46 -0
  5. package/dist/audit.js +127 -0
  6. package/dist/canary.d.ts +31 -0
  7. package/dist/canary.js +73 -0
  8. package/dist/config.d.ts +27 -0
  9. package/dist/config.js +123 -0
  10. package/dist/detectors/base.d.ts +8 -0
  11. package/dist/detectors/base.js +2 -0
  12. package/dist/detectors/code.d.ts +25 -0
  13. package/dist/detectors/code.js +144 -0
  14. package/dist/detectors/context.d.ts +31 -0
  15. package/dist/detectors/context.js +357 -0
  16. package/dist/detectors/patterns.d.ts +15 -0
  17. package/dist/detectors/patterns.js +58 -0
  18. package/dist/detectors/regex.d.ts +28 -0
  19. package/dist/detectors/regex.js +955 -0
  20. package/dist/generators/base.d.ts +6 -0
  21. package/dist/generators/base.js +2 -0
  22. package/dist/generators/codes.d.ts +20 -0
  23. package/dist/generators/codes.js +231 -0
  24. package/dist/generators/names.d.ts +29 -0
  25. package/dist/generators/names.js +194 -0
  26. package/dist/generators/network.d.ts +86 -0
  27. package/dist/generators/network.js +477 -0
  28. package/dist/hooks.d.ts +27 -0
  29. package/dist/hooks.js +457 -0
  30. package/dist/index.d.ts +12 -0
  31. package/dist/index.js +58 -0
  32. package/dist/mapping.d.ts +33 -0
  33. package/dist/mapping.js +72 -0
  34. package/dist/obfuscator.d.ts +78 -0
  35. package/dist/obfuscator.js +603 -0
  36. package/dist/redaction.d.ts +26 -0
  37. package/dist/redaction.js +76 -0
  38. package/dist/store.d.ts +40 -0
  39. package/dist/store.js +79 -0
  40. package/dist/types.d.ts +101 -0
  41. package/dist/types.js +35 -0
  42. package/ncg_adapter.py +530 -0
  43. package/openclaw.plugin.json +72 -0
  44. package/package.json +56 -0
  45. package/shroud_bridge.mjs +225 -0
@@ -0,0 +1,357 @@
1
+ /**
2
+ * Context-aware detection enhancements.
3
+ *
4
+ * Wraps another detector and applies post-detection intelligence:
5
+ * 1. Context-aware confidence boosting (config keyword density)
6
+ * 3. Proximity-based PII clustering (nearby entities boost each other)
7
+ * 4. Config-block hostname extraction (hostname X -> detect bare X)
8
+ * 9. Learned entity propagation (cross-invocation memory)
9
+ * 10. Confidence decay by frequency (common words lose confidence)
10
+ */
11
+ import { Category } from "../types.js";
12
/**
 * Single-pass multi-string scanner using one combined regex.
 *
 * All literal `values` are escaped and joined into a single alternation, so the
 * text is walked once by the regex engine instead of once per value.
 *
 * @param text       Text to scan.
 * @param values     Literal strings to look for. Empty strings and duplicates are ignored.
 * @param covered    Set of `"start:end"` keys for spans that are already reported.
 *                   Hits whose exact span is present are skipped; every new hit is
 *                   added to the set (the set is mutated).
 * @param category   Category assigned to every hit.
 * @param confidence Confidence assigned to every hit.
 * @param detector   Detector id assigned to every hit.
 * @returns Newly found entities in left-to-right order. Matches never overlap
 *          each other because the regex resumes after each match.
 */
function scanMultiplePatterns(text, values, covered, category, confidence, detector) {
    // An empty alternative matches with zero width at every position; with a
    // manual exec() loop that never advances lastIndex and the scan would hang.
    // Duplicates only bloat the alternation, so drop both up front.
    const needles = [...new Set(values)].filter((v) => v.length > 0);
    if (needles.length === 0)
        return [];
    // Alternation is ordered: the first alternative that matches wins. Putting
    // longer strings first makes "r10" win over its prefix "r1".
    needles.sort((a, b) => b.length - a.length);
    const escaped = needles.map((v) => v.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"));
    const re = new RegExp(escaped.join("|"), "g");
    const results = [];
    for (const m of text.matchAll(re)) {
        const start = m.index;
        const value = m[0];
        const end = start + value.length;
        const key = `${start}:${end}`;
        if (covered.has(key))
            continue;
        covered.add(key);
        results.push({ value, start, end, category, confidence, detector });
    }
    return results;
}
44
// ---------------------------------------------------------------------------
// Context boosting (#1): command prefixes that mark a block as config text
// ---------------------------------------------------------------------------
/**
 * Lowercase command prefixes, each with a trailing space. A block's score is
 * the number of distinct entries it contains as a substring.
 */
const CONFIG_KEYWORDS = [
    "interface ",
    "router ",
    "ip route ",
    "hostname ",
    "switchport ",
    "vlan ",
    "access-list ",
    "route-map ",
    "ip address ",
    "description ",
    "ntp ",
    "snmp-server ",
    "logging ",
    "banner ",
    "crypto ",
    "line ",
    "set address ",
    "set zone ",
    "set security ",
    "set interfaces ",
    "set protocols ",
];
55
// ---------------------------------------------------------------------------
// Proximity boosting (#3): categories that reinforce each other when nearby
// ---------------------------------------------------------------------------
/** Each row is one cluster; a category may belong to more than one row. */
const CLUSTER_GROUPS = [
    [Category.PERSON_NAME, Category.EMAIL, Category.PHONE, Category.SSN],
    [Category.IP_ADDRESS, Category.HOSTNAME, Category.MAC_ADDRESS],
    [Category.CREDIT_CARD, Category.PERSON_NAME],
];
/**
 * Collect every category that shares at least one cluster with `category`.
 *
 * @param category Category to look up.
 * @returns Set of the other members of all clusters containing `category`
 *          (never `category` itself); empty when it belongs to no cluster.
 */
function getClusterPeers(category) {
    const memberships = CLUSTER_GROUPS.filter((group) => group.includes(category));
    const others = memberships.flat().filter((member) => member !== category);
    return new Set(others);
}
75
// ---------------------------------------------------------------------------
// Confidence decay (#10): generic words that are unlikely to be identifiers
// ---------------------------------------------------------------------------
/** Lowercase words; lookups are done on the lowercased entity value. */
const COMMON_WORDS = new Set([
    "permit", "deny", "default", "service", "system", "access",
    "network", "global", "local", "public", "private", "standard",
    "extended", "input", "output", "inside", "outside", "trust",
    "untrust", "management", "control", "data", "voice", "video",
    "wireless", "primary", "secondary", "backup", "active", "standby",
]);
86
// ---------------------------------------------------------------------------
// Hostname extraction (#4): line-leading `hostname` / `switchname` commands
// ---------------------------------------------------------------------------
/**
 * Matches `hostname <token>` at the start of the text or right after a newline
 * (leading whitespace allowed, case-insensitive). Group 1 is the token.
 */
const HOSTNAME_CMD_RE = /(?:^|\n)\s*hostname\s+(\S+)/gi;
/** Same shape as HOSTNAME_CMD_RE, for the `switchname <token>` command. */
const SWITCHNAME_CMD_RE = /(?:^|\n)\s*switchname\s+(\S+)/gi;
// ---------------------------------------------------------------------------
// ContextDetector tuning constants
// ---------------------------------------------------------------------------
/** Largest character gap between two entities that still counts as "nearby". */
const PROXIMITY_WINDOW = 200;
/** Confidence added to entities inside a block that looks like device config. */
const CONTEXT_BOOST = 0.1;
/** Confidence added per nearby cluster peer. */
const PROXIMITY_BOOST = 0.08;
100
/**
 * Wraps another detector and post-processes its hits: injects values learned
 * on earlier calls, propagates hostnames found in `hostname`/`switchname`
 * commands, boosts confidence from config context and from nearby related
 * entities, and halves confidence for generic words.
 */
export class ContextDetector {
    /**
     * Detector ids whose hits are eligible for learning regardless of category.
     * Built once for the class instead of on every `detect()` call.
     */
    static _LEARNABLE_DETECTORS = new Set([
        "regex:cisco_hostname",
        "regex:route_map_name",
        "regex:acl_name",
        "regex:prefix_list_name",
        "regex:vlan_name",
        "regex:interface_description",
        "regex:device_name_dotted",
        "regex:device_name_short",
        "context:hostname_propagation",
    ]);
    name = "context";
    _inner;
    /** Feature 9: value -> category, remembered across `detect()` calls. */
    _learnedEntities = new Map();
    /**
     * @param inner Detector whose `detect(text)` result is post-processed.
     */
    constructor(inner) {
        this._inner = inner;
    }
    /**
     * Run the wrapped detector and apply every enhancement in a fixed order.
     *
     * @param text Text to analyse.
     * @returns Entities from the inner detector plus injected/propagated ones,
     *          with adjusted confidences.
     */
    detect(text) {
        let entities = this._inner.detect(text);
        // #9: re-detect values learned on previous invocations
        entities = this._injectLearnedEntities(text, entities);
        // #4: hostnames named in config lines, then their bare occurrences
        entities = this._extractAndPropagateHostnames(text, entities);
        // #1: boost inside config-looking blocks
        entities = this._boostFromContext(text, entities);
        // #3: boost related entities that sit close together
        entities = this._boostByProximity(entities);
        // #10: decay generic words (after boosting, so the decay applies to the boosted value)
        entities = this._decayCommonWords(entities);
        // #9: remember this call's results for the next one
        this._learnEntities(entities);
        return entities;
    }
    /** Forget all learned entities (the original comment says Obfuscator.reset() calls this). */
    reset() {
        this._learnedEntities.clear();
    }
    /** Number of values currently remembered. */
    get learnedCount() {
        return this._learnedEntities.size;
    }
    // -------------------------------------------------------------------------
    // #1: Context-aware confidence boosting
    // -------------------------------------------------------------------------
    /**
     * Add CONTEXT_BOOST (capped at 1.0) to every entity that lies completely
     * inside a block containing at least two distinct config keywords.
     */
    _boostFromContext(text, entities) {
        if (entities.length === 0)
            return entities;
        const keywords = CONFIG_KEYWORDS.map((kw) => kw.toLowerCase());
        // Blocks are blank-line separated paragraphs (see _splitBlocks).
        const blockScores = this._splitBlocks(text).map((block) => {
            const lower = block.text.toLowerCase();
            let score = 0;
            for (const kw of keywords) {
                if (lower.includes(kw))
                    score++;
            }
            return { start: block.start, end: block.end, score };
        });
        // Blocks are ordered by start and do not overlap, so the containing
        // block (if any) can be located by binary search.
        return entities.map((e) => {
            let lo = 0;
            let hi = blockScores.length - 1;
            let block = null;
            while (lo <= hi) {
                const mid = (lo + hi) >>> 1;
                const candidate = blockScores[mid];
                if (e.start >= candidate.start && e.end <= candidate.end) {
                    block = candidate;
                    break;
                }
                if (e.start < candidate.start)
                    hi = mid - 1;
                else
                    lo = mid + 1;
            }
            if (block && block.score >= 2) {
                return {
                    ...e,
                    confidence: Math.min(1.0, e.confidence + CONTEXT_BOOST),
                };
            }
            return e;
        });
    }
    /**
     * Split text into paragraphs separated by blank (whitespace-only) lines.
     *
     * @returns `{ text, start, end }` records in document order. When fewer
     *          than two paragraphs are found the whole text is returned as a
     *          single block.
     */
    _splitBlocks(text) {
        const blocks = [];
        const sepRe = /\n\s*\n/g;
        let lastEnd = 0;
        for (const m of text.matchAll(sepRe)) {
            if (m.index > lastEnd) {
                blocks.push({ text: text.slice(lastEnd, m.index), start: lastEnd, end: m.index });
            }
            lastEnd = m.index + m[0].length;
        }
        // Trailing paragraph after the last separator
        if (lastEnd < text.length) {
            blocks.push({ text: text.slice(lastEnd), start: lastEnd, end: text.length });
        }
        if (blocks.length <= 1) {
            return [{ text, start: 0, end: text.length }];
        }
        return blocks;
    }
    // -------------------------------------------------------------------------
    // #3: Proximity-based PII clustering
    // -------------------------------------------------------------------------
    /**
     * Add PROXIMITY_BOOST per nearby cluster peer (capped at 1.0).
     *
     * Two entities are "nearby" when the later one starts no more than
     * PROXIMITY_WINDOW characters after the earlier one ends. The relation is
     * evaluated identically in both directions, so if A counts B then B counts A.
     */
    _boostByProximity(entities) {
        if (entities.length < 2)
            return entities;
        const sorted = entities.slice().sort((a, b) => a.start - b.start);
        // Longest span: bounds how far back an entity may start and still end
        // inside the window of a later entity.
        let maxLen = 0;
        for (const e of sorted) {
            maxLen = Math.max(maxLen, e.end - e.start);
        }
        const peersByCategory = new Map();
        const nearbyCounts = new Map();
        for (let i = 0; i < sorted.length; i++) {
            const e = sorted[i];
            let peers = peersByCategory.get(e.category);
            if (!peers) {
                peers = getClusterPeers(e.category);
                peersByCategory.set(e.category, peers);
            }
            if (peers.size === 0)
                continue;
            let count = 0;
            // Forward: starts are ascending, so the first start outside the
            // window ends the scan.
            for (let j = i + 1; j < sorted.length; j++) {
                if (sorted[j].start - e.end > PROXIMITY_WINDOW)
                    break;
                if (peers.has(sorted[j].category))
                    count++;
            }
            // Backward: ends are NOT ordered (a long or overlapping earlier
            // entity can end later than a short one that starts after it), so
            // an out-of-window end only skips that entity. Stop once starts are
            // so far back that no span of length <= maxLen could reach the window.
            for (let j = i - 1; j >= 0; j--) {
                const other = sorted[j];
                if (e.start - other.start > PROXIMITY_WINDOW + maxLen)
                    break;
                if (e.start - other.end > PROXIMITY_WINDOW)
                    continue;
                if (peers.has(other.category))
                    count++;
            }
            if (count > 0)
                nearbyCounts.set(e, count);
        }
        if (nearbyCounts.size === 0)
            return entities;
        // Map over the caller's order; `sorted` shares the same object identities.
        return entities.map((e) => {
            const count = nearbyCounts.get(e);
            if (count) {
                return {
                    ...e,
                    confidence: Math.min(1.0, e.confidence + PROXIMITY_BOOST * count),
                };
            }
            return e;
        });
    }
    // -------------------------------------------------------------------------
    // #4: Config-block hostname extraction
    // -------------------------------------------------------------------------
    /**
     * Collect hostnames (from `*:cisco_hostname` hits and from
     * `hostname`/`switchname` command lines) and report every other literal
     * occurrence of them as a HOSTNAME entity with confidence 0.85.
     *
     * @returns The input array untouched when nothing new is found, otherwise a
     *          new array sorted by start offset.
     */
    _extractAndPropagateHostnames(text, entities) {
        const hostnames = new Set();
        for (const e of entities) {
            // Skip empty values: an empty needle can never be a useful hostname.
            if (e.value && e.detector.endsWith(":cisco_hostname")) {
                hostnames.add(e.value);
            }
        }
        for (const re of [HOSTNAME_CMD_RE, SWITCHNAME_CMD_RE]) {
            // matchAll starts from the regex's current lastIndex, so reset it.
            re.lastIndex = 0;
            for (const m of text.matchAll(re)) {
                if (m[1])
                    hostnames.add(m[1]);
            }
        }
        if (hostnames.size === 0)
            return entities;
        // Exact spans already reported; the scanner skips these and adds its own.
        const covered = new Set(entities.map((e) => `${e.start}:${e.end}`));
        const additional = scanMultiplePatterns(text, [...hostnames], covered, Category.HOSTNAME, 0.85, "context:hostname_propagation");
        if (additional.length === 0)
            return entities;
        return [...entities, ...additional].sort((a, b) => a.start - b.start);
    }
    // -------------------------------------------------------------------------
    // #9: Learned entity propagation
    // -------------------------------------------------------------------------
    /**
     * Report occurrences of previously learned values (confidence 0.80) whose
     * exact span is not already covered by an existing entity.
     */
    _injectLearnedEntities(text, entities) {
        if (this._learnedEntities.size === 0)
            return entities;
        const covered = new Set(entities.map((e) => `${e.start}:${e.end}`));
        // One scan per category so each hit carries the right category.
        const byCat = new Map();
        for (const [value, category] of this._learnedEntities) {
            const bucket = byCat.get(category);
            if (bucket)
                bucket.push(value);
            else
                byCat.set(category, [value]);
        }
        const additional = [];
        for (const [category, values] of byCat) {
            additional.push(...scanMultiplePatterns(text, values, covered, category, 0.80, "context:learned_entity"));
        }
        if (additional.length === 0)
            return entities;
        return [...entities, ...additional].sort((a, b) => a.start - b.start);
    }
    /**
     * Remember qualifying entities for later calls.
     *
     * An entity qualifies when its confidence is at least 0.80, it comes from a
     * learnable detector or is a HOSTNAME, its value has 3+ characters and it
     * is not a common word. The store is trimmed to the 500 newest entries
     * once it exceeds 1000.
     */
    _learnEntities(entities) {
        for (const e of entities) {
            const eligible = e.confidence >= 0.80 &&
                (ContextDetector._LEARNABLE_DETECTORS.has(e.detector) ||
                    e.category === Category.HOSTNAME);
            if (!eligible)
                continue;
            if (e.value.length >= 3 && !COMMON_WORDS.has(e.value.toLowerCase())) {
                this._learnedEntities.set(e.value, e.category);
            }
        }
        // Map iterates in insertion order, so the first keys are the oldest.
        if (this._learnedEntities.size > 1000) {
            let remaining = this._learnedEntities.size - 500;
            for (const key of this._learnedEntities.keys()) {
                if (remaining <= 0)
                    break;
                this._learnedEntities.delete(key);
                remaining--;
            }
        }
    }
    // -------------------------------------------------------------------------
    // #10: Confidence decay for common words
    // -------------------------------------------------------------------------
    /** Halve the confidence of entities whose lowercased value is a common word. */
    _decayCommonWords(entities) {
        return entities.map((e) => COMMON_WORDS.has(e.value.toLowerCase())
            ? { ...e, confidence: e.confidence * 0.5 }
            : e);
    }
}
@@ -0,0 +1,15 @@
1
+ /** User-defined custom pattern detector. */
2
+ import { DetectedEntity } from "../types.js";
3
+ import { BaseDetector } from "./base.js";
4
+ /** One user-supplied pattern entry consumed by CustomPatternDetector. */ export interface CustomPatternDef {
5
+ /** Rule label; appears in each hit's detector id as `custom:<name>`. */ name: string;
6
+ /** Regular-expression source text; compiled with `new RegExp(pattern, "g")`. */ pattern: string;
7
+ /** Optional category; when omitted or not a Category value, hits are tagged Category.CUSTOM. */ category?: string;
8
+ }
9
+ /**
+  * Detector that uses user-defined regex patterns from config.
+  *
+  * Patterns are compiled once in the constructor and evaluated in the order
+  * given; hits are reported with confidence 0.9 and a `custom:<name>` detector id.
+  */
10
+ export declare class CustomPatternDetector implements BaseDetector {
11
+ /** Fixed detector name. */ readonly name = "patterns";
12
+ /** Compiled patterns (name, global RegExp, resolved category). */ private _patterns;
13
+ /** Compiles every definition; an invalid `pattern` makes `new RegExp` throw here. */ constructor(patterns: CustomPatternDef[]);
14
+ /** Returns the accepted matches sorted by start offset; a match rejected by the implementation's overlap check against earlier accepted spans is skipped. */ detect(text: string): DetectedEntity[];
15
+ }
@@ -0,0 +1,58 @@
1
+ /** User-defined custom pattern detector. */
2
+ import { Category } from "../types.js";
3
/**
 * Detector that uses user-defined regex patterns from config.
 *
 * Patterns are tried in the order given. A match is dropped when it overlaps
 * any span that was accepted earlier, so earlier patterns take precedence.
 */
export class CustomPatternDetector {
    name = "patterns";
    _patterns;
    /**
     * @param patterns Definitions with `name`, `pattern` (regex source) and an
     *                 optional `category`. A category that is missing or not a
     *                 known Category value falls back to Category.CUSTOM.
     * @throws SyntaxError (from `new RegExp`) when a pattern is not valid regex source.
     */
    constructor(patterns) {
        const knownCategories = Object.values(Category);
        this._patterns = patterns.map((p) => {
            const requested = p.category ?? "custom";
            return {
                name: p.name,
                regex: new RegExp(p.pattern, "g"),
                category: knownCategories.includes(requested) ? requested : Category.CUSTOM,
            };
        });
    }
    /**
     * Find all non-overlapping, non-empty matches of the configured patterns.
     *
     * @param text Text to scan.
     * @returns Entities (confidence 0.9, detector `custom:<name>`) sorted by start offset.
     */
    detect(text) {
        const entities = [];
        // [start, end) spans that have already been accepted
        const claimed = [];
        for (const { name, regex, category } of this._patterns) {
            // matchAll starts from the regex's current lastIndex, so reset it.
            regex.lastIndex = 0;
            for (const match of text.matchAll(regex)) {
                const start = match.index;
                const end = start + match[0].length;
                // Patterns such as `x*` match the empty string at every
                // position; an empty entity carries nothing to protect.
                if (end === start)
                    continue;
                // Half-open interval intersection. This also catches a new span
                // that fully contains an accepted one, which an endpoint-only
                // check misses.
                const overlaps = claimed.some(([s, e]) => start < e && s < end);
                if (overlaps)
                    continue;
                claimed.push([start, end]);
                entities.push({
                    value: match[0],
                    start,
                    end,
                    category,
                    confidence: 0.9,
                    detector: `custom:${name}`,
                });
            }
        }
        entities.sort((a, b) => a.start - b.start);
        return entities;
    }
}
@@ -0,0 +1,28 @@
1
+ /** Regex-based detectors for structured sensitive data. */
2
+ import { Category, DetectedEntity } from "../types.js";
3
+ import { BaseDetector } from "./base.js";
4
+ /**
+  * Check if a value is a well-known documentation/example/placeholder.
+  *
+  * @param value - Candidate text to test.
+  * @param category - Category to evaluate the value under.
+  *   NOTE(review): presumably the category the value was detected as; the
+  *   implementation is not part of this declaration file — confirm.
+  * @returns `true` when the value is treated as a documentation/example/placeholder value.
+  */
5
+ export declare function isDocExample(value: string, category: Category): boolean;
6
+ /**
+  * Heuristic: return true for subnet masks and wildcard masks.
+  *
+  * @param ip - Address text to classify.
+  *   NOTE(review): presumably a dotted-quad IPv4 string; the accepted format is
+  *   not visible from this declaration — confirm against the implementation.
+  */
7
+ export declare function isMask(ip: string): boolean;
8
+ /** A named regex pattern with its category and the confidence assigned to its matches. */
9
+ export interface PatternDef {
10
+ /** Rule name. NOTE(review): presumably the `<name>` part of `regex:<name>` detector ids and the key used in DetectorOverrides — confirm. */ name: string;
11
+ /** Compiled regular expression used for matching. */ pattern: RegExp;
12
+ /** Category reported for matches of this pattern. */ category: Category;
13
+ /** Confidence reported for matches. NOTE(review): presumably in the 0..1 range like the other detectors' values — confirm. */ confidence: number;
14
+ }
15
+ /** All built-in patterns, as PatternDef records. */
16
+ export declare const BUILTIN_PATTERNS: PatternDef[];
17
+ /**
+  * Override config for individual rules: disable or change confidence.
+  *
+  * Both settings are optional per entry.
+  * NOTE(review): the record key is presumably the rule's PatternDef name — confirm.
+  */
18
+ export type DetectorOverrides = Record<string, {
19
+ /** Whether the rule is enabled; set `false` to disable it (per the summary above). */ enabled?: boolean;
20
+ /** Replacement confidence for the rule's matches. */ confidence?: number;
21
+ }>;
22
+ /** Detects sensitive entities using regex patterns. */
23
+ export declare class RegexDetector implements BaseDetector {
24
+ /** Fixed detector name. */ readonly name = "regex";
25
+ /** Pattern list used by detect(). NOTE(review): presumably the built-ins plus `extraPatterns` after applying `overrides` — confirm. */ private patterns;
26
+ /** Both arguments are optional: additional patterns, and per-rule overrides (see DetectorOverrides). */ constructor(extraPatterns?: PatternDef[], overrides?: DetectorOverrides);
27
+ /** Scan `text` and return the detected entities. */ detect(text: string): DetectedEntity[];
28
+ }