shroud-privacy 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/LICENSE +190 -0
  2. package/NOTICE +7 -0
  3. package/README.md +369 -0
  4. package/dist/audit.d.ts +46 -0
  5. package/dist/audit.js +127 -0
  6. package/dist/canary.d.ts +31 -0
  7. package/dist/canary.js +73 -0
  8. package/dist/config.d.ts +27 -0
  9. package/dist/config.js +123 -0
  10. package/dist/detectors/base.d.ts +8 -0
  11. package/dist/detectors/base.js +2 -0
  12. package/dist/detectors/code.d.ts +25 -0
  13. package/dist/detectors/code.js +144 -0
  14. package/dist/detectors/context.d.ts +31 -0
  15. package/dist/detectors/context.js +357 -0
  16. package/dist/detectors/patterns.d.ts +15 -0
  17. package/dist/detectors/patterns.js +58 -0
  18. package/dist/detectors/regex.d.ts +28 -0
  19. package/dist/detectors/regex.js +955 -0
  20. package/dist/generators/base.d.ts +6 -0
  21. package/dist/generators/base.js +2 -0
  22. package/dist/generators/codes.d.ts +20 -0
  23. package/dist/generators/codes.js +231 -0
  24. package/dist/generators/names.d.ts +29 -0
  25. package/dist/generators/names.js +194 -0
  26. package/dist/generators/network.d.ts +86 -0
  27. package/dist/generators/network.js +477 -0
  28. package/dist/hooks.d.ts +27 -0
  29. package/dist/hooks.js +457 -0
  30. package/dist/index.d.ts +12 -0
  31. package/dist/index.js +58 -0
  32. package/dist/mapping.d.ts +33 -0
  33. package/dist/mapping.js +72 -0
  34. package/dist/obfuscator.d.ts +78 -0
  35. package/dist/obfuscator.js +603 -0
  36. package/dist/redaction.d.ts +26 -0
  37. package/dist/redaction.js +76 -0
  38. package/dist/store.d.ts +40 -0
  39. package/dist/store.js +79 -0
  40. package/dist/types.d.ts +101 -0
  41. package/dist/types.js +35 -0
  42. package/ncg_adapter.py +530 -0
  43. package/openclaw.plugin.json +72 -0
  44. package/package.json +56 -0
  45. package/shroud_bridge.mjs +225 -0
@@ -0,0 +1,955 @@
1
+ /** Regex-based detectors for structured sensitive data. */
2
+ import { Category } from "../types.js";
3
/**
 * String prefixes that mark a dotted quad as a subnet mask or wildcard mask
 * rather than a host address. `isMask` tests candidate values against these
 * with `startsWith`, so every entry ends in a dot.
 */
const MASK_PREFIXES = new Set([
    "255.", "0.0.0.", "0.0.255.", "0.0.15.", "0.0.3.", "0.0.1.",
    "0.255.", "0.128.", "0.192.", "0.224.", "0.240.", "0.248.",
    "0.252.", "128.0.", "192.0.0.", "224.0.", "240.0.", "248.0.",
    "252.0.", "254.0.", "127.0.",
]);
/**
 * IPv4 prefixes reserved for documentation, examples and benchmarking.
 * Addresses under these prefixes are teaching/testing values and are left
 * un-obfuscated. Entries end in a dot so that e.g. "198.18." cannot match
 * "198.180.x.x".
 */
const DOC_IP_PREFIXES = [
    "192.0.2.", // TEST-NET-1 (RFC 5737)
    "198.51.100.", // TEST-NET-2 (RFC 5737)
    "203.0.113.", // TEST-NET-3 (RFC 5737)
    "233.252.0.", // MCAST-TEST-NET (RFC 6676)
    // Benchmarking block 198.18.0.0/15 (RFC 2544). This replaces the former
    // "100.51.16." entry, which is not a reserved range and therefore exempted
    // real addresses from obfuscation.
    "198.18.",
    "198.19.",
];
/** Domains reserved for documentation (RFC 2606) plus common local placeholders. */
const DOC_DOMAINS = new Set([
    "example.com", "example.net", "example.org", // RFC 2606
    "localhost", "invalid",
]);
/** Placeholder host names that stand in for a real device name. */
const DOC_HOSTNAMES = new Set([
    "localhost", "HOSTNAME", "EXAMPLE", "CHANGEME",
    "YOUR_HOST", "YOURHOST", "hostname", "example",
]);
/** IPv6 documentation prefixes (lower-case) that should not be obfuscated. */
const DOC_IPV6_PREFIXES = [
    "2001:db8:", // RFC 3849 documentation prefix
    "2001:0db8:", // Same, zero-padded
];
/** IPv6 literals that are never obfuscated when matched exactly. */
const DOC_IPV6_EXACT = new Set([
    "::1", // Loopback
    "::0", // Unspecified
    "::", // Unspecified
]);
42
/**
 * Reduce a lower-cased e-mail address or URL to its bare host name.
 *
 * Strips, in order: everything up to and including the first "//", the
 * path/query/fragment, any userinfo or e-mail local part (text up to the last
 * "@"), trailing punctuation the URL regex may have swallowed, and a port.
 *
 * @param {string} lower - Lower-cased e-mail address or URL.
 * @returns {string} The host portion, or the remaining text if no structure is found.
 */
function docHostOf(lower) {
    const schemeEnd = lower.indexOf("//");
    let host = schemeEnd === -1 ? lower : lower.slice(schemeEnd + 2);
    host = host.split(/[/?#]/, 1)[0];
    // lastIndexOf yields -1 when there is no "@", making this slice(0).
    host = host.slice(host.lastIndexOf("@") + 1);
    return host.replace(/[^a-z0-9]+$/, "").replace(/:\d+$/, "");
}
/**
 * Check if a value is a well-known documentation/example/placeholder that
 * should be left un-obfuscated.
 *
 * - IP_ADDRESS: IPv6 values (anything containing ":") are compared
 *   case-insensitively against the exact and prefix lists; IPv4 values are
 *   compared against the documentation prefixes.
 * - EMAIL / URL: the host must equal a documentation domain or be a subdomain
 *   of one. A plain substring test is deliberately avoided: it would exempt
 *   real hosts such as "example.company.io" or "example.com.evil.org".
 * - HOSTNAME: compared case-insensitively against the placeholder names.
 * - BGP_ASN and every other category: never treated as documentation.
 *
 * @param {string} value - The detected text.
 * @param {*} category - One of the `Category` values.
 * @returns {boolean} True when the value is a documentation placeholder.
 */
export function isDocExample(value, category) {
    switch (category) {
        case Category.IP_ADDRESS: {
            if (value.includes(":")) {
                const lower = value.toLowerCase();
                return (DOC_IPV6_EXACT.has(lower) ||
                    DOC_IPV6_PREFIXES.some((pfx) => lower.startsWith(pfx)));
            }
            return DOC_IP_PREFIXES.some((pfx) => value.startsWith(pfx));
        }
        case Category.EMAIL:
        case Category.URL: {
            const host = docHostOf(value.toLowerCase());
            for (const d of DOC_DOMAINS) {
                if (host === d || host.endsWith(`.${d}`)) {
                    return true;
                }
            }
            return false;
        }
        case Category.BGP_ASN:
            // Private ASNs are real infra identifiers — don't skip them
            return false;
        case Category.HOSTNAME: {
            // The placeholder set mixes upper- and lower-case spellings, so
            // normalise both sides instead of probing individual case variants.
            const lower = value.toLowerCase();
            for (const placeholder of DOC_HOSTNAMES) {
                if (placeholder.toLowerCase() === lower) {
                    return true;
                }
            }
            return false;
        }
        default:
            return false;
    }
}
83
/**
 * Heuristic test for subnet masks and wildcard masks.
 *
 * A value counts as a mask when it starts with one of the known mask
 * prefixes, or when it has exactly four dot-separated parts that are either
 * all "0"/"255" or begin with three "0" parts.
 *
 * @param {string} ip - Dotted-quad candidate.
 * @returns {boolean} True when the value looks like a mask.
 */
export function isMask(ip) {
    const hasMaskPrefix = [...MASK_PREFIXES].some((prefix) => ip.startsWith(prefix));
    if (hasMaskPrefix) {
        return true;
    }
    const parts = ip.split(".");
    if (parts.length !== 4) {
        return false;
    }
    // Common masks: every part is 0 or 255.
    const onlyZeroOr255 = parts.every((part) => part === "0" || part === "255");
    // Wildcard masks of the form 0.0.0.X.
    const [first, second, third] = parts;
    const leadingZeros = first === "0" && second === "0" && third === "0";
    return onlyZeroOr255 || leadingZeros;
}
103
/**
 * All built-in patterns, one rule per line.
 *
 * Each rule has a `name`, a global `pattern`, the `category` reported for its
 * matches and a `confidence` score. Array order matters: the detector walks
 * the rules in order and a later rule cannot claim text an earlier rule has
 * already claimed. When a pattern has capture groups, the groups (not the
 * whole match) are what the detector reports.
 */
export const BUILTIN_PATTERNS = [
    // ---------------------------------------------------------------- Core PII
    // Local part must start and end with an alphanumeric character.
    { name: "email", pattern: /\b[a-zA-Z0-9](?:[a-zA-Z0-9._%+\-]*[a-zA-Z0-9])?@[a-zA-Z0-9](?:[a-zA-Z0-9\-]*[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9\-]*[a-zA-Z0-9])?)*\.[a-zA-Z]{2,}\b/g, category: Category.EMAIL, confidence: 0.95 },
    { name: "ipv4", pattern: /\b(?:(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\b/g, category: Category.IP_ADDRESS, confidence: 0.95 },
    // Full 8-group form, "::"-compressed forms and "::"-prefixed IPv4-mapped
    // forms. Alternatives starting with "::" use lookarounds as boundaries
    // because \b does not apply next to a colon.
    { name: "ipv6", pattern: /\b(?:[0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}\b|\b(?:[0-9a-fA-F]{1,4}:){1,7}:[0-9a-fA-F]{1,4}\b|\b(?:[0-9a-fA-F]{1,4}:){1,6}(?::[0-9a-fA-F]{1,4}){1,2}\b|\b(?:[0-9a-fA-F]{1,4}:){1,5}(?::[0-9a-fA-F]{1,4}){1,3}\b|\b(?:[0-9a-fA-F]{1,4}:){1,4}(?::[0-9a-fA-F]{1,4}){1,4}\b|\b(?:[0-9a-fA-F]{1,4}:){1,3}(?::[0-9a-fA-F]{1,4}){1,5}\b|\b(?:[0-9a-fA-F]{1,4}:){1,2}(?::[0-9a-fA-F]{1,4}){1,6}\b|\b[0-9a-fA-F]{1,4}:(?::[0-9a-fA-F]{1,4}){1,7}\b|(?:^|(?<=[\s,;=(]))::(?:[0-9a-fA-F]{1,4}(?::[0-9a-fA-F]{1,4}){0,6})?(?=$|[\s,;)\]\/])|(?:^|(?<=[\s,;=(]))::(?:ffff:)?(?:(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(?:25[0-5]|2[0-4]\d|[01]?\d\d?)(?=$|[\s,;)\]\/])/g, category: Category.IP_ADDRESS, confidence: 0.9 },
    { name: "phone_us", pattern: /\b(?:\+1[\s\-]?)?\(?\d{3}\)?[\s\-]?\d{3}[\s\-]?\d{4}\b/g, category: Category.PHONE, confidence: 0.8 },
    { name: "phone_intl", pattern: /(?<!\w)\+\d{1,3}[\s\-]?\d{4,14}\b/g, category: Category.PHONE, confidence: 0.75 },
    { name: "credit_card", pattern: /\b(?:\d{4}[\s\-]?){3}\d{4}\b/g, category: Category.CREDIT_CARD, confidence: 0.85 },
    { name: "ssn", pattern: /\b\d{3}[\s\-]\d{2}[\s\-]\d{4}\b/g, category: Category.SSN, confidence: 0.9 },
    // ----------------------------------------------------- API keys and tokens
    { name: "api_key_generic", pattern: /\b(?:sk|pk|api|key|token|secret|access)[-_][a-zA-Z0-9\-_]{20,}\b/gi, category: Category.API_KEY, confidence: 0.95 },
    { name: "api_key_aws", pattern: /\bAKIA[0-9A-Z]{16}\b/g, category: Category.API_KEY, confidence: 0.95 },
    { name: "bearer_token", pattern: /(?:Bearer\s+)([A-Za-z0-9\-_=]+\.?[A-Za-z0-9\-_=]*\.?[A-Za-z0-9\-_=]*)/g, category: Category.API_KEY, confidence: 0.9 },
    // --------------------- Credentials embedded in URLs / connection strings
    // Listed ahead of the generic URL rule so they are evaluated first.
    { name: "url_query_password", pattern: /[?&](?:password|passwd|secret|token|api_key|apikey|auth_token|access_token)=([^&\s]{3,})/gi, category: Category.NETWORK_CREDENTIAL, confidence: 0.95 },
    { name: "connection_string_password", pattern: /(?:postgres|mysql|mongodb|redis|amqp|mssql|mariadb|oracle):\/\/[^:]+:([^@]{3,})@/gi, category: Category.NETWORK_CREDENTIAL, confidence: 0.95 },
    // ---------------------------------------------------------- URLs and paths
    { name: "url", pattern: /https?:\/\/[^\s<>"')\]]+/g, category: Category.URL, confidence: 0.9 },
    // At least three path segments, which keeps short git-diff style /a/ and
    // /b/ paths out.
    { name: "file_path_unix", pattern: /(?<!\w)(?:\/[\w.\-]+){3,}(?:\.\w+)?/g, category: Category.FILE_PATH, confidence: 0.7 },
    { name: "file_path_windows", pattern: /\b[A-Z]:\\(?:[\w.\-]+\\)*[\w.\-]+\b/g, category: Category.FILE_PATH, confidence: 0.8 },
    // -------------------------------------------------- Network infrastructure
    // Colon/hyphen separated octets, or three dot-separated 4-hex-digit groups.
    { name: "mac_address", pattern: /\b(?:[0-9a-fA-F]{2}[:\-]){5}[0-9a-fA-F]{2}\b|\b(?:[0-9a-fA-F]{4}\.){2}[0-9a-fA-F]{4}\b/g, category: Category.MAC_ADDRESS, confidence: 0.95 },
    { name: "snmp_community", pattern: /(?:snmp-server\s+community\s+)(\S+)/gi, category: Category.SNMP_COMMUNITY, confidence: 1.0 },
    // Two capture groups: the auth secret and the priv secret.
    { name: "snmp_auth_priv", pattern: /(?:auth\s+\S+\s+)(\S+)(?:\s+priv\s+\S+\s+\d*\s*)(\S+)/gi, category: Category.NETWORK_CREDENTIAL, confidence: 1.0 },
    // ------------------------------------------------ Cisco secrets and hashes
    { name: "cisco_enable_secret", pattern: /(?:enable\s+secret\s+\d+\s+)(\S+)/g, category: Category.NETWORK_CREDENTIAL, confidence: 1.0 },
    // "password 7 XXXX", "password 5 XXXX" or "password 0 XXXX"
    { name: "cisco_password_line", pattern: /(?:password\s+(?:[057]\s+))(\S+)/g, category: Category.NETWORK_CREDENTIAL, confidence: 1.0 },
    // "username admin secret 5 $1$..." or "username admin password 7 ..."
    { name: "cisco_username_secret", pattern: /(?:username\s+\S+\s+(?:secret|password)\s+\d+\s+)(\S+)/g, category: Category.NETWORK_CREDENTIAL, confidence: 1.0 },
    { name: "cisco_password_hash_type5", pattern: /\$1\$[A-Za-z0-9./]+\$[A-Za-z0-9./]+/g, category: Category.NETWORK_CREDENTIAL, confidence: 1.0 },
    { name: "cisco_password_hash_type8", pattern: /\$8\$[A-Za-z0-9./]+\$[A-Za-z0-9./+]+/g, category: Category.NETWORK_CREDENTIAL, confidence: 1.0 },
    { name: "cisco_password_hash_type9", pattern: /\$9\$[A-Za-z0-9./]+\$[A-Za-z0-9./+]+/g, category: Category.NETWORK_CREDENTIAL, confidence: 1.0 },
    // Run of four or more hex digits following "password 7".
    { name: "cisco_type7", pattern: /(?:password\s+7\s+)([0-9A-Fa-f]{4,})/g, category: Category.NETWORK_CREDENTIAL, confidence: 1.0 },
    { name: "key_string", pattern: /(?:key-string\s+(?:\d+\s+)?)(\S+)/g, category: Category.NETWORK_CREDENTIAL, confidence: 1.0 },
    { name: "tacacs_key", pattern: /(?:tacacs-server\s+(?:host\s+\S+\s+)?key\s+(?:\d+\s+)?)(\S+)/g, category: Category.NETWORK_CREDENTIAL, confidence: 1.0 },
    { name: "radius_key", pattern: /(?:radius-server\s+(?:host\s+\S+\s+)?key\s+(?:\d+\s+)?)(\S+)/g, category: Category.NETWORK_CREDENTIAL, confidence: 1.0 },
    { name: "ntp_auth_key", pattern: /(?:ntp\s+authentication-key\s+\d+\s+md5\s+)(\S+)/g, category: Category.NETWORK_CREDENTIAL, confidence: 1.0 },
    // ---------------------------------------------------- BGP / OSPF / routing
    { name: "bgp_asn", pattern: /\b(?:router\s+bgp|remote-as|local-as|peer-as)\s+(\d{4,6})\b/gi, category: Category.BGP_ASN, confidence: 0.95 },
    { name: "bgp_neighbor_password", pattern: /(?:neighbor\s+\S+\s+password\s+(?:\d+\s+)?)(\S+)/g, category: Category.NETWORK_CREDENTIAL, confidence: 1.0 },
    { name: "ospf_router_id", pattern: /(?:router-id\s+)(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})/g, category: Category.OSPF_ID, confidence: 0.95 },
    // Dotted-quad area IDs such as "area 0.0.0.1".
    { name: "ospf_area", pattern: /(?:area\s+)(\d{1,3}(?:\.\d{1,3}){3})\b/g, category: Category.OSPF_ID, confidence: 0.85 },
    { name: "ospf_auth_key", pattern: /(?:(?:ip\s+ospf\s+)?(?:authentication-key|message-digest-key\s+\d+\s+md5)\s+(?:\d+\s+)?)(\S+)/g, category: Category.NETWORK_CREDENTIAL, confidence: 1.0 },
    // -------------------------------------------------------------------- VLAN
    // "vlan <n>" followed on the next line by "name <NAME>".
    { name: "vlan_name", pattern: /(?:vlan\s+\d+\s*\n\s*name\s+)(\S+)/gm, category: Category.VLAN_ID, confidence: 0.9 },
    // "switchport trunk allowed vlan 100,200,300-400"
    { name: "vlan_range", pattern: /(?:allowed\s+vlan\s+(?:add\s+)?)(\d[\d,\-]+)/gi, category: Category.VLAN_ID, confidence: 0.85 },
    // -------------------------------------------------- Interface descriptions
    // Rest of a line that begins with "description".
    { name: "interface_description", pattern: /(?:^\s*description\s+)(.+)$/gm, category: Category.INTERFACE_DESC, confidence: 0.9 },
    // ------------------------------------------------------- Route maps / ACLs
    { name: "route_map_name", pattern: /(?:route-map\s+)(\S+)(?:\s+(?:permit|deny))?/g, category: Category.ROUTE_MAP, confidence: 0.85 },
    { name: "prefix_list_name", pattern: /(?:ip\s+prefix-list\s+)(\S+)/g, category: Category.ACL_NAME, confidence: 0.85 },
    { name: "acl_name", pattern: /(?:ip\s+access-list\s+(?:standard|extended)\s+)(\S+)/g, category: Category.ACL_NAME, confidence: 0.85 },
    // ------------------------------------------------ Network device hostnames
    // Cisco/IOS "hostname <name>" config line
    { name: "cisco_hostname", pattern: /(?:^|\n)\s*hostname\s+(\S+)/g, category: Category.HOSTNAME, confidence: 0.95 },
    // Dotted hierarchical device names: 24.rou.acn.atccv.care, 1a.sw.atm.atvie.ops
    { name: "device_name_dotted", pattern: /\b(\w{1,4}\.(?:rou|sw|rtr|fw)\.(?:[a-z]{2,8}\.){1,3}(?:care|ops|mgmt|cnet|prod|lab|dev))\b/gi, category: Category.HOSTNAME, confidence: 0.90 },
    // Short device codes such as FCNETR1, WCNETR2, LCNETR3.
    { name: "device_name_short", pattern: /\b([A-Z]{1,4}(?:CNET|ONET|MNET|ANET)[A-Z]?\d{1,2})\b/g, category: Category.HOSTNAME, confidence: 0.85 },
    // Hyphenated device names: f-o-w-cnetr1, l-care-acn-rou24
    { name: "device_name_hyphenated", pattern: /\b([a-z]{1,6}(?:-[a-z]{1,8}){2,5}[a-z]?\d{1,3})\b/gi, category: Category.HOSTNAME, confidence: 0.70 },
    // ------------------------------------------------ Syslog / monitoring (#5)
    // Cisco syslog facility: %SYS-5-CONFIG_I, %LINK-3-UPDOWN
    { name: "syslog_facility", pattern: /%([A-Z][A-Z_]+-\d+-[A-Z_]+)/g, category: Category.HOSTNAME, confidence: 0.80 },
    // Source interface in logging/SNMP: trap-source Loopback0, logging source-interface Vlan1
    { name: "syslog_source_interface", pattern: /(?:trap-source|source-interface|logging\s+source-interface)\s+(\S+)/gi, category: Category.HOSTNAME, confidence: 0.85 },
    // ------------------------------------- Description field sub-entities (#6)
    // Circuit ID in description: CID: ABC-123, circuit-id XYZ/456
    { name: "circuit_id", pattern: /(?:CID|circuit[- ]?id|circuit)\s*[:# ]\s*([A-Za-z0-9\-/]{3,30})/gi, category: Category.CUSTOM, confidence: 0.85 },
    // Org/customer name in description: LINK TO Acme Corp, CONNECTION FROM BigCo
    { name: "description_org", pattern: /(?:(?:LINK|CONN(?:ECTION)?|CIRCUIT|PEER|UPLINK)\s+(?:TO|FROM|WITH)\s+)([A-Z][A-Za-z0-9\s&,.\-]{2,30})/g, category: Category.ORG_NAME, confidence: 0.75 },
    // ==========================================================================
    // Wave 1: Enterprise / Regulated / Critical Infrastructure
    // ==========================================================================
    // ----------------------------------------------- Austrian / EU identifiers
    { name: "iban", pattern: /\b[A-Z]{2}\d{2}[\s]?\d{4}[\s]?\d{4}[\s]?\d{4}[\s]?\d{4}[\s]?\d{0,4}\b/g, category: Category.IBAN, confidence: 0.90 },
    { name: "austrian_svnr", pattern: /\b\d{4}[0-3]\d[01]\d\d{2}\b/g, category: Category.NATIONAL_ID, confidence: 0.80 },
    { name: "german_personalausweis", pattern: /\b[LMNTPRV][A-Z0-9]{8}\d\b/g, category: Category.NATIONAL_ID, confidence: 0.85 },
    { name: "eu_vat_number", pattern: /\b(?:AT|DE|FR|IT|NL|ES|BE|PL|CZ|SE|DK|FI|IE|PT|GR|HU|RO|BG|HR|SI|SK|LT|LV|EE|LU|MT|CY)U?\d{8,12}\b/g, category: Category.NATIONAL_ID, confidence: 0.85 },
    { name: "gps_coordinate", pattern: /(?<!\w)-?\d{1,3}\.\d{4,8}[,\s]+-?\d{1,3}\.\d{4,8}(?!\w)/g, category: Category.GPS_COORDINATE, confidence: 0.85 },
    // ----------------------------------------------------------- JWT and OAuth
    { name: "jwt_token", pattern: /\beyJ[A-Za-z0-9\-_]+\.eyJ[A-Za-z0-9\-_]+\.[A-Za-z0-9\-_]+\b/g, category: Category.JWT, confidence: 0.95 },
    { name: "oauth_refresh_token", pattern: /(?:refresh_token["':\s]+)([A-Za-z0-9\-_]{20,})/g, category: Category.API_KEY, confidence: 0.90 },
    // --------------------------------------------------- Cloud provider tokens
    { name: "aws_secret_key", pattern: /(?:SecretAccessKey|aws_secret_access_key)["':\s=]+([A-Za-z0-9/+=]{40})/gi, category: Category.API_KEY, confidence: 0.95 },
    { name: "gcp_api_key", pattern: /\bAIza[0-9A-Za-z\-_]{35}\b/g, category: Category.API_KEY, confidence: 0.95 },
    { name: "azure_connection_string", pattern: /DefaultEndpointsProtocol=[^;\s]+;AccountName=[^;\s]+;AccountKey=[^;\s]+/g, category: Category.NETWORK_CREDENTIAL, confidence: 1.00 },
    { name: "slack_token", pattern: /\bxox[bpsar]-[A-Za-z0-9\-]{10,}/g, category: Category.API_KEY, confidence: 0.95 },
    { name: "github_pat", pattern: /\b(?:ghp|gho|ghu|ghs|ghr)_[A-Za-z0-9]{36,}\b/g, category: Category.API_KEY, confidence: 0.95 },
    { name: "gitlab_token", pattern: /\bglpat-[A-Za-z0-9\-]{20,}\b/g, category: Category.API_KEY, confidence: 0.95 },
    { name: "stripe_key", pattern: /\b[sr]k_(?:live|test)_[A-Za-z0-9]{24,}\b/g, category: Category.API_KEY, confidence: 0.95 },
    { name: "sendgrid_key", pattern: /\bSG\.[A-Za-z0-9\-_]{22}\.[A-Za-z0-9\-_]{43}\b/g, category: Category.API_KEY, confidence: 0.95 },
    { name: "hashicorp_vault_token", pattern: /\b(?:hvs\.[A-Za-z0-9]{24,}|s\.[A-Za-z0-9]{24})\b/g, category: Category.API_KEY, confidence: 0.95 },
    // --------------------------------------------- Database connection strings
    { name: "db_connection_string", pattern: /(?:postgres|mysql|mongodb|mongodb\+srv|redis|amqp)s?:\/\/[^\s<>"']+/g, category: Category.NETWORK_CREDENTIAL, confidence: 1.00 },
    { name: "jdbc_url", pattern: /jdbc:(?:oracle|sqlserver|mysql|postgresql|mariadb):[^\s<>"']+/g, category: Category.NETWORK_CREDENTIAL, confidence: 0.95 },
    // --------------------------------------------------- Certificates and keys
    { name: "pem_private_key", pattern: /-----BEGIN (?:RSA |EC |DSA |OPENSSH |ENCRYPTED )?PRIVATE KEY-----[\s\S]*?-----END (?:RSA |EC |DSA |OPENSSH |ENCRYPTED )?PRIVATE KEY-----/g, category: Category.CERTIFICATE, confidence: 1.00 },
    { name: "pem_certificate", pattern: /-----BEGIN CERTIFICATE-----[\s\S]*?-----END CERTIFICATE-----/g, category: Category.CERTIFICATE, confidence: 0.85 },
    // ------------------------------------------------- LDAP / Active Directory
    { name: "ldap_bind_dn", pattern: /\bCN=[^,]+(?:,(?:OU|DC|O|C)=[^,]+){2,}/gi, category: Category.PERSON_NAME, confidence: 0.90 },
    { name: "ldap_bind_password", pattern: /(?:bindPassword|LDAP_BIND_PW|ldap_password)["':\s=]+(\S+)/gi, category: Category.NETWORK_CREDENTIAL, confidence: 1.00 },
    { name: "ad_domain_login", pattern: /\b[A-Z][A-Z0-9]{1,15}\\[a-zA-Z][a-zA-Z0-9._\-]{0,30}\b/g, category: Category.PERSON_NAME, confidence: 0.85 },
    { name: "windows_sid", pattern: /\bS-1-5-21-\d+-\d+-\d+(?:-\d+)?\b/g, category: Category.NATIONAL_ID, confidence: 0.90 },
    // ----------------------------------------------------------------- Juniper
    { name: "junos_secret", pattern: /"\$9\$[A-Za-z0-9./]+"/g, category: Category.NETWORK_CREDENTIAL, confidence: 1.00 },
    { name: "junos_preshared_key", pattern: /(?:pre-shared-key\s+(?:ascii-text|hexadecimal)\s+)"([^"]+)"/g, category: Category.NETWORK_CREDENTIAL, confidence: 1.00 },
    { name: "junos_root_auth", pattern: /(?:encrypted-password\s+)"([^"]+)"/g, category: Category.NETWORK_CREDENTIAL, confidence: 1.00 },
    { name: "junos_community", pattern: /(?:community\s+)(\S+)(?:\s+(?:authorization|clients))/g, category: Category.SNMP_COMMUNITY, confidence: 1.00 },
    { name: "junos_description", pattern: /(?:description\s+)"([^"]+)"/g, category: Category.INTERFACE_DESC, confidence: 0.90 },
    // --------------------------------------------------------------- Palo Alto
    { name: "panos_api_key", pattern: /\bLUFRPT[A-Za-z0-9=+/]{20,}\b/g, category: Category.API_KEY, confidence: 0.95 },
    { name: "panos_password_hash", pattern: /(?:phash\s+)(\S+)/g, category: Category.NETWORK_CREDENTIAL, confidence: 1.00 },
    { name: "panos_master_key", pattern: /(?:master-key\s+)(\S+)/g, category: Category.NETWORK_CREDENTIAL, confidence: 1.00 },
    { name: "panos_address_object", pattern: /(?:set\s+address\s+)(\S+)(?:\s+ip-netmask)/g, category: Category.HOSTNAME, confidence: 0.80 },
    { name: "panos_zone_name", pattern: /(?:set\s+zone\s+)(\S+)(?:\s+network)/g, category: Category.ACL_NAME, confidence: 0.80 },
    { name: "panos_rule_name", pattern: /(?:set\s+rulebase\s+security\s+rules\s+)"?([^"\s]+)"?/g, category: Category.ACL_NAME, confidence: 0.85 },
    // ------------------------------------------------------------- Check Point
    { name: "checkpoint_password_hash", pattern: /(?:set\s+password-hash\s+)(\S+)/g, category: Category.NETWORK_CREDENTIAL, confidence: 1.00 },
    { name: "checkpoint_sic_key", pattern: /(?:sic\s+(?:init|key)\s+)(\S+)/gi, category: Category.NETWORK_CREDENTIAL, confidence: 1.00 },
    { name: "checkpoint_api_key", pattern: /(?:api-key\s+)"?([A-Za-z0-9+/=]{20,})"?/g, category: Category.API_KEY, confidence: 0.95 },
    { name: "checkpoint_object_name", pattern: /(?:add\s+(?:host|network|group|service-tcp|service-udp)\s+name\s+)"?([^"\s]+)"?/g, category: Category.HOSTNAME, confidence: 0.80 },
    { name: "checkpoint_rule_name", pattern: /(?:add\s+access-rule\s+.*name\s+)"?([^"\s]+)"?/g, category: Category.ACL_NAME, confidence: 0.85 },
    { name: "checkpoint_vpn_community", pattern: /(?:set\s+vpn-community\s+)"?([^"\s]+)"?/g, category: Category.ACL_NAME, confidence: 0.85 },
    // ------------------------------------------------------------------ Arista
    { name: "arista_secret", pattern: /(?:secret\s+sha512\s+)(\$6\$[A-Za-z0-9./]+\$[A-Za-z0-9./+]+)/g, category: Category.NETWORK_CREDENTIAL, confidence: 1.00 },
    // --------------------------------------------------------------- F5 BIG-IP
    { name: "f5_password", pattern: /(?:auth\s+password\s+)(\S+)/g, category: Category.NETWORK_CREDENTIAL, confidence: 1.00 },
    { name: "f5_ssl_passphrase", pattern: /(?:passphrase\s+)(\S+)/g, category: Category.NETWORK_CREDENTIAL, confidence: 0.90 },
    // ---------------------------------------------------------------- Fortinet
    { name: "fortinet_password", pattern: /(?:set\s+password\s+ENC\s+)(\S+)/g, category: Category.NETWORK_CREDENTIAL, confidence: 1.00 },
    { name: "fortinet_private_key", pattern: /(?:set\s+private-key\s+)"(-----BEGIN[\s\S]*?-----END[^"]+)"/g, category: Category.CERTIFICATE, confidence: 1.00 },
    // ---------------------------------------------------- VPN / IPSec / RADIUS
    { name: "vpn_preshared_key", pattern: /(?:pre-shared-key|preshared-key|crypto\s+isakmp\s+key)\s+(?:\d+\s+)?(\S+)/gi, category: Category.NETWORK_CREDENTIAL, confidence: 1.00 },
    { name: "ipsec_transform_set", pattern: /(?:crypto\s+ipsec\s+transform-set\s+)(\S+)/g, category: Category.ACL_NAME, confidence: 0.80 },
    // ------------------------------------------------------------- ICS / SCADA
    { name: "opc_ua_endpoint", pattern: /opc\.tcp:\/\/[^\s<>"']+/g, category: Category.ICS_IDENTIFIER, confidence: 0.90 },
    { name: "modbus_address", pattern: /(?:modbus|slave|unit[\-_]?id)[\s:=]+(\d{1,3})/gi, category: Category.ICS_IDENTIFIER, confidence: 0.80 },
    { name: "scada_credential", pattern: /(?:scada|hmi|plc|rtu|ied)[\-_\s]?(?:password|pass|pwd|credential|auth)[\s:="']+(\S+)/gi, category: Category.NETWORK_CREDENTIAL, confidence: 1.00 },
    { name: "iec61850_ied_name", pattern: /(?:iedName\s*=\s*)"([^"]+)"/g, category: Category.ICS_IDENTIFIER, confidence: 0.90 },
    { name: "dnp3_address", pattern: /(?:dnp3|outstation|master)[\-_\s]?(?:address|addr)[\s:=]+(\d{1,5})/gi, category: Category.ICS_IDENTIFIER, confidence: 0.85 },
    { name: "bacnet_device_id", pattern: /(?:bacnet|device[\-_]?instance)[\s:=]+(\d{1,7})/gi, category: Category.ICS_IDENTIFIER, confidence: 0.80 },
    // Backslash-separated paths that start with a double backslash.
    { name: "historian_tag", pattern: /\\\\[A-Za-z0-9\-_.]+\\[A-Za-z0-9\-_.]+(?:\\[A-Za-z0-9\-_.]+)*/g, category: Category.ICS_IDENTIFIER, confidence: 0.85 },
    // ---------------------------------------------------------- Aviation / ATC
    { name: "atc_sector_id", pattern: /\b(?:TWR|APP|ACC|CTR|GND|DEL|ATIS)[\-_][A-Z0-9]{2,10}\b/g, category: Category.ICS_IDENTIFIER, confidence: 0.90 },
    { name: "nav_frequency", pattern: /\b1[01]\d\.\d{1,3}\s?MHz\b/g, category: Category.ICS_IDENTIFIER, confidence: 0.85 },
    // Four capitals, only when followed by an ATC keyword (checked via lookahead).
    { name: "icao_designator", pattern: /\b[A-Z]{4}\b(?=[\s\-](?:TWR|APP|GND|CTR|ATIS|RWY|SID|STAR))/g, category: Category.ICS_IDENTIFIER, confidence: 0.85 },
    // ----------------------------------------------------------------- Telecom
    { name: "imsi", pattern: /(?:IMSI|imsi)[\s:=]+(\d{15})/g, category: Category.NATIONAL_ID, confidence: 0.95 },
    { name: "imei", pattern: /(?:IMEI|imei)[\s:=]+(\d{15})/g, category: Category.NATIONAL_ID, confidence: 0.90 },
    { name: "clli_code", pattern: /\b[A-Z]{6}\d{2}[A-Z0-9]{3}\b/g, category: Category.ICS_IDENTIFIER, confidence: 0.85 },
    // -------------------------------------------------- Base64-encoded secrets
    { name: "base64_secret_assignment", pattern: /(?:SECRET|PRIVATE_KEY|PASSWORD|TOKEN|API_KEY|APIKEY|AUTH)[\s]*[=:]\s*[A-Za-z0-9+/]{20,}={0,2}/gi, category: Category.API_KEY, confidence: 0.90 },
    { name: "base64_prefixed", pattern: /\bbase64:[A-Za-z0-9+/]{8,}={0,2}/g, category: Category.API_KEY, confidence: 0.85 },
];
824
/**
 * Check whether the half-open span [spanStart, spanEnd) overlaps any span
 * that has already been claimed.
 *
 * Two half-open intervals overlap exactly when each one starts before the
 * other ends. This covers partial overlap on either side, the new span lying
 * inside a claimed span, and the new span fully enclosing a claimed span
 * (for example a whole URL around an already-claimed password).
 * Spans that merely touch (one ends where the other starts) do not overlap.
 *
 * @param {number} spanStart - Inclusive start offset of the candidate span.
 * @param {number} spanEnd - Exclusive end offset of the candidate span.
 * @param {Iterable<[number, number]>} seenSpans - Already-claimed [start, end) spans.
 * @returns {boolean} True when the candidate overlaps at least one claimed span.
 */
function spansOverlap(spanStart, spanEnd, seenSpans) {
    for (const [s, e] of seenSpans) {
        if (spanStart < e && s < spanEnd) {
            return true;
        }
    }
    return false;
}
833
/**
 * Detects sensitive entities using regex patterns.
 *
 * The detector runs every configured pattern over the text in order. A match
 * (or, for patterns with capture groups, each participating group) becomes an
 * entity unless its span overlaps one that was claimed earlier, it is an
 * IP-category subnet/wildcard mask, or it is a documentation placeholder.
 */
export class RegexDetector {
    /**
     * Cache mapping a configured RegExp to an equivalent RegExp that carries
     * the `g` flag (required by `matchAll`) and the `d` flag (exposes exact
     * capture-group offsets as `match.indices`).
     */
    static #scanners = new WeakMap();
    name = "regex";
    patterns;
    /**
     * @param {Array<object>} [extraPatterns] - Additional pattern definitions
     *   appended after the built-in ones.
     * @param {Object<string, {enabled?: boolean, confidence?: number}>} [overrides]
     *   - Per-pattern overrides keyed by pattern name. `enabled: false` removes
     *   the pattern; `confidence` replaces its confidence score.
     */
    constructor(extraPatterns, overrides) {
        let patterns = [...BUILTIN_PATTERNS];
        if (extraPatterns) {
            patterns.push(...extraPatterns);
        }
        if (overrides) {
            patterns = patterns
                .filter((p) => overrides[p.name]?.enabled !== false)
                .map((p) => {
                    const confidence = overrides[p.name]?.confidence;
                    // Copy rather than mutate: the built-in definitions are shared.
                    return confidence !== undefined ? { ...p, confidence } : p;
                });
        }
        this.patterns = patterns;
    }
    /**
     * Scan `text` with every configured pattern.
     *
     * @param {string} text - Text to scan.
     * @returns {Array<object>} Entities (`value`, `start`, `end`, `category`,
     *   `confidence`, `detector`) sorted by ascending start offset.
     */
    detect(text) {
        const entities = [];
        const seenSpans = [];
        for (const pdef of this.patterns) {
            const scanner = RegexDetector.#scannerFor(pdef.pattern);
            const detector = `${this.name}:${pdef.name}`;
            for (const match of text.matchAll(scanner)) {
                // If the pattern has participating capture groups, emit each
                // group as a separate entity. Otherwise use the full match.
                const hasGroups = match.slice(1).some((g) => g !== undefined);
                if (!hasGroups) {
                    RegexDetector.#claim(pdef, detector, match[0], match.index, entities, seenSpans);
                    continue;
                }
                for (let i = 1; i < match.length; i++) {
                    const grp = match[i];
                    if (grp === undefined) {
                        continue;
                    }
                    // Exact offset from the `d` flag. Searching the match text
                    // for the group value is only a fallback, because it picks
                    // the wrong position when the value also occurs earlier in
                    // the match (e.g. "snmp-server community c").
                    const grpStart = match.indices?.[i]?.[0] ??
                        findGroupStart(match[0], match.index, grp, match, i);
                    RegexDetector.#claim(pdef, detector, grp, grpStart, entities, seenSpans);
                }
            }
        }
        entities.sort((a, b) => a.start - b.start);
        return entities;
    }
    /**
     * Return a scanning copy of `pattern` with the `g` and `d` flags added,
     * creating and caching it on first use.
     */
    static #scannerFor(pattern) {
        let scanner = RegexDetector.#scanners.get(pattern);
        if (scanner === undefined) {
            // De-duplicate so flags already present are not repeated.
            const flags = [...new Set(`${pattern.flags}gd`)].join("");
            scanner = new RegExp(pattern.source, flags);
            RegexDetector.#scanners.set(pattern, scanner);
        }
        // matchAll starts from the regex's current lastIndex.
        scanner.lastIndex = 0;
        return scanner;
    }
    /**
     * Apply the skip rules to one candidate and, if it survives, record its
     * span and append the entity.
     */
    static #claim(pdef, detector, value, start, entities, seenSpans) {
        const end = start + value.length;
        if (spansOverlap(start, end, seenSpans)) {
            return;
        }
        // Skip subnet/wildcard masks for IP-like values
        if (pdef.category === Category.IP_ADDRESS && isMask(value)) {
            return;
        }
        // Skip documentation/example values (#7)
        if (isDocExample(value, pdef.category)) {
            return;
        }
        seenSpans.push([start, end]);
        entities.push({
            value,
            start,
            end,
            category: pdef.category,
            confidence: pdef.confidence,
            detector,
        });
    }
}
936
/**
 * Find the absolute start position of a capture group within the text.
 *
 * When the match was produced by a regex with the `d` flag, the exact offset
 * from `match.indices` is returned. Otherwise the position is estimated by
 * searching the full match string for the group value, starting after the
 * text of the preceding groups. That estimate can be wrong when the group
 * value also occurs earlier in the match, so callers that need exact offsets
 * should match with the `d` flag.
 *
 * @param {string} fullMatch - The complete matched text (`match[0]`).
 * @param {number} fullMatchStart - Offset of the full match in the text.
 * @param {string} groupValue - Text captured by the group.
 * @param {ArrayLike<string|undefined>} match - The match array.
 * @param {number} groupIndex - 1-based index of the capture group.
 * @returns {number} Absolute start offset of the group; never before
 *   `fullMatchStart` on the estimated path.
 */
function findGroupStart(fullMatch, fullMatchStart, groupValue, match, groupIndex) {
    const exactStart = match?.indices?.[groupIndex]?.[0];
    if (typeof exactStart === "number") {
        return exactStart;
    }
    // Skip past earlier groups so a repeated value resolves to the later
    // occurrence.
    let searchFrom = 0;
    for (let prev = 1; prev < groupIndex; prev++) {
        const earlier = match[prev];
        if (earlier === undefined) {
            continue;
        }
        const prevPos = fullMatch.indexOf(earlier, searchFrom);
        if (prevPos !== -1) {
            searchFrom = prevPos + earlier.length;
        }
    }
    let posInMatch = fullMatch.indexOf(groupValue, searchFrom);
    if (posInMatch === -1) {
        // A group nested inside an earlier group lies before `searchFrom`;
        // retry from the start instead of returning fullMatchStart - 1.
        posInMatch = fullMatch.indexOf(groupValue);
    }
    return fullMatchStart + Math.max(posInMatch, 0);
}