shroud-privacy 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +190 -0
- package/NOTICE +7 -0
- package/README.md +369 -0
- package/dist/audit.d.ts +46 -0
- package/dist/audit.js +127 -0
- package/dist/canary.d.ts +31 -0
- package/dist/canary.js +73 -0
- package/dist/config.d.ts +27 -0
- package/dist/config.js +123 -0
- package/dist/detectors/base.d.ts +8 -0
- package/dist/detectors/base.js +2 -0
- package/dist/detectors/code.d.ts +25 -0
- package/dist/detectors/code.js +144 -0
- package/dist/detectors/context.d.ts +31 -0
- package/dist/detectors/context.js +357 -0
- package/dist/detectors/patterns.d.ts +15 -0
- package/dist/detectors/patterns.js +58 -0
- package/dist/detectors/regex.d.ts +28 -0
- package/dist/detectors/regex.js +955 -0
- package/dist/generators/base.d.ts +6 -0
- package/dist/generators/base.js +2 -0
- package/dist/generators/codes.d.ts +20 -0
- package/dist/generators/codes.js +231 -0
- package/dist/generators/names.d.ts +29 -0
- package/dist/generators/names.js +194 -0
- package/dist/generators/network.d.ts +86 -0
- package/dist/generators/network.js +477 -0
- package/dist/hooks.d.ts +27 -0
- package/dist/hooks.js +457 -0
- package/dist/index.d.ts +12 -0
- package/dist/index.js +58 -0
- package/dist/mapping.d.ts +33 -0
- package/dist/mapping.js +72 -0
- package/dist/obfuscator.d.ts +78 -0
- package/dist/obfuscator.js +603 -0
- package/dist/redaction.d.ts +26 -0
- package/dist/redaction.js +76 -0
- package/dist/store.d.ts +40 -0
- package/dist/store.js +79 -0
- package/dist/types.d.ts +101 -0
- package/dist/types.js +35 -0
- package/ncg_adapter.py +530 -0
- package/openclaw.plugin.json +72 -0
- package/package.json +56 -0
- package/shroud_bridge.mjs +225 -0
|
@@ -0,0 +1,955 @@
|
|
|
1
|
+
/** Regex-based detectors for structured sensitive data. */
|
|
2
|
+
import { Category } from "../types.js";
|
|
3
|
+
/**
 * Subnet masks and wildcard masks should never be obfuscated.
 * Common mask prefixes used to avoid false-positive IP obfuscation.
 * Consumed by isMask(), which tests each entry with String#startsWith.
 */
const MASK_PREFIXES = new Set([
    "255.", "0.0.0.", "0.0.255.", "0.0.15.", "0.0.3.", "0.0.1.",
    "0.255.", "0.128.", "0.192.", "0.224.", "0.240.", "0.248.",
    "0.252.", "128.0.", "192.0.0.", "224.0.", "240.0.", "248.0.",
    "252.0.", "254.0.", "127.0.",
]);
/**
 * IPv4 documentation/test ranges reserved by the IETF.
 * These should never be obfuscated — they're teaching/testing values.
 * Every entry must be a genuinely reserved range: anything listed here is
 * exempted from obfuscation, so a public range would leak real addresses.
 */
const DOC_IP_PREFIXES = [
    "192.0.2.", // TEST-NET-1 (RFC 5737)
    "198.51.100.", // TEST-NET-2 (RFC 5737)
    "203.0.113.", // TEST-NET-3 (RFC 5737)
    "233.252.0.", // MCAST-TEST-NET (RFC 6676)
    "198.18.", // Benchmarking 198.18.0.0/15, lower half (RFC 2544)
    "198.19.", // Benchmarking 198.18.0.0/15, upper half (RFC 2544)
];
/** Reserved example domains and special-use names (matched lower-case). */
const DOC_DOMAINS = new Set([
    "example.com", "example.net", "example.org", // RFC 2606
    "localhost", "invalid",
]);
/** Placeholder hostnames commonly found in templates and documentation. */
const DOC_HOSTNAMES = new Set([
    "localhost", "HOSTNAME", "EXAMPLE", "CHANGEME",
    "YOUR_HOST", "YOURHOST", "hostname", "example",
]);
/** IPv6 documentation/reserved prefixes that should not be obfuscated. */
const DOC_IPV6_PREFIXES = [
    "2001:db8:", // RFC 3849 documentation prefix
    "2001:0db8:", // Same, zero-padded
];
/** IPv6 literals that are exempt as exact (lower-case) matches. */
const DOC_IPV6_EXACT = new Set([
    "::1", // Loopback
    "::0", // Unspecified
    "::", // Unspecified
]);
|
|
42
|
+
/**
 * Extract the host part of an already lower-cased e-mail address or URL.
 *
 * Strips a leading "scheme://" (or bare "//"), anything from the first
 * "/", "?" or "#" onwards, any "userinfo@" prefix, trailing punctuation that
 * a greedy URL matcher may have swallowed from prose, a ":port" suffix and
 * trailing dots.
 *
 * @param {string} lowerValue - Lower-cased e-mail address, URL or bare host.
 * @returns {string} The host portion (may be empty).
 */
function docHostOf(lowerValue) {
    let authority = lowerValue.replace(/^(?:[a-z][a-z0-9+.\-]*:)?\/\//, "");
    const authorityEnd = authority.search(/[\/?#]/);
    if (authorityEnd !== -1) {
        authority = authority.slice(0, authorityEnd);
    }
    // lastIndexOf returns -1 when there is no "@", so slice(0) keeps everything.
    const host = authority.slice(authority.lastIndexOf("@") + 1);
    return host
        .replace(/[.,;:!'"()\[\]{}<>]+$/, "")
        .replace(/:\d+$/, "")
        .replace(/\.+$/, "");
}
/**
 * Check if a value is a well-known documentation/example/placeholder.
 *
 * - IP_ADDRESS: IPv6 values (anything containing ":") are compared
 *   case-insensitively against DOC_IPV6_EXACT and DOC_IPV6_PREFIXES; IPv4
 *   values are compared against DOC_IP_PREFIXES.
 * - EMAIL / URL: true only when the *host* equals a DOC_DOMAINS entry or is a
 *   subdomain of one. Matching is done on label boundaries, so look-alike
 *   hosts such as "example.com.evil.net" or "example.company.io" are NOT
 *   treated as documentation values.
 * - BGP_ASN: always false.
 * - HOSTNAME: case-insensitive membership in DOC_HOSTNAMES.
 * - Any other category: false.
 *
 * @param {string} value - The detected text.
 * @param {*} category - A Category member identifying what `value` is.
 * @returns {boolean} True when the value is a documentation placeholder.
 */
export function isDocExample(value, category) {
    switch (category) {
        case Category.IP_ADDRESS: {
            // IPv6 check
            if (value.includes(":")) {
                const lower = value.toLowerCase();
                return (DOC_IPV6_EXACT.has(lower) ||
                    DOC_IPV6_PREFIXES.some((pfx) => lower.startsWith(pfx)));
            }
            // IPv4 check
            return DOC_IP_PREFIXES.some((pfx) => value.startsWith(pfx));
        }
        case Category.EMAIL:
        case Category.URL: {
            const host = docHostOf(value.toLowerCase());
            for (const d of DOC_DOMAINS) {
                // Exact host or a true subdomain; never a bare substring match.
                if (host === d || host.endsWith(`.${d}`)) {
                    return true;
                }
            }
            return false;
        }
        case Category.BGP_ASN:
            // Private ASNs are real infra identifiers — don't skip them
            return false;
        case Category.HOSTNAME:
            // The set mixes upper- and lower-case placeholders, so probe both
            // foldings in addition to the literal value.
            return (DOC_HOSTNAMES.has(value) ||
                DOC_HOSTNAMES.has(value.toUpperCase()) ||
                DOC_HOSTNAMES.has(value.toLowerCase()));
        default:
            return false;
    }
}
|
|
83
|
+
/**
 * Heuristic: return true for subnet masks and wildcard masks.
 *
 * A value is considered a mask when any of the following holds:
 *  1. it starts with one of the MASK_PREFIXES entries;
 *  2. it is a dotted quad whose octets are all "0" or "255";
 *  3. it is a dotted quad of the form 0.0.0.X;
 *  4. it is a dotted quad with first octet 0 whose remaining 24 bits are a
 *     contiguous run of ones at the low end (e.g. 0.0.7.255, 0.0.31.255,
 *     0.1.255.255) — wildcard masks the prefix table does not enumerate.
 *     Only the 0.x.x.x range is considered, so no routable host address is
 *     ever classified as a mask by this rule.
 *
 * @param {string} ip - Dotted-quad text to classify.
 * @returns {boolean} True when `ip` looks like a netmask or wildcard mask.
 */
export function isMask(ip) {
    for (const pfx of MASK_PREFIXES) {
        if (ip.startsWith(pfx)) {
            return true;
        }
    }
    const octets = ip.split(".");
    if (octets.length !== 4) {
        return false;
    }
    // Common masks: all octets are 0 or 255
    if (octets.every((o) => o === "0" || o === "255")) {
        return true;
    }
    // Wildcard masks like 0.0.0.X
    if (octets[0] === "0" && octets[1] === "0" && octets[2] === "0") {
        return true;
    }
    // Contiguous wildcard masks 0.b.c.d where b.c.d == 2^k - 1
    if (octets[0] === "0" && octets.every((o) => /^\d{1,3}$/.test(o))) {
        const [, b, c, d] = octets.map(Number);
        if (b <= 255 && c <= 255 && d <= 255) {
            const hostBits = (b << 16) | (c << 8) | d;
            // x & (x + 1) is zero exactly when x is a run of low-order ones.
            return (hostBits & (hostBits + 1)) === 0;
        }
    }
    return false;
}
|
|
103
|
+
/**
 * All built-in patterns.
 *
 * Each entry has:
 *   - name:       unique identifier of the detector rule
 *   - pattern:    a global (`g`) RegExp; some rules wrap the sensitive part in
 *                 a capture group and keep the surrounding keyword in a
 *                 non-capturing group
 *   - category:   the Category assigned to matches
 *   - confidence: a score between 0 and 1
 *
 * Array order is significant: credential rules are listed before the generic
 * URL rule so that they claim their spans first.
 *
 * NOTE(review): the patterns carry the `g` flag and are therefore stateful
 * (`lastIndex`) when used with `.exec()`/`.test()`; confirm consumers reset
 * `lastIndex` or use `matchAll`.
 */
export const BUILTIN_PATTERNS = [
    // --- Core PII ---
    {
        name: "email",
        // Local part and every domain label must start and end with an alphanumeric
        pattern: /\b[a-zA-Z0-9](?:[a-zA-Z0-9._%+\-]*[a-zA-Z0-9])?@[a-zA-Z0-9](?:[a-zA-Z0-9\-]*[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9\-]*[a-zA-Z0-9])?)*\.[a-zA-Z]{2,}\b/g,
        category: Category.EMAIL,
        confidence: 0.95,
    },
    {
        name: "ipv4",
        pattern: /\b(?:(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\b/g,
        category: Category.IP_ADDRESS,
        confidence: 0.95,
    },
    {
        name: "ipv6",
        // Full 8-group, compressed ::, loopback ::1, link-local, IPv4-mapped
        // Uses \b where possible; :: forms use lookaround for proper boundary
        pattern: /\b(?:[0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}\b|\b(?:[0-9a-fA-F]{1,4}:){1,7}:[0-9a-fA-F]{1,4}\b|\b(?:[0-9a-fA-F]{1,4}:){1,6}(?::[0-9a-fA-F]{1,4}){1,2}\b|\b(?:[0-9a-fA-F]{1,4}:){1,5}(?::[0-9a-fA-F]{1,4}){1,3}\b|\b(?:[0-9a-fA-F]{1,4}:){1,4}(?::[0-9a-fA-F]{1,4}){1,4}\b|\b(?:[0-9a-fA-F]{1,4}:){1,3}(?::[0-9a-fA-F]{1,4}){1,5}\b|\b(?:[0-9a-fA-F]{1,4}:){1,2}(?::[0-9a-fA-F]{1,4}){1,6}\b|\b[0-9a-fA-F]{1,4}:(?::[0-9a-fA-F]{1,4}){1,7}\b|(?:^|(?<=[\s,;=(]))::(?:[0-9a-fA-F]{1,4}(?::[0-9a-fA-F]{1,4}){0,6})?(?=$|[\s,;)\]\/])|(?:^|(?<=[\s,;=(]))::(?:ffff:)?(?:(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(?:25[0-5]|2[0-4]\d|[01]?\d\d?)(?=$|[\s,;)\]\/])/g,
        category: Category.IP_ADDRESS,
        confidence: 0.9,
    },
    {
        name: "phone_us",
        pattern: /\b(?:\+1[\s\-]?)?\(?\d{3}\)?[\s\-]?\d{3}[\s\-]?\d{4}\b/g,
        category: Category.PHONE,
        confidence: 0.8,
    },
    {
        name: "phone_intl",
        pattern: /(?<!\w)\+\d{1,3}[\s\-]?\d{4,14}\b/g,
        category: Category.PHONE,
        confidence: 0.75,
    },
    {
        name: "credit_card",
        pattern: /\b(?:\d{4}[\s\-]?){3}\d{4}\b/g,
        category: Category.CREDIT_CARD,
        confidence: 0.85,
    },
    {
        name: "ssn",
        pattern: /\b\d{3}[\s\-]\d{2}[\s\-]\d{4}\b/g,
        category: Category.SSN,
        confidence: 0.9,
    },
    // --- API keys and tokens ---
    {
        name: "api_key_generic",
        pattern: /\b(?:sk|pk|api|key|token|secret|access)[-_][a-zA-Z0-9\-_]{20,}\b/gi,
        category: Category.API_KEY,
        confidence: 0.95,
    },
    {
        name: "api_key_aws",
        pattern: /\bAKIA[0-9A-Z]{16}\b/g,
        category: Category.API_KEY,
        confidence: 0.95,
    },
    {
        name: "bearer_token",
        pattern: /(?:Bearer\s+)([A-Za-z0-9\-_=]+\.?[A-Za-z0-9\-_=]*\.?[A-Za-z0-9\-_=]*)/g,
        category: Category.API_KEY,
        confidence: 0.9,
    },
    // --- URL/connection-string embedded credentials (before URL pattern to claim spans first) ---
    {
        name: "url_query_password",
        pattern: /[?&](?:password|passwd|secret|token|api_key|apikey|auth_token|access_token)=([^&\s]{3,})/gi,
        category: Category.NETWORK_CREDENTIAL,
        confidence: 0.95,
    },
    {
        name: "connection_string_password",
        // Covers postgres:// and postgresql://, mongodb:// and mongodb+srv://,
        // plus the TLS variants ending in "s" (rediss://, amqps://, ...)
        pattern: /(?:postgres(?:ql)?|mysql|mongodb(?:\+srv)?|redis|amqp|mssql|mariadb|oracle)s?:\/\/[^:]+:([^@]{3,})@/gi,
        category: Category.NETWORK_CREDENTIAL,
        confidence: 0.95,
    },
    // --- URLs and paths ---
    {
        name: "url",
        pattern: /https?:\/\/[^\s<>"')\]]+/g,
        category: Category.URL,
        confidence: 0.9,
    },
    {
        name: "file_path_unix",
        // Require at least 3 segments to avoid matching git diff /a/ /b/ paths
        pattern: /(?<!\w)(?:\/[\w.\-]+){3,}(?:\.\w+)?/g,
        category: Category.FILE_PATH,
        confidence: 0.7,
    },
    {
        name: "file_path_windows",
        pattern: /\b[A-Z]:\\(?:[\w.\-]+\\)*[\w.\-]+\b/g,
        category: Category.FILE_PATH,
        confidence: 0.8,
    },
    // --- Network infrastructure ---
    {
        name: "mac_address",
        pattern: /\b(?:[0-9a-fA-F]{2}[:\-]){5}[0-9a-fA-F]{2}\b|\b(?:[0-9a-fA-F]{4}\.){2}[0-9a-fA-F]{4}\b/g,
        category: Category.MAC_ADDRESS,
        confidence: 0.95,
    },
    {
        name: "snmp_community",
        pattern: /(?:snmp-server\s+community\s+)(\S+)/gi,
        category: Category.SNMP_COMMUNITY,
        confidence: 1.0,
    },
    {
        name: "snmp_auth_priv",
        pattern: /(?:auth\s+\S+\s+)(\S+)(?:\s+priv\s+\S+\s+\d*\s*)(\S+)/gi,
        category: Category.NETWORK_CREDENTIAL,
        confidence: 1.0,
    },
    // --- Cisco secrets and hashes ---
    {
        name: "cisco_enable_secret",
        pattern: /(?:enable\s+secret\s+\d+\s+)(\S+)/g,
        category: Category.NETWORK_CREDENTIAL,
        confidence: 1.0,
    },
    {
        name: "cisco_password_line",
        // "password 7 XXXX", "password 5 XXXX" or "password 0 XXXX"
        pattern: /(?:password\s+(?:[057]\s+))(\S+)/g,
        category: Category.NETWORK_CREDENTIAL,
        confidence: 1.0,
    },
    {
        name: "cisco_username_secret",
        // "username admin secret 5 $1$..." or "username admin password 7 ..."
        pattern: /(?:username\s+\S+\s+(?:secret|password)\s+\d+\s+)(\S+)/g,
        category: Category.NETWORK_CREDENTIAL,
        confidence: 1.0,
    },
    {
        name: "cisco_password_hash_type5",
        pattern: /\$1\$[A-Za-z0-9./]+\$[A-Za-z0-9./]+/g,
        category: Category.NETWORK_CREDENTIAL,
        confidence: 1.0,
    },
    {
        name: "cisco_password_hash_type8",
        pattern: /\$8\$[A-Za-z0-9./]+\$[A-Za-z0-9./+]+/g,
        category: Category.NETWORK_CREDENTIAL,
        confidence: 1.0,
    },
    {
        name: "cisco_password_hash_type9",
        pattern: /\$9\$[A-Za-z0-9./]+\$[A-Za-z0-9./+]+/g,
        category: Category.NETWORK_CREDENTIAL,
        confidence: 1.0,
    },
    {
        name: "cisco_type7",
        // Cisco type 7 obfuscated passwords: a run of 4+ hex digits after "password 7"
        pattern: /(?:password\s+7\s+)([0-9A-Fa-f]{4,})/g,
        category: Category.NETWORK_CREDENTIAL,
        confidence: 1.0,
    },
    {
        name: "key_string",
        pattern: /(?:key-string\s+(?:\d+\s+)?)(\S+)/g,
        category: Category.NETWORK_CREDENTIAL,
        confidence: 1.0,
    },
    {
        name: "tacacs_key",
        pattern: /(?:tacacs-server\s+(?:host\s+\S+\s+)?key\s+(?:\d+\s+)?)(\S+)/g,
        category: Category.NETWORK_CREDENTIAL,
        confidence: 1.0,
    },
    {
        name: "radius_key",
        pattern: /(?:radius-server\s+(?:host\s+\S+\s+)?key\s+(?:\d+\s+)?)(\S+)/g,
        category: Category.NETWORK_CREDENTIAL,
        confidence: 1.0,
    },
    {
        name: "ntp_auth_key",
        pattern: /(?:ntp\s+authentication-key\s+\d+\s+md5\s+)(\S+)/g,
        category: Category.NETWORK_CREDENTIAL,
        confidence: 1.0,
    },
    // --- BGP / OSPF / routing ---
    {
        name: "bgp_asn",
        pattern: /\b(?:router\s+bgp|remote-as|local-as|peer-as)\s+(\d{4,6})\b/gi,
        category: Category.BGP_ASN,
        confidence: 0.95,
    },
    {
        name: "bgp_neighbor_password",
        pattern: /(?:neighbor\s+\S+\s+password\s+(?:\d+\s+)?)(\S+)/g,
        category: Category.NETWORK_CREDENTIAL,
        confidence: 1.0,
    },
    {
        name: "ospf_router_id",
        pattern: /(?:router-id\s+)(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})/g,
        category: Category.OSPF_ID,
        confidence: 0.95,
    },
    {
        name: "ospf_area",
        // Dotted-quad form only ("area 0.0.0.1"); plain integer areas ("area 1") are not matched
        pattern: /(?:area\s+)(\d{1,3}(?:\.\d{1,3}){3})\b/g,
        category: Category.OSPF_ID,
        confidence: 0.85,
    },
    {
        name: "ospf_auth_key",
        pattern: /(?:(?:ip\s+ospf\s+)?(?:authentication-key|message-digest-key\s+\d+\s+md5)\s+(?:\d+\s+)?)(\S+)/g,
        category: Category.NETWORK_CREDENTIAL,
        confidence: 1.0,
    },
    // --- VLAN ---
    {
        name: "vlan_name",
        // "vlan <id>" line immediately followed by a "name VLAN_NAME" line
        pattern: /(?:vlan\s+\d+\s*\n\s*name\s+)(\S+)/gm,
        category: Category.VLAN_ID,
        confidence: 0.9,
    },
    {
        name: "vlan_range",
        // "switchport trunk allowed vlan 100,200,300-400"
        pattern: /(?:allowed\s+vlan\s+(?:add\s+)?)(\d[\d,\-]+)/gi,
        category: Category.VLAN_ID,
        confidence: 0.85,
    },
    // --- Interface descriptions ---
    {
        name: "interface_description",
        // "description LINK TO CUSTOMER-X" on an interface
        pattern: /(?:^\s*description\s+)(.+)$/gm,
        category: Category.INTERFACE_DESC,
        confidence: 0.9,
    },
    // --- Route maps / ACLs ---
    {
        name: "route_map_name",
        pattern: /(?:route-map\s+)(\S+)(?:\s+(?:permit|deny))?/g,
        category: Category.ROUTE_MAP,
        confidence: 0.85,
    },
    {
        name: "prefix_list_name",
        pattern: /(?:ip\s+prefix-list\s+)(\S+)/g,
        category: Category.ACL_NAME,
        confidence: 0.85,
    },
    {
        name: "acl_name",
        pattern: /(?:ip\s+access-list\s+(?:standard|extended)\s+)(\S+)/g,
        category: Category.ACL_NAME,
        confidence: 0.85,
    },
    // --- Network device hostnames ---
    {
        // Cisco/IOS "hostname <name>" config line
        name: "cisco_hostname",
        pattern: /(?:^|\n)\s*hostname\s+(\S+)/g,
        category: Category.HOSTNAME,
        confidence: 0.95,
    },
    {
        // Dotted hierarchical device names: 24.rou.acn.atccv.care, 1a.sw.atm.atvie.ops
        name: "device_name_dotted",
        pattern: /\b(\w{1,4}\.(?:rou|sw|rtr|fw)\.(?:[a-z]{2,8}\.){1,3}(?:care|ops|mgmt|cnet|prod|lab|dev))\b/gi,
        category: Category.HOSTNAME,
        confidence: 0.90,
    },
    {
        // Short device codes: FCNETR1, WCNETR2, LCNETR3 — uppercase letter(s) + "CNET" or role + digit(s)
        name: "device_name_short",
        pattern: /\b([A-Z]{1,4}(?:CNET|ONET|MNET|ANET)[A-Z]?\d{1,2})\b/g,
        category: Category.HOSTNAME,
        confidence: 0.85,
    },
    {
        // Hyphenated device names with site/zone/role pattern: f-o-w-cnetr1, l-care-acn-rou24
        name: "device_name_hyphenated",
        pattern: /\b([a-z]{1,6}(?:-[a-z]{1,8}){2,5}[a-z]?\d{1,3})\b/gi,
        category: Category.HOSTNAME,
        confidence: 0.70,
    },
    // --- Syslog / monitoring (#5) ---
    {
        // Cisco syslog facility: %SYS-5-CONFIG_I, %LINK-3-UPDOWN
        name: "syslog_facility",
        pattern: /%([A-Z][A-Z_]+-\d+-[A-Z_]+)/g,
        category: Category.HOSTNAME,
        confidence: 0.80,
    },
    {
        // Source interface in logging/SNMP: trap-source Loopback0, logging source-interface Vlan1
        name: "syslog_source_interface",
        pattern: /(?:trap-source|source-interface|logging\s+source-interface)\s+(\S+)/gi,
        category: Category.HOSTNAME,
        confidence: 0.85,
    },
    // --- Description field sub-entities (#6) ---
    {
        // Circuit ID in description: CID: ABC-123, circuit-id XYZ/456
        name: "circuit_id",
        pattern: /(?:CID|circuit[- ]?id|circuit)\s*[:# ]\s*([A-Za-z0-9\-/]{3,30})/gi,
        category: Category.CUSTOM,
        confidence: 0.85,
    },
    {
        // Org/customer name in description: LINK TO Acme Corp, CONNECTION FROM BigCo
        name: "description_org",
        pattern: /(?:(?:LINK|CONN(?:ECTION)?|CIRCUIT|PEER|UPLINK)\s+(?:TO|FROM|WITH)\s+)([A-Z][A-Za-z0-9\s&,.\-]{2,30})/g,
        category: Category.ORG_NAME,
        confidence: 0.75,
    },
    // ==========================================================================
    // Wave 1: Enterprise / Regulated / Critical Infrastructure
    // ==========================================================================
    // --- Austrian / EU identifiers ---
    {
        name: "iban",
        pattern: /\b[A-Z]{2}\d{2}[\s]?\d{4}[\s]?\d{4}[\s]?\d{4}[\s]?\d{4}[\s]?\d{0,4}\b/g,
        category: Category.IBAN,
        confidence: 0.90,
    },
    {
        name: "austrian_svnr",
        pattern: /\b\d{4}[0-3]\d[01]\d\d{2}\b/g,
        category: Category.NATIONAL_ID,
        confidence: 0.80,
    },
    {
        name: "german_personalausweis",
        pattern: /\b[LMNTPRV][A-Z0-9]{8}\d\b/g,
        category: Category.NATIONAL_ID,
        confidence: 0.85,
    },
    {
        name: "eu_vat_number",
        pattern: /\b(?:AT|DE|FR|IT|NL|ES|BE|PL|CZ|SE|DK|FI|IE|PT|GR|HU|RO|BG|HR|SI|SK|LT|LV|EE|LU|MT|CY)U?\d{8,12}\b/g,
        category: Category.NATIONAL_ID,
        confidence: 0.85,
    },
    {
        name: "gps_coordinate",
        pattern: /(?<!\w)-?\d{1,3}\.\d{4,8}[,\s]+-?\d{1,3}\.\d{4,8}(?!\w)/g,
        category: Category.GPS_COORDINATE,
        confidence: 0.85,
    },
    // --- JWT and OAuth ---
    {
        name: "jwt_token",
        pattern: /\beyJ[A-Za-z0-9\-_]+\.eyJ[A-Za-z0-9\-_]+\.[A-Za-z0-9\-_]+\b/g,
        category: Category.JWT,
        confidence: 0.95,
    },
    {
        name: "oauth_refresh_token",
        pattern: /(?:refresh_token["':\s]+)([A-Za-z0-9\-_]{20,})/g,
        category: Category.API_KEY,
        confidence: 0.90,
    },
    // --- Cloud provider tokens ---
    {
        name: "aws_secret_key",
        pattern: /(?:SecretAccessKey|aws_secret_access_key)["':\s=]+([A-Za-z0-9/+=]{40})/gi,
        category: Category.API_KEY,
        confidence: 0.95,
    },
    {
        name: "gcp_api_key",
        pattern: /\bAIza[0-9A-Za-z\-_]{35}\b/g,
        category: Category.API_KEY,
        confidence: 0.95,
    },
    {
        name: "azure_connection_string",
        pattern: /DefaultEndpointsProtocol=[^;\s]+;AccountName=[^;\s]+;AccountKey=[^;\s]+/g,
        category: Category.NETWORK_CREDENTIAL,
        confidence: 1.00,
    },
    {
        name: "slack_token",
        pattern: /\bxox[bpsar]-[A-Za-z0-9\-]{10,}/g,
        category: Category.API_KEY,
        confidence: 0.95,
    },
    {
        name: "github_pat",
        pattern: /\b(?:ghp|gho|ghu|ghs|ghr)_[A-Za-z0-9]{36,}\b/g,
        category: Category.API_KEY,
        confidence: 0.95,
    },
    {
        name: "gitlab_token",
        pattern: /\bglpat-[A-Za-z0-9\-]{20,}\b/g,
        category: Category.API_KEY,
        confidence: 0.95,
    },
    {
        name: "stripe_key",
        pattern: /\b[sr]k_(?:live|test)_[A-Za-z0-9]{24,}\b/g,
        category: Category.API_KEY,
        confidence: 0.95,
    },
    {
        name: "sendgrid_key",
        pattern: /\bSG\.[A-Za-z0-9\-_]{22}\.[A-Za-z0-9\-_]{43}\b/g,
        category: Category.API_KEY,
        confidence: 0.95,
    },
    {
        name: "hashicorp_vault_token",
        pattern: /\b(?:hvs\.[A-Za-z0-9]{24,}|s\.[A-Za-z0-9]{24})\b/g,
        category: Category.API_KEY,
        confidence: 0.95,
    },
    // --- Database connection strings ---
    {
        name: "db_connection_string",
        // "postgresql://" is the canonical libpq scheme and must match alongside "postgres://"
        pattern: /(?:postgres(?:ql)?|mysql|mongodb(?:\+srv)?|redis|amqp)s?:\/\/[^\s<>"']+/g,
        category: Category.NETWORK_CREDENTIAL,
        confidence: 1.00,
    },
    {
        name: "jdbc_url",
        pattern: /jdbc:(?:oracle|sqlserver|mysql|postgresql|mariadb):[^\s<>"']+/g,
        category: Category.NETWORK_CREDENTIAL,
        confidence: 0.95,
    },
    // --- Certificates and keys ---
    {
        name: "pem_private_key",
        pattern: /-----BEGIN (?:RSA |EC |DSA |OPENSSH |ENCRYPTED )?PRIVATE KEY-----[\s\S]*?-----END (?:RSA |EC |DSA |OPENSSH |ENCRYPTED )?PRIVATE KEY-----/g,
        category: Category.CERTIFICATE,
        confidence: 1.00,
    },
    {
        name: "pem_certificate",
        pattern: /-----BEGIN CERTIFICATE-----[\s\S]*?-----END CERTIFICATE-----/g,
        category: Category.CERTIFICATE,
        confidence: 0.85,
    },
    // --- LDAP / Active Directory ---
    {
        name: "ldap_bind_dn",
        pattern: /\bCN=[^,]+(?:,(?:OU|DC|O|C)=[^,]+){2,}/gi,
        category: Category.PERSON_NAME,
        confidence: 0.90,
    },
    {
        name: "ldap_bind_password",
        pattern: /(?:bindPassword|LDAP_BIND_PW|ldap_password)["':\s=]+(\S+)/gi,
        category: Category.NETWORK_CREDENTIAL,
        confidence: 1.00,
    },
    {
        name: "ad_domain_login",
        pattern: /\b[A-Z][A-Z0-9]{1,15}\\[a-zA-Z][a-zA-Z0-9._\-]{0,30}\b/g,
        category: Category.PERSON_NAME,
        confidence: 0.85,
    },
    {
        name: "windows_sid",
        pattern: /\bS-1-5-21-\d+-\d+-\d+(?:-\d+)?\b/g,
        category: Category.NATIONAL_ID,
        confidence: 0.90,
    },
    // --- Juniper ---
    {
        name: "junos_secret",
        pattern: /"\$9\$[A-Za-z0-9./]+"/g,
        category: Category.NETWORK_CREDENTIAL,
        confidence: 1.00,
    },
    {
        name: "junos_preshared_key",
        pattern: /(?:pre-shared-key\s+(?:ascii-text|hexadecimal)\s+)"([^"]+)"/g,
        category: Category.NETWORK_CREDENTIAL,
        confidence: 1.00,
    },
    {
        name: "junos_root_auth",
        pattern: /(?:encrypted-password\s+)"([^"]+)"/g,
        category: Category.NETWORK_CREDENTIAL,
        confidence: 1.00,
    },
    {
        name: "junos_community",
        pattern: /(?:community\s+)(\S+)(?:\s+(?:authorization|clients))/g,
        category: Category.SNMP_COMMUNITY,
        confidence: 1.00,
    },
    {
        name: "junos_description",
        pattern: /(?:description\s+)"([^"]+)"/g,
        category: Category.INTERFACE_DESC,
        confidence: 0.90,
    },
    // --- Palo Alto ---
    {
        name: "panos_api_key",
        pattern: /\bLUFRPT[A-Za-z0-9=+/]{20,}\b/g,
        category: Category.API_KEY,
        confidence: 0.95,
    },
    {
        name: "panos_password_hash",
        pattern: /(?:phash\s+)(\S+)/g,
        category: Category.NETWORK_CREDENTIAL,
        confidence: 1.00,
    },
    {
        name: "panos_master_key",
        pattern: /(?:master-key\s+)(\S+)/g,
        category: Category.NETWORK_CREDENTIAL,
        confidence: 1.00,
    },
    {
        name: "panos_address_object",
        pattern: /(?:set\s+address\s+)(\S+)(?:\s+ip-netmask)/g,
        category: Category.HOSTNAME,
        confidence: 0.80,
    },
    {
        name: "panos_zone_name",
        pattern: /(?:set\s+zone\s+)(\S+)(?:\s+network)/g,
        category: Category.ACL_NAME,
        confidence: 0.80,
    },
    {
        name: "panos_rule_name",
        pattern: /(?:set\s+rulebase\s+security\s+rules\s+)"?([^"\s]+)"?/g,
        category: Category.ACL_NAME,
        confidence: 0.85,
    },
    // --- Check Point ---
    {
        name: "checkpoint_password_hash",
        pattern: /(?:set\s+password-hash\s+)(\S+)/g,
        category: Category.NETWORK_CREDENTIAL,
        confidence: 1.00,
    },
    {
        name: "checkpoint_sic_key",
        pattern: /(?:sic\s+(?:init|key)\s+)(\S+)/gi,
        category: Category.NETWORK_CREDENTIAL,
        confidence: 1.00,
    },
    {
        name: "checkpoint_api_key",
        pattern: /(?:api-key\s+)"?([A-Za-z0-9+/=]{20,})"?/g,
        category: Category.API_KEY,
        confidence: 0.95,
    },
    {
        name: "checkpoint_object_name",
        pattern: /(?:add\s+(?:host|network|group|service-tcp|service-udp)\s+name\s+)"?([^"\s]+)"?/g,
        category: Category.HOSTNAME,
        confidence: 0.80,
    },
    {
        name: "checkpoint_rule_name",
        pattern: /(?:add\s+access-rule\s+.*name\s+)"?([^"\s]+)"?/g,
        category: Category.ACL_NAME,
        confidence: 0.85,
    },
    {
        name: "checkpoint_vpn_community",
        pattern: /(?:set\s+vpn-community\s+)"?([^"\s]+)"?/g,
        category: Category.ACL_NAME,
        confidence: 0.85,
    },
    // --- Arista ---
    {
        name: "arista_secret",
        pattern: /(?:secret\s+sha512\s+)(\$6\$[A-Za-z0-9./]+\$[A-Za-z0-9./+]+)/g,
        category: Category.NETWORK_CREDENTIAL,
        confidence: 1.00,
    },
    // --- F5 BIG-IP ---
    {
        name: "f5_password",
        pattern: /(?:auth\s+password\s+)(\S+)/g,
        category: Category.NETWORK_CREDENTIAL,
        confidence: 1.00,
    },
    {
        name: "f5_ssl_passphrase",
        pattern: /(?:passphrase\s+)(\S+)/g,
        category: Category.NETWORK_CREDENTIAL,
        confidence: 0.90,
    },
    // --- Fortinet ---
    {
        name: "fortinet_password",
        pattern: /(?:set\s+password\s+ENC\s+)(\S+)/g,
        category: Category.NETWORK_CREDENTIAL,
        confidence: 1.00,
    },
    {
        name: "fortinet_private_key",
        pattern: /(?:set\s+private-key\s+)"(-----BEGIN[\s\S]*?-----END[^"]+)"/g,
        category: Category.CERTIFICATE,
        confidence: 1.00,
    },
    // --- VPN / IPSec / RADIUS ---
    {
        name: "vpn_preshared_key",
        pattern: /(?:pre-shared-key|preshared-key|crypto\s+isakmp\s+key)\s+(?:\d+\s+)?(\S+)/gi,
        category: Category.NETWORK_CREDENTIAL,
        confidence: 1.00,
    },
    {
        name: "ipsec_transform_set",
        pattern: /(?:crypto\s+ipsec\s+transform-set\s+)(\S+)/g,
        category: Category.ACL_NAME,
        confidence: 0.80,
    },
    // --- ICS / SCADA ---
    {
        name: "opc_ua_endpoint",
        pattern: /opc\.tcp:\/\/[^\s<>"']+/g,
        category: Category.ICS_IDENTIFIER,
        confidence: 0.90,
    },
    {
        name: "modbus_address",
        pattern: /(?:modbus|slave|unit[\-_]?id)[\s:=]+(\d{1,3})/gi,
        category: Category.ICS_IDENTIFIER,
        confidence: 0.80,
    },
    {
        name: "scada_credential",
        pattern: /(?:scada|hmi|plc|rtu|ied)[\-_\s]?(?:password|pass|pwd|credential|auth)[\s:="']+(\S+)/gi,
        category: Category.NETWORK_CREDENTIAL,
        confidence: 1.00,
    },
    {
        name: "iec61850_ied_name",
        pattern: /(?:iedName\s*=\s*)"([^"]+)"/g,
        category: Category.ICS_IDENTIFIER,
        confidence: 0.90,
    },
    {
        name: "dnp3_address",
        pattern: /(?:dnp3|outstation|master)[\-_\s]?(?:address|addr)[\s:=]+(\d{1,5})/gi,
        category: Category.ICS_IDENTIFIER,
        confidence: 0.85,
    },
    {
        name: "bacnet_device_id",
        pattern: /(?:bacnet|device[\-_]?instance)[\s:=]+(\d{1,7})/gi,
        category: Category.ICS_IDENTIFIER,
        confidence: 0.80,
    },
    {
        name: "historian_tag",
        pattern: /\\\\[A-Za-z0-9\-_.]+\\[A-Za-z0-9\-_.]+(?:\\[A-Za-z0-9\-_.]+)*/g,
        category: Category.ICS_IDENTIFIER,
        confidence: 0.85,
    },
    // --- Aviation / ATC ---
    {
        name: "atc_sector_id",
        pattern: /\b(?:TWR|APP|ACC|CTR|GND|DEL|ATIS)[\-_][A-Z0-9]{2,10}\b/g,
        category: Category.ICS_IDENTIFIER,
        confidence: 0.90,
    },
    {
        name: "nav_frequency",
        pattern: /\b1[01]\d\.\d{1,3}\s?MHz\b/g,
        category: Category.ICS_IDENTIFIER,
        confidence: 0.85,
    },
    {
        name: "icao_designator",
        pattern: /\b[A-Z]{4}\b(?=[\s\-](?:TWR|APP|GND|CTR|ATIS|RWY|SID|STAR))/g,
        category: Category.ICS_IDENTIFIER,
        confidence: 0.85,
    },
    // --- Telecom ---
    {
        name: "imsi",
        pattern: /(?:IMSI|imsi)[\s:=]+(\d{15})/g,
        category: Category.NATIONAL_ID,
        confidence: 0.95,
    },
    {
        name: "imei",
        pattern: /(?:IMEI|imei)[\s:=]+(\d{15})/g,
        category: Category.NATIONAL_ID,
        confidence: 0.90,
    },
    {
        name: "clli_code",
        pattern: /\b[A-Z]{6}\d{2}[A-Z0-9]{3}\b/g,
        category: Category.ICS_IDENTIFIER,
        confidence: 0.85,
    },
    // --- Base64-encoded secrets ---
    {
        name: "base64_secret_assignment",
        pattern: /(?:SECRET|PRIVATE_KEY|PASSWORD|TOKEN|API_KEY|APIKEY|AUTH)[\s]*[=:]\s*[A-Za-z0-9+/]{20,}={0,2}/gi,
        category: Category.API_KEY,
        confidence: 0.90,
    },
    {
        name: "base64_prefixed",
        pattern: /\bbase64:[A-Za-z0-9+/]{8,}={0,2}/g,
        category: Category.API_KEY,
        confidence: 0.85,
    },
];
|
|
824
|
+
/**
 * Check whether a candidate span overlaps any previously accepted span.
 *
 * Spans are treated as half-open intervals `[start, end)`, so two spans that
 * merely touch (one ends where the other begins) do not overlap.
 *
 * @param {number} spanStart - Inclusive start offset of the candidate span.
 * @param {number} spanEnd - Exclusive end offset of the candidate span.
 * @param {Array<[number, number]>} seenSpans - Already accepted `[start, end]` pairs.
 * @returns {boolean} `true` if the candidate overlaps at least one seen span.
 */
function spansOverlap(spanStart, spanEnd, seenSpans) {
    return seenSpans.some(([s, e]) => {
        const startsInside = s <= spanStart && spanStart < e;
        const endsInside = s < spanEnd && spanEnd <= e;
        // A candidate that strictly contains a seen span has neither endpoint
        // inside it, so the two endpoint checks alone would miss the overlap.
        const engulfsSeen = spanStart < s && e < spanEnd;
        return startsInside || endsInside || engulfsSeen;
    });
}
|
|
833
|
+
/**
 * Detects sensitive entities using regex patterns.
 *
 * The pattern list is `BUILTIN_PATTERNS` followed by any caller-supplied
 * `extraPatterns`; `overrides` (keyed by pattern name) can disable a pattern
 * or replace its confidence.
 */
export class RegexDetector {
    name = "regex";
    patterns;
    /**
     * Cache of scan-ready clones, keyed by the original RegExp object, so each
     * pattern is recompiled at most once per detector instance.
     */
    #scanCache = new Map();

    /**
     * @param {Array<object>} [extraPatterns] - Additional pattern definitions
     *   (`name`, `pattern`, `category`, `confidence`) appended after the built-ins.
     * @param {Object<string, {enabled?: boolean, confidence?: number}>} [overrides]
     *   Per-pattern settings keyed by pattern name.
     */
    constructor(extraPatterns, overrides) {
        let patterns = [...BUILTIN_PATTERNS];
        if (extraPatterns) {
            patterns.push(...extraPatterns);
        }
        if (overrides) {
            patterns = patterns
                // Only an explicit `enabled: false` removes a pattern.
                .filter((p) => overrides[p.name]?.enabled !== false)
                // Copy before changing confidence so shared definitions are not mutated.
                .map((p) => {
                    const ov = overrides[p.name];
                    return ov?.confidence !== undefined
                        ? { ...p, confidence: ov.confidence }
                        : p;
                });
        }
        this.patterns = patterns;
    }

    /**
     * Scan `text` with every configured pattern.
     *
     * If a match has at least one participating capture group, each such group
     * becomes its own entity; otherwise the full match is the entity. Patterns
     * run in list order and a candidate is dropped when it overlaps a span that
     * was already accepted, so earlier patterns take precedence.
     *
     * @param {string} text - Text to scan.
     * @returns {Array<object>} Entities (`value`, `start`, `end`, `category`,
     *   `confidence`, `detector`) sorted by ascending `start`.
     */
    detect(text) {
        const entities = [];
        const seenSpans = [];
        for (const pdef of this.patterns) {
            // matchAll copies the regex internally, so the cached clone's
            // lastIndex is never advanced and needs no reset between calls.
            const scanner = this.#scanRegex(pdef.pattern);
            for (const match of text.matchAll(scanner)) {
                for (const [value, start] of this.#candidates(match)) {
                    const end = start + value.length;
                    if (spansOverlap(start, end, seenSpans)) {
                        continue;
                    }
                    // Skip subnet/wildcard masks for IP-like values
                    if (pdef.category === Category.IP_ADDRESS && isMask(value)) {
                        continue;
                    }
                    // Skip documentation/example values (#7)
                    if (isDocExample(value, pdef.category)) {
                        continue;
                    }
                    seenSpans.push([start, end]);
                    entities.push({
                        value,
                        start,
                        end,
                        category: pdef.category,
                        confidence: pdef.confidence,
                        detector: `${this.name}:${pdef.name}`,
                    });
                }
            }
        }
        entities.sort((a, b) => a.start - b.start);
        return entities;
    }

    /**
     * List the `[value, absoluteStart]` candidates produced by one match.
     *
     * Group offsets are read from `match.indices` when available. Searching the
     * matched text for the group's value is not reliable: in
     * `pre-shared-key e` the captured `e` first occurs inside `pre`.
     */
    #candidates(match) {
        const found = [];
        for (let i = 1; i < match.length; i++) {
            const grp = match[i];
            if (grp === undefined) {
                continue;
            }
            const exactStart = match.indices?.[i]?.[0];
            const start = exactStart
                ?? findGroupStart(match[0], match.index, grp, match, i);
            found.push([grp, start]);
        }
        if (found.length === 0) {
            // No participating capture group: the whole match is the entity.
            found.push([match[0], match.index]);
        }
        return found;
    }

    /** Return the cached scan-ready clone of `pattern`, compiling it on first use. */
    #scanRegex(pattern) {
        let scanner = this.#scanCache.get(pattern);
        if (scanner === undefined) {
            scanner = RegexDetector.#toScanRegex(pattern);
            this.#scanCache.set(pattern, scanner);
        }
        return scanner;
    }

    /**
     * Clone `pattern` with the flags scanning needs: `g` (matchAll throws a
     * TypeError on a non-global regex) and `d` (exposes exact capture-group
     * offsets). Neither flag changes what the pattern matches.
     */
    static #toScanRegex(pattern) {
        const globalFlags = pattern.flags.includes("g")
            ? pattern.flags
            : `${pattern.flags}g`;
        const indexedFlags = globalFlags.includes("d")
            ? globalFlags
            : `${globalFlags}d`;
        try {
            return new RegExp(pattern.source, indexedFlags);
        } catch {
            // Deliberate feature detection: a runtime without the `d` flag
            // rejects it, and detect() then falls back to findGroupStart.
            return globalFlags === pattern.flags
                ? pattern
                : new RegExp(pattern.source, globalFlags);
        }
    }
}
|
|
936
|
+
/**
 * Find the absolute start position of a capture group within the text.
 *
 * When the match carries `indices` (regex compiled with the `d` flag) the
 * exact offset is returned. Otherwise the position is estimated by searching
 * the full match string for the group value, starting after the text of the
 * preceding groups so that repeated values resolve to the later occurrence.
 * The estimate can still be wrong when the group's value also occurs in
 * uncaptured text earlier in the match.
 *
 * @param {string} fullMatch - The complete matched text (`match[0]`).
 * @param {number} fullMatchStart - Offset of the full match in the scanned text.
 * @param {string} groupValue - Text captured by the group.
 * @param {Array} match - The regex match array.
 * @param {number} groupIndex - 1-based index of the capture group.
 * @returns {number} Absolute start offset of the group; never before `fullMatchStart`
 *   on the search path.
 */
function findGroupStart(fullMatch, fullMatchStart, groupValue, match, groupIndex) {
    // Exact offsets are available when the regex had the `d` (hasIndices) flag.
    const exact = match.indices?.[groupIndex];
    if (exact !== undefined) {
        return exact[0];
    }
    // Search for the group value within the full match, accounting for
    // previous groups that may contain the same text.
    let searchFrom = 0;
    for (let prev = 1; prev < groupIndex; prev++) {
        if (match[prev] !== undefined) {
            const prevPos = fullMatch.indexOf(match[prev], searchFrom);
            if (prevPos !== -1) {
                searchFrom = prevPos + match[prev].length;
            }
        }
    }
    const posInMatch = fullMatch.indexOf(groupValue, searchFrom);
    if (posInMatch !== -1) {
        return fullMatchStart + posInMatch;
    }
    // Nested groups, e.g. ((a)b): the cursor has already moved past the outer
    // group, so look again from the start of the match.
    const retryPos = fullMatch.indexOf(groupValue);
    // If the value is not in the match at all (e.g. captured in a lookaround),
    // fall back to the match start instead of pointing before it.
    return fullMatchStart + Math.max(retryPos, 0);
}
|