@floatingsidewal/bulkhead-core 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +142 -0
- package/dist/cascade/bert-worker.js +84 -0
- package/dist/cascade/bert-worker.js.map +1 -0
- package/dist/cascade/index.d.mts +1 -0
- package/dist/cascade/index.d.ts +1 -0
- package/dist/cascade/index.js +386 -0
- package/dist/cascade/index.js.map +1 -0
- package/dist/cascade/index.mjs +11 -0
- package/dist/cascade/index.mjs.map +1 -0
- package/dist/chunk-4KUXRYNS.mjs +358 -0
- package/dist/chunk-4KUXRYNS.mjs.map +1 -0
- package/dist/index-BNiM_sPB.d.mts +237 -0
- package/dist/index-BNiM_sPB.d.ts +237 -0
- package/dist/index.d.mts +265 -0
- package/dist/index.d.ts +265 -0
- package/dist/index.js +3470 -0
- package/dist/index.js.map +1 -0
- package/dist/index.mjs +3082 -0
- package/dist/index.mjs.map +1 -0
- package/package.json +70 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,3470 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __defProp = Object.defineProperty;
|
|
3
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
6
|
+
var __export = (target, all) => {
|
|
7
|
+
for (var name in all)
|
|
8
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
9
|
+
};
|
|
10
|
+
var __copyProps = (to, from, except, desc) => {
|
|
11
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
12
|
+
for (let key of __getOwnPropNames(from))
|
|
13
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
14
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
15
|
+
}
|
|
16
|
+
return to;
|
|
17
|
+
};
|
|
18
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
19
|
+
|
|
20
|
+
// src/index.ts
|
|
21
|
+
var index_exports = {};
|
|
22
|
+
__export(index_exports, {
|
|
23
|
+
BUILTIN_POLICIES: () => BUILTIN_POLICIES,
|
|
24
|
+
BaseGuard: () => BaseGuard,
|
|
25
|
+
DEFAULT_CONFIG: () => DEFAULT_CONFIG2,
|
|
26
|
+
GuardrailsEngine: () => GuardrailsEngine,
|
|
27
|
+
InjectionGuard: () => InjectionGuard,
|
|
28
|
+
LeakageGuard: () => LeakageGuard,
|
|
29
|
+
PiiGuard: () => PiiGuard,
|
|
30
|
+
SecretGuard: () => SecretGuard,
|
|
31
|
+
TestDataGuard: () => TestDataGuard,
|
|
32
|
+
assessRisk: () => assessRisk,
|
|
33
|
+
createEngine: () => createEngine,
|
|
34
|
+
getPolicy: () => getPolicy,
|
|
35
|
+
policyToEngineConfig: () => policyToEngineConfig,
|
|
36
|
+
resolvePolicy: () => resolvePolicy
|
|
37
|
+
});
|
|
38
|
+
module.exports = __toCommonJS(index_exports);
|
|
39
|
+
|
|
40
|
+
// src/cascade/bert-layer.ts
|
|
41
|
+
var import_node_worker_threads = require("worker_threads");
|
|
42
|
+
var import_node_path = require("path");
|
|
43
|
+
var import_node_fs = require("fs");
|
|
44
|
+
var DEFAULT_MODEL_ID = "Xenova/bert-base-NER";
|
|
45
|
+
var BertLayer = class {
|
|
46
|
+
worker = null;
|
|
47
|
+
pendingRequests = /* @__PURE__ */ new Map();
|
|
48
|
+
requestId = 0;
|
|
49
|
+
config;
|
|
50
|
+
/** Whether the BERT model has been loaded and first inference completed */
|
|
51
|
+
_loaded = false;
|
|
52
|
+
get loaded() {
|
|
53
|
+
return this._loaded;
|
|
54
|
+
}
|
|
55
|
+
constructor(config) {
|
|
56
|
+
this.config = {
|
|
57
|
+
escalationThreshold: 0.75,
|
|
58
|
+
...config
|
|
59
|
+
};
|
|
60
|
+
}
|
|
61
|
+
/** Resolve the worker path — supports both compiled .js and source .ts */
|
|
62
|
+
resolveWorkerPath() {
|
|
63
|
+
const compiledPath = (0, import_node_path.resolve)(__dirname, "cascade", "bert-worker.js");
|
|
64
|
+
if ((0, import_node_fs.existsSync)(compiledPath)) return compiledPath;
|
|
65
|
+
const tsPath = (0, import_node_path.resolve)(__dirname, "bert-worker.ts");
|
|
66
|
+
if ((0, import_node_fs.existsSync)(tsPath)) return tsPath;
|
|
67
|
+
return compiledPath;
|
|
68
|
+
}
|
|
69
|
+
/** Ensure the worker thread is running */
|
|
70
|
+
ensureWorker() {
|
|
71
|
+
if (!this.worker) {
|
|
72
|
+
const workerPath = this.resolveWorkerPath();
|
|
73
|
+
const isTs = workerPath.endsWith(".ts");
|
|
74
|
+
this.worker = isTs ? new import_node_worker_threads.Worker(workerPath, {
|
|
75
|
+
execArgv: ["--require", "tsx/cjs"]
|
|
76
|
+
}) : new import_node_worker_threads.Worker(workerPath);
|
|
77
|
+
this.worker.on("message", (msg) => {
|
|
78
|
+
const pending = this.pendingRequests.get(msg.id);
|
|
79
|
+
if (!pending) return;
|
|
80
|
+
this.pendingRequests.delete(msg.id);
|
|
81
|
+
if (msg.type === "error") {
|
|
82
|
+
pending.reject(new Error(msg.error ?? "Unknown worker error"));
|
|
83
|
+
} else {
|
|
84
|
+
pending.resolve(msg.tokens ?? []);
|
|
85
|
+
}
|
|
86
|
+
});
|
|
87
|
+
this.worker.on("error", (err) => {
|
|
88
|
+
for (const [id, pending] of this.pendingRequests) {
|
|
89
|
+
pending.reject(err);
|
|
90
|
+
this.pendingRequests.delete(id);
|
|
91
|
+
}
|
|
92
|
+
});
|
|
93
|
+
}
|
|
94
|
+
return this.worker;
|
|
95
|
+
}
|
|
96
|
+
/** Send text to the BERT worker and get raw token results */
|
|
97
|
+
async analyzeRaw(text) {
|
|
98
|
+
const worker = this.ensureWorker();
|
|
99
|
+
const id = String(++this.requestId);
|
|
100
|
+
return new Promise((resolve2, reject) => {
|
|
101
|
+
this.pendingRequests.set(id, { resolve: resolve2, reject });
|
|
102
|
+
worker.postMessage({
|
|
103
|
+
type: "analyze",
|
|
104
|
+
id,
|
|
105
|
+
text,
|
|
106
|
+
modelId: this.config.modelId ?? DEFAULT_MODEL_ID
|
|
107
|
+
});
|
|
108
|
+
});
|
|
109
|
+
}
|
|
110
|
+
/**
|
|
111
|
+
* Analyze text and return Detection objects with escalation disposition.
|
|
112
|
+
* Tokens above the escalation threshold are "confirmed",
|
|
113
|
+
* tokens below are "escalate" (need LLM review).
|
|
114
|
+
*/
|
|
115
|
+
async analyze(text) {
|
|
116
|
+
const tokens = await this.analyzeRaw(text);
|
|
117
|
+
this._loaded = true;
|
|
118
|
+
return tokens.map((token) => {
|
|
119
|
+
const entityType = token.entity.replace(/^[BI]-/, "");
|
|
120
|
+
const isConfirmed = token.score >= this.config.escalationThreshold;
|
|
121
|
+
const confidence = token.score >= 0.9 ? "high" : token.score >= 0.7 ? "medium" : "low";
|
|
122
|
+
return {
|
|
123
|
+
entityType,
|
|
124
|
+
start: token.start,
|
|
125
|
+
end: token.end,
|
|
126
|
+
text: token.word,
|
|
127
|
+
confidence,
|
|
128
|
+
score: token.score,
|
|
129
|
+
guardName: "cascade-bert",
|
|
130
|
+
source: "bert",
|
|
131
|
+
context: text.slice(
|
|
132
|
+
Math.max(0, token.start - 150),
|
|
133
|
+
Math.min(text.length, token.end + 150)
|
|
134
|
+
),
|
|
135
|
+
disposition: isConfirmed ? "confirmed" : "escalate"
|
|
136
|
+
};
|
|
137
|
+
});
|
|
138
|
+
}
|
|
139
|
+
/** Terminate the worker thread */
|
|
140
|
+
async dispose() {
|
|
141
|
+
if (this.worker) {
|
|
142
|
+
await this.worker.terminate();
|
|
143
|
+
this.worker = null;
|
|
144
|
+
this.pendingRequests.clear();
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
};
|
|
148
|
+
|
|
149
|
+
// src/cascade/llm-layer.ts
|
|
150
|
+
var LlmLayer = class {
|
|
151
|
+
config;
|
|
152
|
+
constructor(config) {
|
|
153
|
+
this.config = {
|
|
154
|
+
contextSentences: 3,
|
|
155
|
+
...config
|
|
156
|
+
};
|
|
157
|
+
}
|
|
158
|
+
/** Set the LLM provider (can be swapped at runtime) */
|
|
159
|
+
setProvider(provider) {
|
|
160
|
+
this.config.provider = provider;
|
|
161
|
+
}
|
|
162
|
+
/**
|
|
163
|
+
* Disambiguate escalated detections using an LLM.
|
|
164
|
+
* @param escalated Detections with disposition "escalate"
|
|
165
|
+
* @param fullText The full document text
|
|
166
|
+
* @param confirmed Already-confirmed detections (passed as context to help disambiguation)
|
|
167
|
+
*/
|
|
168
|
+
async disambiguate(escalated, fullText, confirmed) {
|
|
169
|
+
if (!this.config.provider) {
|
|
170
|
+
return escalated;
|
|
171
|
+
}
|
|
172
|
+
const results = [];
|
|
173
|
+
for (const detection of escalated) {
|
|
174
|
+
const prompt = this.buildPrompt(detection, fullText, confirmed);
|
|
175
|
+
try {
|
|
176
|
+
const response = await this.config.provider(prompt);
|
|
177
|
+
const parsed = this.parseResponse(response);
|
|
178
|
+
if (parsed && parsed.type !== "NONE") {
|
|
179
|
+
results.push({
|
|
180
|
+
...detection,
|
|
181
|
+
entityType: parsed.type,
|
|
182
|
+
score: parsed.confidence,
|
|
183
|
+
confidence: parsed.confidence >= 0.9 ? "high" : parsed.confidence >= 0.7 ? "medium" : "low",
|
|
184
|
+
source: "llm",
|
|
185
|
+
disposition: "confirmed"
|
|
186
|
+
});
|
|
187
|
+
} else {
|
|
188
|
+
results.push({
|
|
189
|
+
...detection,
|
|
190
|
+
source: "llm",
|
|
191
|
+
disposition: "dismissed"
|
|
192
|
+
});
|
|
193
|
+
}
|
|
194
|
+
} catch {
|
|
195
|
+
results.push(detection);
|
|
196
|
+
}
|
|
197
|
+
}
|
|
198
|
+
return results;
|
|
199
|
+
}
|
|
200
|
+
/** Build a focused disambiguation prompt */
|
|
201
|
+
buildPrompt(detection, fullText, confirmed) {
|
|
202
|
+
const contextWindow = this.extractSentenceContext(
|
|
203
|
+
fullText,
|
|
204
|
+
detection.start,
|
|
205
|
+
detection.end
|
|
206
|
+
);
|
|
207
|
+
const confirmedList = confirmed.filter((d) => d.disposition === "confirmed").map((d) => `${d.text} (${d.entityType})`).slice(0, 10);
|
|
208
|
+
return `You are a PII detection system. Determine if the highlighted span is personally identifiable information.
|
|
209
|
+
|
|
210
|
+
Context: "${contextWindow}"
|
|
211
|
+
Span: "${detection.text}"
|
|
212
|
+
BERT suggested: ${detection.entityType} (confidence: ${detection.score.toFixed(2)})
|
|
213
|
+
${confirmedList.length > 0 ? `Other confirmed entities in document: [${confirmedList.join(", ")}]` : ""}
|
|
214
|
+
|
|
215
|
+
Is this span PII? If yes, what type? If it's ambiguous (e.g., "Jordan" could be a person or country), use the context to decide.
|
|
216
|
+
|
|
217
|
+
Respond with ONLY a JSON object: { "type": "PERSON"|"LOCATION"|"ORGANIZATION"|"NONE", "confidence": 0.0-1.0 }`;
|
|
218
|
+
}
|
|
219
|
+
/** Extract ±N sentences around a span */
|
|
220
|
+
extractSentenceContext(text, start, end) {
|
|
221
|
+
const n = this.config.contextSentences;
|
|
222
|
+
const sentenceBreaks = [0];
|
|
223
|
+
const sentenceRegex = /[.!?]+\s+/g;
|
|
224
|
+
let match;
|
|
225
|
+
while ((match = sentenceRegex.exec(text)) !== null) {
|
|
226
|
+
sentenceBreaks.push(match.index + match[0].length);
|
|
227
|
+
}
|
|
228
|
+
sentenceBreaks.push(text.length);
|
|
229
|
+
let spanSentenceIdx = 0;
|
|
230
|
+
for (let i = 0; i < sentenceBreaks.length - 1; i++) {
|
|
231
|
+
if (sentenceBreaks[i] <= start && start < sentenceBreaks[i + 1]) {
|
|
232
|
+
spanSentenceIdx = i;
|
|
233
|
+
break;
|
|
234
|
+
}
|
|
235
|
+
}
|
|
236
|
+
const contextStart = sentenceBreaks[Math.max(0, spanSentenceIdx - n)];
|
|
237
|
+
const contextEnd = sentenceBreaks[Math.min(sentenceBreaks.length - 1, spanSentenceIdx + n + 1)];
|
|
238
|
+
return text.slice(contextStart, contextEnd).trim();
|
|
239
|
+
}
|
|
240
|
+
/** Parse the LLM response JSON */
|
|
241
|
+
parseResponse(response) {
|
|
242
|
+
try {
|
|
243
|
+
const jsonMatch = response.match(/\{[^}]+\}/);
|
|
244
|
+
if (!jsonMatch) return null;
|
|
245
|
+
const parsed = JSON.parse(jsonMatch[0]);
|
|
246
|
+
if (typeof parsed.type === "string" && typeof parsed.confidence === "number") {
|
|
247
|
+
return parsed;
|
|
248
|
+
}
|
|
249
|
+
return null;
|
|
250
|
+
} catch {
|
|
251
|
+
return null;
|
|
252
|
+
}
|
|
253
|
+
}
|
|
254
|
+
};
|
|
255
|
+
|
|
256
|
+
// src/cascade/cascade.ts
|
|
257
|
+
var DEFAULT_CASCADE_CONFIG = {
|
|
258
|
+
escalationThreshold: 0.75,
|
|
259
|
+
contextSentences: 3,
|
|
260
|
+
bertEnabled: true,
|
|
261
|
+
llmEnabled: false,
|
|
262
|
+
modelId: "Xenova/bert-base-NER"
|
|
263
|
+
};
|
|
264
|
+
var CascadeClassifier = class {
|
|
265
|
+
config;
|
|
266
|
+
bertLayer = null;
|
|
267
|
+
llmLayer;
|
|
268
|
+
regexGuards = [];
|
|
269
|
+
constructor(config) {
|
|
270
|
+
this.config = { ...DEFAULT_CASCADE_CONFIG, ...config };
|
|
271
|
+
this.llmLayer = new LlmLayer({
|
|
272
|
+
contextSentences: this.config.contextSentences,
|
|
273
|
+
provider: this.config.llmProvider
|
|
274
|
+
});
|
|
275
|
+
}
|
|
276
|
+
/** Whether the cascade is ready to serve (BERT model loaded if enabled) */
|
|
277
|
+
get ready() {
|
|
278
|
+
if (!this.config.bertEnabled) return true;
|
|
279
|
+
if (!this.bertLayer) return true;
|
|
280
|
+
return this.bertLayer.loaded;
|
|
281
|
+
}
|
|
282
|
+
/** Register regex-based guards (Layer 1) */
|
|
283
|
+
addRegexGuard(guard) {
|
|
284
|
+
this.regexGuards.push(guard);
|
|
285
|
+
return this;
|
|
286
|
+
}
|
|
287
|
+
/** Set the LLM provider for Layer 3 */
|
|
288
|
+
setLlmProvider(provider) {
|
|
289
|
+
this.config.llmProvider = provider;
|
|
290
|
+
this.llmLayer.setProvider(provider);
|
|
291
|
+
}
|
|
292
|
+
/**
|
|
293
|
+
* Run the full cascade: Regex → BERT → LLM
|
|
294
|
+
* Returns a unified GuardResult with all detections carrying provenance.
|
|
295
|
+
*/
|
|
296
|
+
async deepScan(text) {
|
|
297
|
+
const regexDetections = await this.runRegexLayer(text);
|
|
298
|
+
if (!this.config.bertEnabled) {
|
|
299
|
+
return this.buildCascadeResult(text, regexDetections);
|
|
300
|
+
}
|
|
301
|
+
const bertDetections = await this.runBertLayer(text);
|
|
302
|
+
const mergedBert = this.deduplicateAgainstRegex(
|
|
303
|
+
bertDetections,
|
|
304
|
+
regexDetections
|
|
305
|
+
);
|
|
306
|
+
const allDetections = [...regexDetections, ...mergedBert];
|
|
307
|
+
const escalated = allDetections.filter((d) => d.disposition === "escalate");
|
|
308
|
+
if (!this.config.llmEnabled || escalated.length === 0 || !this.config.llmProvider) {
|
|
309
|
+
return this.buildCascadeResult(text, allDetections);
|
|
310
|
+
}
|
|
311
|
+
const confirmed = allDetections.filter((d) => d.disposition === "confirmed");
|
|
312
|
+
const resolved = await this.llmLayer.disambiguate(
|
|
313
|
+
escalated,
|
|
314
|
+
text,
|
|
315
|
+
confirmed
|
|
316
|
+
);
|
|
317
|
+
const finalDetections = [
|
|
318
|
+
...allDetections.filter((d) => d.disposition !== "escalate"),
|
|
319
|
+
...resolved
|
|
320
|
+
];
|
|
321
|
+
return this.buildCascadeResult(text, finalDetections);
|
|
322
|
+
}
|
|
323
|
+
/** Run Layer 1 only (for fast auto-scan path) */
|
|
324
|
+
async regexScan(text) {
|
|
325
|
+
const detections = await this.runRegexLayer(text);
|
|
326
|
+
return this.buildCascadeResult(text, detections);
|
|
327
|
+
}
|
|
328
|
+
/** Run Layers 1 + 2 only (no LLM, for "Scan File" command) */
|
|
329
|
+
async modelScan(text) {
|
|
330
|
+
const regexDetections = await this.runRegexLayer(text);
|
|
331
|
+
if (!this.config.bertEnabled) {
|
|
332
|
+
return this.buildCascadeResult(text, regexDetections);
|
|
333
|
+
}
|
|
334
|
+
const bertDetections = await this.runBertLayer(text);
|
|
335
|
+
const mergedBert = this.deduplicateAgainstRegex(
|
|
336
|
+
bertDetections,
|
|
337
|
+
regexDetections
|
|
338
|
+
);
|
|
339
|
+
return this.buildCascadeResult(text, [...regexDetections, ...mergedBert]);
|
|
340
|
+
}
|
|
341
|
+
// --- Private methods ---
|
|
342
|
+
async runRegexLayer(text) {
|
|
343
|
+
const allDetections = [];
|
|
344
|
+
for (const guard of this.regexGuards) {
|
|
345
|
+
const result = await guard.analyze(text);
|
|
346
|
+
allDetections.push(...result.detections);
|
|
347
|
+
}
|
|
348
|
+
return allDetections;
|
|
349
|
+
}
|
|
350
|
+
async runBertLayer(text) {
|
|
351
|
+
if (!this.bertLayer) {
|
|
352
|
+
this.bertLayer = new BertLayer({
|
|
353
|
+
modelId: this.config.modelId,
|
|
354
|
+
escalationThreshold: this.config.escalationThreshold
|
|
355
|
+
});
|
|
356
|
+
}
|
|
357
|
+
return this.bertLayer.analyze(text);
|
|
358
|
+
}
|
|
359
|
+
/** Remove BERT detections that overlap with regex detections */
|
|
360
|
+
deduplicateAgainstRegex(bertDetections, regexDetections) {
|
|
361
|
+
return bertDetections.filter((bert) => {
|
|
362
|
+
return !regexDetections.some(
|
|
363
|
+
(regex) => bert.start < regex.end && bert.end > regex.start
|
|
364
|
+
);
|
|
365
|
+
});
|
|
366
|
+
}
|
|
367
|
+
buildCascadeResult(text, detections) {
|
|
368
|
+
const activeDetections = detections.filter(
|
|
369
|
+
(d) => d.disposition !== "dismissed"
|
|
370
|
+
);
|
|
371
|
+
const passed = activeDetections.length === 0;
|
|
372
|
+
const score = activeDetections.length > 0 ? Math.max(...activeDetections.map((d) => d.score)) : 0;
|
|
373
|
+
const sources = [...new Set(detections.map((d) => d.source))];
|
|
374
|
+
const types = [...new Set(activeDetections.map((d) => d.entityType))];
|
|
375
|
+
return {
|
|
376
|
+
passed,
|
|
377
|
+
reason: passed ? "No issues detected" : `Detected via ${sources.join("+")}: ${types.join(", ")}`,
|
|
378
|
+
guardName: "cascade",
|
|
379
|
+
score,
|
|
380
|
+
detections
|
|
381
|
+
};
|
|
382
|
+
}
|
|
383
|
+
/** Clean up resources */
|
|
384
|
+
async dispose() {
|
|
385
|
+
if (this.bertLayer) {
|
|
386
|
+
await this.bertLayer.dispose();
|
|
387
|
+
this.bertLayer = null;
|
|
388
|
+
}
|
|
389
|
+
}
|
|
390
|
+
};
|
|
391
|
+
|
|
392
|
+
// src/policy/risk.ts
|
|
393
|
+
function assessRisk(results, policy) {
|
|
394
|
+
const allDetections = results.flatMap((r) => r.detections);
|
|
395
|
+
const testDetections = allDetections.filter(
|
|
396
|
+
(d) => d.entityType.startsWith("TEST_DATA_")
|
|
397
|
+
);
|
|
398
|
+
const realDetections = allDetections.filter(
|
|
399
|
+
(d) => !d.entityType.startsWith("TEST_DATA_")
|
|
400
|
+
);
|
|
401
|
+
const testDataFlags = testDetections.map((d) => ({
|
|
402
|
+
value: d.text,
|
|
403
|
+
reason: d.entityType.toLowerCase().replace("test_data_", "") + "-pattern",
|
|
404
|
+
start: d.start,
|
|
405
|
+
end: d.end
|
|
406
|
+
}));
|
|
407
|
+
const score = realDetections.length > 0 ? Math.max(...realDetections.map((d) => d.score)) : 0;
|
|
408
|
+
const level = scoreToLevel(score, policy.riskThresholds);
|
|
409
|
+
const guards = {};
|
|
410
|
+
for (const result of results) {
|
|
411
|
+
if (result.guardName === "testdata") continue;
|
|
412
|
+
const guardDetections = result.detections.filter(
|
|
413
|
+
(d) => !d.entityType.startsWith("TEST_DATA_")
|
|
414
|
+
);
|
|
415
|
+
guards[result.guardName] = {
|
|
416
|
+
level: scoreToLevel(result.score, policy.riskThresholds),
|
|
417
|
+
score: result.score,
|
|
418
|
+
detectionCount: guardDetections.length
|
|
419
|
+
};
|
|
420
|
+
}
|
|
421
|
+
const issues = classifyIssues(realDetections, testDetections, policy);
|
|
422
|
+
return { level, score, guards, issues, testDataFlags };
|
|
423
|
+
}
|
|
424
|
+
function scoreToLevel(score, thresholds) {
|
|
425
|
+
if (score >= thresholds.critical) return "critical";
|
|
426
|
+
if (score >= thresholds.high) return "high";
|
|
427
|
+
if (score >= thresholds.medium) return "medium";
|
|
428
|
+
if (score >= thresholds.low) return "low";
|
|
429
|
+
return "none";
|
|
430
|
+
}
|
|
431
|
+
function classifyIssues(realDetections, testDetections, policy) {
|
|
432
|
+
const groups = /* @__PURE__ */ new Map();
|
|
433
|
+
for (const d of realDetections) {
|
|
434
|
+
const key = `${d.guardName}:${d.entityType}`;
|
|
435
|
+
const group = groups.get(key);
|
|
436
|
+
if (group) {
|
|
437
|
+
group.detections.push(d);
|
|
438
|
+
} else {
|
|
439
|
+
groups.set(key, {
|
|
440
|
+
detections: [d],
|
|
441
|
+
guardName: d.guardName,
|
|
442
|
+
entityType: d.entityType
|
|
443
|
+
});
|
|
444
|
+
}
|
|
445
|
+
}
|
|
446
|
+
const issues = [];
|
|
447
|
+
for (const [, group] of groups) {
|
|
448
|
+
const maxScore = Math.max(...group.detections.map((d) => d.score));
|
|
449
|
+
const severity = scoreToLevel(maxScore, policy.riskThresholds);
|
|
450
|
+
const isTestData = group.detections.some(
|
|
451
|
+
(d) => testDetections.some(
|
|
452
|
+
(td) => d.start < td.end && d.end > td.start
|
|
453
|
+
)
|
|
454
|
+
);
|
|
455
|
+
const category = guardNameToCategory(group.guardName);
|
|
456
|
+
const sample = group.detections[0]?.text?.slice(0, 50);
|
|
457
|
+
issues.push({
|
|
458
|
+
category,
|
|
459
|
+
entityType: group.entityType,
|
|
460
|
+
severity,
|
|
461
|
+
count: group.detections.length,
|
|
462
|
+
isTestData,
|
|
463
|
+
sample
|
|
464
|
+
});
|
|
465
|
+
}
|
|
466
|
+
return issues.sort((a, b) => {
|
|
467
|
+
const levelOrder = {
|
|
468
|
+
critical: 0,
|
|
469
|
+
high: 1,
|
|
470
|
+
medium: 2,
|
|
471
|
+
low: 3,
|
|
472
|
+
none: 4
|
|
473
|
+
};
|
|
474
|
+
const levelDiff = levelOrder[a.severity] - levelOrder[b.severity];
|
|
475
|
+
if (levelDiff !== 0) return levelDiff;
|
|
476
|
+
return b.count - a.count;
|
|
477
|
+
});
|
|
478
|
+
}
|
|
479
|
+
function guardNameToCategory(guardName) {
|
|
480
|
+
switch (guardName) {
|
|
481
|
+
case "pii":
|
|
482
|
+
return "pii";
|
|
483
|
+
case "secret":
|
|
484
|
+
return "secret";
|
|
485
|
+
case "injection":
|
|
486
|
+
return "injection";
|
|
487
|
+
case "leakage":
|
|
488
|
+
return "leakage";
|
|
489
|
+
default:
|
|
490
|
+
return "pii";
|
|
491
|
+
}
|
|
492
|
+
}
|
|
493
|
+
|
|
494
|
+
// src/engine/engine.ts
|
|
495
|
+
var GuardrailsEngine = class {
|
|
496
|
+
guards = [];
|
|
497
|
+
config;
|
|
498
|
+
cascade = null;
|
|
499
|
+
constructor(config) {
|
|
500
|
+
this.config = {
|
|
501
|
+
guards: {},
|
|
502
|
+
...config
|
|
503
|
+
};
|
|
504
|
+
}
|
|
505
|
+
/** Register a guard with the engine */
|
|
506
|
+
addGuard(guard) {
|
|
507
|
+
this.guards.push(guard);
|
|
508
|
+
return this;
|
|
509
|
+
}
|
|
510
|
+
/** Register multiple guards */
|
|
511
|
+
addGuards(guards) {
|
|
512
|
+
for (const guard of guards) {
|
|
513
|
+
this.addGuard(guard);
|
|
514
|
+
}
|
|
515
|
+
return this;
|
|
516
|
+
}
|
|
517
|
+
/** Get configuration for a specific guard */
|
|
518
|
+
getGuardConfig(guardName) {
|
|
519
|
+
return this.config.guards[guardName];
|
|
520
|
+
}
|
|
521
|
+
/** Run all enabled guards against the input text */
|
|
522
|
+
async analyze(text) {
|
|
523
|
+
const results = [];
|
|
524
|
+
for (const guard of this.guards) {
|
|
525
|
+
const guardConfig = this.getGuardConfig(guard.name);
|
|
526
|
+
if (guardConfig?.enabled === false) {
|
|
527
|
+
continue;
|
|
528
|
+
}
|
|
529
|
+
const result = await guard.analyze(text, guardConfig);
|
|
530
|
+
results.push(result);
|
|
531
|
+
}
|
|
532
|
+
return results;
|
|
533
|
+
}
|
|
534
|
+
/** Run all guards and return a single pass/fail with all detections */
|
|
535
|
+
async scan(text) {
|
|
536
|
+
const results = await this.analyze(text);
|
|
537
|
+
const passed = results.every((r) => r.passed);
|
|
538
|
+
let redactedText;
|
|
539
|
+
for (const result of results) {
|
|
540
|
+
if (result.redactedText) {
|
|
541
|
+
redactedText = result.redactedText;
|
|
542
|
+
}
|
|
543
|
+
}
|
|
544
|
+
return { passed, results, redactedText };
|
|
545
|
+
}
|
|
546
|
+
/** Get list of registered guard names */
|
|
547
|
+
get guardNames() {
|
|
548
|
+
return this.guards.map((g) => g.name);
|
|
549
|
+
}
|
|
550
|
+
/** Whether the cascade is ready (BERT model loaded if enabled) */
|
|
551
|
+
get cascadeReady() {
|
|
552
|
+
if (!this.cascade) return true;
|
|
553
|
+
return this.cascade.ready;
|
|
554
|
+
}
|
|
555
|
+
/** Initialize or update the cascade classifier */
|
|
556
|
+
initCascade(config) {
|
|
557
|
+
this.cascade = new CascadeClassifier(config);
|
|
558
|
+
for (const guard of this.guards) {
|
|
559
|
+
this.cascade.addRegexGuard(guard);
|
|
560
|
+
}
|
|
561
|
+
return this.cascade;
|
|
562
|
+
}
|
|
563
|
+
/** Run the full cascade (regex + BERT + optional LLM) */
|
|
564
|
+
async deepScan(text) {
|
|
565
|
+
if (!this.cascade) {
|
|
566
|
+
return this.analyze(text);
|
|
567
|
+
}
|
|
568
|
+
const cascadeResult = await this.cascade.deepScan(text);
|
|
569
|
+
return [cascadeResult];
|
|
570
|
+
}
|
|
571
|
+
/** Run regex + BERT only (no LLM) */
|
|
572
|
+
async modelScan(text) {
|
|
573
|
+
if (!this.cascade) {
|
|
574
|
+
return this.analyze(text);
|
|
575
|
+
}
|
|
576
|
+
const cascadeResult = await this.cascade.modelScan(text);
|
|
577
|
+
return [cascadeResult];
|
|
578
|
+
}
|
|
579
|
+
/** Update engine configuration */
|
|
580
|
+
updateConfig(config) {
|
|
581
|
+
this.config = { ...this.config, ...config };
|
|
582
|
+
if (config.guards) {
|
|
583
|
+
this.config.guards = { ...this.config.guards, ...config.guards };
|
|
584
|
+
}
|
|
585
|
+
}
|
|
586
|
+
/** Run all guards and return risk assessment alongside results */
|
|
587
|
+
async policyScan(text, policy) {
|
|
588
|
+
const { passed, results, redactedText } = await this.scan(text);
|
|
589
|
+
const risk = assessRisk(results, policy);
|
|
590
|
+
return { passed, risk, results, redactedText };
|
|
591
|
+
}
|
|
592
|
+
/** Clean up resources (terminate BERT worker, etc.) */
|
|
593
|
+
async dispose() {
|
|
594
|
+
if (this.cascade) {
|
|
595
|
+
await this.cascade.dispose();
|
|
596
|
+
this.cascade = null;
|
|
597
|
+
}
|
|
598
|
+
}
|
|
599
|
+
};
|
|
600
|
+
|
|
601
|
+
// src/guards/base.guard.ts
|
|
602
|
+
var CONTEXT_RADIUS = 150;
|
|
603
|
+
var DEFAULT_CONFIG = {
|
|
604
|
+
enabled: true,
|
|
605
|
+
threshold: 0.5,
|
|
606
|
+
mode: "redact"
|
|
607
|
+
};
|
|
608
|
+
var BaseGuard = class {
|
|
609
|
+
mergeConfig(config) {
|
|
610
|
+
return { ...DEFAULT_CONFIG, ...config };
|
|
611
|
+
}
|
|
612
|
+
/** Build a GuardResult from detections */
|
|
613
|
+
buildResult(text, detections, mode) {
|
|
614
|
+
const passed = detections.length === 0;
|
|
615
|
+
const score = detections.length > 0 ? Math.max(...detections.map((d) => d.score)) : 0;
|
|
616
|
+
let reason;
|
|
617
|
+
if (passed) {
|
|
618
|
+
reason = "No issues detected";
|
|
619
|
+
} else {
|
|
620
|
+
const types = [...new Set(detections.map((d) => d.entityType))];
|
|
621
|
+
reason = `Detected: ${types.join(", ")}`;
|
|
622
|
+
}
|
|
623
|
+
const result = {
|
|
624
|
+
passed,
|
|
625
|
+
reason,
|
|
626
|
+
guardName: this.name,
|
|
627
|
+
score,
|
|
628
|
+
detections
|
|
629
|
+
};
|
|
630
|
+
if (mode === "redact" && !passed) {
|
|
631
|
+
result.redactedText = this.applyRedactions(text, detections);
|
|
632
|
+
}
|
|
633
|
+
return result;
|
|
634
|
+
}
|
|
635
|
+
/** Extract surrounding context for a detection */
|
|
636
|
+
extractContext(text, start, end) {
|
|
637
|
+
const ctxStart = Math.max(0, start - CONTEXT_RADIUS);
|
|
638
|
+
const ctxEnd = Math.min(text.length, end + CONTEXT_RADIUS);
|
|
639
|
+
return text.slice(ctxStart, ctxEnd);
|
|
640
|
+
}
|
|
641
|
+
/** Create a detection with provenance fields pre-filled for regex source */
|
|
642
|
+
makeDetection(text, partial, source = "regex", disposition = "confirmed") {
|
|
643
|
+
return {
|
|
644
|
+
...partial,
|
|
645
|
+
source,
|
|
646
|
+
context: this.extractContext(text, partial.start, partial.end),
|
|
647
|
+
disposition
|
|
648
|
+
};
|
|
649
|
+
}
|
|
650
|
+
/** Replace detected text with [REDACTED-TYPE] markers */
|
|
651
|
+
applyRedactions(text, detections) {
|
|
652
|
+
const sorted = [...detections].sort((a, b) => b.start - a.start);
|
|
653
|
+
let result = text;
|
|
654
|
+
for (const detection of sorted) {
|
|
655
|
+
const replacement = `[REDACTED-${detection.entityType}]`;
|
|
656
|
+
result = result.slice(0, detection.start) + replacement + result.slice(detection.end);
|
|
657
|
+
}
|
|
658
|
+
return result;
|
|
659
|
+
}
|
|
660
|
+
};
|
|
661
|
+
|
|
662
|
+
// src/validators/checksums.ts
|
|
663
|
+
function luhn(value) {
|
|
664
|
+
const digits = value.replace(/\D/g, "");
|
|
665
|
+
if (digits.length === 0) return false;
|
|
666
|
+
let sum = 0;
|
|
667
|
+
let alternate = false;
|
|
668
|
+
for (let i = digits.length - 1; i >= 0; i--) {
|
|
669
|
+
let n = parseInt(digits[i], 10);
|
|
670
|
+
if (alternate) {
|
|
671
|
+
n *= 2;
|
|
672
|
+
if (n > 9) n -= 9;
|
|
673
|
+
}
|
|
674
|
+
sum += n;
|
|
675
|
+
alternate = !alternate;
|
|
676
|
+
}
|
|
677
|
+
return sum % 10 === 0;
|
|
678
|
+
}
|
|
679
|
+
function ibanMod97(iban) {
|
|
680
|
+
const cleaned = iban.replace(/[\s-]/g, "").toUpperCase();
|
|
681
|
+
if (cleaned.length < 4) return false;
|
|
682
|
+
const rearranged = cleaned.slice(4) + cleaned.slice(0, 4);
|
|
683
|
+
let numeric = "";
|
|
684
|
+
for (const char of rearranged) {
|
|
685
|
+
const code = char.charCodeAt(0);
|
|
686
|
+
if (code >= 65 && code <= 90) {
|
|
687
|
+
numeric += (code - 55).toString();
|
|
688
|
+
} else {
|
|
689
|
+
numeric += char;
|
|
690
|
+
}
|
|
691
|
+
}
|
|
692
|
+
let remainder = 0;
|
|
693
|
+
for (const char of numeric) {
|
|
694
|
+
remainder = (remainder * 10 + parseInt(char, 10)) % 97;
|
|
695
|
+
}
|
|
696
|
+
return remainder === 1;
|
|
697
|
+
}
|
|
698
|
+
function abaRouting(value) {
|
|
699
|
+
const digits = value.replace(/\D/g, "");
|
|
700
|
+
if (digits.length !== 9) return false;
|
|
701
|
+
const weights = [3, 7, 1, 3, 7, 1, 3, 7, 1];
|
|
702
|
+
let sum = 0;
|
|
703
|
+
for (let i = 0; i < 9; i++) {
|
|
704
|
+
sum += parseInt(digits[i], 10) * weights[i];
|
|
705
|
+
}
|
|
706
|
+
return sum % 10 === 0;
|
|
707
|
+
}
|
|
708
|
+
function npiLuhn(value) {
|
|
709
|
+
const digits = value.replace(/\D/g, "");
|
|
710
|
+
if (digits.length !== 10) return false;
|
|
711
|
+
const prefixed = "80840" + digits;
|
|
712
|
+
const nums = prefixed.split("").map(Number);
|
|
713
|
+
let checksum = 0;
|
|
714
|
+
for (let i = nums.length - 1; i >= 0; i--) {
|
|
715
|
+
const pos = nums.length - 1 - i;
|
|
716
|
+
let n = nums[i];
|
|
717
|
+
if (pos % 2 === 1) {
|
|
718
|
+
n *= 2;
|
|
719
|
+
if (n > 9) n -= 9;
|
|
720
|
+
}
|
|
721
|
+
checksum += n;
|
|
722
|
+
}
|
|
723
|
+
return checksum % 10 === 0;
|
|
724
|
+
}
|
|
725
|
+
function deaChecksum(value) {
|
|
726
|
+
const cleaned = value.replace(/[\s-]/g, "");
|
|
727
|
+
if (cleaned.length < 3) return false;
|
|
728
|
+
const numericPart = cleaned.slice(2);
|
|
729
|
+
const digits = numericPart.split("").map(Number);
|
|
730
|
+
if (digits.some(isNaN)) return false;
|
|
731
|
+
const check = digits.pop();
|
|
732
|
+
const even = digits.filter((_, i) => i % 2 === 0);
|
|
733
|
+
const odd = digits.filter((_, i) => i % 2 === 1);
|
|
734
|
+
const sum = 2 * even.reduce((a, b) => a + b, 0) + odd.reduce((a, b) => a + b, 0);
|
|
735
|
+
return (sum - check) % 10 === 0;
|
|
736
|
+
}
|
|
737
|
+
function shannonEntropy(value) {
|
|
738
|
+
if (value.length === 0) return 0;
|
|
739
|
+
const freq = /* @__PURE__ */ new Map();
|
|
740
|
+
for (const char of value) {
|
|
741
|
+
freq.set(char, (freq.get(char) || 0) + 1);
|
|
742
|
+
}
|
|
743
|
+
let entropy = 0;
|
|
744
|
+
for (const count of freq.values()) {
|
|
745
|
+
const p = count / value.length;
|
|
746
|
+
if (p > 0) {
|
|
747
|
+
entropy -= p * Math.log2(p);
|
|
748
|
+
}
|
|
749
|
+
}
|
|
750
|
+
return entropy;
|
|
751
|
+
}
|
|
752
|
+
function validateSsn(value) {
|
|
753
|
+
const digits = value.replace(/\D/g, "");
|
|
754
|
+
if (digits.length !== 9) return false;
|
|
755
|
+
if (digits.split("").every((c) => c === digits[0])) return false;
|
|
756
|
+
if (digits.slice(3, 5) === "00") return false;
|
|
757
|
+
if (digits.slice(5) === "0000") return false;
|
|
758
|
+
const invalidPrefixes = ["000", "666", "123456789", "98765432", "078051120"];
|
|
759
|
+
for (const prefix of invalidPrefixes) {
|
|
760
|
+
if (digits.startsWith(prefix)) return false;
|
|
761
|
+
}
|
|
762
|
+
if (digits[0] === "9") return false;
|
|
763
|
+
return true;
|
|
764
|
+
}
|
|
765
|
+
function validateMac(value) {
|
|
766
|
+
const cleaned = value.replace(/[:\-.]/g, "").toUpperCase();
|
|
767
|
+
if (!/^[0-9A-F]{12}$/.test(cleaned)) return false;
|
|
768
|
+
if (cleaned === "FFFFFFFFFFFF") return false;
|
|
769
|
+
if (cleaned === "000000000000") return false;
|
|
770
|
+
return true;
|
|
771
|
+
}
|
|
772
|
+
|
|
773
|
+
// src/patterns/pii/generic.ts
|
|
774
|
+
var CREDIT_CARD = {
|
|
775
|
+
entityType: "CREDIT_CARD",
|
|
776
|
+
patterns: [
|
|
777
|
+
/\b(?!1\d{12}(?!\d))((4\d{3})|(5[0-5]\d{2})|(6\d{3})|(1\d{3})|(3\d{3}))[- ]?(\d{3,4})[- ]?(\d{3,4})[- ]?(\d{3,5})\b/g
|
|
778
|
+
],
|
|
779
|
+
validate: (match) => luhn(match.replace(/[\s-]/g, "")),
|
|
780
|
+
contextWords: [
|
|
781
|
+
"credit",
|
|
782
|
+
"card",
|
|
783
|
+
"visa",
|
|
784
|
+
"mastercard",
|
|
785
|
+
"cc",
|
|
786
|
+
"amex",
|
|
787
|
+
"discover",
|
|
788
|
+
"jcb",
|
|
789
|
+
"diners",
|
|
790
|
+
"maestro",
|
|
791
|
+
"instapayment"
|
|
792
|
+
],
|
|
793
|
+
baseConfidence: "medium",
|
|
794
|
+
baseScore: 0.3
|
|
795
|
+
};
|
|
796
|
+
var EMAIL_ADDRESS = {
|
|
797
|
+
entityType: "EMAIL_ADDRESS",
|
|
798
|
+
patterns: [
|
|
799
|
+
/\b(?:[a-zA-Z0-9!#$%&'*+\-/=?^_`{|}~](?:[a-zA-Z0-9!#$%&'*+\-/=?^_`{|}~.]{0,62}[a-zA-Z0-9!#$%&'*+\-/=?^_`{|}~])?)@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*\.[a-zA-Z]{2,}\b/g
|
|
800
|
+
],
|
|
801
|
+
contextWords: ["email", "e-mail", "mail"],
|
|
802
|
+
baseConfidence: "medium",
|
|
803
|
+
baseScore: 0.5
|
|
804
|
+
};
|
|
805
|
+
var IBAN_CODE = {
|
|
806
|
+
entityType: "IBAN_CODE",
|
|
807
|
+
patterns: [
|
|
808
|
+
/(?<![A-Z0-9])([A-Z]{2}[0-9]{2}(?:[ -]?[A-Z0-9]{4}){2,7}(?:[ -]?[A-Z0-9]{1,4})?)(?![A-Z0-9])/g
|
|
809
|
+
],
|
|
810
|
+
validate: (match) => ibanMod97(match),
|
|
811
|
+
contextWords: ["iban", "bank", "transaction"],
|
|
812
|
+
baseConfidence: "medium",
|
|
813
|
+
baseScore: 0.5
|
|
814
|
+
};
|
|
815
|
+
var IP_ADDRESS = {
|
|
816
|
+
entityType: "IP_ADDRESS",
|
|
817
|
+
patterns: [
|
|
818
|
+
// IPv4
|
|
819
|
+
/\b(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\b/g,
|
|
820
|
+
// IPv6 (simplified — common formats)
|
|
821
|
+
/\b(?:[0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}\b/g,
|
|
822
|
+
/\b(?:[0-9a-fA-F]{1,4}:){1,7}:\b/g,
|
|
823
|
+
/\b(?:[0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}\b/g
|
|
824
|
+
],
|
|
825
|
+
contextWords: ["ip", "ipv4", "ipv6", "address"],
|
|
826
|
+
baseConfidence: "medium",
|
|
827
|
+
baseScore: 0.6
|
|
828
|
+
};
|
|
829
|
+
var MAC_ADDRESS = {
|
|
830
|
+
entityType: "MAC_ADDRESS",
|
|
831
|
+
patterns: [
|
|
832
|
+
// Colon or hyphen separated
|
|
833
|
+
/\b[0-9A-Fa-f]{2}([:-])(?:[0-9A-Fa-f]{2}\1){4}[0-9A-Fa-f]{2}\b/g,
|
|
834
|
+
// Cisco dot format
|
|
835
|
+
/\b[0-9A-Fa-f]{4}\.[0-9A-Fa-f]{4}\.[0-9A-Fa-f]{4}\b/g
|
|
836
|
+
],
|
|
837
|
+
validate: validateMac,
|
|
838
|
+
contextWords: [
|
|
839
|
+
"mac",
|
|
840
|
+
"mac address",
|
|
841
|
+
"hardware address",
|
|
842
|
+
"physical address",
|
|
843
|
+
"ethernet"
|
|
844
|
+
],
|
|
845
|
+
baseConfidence: "medium",
|
|
846
|
+
baseScore: 0.6
|
|
847
|
+
};
|
|
848
|
+
var PHONE_NUMBER = {
|
|
849
|
+
entityType: "PHONE_NUMBER",
|
|
850
|
+
patterns: [
|
|
851
|
+
// International format
|
|
852
|
+
/\b\+?1?[\s.-]?\(?\d{3}\)?[\s.-]?\d{3}[\s.-]?\d{4}\b/g,
|
|
853
|
+
// International with country code
|
|
854
|
+
/\b\+\d{1,3}[\s.-]?\(?\d{1,4}\)?[\s.-]?\d{2,4}[\s.-]?\d{2,4}[\s.-]?\d{0,4}\b/g
|
|
855
|
+
],
|
|
856
|
+
contextWords: [
|
|
857
|
+
"phone",
|
|
858
|
+
"number",
|
|
859
|
+
"telephone",
|
|
860
|
+
"cell",
|
|
861
|
+
"cellphone",
|
|
862
|
+
"mobile",
|
|
863
|
+
"call",
|
|
864
|
+
"tel",
|
|
865
|
+
"fax"
|
|
866
|
+
],
|
|
867
|
+
baseConfidence: "low",
|
|
868
|
+
baseScore: 0.4
|
|
869
|
+
};
|
|
870
|
+
var URL = {
|
|
871
|
+
entityType: "URL",
|
|
872
|
+
patterns: [
|
|
873
|
+
/\bhttps?:\/\/[^\s<>"']+/gi,
|
|
874
|
+
/\bwww\.[^\s<>"']+/gi
|
|
875
|
+
],
|
|
876
|
+
contextWords: ["url", "website", "link", "href"],
|
|
877
|
+
baseConfidence: "medium",
|
|
878
|
+
baseScore: 0.6
|
|
879
|
+
};
|
|
880
|
+
var CRYPTO = {
|
|
881
|
+
entityType: "CRYPTO",
|
|
882
|
+
patterns: [
|
|
883
|
+
// Bitcoin addresses (P2PKH, P2SH, Bech32)
|
|
884
|
+
/(bc1|[13])[a-zA-HJ-NP-Z0-9]{25,59}/g
|
|
885
|
+
],
|
|
886
|
+
contextWords: ["wallet", "btc", "bitcoin", "crypto", "blockchain"],
|
|
887
|
+
baseConfidence: "medium",
|
|
888
|
+
baseScore: 0.5
|
|
889
|
+
};
|
|
890
|
+
var DATE_TIME = {
|
|
891
|
+
entityType: "DATE_TIME",
|
|
892
|
+
patterns: [
|
|
893
|
+
// ISO 8601
|
|
894
|
+
/\b\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d(?:\.\d+)?(?:[+-][0-2]\d:[0-5]\d|Z)\b/g,
|
|
895
|
+
// mm/dd/yyyy or dd/mm/yyyy
|
|
896
|
+
/\b(?:[0-3]?\d[/.-][0-3]?\d[/.-](?:\d{4}|\d{2}))\b/g,
|
|
897
|
+
// yyyy-mm-dd
|
|
898
|
+
/\b\d{4}[/.-](?:0?[1-9]|1[0-2])[/.-](?:0?[1-9]|[12]\d|3[01])\b/g
|
|
899
|
+
],
|
|
900
|
+
contextWords: ["date", "birthday", "born", "dob"],
|
|
901
|
+
baseConfidence: "low",
|
|
902
|
+
baseScore: 0.3
|
|
903
|
+
};
|
|
904
|
+
var GENERIC_PATTERNS = [
|
|
905
|
+
CREDIT_CARD,
|
|
906
|
+
EMAIL_ADDRESS,
|
|
907
|
+
IBAN_CODE,
|
|
908
|
+
IP_ADDRESS,
|
|
909
|
+
MAC_ADDRESS,
|
|
910
|
+
PHONE_NUMBER,
|
|
911
|
+
URL,
|
|
912
|
+
CRYPTO,
|
|
913
|
+
DATE_TIME
|
|
914
|
+
];
|
|
915
|
+
|
|
916
|
+
// src/patterns/pii/us.ts
|
|
917
|
+
var US_SSN = {
|
|
918
|
+
entityType: "US_SSN",
|
|
919
|
+
patterns: [
|
|
920
|
+
// SSN with delimiters (medium confidence)
|
|
921
|
+
/\b(\d{3})[- .](\d{2})[- .](\d{4})\b/g,
|
|
922
|
+
// SSN without delimiters (very weak — needs context)
|
|
923
|
+
/\b\d{9}\b/g
|
|
924
|
+
],
|
|
925
|
+
validate: validateSsn,
|
|
926
|
+
contextWords: ["social", "security", "ssn", "ssns", "ssid"],
|
|
927
|
+
baseConfidence: "medium",
|
|
928
|
+
baseScore: 0.5
|
|
929
|
+
};
|
|
930
|
+
var US_DRIVER_LICENSE = {
|
|
931
|
+
entityType: "US_DRIVER_LICENSE",
|
|
932
|
+
patterns: [
|
|
933
|
+
// State-specific alphanumeric formats
|
|
934
|
+
/\b(?:[A-Z]\d{3,6}|[A-Z]\d{5,9}|[A-Z]\d{6,8}|[A-Z]\d{4,8}|[A-Z]\d{9,11}|[A-Z]{1,2}\d{5,6}|H\d{8}|V\d{6}|X\d{8}|[A-Z]{2}\d{2,5}|[A-Z]{2}\d{3,7}|\d{2}[A-Z]{3}\d{5,6}|[A-Z]\d{13,14}|[A-Z]\d{18}|[A-Z]\d{6}R|[A-Z]\d{9}|[A-Z]\d{1,12}|\d{9}[A-Z]|[A-Z]{2}\d{6}[A-Z]|\d{8}[A-Z]{2}|\d{3}[A-Z]{2}\d{4}|[A-Z]\d[A-Z]\d[A-Z]|\d{7,8}[A-Z])\b/g
|
|
935
|
+
],
|
|
936
|
+
contextWords: [
|
|
937
|
+
"driver",
|
|
938
|
+
"license",
|
|
939
|
+
"permit",
|
|
940
|
+
"lic",
|
|
941
|
+
"identification",
|
|
942
|
+
"dls",
|
|
943
|
+
"cdls",
|
|
944
|
+
"driving"
|
|
945
|
+
],
|
|
946
|
+
baseConfidence: "low",
|
|
947
|
+
baseScore: 0.3
|
|
948
|
+
};
|
|
949
|
+
var US_PASSPORT = {
|
|
950
|
+
entityType: "US_PASSPORT",
|
|
951
|
+
patterns: [
|
|
952
|
+
// Next generation passport (letter + 8 digits)
|
|
953
|
+
/\b[A-Z]\d{8}\b/g,
|
|
954
|
+
// Standard passport (9 digits)
|
|
955
|
+
/\b\d{9}\b/g
|
|
956
|
+
],
|
|
957
|
+
contextWords: [
|
|
958
|
+
"us",
|
|
959
|
+
"united",
|
|
960
|
+
"states",
|
|
961
|
+
"passport",
|
|
962
|
+
"travel",
|
|
963
|
+
"document"
|
|
964
|
+
],
|
|
965
|
+
baseConfidence: "low",
|
|
966
|
+
baseScore: 0.1
|
|
967
|
+
};
|
|
968
|
+
var US_BANK_NUMBER = {
|
|
969
|
+
entityType: "US_BANK_NUMBER",
|
|
970
|
+
patterns: [/\b\d{8,17}\b/g],
|
|
971
|
+
contextWords: [
|
|
972
|
+
"check",
|
|
973
|
+
"account",
|
|
974
|
+
"acct",
|
|
975
|
+
"bank",
|
|
976
|
+
"save",
|
|
977
|
+
"debit",
|
|
978
|
+
"routing"
|
|
979
|
+
],
|
|
980
|
+
baseConfidence: "low",
|
|
981
|
+
baseScore: 0.05
|
|
982
|
+
};
|
|
983
|
+
var US_ITIN = {
|
|
984
|
+
entityType: "US_ITIN",
|
|
985
|
+
patterns: [
|
|
986
|
+
// With delimiters (medium)
|
|
987
|
+
/\b9\d{2}[- ](5\d|6[0-5]|7\d|8[0-8]|9[0-24-9])[- ]\d{4}\b/g,
|
|
988
|
+
// Without delimiters (weak)
|
|
989
|
+
/\b9\d{2}(5\d|6[0-5]|7\d|8[0-8]|9[0-24-9])\d{4}\b/g
|
|
990
|
+
],
|
|
991
|
+
contextWords: [
|
|
992
|
+
"individual",
|
|
993
|
+
"taxpayer",
|
|
994
|
+
"itin",
|
|
995
|
+
"tax",
|
|
996
|
+
"payer",
|
|
997
|
+
"taxid",
|
|
998
|
+
"tin"
|
|
999
|
+
],
|
|
1000
|
+
baseConfidence: "medium",
|
|
1001
|
+
baseScore: 0.5
|
|
1002
|
+
};
|
|
1003
|
+
var US_MBI = {
|
|
1004
|
+
entityType: "US_MBI",
|
|
1005
|
+
patterns: (() => {
|
|
1006
|
+
const A = "[ACDEFGHJKMNPQRTUVWXY]";
|
|
1007
|
+
const AN = "[0-9ACDEFGHJKMNPQRTUVWXY]";
|
|
1008
|
+
const N = "[0-9]";
|
|
1009
|
+
const base = `${N}${A}${AN}${N}${A}${AN}${N}${A}${A}${N}${N}`;
|
|
1010
|
+
const withDash = `${N}${A}${AN}${N}-${A}${AN}${N}-${A}${A}${N}${N}`;
|
|
1011
|
+
return [new RegExp(`\\b${base}\\b`, "g"), new RegExp(`\\b${withDash}\\b`, "g")];
|
|
1012
|
+
})(),
|
|
1013
|
+
contextWords: [
|
|
1014
|
+
"medicare",
|
|
1015
|
+
"mbi",
|
|
1016
|
+
"beneficiary",
|
|
1017
|
+
"cms",
|
|
1018
|
+
"medicaid",
|
|
1019
|
+
"hic",
|
|
1020
|
+
"hicn"
|
|
1021
|
+
],
|
|
1022
|
+
baseConfidence: "medium",
|
|
1023
|
+
baseScore: 0.5
|
|
1024
|
+
};
|
|
1025
|
+
var US_NPI = {
|
|
1026
|
+
entityType: "US_NPI",
|
|
1027
|
+
patterns: [
|
|
1028
|
+
// With delimiters
|
|
1029
|
+
/\b[12]\d{3}[ -]\d{3}[ -]\d{3}\b/g,
|
|
1030
|
+
// Without delimiters
|
|
1031
|
+
/\b[12]\d{9}\b/g
|
|
1032
|
+
],
|
|
1033
|
+
validate: (match) => {
|
|
1034
|
+
const digits = match.replace(/\D/g, "");
|
|
1035
|
+
if (digits.length > 1 && new Set(digits.slice(0, -1)).size === 1) return false;
|
|
1036
|
+
return npiLuhn(match);
|
|
1037
|
+
},
|
|
1038
|
+
contextWords: [
|
|
1039
|
+
"npi",
|
|
1040
|
+
"national provider",
|
|
1041
|
+
"provider",
|
|
1042
|
+
"provider id",
|
|
1043
|
+
"provider identifier",
|
|
1044
|
+
"taxonomy"
|
|
1045
|
+
],
|
|
1046
|
+
baseConfidence: "low",
|
|
1047
|
+
baseScore: 0.1
|
|
1048
|
+
};
|
|
1049
|
+
var ABA_ROUTING_NUMBER = {
|
|
1050
|
+
entityType: "ABA_ROUTING_NUMBER",
|
|
1051
|
+
patterns: [
|
|
1052
|
+
// With dashes
|
|
1053
|
+
/\b[0123678]\d{3}-\d{4}-\d\b/g,
|
|
1054
|
+
// Without dashes
|
|
1055
|
+
/\b[0123678]\d{8}\b/g
|
|
1056
|
+
],
|
|
1057
|
+
validate: abaRouting,
|
|
1058
|
+
contextWords: [
|
|
1059
|
+
"aba",
|
|
1060
|
+
"routing",
|
|
1061
|
+
"abarouting",
|
|
1062
|
+
"association",
|
|
1063
|
+
"bankrouting"
|
|
1064
|
+
],
|
|
1065
|
+
baseConfidence: "low",
|
|
1066
|
+
baseScore: 0.05
|
|
1067
|
+
};
|
|
1068
|
+
var MEDICAL_LICENSE = {
|
|
1069
|
+
entityType: "MEDICAL_LICENSE",
|
|
1070
|
+
patterns: [
|
|
1071
|
+
/[abcdefghjklmprstuxABCDEFGHJKLMPRSTUX][a-zA-Z]\d{7}/g,
|
|
1072
|
+
/[abcdefghjklmprstuxABCDEFGHJKLMPRSTUX]9\d{7}/g
|
|
1073
|
+
],
|
|
1074
|
+
validate: deaChecksum,
|
|
1075
|
+
contextWords: ["medical", "certificate", "DEA", "dea"],
|
|
1076
|
+
baseConfidence: "low",
|
|
1077
|
+
baseScore: 0.4
|
|
1078
|
+
};
|
|
1079
|
+
var US_PATTERNS = [
|
|
1080
|
+
US_SSN,
|
|
1081
|
+
US_DRIVER_LICENSE,
|
|
1082
|
+
US_PASSPORT,
|
|
1083
|
+
US_BANK_NUMBER,
|
|
1084
|
+
US_ITIN,
|
|
1085
|
+
US_MBI,
|
|
1086
|
+
US_NPI,
|
|
1087
|
+
ABA_ROUTING_NUMBER,
|
|
1088
|
+
MEDICAL_LICENSE
|
|
1089
|
+
];
|
|
1090
|
+
|
|
1091
|
+
// src/patterns/pii/uk.ts
|
|
1092
|
+
var UK_NHS = {
|
|
1093
|
+
entityType: "UK_NHS",
|
|
1094
|
+
patterns: [/\b(\d{3})[- ]?(\d{3})[- ]?(\d{4})\b/g],
|
|
1095
|
+
validate: (match) => {
|
|
1096
|
+
const digits = match.replace(/\D/g, "");
|
|
1097
|
+
if (digits.length !== 10) return false;
|
|
1098
|
+
let total = 0;
|
|
1099
|
+
for (let i = 0; i < 10; i++) {
|
|
1100
|
+
total += parseInt(digits[i], 10) * (10 - i);
|
|
1101
|
+
}
|
|
1102
|
+
return total % 11 === 0;
|
|
1103
|
+
},
|
|
1104
|
+
contextWords: [
|
|
1105
|
+
"national health service",
|
|
1106
|
+
"nhs",
|
|
1107
|
+
"health services authority",
|
|
1108
|
+
"health authority"
|
|
1109
|
+
],
|
|
1110
|
+
baseConfidence: "medium",
|
|
1111
|
+
baseScore: 0.5
|
|
1112
|
+
};
|
|
1113
|
+
var UK_NINO = {
|
|
1114
|
+
entityType: "UK_NINO",
|
|
1115
|
+
patterns: [
|
|
1116
|
+
/\b(?!BG|GB|NK|KN|NT|TN|ZZ)(?:[A-CEGHJ-PR-TW-Z][A-CEGHJ-NPR-TW-Z])\s?\d{2}\s?\d{2}\s?\d{2}\s?[A-D]\b/gi
|
|
1117
|
+
],
|
|
1118
|
+
contextWords: ["national insurance", "ni number", "nino"],
|
|
1119
|
+
baseConfidence: "medium",
|
|
1120
|
+
baseScore: 0.5
|
|
1121
|
+
};
|
|
1122
|
+
var UK_PASSPORT = {
|
|
1123
|
+
entityType: "UK_PASSPORT",
|
|
1124
|
+
patterns: [/\b[A-Z]{2}\d{7}\b/g],
|
|
1125
|
+
contextWords: [
|
|
1126
|
+
"passport",
|
|
1127
|
+
"passport number",
|
|
1128
|
+
"travel document",
|
|
1129
|
+
"uk passport",
|
|
1130
|
+
"british passport",
|
|
1131
|
+
"her majesty",
|
|
1132
|
+
"his majesty",
|
|
1133
|
+
"hm passport",
|
|
1134
|
+
"hmpo"
|
|
1135
|
+
],
|
|
1136
|
+
baseConfidence: "low",
|
|
1137
|
+
baseScore: 0.1
|
|
1138
|
+
};
|
|
1139
|
+
var UK_POSTCODE = {
|
|
1140
|
+
entityType: "UK_POSTCODE",
|
|
1141
|
+
patterns: [
|
|
1142
|
+
/\b(?:GIR\s?0AA|[A-PR-UWYZ]\d[A-HJKPSTUW]?\s?\d[ABD-HJLNP-UW-Z]{2}|[A-PR-UWYZ]\d{2}\s?\d[ABD-HJLNP-UW-Z]{2}|[A-PR-UWYZ][A-HK-Y]\d[ABEHMNPRVWXY]?\s?\d[ABD-HJLNP-UW-Z]{2}|[A-PR-UWYZ][A-HK-Y]\d{2}\s?\d[ABD-HJLNP-UW-Z]{2})\b/g
|
|
1143
|
+
],
|
|
1144
|
+
contextWords: [
|
|
1145
|
+
"postcode",
|
|
1146
|
+
"post code",
|
|
1147
|
+
"postal code",
|
|
1148
|
+
"zip",
|
|
1149
|
+
"address",
|
|
1150
|
+
"delivery",
|
|
1151
|
+
"mailing",
|
|
1152
|
+
"shipping"
|
|
1153
|
+
],
|
|
1154
|
+
baseConfidence: "low",
|
|
1155
|
+
baseScore: 0.1
|
|
1156
|
+
};
|
|
1157
|
+
var UK_VEHICLE_REGISTRATION = {
|
|
1158
|
+
entityType: "UK_VEHICLE_REGISTRATION",
|
|
1159
|
+
patterns: [
|
|
1160
|
+
// Current format
|
|
1161
|
+
/\b[A-HJ-PR-Y]{2}(?:0[1-9]|[1-7]\d)[- ]?[A-HJ-PR-Z]{3}\b/g,
|
|
1162
|
+
// Prefix format
|
|
1163
|
+
/\b[A-HJ-NPR-TV-Y]\d{1,3}[- ]?[A-HJ-PR-Y][A-HJ-PR-Z]{2}\b/g,
|
|
1164
|
+
// Suffix format
|
|
1165
|
+
/\b[A-HJ-PR-Z]{3}[- ]?\d{1,3}[- ]?[A-HJ-NPR-TV-Y]\b/g
|
|
1166
|
+
],
|
|
1167
|
+
contextWords: [
|
|
1168
|
+
"vehicle",
|
|
1169
|
+
"registration",
|
|
1170
|
+
"number plate",
|
|
1171
|
+
"licence plate",
|
|
1172
|
+
"license plate",
|
|
1173
|
+
"reg",
|
|
1174
|
+
"vrn",
|
|
1175
|
+
"dvla",
|
|
1176
|
+
"v5c",
|
|
1177
|
+
"mot",
|
|
1178
|
+
"car"
|
|
1179
|
+
],
|
|
1180
|
+
baseConfidence: "low",
|
|
1181
|
+
baseScore: 0.2
|
|
1182
|
+
};
|
|
1183
|
+
var UK_PATTERNS = [
|
|
1184
|
+
UK_NHS,
|
|
1185
|
+
UK_NINO,
|
|
1186
|
+
UK_PASSPORT,
|
|
1187
|
+
UK_POSTCODE,
|
|
1188
|
+
UK_VEHICLE_REGISTRATION
|
|
1189
|
+
];
|
|
1190
|
+
|
|
1191
|
+
// src/patterns/pii/eu.ts
|
|
1192
|
+
var ES_NIF = {
|
|
1193
|
+
entityType: "ES_NIF",
|
|
1194
|
+
patterns: [/\b\d{7,8}[-]?[A-Z]\b/g],
|
|
1195
|
+
validate: (match) => {
|
|
1196
|
+
const cleaned = match.replace(/-/g, "");
|
|
1197
|
+
const letter = cleaned.slice(-1);
|
|
1198
|
+
const number = parseInt(cleaned.replace(/[^0-9]/g, ""), 10);
|
|
1199
|
+
const letters = "TRWAGMYFPDXBNJZSQVHLCKE";
|
|
1200
|
+
return letter === letters[number % 23];
|
|
1201
|
+
},
|
|
1202
|
+
contextWords: ["documento nacional de identidad", "dni", "nif", "identificaci\xF3n"],
|
|
1203
|
+
baseConfidence: "medium",
|
|
1204
|
+
baseScore: 0.5
|
|
1205
|
+
};
|
|
1206
|
+
var ES_NIE = {
|
|
1207
|
+
entityType: "ES_NIE",
|
|
1208
|
+
patterns: [/\b[XYZ]\d{7}[-]?[A-Z]\b/g],
|
|
1209
|
+
validate: (match) => {
|
|
1210
|
+
const cleaned = match.replace(/-/g, "");
|
|
1211
|
+
if (cleaned.length < 8 || cleaned.length > 9) return false;
|
|
1212
|
+
const letter = cleaned.slice(-1);
|
|
1213
|
+
const prefix = "XYZ".indexOf(cleaned[0]);
|
|
1214
|
+
if (prefix === -1) return false;
|
|
1215
|
+
const number = parseInt(prefix.toString() + cleaned.slice(1, -1), 10);
|
|
1216
|
+
const letters = "TRWAGMYFPDXBNJZSQVHLCKE";
|
|
1217
|
+
return letter === letters[number % 23];
|
|
1218
|
+
},
|
|
1219
|
+
contextWords: ["n\xFAmero de identificaci\xF3n de extranjero", "nie"],
|
|
1220
|
+
baseConfidence: "medium",
|
|
1221
|
+
baseScore: 0.5
|
|
1222
|
+
};
|
|
1223
|
+
var IT_FISCAL_CODE = {
|
|
1224
|
+
entityType: "IT_FISCAL_CODE",
|
|
1225
|
+
patterns: [
|
|
1226
|
+
/(?:[A-Z][AEIOU][AEIOUX]|[AEIOU]X{2}|[B-DF-HJ-NP-TV-Z]{2}[A-Z]){2}(?:[\dLMNP-V]{2}(?:[A-EHLMPR-T](?:[04LQ][1-9MNP-V]|[15MR][\dLMNP-V]|[26NS][0-8LMNP-U])|[DHPS][37PT][0L]|[ACELMRT][37PT][01LM]|[AC-EHLMPR-T][26NS][9V])|(?:[02468LNQSU][048LQU]|[13579MPRTV][26NS])B[26NS][9V])(?:[A-MZ][1-9MNP-V][\dLMNP-V]{2}|[A-M][0L](?:[1-9MNP-V][\dLMNP-V]|[0L][1-9MNP-V]))[A-Z]/gi
|
|
1227
|
+
],
|
|
1228
|
+
validate: (match) => {
|
|
1229
|
+
const text = match.toUpperCase();
|
|
1230
|
+
if (text.length !== 16) return false;
|
|
1231
|
+
const control = text[15];
|
|
1232
|
+
const toValidate = text.slice(0, 15);
|
|
1233
|
+
const mapOdd = {
|
|
1234
|
+
"0": 1,
|
|
1235
|
+
"1": 0,
|
|
1236
|
+
"2": 5,
|
|
1237
|
+
"3": 7,
|
|
1238
|
+
"4": 9,
|
|
1239
|
+
"5": 13,
|
|
1240
|
+
"6": 15,
|
|
1241
|
+
"7": 17,
|
|
1242
|
+
"8": 19,
|
|
1243
|
+
"9": 21,
|
|
1244
|
+
A: 1,
|
|
1245
|
+
B: 0,
|
|
1246
|
+
C: 5,
|
|
1247
|
+
D: 7,
|
|
1248
|
+
E: 9,
|
|
1249
|
+
F: 13,
|
|
1250
|
+
G: 15,
|
|
1251
|
+
H: 17,
|
|
1252
|
+
I: 19,
|
|
1253
|
+
J: 21,
|
|
1254
|
+
K: 2,
|
|
1255
|
+
L: 4,
|
|
1256
|
+
M: 18,
|
|
1257
|
+
N: 20,
|
|
1258
|
+
O: 11,
|
|
1259
|
+
P: 3,
|
|
1260
|
+
Q: 6,
|
|
1261
|
+
R: 8,
|
|
1262
|
+
S: 12,
|
|
1263
|
+
T: 14,
|
|
1264
|
+
U: 16,
|
|
1265
|
+
V: 10,
|
|
1266
|
+
W: 22,
|
|
1267
|
+
X: 25,
|
|
1268
|
+
Y: 24,
|
|
1269
|
+
Z: 23
|
|
1270
|
+
};
|
|
1271
|
+
const mapEven = {
|
|
1272
|
+
"0": 0,
|
|
1273
|
+
"1": 1,
|
|
1274
|
+
"2": 2,
|
|
1275
|
+
"3": 3,
|
|
1276
|
+
"4": 4,
|
|
1277
|
+
"5": 5,
|
|
1278
|
+
"6": 6,
|
|
1279
|
+
"7": 7,
|
|
1280
|
+
"8": 8,
|
|
1281
|
+
"9": 9,
|
|
1282
|
+
A: 0,
|
|
1283
|
+
B: 1,
|
|
1284
|
+
C: 2,
|
|
1285
|
+
D: 3,
|
|
1286
|
+
E: 4,
|
|
1287
|
+
F: 5,
|
|
1288
|
+
G: 6,
|
|
1289
|
+
H: 7,
|
|
1290
|
+
I: 8,
|
|
1291
|
+
J: 9,
|
|
1292
|
+
K: 10,
|
|
1293
|
+
L: 11,
|
|
1294
|
+
M: 12,
|
|
1295
|
+
N: 13,
|
|
1296
|
+
O: 14,
|
|
1297
|
+
P: 15,
|
|
1298
|
+
Q: 16,
|
|
1299
|
+
R: 17,
|
|
1300
|
+
S: 18,
|
|
1301
|
+
T: 19,
|
|
1302
|
+
U: 20,
|
|
1303
|
+
V: 21,
|
|
1304
|
+
W: 22,
|
|
1305
|
+
X: 23,
|
|
1306
|
+
Y: 24,
|
|
1307
|
+
Z: 25
|
|
1308
|
+
};
|
|
1309
|
+
let oddSum = 0;
|
|
1310
|
+
let evenSum = 0;
|
|
1311
|
+
for (let i = 0; i < toValidate.length; i++) {
|
|
1312
|
+
if (i % 2 === 0) {
|
|
1313
|
+
oddSum += mapOdd[toValidate[i]] ?? 0;
|
|
1314
|
+
} else {
|
|
1315
|
+
evenSum += mapEven[toValidate[i]] ?? 0;
|
|
1316
|
+
}
|
|
1317
|
+
}
|
|
1318
|
+
const expected = String.fromCharCode(65 + (oddSum + evenSum) % 26);
|
|
1319
|
+
return expected === control;
|
|
1320
|
+
},
|
|
1321
|
+
contextWords: ["codice fiscale", "cf"],
|
|
1322
|
+
baseConfidence: "medium",
|
|
1323
|
+
baseScore: 0.3
|
|
1324
|
+
};
|
|
1325
|
+
var IT_DRIVER_LICENSE = {
|
|
1326
|
+
entityType: "IT_DRIVER_LICENSE",
|
|
1327
|
+
patterns: [
|
|
1328
|
+
/\b(?:[A-Z]{2}\d{7}[A-Z]|U1[BCDEFGHLJKMNPRSTUWYXZ0-9]{7}[A-Z])\b/gi
|
|
1329
|
+
],
|
|
1330
|
+
contextWords: ["patente", "patente di guida", "licenza", "licenza di guida"],
|
|
1331
|
+
baseConfidence: "low",
|
|
1332
|
+
baseScore: 0.2
|
|
1333
|
+
};
|
|
1334
|
+
var IT_VAT_CODE = {
|
|
1335
|
+
entityType: "IT_VAT_CODE",
|
|
1336
|
+
patterns: [/\b\d{11}\b/g],
|
|
1337
|
+
validate: (match) => {
|
|
1338
|
+
const digits = match.replace(/[\s_]/g, "");
|
|
1339
|
+
if (digits.length !== 11) return false;
|
|
1340
|
+
if (digits === "00000000000") return false;
|
|
1341
|
+
let x = 0;
|
|
1342
|
+
let y = 0;
|
|
1343
|
+
for (let i = 0; i < 5; i++) {
|
|
1344
|
+
x += parseInt(digits[2 * i], 10);
|
|
1345
|
+
let tmpY = parseInt(digits[2 * i + 1], 10) * 2;
|
|
1346
|
+
if (tmpY > 9) tmpY -= 9;
|
|
1347
|
+
y += tmpY;
|
|
1348
|
+
}
|
|
1349
|
+
const t = (x + y) % 10;
|
|
1350
|
+
const c = (10 - t) % 10;
|
|
1351
|
+
return c === parseInt(digits[10], 10);
|
|
1352
|
+
},
|
|
1353
|
+
contextWords: ["piva", "partita iva", "pi"],
|
|
1354
|
+
baseConfidence: "low",
|
|
1355
|
+
baseScore: 0.1
|
|
1356
|
+
};
|
|
1357
|
+
var IT_PASSPORT = {
|
|
1358
|
+
entityType: "IT_PASSPORT",
|
|
1359
|
+
patterns: [/\b[A-Z]{2}\d{7}\b/gi],
|
|
1360
|
+
contextWords: ["passaporto", "elettronico", "italiano", "viaggio", "documento"],
|
|
1361
|
+
baseConfidence: "low",
|
|
1362
|
+
baseScore: 0.01
|
|
1363
|
+
};
|
|
1364
|
+
var IT_IDENTITY_CARD = {
|
|
1365
|
+
entityType: "IT_IDENTITY_CARD",
|
|
1366
|
+
patterns: [
|
|
1367
|
+
/\b[A-Z]{2}\s?\d{7}\b/gi,
|
|
1368
|
+
/\b\d{7}[A-Z]{2}\b/gi,
|
|
1369
|
+
/\b[A-Z]{2}\d{5}[A-Z]{2}\b/gi
|
|
1370
|
+
],
|
|
1371
|
+
contextWords: ["carta", "identit\xE0", "elettronica", "cie", "documento"],
|
|
1372
|
+
baseConfidence: "low",
|
|
1373
|
+
baseScore: 0.01
|
|
1374
|
+
};
|
|
1375
|
+
var PL_PESEL = {
|
|
1376
|
+
entityType: "PL_PESEL",
|
|
1377
|
+
patterns: [
|
|
1378
|
+
/\b\d{2}(?:[02468][1-9]|[13579][012])(?:0[1-9]|[12]\d|3[01])\d{5}\b/g
|
|
1379
|
+
],
|
|
1380
|
+
validate: (match) => {
|
|
1381
|
+
if (match.length !== 11) return false;
|
|
1382
|
+
const digits = match.split("").map(Number);
|
|
1383
|
+
const weights = [1, 3, 7, 9, 1, 3, 7, 9, 1, 3];
|
|
1384
|
+
let checksum = 0;
|
|
1385
|
+
for (let i = 0; i < 10; i++) {
|
|
1386
|
+
checksum += digits[i] * weights[i];
|
|
1387
|
+
}
|
|
1388
|
+
return checksum % 10 === digits[10];
|
|
1389
|
+
},
|
|
1390
|
+
contextWords: ["pesel"],
|
|
1391
|
+
baseConfidence: "medium",
|
|
1392
|
+
baseScore: 0.4
|
|
1393
|
+
};
|
|
1394
|
+
var FI_PERSONAL_IDENTITY_CODE = {
|
|
1395
|
+
entityType: "FI_PERSONAL_IDENTITY_CODE",
|
|
1396
|
+
patterns: [
|
|
1397
|
+
/\b(\d{6})([+\-ABCDEFYXWVU])(\d{3})([0-9ABCDEFHJKLMNPRSTUVWXY])\b/g
|
|
1398
|
+
],
|
|
1399
|
+
validate: (match) => {
|
|
1400
|
+
if (match.length !== 11) return false;
|
|
1401
|
+
const datePart = match.slice(0, 6);
|
|
1402
|
+
const individual = match.slice(7, 10);
|
|
1403
|
+
const control = match[10];
|
|
1404
|
+
const validChars = "0123456789ABCDEFHJKLMNPRSTUVWXY";
|
|
1405
|
+
const num = parseInt(datePart + individual, 10);
|
|
1406
|
+
return validChars[num % 31] === control;
|
|
1407
|
+
},
|
|
1408
|
+
contextWords: ["hetu", "henkil\xF6tunnus", "personal identity code"],
|
|
1409
|
+
baseConfidence: "medium",
|
|
1410
|
+
baseScore: 0.5
|
|
1411
|
+
};
|
|
1412
|
+
var SE_PERSONNUMMER = {
|
|
1413
|
+
entityType: "SE_PERSONNUMMER",
|
|
1414
|
+
patterns: [/\b(\d{6,8})([-+]?)\d{4}\b/g],
|
|
1415
|
+
validate: (match) => {
|
|
1416
|
+
const digits = match.replace(/[-+]/g, "");
|
|
1417
|
+
const last10 = digits.slice(-10);
|
|
1418
|
+
if (last10.length !== 10) return false;
|
|
1419
|
+
const month = parseInt(last10.slice(2, 4), 10);
|
|
1420
|
+
let day = parseInt(last10.slice(4, 6), 10);
|
|
1421
|
+
if (day >= 61) day -= 60;
|
|
1422
|
+
if (month < 1 || month > 12 || day < 1 || day > 31) return false;
|
|
1423
|
+
const nums = last10.split("").map(Number);
|
|
1424
|
+
const check = nums[9];
|
|
1425
|
+
let sum = 0;
|
|
1426
|
+
for (let i = 0; i < 9; i++) {
|
|
1427
|
+
let d = nums[i];
|
|
1428
|
+
if (i % 2 === 0) {
|
|
1429
|
+
d *= 2;
|
|
1430
|
+
if (d > 9) d -= 9;
|
|
1431
|
+
}
|
|
1432
|
+
sum += d;
|
|
1433
|
+
}
|
|
1434
|
+
return (sum + check) % 10 === 0;
|
|
1435
|
+
},
|
|
1436
|
+
contextWords: [
|
|
1437
|
+
"personnummer",
|
|
1438
|
+
"svenskt personnummer",
|
|
1439
|
+
"svensk id",
|
|
1440
|
+
"personal identity number",
|
|
1441
|
+
"samordningsnummer"
|
|
1442
|
+
],
|
|
1443
|
+
baseConfidence: "medium",
|
|
1444
|
+
baseScore: 0.5
|
|
1445
|
+
};
|
|
1446
|
+
var DE_TAX_ID = {
|
|
1447
|
+
entityType: "DE_TAX_ID",
|
|
1448
|
+
patterns: [/\b[1-9]\d{10}\b/g],
|
|
1449
|
+
validate: (match) => {
|
|
1450
|
+
if (match.length !== 11 || !/^\d+$/.test(match)) return false;
|
|
1451
|
+
const digits = match.split("").map(Number);
|
|
1452
|
+
if (new Set(digits.slice(0, 10)).size === 1) return false;
|
|
1453
|
+
let product = 10;
|
|
1454
|
+
for (let i = 0; i < 10; i++) {
|
|
1455
|
+
let total = (digits[i] + product) % 10;
|
|
1456
|
+
if (total === 0) total = 10;
|
|
1457
|
+
product = total * 2 % 11;
|
|
1458
|
+
}
|
|
1459
|
+
let check = 11 - product;
|
|
1460
|
+
if (check === 10) check = 0;
|
|
1461
|
+
return check === digits[10];
|
|
1462
|
+
},
|
|
1463
|
+
contextWords: [
|
|
1464
|
+
"steueridentifikationsnummer",
|
|
1465
|
+
"steuer-id",
|
|
1466
|
+
"steuerid",
|
|
1467
|
+
"idnr",
|
|
1468
|
+
"steuer-idnr",
|
|
1469
|
+
"steuernummer"
|
|
1470
|
+
],
|
|
1471
|
+
baseConfidence: "medium",
|
|
1472
|
+
baseScore: 0.5
|
|
1473
|
+
};
|
|
1474
|
+
var DE_PASSPORT = {
|
|
1475
|
+
entityType: "DE_PASSPORT",
|
|
1476
|
+
patterns: [
|
|
1477
|
+
/\b[CFGHJKLMNPRTVWXYZ][CFGHJKLMNPRTVWXYZ0-9]{7}[CFGHJKLMNPRTVWXYZ0-9]\b/g,
|
|
1478
|
+
/\bT\d{8}\b/g
|
|
1479
|
+
],
|
|
1480
|
+
contextWords: [
|
|
1481
|
+
"personalausweis",
|
|
1482
|
+
"ausweis",
|
|
1483
|
+
"reisepass",
|
|
1484
|
+
"pass",
|
|
1485
|
+
"dokumentennummer",
|
|
1486
|
+
"seriennummer"
|
|
1487
|
+
],
|
|
1488
|
+
baseConfidence: "low",
|
|
1489
|
+
baseScore: 0.4
|
|
1490
|
+
};
|
|
1491
|
+
var EU_PATTERNS = [
|
|
1492
|
+
ES_NIF,
|
|
1493
|
+
ES_NIE,
|
|
1494
|
+
IT_FISCAL_CODE,
|
|
1495
|
+
IT_DRIVER_LICENSE,
|
|
1496
|
+
IT_VAT_CODE,
|
|
1497
|
+
IT_PASSPORT,
|
|
1498
|
+
IT_IDENTITY_CARD,
|
|
1499
|
+
PL_PESEL,
|
|
1500
|
+
FI_PERSONAL_IDENTITY_CODE,
|
|
1501
|
+
SE_PERSONNUMMER,
|
|
1502
|
+
DE_TAX_ID,
|
|
1503
|
+
DE_PASSPORT
|
|
1504
|
+
];
|
|
1505
|
+
|
|
1506
|
+
// src/validators/verhoeff.ts
|
|
1507
|
+
var D = [
|
|
1508
|
+
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
|
|
1509
|
+
[1, 2, 3, 4, 0, 6, 7, 8, 9, 5],
|
|
1510
|
+
[2, 3, 4, 0, 1, 7, 8, 9, 5, 6],
|
|
1511
|
+
[3, 4, 0, 1, 2, 8, 9, 5, 6, 7],
|
|
1512
|
+
[4, 0, 1, 2, 3, 9, 5, 6, 7, 8],
|
|
1513
|
+
[5, 9, 8, 7, 6, 0, 4, 3, 2, 1],
|
|
1514
|
+
[6, 5, 9, 8, 7, 1, 0, 4, 3, 2],
|
|
1515
|
+
[7, 6, 5, 9, 8, 2, 1, 0, 4, 3],
|
|
1516
|
+
[8, 7, 6, 5, 9, 3, 2, 1, 0, 4],
|
|
1517
|
+
[9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
|
|
1518
|
+
];
|
|
1519
|
+
var P = [
|
|
1520
|
+
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
|
|
1521
|
+
[1, 5, 7, 6, 2, 8, 3, 0, 9, 4],
|
|
1522
|
+
[5, 8, 0, 3, 7, 9, 6, 1, 4, 2],
|
|
1523
|
+
[8, 9, 1, 6, 0, 4, 3, 5, 2, 7],
|
|
1524
|
+
[9, 4, 5, 3, 1, 2, 6, 8, 7, 0],
|
|
1525
|
+
[4, 2, 8, 6, 5, 7, 3, 9, 0, 1],
|
|
1526
|
+
[2, 7, 9, 3, 8, 0, 6, 4, 1, 5],
|
|
1527
|
+
[7, 0, 4, 6, 9, 1, 3, 2, 5, 8]
|
|
1528
|
+
];
|
|
1529
|
+
var INV = [0, 4, 3, 2, 1, 5, 6, 7, 8, 9];
|
|
1530
|
+
function verhoeff(input) {
|
|
1531
|
+
const digits = String(input).split("").map(Number).reverse();
|
|
1532
|
+
let c = 0;
|
|
1533
|
+
for (let i = 0; i < digits.length; i++) {
|
|
1534
|
+
c = D[c][P[i % 8][digits[i]]];
|
|
1535
|
+
}
|
|
1536
|
+
return INV[c] === 0;
|
|
1537
|
+
}
|
|
1538
|
+
|
|
1539
|
+
// src/patterns/pii/apac.ts
|
|
1540
|
+
var SG_NRIC_FIN = {
|
|
1541
|
+
entityType: "SG_NRIC_FIN",
|
|
1542
|
+
patterns: [/\b[STFGM]\d{7}[A-Z]\b/gi],
|
|
1543
|
+
contextWords: ["fin", "nric"],
|
|
1544
|
+
baseConfidence: "medium",
|
|
1545
|
+
baseScore: 0.5
|
|
1546
|
+
};
|
|
1547
|
+
var SG_UEN = {
|
|
1548
|
+
entityType: "SG_UEN",
|
|
1549
|
+
patterns: [
|
|
1550
|
+
/\b\d{8}[A-Z]\b/g,
|
|
1551
|
+
/\b\d{9}[A-Z]\b/g,
|
|
1552
|
+
/\b[TS]\d{2}[A-Z]{2}\d{4}[A-Z]\b/g
|
|
1553
|
+
],
|
|
1554
|
+
contextWords: ["uen", "unique entity number", "business registration", "acra"],
|
|
1555
|
+
baseConfidence: "low",
|
|
1556
|
+
baseScore: 0.3
|
|
1557
|
+
};
|
|
1558
|
+
var AU_ABN = {
|
|
1559
|
+
entityType: "AU_ABN",
|
|
1560
|
+
patterns: [
|
|
1561
|
+
/\b\d{2}\s\d{3}\s\d{3}\s\d{3}\b/g,
|
|
1562
|
+
/\b\d{11}\b/g
|
|
1563
|
+
],
|
|
1564
|
+
validate: (match) => {
|
|
1565
|
+
const digits = match.replace(/\s/g, "").split("").map(Number);
|
|
1566
|
+
if (digits.length !== 11) return false;
|
|
1567
|
+
const weights = [10, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19];
|
|
1568
|
+
digits[0] = digits[0] === 0 ? 9 : digits[0] - 1;
|
|
1569
|
+
let sum = 0;
|
|
1570
|
+
for (let i = 0; i < 11; i++) {
|
|
1571
|
+
sum += digits[i] * weights[i];
|
|
1572
|
+
}
|
|
1573
|
+
return sum % 89 === 0;
|
|
1574
|
+
},
|
|
1575
|
+
contextWords: ["australian business number", "abn"],
|
|
1576
|
+
baseConfidence: "low",
|
|
1577
|
+
baseScore: 0.1
|
|
1578
|
+
};
|
|
1579
|
+
var AU_ACN = {
|
|
1580
|
+
entityType: "AU_ACN",
|
|
1581
|
+
patterns: [
|
|
1582
|
+
/\b\d{3}\s\d{3}\s\d{3}\b/g,
|
|
1583
|
+
/\b\d{9}\b/g
|
|
1584
|
+
],
|
|
1585
|
+
validate: (match) => {
|
|
1586
|
+
const digits = match.replace(/\s/g, "").split("").map(Number);
|
|
1587
|
+
if (digits.length !== 9) return false;
|
|
1588
|
+
const weights = [8, 7, 6, 5, 4, 3, 2, 1];
|
|
1589
|
+
let sum = 0;
|
|
1590
|
+
for (let i = 0; i < 8; i++) {
|
|
1591
|
+
sum += digits[i] * weights[i];
|
|
1592
|
+
}
|
|
1593
|
+
const complement = (10 - sum % 10) % 10;
|
|
1594
|
+
return complement === digits[8];
|
|
1595
|
+
},
|
|
1596
|
+
contextWords: ["australian company number", "acn"],
|
|
1597
|
+
baseConfidence: "low",
|
|
1598
|
+
baseScore: 0.1
|
|
1599
|
+
};
|
|
1600
|
+
var AU_TFN = {
|
|
1601
|
+
entityType: "AU_TFN",
|
|
1602
|
+
patterns: [
|
|
1603
|
+
/\b\d{3}\s\d{3}\s\d{3}\b/g,
|
|
1604
|
+
/\b\d{9}\b/g
|
|
1605
|
+
],
|
|
1606
|
+
validate: (match) => {
|
|
1607
|
+
const digits = match.replace(/\s/g, "").split("").map(Number);
|
|
1608
|
+
if (digits.length !== 9) return false;
|
|
1609
|
+
const weights = [1, 4, 3, 7, 5, 8, 6, 9, 10];
|
|
1610
|
+
let sum = 0;
|
|
1611
|
+
for (let i = 0; i < 9; i++) {
|
|
1612
|
+
sum += digits[i] * weights[i];
|
|
1613
|
+
}
|
|
1614
|
+
return sum % 11 === 0;
|
|
1615
|
+
},
|
|
1616
|
+
contextWords: ["tax file number", "tfn"],
|
|
1617
|
+
baseConfidence: "low",
|
|
1618
|
+
baseScore: 0.1
|
|
1619
|
+
};
|
|
1620
|
+
var AU_MEDICARE = {
|
|
1621
|
+
entityType: "AU_MEDICARE",
|
|
1622
|
+
patterns: [
|
|
1623
|
+
/\b[2-6]\d{3}\s\d{5}\s\d\b/g,
|
|
1624
|
+
/\b[2-6]\d{9}\b/g
|
|
1625
|
+
],
|
|
1626
|
+
validate: (match) => {
|
|
1627
|
+
const digits = match.replace(/\s/g, "").split("").map(Number);
|
|
1628
|
+
if (digits.length !== 10) return false;
|
|
1629
|
+
const weights = [1, 3, 7, 9, 1, 3, 7, 9];
|
|
1630
|
+
let sum = 0;
|
|
1631
|
+
for (let i = 0; i < 8; i++) {
|
|
1632
|
+
sum += digits[i] * weights[i];
|
|
1633
|
+
}
|
|
1634
|
+
return sum % 10 === digits[8];
|
|
1635
|
+
},
|
|
1636
|
+
contextWords: ["medicare"],
|
|
1637
|
+
baseConfidence: "low",
|
|
1638
|
+
baseScore: 0.1
|
|
1639
|
+
};
|
|
1640
|
+
var IN_PAN = {
|
|
1641
|
+
entityType: "IN_PAN",
|
|
1642
|
+
patterns: [
|
|
1643
|
+
/\b[A-Z]{3}[ABCFGHHJLPT][A-Z]\d{4}[A-Z]\b/gi,
|
|
1644
|
+
/\b[A-Z]{5}\d{4}[A-Z]\b/gi
|
|
1645
|
+
],
|
|
1646
|
+
contextWords: ["permanent account number", "pan"],
|
|
1647
|
+
baseConfidence: "medium",
|
|
1648
|
+
baseScore: 0.5
|
|
1649
|
+
};
|
|
1650
|
+
var IN_AADHAAR = {
|
|
1651
|
+
entityType: "IN_AADHAAR",
|
|
1652
|
+
patterns: [
|
|
1653
|
+
/\b\d{4}[- :]\d{4}[- :]\d{4}\b/g,
|
|
1654
|
+
/\b\d{12}\b/g
|
|
1655
|
+
],
|
|
1656
|
+
validate: (match) => {
|
|
1657
|
+
const digits = match.replace(/\D/g, "");
|
|
1658
|
+
if (digits.length !== 12) return false;
|
|
1659
|
+
if (parseInt(digits[0], 10) < 2) return false;
|
|
1660
|
+
if (digits === digits.split("").reverse().join("")) return false;
|
|
1661
|
+
return verhoeff(parseInt(digits, 10));
|
|
1662
|
+
},
|
|
1663
|
+
contextWords: ["aadhaar", "uidai"],
|
|
1664
|
+
baseConfidence: "low",
|
|
1665
|
+
baseScore: 0.01
|
|
1666
|
+
};
|
|
1667
|
+
var IN_VEHICLE_REGISTRATION = {
|
|
1668
|
+
entityType: "IN_VEHICLE_REGISTRATION",
|
|
1669
|
+
patterns: [
|
|
1670
|
+
/\b[A-Z]{2}\d{2}[A-Z]{1,2}\d{4}\b/g,
|
|
1671
|
+
/\b[A-Z]{2}\d[A-Z]{1,3}\d{4}\b/g
|
|
1672
|
+
],
|
|
1673
|
+
contextWords: ["rto", "vehicle", "plate", "registration"],
|
|
1674
|
+
baseConfidence: "medium",
|
|
1675
|
+
baseScore: 0.5
|
|
1676
|
+
};
|
|
1677
|
+
var IN_VOTER = {
|
|
1678
|
+
entityType: "IN_VOTER",
|
|
1679
|
+
patterns: [
|
|
1680
|
+
/\b[A-Z]{3}\d{7}\b/gi
|
|
1681
|
+
],
|
|
1682
|
+
contextWords: ["voter", "epic", "elector photo identity card"],
|
|
1683
|
+
baseConfidence: "low",
|
|
1684
|
+
baseScore: 0.3
|
|
1685
|
+
};
|
|
1686
|
+
var IN_PASSPORT = {
|
|
1687
|
+
entityType: "IN_PASSPORT",
|
|
1688
|
+
patterns: [/\b[A-Z][1-9]\d\s?\d{4}[1-9]\b/g],
|
|
1689
|
+
contextWords: ["passport", "indian passport", "passport number"],
|
|
1690
|
+
baseConfidence: "low",
|
|
1691
|
+
baseScore: 0.1
|
|
1692
|
+
};
|
|
1693
|
+
var KR_RRN = {
|
|
1694
|
+
entityType: "KR_RRN",
|
|
1695
|
+
patterns: [
|
|
1696
|
+
/(?<!\d)\d{2}(?:0[1-9]|1[0-2])(?:0[1-9]|[12]\d|3[01])-?[1-4]\d{6}(?!\d)/g
|
|
1697
|
+
],
|
|
1698
|
+
validate: (match) => {
|
|
1699
|
+
const digits = match.replace(/-/g, "");
|
|
1700
|
+
if (digits.length !== 13 || !/^\d+$/.test(digits)) return false;
|
|
1701
|
+
const regionCode = parseInt(digits.slice(7, 9), 10);
|
|
1702
|
+
if (regionCode > 95) return false;
|
|
1703
|
+
const weights = [2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5];
|
|
1704
|
+
let sum = 0;
|
|
1705
|
+
for (let i = 0; i < 12; i++) {
|
|
1706
|
+
sum += parseInt(digits[i], 10) * weights[i];
|
|
1707
|
+
}
|
|
1708
|
+
const checksum = (11 - sum % 11) % 10;
|
|
1709
|
+
return checksum === parseInt(digits[12], 10);
|
|
1710
|
+
},
|
|
1711
|
+
contextWords: [
|
|
1712
|
+
"resident registration number",
|
|
1713
|
+
"rrn",
|
|
1714
|
+
"korean rrn"
|
|
1715
|
+
],
|
|
1716
|
+
baseConfidence: "medium",
|
|
1717
|
+
baseScore: 0.5
|
|
1718
|
+
};
|
|
1719
|
+
var KR_PASSPORT = {
|
|
1720
|
+
entityType: "KR_PASSPORT",
|
|
1721
|
+
patterns: [
|
|
1722
|
+
/(?<![A-Z0-9])[MSROD]\d{3}[A-Z]\d{4}(?!\d)/gi,
|
|
1723
|
+
/(?<![A-Z0-9])[MSROD]\d{8}(?!\d)/gi
|
|
1724
|
+
],
|
|
1725
|
+
contextWords: ["passport", "korean passport", "\uC5EC\uAD8C"],
|
|
1726
|
+
baseConfidence: "low",
|
|
1727
|
+
baseScore: 0.1
|
|
1728
|
+
};
|
|
1729
|
+
var TH_TNIN = {
|
|
1730
|
+
entityType: "TH_TNIN",
|
|
1731
|
+
patterns: [
|
|
1732
|
+
/\b[1-9](?:[134]\d|[25][0134567]|[67][01234567]|[89][0123456])\d{10}\b/g
|
|
1733
|
+
],
|
|
1734
|
+
validate: (match) => {
|
|
1735
|
+
if (match.length !== 13 || !/^\d+$/.test(match)) return false;
|
|
1736
|
+
const weights = [13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2];
|
|
1737
|
+
let sum = 0;
|
|
1738
|
+
for (let i = 0; i < 12; i++) {
|
|
1739
|
+
sum += weights[i] * parseInt(match[i], 10);
|
|
1740
|
+
}
|
|
1741
|
+
const x = sum % 11;
|
|
1742
|
+
const expected = x <= 1 ? 1 - x : 11 - x;
|
|
1743
|
+
return expected === parseInt(match[12], 10);
|
|
1744
|
+
},
|
|
1745
|
+
contextWords: ["thai national id", "thai id number", "tnin"],
|
|
1746
|
+
baseConfidence: "medium",
|
|
1747
|
+
baseScore: 0.5
|
|
1748
|
+
};
|
|
1749
|
+
var NG_NIN = {
|
|
1750
|
+
entityType: "NG_NIN",
|
|
1751
|
+
patterns: [/\b\d{11}\b/g],
|
|
1752
|
+
validate: (match) => {
|
|
1753
|
+
if (match.length !== 11 || !/^\d+$/.test(match)) return false;
|
|
1754
|
+
return verhoeff(parseInt(match, 10));
|
|
1755
|
+
},
|
|
1756
|
+
contextWords: [
|
|
1757
|
+
"nin",
|
|
1758
|
+
"national identification number",
|
|
1759
|
+
"national identity number",
|
|
1760
|
+
"nimc"
|
|
1761
|
+
],
|
|
1762
|
+
baseConfidence: "low",
|
|
1763
|
+
baseScore: 0.01
|
|
1764
|
+
};
|
|
1765
|
+
var APAC_PATTERNS = [
|
|
1766
|
+
SG_NRIC_FIN,
|
|
1767
|
+
SG_UEN,
|
|
1768
|
+
AU_ABN,
|
|
1769
|
+
AU_ACN,
|
|
1770
|
+
AU_TFN,
|
|
1771
|
+
AU_MEDICARE,
|
|
1772
|
+
IN_PAN,
|
|
1773
|
+
IN_AADHAAR,
|
|
1774
|
+
IN_VEHICLE_REGISTRATION,
|
|
1775
|
+
IN_VOTER,
|
|
1776
|
+
IN_PASSPORT,
|
|
1777
|
+
KR_RRN,
|
|
1778
|
+
KR_PASSPORT,
|
|
1779
|
+
TH_TNIN,
|
|
1780
|
+
NG_NIN
|
|
1781
|
+
];
|
|
1782
|
+
|
|
1783
|
+
// src/patterns/pii/index.ts
|
|
1784
|
+
var ALL_PII_PATTERNS = [
|
|
1785
|
+
...GENERIC_PATTERNS,
|
|
1786
|
+
...US_PATTERNS,
|
|
1787
|
+
...UK_PATTERNS,
|
|
1788
|
+
...EU_PATTERNS,
|
|
1789
|
+
...APAC_PATTERNS
|
|
1790
|
+
];
|
|
1791
|
+
|
|
1792
|
+
// src/guards/pii.guard.ts
|
|
1793
|
+
var CONTEXT_WINDOW = 100;
|
|
1794
|
+
var CONTEXT_SCORE_BOOST = 0.35;
|
|
1795
|
+
var PiiGuard = class extends BaseGuard {
|
|
1796
|
+
name = "pii";
|
|
1797
|
+
patterns;
|
|
1798
|
+
constructor(options) {
|
|
1799
|
+
super();
|
|
1800
|
+
let patterns = ALL_PII_PATTERNS;
|
|
1801
|
+
if (options?.entityTypes && options.entityTypes.length > 0) {
|
|
1802
|
+
const allowed = new Set(options.entityTypes);
|
|
1803
|
+
patterns = patterns.filter((p) => allowed.has(p.entityType));
|
|
1804
|
+
}
|
|
1805
|
+
if (options?.customPatterns) {
|
|
1806
|
+
patterns = [...patterns, ...options.customPatterns];
|
|
1807
|
+
}
|
|
1808
|
+
this.patterns = patterns;
|
|
1809
|
+
}
|
|
1810
|
+
async analyze(text, config) {
|
|
1811
|
+
const cfg = this.mergeConfig(config);
|
|
1812
|
+
const detections = this.detectAll(text, cfg.threshold);
|
|
1813
|
+
return this.buildResult(text, detections, cfg.mode);
|
|
1814
|
+
}
|
|
1815
|
+
detectAll(text, threshold) {
|
|
1816
|
+
const allDetections = [];
|
|
1817
|
+
const textLower = text.toLowerCase();
|
|
1818
|
+
for (const pattern of this.patterns) {
|
|
1819
|
+
for (const regex of pattern.patterns) {
|
|
1820
|
+
const re = new RegExp(regex.source, regex.flags);
|
|
1821
|
+
let match;
|
|
1822
|
+
while ((match = re.exec(text)) !== null) {
|
|
1823
|
+
const matchText = match[0];
|
|
1824
|
+
const start = match.index;
|
|
1825
|
+
const end = start + matchText.length;
|
|
1826
|
+
if (pattern.validate && !pattern.validate(matchText)) {
|
|
1827
|
+
continue;
|
|
1828
|
+
}
|
|
1829
|
+
let score = pattern.baseScore;
|
|
1830
|
+
let confidence = pattern.baseConfidence;
|
|
1831
|
+
if (pattern.contextWords && pattern.contextWords.length > 0) {
|
|
1832
|
+
const contextStart = Math.max(0, start - CONTEXT_WINDOW);
|
|
1833
|
+
const contextEnd = Math.min(text.length, end + CONTEXT_WINDOW);
|
|
1834
|
+
const context = textLower.slice(contextStart, contextEnd);
|
|
1835
|
+
const hasContext = pattern.contextWords.some(
|
|
1836
|
+
(word) => context.includes(word.toLowerCase())
|
|
1837
|
+
);
|
|
1838
|
+
if (hasContext) {
|
|
1839
|
+
score = Math.min(1, score + CONTEXT_SCORE_BOOST);
|
|
1840
|
+
if (confidence === "low") confidence = "medium";
|
|
1841
|
+
else if (confidence === "medium") confidence = "high";
|
|
1842
|
+
}
|
|
1843
|
+
}
|
|
1844
|
+
if (score < threshold) continue;
|
|
1845
|
+
allDetections.push(
|
|
1846
|
+
this.makeDetection(text, {
|
|
1847
|
+
entityType: pattern.entityType,
|
|
1848
|
+
start,
|
|
1849
|
+
end,
|
|
1850
|
+
text: matchText,
|
|
1851
|
+
confidence,
|
|
1852
|
+
score,
|
|
1853
|
+
guardName: this.name
|
|
1854
|
+
})
|
|
1855
|
+
);
|
|
1856
|
+
}
|
|
1857
|
+
}
|
|
1858
|
+
}
|
|
1859
|
+
return this.deduplicateDetections(allDetections);
|
|
1860
|
+
}
|
|
1861
|
+
deduplicateDetections(detections) {
|
|
1862
|
+
if (detections.length <= 1) return detections;
|
|
1863
|
+
const sorted = [...detections].sort((a, b) => b.score - a.score);
|
|
1864
|
+
const result = [];
|
|
1865
|
+
for (const detection of sorted) {
|
|
1866
|
+
const overlaps = result.some(
|
|
1867
|
+
(existing) => detection.start < existing.end && detection.end > existing.start
|
|
1868
|
+
);
|
|
1869
|
+
if (!overlaps) {
|
|
1870
|
+
result.push(detection);
|
|
1871
|
+
}
|
|
1872
|
+
}
|
|
1873
|
+
return result.sort((a, b) => a.start - b.start);
|
|
1874
|
+
}
|
|
1875
|
+
};
|
|
1876
|
+
|
|
1877
|
+
// src/patterns/secrets/cloud.ts
|
|
1878
|
+
var AWS_ACCESS_KEY = {
|
|
1879
|
+
secretType: "AWS_ACCESS_KEY",
|
|
1880
|
+
patterns: [/(?<![A-Z0-9])AKIA[0-9A-Z]{16}(?![A-Z0-9])/g]
|
|
1881
|
+
};
|
|
1882
|
+
var AWS_SECRET_KEY = {
|
|
1883
|
+
secretType: "AWS_SECRET_KEY",
|
|
1884
|
+
patterns: [
|
|
1885
|
+
/(?<![A-Za-z0-9/+=])[A-Za-z0-9/+=]{40}(?![A-Za-z0-9/+=])/g
|
|
1886
|
+
],
|
|
1887
|
+
minEntropy: 4.5
|
|
1888
|
+
};
|
|
1889
|
+
var AWS_SESSION_TOKEN = {
|
|
1890
|
+
secretType: "AWS_SESSION_TOKEN",
|
|
1891
|
+
patterns: [/(?:aws_session_token|AWS_SESSION_TOKEN)\s*[:=]\s*["']?([A-Za-z0-9/+=]{100,})["']?/g],
|
|
1892
|
+
minEntropy: 4
|
|
1893
|
+
};
|
|
1894
|
+
var AWS_MWS_KEY = {
|
|
1895
|
+
secretType: "AWS_MWS_KEY",
|
|
1896
|
+
patterns: [/amzn\.mws\.[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/g]
|
|
1897
|
+
};
|
|
1898
|
+
var AZURE_CONNECTION_STRING = {
|
|
1899
|
+
secretType: "AZURE_CONNECTION_STRING",
|
|
1900
|
+
patterns: [
|
|
1901
|
+
/DefaultEndpointsProtocol=https?;AccountName=[^;]+;AccountKey=[A-Za-z0-9+/=]+;?/g,
|
|
1902
|
+
/(?:AccountKey|SharedAccessKey|SharedAccessSignature)=[A-Za-z0-9+/=]{20,}/g
|
|
1903
|
+
]
|
|
1904
|
+
};
|
|
1905
|
+
var AZURE_AD_CLIENT_SECRET = {
|
|
1906
|
+
secretType: "AZURE_AD_CLIENT_SECRET",
|
|
1907
|
+
patterns: [/(?:client_secret|AZURE_CLIENT_SECRET)\s*[:=]\s*["']?([A-Za-z0-9~._-]{34,})["']?/g],
|
|
1908
|
+
minEntropy: 3.5
|
|
1909
|
+
};
|
|
1910
|
+
var AZURE_STORAGE_KEY = {
|
|
1911
|
+
secretType: "AZURE_STORAGE_KEY",
|
|
1912
|
+
patterns: [/[A-Za-z0-9+/]{86}==/g],
|
|
1913
|
+
minEntropy: 5
|
|
1914
|
+
};
|
|
1915
|
+
var AZURE_FUNCTION_KEY = {
|
|
1916
|
+
secretType: "AZURE_FUNCTION_KEY",
|
|
1917
|
+
patterns: [/(?:x-functions-key|functionkey)\s*[:=]\s*["']?([A-Za-z0-9_-]{40,})["']?/gi],
|
|
1918
|
+
minEntropy: 3.5
|
|
1919
|
+
};
|
|
1920
|
+
var GCP_SERVICE_ACCOUNT = {
|
|
1921
|
+
secretType: "GCP_SERVICE_ACCOUNT",
|
|
1922
|
+
patterns: [/"private_key"\s*:\s*"-----BEGIN (?:RSA )?PRIVATE KEY-----/g]
|
|
1923
|
+
};
|
|
1924
|
+
var GCP_API_KEY = {
|
|
1925
|
+
secretType: "GCP_API_KEY",
|
|
1926
|
+
patterns: [/AIza[0-9A-Za-z_-]{35}/g]
|
|
1927
|
+
};
|
|
1928
|
+
var GCP_OAUTH_SECRET = {
|
|
1929
|
+
secretType: "GCP_OAUTH_SECRET",
|
|
1930
|
+
patterns: [/GOCSPX-[A-Za-z0-9_-]{28}/g]
|
|
1931
|
+
};
|
|
1932
|
+
var IBM_CLOUD_API_KEY = {
|
|
1933
|
+
secretType: "IBM_CLOUD_API_KEY",
|
|
1934
|
+
patterns: [/(?:ibm[-_]?(?:cloud)?[-_]?api[-_]?key)\s*[:=]\s*["']?([A-Za-z0-9_-]{44})["']?/gi]
|
|
1935
|
+
};
|
|
1936
|
+
var IBM_COS_HMAC = {
|
|
1937
|
+
secretType: "IBM_COS_HMAC",
|
|
1938
|
+
patterns: [/(?:cos_hmac_keys|ibm_cos)\s*[:=]\s*["']?([A-Za-z0-9]{32,})["']?/gi],
|
|
1939
|
+
minEntropy: 3.5
|
|
1940
|
+
};
|
|
1941
|
+
var DIGITALOCEAN_TOKEN = {
|
|
1942
|
+
secretType: "DIGITALOCEAN_TOKEN",
|
|
1943
|
+
patterns: [
|
|
1944
|
+
/dop_v1_[a-f0-9]{64}/g,
|
|
1945
|
+
/doo_v1_[a-f0-9]{64}/g,
|
|
1946
|
+
/dor_v1_[a-f0-9]{64}/g
|
|
1947
|
+
]
|
|
1948
|
+
};
|
|
1949
|
+
var DIGITALOCEAN_SPACES_KEY = {
|
|
1950
|
+
secretType: "DIGITALOCEAN_SPACES_KEY",
|
|
1951
|
+
patterns: [/(?:SPACES_ACCESS_KEY_ID|DO_SPACES_KEY)\s*[:=]\s*["']?([A-Z0-9]{20})["']?/g]
|
|
1952
|
+
};
|
|
1953
|
+
var LINODE_TOKEN = {
|
|
1954
|
+
secretType: "LINODE_TOKEN",
|
|
1955
|
+
patterns: [/(?:LINODE_TOKEN|LINODE_API_TOKEN)\s*[:=]\s*["']?([a-f0-9]{64})["']?/g]
|
|
1956
|
+
};
|
|
1957
|
+
var VULTR_API_KEY = {
|
|
1958
|
+
secretType: "VULTR_API_KEY",
|
|
1959
|
+
patterns: [/(?:VULTR_API_KEY)\s*[:=]\s*["']?([A-Z0-9]{36})["']?/g]
|
|
1960
|
+
};
|
|
1961
|
+
var ORACLE_CLOUD_KEY = {
|
|
1962
|
+
secretType: "ORACLE_CLOUD_KEY",
|
|
1963
|
+
patterns: [/(?:OCI_API_KEY|ORACLE_CLOUD_KEY)\s*[:=]\s*["']?([A-Za-z0-9/+=]{40,})["']?/g],
|
|
1964
|
+
minEntropy: 4
|
|
1965
|
+
};
|
|
1966
|
+
var ALIBABA_CLOUD_KEY = {
|
|
1967
|
+
secretType: "ALIBABA_CLOUD_KEY",
|
|
1968
|
+
patterns: [/LTAI[A-Za-z0-9]{12,20}/g]
|
|
1969
|
+
};
|
|
1970
|
+
var CLOUD_PATTERNS = [
|
|
1971
|
+
AWS_ACCESS_KEY,
|
|
1972
|
+
AWS_SECRET_KEY,
|
|
1973
|
+
AWS_SESSION_TOKEN,
|
|
1974
|
+
AWS_MWS_KEY,
|
|
1975
|
+
AZURE_CONNECTION_STRING,
|
|
1976
|
+
AZURE_AD_CLIENT_SECRET,
|
|
1977
|
+
AZURE_STORAGE_KEY,
|
|
1978
|
+
AZURE_FUNCTION_KEY,
|
|
1979
|
+
GCP_SERVICE_ACCOUNT,
|
|
1980
|
+
GCP_API_KEY,
|
|
1981
|
+
GCP_OAUTH_SECRET,
|
|
1982
|
+
IBM_CLOUD_API_KEY,
|
|
1983
|
+
IBM_COS_HMAC,
|
|
1984
|
+
DIGITALOCEAN_TOKEN,
|
|
1985
|
+
DIGITALOCEAN_SPACES_KEY,
|
|
1986
|
+
LINODE_TOKEN,
|
|
1987
|
+
VULTR_API_KEY,
|
|
1988
|
+
ORACLE_CLOUD_KEY,
|
|
1989
|
+
ALIBABA_CLOUD_KEY
|
|
1990
|
+
];
|
|
1991
|
+
|
|
1992
|
+
// src/patterns/secrets/source-control.ts
|
|
1993
|
+
var GITHUB_TOKEN = {
|
|
1994
|
+
secretType: "GITHUB_TOKEN",
|
|
1995
|
+
patterns: [
|
|
1996
|
+
/ghp_[A-Za-z0-9]{36}/g,
|
|
1997
|
+
/gho_[A-Za-z0-9]{36}/g,
|
|
1998
|
+
/ghu_[A-Za-z0-9]{36}/g,
|
|
1999
|
+
/ghs_[A-Za-z0-9]{36}/g,
|
|
2000
|
+
/ghr_[A-Za-z0-9]{36}/g,
|
|
2001
|
+
/github_pat_[A-Za-z0-9]{22}_[A-Za-z0-9]{59}/g
|
|
2002
|
+
]
|
|
2003
|
+
};
|
|
2004
|
+
var GITHUB_APP_TOKEN = {
|
|
2005
|
+
secretType: "GITHUB_APP_TOKEN",
|
|
2006
|
+
patterns: [/(?:ghu|ghs)_[A-Za-z0-9]{36}/g]
|
|
2007
|
+
};
|
|
2008
|
+
var GITHUB_FINE_GRAINED_TOKEN = {
|
|
2009
|
+
secretType: "GITHUB_FINE_GRAINED_TOKEN",
|
|
2010
|
+
patterns: [/github_pat_[A-Za-z0-9]{22}_[A-Za-z0-9]{59}/g]
|
|
2011
|
+
};
|
|
2012
|
+
var GITLAB_TOKEN = {
|
|
2013
|
+
secretType: "GITLAB_TOKEN",
|
|
2014
|
+
patterns: [
|
|
2015
|
+
/glpat-[A-Za-z0-9\-_]{20}/g,
|
|
2016
|
+
/glcbt-[A-Za-z0-9]{1,5}_[A-Za-z0-9_\-]{20}/g,
|
|
2017
|
+
/gldt-[A-Za-z0-9_\-]{20}/g,
|
|
2018
|
+
/glft-[A-Za-z0-9_\-]{20}/g,
|
|
2019
|
+
/glsoat-[A-Za-z0-9_\-]{20}/g,
|
|
2020
|
+
/GR1348941[A-Za-z0-9_\-]{20}/g
|
|
2021
|
+
]
|
|
2022
|
+
};
|
|
2023
|
+
var GITLAB_PIPELINE_TOKEN = {
|
|
2024
|
+
secretType: "GITLAB_PIPELINE_TOKEN",
|
|
2025
|
+
patterns: [/glptt-[A-Za-z0-9]{20}/g]
|
|
2026
|
+
};
|
|
2027
|
+
var BITBUCKET_APP_PASSWORD = {
|
|
2028
|
+
secretType: "BITBUCKET_APP_PASSWORD",
|
|
2029
|
+
patterns: [/(?:BITBUCKET_APP_PASSWORD|BB_APP_PASSWORD)\s*[:=]\s*["']?([A-Za-z0-9]{18,})["']?/g],
|
|
2030
|
+
minEntropy: 3.5
|
|
2031
|
+
};
|
|
2032
|
+
var BITBUCKET_CLIENT_SECRET = {
|
|
2033
|
+
secretType: "BITBUCKET_CLIENT_SECRET",
|
|
2034
|
+
patterns: [/(?:BITBUCKET_CLIENT_SECRET|BB_CLIENT_SECRET)\s*[:=]\s*["']?([A-Za-z0-9_-]{32,})["']?/g],
|
|
2035
|
+
minEntropy: 3.5
|
|
2036
|
+
};
|
|
2037
|
+
var AZURE_DEVOPS_TOKEN = {
|
|
2038
|
+
secretType: "AZURE_DEVOPS_TOKEN",
|
|
2039
|
+
patterns: [/(?:AZURE_DEVOPS_PAT|ADO_TOKEN|SYSTEM_ACCESSTOKEN)\s*[:=]\s*["']?([A-Za-z0-9]{52,})["']?/g],
|
|
2040
|
+
minEntropy: 4
|
|
2041
|
+
};
|
|
2042
|
+
var GITEA_TOKEN = {
|
|
2043
|
+
secretType: "GITEA_TOKEN",
|
|
2044
|
+
patterns: [/(?:GITEA_TOKEN)\s*[:=]\s*["']?([a-f0-9]{40})["']?/g]
|
|
2045
|
+
};
|
|
2046
|
+
var SOURCE_CONTROL_PATTERNS = [
|
|
2047
|
+
GITHUB_TOKEN,
|
|
2048
|
+
GITHUB_APP_TOKEN,
|
|
2049
|
+
GITHUB_FINE_GRAINED_TOKEN,
|
|
2050
|
+
GITLAB_TOKEN,
|
|
2051
|
+
GITLAB_PIPELINE_TOKEN,
|
|
2052
|
+
BITBUCKET_APP_PASSWORD,
|
|
2053
|
+
BITBUCKET_CLIENT_SECRET,
|
|
2054
|
+
AZURE_DEVOPS_TOKEN,
|
|
2055
|
+
GITEA_TOKEN
|
|
2056
|
+
];
|
|
2057
|
+
|
|
2058
|
+
// src/patterns/secrets/cicd.ts
|
|
2059
|
+
var JENKINS_API_TOKEN = {
|
|
2060
|
+
secretType: "JENKINS_API_TOKEN",
|
|
2061
|
+
patterns: [/(?:JENKINS_TOKEN|JENKINS_API_TOKEN)\s*[:=]\s*["']?([a-f0-9]{32,})["']?/gi]
|
|
2062
|
+
};
|
|
2063
|
+
var JENKINS_CRUMB = {
|
|
2064
|
+
secretType: "JENKINS_CRUMB",
|
|
2065
|
+
patterns: [/Jenkins-Crumb:\s*([a-f0-9]{32,})/g]
|
|
2066
|
+
};
|
|
2067
|
+
var CIRCLECI_TOKEN = {
|
|
2068
|
+
secretType: "CIRCLECI_TOKEN",
|
|
2069
|
+
patterns: [
|
|
2070
|
+
/(?:CIRCLECI_TOKEN|CIRCLE_TOKEN)\s*[:=]\s*["']?([a-f0-9]{40})["']?/g,
|
|
2071
|
+
/circle-token\s*[:=]\s*["']?([a-f0-9]{40})["']?/g
|
|
2072
|
+
]
|
|
2073
|
+
};
|
|
2074
|
+
var TRAVIS_CI_TOKEN = {
|
|
2075
|
+
secretType: "TRAVIS_CI_TOKEN",
|
|
2076
|
+
patterns: [/(?:TRAVIS_TOKEN|TRAVIS_API_TOKEN)\s*[:=]\s*["']?([A-Za-z0-9_-]{20,})["']?/g],
|
|
2077
|
+
minEntropy: 3.5
|
|
2078
|
+
};
|
|
2079
|
+
var GITHUB_ACTIONS_SECRET = {
|
|
2080
|
+
secretType: "GITHUB_ACTIONS_SECRET",
|
|
2081
|
+
patterns: [/(?:ACTIONS_SECRET|GH_ACTION_SECRET)\s*[:=]\s*["']?([A-Za-z0-9_-]{20,})["']?/g],
|
|
2082
|
+
minEntropy: 3.5
|
|
2083
|
+
};
|
|
2084
|
+
var BUILDKITE_TOKEN = {
|
|
2085
|
+
secretType: "BUILDKITE_TOKEN",
|
|
2086
|
+
patterns: [/bkua_[A-Za-z0-9]{40}/g]
|
|
2087
|
+
};
|
|
2088
|
+
var BUILDKITE_AGENT_TOKEN = {
|
|
2089
|
+
secretType: "BUILDKITE_AGENT_TOKEN",
|
|
2090
|
+
patterns: [/(?:BUILDKITE_AGENT_TOKEN)\s*[:=]\s*["']?([a-f0-9]{40,})["']?/g]
|
|
2091
|
+
};
|
|
2092
|
+
var DRONE_CI_TOKEN = {
|
|
2093
|
+
secretType: "DRONE_CI_TOKEN",
|
|
2094
|
+
patterns: [/(?:DRONE_TOKEN|DRONE_SERVER_TOKEN)\s*[:=]\s*["']?([A-Za-z0-9]{32,})["']?/g],
|
|
2095
|
+
minEntropy: 3.5
|
|
2096
|
+
};
|
|
2097
|
+
var CODECOV_TOKEN = {
|
|
2098
|
+
secretType: "CODECOV_TOKEN",
|
|
2099
|
+
patterns: [/(?:CODECOV_TOKEN)\s*[:=]\s*["']?([a-f0-9-]{36})["']?/g]
|
|
2100
|
+
};
|
|
2101
|
+
var SONARQUBE_TOKEN = {
|
|
2102
|
+
secretType: "SONARQUBE_TOKEN",
|
|
2103
|
+
patterns: [
|
|
2104
|
+
/squ_[A-Za-z0-9]{40}/g,
|
|
2105
|
+
/sqp_[A-Za-z0-9]{40}/g
|
|
2106
|
+
]
|
|
2107
|
+
};
|
|
2108
|
+
var TEAMCITY_TOKEN = {
|
|
2109
|
+
secretType: "TEAMCITY_TOKEN",
|
|
2110
|
+
patterns: [/(?:TEAMCITY_TOKEN|TC_TOKEN)\s*[:=]\s*["']?([A-Za-z0-9_-]{20,})["']?/g],
|
|
2111
|
+
minEntropy: 3.5
|
|
2112
|
+
};
|
|
2113
|
+
var CICD_PATTERNS = [
|
|
2114
|
+
JENKINS_API_TOKEN,
|
|
2115
|
+
JENKINS_CRUMB,
|
|
2116
|
+
CIRCLECI_TOKEN,
|
|
2117
|
+
TRAVIS_CI_TOKEN,
|
|
2118
|
+
GITHUB_ACTIONS_SECRET,
|
|
2119
|
+
BUILDKITE_TOKEN,
|
|
2120
|
+
BUILDKITE_AGENT_TOKEN,
|
|
2121
|
+
DRONE_CI_TOKEN,
|
|
2122
|
+
CODECOV_TOKEN,
|
|
2123
|
+
SONARQUBE_TOKEN,
|
|
2124
|
+
TEAMCITY_TOKEN
|
|
2125
|
+
];
|
|
2126
|
+
|
|
2127
|
+
// src/patterns/secrets/communication.ts
|
|
2128
|
+
var SLACK_TOKEN = {
|
|
2129
|
+
secretType: "SLACK_TOKEN",
|
|
2130
|
+
patterns: [/xox[bporas]-[A-Za-z0-9-]{10,}/g]
|
|
2131
|
+
};
|
|
2132
|
+
var SLACK_WEBHOOK = {
|
|
2133
|
+
secretType: "SLACK_WEBHOOK",
|
|
2134
|
+
patterns: [/https:\/\/hooks\.slack\.com\/services\/T[A-Z0-9]{8,}\/B[A-Z0-9]{8,}\/[A-Za-z0-9]{24}/g]
|
|
2135
|
+
};
|
|
2136
|
+
var SLACK_APP_TOKEN = {
|
|
2137
|
+
secretType: "SLACK_APP_TOKEN",
|
|
2138
|
+
patterns: [/xapp-[0-9]+-[A-Za-z0-9]+-[0-9]+-[A-Za-z0-9]+/g]
|
|
2139
|
+
};
|
|
2140
|
+
var DISCORD_TOKEN = {
|
|
2141
|
+
secretType: "DISCORD_TOKEN",
|
|
2142
|
+
patterns: [/(?:DISCORD_TOKEN|DISCORD_BOT_TOKEN)\s*[:=]\s*["']?([A-Za-z0-9_-]{24}\.[A-Za-z0-9_-]{6}\.[A-Za-z0-9_-]{27,})["']?/g]
|
|
2143
|
+
};
|
|
2144
|
+
var DISCORD_WEBHOOK = {
|
|
2145
|
+
secretType: "DISCORD_WEBHOOK",
|
|
2146
|
+
patterns: [/https:\/\/(?:ptb\.|canary\.)?discord(?:app)?\.com\/api\/webhooks\/\d+\/[A-Za-z0-9_-]+/g]
|
|
2147
|
+
};
|
|
2148
|
+
var TELEGRAM_BOT_TOKEN = {
|
|
2149
|
+
secretType: "TELEGRAM_BOT_TOKEN",
|
|
2150
|
+
patterns: [/[0-9]{8,10}:[A-Za-z0-9_-]{35}/g]
|
|
2151
|
+
};
|
|
2152
|
+
var TWILIO_KEY = {
|
|
2153
|
+
secretType: "TWILIO_KEY",
|
|
2154
|
+
patterns: [/SK[0-9a-fA-F]{32}/g]
|
|
2155
|
+
};
|
|
2156
|
+
var TWILIO_ACCOUNT_SID = {
|
|
2157
|
+
secretType: "TWILIO_ACCOUNT_SID",
|
|
2158
|
+
patterns: [/AC[a-f0-9]{32}/g]
|
|
2159
|
+
};
|
|
2160
|
+
var TWILIO_AUTH_TOKEN = {
|
|
2161
|
+
secretType: "TWILIO_AUTH_TOKEN",
|
|
2162
|
+
patterns: [/(?:TWILIO_AUTH_TOKEN)\s*[:=]\s*["']?([a-f0-9]{32})["']?/g]
|
|
2163
|
+
};
|
|
2164
|
+
var SENDGRID_KEY = {
|
|
2165
|
+
secretType: "SENDGRID_KEY",
|
|
2166
|
+
patterns: [/SG\.[A-Za-z0-9_-]{22}\.[A-Za-z0-9_-]{43}/g]
|
|
2167
|
+
};
|
|
2168
|
+
var MAILGUN_KEY = {
|
|
2169
|
+
secretType: "MAILGUN_KEY",
|
|
2170
|
+
patterns: [
|
|
2171
|
+
/key-[A-Za-z0-9]{32}/g,
|
|
2172
|
+
/(?:MAILGUN_API_KEY)\s*[:=]\s*["']?([A-Za-z0-9-]{32,})["']?/g
|
|
2173
|
+
]
|
|
2174
|
+
};
|
|
2175
|
+
var MAILCHIMP_KEY = {
|
|
2176
|
+
secretType: "MAILCHIMP_KEY",
|
|
2177
|
+
patterns: [/[a-f0-9]{32}-us[0-9]{1,2}/g]
|
|
2178
|
+
};
|
|
2179
|
+
var POSTMARK_TOKEN = {
|
|
2180
|
+
secretType: "POSTMARK_TOKEN",
|
|
2181
|
+
patterns: [/(?:POSTMARK_API_TOKEN|POSTMARK_SERVER_TOKEN)\s*[:=]\s*["']?([a-f0-9-]{36})["']?/g]
|
|
2182
|
+
};
|
|
2183
|
+
var TEAMS_WEBHOOK = {
|
|
2184
|
+
secretType: "TEAMS_WEBHOOK",
|
|
2185
|
+
patterns: [/https:\/\/[a-z0-9-]+\.webhook\.office\.com\/webhookb2\/[A-Za-z0-9-]+/g]
|
|
2186
|
+
};
|
|
2187
|
+
var INTERCOM_TOKEN = {
|
|
2188
|
+
secretType: "INTERCOM_TOKEN",
|
|
2189
|
+
patterns: [/(?:INTERCOM_TOKEN|INTERCOM_API_KEY)\s*[:=]\s*["']?([A-Za-z0-9_=-]{20,})["']?/g],
|
|
2190
|
+
minEntropy: 3.5
|
|
2191
|
+
};
|
|
2192
|
+
var COMMUNICATION_PATTERNS = [
|
|
2193
|
+
SLACK_TOKEN,
|
|
2194
|
+
SLACK_WEBHOOK,
|
|
2195
|
+
SLACK_APP_TOKEN,
|
|
2196
|
+
DISCORD_TOKEN,
|
|
2197
|
+
DISCORD_WEBHOOK,
|
|
2198
|
+
TELEGRAM_BOT_TOKEN,
|
|
2199
|
+
TWILIO_KEY,
|
|
2200
|
+
TWILIO_ACCOUNT_SID,
|
|
2201
|
+
TWILIO_AUTH_TOKEN,
|
|
2202
|
+
SENDGRID_KEY,
|
|
2203
|
+
MAILGUN_KEY,
|
|
2204
|
+
MAILCHIMP_KEY,
|
|
2205
|
+
POSTMARK_TOKEN,
|
|
2206
|
+
TEAMS_WEBHOOK,
|
|
2207
|
+
INTERCOM_TOKEN
|
|
2208
|
+
];
|
|
2209
|
+
|
|
2210
|
+
// src/patterns/secrets/payment.ts
|
|
2211
|
+
var STRIPE_KEY = {
|
|
2212
|
+
secretType: "STRIPE_KEY",
|
|
2213
|
+
patterns: [
|
|
2214
|
+
/sk_live_[A-Za-z0-9]{20,}/g,
|
|
2215
|
+
/rk_live_[A-Za-z0-9]{20,}/g,
|
|
2216
|
+
/sk_test_[A-Za-z0-9]{20,}/g,
|
|
2217
|
+
/rk_test_[A-Za-z0-9]{20,}/g
|
|
2218
|
+
]
|
|
2219
|
+
};
|
|
2220
|
+
var STRIPE_WEBHOOK_SECRET = {
|
|
2221
|
+
secretType: "STRIPE_WEBHOOK_SECRET",
|
|
2222
|
+
patterns: [/whsec_[A-Za-z0-9]{32,}/g]
|
|
2223
|
+
};
|
|
2224
|
+
var SQUARE_ACCESS_TOKEN = {
|
|
2225
|
+
secretType: "SQUARE_ACCESS_TOKEN",
|
|
2226
|
+
patterns: [
|
|
2227
|
+
/sq0atp-[A-Za-z0-9_-]{22}/g,
|
|
2228
|
+
/EAAAE[A-Za-z0-9]{50,}/g
|
|
2229
|
+
]
|
|
2230
|
+
};
|
|
2231
|
+
var SQUARE_OAUTH_SECRET = {
|
|
2232
|
+
secretType: "SQUARE_OAUTH_SECRET",
|
|
2233
|
+
patterns: [/sq0csp-[A-Za-z0-9_-]{43}/g]
|
|
2234
|
+
};
|
|
2235
|
+
var PAYPAL_CLIENT_SECRET = {
|
|
2236
|
+
secretType: "PAYPAL_CLIENT_SECRET",
|
|
2237
|
+
patterns: [/(?:PAYPAL_CLIENT_SECRET|PAYPAL_SECRET)\s*[:=]\s*["']?([A-Za-z0-9_-]{40,})["']?/g],
|
|
2238
|
+
minEntropy: 3.5
|
|
2239
|
+
};
|
|
2240
|
+
var PAYPAL_BRAINTREE_TOKEN = {
|
|
2241
|
+
secretType: "PAYPAL_BRAINTREE_TOKEN",
|
|
2242
|
+
patterns: [/access_token\$(?:production|sandbox)\$[a-z0-9]{16}\$[a-f0-9]{32}/g]
|
|
2243
|
+
};
|
|
2244
|
+
var BRAINTREE_KEY = {
|
|
2245
|
+
secretType: "BRAINTREE_KEY",
|
|
2246
|
+
patterns: [/(?:BRAINTREE_(?:PUBLIC|PRIVATE)_KEY)\s*[:=]\s*["']?([A-Za-z0-9]{32})["']?/g]
|
|
2247
|
+
};
|
|
2248
|
+
var ADYEN_API_KEY = {
|
|
2249
|
+
secretType: "ADYEN_API_KEY",
|
|
2250
|
+
patterns: [/(?:ADYEN_API_KEY)\s*[:=]\s*["']?([A-Za-z0-9]{32,})["']?/g],
|
|
2251
|
+
minEntropy: 3.5
|
|
2252
|
+
};
|
|
2253
|
+
var SHOPIFY_TOKEN = {
|
|
2254
|
+
secretType: "SHOPIFY_TOKEN",
|
|
2255
|
+
patterns: [
|
|
2256
|
+
/shpat_[a-fA-F0-9]{32}/g,
|
|
2257
|
+
/shpca_[a-fA-F0-9]{32}/g,
|
|
2258
|
+
/shppa_[a-fA-F0-9]{32}/g,
|
|
2259
|
+
/shpss_[a-fA-F0-9]{32}/g
|
|
2260
|
+
]
|
|
2261
|
+
};
|
|
2262
|
+
var PLAID_KEY = {
|
|
2263
|
+
secretType: "PLAID_KEY",
|
|
2264
|
+
patterns: [
|
|
2265
|
+
/(?:PLAID_CLIENT_ID)\s*[:=]\s*["']?([a-f0-9]{24})["']?/g,
|
|
2266
|
+
/(?:PLAID_SECRET)\s*[:=]\s*["']?([a-f0-9]{30})["']?/g,
|
|
2267
|
+
/access-(?:sandbox|development|production)-[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}/g
|
|
2268
|
+
]
|
|
2269
|
+
};
|
|
2270
|
+
var PAYMENT_PATTERNS = [
|
|
2271
|
+
STRIPE_KEY,
|
|
2272
|
+
STRIPE_WEBHOOK_SECRET,
|
|
2273
|
+
SQUARE_ACCESS_TOKEN,
|
|
2274
|
+
SQUARE_OAUTH_SECRET,
|
|
2275
|
+
PAYPAL_CLIENT_SECRET,
|
|
2276
|
+
PAYPAL_BRAINTREE_TOKEN,
|
|
2277
|
+
BRAINTREE_KEY,
|
|
2278
|
+
ADYEN_API_KEY,
|
|
2279
|
+
SHOPIFY_TOKEN,
|
|
2280
|
+
PLAID_KEY
|
|
2281
|
+
];
|
|
2282
|
+
|
|
2283
|
+
// src/patterns/secrets/database.ts
|
|
2284
|
+
var DATABASE_CONNECTION_STRING = {
|
|
2285
|
+
secretType: "DATABASE_CONNECTION_STRING",
|
|
2286
|
+
patterns: [
|
|
2287
|
+
/(?:postgres|postgresql|mysql|mongodb|redis|amqp|mssql):\/\/[^\s"']+/gi
|
|
2288
|
+
]
|
|
2289
|
+
};
|
|
2290
|
+
var MONGODB_SRV = {
|
|
2291
|
+
secretType: "MONGODB_SRV",
|
|
2292
|
+
patterns: [/mongodb\+srv:\/\/[^\s"']+/g]
|
|
2293
|
+
};
|
|
2294
|
+
var REDIS_URL_WITH_PASSWORD = {
|
|
2295
|
+
secretType: "REDIS_URL_WITH_PASSWORD",
|
|
2296
|
+
patterns: [/redis:\/\/:[^@\s]+@[^\s"']+/g]
|
|
2297
|
+
};
|
|
2298
|
+
var ELASTICSEARCH_URL = {
|
|
2299
|
+
secretType: "ELASTICSEARCH_URL",
|
|
2300
|
+
patterns: [/https?:\/\/[^:]+:[^@]+@[^/]*(?:elastic|es|elasticsearch)[^\s"']*/gi]
|
|
2301
|
+
};
|
|
2302
|
+
var FIREBASE_KEY = {
|
|
2303
|
+
secretType: "FIREBASE_KEY",
|
|
2304
|
+
patterns: [/(?:FIREBASE_API_KEY|FIREBASE_KEY)\s*[:=]\s*["']?(AIza[A-Za-z0-9_-]{35})["']?/g]
|
|
2305
|
+
};
|
|
2306
|
+
var FIREBASE_URL = {
|
|
2307
|
+
secretType: "FIREBASE_URL",
|
|
2308
|
+
patterns: [/https:\/\/[a-z0-9-]+\.firebaseio\.com/g]
|
|
2309
|
+
};
|
|
2310
|
+
var SUPABASE_KEY = {
|
|
2311
|
+
secretType: "SUPABASE_KEY",
|
|
2312
|
+
patterns: [
|
|
2313
|
+
/(?:SUPABASE_KEY|SUPABASE_ANON_KEY|SUPABASE_SERVICE_KEY)\s*[:=]\s*["']?(eyJ[A-Za-z0-9_-]{100,})["']?/g
|
|
2314
|
+
]
|
|
2315
|
+
};
|
|
2316
|
+
var PLANETSCALE_TOKEN = {
|
|
2317
|
+
secretType: "PLANETSCALE_TOKEN",
|
|
2318
|
+
patterns: [/pscale_tkn_[A-Za-z0-9_-]{40,}/g]
|
|
2319
|
+
};
|
|
2320
|
+
var PLANETSCALE_PASSWORD = {
|
|
2321
|
+
secretType: "PLANETSCALE_PASSWORD",
|
|
2322
|
+
patterns: [/pscale_pw_[A-Za-z0-9_-]{40,}/g]
|
|
2323
|
+
};
|
|
2324
|
+
var COCKROACHDB_CONNECTION = {
|
|
2325
|
+
secretType: "COCKROACHDB_CONNECTION",
|
|
2326
|
+
patterns: [/(?:COCKROACH_URL|DATABASE_URL)\s*[:=]\s*["']?(postgresql:\/\/[^\s"']+\.cockroachlabs\.cloud[^\s"']*)["']?/g]
|
|
2327
|
+
};
|
|
2328
|
+
var NEON_DB_TOKEN = {
|
|
2329
|
+
secretType: "NEON_DB_TOKEN",
|
|
2330
|
+
patterns: [/(?:NEON_API_KEY|NEON_TOKEN)\s*[:=]\s*["']?([A-Za-z0-9_-]{40,})["']?/g],
|
|
2331
|
+
minEntropy: 3.5
|
|
2332
|
+
};
|
|
2333
|
+
var DATABASE_PATTERNS = [
|
|
2334
|
+
DATABASE_CONNECTION_STRING,
|
|
2335
|
+
MONGODB_SRV,
|
|
2336
|
+
REDIS_URL_WITH_PASSWORD,
|
|
2337
|
+
ELASTICSEARCH_URL,
|
|
2338
|
+
FIREBASE_KEY,
|
|
2339
|
+
FIREBASE_URL,
|
|
2340
|
+
SUPABASE_KEY,
|
|
2341
|
+
PLANETSCALE_TOKEN,
|
|
2342
|
+
PLANETSCALE_PASSWORD,
|
|
2343
|
+
COCKROACHDB_CONNECTION,
|
|
2344
|
+
NEON_DB_TOKEN
|
|
2345
|
+
];
|
|
2346
|
+
|
|
2347
|
+
// src/patterns/secrets/infrastructure.ts
|
|
2348
|
+
var DOCKER_HUB_TOKEN = {
|
|
2349
|
+
secretType: "DOCKER_HUB_TOKEN",
|
|
2350
|
+
patterns: [
|
|
2351
|
+
/dckr_pat_[A-Za-z0-9_-]{27,}/g,
|
|
2352
|
+
/(?:DOCKER_PASSWORD|DOCKERHUB_TOKEN)\s*[:=]\s*["']?([A-Za-z0-9_-]{36,})["']?/g
|
|
2353
|
+
]
|
|
2354
|
+
};
|
|
2355
|
+
var DOCKER_REGISTRY_AUTH = {
|
|
2356
|
+
secretType: "DOCKER_REGISTRY_AUTH",
|
|
2357
|
+
patterns: [/"auth"\s*:\s*"([A-Za-z0-9+/=]{20,})"/g],
|
|
2358
|
+
minEntropy: 3.5
|
|
2359
|
+
};
|
|
2360
|
+
var KUBERNETES_SERVICE_TOKEN = {
|
|
2361
|
+
secretType: "KUBERNETES_SERVICE_TOKEN",
|
|
2362
|
+
patterns: [/(?:KUBERNETES_TOKEN|K8S_TOKEN|KUBE_TOKEN)\s*[:=]\s*["']?([A-Za-z0-9_-]{100,})["']?/g],
|
|
2363
|
+
minEntropy: 4
|
|
2364
|
+
};
|
|
2365
|
+
var TERRAFORM_TOKEN = {
|
|
2366
|
+
secretType: "TERRAFORM_TOKEN",
|
|
2367
|
+
patterns: [
|
|
2368
|
+
/[A-Za-z0-9]{14}\.atlasv1\.[A-Za-z0-9_-]{60,}/g,
|
|
2369
|
+
/(?:TF_TOKEN|TFC_TOKEN|TERRAFORM_TOKEN)\s*[:=]\s*["']?([A-Za-z0-9._-]{40,})["']?/g
|
|
2370
|
+
]
|
|
2371
|
+
};
|
|
2372
|
+
var VAULT_TOKEN = {
|
|
2373
|
+
secretType: "VAULT_TOKEN",
|
|
2374
|
+
patterns: [
|
|
2375
|
+
/hvs\.[A-Za-z0-9_-]{24,}/g,
|
|
2376
|
+
/(?:VAULT_TOKEN)\s*[:=]\s*["']?([a-z0-9.-]{20,})["']?/g
|
|
2377
|
+
]
|
|
2378
|
+
};
|
|
2379
|
+
var CONSUL_TOKEN = {
|
|
2380
|
+
secretType: "CONSUL_TOKEN",
|
|
2381
|
+
patterns: [/(?:CONSUL_TOKEN|CONSUL_HTTP_TOKEN)\s*[:=]\s*["']?([a-f0-9-]{36})["']?/g]
|
|
2382
|
+
};
|
|
2383
|
+
var PULUMI_TOKEN = {
|
|
2384
|
+
secretType: "PULUMI_TOKEN",
|
|
2385
|
+
patterns: [/pul-[A-Za-z0-9]{40}/g]
|
|
2386
|
+
};
|
|
2387
|
+
var ANSIBLE_VAULT_PASSWORD = {
|
|
2388
|
+
secretType: "ANSIBLE_VAULT_PASSWORD",
|
|
2389
|
+
patterns: [/\$ANSIBLE_VAULT;[0-9.]+;AES256/g]
|
|
2390
|
+
};
|
|
2391
|
+
var HELM_REPO_TOKEN = {
|
|
2392
|
+
secretType: "HELM_REPO_TOKEN",
|
|
2393
|
+
patterns: [/(?:HELM_REPO_PASSWORD|HELM_TOKEN)\s*[:=]\s*["']?([A-Za-z0-9_-]{20,})["']?/g],
|
|
2394
|
+
minEntropy: 3.5
|
|
2395
|
+
};
|
|
2396
|
+
var GRAFANA_API_KEY = {
|
|
2397
|
+
secretType: "GRAFANA_API_KEY",
|
|
2398
|
+
patterns: [
|
|
2399
|
+
/eyJrIjoi[A-Za-z0-9_-]{30,}/g,
|
|
2400
|
+
/glsa_[A-Za-z0-9]{32}_[A-Fa-f0-9]{8}/g,
|
|
2401
|
+
/glc_[A-Za-z0-9+/]{32,}={0,2}/g
|
|
2402
|
+
]
|
|
2403
|
+
};
|
|
2404
|
+
var INFRASTRUCTURE_PATTERNS = [
|
|
2405
|
+
DOCKER_HUB_TOKEN,
|
|
2406
|
+
DOCKER_REGISTRY_AUTH,
|
|
2407
|
+
KUBERNETES_SERVICE_TOKEN,
|
|
2408
|
+
TERRAFORM_TOKEN,
|
|
2409
|
+
VAULT_TOKEN,
|
|
2410
|
+
CONSUL_TOKEN,
|
|
2411
|
+
PULUMI_TOKEN,
|
|
2412
|
+
ANSIBLE_VAULT_PASSWORD,
|
|
2413
|
+
HELM_REPO_TOKEN,
|
|
2414
|
+
GRAFANA_API_KEY
|
|
2415
|
+
];
|
|
2416
|
+
|
|
2417
|
+
// src/patterns/secrets/saas.ts
|
|
2418
|
+
var SALESFORCE_TOKEN = {
|
|
2419
|
+
secretType: "SALESFORCE_TOKEN",
|
|
2420
|
+
patterns: [/(?:SALESFORCE_TOKEN|SF_ACCESS_TOKEN|SFDC_TOKEN)\s*[:=]\s*["']?([A-Za-z0-9!]{40,})["']?/g],
|
|
2421
|
+
minEntropy: 3.5
|
|
2422
|
+
};
|
|
2423
|
+
var HUBSPOT_API_KEY = {
|
|
2424
|
+
secretType: "HUBSPOT_API_KEY",
|
|
2425
|
+
patterns: [
|
|
2426
|
+
/(?:HUBSPOT_API_KEY|HAPI_KEY)\s*[:=]\s*["']?([a-f0-9-]{36})["']?/g,
|
|
2427
|
+
/pat-(?:na|eu)1-[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}/g
|
|
2428
|
+
]
|
|
2429
|
+
};
|
|
2430
|
+
var ZENDESK_TOKEN = {
|
|
2431
|
+
secretType: "ZENDESK_TOKEN",
|
|
2432
|
+
patterns: [/(?:ZENDESK_TOKEN|ZENDESK_API_TOKEN)\s*[:=]\s*["']?([A-Za-z0-9]{40})["']?/g]
|
|
2433
|
+
};
|
|
2434
|
+
var DATADOG_API_KEY = {
|
|
2435
|
+
secretType: "DATADOG_API_KEY",
|
|
2436
|
+
patterns: [
|
|
2437
|
+
/(?:DD_API_KEY|DATADOG_API_KEY)\s*[:=]\s*["']?([a-f0-9]{32})["']?/g,
|
|
2438
|
+
/(?:DD_APP_KEY|DATADOG_APP_KEY)\s*[:=]\s*["']?([a-f0-9]{40})["']?/g
|
|
2439
|
+
]
|
|
2440
|
+
};
|
|
2441
|
+
var NEW_RELIC_KEY = {
|
|
2442
|
+
secretType: "NEW_RELIC_KEY",
|
|
2443
|
+
patterns: [
|
|
2444
|
+
/NRAK-[A-Z0-9]{27}/g,
|
|
2445
|
+
/(?:NEW_RELIC_LICENSE_KEY|NEWRELIC_KEY)\s*[:=]\s*["']?([a-f0-9]{40})["']?/g,
|
|
2446
|
+
/NRII-[A-Za-z0-9_-]{32}/g,
|
|
2447
|
+
/NRIQ-[A-Za-z0-9_-]{32}/g
|
|
2448
|
+
]
|
|
2449
|
+
};
|
|
2450
|
+
var PAGERDUTY_KEY = {
|
|
2451
|
+
secretType: "PAGERDUTY_KEY",
|
|
2452
|
+
patterns: [/(?:PAGERDUTY_TOKEN|PD_API_KEY)\s*[:=]\s*["']?([A-Za-z0-9_+-]{20})["']?/g]
|
|
2453
|
+
};
|
|
2454
|
+
var LAUNCHDARKLY_KEY = {
|
|
2455
|
+
secretType: "LAUNCHDARKLY_KEY",
|
|
2456
|
+
patterns: [
|
|
2457
|
+
/(?:sdk|mob|api)-[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}/g
|
|
2458
|
+
]
|
|
2459
|
+
};
|
|
2460
|
+
var SENTRY_DSN = {
|
|
2461
|
+
secretType: "SENTRY_DSN",
|
|
2462
|
+
patterns: [/https:\/\/[a-f0-9]{32}@[a-z0-9.]+\.ingest\.sentry\.io\/[0-9]+/g]
|
|
2463
|
+
};
|
|
2464
|
+
var SENTRY_AUTH_TOKEN = {
|
|
2465
|
+
secretType: "SENTRY_AUTH_TOKEN",
|
|
2466
|
+
patterns: [
|
|
2467
|
+
/sntrys_[A-Za-z0-9]{60,}/g,
|
|
2468
|
+
/(?:SENTRY_AUTH_TOKEN)\s*[:=]\s*["']?([a-f0-9]{64})["']?/g
|
|
2469
|
+
]
|
|
2470
|
+
};
|
|
2471
|
+
var SEGMENT_KEY = {
|
|
2472
|
+
secretType: "SEGMENT_KEY",
|
|
2473
|
+
patterns: [/(?:SEGMENT_WRITE_KEY|SEGMENT_API_KEY)\s*[:=]\s*["']?([A-Za-z0-9]{32})["']?/g]
|
|
2474
|
+
};
|
|
2475
|
+
var AIRTABLE_KEY = {
|
|
2476
|
+
secretType: "AIRTABLE_KEY",
|
|
2477
|
+
patterns: [
|
|
2478
|
+
/key[A-Za-z0-9]{14}/g,
|
|
2479
|
+
/pat[A-Za-z0-9]{14}\.[a-f0-9]{64}/g
|
|
2480
|
+
]
|
|
2481
|
+
};
|
|
2482
|
+
var NOTION_TOKEN = {
|
|
2483
|
+
secretType: "NOTION_TOKEN",
|
|
2484
|
+
patterns: [
|
|
2485
|
+
/secret_[A-Za-z0-9]{43}/g,
|
|
2486
|
+
/ntn_[A-Za-z0-9]{40,}/g
|
|
2487
|
+
]
|
|
2488
|
+
};
|
|
2489
|
+
var ASANA_TOKEN = {
|
|
2490
|
+
secretType: "ASANA_TOKEN",
|
|
2491
|
+
patterns: [/(?:ASANA_TOKEN|ASANA_ACCESS_TOKEN)\s*[:=]\s*["']?([0-9]\/[0-9]{16}:[A-Za-z0-9]{32})["']?/g]
|
|
2492
|
+
};
|
|
2493
|
+
var JIRA_TOKEN = {
|
|
2494
|
+
secretType: "JIRA_TOKEN",
|
|
2495
|
+
patterns: [/(?:JIRA_TOKEN|JIRA_API_TOKEN|ATLASSIAN_TOKEN)\s*[:=]\s*["']?([A-Za-z0-9]{24,})["']?/g],
|
|
2496
|
+
minEntropy: 3.5
|
|
2497
|
+
};
|
|
2498
|
+
var LINEAR_API_KEY = {
|
|
2499
|
+
secretType: "LINEAR_API_KEY",
|
|
2500
|
+
patterns: [/lin_api_[A-Za-z0-9]{40}/g]
|
|
2501
|
+
};
|
|
2502
|
+
var CONTENTFUL_TOKEN = {
|
|
2503
|
+
secretType: "CONTENTFUL_TOKEN",
|
|
2504
|
+
patterns: [/(?:CONTENTFUL_ACCESS_TOKEN|CONTENTFUL_DELIVERY_TOKEN)\s*[:=]\s*["']?([A-Za-z0-9_-]{43})["']?/g]
|
|
2505
|
+
};
|
|
2506
|
+
var ALGOLIA_KEY = {
|
|
2507
|
+
secretType: "ALGOLIA_KEY",
|
|
2508
|
+
patterns: [/(?:ALGOLIA_ADMIN_KEY|ALGOLIA_API_KEY)\s*[:=]\s*["']?([a-f0-9]{32})["']?/g]
|
|
2509
|
+
};
|
|
2510
|
+
var SAAS_PATTERNS = [
|
|
2511
|
+
SALESFORCE_TOKEN,
|
|
2512
|
+
HUBSPOT_API_KEY,
|
|
2513
|
+
ZENDESK_TOKEN,
|
|
2514
|
+
DATADOG_API_KEY,
|
|
2515
|
+
NEW_RELIC_KEY,
|
|
2516
|
+
PAGERDUTY_KEY,
|
|
2517
|
+
LAUNCHDARKLY_KEY,
|
|
2518
|
+
SENTRY_DSN,
|
|
2519
|
+
SENTRY_AUTH_TOKEN,
|
|
2520
|
+
SEGMENT_KEY,
|
|
2521
|
+
AIRTABLE_KEY,
|
|
2522
|
+
NOTION_TOKEN,
|
|
2523
|
+
ASANA_TOKEN,
|
|
2524
|
+
JIRA_TOKEN,
|
|
2525
|
+
LINEAR_API_KEY,
|
|
2526
|
+
CONTENTFUL_TOKEN,
|
|
2527
|
+
ALGOLIA_KEY
|
|
2528
|
+
];
|
|
2529
|
+
|
|
2530
|
+
// src/patterns/secrets/ai-ml.ts
|
|
2531
|
+
var OPENAI_KEY = {
|
|
2532
|
+
secretType: "OPENAI_KEY",
|
|
2533
|
+
patterns: [
|
|
2534
|
+
/sk-[A-Za-z0-9]{20}T3BlbkFJ[A-Za-z0-9]{20}/g,
|
|
2535
|
+
/sk-proj-[A-Za-z0-9_-]{40,}/g,
|
|
2536
|
+
/sk-[A-Za-z0-9_-]{40,}/g
|
|
2537
|
+
]
|
|
2538
|
+
};
|
|
2539
|
+
var ANTHROPIC_KEY = {
|
|
2540
|
+
secretType: "ANTHROPIC_KEY",
|
|
2541
|
+
patterns: [/sk-ant-[A-Za-z0-9_-]{40,}/g]
|
|
2542
|
+
};
|
|
2543
|
+
var COHERE_KEY = {
|
|
2544
|
+
secretType: "COHERE_KEY",
|
|
2545
|
+
patterns: [
|
|
2546
|
+
/(?:COHERE_API_KEY|CO_API_KEY)\s*[:=]\s*["']?([A-Za-z0-9]{40})["']?/g
|
|
2547
|
+
]
|
|
2548
|
+
};
|
|
2549
|
+
var HUGGINGFACE_TOKEN = {
|
|
2550
|
+
secretType: "HUGGINGFACE_TOKEN",
|
|
2551
|
+
patterns: [/hf_[A-Za-z0-9]{34}/g]
|
|
2552
|
+
};
|
|
2553
|
+
var REPLICATE_TOKEN = {
|
|
2554
|
+
secretType: "REPLICATE_TOKEN",
|
|
2555
|
+
patterns: [
|
|
2556
|
+
/r8_[A-Za-z0-9]{37}/g,
|
|
2557
|
+
/(?:REPLICATE_API_TOKEN)\s*[:=]\s*["']?([A-Za-z0-9_-]{40})["']?/g
|
|
2558
|
+
]
|
|
2559
|
+
};
|
|
2560
|
+
var GOOGLE_AI_KEY = {
|
|
2561
|
+
secretType: "GOOGLE_AI_KEY",
|
|
2562
|
+
patterns: [/(?:GOOGLE_AI_KEY|GEMINI_API_KEY)\s*[:=]\s*["']?(AIza[A-Za-z0-9_-]{35})["']?/g]
|
|
2563
|
+
};
|
|
2564
|
+
var MISTRAL_KEY = {
|
|
2565
|
+
secretType: "MISTRAL_KEY",
|
|
2566
|
+
patterns: [/(?:MISTRAL_API_KEY)\s*[:=]\s*["']?([A-Za-z0-9]{32})["']?/g]
|
|
2567
|
+
};
|
|
2568
|
+
var PINECONE_KEY = {
|
|
2569
|
+
secretType: "PINECONE_KEY",
|
|
2570
|
+
patterns: [/(?:PINECONE_API_KEY)\s*[:=]\s*["']?([a-f0-9-]{36})["']?/g]
|
|
2571
|
+
};
|
|
2572
|
+
var WEAVIATE_KEY = {
|
|
2573
|
+
secretType: "WEAVIATE_KEY",
|
|
2574
|
+
patterns: [/(?:WEAVIATE_API_KEY)\s*[:=]\s*["']?([A-Za-z0-9]{40,})["']?/g],
|
|
2575
|
+
minEntropy: 3.5
|
|
2576
|
+
};
|
|
2577
|
+
var WANDB_KEY = {
|
|
2578
|
+
secretType: "WANDB_KEY",
|
|
2579
|
+
patterns: [/(?:WANDB_API_KEY)\s*[:=]\s*["']?([a-f0-9]{40})["']?/g]
|
|
2580
|
+
};
|
|
2581
|
+
var DEEPSEEK_KEY = {
|
|
2582
|
+
secretType: "DEEPSEEK_KEY",
|
|
2583
|
+
patterns: [/(?:DEEPSEEK_API_KEY)\s*[:=]\s*["']?([A-Za-z0-9_-]{40,})["']?/g],
|
|
2584
|
+
minEntropy: 3.5
|
|
2585
|
+
};
|
|
2586
|
+
var GROQ_KEY = {
|
|
2587
|
+
secretType: "GROQ_KEY",
|
|
2588
|
+
patterns: [/gsk_[A-Za-z0-9]{52}/g]
|
|
2589
|
+
};
|
|
2590
|
+
var AI_ML_PATTERNS = [
|
|
2591
|
+
OPENAI_KEY,
|
|
2592
|
+
ANTHROPIC_KEY,
|
|
2593
|
+
COHERE_KEY,
|
|
2594
|
+
HUGGINGFACE_TOKEN,
|
|
2595
|
+
REPLICATE_TOKEN,
|
|
2596
|
+
GOOGLE_AI_KEY,
|
|
2597
|
+
MISTRAL_KEY,
|
|
2598
|
+
PINECONE_KEY,
|
|
2599
|
+
WEAVIATE_KEY,
|
|
2600
|
+
WANDB_KEY,
|
|
2601
|
+
DEEPSEEK_KEY,
|
|
2602
|
+
GROQ_KEY
|
|
2603
|
+
];
|
|
2604
|
+
|
|
2605
|
+
// src/patterns/secrets/auth.ts
|
|
2606
|
+
var AUTH0_CLIENT_SECRET = {
|
|
2607
|
+
secretType: "AUTH0_CLIENT_SECRET",
|
|
2608
|
+
patterns: [/(?:AUTH0_CLIENT_SECRET)\s*[:=]\s*["']?([A-Za-z0-9_-]{32,})["']?/g],
|
|
2609
|
+
minEntropy: 3.5
|
|
2610
|
+
};
|
|
2611
|
+
var AUTH0_MANAGEMENT_TOKEN = {
|
|
2612
|
+
secretType: "AUTH0_MANAGEMENT_TOKEN",
|
|
2613
|
+
patterns: [/(?:AUTH0_MANAGEMENT_API_TOKEN|AUTH0_TOKEN)\s*[:=]\s*["']?(eyJ[A-Za-z0-9_-]{100,})["']?/g]
|
|
2614
|
+
};
|
|
2615
|
+
var OKTA_TOKEN = {
|
|
2616
|
+
secretType: "OKTA_TOKEN",
|
|
2617
|
+
patterns: [
|
|
2618
|
+
/(?:OKTA_TOKEN|OKTA_API_TOKEN)\s*[:=]\s*["']?([A-Za-z0-9_-]{42})["']?/g,
|
|
2619
|
+
/00[A-Za-z0-9_-]{40}/g
|
|
2620
|
+
]
|
|
2621
|
+
};
|
|
2622
|
+
var CLERK_SECRET_KEY = {
|
|
2623
|
+
secretType: "CLERK_SECRET_KEY",
|
|
2624
|
+
patterns: [/sk_(?:live|test)_[A-Za-z0-9]{24,}/g]
|
|
2625
|
+
};
|
|
2626
|
+
var CLERK_PUBLISHABLE_KEY = {
|
|
2627
|
+
secretType: "CLERK_PUBLISHABLE_KEY",
|
|
2628
|
+
patterns: [/pk_(?:live|test)_[A-Za-z0-9]{24,}/g]
|
|
2629
|
+
};
|
|
2630
|
+
var FIREBASE_AUTH_KEY = {
|
|
2631
|
+
secretType: "FIREBASE_AUTH_KEY",
|
|
2632
|
+
patterns: [/(?:FIREBASE_AUTH_TOKEN)\s*[:=]\s*["']?([A-Za-z0-9_-]{100,})["']?/g],
|
|
2633
|
+
minEntropy: 4
|
|
2634
|
+
};
|
|
2635
|
+
var SUPABASE_SERVICE_KEY = {
|
|
2636
|
+
secretType: "SUPABASE_SERVICE_KEY",
|
|
2637
|
+
patterns: [/(?:SUPABASE_SERVICE_ROLE_KEY)\s*[:=]\s*["']?(eyJ[A-Za-z0-9_-]{100,})["']?/g]
|
|
2638
|
+
};
|
|
2639
|
+
var STYTCH_SECRET = {
|
|
2640
|
+
secretType: "STYTCH_SECRET",
|
|
2641
|
+
patterns: [/secret-(?:live|test)-[A-Za-z0-9_-]{36,}/g]
|
|
2642
|
+
};
|
|
2643
|
+
var PROPELAUTH_KEY = {
|
|
2644
|
+
secretType: "PROPELAUTH_KEY",
|
|
2645
|
+
patterns: [/(?:PROPELAUTH_API_KEY)\s*[:=]\s*["']?([A-Za-z0-9_-]{40,})["']?/g],
|
|
2646
|
+
minEntropy: 3.5
|
|
2647
|
+
};
|
|
2648
|
+
var KEYCLOAK_SECRET = {
|
|
2649
|
+
secretType: "KEYCLOAK_SECRET",
|
|
2650
|
+
patterns: [/(?:KEYCLOAK_CLIENT_SECRET|KC_CLIENT_SECRET)\s*[:=]\s*["']?([A-Za-z0-9-]{36})["']?/g]
|
|
2651
|
+
};
|
|
2652
|
+
var AUTH_PATTERNS = [
|
|
2653
|
+
AUTH0_CLIENT_SECRET,
|
|
2654
|
+
AUTH0_MANAGEMENT_TOKEN,
|
|
2655
|
+
OKTA_TOKEN,
|
|
2656
|
+
CLERK_SECRET_KEY,
|
|
2657
|
+
CLERK_PUBLISHABLE_KEY,
|
|
2658
|
+
FIREBASE_AUTH_KEY,
|
|
2659
|
+
SUPABASE_SERVICE_KEY,
|
|
2660
|
+
STYTCH_SECRET,
|
|
2661
|
+
PROPELAUTH_KEY,
|
|
2662
|
+
KEYCLOAK_SECRET
|
|
2663
|
+
];
|
|
2664
|
+
|
|
2665
|
+
// src/patterns/secrets/cdn-hosting.ts
|
|
2666
|
+
var CLOUDFLARE_API_KEY = {
|
|
2667
|
+
secretType: "CLOUDFLARE_API_KEY",
|
|
2668
|
+
patterns: [
|
|
2669
|
+
/(?:CLOUDFLARE_API_KEY|CF_API_KEY)\s*[:=]\s*["']?([a-f0-9]{37})["']?/g,
|
|
2670
|
+
/(?:CLOUDFLARE_API_TOKEN|CF_API_TOKEN)\s*[:=]\s*["']?([A-Za-z0-9_-]{40})["']?/g
|
|
2671
|
+
]
|
|
2672
|
+
};
|
|
2673
|
+
var CLOUDFLARE_CA_KEY = {
|
|
2674
|
+
secretType: "CLOUDFLARE_CA_KEY",
|
|
2675
|
+
patterns: [/v1\.0-[a-f0-9]{24}-[a-f0-9]{146}/g]
|
|
2676
|
+
};
|
|
2677
|
+
var FASTLY_API_KEY = {
|
|
2678
|
+
secretType: "FASTLY_API_KEY",
|
|
2679
|
+
patterns: [/(?:FASTLY_API_TOKEN|FASTLY_KEY)\s*[:=]\s*["']?([A-Za-z0-9_-]{32})["']?/g]
|
|
2680
|
+
};
|
|
2681
|
+
var NETLIFY_TOKEN = {
|
|
2682
|
+
secretType: "NETLIFY_TOKEN",
|
|
2683
|
+
patterns: [/(?:NETLIFY_AUTH_TOKEN|NETLIFY_TOKEN)\s*[:=]\s*["']?([A-Za-z0-9_-]{40,})["']?/g],
|
|
2684
|
+
minEntropy: 3.5
|
|
2685
|
+
};
|
|
2686
|
+
var VERCEL_TOKEN = {
|
|
2687
|
+
secretType: "VERCEL_TOKEN",
|
|
2688
|
+
patterns: [/(?:VERCEL_TOKEN|NOW_TOKEN)\s*[:=]\s*["']?([A-Za-z0-9]{24})["']?/g]
|
|
2689
|
+
};
|
|
2690
|
+
var HEROKU_API_KEY = {
|
|
2691
|
+
secretType: "HEROKU_API_KEY",
|
|
2692
|
+
patterns: [
|
|
2693
|
+
/(?:HEROKU_API_KEY)\s*[:=]\s*["']?([a-f0-9-]{36})["']?/g,
|
|
2694
|
+
/[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}/g
|
|
2695
|
+
]
|
|
2696
|
+
};
|
|
2697
|
+
var RENDER_TOKEN = {
|
|
2698
|
+
secretType: "RENDER_TOKEN",
|
|
2699
|
+
patterns: [/rnd_[A-Za-z0-9]{32,}/g]
|
|
2700
|
+
};
|
|
2701
|
+
var FLY_IO_TOKEN = {
|
|
2702
|
+
secretType: "FLY_IO_TOKEN",
|
|
2703
|
+
patterns: [/(?:FLY_ACCESS_TOKEN|FLY_API_TOKEN)\s*[:=]\s*["']?([A-Za-z0-9_-]{40,})["']?/g],
|
|
2704
|
+
minEntropy: 3.5
|
|
2705
|
+
};
|
|
2706
|
+
var RAILWAY_TOKEN = {
|
|
2707
|
+
secretType: "RAILWAY_TOKEN",
|
|
2708
|
+
patterns: [/(?:RAILWAY_TOKEN)\s*[:=]\s*["']?([a-f0-9-]{36})["']?/g]
|
|
2709
|
+
};
|
|
2710
|
+
var SURGE_TOKEN = {
|
|
2711
|
+
secretType: "SURGE_TOKEN",
|
|
2712
|
+
patterns: [/(?:SURGE_TOKEN|SURGE_LOGIN)\s*[:=]\s*["']?([A-Za-z0-9._-]{20,})["']?/g],
|
|
2713
|
+
minEntropy: 3.5
|
|
2714
|
+
};
|
|
2715
|
+
var CDN_HOSTING_PATTERNS = [
|
|
2716
|
+
CLOUDFLARE_API_KEY,
|
|
2717
|
+
CLOUDFLARE_CA_KEY,
|
|
2718
|
+
FASTLY_API_KEY,
|
|
2719
|
+
NETLIFY_TOKEN,
|
|
2720
|
+
VERCEL_TOKEN,
|
|
2721
|
+
HEROKU_API_KEY,
|
|
2722
|
+
RENDER_TOKEN,
|
|
2723
|
+
FLY_IO_TOKEN,
|
|
2724
|
+
RAILWAY_TOKEN,
|
|
2725
|
+
SURGE_TOKEN
|
|
2726
|
+
];
|
|
2727
|
+
|
|
2728
|
+
// src/patterns/secrets/social.ts
|
|
2729
|
+
var TWITTER_API_KEY = {
|
|
2730
|
+
secretType: "TWITTER_API_KEY",
|
|
2731
|
+
patterns: [
|
|
2732
|
+
/(?:TWITTER_API_KEY|TWITTER_CONSUMER_KEY)\s*[:=]\s*["']?([A-Za-z0-9]{25})["']?/g
|
|
2733
|
+
]
|
|
2734
|
+
};
|
|
2735
|
+
var TWITTER_API_SECRET = {
|
|
2736
|
+
secretType: "TWITTER_API_SECRET",
|
|
2737
|
+
patterns: [
|
|
2738
|
+
/(?:TWITTER_API_SECRET|TWITTER_CONSUMER_SECRET)\s*[:=]\s*["']?([A-Za-z0-9]{50})["']?/g
|
|
2739
|
+
]
|
|
2740
|
+
};
|
|
2741
|
+
var TWITTER_BEARER_TOKEN = {
|
|
2742
|
+
secretType: "TWITTER_BEARER_TOKEN",
|
|
2743
|
+
patterns: [/AAAAAAAAAAAAAAAAAAA[A-Za-z0-9%]{30,}/g]
|
|
2744
|
+
};
|
|
2745
|
+
var FACEBOOK_TOKEN = {
|
|
2746
|
+
secretType: "FACEBOOK_TOKEN",
|
|
2747
|
+
patterns: [
|
|
2748
|
+
/EAA[A-Za-z0-9]{100,}/g,
|
|
2749
|
+
/(?:FACEBOOK_TOKEN|FB_ACCESS_TOKEN)\s*[:=]\s*["']?([A-Za-z0-9|_-]{40,})["']?/g
|
|
2750
|
+
]
|
|
2751
|
+
};
|
|
2752
|
+
var FACEBOOK_SECRET = {
|
|
2753
|
+
secretType: "FACEBOOK_SECRET",
|
|
2754
|
+
patterns: [/(?:FACEBOOK_SECRET|FB_APP_SECRET)\s*[:=]\s*["']?([a-f0-9]{32})["']?/g]
|
|
2755
|
+
};
|
|
2756
|
+
var INSTAGRAM_TOKEN = {
|
|
2757
|
+
secretType: "INSTAGRAM_TOKEN",
|
|
2758
|
+
patterns: [/(?:INSTAGRAM_TOKEN|IG_ACCESS_TOKEN)\s*[:=]\s*["']?([A-Za-z0-9.]{100,})["']?/g],
|
|
2759
|
+
minEntropy: 3.5
|
|
2760
|
+
};
|
|
2761
|
+
var LINKEDIN_SECRET = {
|
|
2762
|
+
secretType: "LINKEDIN_SECRET",
|
|
2763
|
+
patterns: [/(?:LINKEDIN_CLIENT_SECRET|LINKEDIN_SECRET)\s*[:=]\s*["']?([A-Za-z0-9]{16})["']?/g]
|
|
2764
|
+
};
|
|
2765
|
+
var YOUTUBE_API_KEY = {
|
|
2766
|
+
secretType: "YOUTUBE_API_KEY",
|
|
2767
|
+
patterns: [/(?:YOUTUBE_API_KEY)\s*[:=]\s*["']?(AIza[A-Za-z0-9_-]{35})["']?/g]
|
|
2768
|
+
};
|
|
2769
|
+
var TIKTOK_TOKEN = {
|
|
2770
|
+
secretType: "TIKTOK_TOKEN",
|
|
2771
|
+
patterns: [/(?:TIKTOK_TOKEN|TIKTOK_ACCESS_TOKEN)\s*[:=]\s*["']?([A-Za-z0-9._-]{40,})["']?/g],
|
|
2772
|
+
minEntropy: 3.5
|
|
2773
|
+
};
|
|
2774
|
+
var PINTEREST_TOKEN = {
|
|
2775
|
+
secretType: "PINTEREST_TOKEN",
|
|
2776
|
+
patterns: [/(?:PINTEREST_TOKEN|PINTEREST_ACCESS_TOKEN)\s*[:=]\s*["']?([A-Za-z0-9_-]{40,})["']?/g],
|
|
2777
|
+
minEntropy: 3.5
|
|
2778
|
+
};
|
|
2779
|
+
var SOCIAL_PATTERNS = [
|
|
2780
|
+
TWITTER_API_KEY,
|
|
2781
|
+
TWITTER_API_SECRET,
|
|
2782
|
+
TWITTER_BEARER_TOKEN,
|
|
2783
|
+
FACEBOOK_TOKEN,
|
|
2784
|
+
FACEBOOK_SECRET,
|
|
2785
|
+
INSTAGRAM_TOKEN,
|
|
2786
|
+
LINKEDIN_SECRET,
|
|
2787
|
+
YOUTUBE_API_KEY,
|
|
2788
|
+
TIKTOK_TOKEN,
|
|
2789
|
+
PINTEREST_TOKEN
|
|
2790
|
+
];
|
|
2791
|
+
|
|
2792
|
+
// src/patterns/secrets/generic.ts
|
|
2793
|
+
var JWT_TOKEN = {
|
|
2794
|
+
secretType: "JWT_TOKEN",
|
|
2795
|
+
patterns: [
|
|
2796
|
+
/eyJ[A-Za-z0-9_-]{10,}\.eyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}/g
|
|
2797
|
+
]
|
|
2798
|
+
};
|
|
2799
|
+
var PRIVATE_KEY = {
|
|
2800
|
+
secretType: "PRIVATE_KEY",
|
|
2801
|
+
patterns: [
|
|
2802
|
+
/-----BEGIN (?:RSA |EC |ED25519 |OPENSSH |DSA |PGP )?PRIVATE KEY(?: BLOCK)?-----/g
|
|
2803
|
+
]
|
|
2804
|
+
};
|
|
2805
|
+
var NPM_TOKEN = {
|
|
2806
|
+
secretType: "NPM_TOKEN",
|
|
2807
|
+
patterns: [
|
|
2808
|
+
/npm_[A-Za-z0-9]{36}/g,
|
|
2809
|
+
/\/\/registry\.npmjs\.org\/:_authToken=[A-Za-z0-9-]+/g
|
|
2810
|
+
]
|
|
2811
|
+
};
|
|
2812
|
+
var PYPI_TOKEN = {
|
|
2813
|
+
secretType: "PYPI_TOKEN",
|
|
2814
|
+
patterns: [/pypi-AgEIcHlwaS5vcmc[A-Za-z0-9_-]{50,}/g]
|
|
2815
|
+
};
|
|
2816
|
+
var RUBYGEMS_KEY = {
|
|
2817
|
+
secretType: "RUBYGEMS_KEY",
|
|
2818
|
+
patterns: [/rubygems_[a-f0-9]{48}/g]
|
|
2819
|
+
};
|
|
2820
|
+
var NUGET_KEY = {
|
|
2821
|
+
secretType: "NUGET_KEY",
|
|
2822
|
+
patterns: [/oy2[A-Za-z0-9]{43}/g]
|
|
2823
|
+
};
|
|
2824
|
+
var CRATES_IO_TOKEN = {
|
|
2825
|
+
secretType: "CRATES_IO_TOKEN",
|
|
2826
|
+
patterns: [/cio[A-Za-z0-9]{32}/g]
|
|
2827
|
+
};
|
|
2828
|
+
var GENERIC_HIGH_ENTROPY = {
|
|
2829
|
+
secretType: "GENERIC_SECRET",
|
|
2830
|
+
patterns: [
|
|
2831
|
+
/(?:api[_-]?key|secret|token|password|passwd|credential|auth)[_\s]*[:=]\s*["']([A-Za-z0-9+/=_\-]{16,})["']/gi
|
|
2832
|
+
],
|
|
2833
|
+
minEntropy: 3.5
|
|
2834
|
+
};
|
|
2835
|
+
var GENERIC_BEARER_TOKEN = {
|
|
2836
|
+
secretType: "BEARER_TOKEN",
|
|
2837
|
+
patterns: [
|
|
2838
|
+
/(?:Authorization|Bearer)\s*[:=]\s*["']?Bearer\s+([A-Za-z0-9._~+/=-]{20,})["']?/g
|
|
2839
|
+
],
|
|
2840
|
+
minEntropy: 3.5
|
|
2841
|
+
};
|
|
2842
|
+
var BASIC_AUTH_HEADER = {
|
|
2843
|
+
secretType: "BASIC_AUTH",
|
|
2844
|
+
patterns: [
|
|
2845
|
+
/(?:Authorization)\s*[:=]\s*["']?Basic\s+([A-Za-z0-9+/=]{20,})["']?/g
|
|
2846
|
+
]
|
|
2847
|
+
};
|
|
2848
|
+
var GENERIC_PATTERNS2 = [
|
|
2849
|
+
JWT_TOKEN,
|
|
2850
|
+
PRIVATE_KEY,
|
|
2851
|
+
NPM_TOKEN,
|
|
2852
|
+
PYPI_TOKEN,
|
|
2853
|
+
RUBYGEMS_KEY,
|
|
2854
|
+
NUGET_KEY,
|
|
2855
|
+
CRATES_IO_TOKEN,
|
|
2856
|
+
GENERIC_HIGH_ENTROPY,
|
|
2857
|
+
GENERIC_BEARER_TOKEN,
|
|
2858
|
+
BASIC_AUTH_HEADER
|
|
2859
|
+
];
|
|
2860
|
+
|
|
2861
|
+
// src/patterns/secrets/index.ts
|
|
2862
|
+
var ALL_SECRET_PATTERNS = [
|
|
2863
|
+
...CLOUD_PATTERNS,
|
|
2864
|
+
...SOURCE_CONTROL_PATTERNS,
|
|
2865
|
+
...CICD_PATTERNS,
|
|
2866
|
+
...COMMUNICATION_PATTERNS,
|
|
2867
|
+
...PAYMENT_PATTERNS,
|
|
2868
|
+
...DATABASE_PATTERNS,
|
|
2869
|
+
...INFRASTRUCTURE_PATTERNS,
|
|
2870
|
+
...SAAS_PATTERNS,
|
|
2871
|
+
...AI_ML_PATTERNS,
|
|
2872
|
+
...AUTH_PATTERNS,
|
|
2873
|
+
...CDN_HOSTING_PATTERNS,
|
|
2874
|
+
...SOCIAL_PATTERNS,
|
|
2875
|
+
...GENERIC_PATTERNS2
|
|
2876
|
+
];
|
|
2877
|
+
|
|
2878
|
+
// src/guards/secret.guard.ts
|
|
2879
|
+
var SecretGuard = class extends BaseGuard {
|
|
2880
|
+
name = "secret";
|
|
2881
|
+
patterns;
|
|
2882
|
+
constructor(options) {
|
|
2883
|
+
super();
|
|
2884
|
+
let patterns = ALL_SECRET_PATTERNS;
|
|
2885
|
+
if (options?.secretTypes && options.secretTypes.length > 0) {
|
|
2886
|
+
const allowed = new Set(options.secretTypes);
|
|
2887
|
+
patterns = patterns.filter((p) => allowed.has(p.secretType));
|
|
2888
|
+
}
|
|
2889
|
+
this.patterns = patterns;
|
|
2890
|
+
}
|
|
2891
|
+
async analyze(text, config) {
|
|
2892
|
+
const cfg = this.mergeConfig(config);
|
|
2893
|
+
const detections = [];
|
|
2894
|
+
for (const pattern of this.patterns) {
|
|
2895
|
+
for (const regex of pattern.patterns) {
|
|
2896
|
+
const re = new RegExp(regex.source, regex.flags);
|
|
2897
|
+
let match;
|
|
2898
|
+
while ((match = re.exec(text)) !== null) {
|
|
2899
|
+
const matchText = match[1] ?? match[0];
|
|
2900
|
+
const fullMatch = match[0];
|
|
2901
|
+
const start = match.index;
|
|
2902
|
+
const end = start + fullMatch.length;
|
|
2903
|
+
if (pattern.validate && !pattern.validate(matchText)) {
|
|
2904
|
+
continue;
|
|
2905
|
+
}
|
|
2906
|
+
if (pattern.minEntropy) {
|
|
2907
|
+
const entropy = shannonEntropy(matchText);
|
|
2908
|
+
if (entropy < pattern.minEntropy) {
|
|
2909
|
+
continue;
|
|
2910
|
+
}
|
|
2911
|
+
}
|
|
2912
|
+
detections.push(
|
|
2913
|
+
this.makeDetection(text, {
|
|
2914
|
+
entityType: pattern.secretType,
|
|
2915
|
+
start,
|
|
2916
|
+
end,
|
|
2917
|
+
text: fullMatch,
|
|
2918
|
+
confidence: "high",
|
|
2919
|
+
score: 0.9,
|
|
2920
|
+
guardName: this.name
|
|
2921
|
+
})
|
|
2922
|
+
);
|
|
2923
|
+
}
|
|
2924
|
+
}
|
|
2925
|
+
}
|
|
2926
|
+
return this.buildResult(text, detections, cfg.mode);
|
|
2927
|
+
}
|
|
2928
|
+
};
|
|
2929
|
+
|
|
2930
|
+
// src/patterns/injection.ts
|
|
2931
|
+
var INJECTION_PATTERNS = [
|
|
2932
|
+
/ignore\s+(all\s+)?(previous|prior|above|earlier)\s+(instructions?|prompts?|commands?|directives?)/i,
|
|
2933
|
+
/disregard\s+(all\s+)?(previous|prior|above|earlier)\s+(instructions?|prompts?|commands?)/i,
|
|
2934
|
+
/forget\s+(all\s+)?(previous|prior|above|earlier)\s+(instructions?|prompts?|commands?)/i,
|
|
2935
|
+
/override\s+(all\s+)?(previous|prior|above|earlier)\s+(instructions?|prompts?|commands?)/i,
|
|
2936
|
+
/you\s+are\s+now\s+(?:a|an|in)\s+/i,
|
|
2937
|
+
/new\s+instructions?\s*:/i,
|
|
2938
|
+
/system\s*:\s*/i,
|
|
2939
|
+
/\bDAN\s+mode\b/i,
|
|
2940
|
+
/\bjailbreak\b/i,
|
|
2941
|
+
/do\s+anything\s+now/i,
|
|
2942
|
+
/act\s+as\s+(?:if\s+)?(?:you\s+(?:are|were)\s+)?(?:a\s+)?/i,
|
|
2943
|
+
/pretend\s+(?:you\s+are|to\s+be)\s+/i,
|
|
2944
|
+
/(?:reveal|show|display|print|output)\s+(?:your\s+)?(?:system\s+)?(?:prompt|instructions?)/i,
|
|
2945
|
+
/(?:what|repeat|tell\s+me)\s+(?:are\s+)?your\s+(?:system\s+)?(?:instructions?|prompt|rules?)/i,
|
|
2946
|
+
/bypass\s+(?:your\s+)?(?:safety|security|content|ethical)\s+(?:filters?|guidelines?|restrictions?|rules?)/i,
|
|
2947
|
+
/(?:from\s+now\s+on|starting\s+now|henceforth)\s*,?\s*(?:you\s+(?:will|shall|must|should))/i
|
|
2948
|
+
];
|
|
2949
|
+
var INJECTION_KEYWORDS = [
|
|
2950
|
+
"ignore previous instructions",
|
|
2951
|
+
"disregard all prior instructions",
|
|
2952
|
+
"forget your instructions",
|
|
2953
|
+
"override system prompt",
|
|
2954
|
+
"you are now a",
|
|
2955
|
+
"new instructions",
|
|
2956
|
+
"jailbreak",
|
|
2957
|
+
"DAN mode",
|
|
2958
|
+
"do anything now",
|
|
2959
|
+
"bypass safety filters",
|
|
2960
|
+
"ignore content policy",
|
|
2961
|
+
"pretend you are",
|
|
2962
|
+
"act as if you were",
|
|
2963
|
+
"reveal your system prompt",
|
|
2964
|
+
"what are your instructions",
|
|
2965
|
+
"from now on you will",
|
|
2966
|
+
"ignore all restrictions",
|
|
2967
|
+
"developer mode",
|
|
2968
|
+
"sudo mode",
|
|
2969
|
+
"admin override",
|
|
2970
|
+
"unlock hidden features",
|
|
2971
|
+
"disable safety mode",
|
|
2972
|
+
"ignore ethical guidelines",
|
|
2973
|
+
"forget all rules",
|
|
2974
|
+
"you have no restrictions"
|
|
2975
|
+
];
|
|
2976
|
+
var LEAKAGE_PATTERNS = [
|
|
2977
|
+
/(?:reveal|show|display|print|output|repeat|echo)\s+(?:the\s+)?(?:system\s+)?(?:prompt|instructions?)/i,
|
|
2978
|
+
/(?:what|tell\s+me|share)\s+(?:is\s+|are\s+)?(?:the\s+|your\s+)?(?:system\s+)?(?:prompt|instructions?|initial\s+message)/i,
|
|
2979
|
+
/(?:beginning|start|first\s+part)\s+of\s+(?:the\s+|your\s+)?(?:conversation|prompt|message)/i,
|
|
2980
|
+
/(?:copy|paste|reproduce)\s+(?:the\s+)?(?:text|content)\s+(?:above|before)/i,
|
|
2981
|
+
/(?:text|content|message)\s+(?:before|above|preceding)\s+(?:this|my)\s+(?:message|input)/i,
|
|
2982
|
+
/(?:everything|all)\s+(?:before|above|prior\s+to)\s+(?:this|my\s+message)/i,
|
|
2983
|
+
/repeat\s+(?:everything|all|the\s+text)\s+(?:above|before|I\s+said)/i
|
|
2984
|
+
];
|
|
2985
|
+
var LEAKAGE_KEYWORDS = [
|
|
2986
|
+
"reveal system prompt",
|
|
2987
|
+
"show me your instructions",
|
|
2988
|
+
"what is your system prompt",
|
|
2989
|
+
"display initial message",
|
|
2990
|
+
"repeat everything above",
|
|
2991
|
+
"copy text before my message",
|
|
2992
|
+
"what are your instructions",
|
|
2993
|
+
"print your prompt",
|
|
2994
|
+
"output your system message",
|
|
2995
|
+
"beginning of conversation",
|
|
2996
|
+
"share your initial instructions"
|
|
2997
|
+
];
|
|
2998
|
+
|
|
2999
|
+
// src/guards/injection.guard.ts
|
|
3000
|
+
var import_string_similarity = require("string-similarity");
|
|
3001
|
+
var InjectionGuard = class extends BaseGuard {
|
|
3002
|
+
name = "injection";
|
|
3003
|
+
async analyze(text, config) {
|
|
3004
|
+
const cfg = this.mergeConfig({ threshold: 0.6, mode: "block", ...config });
|
|
3005
|
+
const detections = [];
|
|
3006
|
+
for (const pattern of INJECTION_PATTERNS) {
|
|
3007
|
+
const re = new RegExp(pattern.source, pattern.flags + "g");
|
|
3008
|
+
let match;
|
|
3009
|
+
while ((match = re.exec(text)) !== null) {
|
|
3010
|
+
detections.push(
|
|
3011
|
+
this.makeDetection(text, {
|
|
3012
|
+
entityType: "PROMPT_INJECTION",
|
|
3013
|
+
start: match.index,
|
|
3014
|
+
end: match.index + match[0].length,
|
|
3015
|
+
text: match[0],
|
|
3016
|
+
confidence: "high",
|
|
3017
|
+
score: 0.9,
|
|
3018
|
+
guardName: this.name
|
|
3019
|
+
})
|
|
3020
|
+
);
|
|
3021
|
+
}
|
|
3022
|
+
}
|
|
3023
|
+
if (detections.length === 0) {
|
|
3024
|
+
const score = this.heuristicScore(text);
|
|
3025
|
+
if (score >= cfg.threshold) {
|
|
3026
|
+
detections.push(
|
|
3027
|
+
this.makeDetection(text, {
|
|
3028
|
+
entityType: "PROMPT_INJECTION",
|
|
3029
|
+
start: 0,
|
|
3030
|
+
end: text.length,
|
|
3031
|
+
text: text.slice(0, 200),
|
|
3032
|
+
confidence: score >= 0.8 ? "high" : "medium",
|
|
3033
|
+
score,
|
|
3034
|
+
guardName: this.name
|
|
3035
|
+
})
|
|
3036
|
+
);
|
|
3037
|
+
}
|
|
3038
|
+
}
|
|
3039
|
+
return this.buildResult(text, detections, cfg.mode);
|
|
3040
|
+
}
|
|
3041
|
+
heuristicScore(text) {
|
|
3042
|
+
const normalized = text.toLowerCase().trim();
|
|
3043
|
+
if (normalized.length === 0 || INJECTION_KEYWORDS.length === 0) return 0;
|
|
3044
|
+
const result = (0, import_string_similarity.findBestMatch)(normalized, INJECTION_KEYWORDS);
|
|
3045
|
+
let bestScore = result.bestMatch.rating;
|
|
3046
|
+
if (normalized.length > 100) {
|
|
3047
|
+
const windowSize = 80;
|
|
3048
|
+
const step = 30;
|
|
3049
|
+
for (let i = 0; i <= normalized.length - windowSize; i += step) {
|
|
3050
|
+
const window = normalized.slice(i, i + windowSize);
|
|
3051
|
+
const windowResult = (0, import_string_similarity.findBestMatch)(window, INJECTION_KEYWORDS);
|
|
3052
|
+
bestScore = Math.max(bestScore, windowResult.bestMatch.rating);
|
|
3053
|
+
}
|
|
3054
|
+
}
|
|
3055
|
+
return bestScore;
|
|
3056
|
+
}
|
|
3057
|
+
};
|
|
3058
|
+
|
|
3059
|
+
// src/guards/leakage.guard.ts
|
|
3060
|
+
var import_string_similarity2 = require("string-similarity");
|
|
3061
|
+
var LeakageGuard = class extends BaseGuard {
|
|
3062
|
+
name = "leakage";
|
|
3063
|
+
async analyze(text, config) {
|
|
3064
|
+
const cfg = this.mergeConfig({ threshold: 0.6, mode: "block", ...config });
|
|
3065
|
+
const detections = [];
|
|
3066
|
+
for (const pattern of LEAKAGE_PATTERNS) {
|
|
3067
|
+
const re = new RegExp(pattern.source, pattern.flags + "g");
|
|
3068
|
+
let match;
|
|
3069
|
+
while ((match = re.exec(text)) !== null) {
|
|
3070
|
+
detections.push(
|
|
3071
|
+
this.makeDetection(text, {
|
|
3072
|
+
entityType: "PROMPT_LEAKAGE",
|
|
3073
|
+
start: match.index,
|
|
3074
|
+
end: match.index + match[0].length,
|
|
3075
|
+
text: match[0],
|
|
3076
|
+
confidence: "high",
|
|
3077
|
+
score: 0.9,
|
|
3078
|
+
guardName: this.name
|
|
3079
|
+
})
|
|
3080
|
+
);
|
|
3081
|
+
}
|
|
3082
|
+
}
|
|
3083
|
+
if (detections.length === 0) {
|
|
3084
|
+
const normalized = text.toLowerCase().trim();
|
|
3085
|
+
if (normalized.length > 0 && LEAKAGE_KEYWORDS.length > 0) {
|
|
3086
|
+
const result = (0, import_string_similarity2.findBestMatch)(normalized, LEAKAGE_KEYWORDS);
|
|
3087
|
+
const score = result.bestMatch.rating;
|
|
3088
|
+
if (score >= cfg.threshold) {
|
|
3089
|
+
detections.push(
|
|
3090
|
+
this.makeDetection(text, {
|
|
3091
|
+
entityType: "PROMPT_LEAKAGE",
|
|
3092
|
+
start: 0,
|
|
3093
|
+
end: text.length,
|
|
3094
|
+
text: text.slice(0, 200),
|
|
3095
|
+
confidence: score >= 0.8 ? "high" : "medium",
|
|
3096
|
+
score,
|
|
3097
|
+
guardName: this.name
|
|
3098
|
+
})
|
|
3099
|
+
);
|
|
3100
|
+
}
|
|
3101
|
+
}
|
|
3102
|
+
}
|
|
3103
|
+
return this.buildResult(text, detections, cfg.mode);
|
|
3104
|
+
}
|
|
3105
|
+
};
|
|
3106
|
+
|
|
3107
|
+
// src/guards/testdata.guard.ts
|
|
3108
|
+
var SYNTHETIC_GUID_KEYWORDS = /\b[0-9a-f]{8}-(?:eval|test|demo|mock|fake|sample|exam|plac)[0-9a-f-]*\b/gi;
|
|
3109
|
+
var ZERO_PADDED_GUID = /\b0{8}-[0-9a-f]{4}-[0-9a-f]{4}-0{4}-0{12}\b/gi;
|
|
3110
|
+
var TEST_CREDIT_CARDS = [
|
|
3111
|
+
"4111111111111111",
|
|
3112
|
+
// Visa test
|
|
3113
|
+
"4242424242424242",
|
|
3114
|
+
// Stripe Visa
|
|
3115
|
+
"5500000000000004",
|
|
3116
|
+
// Mastercard test
|
|
3117
|
+
"5555555555554444",
|
|
3118
|
+
// Stripe Mastercard
|
|
3119
|
+
"378282246310005",
|
|
3120
|
+
// Amex test
|
|
3121
|
+
"371449635398431",
|
|
3122
|
+
// Amex test
|
|
3123
|
+
"6011111111111117",
|
|
3124
|
+
// Discover test
|
|
3125
|
+
"3056930009020004",
|
|
3126
|
+
// Diners test
|
|
3127
|
+
"3566002020360505",
|
|
3128
|
+
// JCB test
|
|
3129
|
+
"4000056655665556"
|
|
3130
|
+
// Stripe debit
|
|
3131
|
+
];
|
|
3132
|
+
var TEST_SSNS = [
|
|
3133
|
+
"000-00-0000",
|
|
3134
|
+
"123-45-6789",
|
|
3135
|
+
"078-05-1120",
|
|
3136
|
+
// Woolworth wallet card SSN (famous invalid)
|
|
3137
|
+
"219-09-9999"
|
|
3138
|
+
// Social Security ad SSN
|
|
3139
|
+
];
|
|
3140
|
+
var TEST_DATA_PATTERNS = [
|
|
3141
|
+
{
|
|
3142
|
+
entityType: "TEST_DATA_GUID",
|
|
3143
|
+
patterns: [SYNTHETIC_GUID_KEYWORDS, ZERO_PADDED_GUID],
|
|
3144
|
+
reason: "synthetic-guid"
|
|
3145
|
+
},
|
|
3146
|
+
{
|
|
3147
|
+
entityType: "TEST_DATA_CREDIT_CARD",
|
|
3148
|
+
patterns: TEST_CREDIT_CARDS.map(
|
|
3149
|
+
(num) => new RegExp(`\\b${num}\\b`, "g")
|
|
3150
|
+
),
|
|
3151
|
+
reason: "test-credit-card"
|
|
3152
|
+
},
|
|
3153
|
+
{
|
|
3154
|
+
entityType: "TEST_DATA_SSN",
|
|
3155
|
+
patterns: TEST_SSNS.map(
|
|
3156
|
+
// Require dashes or spaces between SSN groups (not embedded in other numbers)
|
|
3157
|
+
(ssn) => new RegExp(`\\b${ssn.replace(/-/g, "[\\s-]")}\\b`, "g")
|
|
3158
|
+
),
|
|
3159
|
+
reason: "test-ssn"
|
|
3160
|
+
},
|
|
3161
|
+
{
|
|
3162
|
+
entityType: "TEST_DATA_EMAIL",
|
|
3163
|
+
patterns: [
|
|
3164
|
+
/\b[\w.+-]+@(?:example\.(?:com|org|net)|test\.com|mailinator\.com|tempmail\.com)\b/gi,
|
|
3165
|
+
/\bnoreply@[^\s]+\b/gi
|
|
3166
|
+
],
|
|
3167
|
+
reason: "placeholder-email"
|
|
3168
|
+
},
|
|
3169
|
+
{
|
|
3170
|
+
entityType: "TEST_DATA_PHONE",
|
|
3171
|
+
patterns: [
|
|
3172
|
+
/\b(?:\+?1[-.\s]?)?(?:\()?555[-.\s]?01[0-9]{2}(?:\))?[-.\s]?\d{4}\b/g
|
|
3173
|
+
// 555-01xx range (reserved fictional)
|
|
3174
|
+
],
|
|
3175
|
+
reason: "fictional-phone"
|
|
3176
|
+
},
|
|
3177
|
+
{
|
|
3178
|
+
entityType: "TEST_DATA_SEQUENTIAL",
|
|
3179
|
+
patterns: [
|
|
3180
|
+
/\b[A]{4,}0{6,}[0-9A-F]{1,4}\b/gi,
|
|
3181
|
+
// AAAA000000000001 pattern
|
|
3182
|
+
/\b1234567890abcdef\b/gi
|
|
3183
|
+
// Classic sequential hex (exact)
|
|
3184
|
+
],
|
|
3185
|
+
reason: "sequential-pattern"
|
|
3186
|
+
}
|
|
3187
|
+
];
|
|
3188
|
+
var TestDataGuard = class extends BaseGuard {
|
|
3189
|
+
name = "testdata";
|
|
3190
|
+
async analyze(text, config) {
|
|
3191
|
+
const cfg = this.mergeConfig(config);
|
|
3192
|
+
const detections = [];
|
|
3193
|
+
for (const pattern of TEST_DATA_PATTERNS) {
|
|
3194
|
+
for (const regex of pattern.patterns) {
|
|
3195
|
+
const re = new RegExp(regex.source, regex.flags);
|
|
3196
|
+
let match;
|
|
3197
|
+
while ((match = re.exec(text)) !== null) {
|
|
3198
|
+
const matchText = match[0];
|
|
3199
|
+
const start = match.index;
|
|
3200
|
+
const end = start + matchText.length;
|
|
3201
|
+
detections.push(
|
|
3202
|
+
this.makeDetection(
|
|
3203
|
+
text,
|
|
3204
|
+
{
|
|
3205
|
+
entityType: pattern.entityType,
|
|
3206
|
+
start,
|
|
3207
|
+
end,
|
|
3208
|
+
text: matchText,
|
|
3209
|
+
confidence: "high",
|
|
3210
|
+
score: 0.95,
|
|
3211
|
+
guardName: this.name
|
|
3212
|
+
},
|
|
3213
|
+
"regex",
|
|
3214
|
+
"informational"
|
|
3215
|
+
)
|
|
3216
|
+
);
|
|
3217
|
+
}
|
|
3218
|
+
}
|
|
3219
|
+
}
|
|
3220
|
+
const deduped = this.deduplicateDetections(detections);
|
|
3221
|
+
return this.buildInformationalResult(text, deduped);
|
|
3222
|
+
}
|
|
3223
|
+
/** Build a result that always passes — test data is informational, not blocking */
|
|
3224
|
+
buildInformationalResult(text, detections) {
|
|
3225
|
+
const score = detections.length > 0 ? Math.max(...detections.map((d) => d.score)) : 0;
|
|
3226
|
+
const types = [...new Set(detections.map((d) => d.entityType))];
|
|
3227
|
+
return {
|
|
3228
|
+
passed: true,
|
|
3229
|
+
// Always passes — informational only
|
|
3230
|
+
reason: detections.length === 0 ? "No test data detected" : `Test data detected: ${types.join(", ")}`,
|
|
3231
|
+
guardName: this.name,
|
|
3232
|
+
score,
|
|
3233
|
+
detections
|
|
3234
|
+
};
|
|
3235
|
+
}
|
|
3236
|
+
deduplicateDetections(detections) {
|
|
3237
|
+
if (detections.length <= 1) return detections;
|
|
3238
|
+
const sorted = [...detections].sort((a, b) => b.score - a.score);
|
|
3239
|
+
const result = [];
|
|
3240
|
+
for (const detection of sorted) {
|
|
3241
|
+
const overlaps = result.some(
|
|
3242
|
+
(existing) => detection.start < existing.end && detection.end > existing.start
|
|
3243
|
+
);
|
|
3244
|
+
if (!overlaps) {
|
|
3245
|
+
result.push(detection);
|
|
3246
|
+
}
|
|
3247
|
+
}
|
|
3248
|
+
return result.sort((a, b) => a.start - b.start);
|
|
3249
|
+
}
|
|
3250
|
+
};
|
|
3251
|
+
|
|
3252
|
+
// src/policy/presets.ts
|
|
3253
|
+
var STRICT = {
|
|
3254
|
+
name: "strict",
|
|
3255
|
+
description: "Maximum sensitivity \u2014 flags all detectable PII, secrets, injection, and test data at low confidence thresholds",
|
|
3256
|
+
guards: {
|
|
3257
|
+
pii: { enabled: true, threshold: 0.3, mode: "block" },
|
|
3258
|
+
secret: { enabled: true, threshold: 0.5, mode: "block" },
|
|
3259
|
+
injection: { enabled: true, threshold: 0.5, mode: "block" },
|
|
3260
|
+
leakage: { enabled: true, threshold: 0.5, mode: "block" }
|
|
3261
|
+
},
|
|
3262
|
+
riskThresholds: { critical: 0.9, high: 0.7, medium: 0.5, low: 0.3 },
|
|
3263
|
+
testDataDetection: "flag"
|
|
3264
|
+
};
|
|
3265
|
+
var MODERATE = {
|
|
3266
|
+
name: "moderate",
|
|
3267
|
+
description: "Balanced sensitivity \u2014 flags high-confidence PII and secrets, redacts rather than blocks",
|
|
3268
|
+
guards: {
|
|
3269
|
+
pii: { enabled: true, threshold: 0.5, mode: "redact" },
|
|
3270
|
+
secret: { enabled: true, threshold: 0.7, mode: "redact" },
|
|
3271
|
+
injection: { enabled: true, threshold: 0.6, mode: "block" },
|
|
3272
|
+
leakage: { enabled: true, threshold: 0.6, mode: "block" }
|
|
3273
|
+
},
|
|
3274
|
+
riskThresholds: { critical: 0.9, high: 0.8, medium: 0.65, low: 0.5 },
|
|
3275
|
+
testDataDetection: "flag"
|
|
3276
|
+
};
|
|
3277
|
+
var BUILTIN_POLICIES = {
|
|
3278
|
+
strict: STRICT,
|
|
3279
|
+
moderate: MODERATE
|
|
3280
|
+
};
|
|
3281
|
+
function getPolicy(name) {
|
|
3282
|
+
const policy = BUILTIN_POLICIES[name];
|
|
3283
|
+
if (!policy) {
|
|
3284
|
+
const available = Object.keys(BUILTIN_POLICIES).join(", ");
|
|
3285
|
+
throw new Error(
|
|
3286
|
+
`Unknown policy "${name}". Available: ${available}`
|
|
3287
|
+
);
|
|
3288
|
+
}
|
|
3289
|
+
return policy;
|
|
3290
|
+
}
|
|
3291
|
+
|
|
3292
|
+
// src/policy/resolve.ts
|
|
3293
|
+
function resolveRef(ref) {
|
|
3294
|
+
return typeof ref === "string" ? getPolicy(ref) : ref;
|
|
3295
|
+
}
|
|
3296
|
+
function resolvePolicy(base, ...overlays) {
|
|
3297
|
+
let result = structuredClone(resolveRef(base));
|
|
3298
|
+
for (const overlay of overlays) {
|
|
3299
|
+
const o = resolveRef(overlay);
|
|
3300
|
+
result = mergePolicies(result, o);
|
|
3301
|
+
}
|
|
3302
|
+
return result;
|
|
3303
|
+
}
|
|
3304
|
+
function mergePolicies(base, overlay) {
|
|
3305
|
+
const result = structuredClone(base);
|
|
3306
|
+
result.name = `${base.name}+${overlay.name}`;
|
|
3307
|
+
result.description = `${base.description} | ${overlay.description}`;
|
|
3308
|
+
for (const guardName of ["pii", "secret", "injection", "leakage"]) {
|
|
3309
|
+
const baseGuard = base.guards[guardName];
|
|
3310
|
+
const overlayGuard = overlay.guards[guardName];
|
|
3311
|
+
if (!overlayGuard) continue;
|
|
3312
|
+
if (!baseGuard) {
|
|
3313
|
+
result.guards[guardName] = structuredClone(overlayGuard);
|
|
3314
|
+
continue;
|
|
3315
|
+
}
|
|
3316
|
+
result.guards[guardName] = mergeGuardConfig(baseGuard, overlayGuard);
|
|
3317
|
+
}
|
|
3318
|
+
if (overlay.riskThresholds) {
|
|
3319
|
+
result.riskThresholds = {
|
|
3320
|
+
critical: Math.min(
|
|
3321
|
+
base.riskThresholds.critical,
|
|
3322
|
+
overlay.riskThresholds.critical
|
|
3323
|
+
),
|
|
3324
|
+
high: Math.min(base.riskThresholds.high, overlay.riskThresholds.high),
|
|
3325
|
+
medium: Math.min(
|
|
3326
|
+
base.riskThresholds.medium,
|
|
3327
|
+
overlay.riskThresholds.medium
|
|
3328
|
+
),
|
|
3329
|
+
low: Math.min(base.riskThresholds.low, overlay.riskThresholds.low)
|
|
3330
|
+
};
|
|
3331
|
+
}
|
|
3332
|
+
if (overlay.testDataDetection) {
|
|
3333
|
+
result.testDataDetection = overlay.testDataDetection;
|
|
3334
|
+
}
|
|
3335
|
+
return result;
|
|
3336
|
+
}
|
|
3337
|
+
function mergeGuardConfig(base, overlay) {
|
|
3338
|
+
const result = { ...base };
|
|
3339
|
+
if (overlay.enabled !== void 0) {
|
|
3340
|
+
result.enabled = overlay.enabled;
|
|
3341
|
+
}
|
|
3342
|
+
if (overlay.threshold !== void 0) {
|
|
3343
|
+
result.threshold = base.threshold !== void 0 ? Math.min(base.threshold, overlay.threshold) : overlay.threshold;
|
|
3344
|
+
}
|
|
3345
|
+
if (overlay.mode !== void 0) {
|
|
3346
|
+
if (overlay.mode === "block" || base.mode === "block") {
|
|
3347
|
+
result.mode = "block";
|
|
3348
|
+
} else {
|
|
3349
|
+
result.mode = overlay.mode;
|
|
3350
|
+
}
|
|
3351
|
+
}
|
|
3352
|
+
if (overlay.entityTypes) {
|
|
3353
|
+
if (base.entityTypes && base.entityTypes.length > 0) {
|
|
3354
|
+
const overlaySet = new Set(overlay.entityTypes);
|
|
3355
|
+
result.entityTypes = base.entityTypes.filter((t) => overlaySet.has(t));
|
|
3356
|
+
} else {
|
|
3357
|
+
result.entityTypes = [...overlay.entityTypes];
|
|
3358
|
+
}
|
|
3359
|
+
}
|
|
3360
|
+
if (overlay.secretTypes) {
|
|
3361
|
+
if (base.secretTypes && base.secretTypes.length > 0) {
|
|
3362
|
+
const overlaySet = new Set(overlay.secretTypes);
|
|
3363
|
+
result.secretTypes = base.secretTypes.filter((t) => overlaySet.has(t));
|
|
3364
|
+
} else {
|
|
3365
|
+
result.secretTypes = [...overlay.secretTypes];
|
|
3366
|
+
}
|
|
3367
|
+
}
|
|
3368
|
+
return result;
|
|
3369
|
+
}
|
|
3370
|
+
function policyToEngineConfig(policy) {
|
|
3371
|
+
const pii = policy.guards.pii ?? {};
|
|
3372
|
+
const secret = policy.guards.secret ?? {};
|
|
3373
|
+
const injection = policy.guards.injection ?? {};
|
|
3374
|
+
const leakage = policy.guards.leakage ?? {};
|
|
3375
|
+
return {
|
|
3376
|
+
piiOptions: {
|
|
3377
|
+
entityTypes: pii.entityTypes
|
|
3378
|
+
},
|
|
3379
|
+
secretOptions: {
|
|
3380
|
+
secretTypes: secret.secretTypes
|
|
3381
|
+
},
|
|
3382
|
+
guardConfigs: {
|
|
3383
|
+
pii: { threshold: pii.threshold, mode: pii.mode },
|
|
3384
|
+
secret: { threshold: secret.threshold, mode: secret.mode },
|
|
3385
|
+
injection: { threshold: injection.threshold, mode: injection.mode },
|
|
3386
|
+
leakage: { threshold: leakage.threshold, mode: leakage.mode }
|
|
3387
|
+
}
|
|
3388
|
+
};
|
|
3389
|
+
}
|
|
3390
|
+
|
|
3391
|
+
// src/index.ts
|
|
3392
|
+
var DEFAULT_CONFIG2 = {
|
|
3393
|
+
enabled: true,
|
|
3394
|
+
debounceMs: 500,
|
|
3395
|
+
guards: {
|
|
3396
|
+
pii: { enabled: true },
|
|
3397
|
+
secret: { enabled: true },
|
|
3398
|
+
injection: { enabled: true },
|
|
3399
|
+
contentSafety: { enabled: false }
|
|
3400
|
+
},
|
|
3401
|
+
cascade: {
|
|
3402
|
+
escalationThreshold: 0.75,
|
|
3403
|
+
contextSentences: 3,
|
|
3404
|
+
modelEnabled: false,
|
|
3405
|
+
modelId: "Xenova/bert-base-NER"
|
|
3406
|
+
}
|
|
3407
|
+
};
|
|
3408
|
+
function createEngine(config = DEFAULT_CONFIG2) {
|
|
3409
|
+
const engine = new GuardrailsEngine();
|
|
3410
|
+
if (config.policy) {
|
|
3411
|
+
let policy = resolveRef(config.policy);
|
|
3412
|
+
if (config.policyOverlays && config.policyOverlays.length > 0) {
|
|
3413
|
+
policy = resolvePolicy(policy, ...config.policyOverlays);
|
|
3414
|
+
}
|
|
3415
|
+
const { piiOptions, secretOptions, guardConfigs } = policyToEngineConfig(policy);
|
|
3416
|
+
if (policy.guards.pii?.enabled !== false) {
|
|
3417
|
+
engine.addGuard(new PiiGuard(piiOptions));
|
|
3418
|
+
}
|
|
3419
|
+
if (policy.guards.secret?.enabled !== false) {
|
|
3420
|
+
engine.addGuard(new SecretGuard(secretOptions));
|
|
3421
|
+
}
|
|
3422
|
+
if (policy.guards.injection?.enabled !== false) {
|
|
3423
|
+
engine.addGuard(new InjectionGuard());
|
|
3424
|
+
engine.addGuard(new LeakageGuard());
|
|
3425
|
+
}
|
|
3426
|
+
if (policy.testDataDetection !== "ignore") {
|
|
3427
|
+
engine.addGuard(new TestDataGuard());
|
|
3428
|
+
}
|
|
3429
|
+
engine.updateConfig({ guards: guardConfigs });
|
|
3430
|
+
} else {
|
|
3431
|
+
if (config.guards.pii.enabled) {
|
|
3432
|
+
engine.addGuard(new PiiGuard());
|
|
3433
|
+
}
|
|
3434
|
+
if (config.guards.secret.enabled) {
|
|
3435
|
+
engine.addGuard(new SecretGuard());
|
|
3436
|
+
}
|
|
3437
|
+
if (config.guards.injection.enabled) {
|
|
3438
|
+
engine.addGuard(new InjectionGuard());
|
|
3439
|
+
engine.addGuard(new LeakageGuard());
|
|
3440
|
+
}
|
|
3441
|
+
}
|
|
3442
|
+
if (config.cascade.modelEnabled) {
|
|
3443
|
+
engine.initCascade({
|
|
3444
|
+
bertEnabled: true,
|
|
3445
|
+
llmEnabled: config.guards.contentSafety.enabled,
|
|
3446
|
+
escalationThreshold: config.cascade.escalationThreshold,
|
|
3447
|
+
contextSentences: config.cascade.contextSentences,
|
|
3448
|
+
modelId: config.cascade.modelId
|
|
3449
|
+
});
|
|
3450
|
+
}
|
|
3451
|
+
return engine;
|
|
3452
|
+
}
|
|
3453
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
3454
|
+
0 && (module.exports = {
|
|
3455
|
+
BUILTIN_POLICIES,
|
|
3456
|
+
BaseGuard,
|
|
3457
|
+
DEFAULT_CONFIG,
|
|
3458
|
+
GuardrailsEngine,
|
|
3459
|
+
InjectionGuard,
|
|
3460
|
+
LeakageGuard,
|
|
3461
|
+
PiiGuard,
|
|
3462
|
+
SecretGuard,
|
|
3463
|
+
TestDataGuard,
|
|
3464
|
+
assessRisk,
|
|
3465
|
+
createEngine,
|
|
3466
|
+
getPolicy,
|
|
3467
|
+
policyToEngineConfig,
|
|
3468
|
+
resolvePolicy
|
|
3469
|
+
});
|
|
3470
|
+
//# sourceMappingURL=index.js.map
|