openred 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +289 -0
- package/dist/chunk-65T2K4W6.js +629 -0
- package/dist/chunk-ABSJVCYK.js +44 -0
- package/dist/chunk-DWYPLA6C.cjs +48 -0
- package/dist/chunk-VAXUQISI.cjs +648 -0
- package/dist/index.cjs +258 -0
- package/dist/index.d.cts +51 -0
- package/dist/index.d.ts +51 -0
- package/dist/index.js +187 -0
- package/dist/integrations/anthropic.cjs +52 -0
- package/dist/integrations/anthropic.d.cts +35 -0
- package/dist/integrations/anthropic.d.ts +35 -0
- package/dist/integrations/anthropic.js +50 -0
- package/dist/integrations/langchain.cjs +28 -0
- package/dist/integrations/langchain.d.cts +18 -0
- package/dist/integrations/langchain.d.ts +18 -0
- package/dist/integrations/langchain.js +26 -0
- package/dist/integrations/openai.cjs +40 -0
- package/dist/integrations/openai.d.cts +38 -0
- package/dist/integrations/openai.d.ts +38 -0
- package/dist/integrations/openai.js +38 -0
- package/dist/middleware/express.cjs +30 -0
- package/dist/middleware/express.d.cts +16 -0
- package/dist/middleware/express.d.ts +16 -0
- package/dist/middleware/express.js +28 -0
- package/dist/middleware/fastify.cjs +23 -0
- package/dist/middleware/fastify.d.cts +27 -0
- package/dist/middleware/fastify.d.ts +27 -0
- package/dist/middleware/fastify.js +21 -0
- package/dist/pipeline-D_6YC4Us.d.cts +16 -0
- package/dist/pipeline-gWjT4cYU.d.ts +16 -0
- package/dist/vault-CDr54-Ev.d.cts +80 -0
- package/dist/vault-CDr54-Ev.d.ts +80 -0
- package/package.json +112 -0
|
@@ -0,0 +1,629 @@
|
|
|
1
|
+
import { createHash } from 'crypto';
|
|
2
|
+
|
|
3
|
+
// src/aggregator.ts
|
|
4
|
+
var DEFAULT_CONFIG = {
  overlapResolution: "highest-confidence",
  minConfidence: 0,
  mergeAdjacent: false
};

/**
 * Filter, order and de-overlap raw detector matches.
 *
 * Matches below `minConfidence` are dropped (only when it is > 0), the rest
 * are sorted by `start` (ties: higher confidence first), and whenever a match
 * starts before the previously kept match ends, `pickWinner` decides which of
 * the two is kept. The input array is never mutated.
 *
 * @param {Array<object>} matches - Matches with `start`, `end`, `confidence`.
 * @param {object} [config] - Optional `overlapResolution`, `minConfidence`,
 *   `mergeAdjacent`; missing or nullish entries fall back to DEFAULT_CONFIG.
 * @returns {Array<object>} The surviving matches ordered by `start`.
 */
function aggregate(matches, config = {}) {
  const options = config ?? {};
  // Resolve each option with `??` instead of object spread: spreading lets an
  // explicit `undefined` override the default, which made pickWinner receive
  // an undefined strategy and put `undefined` into the result.
  const overlapResolution = options.overlapResolution ?? DEFAULT_CONFIG.overlapResolution;
  const minConfidence = options.minConfidence ?? DEFAULT_CONFIG.minConfidence;
  const mergeAdjacent = options.mergeAdjacent ?? DEFAULT_CONFIG.mergeAdjacent;

  const filtered = minConfidence > 0
    ? matches.filter((m) => m.confidence >= minConfidence)
    : [...matches];
  filtered.sort((a, b) => a.start - b.start || b.confidence - a.confidence);

  const resolved = [];
  for (const match of filtered) {
    const prev = resolved[resolved.length - 1];
    if (prev && match.start < prev.end) {
      // Overlap: keep exactly one of the two in the previous slot.
      resolved[resolved.length - 1] = pickWinner(prev, match, overlapResolution);
      continue;
    }
    resolved.push(match);
  }

  return mergeAdjacent ? mergeAdjacentMatches(resolved) : resolved;
}
|
|
31
|
+
/**
 * Choose which of two overlapping matches survives.
 *
 * @param {object} a - The earlier (already kept) match.
 * @param {object} b - The later, overlapping match.
 * @param {string} strategy - "longest", "highest-confidence" or "first".
 * @returns {object} Either `a` or `b`; ties always go to `a`.
 * @throws {Error} If `strategy` is not one of the known names. Previously this
 *   returned `undefined`, which ended up inside the aggregated match list.
 */
function pickWinner(a, b, strategy) {
  switch (strategy) {
    case "longest": {
      const lenA = a.end - a.start;
      const lenB = b.end - b.start;
      return lenB > lenA ? b : a;
    }
    case "highest-confidence":
      return b.confidence > a.confidence ? b : a;
    case "first":
      return a;
    default:
      throw new Error(`Unknown overlap resolution strategy: ${String(strategy)}`);
  }
}
|
|
44
|
+
/**
 * Fuse neighbouring matches of the same type into one.
 *
 * Two consecutive matches are fused when their `type` is equal and the next
 * one starts at most 2 characters after the previous one ends. The fused match
 * keeps the previous match's fields, takes the next match's `end`, the lower
 * of the two confidences, and a `value` made of both values joined by a single
 * space (an approximation of the covered text).
 *
 * @param {Array<object>} matches - Matches ordered by `start`.
 * @returns {Array<object>} The same array when it has 0 or 1 elements,
 *   otherwise a new array.
 */
function mergeAdjacentMatches(matches) {
  if (matches.length <= 1) return matches;

  const [head, ...others] = matches;
  const out = [head];
  for (const next of others) {
    const tailIndex = out.length - 1;
    const tail = out[tailIndex];
    if (tail.type !== next.type || !(next.start - tail.end <= 2)) {
      out.push(next);
      continue;
    }
    out[tailIndex] = {
      ...tail,
      end: next.end,
      // approximate
      value: tail.value + " " + next.value,
      confidence: Math.min(tail.confidence, next.confidence)
    };
  }
  return out;
}
|
|
64
|
+
|
|
65
|
+
// src/detectors/email.ts
|
|
66
|
+
// The local part may start with a letter OR a digit. The previous pattern
// required a leading letter, so "1234@qq.com" was missed entirely and
// "123abc@example.com" was only matched from "abc", leaving "123" behind.
var EMAIL_RE = /[a-zA-Z0-9][a-zA-Z0-9._%+-]*@[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?)*\.[a-zA-Z]{2,}/g;
// Candidates ending in one of these extensions (e.g. "icon@2x.png") are
// treated as file names rather than addresses.
var FILE_EXT_RE = /\.(png|jpg|jpeg|gif|svg|webp|ico|bmp|tiff|pdf|zip|tar|gz|js|ts|css|html|xml|json|yaml|yml|md|txt|csv|log|sh|py|rb|exe|dll|so|wasm)$/i;

/** Detector for e-mail addresses. */
var EmailDetector = {
  name: "email",
  type: "EMAIL",
  confidence: "high",
  /**
   * Find e-mail addresses in `text`.
   *
   * @param {string} text - Text to scan.
   * @returns {Array<object>} One match per address with `type`, `value`,
   *   `start`, `end` (exclusive), `confidence` (0.95) and `detector`.
   */
  detect(text) {
    const matches = [];
    // EMAIL_RE is a shared /g regex, so always start scanning from the top.
    EMAIL_RE.lastIndex = 0;
    let match;
    while ((match = EMAIL_RE.exec(text)) !== null) {
      const value = match[0];
      if (FILE_EXT_RE.test(value)) continue;
      matches.push({
        type: "EMAIL",
        value,
        start: match.index,
        end: match.index + value.length,
        confidence: 0.95,
        detector: "email"
      });
    }
    return matches;
  }
};
|
|
91
|
+
|
|
92
|
+
// src/utils/validation.ts
|
|
93
|
+
/**
 * Validate a digit string with the Luhn checksum.
 *
 * @param {string} digits - String of decimal digits (no separators).
 * @returns {boolean} True when the checksum is valid. An empty input is
 *   rejected (it used to pass because its sum is 0); any non-digit character
 *   makes the sum NaN and therefore the result false.
 */
function luhnCheck(digits) {
  if (digits.length === 0) return false;
  let sum = 0;
  let doubleNext = false;
  // Walk right to left, doubling every second digit.
  for (let i = digits.length - 1; i >= 0; i--) {
    let n = Number.parseInt(digits[i], 10);
    if (doubleNext) {
      n *= 2;
      if (n > 9) n -= 9;
    }
    sum += n;
    doubleNext = !doubleNext;
  }
  return sum % 10 === 0;
}
|
|
107
|
+
/**
 * Count the decimal digits (0-9) in a string.
 *
 * @param {string} s - Text to inspect.
 * @returns {number} Number of digit characters; 0 when there are none.
 */
function countDigits(s) {
  const found = s.match(/\d/g);
  return found === null ? 0 : found.length;
}
|
|
110
|
+
/**
 * Remove every character that is not a decimal digit.
 *
 * @param {string} s - Text to clean.
 * @returns {string} Only the digits of `s`, in their original order.
 */
function stripNonDigits(s) {
  const nonDigit = /\D/g;
  return s.replace(nonDigit, "");
}
|
|
113
|
+
|
|
114
|
+
// src/detectors/phone.ts
|
|
115
|
+
// Three alternatives: international ("+" country code), a leading group
// followed by a separator, and a form whose last group follows a separator.
var PHONE_RE = /(?:\+\d{1,3}[\s.-]?)\(?\d{2,4}\)?[\s.-]?\d{3,4}[\s.-]?\d{3,4}|(?:\(?\d{2,4}\)?[\s.-])\d{3,4}[\s.-]?\d{3,4}|\(?\d{2,4}\)?[\s.-]?\d{3,4}[\s.-]\d{3,4}/g;

/**
 * Tell whether a candidate contains a dot, whitespace, parenthesis or hyphen.
 *
 * @param {string} s - Candidate phone text.
 * @returns {boolean} True when at least one separator character is present.
 */
function hasSeparator(s) {
  return /[.\s()-]/.test(s);
}

/** Detector for phone numbers. */
var PhoneDetector = {
  name: "phone",
  type: "PHONE",
  confidence: "high",
  /**
   * Find phone numbers in `text`.
   *
   * A candidate is kept only if it has 7 to 15 digits, has a separator or a
   * leading "+", and is not immediately preceded by "#".
   *
   * @param {string} text - Text to scan.
   * @returns {Array<object>} Matches with confidence 0.95 for numbers that
   *   start with "+", 0.85 otherwise.
   */
  detect(text) {
    const matches = [];
    // PHONE_RE is a shared /g regex, so always start scanning from the top.
    PHONE_RE.lastIndex = 0;
    let match;
    while ((match = PHONE_RE.exec(text)) !== null) {
      const value = match[0];
      const digits = countDigits(value);
      if (digits < 7 || digits > 15) continue;
      const isInternational = value.startsWith("+");
      if (!isInternational && !hasSeparator(value)) continue;
      // "#415-555-2671" reads as a reference number, not a phone number.
      if (match.index > 0 && text[match.index - 1] === "#") continue;
      matches.push({
        type: "PHONE",
        value,
        start: match.index,
        end: match.index + value.length,
        // Every candidate that reaches this point either starts with "+" or
        // contains a separator, so the former third tier (0.6) was dead code.
        confidence: isInternational ? 0.95 : 0.85,
        detector: "phone"
      });
    }
    return matches;
  }
};
|
|
146
|
+
|
|
147
|
+
// src/detectors/ssn.ts
|
|
148
|
+
// Three digit groups (3-2-4) separated by a hyphen or a whitespace character.
var SSN_RE = /\b(\d{3})[-\s](\d{2})[-\s](\d{4})\b/g;

/**
 * Apply the structural SSN rules to the three captured groups.
 *
 * @param {string} area - First three digits.
 * @param {string} group - Middle two digits.
 * @param {string} serial - Last four digits.
 * @returns {boolean} False when the area is 000, 666 or 900 and above, or when
 *   the group or the serial is all zeros; true otherwise.
 */
function isValidSSN(area, group, serial) {
  const areaNum = parseInt(area, 10);
  const areaRejected = areaNum === 0 || areaNum === 666 || areaNum >= 900;
  if (areaRejected) return false;
  const groupIsZero = parseInt(group, 10) === 0;
  const serialIsZero = parseInt(serial, 10) === 0;
  return !groupIsZero && !serialIsZero;
}

/** Detector for US Social Security numbers. */
var SSNDetector = {
  name: "ssn",
  type: "SSN",
  confidence: "high",
  /**
   * Find SSN-shaped numbers in `text` that pass `isValidSSN`.
   *
   * @param {string} text - Text to scan.
   * @returns {Array<object>} Matches with confidence 0.9.
   */
  detect(text) {
    const found = [];
    // SSN_RE is a shared /g regex, so always start scanning from the top.
    SSN_RE.lastIndex = 0;
    for (let hit = SSN_RE.exec(text); hit !== null; hit = SSN_RE.exec(text)) {
      const [whole, area, group, serial] = hit;
      if (isValidSSN(area, group, serial)) {
        found.push({
          type: "SSN",
          value: whole,
          start: hit.index,
          end: hit.index + whole.length,
          confidence: 0.9,
          detector: "ssn"
        });
      }
    }
    return found;
  }
};
|
|
178
|
+
|
|
179
|
+
// src/detectors/credit-card.ts
|
|
180
|
+
// Card numbers written in groups separated by a space or hyphen.
var CC_SEPARATED_RE = /\d{4}[ -]\d{4}[ -]\d{4}[ -]\d{1,7}|\d{4}[ -]\d{6}[ -]\d{5}/g;
// Card numbers written as one unbroken run of 13 to 19 digits.
var CC_BARE_RE = /\b\d{13,19}\b/g;

/**
 * Tell whether the number starts like a payment card.
 *
 * @param {string} digits - Digits only.
 * @returns {boolean} True when the first digit is 3-6, or when the first four
 *   digits fall in the Mastercard 2-series range 2221-2720 (previously every
 *   number starting with 2 was rejected).
 */
function isKnownPrefix(digits) {
  const first = Number.parseInt(digits[0], 10);
  if (first >= 3 && first <= 6) return true;
  const firstFour = Number.parseInt(digits.slice(0, 4), 10);
  return firstFour >= 2221 && firstFour <= 2720;
}

/**
 * Tell whether the text just before `index` labels the number as an id.
 *
 * Looks at up to 10 characters before `index` (trailing whitespace ignored)
 * for a trailing "#", "ID" (optionally followed by ":") or "No" / "No.".
 *
 * @param {string} text - Full text being scanned.
 * @param {number} index - Start offset of the candidate number.
 * @returns {boolean} True when such a marker directly precedes the number.
 */
function isPrecededByIdMarker(text, index) {
  const before = text.slice(Math.max(0, index - 10), index).trimEnd();
  return /[#]$/.test(before) || /\bID\s*:?\s*$/.test(before) || /\bNo\.?\s*$/.test(before);
}

/**
 * Turn a regex hit into a CREDIT_CARD match if it passes all checks.
 *
 * @param {string} text - Full text being scanned (currently unused).
 * @param {RegExpExecArray} match - Result of one of the card regexes.
 * @param {boolean} isSeparated - True for CC_SEPARATED_RE hits.
 * @returns {object|null} A match (confidence 0.95 when separated, 0.8 when
 *   bare), or null when the digit count is outside 13-19, the prefix is not
 *   recognised, or the Luhn check fails.
 */
function validateMatch(text, match, isSeparated) {
  const value = match[0].trim();
  const digits = stripNonDigits(value);
  if (digits.length < 13 || digits.length > 19) return null;
  if (!isKnownPrefix(digits)) return null;
  if (!luhnCheck(digits)) return null;
  return {
    type: "CREDIT_CARD",
    value,
    start: match.index,
    end: match.index + value.length,
    confidence: isSeparated ? 0.95 : 0.8,
    detector: "credit-card"
  };
}

/** Detector for payment card numbers. */
var CreditCardDetector = {
  name: "credit-card",
  type: "CREDIT_CARD",
  confidence: "high",
  /**
   * Find card numbers in `text`: separated forms first, then bare digit runs.
   * Bare runs preceded by an id marker (see isPrecededByIdMarker) are skipped.
   *
   * @param {string} text - Text to scan.
   * @returns {Array<object>} Validated matches, separated hits before bare ones.
   */
  detect(text) {
    const matches = [];
    let match;
    CC_SEPARATED_RE.lastIndex = 0;
    while ((match = CC_SEPARATED_RE.exec(text)) !== null) {
      const m = validateMatch(text, match, true);
      if (m) matches.push(m);
    }
    CC_BARE_RE.lastIndex = 0;
    while ((match = CC_BARE_RE.exec(text)) !== null) {
      if (isPrecededByIdMarker(text, match.index)) continue;
      const m = validateMatch(text, match, false);
      if (m) matches.push(m);
    }
    return matches;
  }
};
|
|
226
|
+
|
|
227
|
+
// src/detectors/ip-address.ts
|
|
228
|
+
var IPV4_RE = /\b(?:(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\b/g;
// Maximal runs of hex digits and colons. A run is only a candidate; it is
// trimmed and validated in code. The previous single alternation returned the
// first alternative that fit, which cut "2001:db8::1" down to "2001:db8::"
// (leaving the tail in the text) and matched "d::" inside "std::vector".
var IPV6_RUN_RE = /[0-9a-fA-F:]+/g;
var IPV6_GROUP_RE = /^[0-9a-fA-F]{1,4}$/;
var IPV6_WORD_CHAR_RE = /\w/;

/**
 * Tell whether an IPv4 address is loopback or in a private range.
 *
 * @param {string} ip - Dotted-quad address.
 * @returns {boolean} True for 127.x, 10.x, 172.16-31.x and 192.168.x.
 */
function isPrivateIPv4(ip) {
  const parts = ip.split(".").map(Number);
  if (parts[0] === 127) return true;
  if (parts[0] === 10) return true;
  if (parts[0] === 172 && parts[1] >= 16 && parts[1] <= 31) return true;
  if (parts[0] === 192 && parts[1] === 168) return true;
  return false;
}

/**
 * Validate the textual form of an IPv6 address (no embedded IPv4, no zone).
 *
 * @param {string} candidate - Hex digits and colons only.
 * @returns {boolean} True for 8 groups of 1-4 hex digits, or for 1-7 such
 *   groups around exactly one "::".
 */
function isIPv6(candidate) {
  const isGroup = (group) => IPV6_GROUP_RE.test(group);
  const gapAt = candidate.indexOf("::");
  if (gapAt === -1) {
    const groups = candidate.split(":");
    return groups.length === 8 && groups.every(isGroup);
  }
  // A second "::" (this also catches ":::") is never valid.
  if (candidate.indexOf("::", gapAt + 1) !== -1) return false;
  const head = candidate.slice(0, gapAt);
  const tail = candidate.slice(gapAt + 2);
  const groups = [...(head ? head.split(":") : []), ...(tail ? tail.split(":") : [])];
  return groups.length >= 1 && groups.length <= 7 && groups.every(isGroup);
}

/**
 * Reduce a hex/colon run to the IPv6 address it contains, if any.
 *
 * @param {string} text - Full text being scanned.
 * @param {string} run - The run matched by IPV6_RUN_RE.
 * @param {number} index - Offset of `run` in `text`.
 * @returns {{value: string, start: number}|null} The address and its offset,
 *   or null when the run is not a standalone IPv6 address.
 */
function extractIPv6(text, run, index) {
  if (!run.includes(":")) return null;
  let value = run;
  let start = index;
  if (value.startsWith(":") && !value.startsWith("::")) {
    // A single leading colon is a label separator, as in "addr:fe80::1".
    value = value.slice(1);
    start += 1;
  } else if (start > 0 && IPV6_WORD_CHAR_RE.test(text[start - 1])) {
    // Glued to an identifier ("std::vector", "x::1"): not an address.
    return null;
  }
  const afterRun = index + run.length;
  if (value.endsWith(":") && !value.endsWith("::")) {
    // A single trailing colon is punctuation, as in "fe80::1: link down".
    value = value.slice(0, -1);
  } else {
    const next = text[afterRun];
    if (next !== undefined && IPV6_WORD_CHAR_RE.test(next)) return null;
    // "::ffff:9.9.9.9": leave the dotted tail to the IPv4 pass instead of
    // claiming "::ffff:9" and shadowing the IPv4 match.
    if (next === "." && /\d/.test(text[afterRun + 1] ?? "")) return null;
  }
  return isIPv6(value) ? { value, start } : null;
}

/** Detector for public IPv4 addresses and IPv6 addresses. */
var IPAddressDetector = {
  name: "ip-address",
  type: "IP_ADDRESS",
  confidence: "high",
  /**
   * Find IP addresses in `text`. Loopback/private IPv4 addresses and the IPv6
   * loopback "::1" are skipped.
   *
   * @param {string} text - Text to scan.
   * @returns {Array<object>} IPv4 matches first, then IPv6 matches, all with
   *   confidence 0.9.
   */
  detect(text) {
    const matches = [];
    let match;
    IPV4_RE.lastIndex = 0;
    while ((match = IPV4_RE.exec(text)) !== null) {
      if (isPrivateIPv4(match[0])) continue;
      matches.push({
        type: "IP_ADDRESS",
        value: match[0],
        start: match.index,
        end: match.index + match[0].length,
        confidence: 0.9,
        detector: "ip-address"
      });
    }
    IPV6_RUN_RE.lastIndex = 0;
    while ((match = IPV6_RUN_RE.exec(text)) !== null) {
      const found = extractIPv6(text, match[0], match.index);
      if (found === null || found.value === "::1") continue;
      matches.push({
        type: "IP_ADDRESS",
        value: found.value,
        start: found.start,
        end: found.start + found.value.length,
        confidence: 0.9,
        detector: "ip-address"
      });
    }
    return matches;
  }
};
|
|
272
|
+
|
|
273
|
+
// src/detectors/url.ts
|
|
274
|
+
var URL_RE = /https?:\/\/[^\s<>"{}|\\^`\[\]]+/g;
// Characters dropped from the end of a matched URL (sentence punctuation).
var URL_TRAILING_PUNCTUATION = ".,;:!?)";

/**
 * Remove trailing sentence punctuation from a matched URL.
 *
 * Walks back from the end instead of using /[.,;:!?)]+$/: on a long run of
 * punctuation that is not at the end of the string that regex retries from
 * every position of the run, which is quadratic in the run length.
 *
 * @param {string} url - Raw regex match.
 * @returns {string} `url` without its trailing ".,;:!?)" characters.
 */
function trimTrailingPunctuation(url) {
  let end = url.length;
  while (end > 0 && URL_TRAILING_PUNCTUATION.includes(url[end - 1])) {
    end--;
  }
  return url.slice(0, end);
}

/** Detector for http(s) URLs. */
var URLDetector = {
  name: "url",
  type: "URL",
  confidence: "high",
  /**
   * Find http:// and https:// URLs in `text`.
   *
   * @param {string} text - Text to scan.
   * @returns {Array<object>} Matches with confidence 0.95; `end` reflects the
   *   URL after trailing punctuation has been removed.
   */
  detect(text) {
    const matches = [];
    // URL_RE is a shared /g regex, so always start scanning from the top.
    URL_RE.lastIndex = 0;
    let match;
    while ((match = URL_RE.exec(text)) !== null) {
      const value = trimTrailingPunctuation(match[0]);
      matches.push({
        type: "URL",
        value,
        start: match.index,
        end: match.index + value.length,
        confidence: 0.95,
        detector: "url"
      });
    }
    return matches;
  }
};
|
|
297
|
+
|
|
298
|
+
// src/detectors/date-of-birth.ts
|
|
299
|
+
// Numeric dates: month-day-year or year-month-day with "/", "-" or "." and a
// year starting with 19 or 20.
var DATE_NUMERIC_RE = /\b(?:0?[1-9]|1[0-2])[\/\-.](?:0?[1-9]|[12]\d|3[01])[\/\-.](?:19|20)\d{2}\b|\b(?:19|20)\d{2}[\/\-.](?:0?[1-9]|1[0-2])[\/\-.](?:0?[1-9]|[12]\d|3[01])\b/g;
var MONTHS = "(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)";
// Written dates: "January 5, 1985" or "5th January 1985" (case-insensitive).
var DATE_WRITTEN_RE = new RegExp(
  `\\b${MONTHS}\\s+\\d{1,2}(?:st|nd|rd|th)?,?\\s*(?:19|20)\\d{2}\\b|\\b\\d{1,2}(?:st|nd|rd|th)?\\s+${MONTHS},?\\s*(?:19|20)\\d{2}\\b`,
  "gi"
);
// Words that indicate a date is a birth date.
var DOB_CONTEXT_RE = /\b(?:born|birth|birthday|dob|date\s+of\s+birth|d\.o\.b)\b/i;

/** Detector for dates that appear in a birth-date context. */
var DateOfBirthDetector = {
  name: "date-of-birth",
  type: "DATE_OF_BIRTH",
  confidence: "medium",
  /**
   * Find dates in `text` that are accompanied by a birth-related word.
   *
   * A date is reported when such a word occurs anywhere in `text`; the
   * confidence is 0.85 when the word lies within 50 characters on either side
   * of the date and 0.5 otherwise. Numeric dates are listed before written
   * ones.
   *
   * @param {string} text - Text to scan.
   * @returns {Array<object>} Matches of type DATE_OF_BIRTH.
   */
  detect(text) {
    const found = [];
    const textMentionsBirth = DOB_CONTEXT_RE.test(text);
    [DATE_NUMERIC_RE, DATE_WRITTEN_RE].forEach((pattern) => {
      // Both patterns are shared /g regexes, so rewind before each scan.
      pattern.lastIndex = 0;
      for (let hit = pattern.exec(text); hit !== null; hit = pattern.exec(text)) {
        const dateText = hit[0];
        const dateEnd = hit.index + dateText.length;
        const windowStart = Math.max(0, hit.index - 50);
        const windowEnd = Math.min(text.length, dateEnd + 50);
        const nearbyBirthWord = DOB_CONTEXT_RE.test(text.slice(windowStart, windowEnd));
        if (textMentionsBirth || nearbyBirthWord) {
          found.push({
            type: "DATE_OF_BIRTH",
            value: dateText,
            start: hit.index,
            end: dateEnd,
            confidence: nearbyBirthWord ? 0.85 : 0.5,
            detector: "date-of-birth"
          });
        }
      }
    });
    return found;
  }
};
|
|
335
|
+
|
|
336
|
+
// src/detectors/address.ts
|
|
337
|
+
// House number, optional compass letter, capitalised street name words and a
// street-type suffix.
var STREET_RE = /\b\d{1,6}\s+(?:[NSEW]\.?\s+)?[A-Z][a-zA-Z]+(?:\s+[A-Z][a-zA-Z]+)*\s+(?:St(?:reet)?|Ave(?:nue)?|Blvd|Boulevard|Dr(?:ive)?|Ln|Lane|Rd|Road|Ct|Court|Pl(?:ace)?|Way|Cir(?:cle)?|Pkwy|Parkway|Ter(?:race)?|Hwy|Highway)\.?\b/g;
// Five-digit ZIP code with an optional "-1234" extension.
var ZIP_RE = /\b\d{5}(?:-\d{4})?\b/g;
// Phrases that indicate the text is talking about an address.
var ADDRESS_CONTEXT_RE = /\b(?:address|live[sd]?\s+(?:at|on)|located\s+at|ship(?:ping)?\s+to|deliver\s+to|reside[sd]?\s+at|mailing)\b/i;

/** Detector for street addresses and, in address context, ZIP codes. */
var AddressDetector = {
  name: "address",
  type: "ADDRESS",
  confidence: "medium",
  /**
   * Find street addresses in `text` (confidence 0.8). When the text also
   * contains an address phrase, ZIP codes are reported too (confidence 0.6)
   * unless they start inside a match that was already recorded.
   *
   * @param {string} text - Text to scan.
   * @returns {Array<object>} Street matches first, then ZIP matches.
   */
  detect(text) {
    const found = [];
    const record = (hit, confidence) => {
      found.push({
        type: "ADDRESS",
        value: hit[0],
        start: hit.index,
        end: hit.index + hit[0].length,
        confidence,
        detector: "address"
      });
    };

    STREET_RE.lastIndex = 0;
    for (let hit = STREET_RE.exec(text); hit !== null; hit = STREET_RE.exec(text)) {
      record(hit, 0.8);
    }

    if (!ADDRESS_CONTEXT_RE.test(text)) return found;

    ZIP_RE.lastIndex = 0;
    for (let hit = ZIP_RE.exec(text); hit !== null; hit = ZIP_RE.exec(text)) {
      const zipStart = hit.index;
      // e.g. the house number "12345" of "12345 Oak Road" is not a ZIP code.
      const insideRecorded = found.some((m) => zipStart >= m.start && zipStart < m.end);
      if (!insideRecorded) record(hit, 0.6);
    }
    return found;
  }
};
|
|
379
|
+
|
|
380
|
+
// src/detectors/index.ts
|
|
381
|
+
// Built-in detectors, in the order in which they are listed here.
var allDetectors = [
  EmailDetector, PhoneDetector, SSNDetector, CreditCardDetector,
  IPAddressDetector, URLDetector, DateOfBirthDetector, AddressDetector
];
|
|
391
|
+
|
|
392
|
+
// src/strategies/placeholder.ts
|
|
393
|
+
/**
 * Build a replacement strategy that emits numbered placeholders.
 *
 * The returned function maps a match to `[TYPE_n]`, where `n` counts the
 * distinct values seen so far for that type. A value that was seen before gets
 * the token it was first given, whatever type it is reported as later. The
 * counters and the value table live in this closure.
 *
 * @returns {(match: object) => string} Stateful strategy function.
 */
function createPlaceholderStrategy() {
  const lastIndexByType = /* @__PURE__ */ new Map();
  const tokenByValue = /* @__PURE__ */ new Map();

  const issueToken = (type) => {
    const index = (lastIndexByType.get(type) ?? 0) + 1;
    lastIndexByType.set(type, index);
    return `[${type}_${index}]`;
  };

  return (match) => {
    const known = tokenByValue.get(match.value);
    if (known) return known;
    const token = issueToken(match.type);
    tokenByValue.set(match.value, token);
    return token;
  };
}
|
|
406
|
+
/**
 * Hash a value with SHA-256 and keep the first 8 hex characters.
 *
 * @param {string} value - Data to hash.
 * @returns {string} 8 lowercase hex characters.
 */
function shortHash(value) {
  const hasher = createHash("sha256");
  hasher.update(value);
  const hex = hasher.digest("hex");
  return hex.slice(0, 8);
}
|
|
409
|
+
/**
 * Replacement strategy: substitute a match with the short hash of its value.
 *
 * @param {object} match - Match whose `value` is hashed.
 * @returns {string} Result of `shortHash(match.value)`.
 */
var hashStrategy = (match) => shortHash(match.value);
|
|
412
|
+
|
|
413
|
+
// src/strategies/mask.ts
|
|
414
|
+
/**
 * Mask an e-mail address, keeping the first character of the local part, the
 * first character of the first domain label and the remaining domain labels.
 *
 * Example: "john@example.com" becomes "j███@e███.com".
 *
 * Edge cases handled explicitly (they used to produce the text "undefined" or
 * a dangling "."): an empty local part or empty first label contributes no
 * visible character, a domain without a dot gets no trailing ".", and the
 * domain is taken after the LAST "@".
 *
 * @param {string} value - Address to mask.
 * @returns {string} Masked address, or "████" when there is no "@" or nothing
 *   follows it.
 */
function maskEmail(value) {
  const blocks = "\u2588\u2588\u2588";
  const at = value.lastIndexOf("@");
  if (at === -1 || at === value.length - 1) return "\u2588\u2588\u2588\u2588";

  const local = value.slice(0, at);
  const domain = value.slice(at + 1);
  // Destructuring a string yields whole code points, so a leading astral
  // character is not split in half.
  const [localInitial = ""] = local;
  const [firstLabel, ...parentLabels] = domain.split(".");
  const [labelInitial = ""] = firstLabel;
  const suffix = parentLabels.length > 0 ? "." + parentLabels.join(".") : "";
  return localInitial + blocks + "@" + labelInitial + blocks + suffix;
}
|
|
422
|
+
/**
 * Mask a phone number, keeping only its last four digits.
 *
 * Separators are dropped: the result is one "█" per hidden digit followed by
 * the last four digits.
 *
 * @param {string} value - Phone number in any format.
 * @returns {string} Masked digits. Values with four digits or fewer are fully
 *   masked as "████"; previously fewer than four digits threw a RangeError
 *   from `repeat()` and exactly four digits were returned unmasked.
 */
function maskPhone(value) {
  const digits = value.replace(/\D/g, "");
  if (digits.length <= 4) return "\u2588\u2588\u2588\u2588";
  return "\u2588".repeat(digits.length - 4) + digits.slice(-4);
}
|
|
427
|
+
/**
 * Mask an SSN as "███-██-" followed by the last four characters of `value`.
 *
 * @param {string} value - SSN text.
 * @returns {string} Masked SSN.
 */
function maskSSN(value) {
  const lastFour = value.slice(-4);
  return `\u2588\u2588\u2588-\u2588\u2588-${lastFour}`;
}
|
|
430
|
+
/**
 * Mask a card number as three hidden groups followed by its last four digits.
 *
 * @param {string} value - Card number, with or without separators.
 * @returns {string} "████-████-████-" plus the last four digits of `value`.
 */
function maskCreditCard(value) {
  const lastFour = value.replace(/\D/g, "").slice(-4);
  const hiddenGroup = "\u2588\u2588\u2588\u2588";
  return [hiddenGroup, hiddenGroup, hiddenGroup, lastFour].join("-");
}
|
|
435
|
+
/**
 * Mask an arbitrary value, keeping only its first and last character.
 *
 * @param {string} value - Text to mask.
 * @returns {string} "████" for values of up to four characters; otherwise the
 *   first character, one "█" per inner character, and the last character.
 */
function maskGeneric(value) {
  if (value.length <= 4) return "\u2588\u2588\u2588\u2588";
  const lastIndex = value.length - 1;
  const hiddenMiddle = "\u2588".repeat(lastIndex - 1);
  return value[0] + hiddenMiddle + value[lastIndex];
}
|
|
439
|
+
// Type-specific maskers. A Map is used so that only the keys listed here
// resolve: with a plain object, a match type such as "constructor" or
// "toString" resolved to an inherited Object.prototype member and was called
// as if it were a masker.
var MASKERS = /* @__PURE__ */ new Map([
  ["EMAIL", maskEmail],
  ["PHONE", maskPhone],
  ["SSN", maskSSN],
  ["CREDIT_CARD", maskCreditCard]
]);

/**
 * Replacement strategy: partially mask the matched value.
 *
 * @param {object} match - Match with `type` and `value`.
 * @returns {string} Output of the masker registered for `match.type`, or of
 *   `maskGeneric` when the type has no dedicated masker.
 */
var maskStrategy = (match) => {
  const masker = MASKERS.get(match.type) ?? maskGeneric;
  return masker(match.value);
};
|
|
449
|
+
|
|
450
|
+
// src/strategies/category.ts
|
|
451
|
+
/**
 * Replacement strategy: substitute a match with its type in square brackets.
 *
 * @param {object} match - Match whose `type` names the category.
 * @returns {string} For example "[EMAIL]".
 */
var categoryStrategy = ({ type }) => `[${type}]`;
|
|
454
|
+
|
|
455
|
+
// src/strategies/index.ts
|
|
456
|
+
/**
 * Turn a strategy option into a replacement function.
 *
 * @param {Function|string|null|undefined} strategy - A custom function
 *   (returned unchanged), one of "placeholder", "hash", "mask", "category",
 *   or null/undefined for the "placeholder" default.
 * @returns {Function} Function that maps a match to its replacement text. The
 *   placeholder strategy is created anew on every call.
 * @throws {Error} When `strategy` is any other value.
 */
function resolveStrategy(strategy) {
  if (typeof strategy === "function") return strategy;

  const requested = strategy ?? "placeholder";
  if (requested === "placeholder") return createPlaceholderStrategy();
  if (requested === "hash") return hashStrategy;
  if (requested === "mask") return maskStrategy;
  if (requested === "category") return categoryStrategy;

  throw new Error(`Unknown strategy: ${strategy}`);
}
|
|
471
|
+
|
|
472
|
+
// src/vault.ts
|
|
473
|
+
/**
 * Two-way store of original values and the replacements that stand in for
 * them, with optional time-based expiry.
 */
var Vault = class {
  // replacement -> { original, replacement, type }
  replacementToOriginal = /* @__PURE__ */ new Map();
  // original -> replacement
  originalToReplacement = /* @__PURE__ */ new Map();
  ttl;
  // Pending expiry timers; a timer removes itself when it fires.
  timers = /* @__PURE__ */ new Set();

  /**
   * @param {{ttl?: number}} [options] - `ttl` is the entry lifetime in
   *   milliseconds; entries never expire when it is missing or not positive.
   */
  constructor(options) {
    this.ttl = options?.ttl;
  }

  /**
   * Remember that `replacement` stands for `original`.
   *
   * @param {string} original - The sensitive value.
   * @param {string} replacement - The text that replaced it.
   * @param {string} type - Match type, stored alongside the pair.
   */
  store(original, replacement, type) {
    const entry = { original, replacement, type };
    this.replacementToOriginal.set(replacement, entry);
    this.originalToReplacement.set(original, replacement);
    if (this.ttl && this.ttl > 0) {
      const timer = setTimeout(() => {
        this.timers.delete(timer);
        // Only evict what this timer was created for: the pair may have been
        // stored again (with a fresh timer) in the meantime.
        if (this.replacementToOriginal.get(replacement) === entry) {
          this.replacementToOriginal.delete(replacement);
        }
        if (this.originalToReplacement.get(original) === replacement) {
          this.originalToReplacement.delete(original);
        }
      }, this.ttl);
      // Do not keep the process alive just to expire vault entries. Outside
      // Node the timer handle has no unref(), hence the optional call.
      timer.unref?.();
      this.timers.add(timer);
    }
  }

  /**
   * @param {string} original - A previously stored original value.
   * @returns {string|undefined} Its replacement, if one is stored.
   */
  getReplacementFor(original) {
    return this.originalToReplacement.get(original);
  }

  /**
   * Put the original values back into `text`.
   *
   * Replacements are processed longest first so that a short token cannot
   * consume part of a longer one. Each token is substituted literally and in
   * one pass: the earlier `while (includes) replace()` loop never terminated
   * when an original contained its own token or the token was empty, and
   * `String.prototype.replace` treated "$&", "$$" and similar sequences inside
   * an original as replacement patterns.
   *
   * @param {string} text - Text containing replacements.
   * @returns {string} Text with every stored replacement substituted.
   */
  restore(text) {
    let result = text;
    const entries = [...this.replacementToOriginal.values()].sort(
      (a, b) => b.replacement.length - a.replacement.length
    );
    for (const entry of entries) {
      const token = String(entry.replacement);
      // An empty token "occurs" everywhere; there is nothing to restore.
      if (token === "") continue;
      result = result.split(token).join(entry.original);
    }
    return result;
  }

  /** @returns {Map<string, object>} Copy of the replacement -> entry map. */
  getMapping() {
    return new Map(this.replacementToOriginal);
  }

  /** @returns {Array<object>} All stored `{ original, replacement, type }` entries. */
  getEntries() {
    return [...this.replacementToOriginal.values()];
  }

  /** Remove every entry and cancel all pending expiry timers. */
  clear() {
    this.replacementToOriginal.clear();
    this.originalToReplacement.clear();
    for (const timer of this.timers) clearTimeout(timer);
    this.timers.clear();
  }

  /** @returns {string} JSON array of `[replacement, entry]` pairs. */
  export() {
    return JSON.stringify([...this.replacementToOriginal.entries()]);
  }

  /**
   * Load entries produced by `export()`, adding to whatever is already stored.
   * No expiry timer is scheduled for imported entries.
   *
   * @param {string} data - JSON produced by `export()`.
   */
  import(data) {
    const entries = JSON.parse(data);
    for (const [key, entry] of entries) {
      this.replacementToOriginal.set(key, entry);
      this.originalToReplacement.set(entry.original, entry.replacement);
    }
  }
};
|
|
530
|
+
|
|
531
|
+
// src/pipeline.ts
|
|
532
|
+
/**
 * Runs detectors over a text, resolves overlapping matches and replaces each
 * surviving match using the configured strategy.
 */
var RedactionPipeline = class {
  detectors;
  strategyFn;
  vault;
  config;

  /**
   * @param {object} [config] - Options read by this class: `detectors`,
   *   `strategy`, `vault`, `vaultTTL`, `locale`, `overlapResolution`,
   *   `minConfidence`, `allowList`, `onDetection`, `onRedaction`.
   */
  constructor(config = {}) {
    // Work on a private copy of the detector list. Sharing the array meant
    // addDetector() pushed onto the module-level `allDetectors` (changing
    // every other default pipeline) or onto the caller's own array. Putting
    // `detectors` after the spread also keeps an explicit
    // `detectors: undefined` from overriding the default.
    const detectors = [...(config.detectors ?? allDetectors)];
    this.config = { ...config, detectors };
    this.detectors = detectors;
    this.strategyFn = resolveStrategy(this.config.strategy);
    this.vault = this.config.vault ? new Vault({ ttl: this.config.vaultTTL }) : null;
  }

  /**
   * Add a detector to this pipeline only.
   *
   * @param {object} detector - Object with a `detect(text, context)` method.
   */
  addDetector(detector) {
    this.detectors.push(detector);
  }

  /**
   * Detect and replace sensitive values in `text`.
   *
   * @param {string} text - Input text.
   * @returns {object} `{ text, matches, tokens, stats }`, where `stats` holds
   *   `totalDetected`, `byType` and `processingTimeMs`.
   */
  redact(text) {
    const startTime = performance.now();
    const context = { locale: this.config.locale };
    const rawMatches = [];
    for (const detector of this.detectors) {
      // Append one by one: spreading a very large result into push() can
      // exceed the engine's argument limit and throw a RangeError.
      for (const match of detector.detect(text, context)) {
        rawMatches.push(match);
      }
    }
    if (this.config.onDetection) {
      for (const match of rawMatches) {
        this.config.onDetection(match);
      }
    }
    const matches = aggregate(rawMatches, {
      overlapResolution: this.config.overlapResolution ?? "highest-confidence",
      minConfidence: this.config.minConfidence ?? 0,
      mergeAdjacent: false
    });
    const finalMatches = this.applyLists(matches);

    const tokens = [];
    const byType = {};
    let result = "";
    let cursor = 0;
    for (const match of finalMatches) {
      // Copy the untouched text between the previous match and this one.
      result += text.slice(cursor, match.start);
      const replacement = this.replacementFor(match);
      result += replacement;
      tokens.push({
        original: match.value,
        replacement,
        type: match.type,
        confidence: match.confidence
      });
      byType[match.type] = (byType[match.type] ?? 0) + 1;
      cursor = match.end;
    }
    result += text.slice(cursor);

    const redactionResult = {
      text: result,
      matches: finalMatches,
      tokens,
      stats: {
        totalDetected: finalMatches.length,
        byType,
        processingTimeMs: performance.now() - startTime
      }
    };
    if (this.config.onRedaction) {
      this.config.onRedaction(redactionResult);
    }
    return redactionResult;
  }

  /**
   * Pick the replacement for one match. With a vault, a value that is already
   * stored reuses its replacement; new values are stored after replacement.
   *
   * @param {object} match - Match being replaced.
   * @returns {string} Replacement text.
   */
  replacementFor(match) {
    if (!this.vault) return this.strategyFn(match);
    const existing = this.vault.getReplacementFor(match.value);
    if (existing) return existing;
    const replacement = this.strategyFn(match);
    this.vault.store(match.value, replacement, match.type);
    return replacement;
  }

  /** Get the vault instance (only available if vault: true). */
  getVault() {
    return this.vault;
  }

  /**
   * Drop matches whose value is on `config.allowList` (case-insensitive).
   * NOTE(review): `config.denyList` is not applied anywhere in this class.
   *
   * @param {Array<object>} matches - Aggregated matches.
   * @returns {Array<object>} `matches` itself when there is no allow list,
   *   otherwise a filtered copy.
   */
  applyLists(matches) {
    const allowList = this.config.allowList;
    if (!allowList || allowList.length === 0) return matches;
    const allowSet = new Set(allowList.map((s) => s.toLowerCase()));
    return matches.filter((m) => !allowSet.has(m.value.toLowerCase()));
  }
};
|
|
628
|
+
|
|
629
|
+
export { AddressDetector, CreditCardDetector, DateOfBirthDetector, EmailDetector, IPAddressDetector, PhoneDetector, RedactionPipeline, SSNDetector, URLDetector, Vault, aggregate, allDetectors, categoryStrategy, countDigits, createPlaceholderStrategy, hashStrategy, maskStrategy, resolveStrategy };
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
// src/utils/text.ts
|
|
2
|
+
/**
 * Replace, in place, every string found in a nested structure of arrays and
 * objects with `fn(string)`.
 *
 * Each container is visited once. This keeps a cyclic structure from
 * recursing until the stack overflows, and keeps an object that is referenced
 * from several places from having its strings transformed more than once.
 *
 * @param {*} obj - Array or object to transform; any other value is ignored.
 * @param {(value: string) => string} fn - Applied to every string element and
 *   every string-valued own enumerable property.
 * @returns {undefined}
 */
function transformStrings(obj, fn) {
  const visited = /* @__PURE__ */ new WeakSet();

  const visit = (node) => {
    if (node === null || typeof node !== "object" || visited.has(node)) return;
    visited.add(node);

    if (Array.isArray(node)) {
      for (let i = 0; i < node.length; i++) {
        if (typeof node[i] === "string") {
          node[i] = fn(node[i]);
        } else {
          visit(node[i]);
        }
      }
      return;
    }

    for (const key of Object.keys(node)) {
      if (typeof node[key] === "string") {
        node[key] = fn(node[key]);
      } else {
        visit(node[key]);
      }
    }
  };

  visit(obj);
}
|
|
23
|
+
/**
 * Read a value from a nested structure by a dot-separated path.
 *
 * @param {*} obj - Root value.
 * @param {string} path - Keys joined by ".", e.g. "user.address.city".
 * @returns {*} The value at `path`, or undefined as soon as a step has to be
 *   taken from something that is null, undefined or not an object.
 */
function getNestedValue(obj, path) {
  let node = obj;
  for (const key of path.split(".")) {
    const canDescend = node !== null && node !== void 0 && typeof node === "object";
    if (!canDescend) return void 0;
    node = node[key];
  }
  return node;
}
|
|
32
|
+
/**
 * Assign a value inside a nested structure by a dot-separated path.
 *
 * Nothing is created along the way: when a step other than the last one does
 * not lead to an object, the call does nothing. Paths containing "__proto__",
 * "constructor" or "prototype" are ignored as well, because walking through
 * those keys would write to a shared prototype (e.g. "__proto__.x" used to
 * set a property on Object.prototype).
 *
 * @param {*} obj - Root object, modified in place.
 * @param {string} path - Keys joined by ".", e.g. "user.email".
 * @param {*} value - Value to assign at `path`.
 * @returns {undefined}
 */
function setNestedValue(obj, path, value) {
  const parts = path.split(".");
  const isUnsafe = (key) => key === "__proto__" || key === "constructor" || key === "prototype";
  if (parts.some(isUnsafe)) return;

  const isContainer = (node) => node !== null && node !== void 0 && typeof node === "object";
  const lastKey = parts[parts.length - 1];

  let current = obj;
  for (const key of parts.slice(0, -1)) {
    if (!isContainer(current)) return;
    current = current[key];
  }
  if (isContainer(current)) {
    current[lastKey] = value;
  }
}
|
|
43
|
+
|
|
44
|
+
export { getNestedValue, setNestedValue, transformStrings };
|