@elanlanguages/bridge-anonymization 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +382 -0
- package/dist/crypto/index.d.ts +6 -0
- package/dist/crypto/index.d.ts.map +1 -0
- package/dist/crypto/index.js +6 -0
- package/dist/crypto/index.js.map +1 -0
- package/dist/crypto/pii-map-crypto.d.ts +100 -0
- package/dist/crypto/pii-map-crypto.d.ts.map +1 -0
- package/dist/crypto/pii-map-crypto.js +163 -0
- package/dist/crypto/pii-map-crypto.js.map +1 -0
- package/dist/index.d.ts +173 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +294 -0
- package/dist/index.js.map +1 -0
- package/dist/ner/bio-decoder.d.ts +64 -0
- package/dist/ner/bio-decoder.d.ts.map +1 -0
- package/dist/ner/bio-decoder.js +216 -0
- package/dist/ner/bio-decoder.js.map +1 -0
- package/dist/ner/index.d.ts +10 -0
- package/dist/ner/index.d.ts.map +1 -0
- package/dist/ner/index.js +10 -0
- package/dist/ner/index.js.map +1 -0
- package/dist/ner/model-manager.d.ts +102 -0
- package/dist/ner/model-manager.d.ts.map +1 -0
- package/dist/ner/model-manager.js +253 -0
- package/dist/ner/model-manager.js.map +1 -0
- package/dist/ner/ner-model.d.ts +114 -0
- package/dist/ner/ner-model.d.ts.map +1 -0
- package/dist/ner/ner-model.js +240 -0
- package/dist/ner/ner-model.js.map +1 -0
- package/dist/ner/onnx-runtime.d.ts +45 -0
- package/dist/ner/onnx-runtime.d.ts.map +1 -0
- package/dist/ner/onnx-runtime.js +99 -0
- package/dist/ner/onnx-runtime.js.map +1 -0
- package/dist/ner/tokenizer.d.ts +140 -0
- package/dist/ner/tokenizer.d.ts.map +1 -0
- package/dist/ner/tokenizer.js +341 -0
- package/dist/ner/tokenizer.js.map +1 -0
- package/dist/pipeline/index.d.ts +9 -0
- package/dist/pipeline/index.d.ts.map +1 -0
- package/dist/pipeline/index.js +9 -0
- package/dist/pipeline/index.js.map +1 -0
- package/dist/pipeline/prenormalize.d.ts +48 -0
- package/dist/pipeline/prenormalize.d.ts.map +1 -0
- package/dist/pipeline/prenormalize.js +94 -0
- package/dist/pipeline/prenormalize.js.map +1 -0
- package/dist/pipeline/resolver.d.ts +56 -0
- package/dist/pipeline/resolver.d.ts.map +1 -0
- package/dist/pipeline/resolver.js +238 -0
- package/dist/pipeline/resolver.js.map +1 -0
- package/dist/pipeline/tagger.d.ts +74 -0
- package/dist/pipeline/tagger.d.ts.map +1 -0
- package/dist/pipeline/tagger.js +169 -0
- package/dist/pipeline/tagger.js.map +1 -0
- package/dist/pipeline/validator.d.ts +65 -0
- package/dist/pipeline/validator.d.ts.map +1 -0
- package/dist/pipeline/validator.js +264 -0
- package/dist/pipeline/validator.js.map +1 -0
- package/dist/recognizers/base.d.ts +78 -0
- package/dist/recognizers/base.d.ts.map +1 -0
- package/dist/recognizers/base.js +100 -0
- package/dist/recognizers/base.js.map +1 -0
- package/dist/recognizers/bic-swift.d.ts +10 -0
- package/dist/recognizers/bic-swift.d.ts.map +1 -0
- package/dist/recognizers/bic-swift.js +107 -0
- package/dist/recognizers/bic-swift.js.map +1 -0
- package/dist/recognizers/credit-card.d.ts +32 -0
- package/dist/recognizers/credit-card.d.ts.map +1 -0
- package/dist/recognizers/credit-card.js +160 -0
- package/dist/recognizers/credit-card.js.map +1 -0
- package/dist/recognizers/custom-id.d.ts +28 -0
- package/dist/recognizers/custom-id.d.ts.map +1 -0
- package/dist/recognizers/custom-id.js +116 -0
- package/dist/recognizers/custom-id.js.map +1 -0
- package/dist/recognizers/email.d.ts +10 -0
- package/dist/recognizers/email.d.ts.map +1 -0
- package/dist/recognizers/email.js +75 -0
- package/dist/recognizers/email.js.map +1 -0
- package/dist/recognizers/iban.d.ts +14 -0
- package/dist/recognizers/iban.d.ts.map +1 -0
- package/dist/recognizers/iban.js +67 -0
- package/dist/recognizers/iban.js.map +1 -0
- package/dist/recognizers/index.d.ts +20 -0
- package/dist/recognizers/index.d.ts.map +1 -0
- package/dist/recognizers/index.js +42 -0
- package/dist/recognizers/index.js.map +1 -0
- package/dist/recognizers/ip-address.d.ts +14 -0
- package/dist/recognizers/ip-address.d.ts.map +1 -0
- package/dist/recognizers/ip-address.js +183 -0
- package/dist/recognizers/ip-address.js.map +1 -0
- package/dist/recognizers/phone.d.ts +10 -0
- package/dist/recognizers/phone.d.ts.map +1 -0
- package/dist/recognizers/phone.js +145 -0
- package/dist/recognizers/phone.js.map +1 -0
- package/dist/recognizers/registry.d.ts +59 -0
- package/dist/recognizers/registry.d.ts.map +1 -0
- package/dist/recognizers/registry.js +113 -0
- package/dist/recognizers/registry.js.map +1 -0
- package/dist/recognizers/url.d.ts +14 -0
- package/dist/recognizers/url.d.ts.map +1 -0
- package/dist/recognizers/url.js +121 -0
- package/dist/recognizers/url.js.map +1 -0
- package/dist/types/index.d.ts +134 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +69 -0
- package/dist/types/index.js.map +1 -0
- package/dist/types/pii-types.d.ts +50 -0
- package/dist/types/pii-types.d.ts.map +1 -0
- package/dist/types/pii-types.js +114 -0
- package/dist/types/pii-types.js.map +1 -0
- package/dist/utils/iban-checksum.d.ts +23 -0
- package/dist/utils/iban-checksum.d.ts.map +1 -0
- package/dist/utils/iban-checksum.js +106 -0
- package/dist/utils/iban-checksum.js.map +1 -0
- package/dist/utils/index.d.ts +8 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +8 -0
- package/dist/utils/index.js.map +1 -0
- package/dist/utils/luhn.d.ts +17 -0
- package/dist/utils/luhn.d.ts.map +1 -0
- package/dist/utils/luhn.js +55 -0
- package/dist/utils/luhn.js.map +1 -0
- package/dist/utils/offsets.d.ts +86 -0
- package/dist/utils/offsets.d.ts.map +1 -0
- package/dist/utils/offsets.js +124 -0
- package/dist/utils/offsets.js.map +1 -0
- package/package.json +62 -0
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* IP Address Recognizer
|
|
3
|
+
* Detects IPv4 and IPv6 addresses
|
|
4
|
+
*/
|
|
5
|
+
import { PIIType, DetectionSource } from '../types/index.js';
|
|
6
|
+
/**
|
|
7
|
+
* IPv4 address pattern
|
|
8
|
+
* Matches: 0.0.0.0 to 255.255.255.255
|
|
9
|
+
 * Matches are additionally validated to contain at least one octet > 9, to avoid matching version numbers like 1.2.3.4
|
|
10
|
+
*/
|
|
11
|
+
// NOTE: the regex alone accepts every in-range dotted quad (including 1.2.3.4); the "at least one octet > 9" filter is applied afterwards by isValidIPv4.
const IPV4_PATTERN = /\b(?:(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])\.){3}(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])\b/g;
|
|
12
|
+
/**
|
|
13
|
+
* IPv6 address patterns
|
|
14
|
+
* Covers standard, compressed, and mixed formats
|
|
15
|
+
*/
|
|
16
|
+
const IPV6_PATTERNS = [
|
|
17
|
+
// Full format: 8 groups of 4 hex digits
|
|
18
|
+
/\b(?:[0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}\b/g,
|
|
19
|
+
// Compressed format with ::
|
|
20
|
+
/\b(?:[0-9a-fA-F]{1,4}:){1,7}:\b/g,
|
|
21
|
+
/\b(?:[0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}\b/g,
|
|
22
|
+
/\b(?:[0-9a-fA-F]{1,4}:){1,5}(?::[0-9a-fA-F]{1,4}){1,2}\b/g,
|
|
23
|
+
/\b(?:[0-9a-fA-F]{1,4}:){1,4}(?::[0-9a-fA-F]{1,4}){1,3}\b/g,
|
|
24
|
+
/\b(?:[0-9a-fA-F]{1,4}:){1,3}(?::[0-9a-fA-F]{1,4}){1,4}\b/g,
|
|
25
|
+
/\b(?:[0-9a-fA-F]{1,4}:){1,2}(?::[0-9a-fA-F]{1,4}){1,5}\b/g,
|
|
26
|
+
/\b[0-9a-fA-F]{1,4}:(?::[0-9a-fA-F]{1,4}){1,6}\b/g,
|
|
27
|
+
/\b::(?:[0-9a-fA-F]{1,4}:){0,5}[0-9a-fA-F]{1,4}\b/g,
|
|
28
|
+
/\b::\b/g, // Loopback shorthand
|
|
29
|
+
// IPv4-mapped IPv6 (::ffff:x.x.x.x)
|
|
30
|
+
/\b::ffff:(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b/gi,
|
|
31
|
+
];
|
|
32
|
+
/**
|
|
33
|
+
* Common internal/local IP ranges to optionally exclude
|
|
34
|
+
*/
|
|
35
|
+
const INTERNAL_IPV4_RANGES = [
|
|
36
|
+
/^10\./, // 10.0.0.0/8
|
|
37
|
+
/^172\.(1[6-9]|2[0-9]|3[01])\./, // 172.16.0.0/12
|
|
38
|
+
/^192\.168\./, // 192.168.0.0/16
|
|
39
|
+
/^127\./, // Loopback
|
|
40
|
+
/^0\./, // Invalid
|
|
41
|
+
];
|
|
42
|
+
/**
 * IP Address recognizer (IPv4 and IPv6)
 *
 * `find` runs IPV4_PATTERN and every entry of IPV6_PATTERNS over the text,
 * validates each candidate and reports it as a PIIType.IP_ADDRESS span.
 * Because several IPV6_PATTERNS entries can match pieces of one address,
 * IPv6 spans that lie entirely inside a longer IPv6 span are dropped.
 */
export const ipAddressRecognizer = {
    type: PIIType.IP_ADDRESS,
    name: 'IP Address',
    defaultConfidence: 0.9,
    /**
     * Finds IP addresses in `text`.
     * @param text - Text to scan
     * @returns IPv4 spans first, then the surviving IPv6 spans in the order
     *          the patterns produced them
     */
    find(text) {
        // Span keys ("start:end") already reported, shared by both families.
        const seen = new Set();
        // Runs one pattern and returns the spans that pass `accepts`.
        const scan = (pattern, accepts, confidence) => {
            const found = [];
            for (const match of text.matchAll(pattern)) {
                if (match.index === undefined)
                    continue;
                const ip = match[0];
                const end = match.index + ip.length;
                const key = `${match.index}:${end}`;
                if (seen.has(key) || !accepts(ip))
                    continue;
                seen.add(key);
                found.push({
                    type: PIIType.IP_ADDRESS,
                    start: match.index,
                    end,
                    confidence,
                    source: DetectionSource.REGEX,
                    text: ip,
                });
            }
            return found;
        };
        // Find IPv4 addresses
        const ipv4Matches = scan(new RegExp(IPV4_PATTERN.source, 'g'), (ip) => this.validate(ip), this.defaultConfidence);
        // Find IPv6 addresses (slightly lower confidence than IPv4)
        const ipv6Matches = [];
        for (const pattern of IPV6_PATTERNS) {
            const flags = pattern.flags.includes('g') ? pattern.flags : pattern.flags + 'g';
            // Candidates shorter than 3 characters (e.g. a bare "::") are skipped as likely false positives.
            ipv6Matches.push(...scan(new RegExp(pattern.source, flags), (ip) => ip.length >= 3 && isValidIPv6(ip), this.defaultConfidence * 0.95));
        }
        // IPv4 spans are returned untouched; only nested IPv6 fragments are removed.
        return [...ipv4Matches, ...dropContainedSpans(ipv6Matches)];
    },
    /**
     * Validates a candidate: dotted strings without ':' are checked as IPv4,
     * anything containing ':' as IPv6, everything else is rejected.
     */
    validate(ip) {
        // Check if it's IPv4
        if (ip.includes('.') && !ip.includes(':')) {
            return isValidIPv4(ip);
        }
        // Check if it's IPv6
        if (ip.includes(':')) {
            return isValidIPv6(ip);
        }
        return false;
    },
    /**
     * Lower-cases and trims the address.
     */
    normalize(ip) {
        return ip.toLowerCase().trim();
    },
};
/**
 * Removes every span that lies entirely inside a strictly longer span of the
 * same list. Partially overlapping spans are kept, so the set of covered
 * characters never shrinks. Relative order of the survivors is preserved.
 */
function dropContainedSpans(spans) {
    const length = (span) => span.end - span.start;
    return spans.filter((inner) => !spans.some((outer) => outer.start <= inner.start &&
        inner.end <= outer.end &&
        length(outer) > length(inner)));
}
|
|
117
|
+
/**
 * Validates an IPv4 address in dotted-decimal notation.
 *
 * Rules:
 *  - exactly four octets separated by '.'
 *  - every octet consists of decimal digits only (no sign, whitespace or
 *    trailing characters - `parseInt` alone would let "4a" or " 10" through)
 *  - no leading zeros (except for "0" itself)
 *  - every octet is in 0..255
 *  - at least one octet is > 9, to avoid matching version numbers like 1.2.3.4
 *
 * @param ip - Candidate string
 * @returns true when all rules hold
 */
function isValidIPv4(ip) {
    const parts = ip.split('.');
    if (parts.length !== 4)
        return false;
    let hasLargeOctet = false;
    for (const part of parts) {
        // "0" or 1-3 digits without a leading zero; rejects "", "01", "4a", "-0", " 10".
        if (!/^(?:0|[1-9][0-9]{0,2})$/.test(part))
            return false;
        const num = Number(part);
        if (num > 255)
            return false;
        // Track if any octet is > 9 (helps distinguish from version numbers)
        if (num > 9)
            hasLargeOctet = true;
    }
    return hasLargeOctet;
}
|
|
139
|
+
/**
|
|
140
|
+
* Validates an IPv6 address
|
|
141
|
+
*/
|
|
142
|
+
function isValidIPv6(ip) {
|
|
143
|
+
// Handle IPv4-mapped addresses
|
|
144
|
+
if (ip.toLowerCase().startsWith('::ffff:') && ip.includes('.')) {
|
|
145
|
+
const ipv4Part = ip.slice(7);
|
|
146
|
+
return isValidIPv4(ipv4Part);
|
|
147
|
+
}
|
|
148
|
+
// Count colons and check for ::
|
|
149
|
+
const colonCount = (ip.match(/:/g) ?? []).length;
|
|
150
|
+
const hasDoubleColon = ip.includes('::');
|
|
151
|
+
// If has ::, must have less than 8 groups
|
|
152
|
+
if (hasDoubleColon) {
|
|
153
|
+
// Can only have one ::
|
|
154
|
+
if ((ip.match(/::/g) ?? []).length > 1)
|
|
155
|
+
return false;
|
|
156
|
+
// Should have at least 2 colons (including the ::)
|
|
157
|
+
if (colonCount < 2)
|
|
158
|
+
return false;
|
|
159
|
+
}
|
|
160
|
+
else {
|
|
161
|
+
// Must have exactly 7 colons for 8 groups
|
|
162
|
+
if (colonCount !== 7)
|
|
163
|
+
return false;
|
|
164
|
+
}
|
|
165
|
+
// Check each part is valid hex
|
|
166
|
+
const parts = ip.split(':');
|
|
167
|
+
for (const part of parts) {
|
|
168
|
+
if (part === '')
|
|
169
|
+
continue; // Empty parts are OK with ::
|
|
170
|
+
if (part.length > 4)
|
|
171
|
+
return false;
|
|
172
|
+
if (!/^[0-9a-fA-F]+$/.test(part))
|
|
173
|
+
return false;
|
|
174
|
+
}
|
|
175
|
+
return true;
|
|
176
|
+
}
|
|
177
|
+
/**
 * Reports whether an IPv4 address string starts with one of the prefixes
 * listed in INTERNAL_IPV4_RANGES. The string is not validated as an address.
 *
 * @param ip - IPv4 address in dotted-decimal notation
 * @returns true as soon as one range pattern matches, false otherwise
 */
export function isInternalIPv4(ip) {
    for (const range of INTERNAL_IPV4_RANGES) {
        if (range.test(ip)) {
            return true;
        }
    }
    return false;
}
|
|
183
|
+
//# sourceMappingURL=ip-address.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ip-address.js","sourceRoot":"","sources":["../../src/recognizers/ip-address.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,OAAO,EAAa,eAAe,EAAE,MAAM,mBAAmB,CAAC;AAGxE;;;;GAIG;AACH,MAAM,YAAY,GAChB,oHAAoH,CAAC;AAEvH;;;GAGG;AACH,MAAM,aAAa,GAAG;IACpB,wCAAwC;IACxC,+CAA+C;IAE/C,4BAA4B;IAC5B,kCAAkC;IAClC,kDAAkD;IAClD,2DAA2D;IAC3D,2DAA2D;IAC3D,2DAA2D;IAC3D,2DAA2D;IAC3D,kDAAkD;IAClD,mDAAmD;IACnD,SAAS,EAAE,qBAAqB;IAEhC,oCAAoC;IACpC,wGAAwG;CACzG,CAAC;AAEF;;GAEG;AACH,MAAM,oBAAoB,GAAG;IAC3B,OAAO,EAAE,aAAa;IACtB,+BAA+B,EAAE,gBAAgB;IACjD,aAAa,EAAE,iBAAiB;IAChC,QAAQ,EAAE,WAAW;IACrB,MAAM,EAAE,UAAU;CACnB,CAAC;AAEF;;GAEG;AACH,MAAM,CAAC,MAAM,mBAAmB,GAAe;IAC7C,IAAI,EAAE,OAAO,CAAC,UAAU;IACxB,IAAI,EAAE,YAAY;IAClB,iBAAiB,EAAE,GAAG;IAEtB,IAAI,CAAC,IAAY;QACf,MAAM,OAAO,GAAgB,EAAE,CAAC;QAChC,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;QAE/B,sBAAsB;QACtB,MAAM,WAAW,GAAG,IAAI,MAAM,CAAC,YAAY,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QACzD,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;YAC/C,IAAI,KAAK,CAAC,KAAK,KAAK,SAAS;gBAAE,SAAS;YAExC,MAAM,EAAE,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YACpB,MAAM,GAAG,GAAG,GAAG,KAAK,CAAC,KAAK,IAAI,KAAK,CAAC,KAAK,GAAG,EAAE,CAAC,MAAM,EAAE,CAAC;YAExD,IAAI,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;gBAAE,SAAS;YAC5B,IAAI,CAAC,IAAI,CAAC,QAAS,CAAC,EAAE,CAAC;gBAAE,SAAS;YAElC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACd,OAAO,CAAC,IAAI,CAAC;gBACX,IAAI,EAAE,OAAO,CAAC,UAAU;gBACxB,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,GAAG,EAAE,KAAK,CAAC,KAAK,GAAG,EAAE,CAAC,MAAM;gBAC5B,UAAU,EAAE,IAAI,CAAC,iBAAiB;gBAClC,MAAM,EAAE,eAAe,CAAC,KAAK;gBAC7B,IAAI,EAAE,EAAE;aACT,CAAC,CAAC;QACL,CAAC;QAED,sBAAsB;QACtB,KAAK,MAAM,OAAO,IAAI,aAAa,EAAE,CAAC;YACpC,MAAM,aAAa,GAAG,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,EAAE,OAAO,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC;YAEpH,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;gBACjD,IAAI,KAAK,CAAC,KAAK,KAAK,SAAS;oBAAE,SAAS;gBAExC,MAAM,EAAE,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;gBAEpB,wDAAwD;gBACxD,IAAI,EAAE,CAAC
,MAAM,GAAG,CAAC;oBAAE,SAAS;gBAE5B,MAAM,GAAG,GAAG,GAAG,KAAK,CAAC,KAAK,IAAI,KAAK,CAAC,KAAK,GAAG,EAAE,CAAC,MAAM,EAAE,CAAC;gBAExD,IAAI,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;oBAAE,SAAS;gBAE5B,wBAAwB;gBACxB,IAAI,CAAC,WAAW,CAAC,EAAE,CAAC;oBAAE,SAAS;gBAE/B,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;gBACd,OAAO,CAAC,IAAI,CAAC;oBACX,IAAI,EAAE,OAAO,CAAC,UAAU;oBACxB,KAAK,EAAE,KAAK,CAAC,KAAK;oBAClB,GAAG,EAAE,KAAK,CAAC,KAAK,GAAG,EAAE,CAAC,MAAM;oBAC5B,UAAU,EAAE,IAAI,CAAC,iBAAiB,GAAG,IAAI,EAAE,qCAAqC;oBAChF,MAAM,EAAE,eAAe,CAAC,KAAK;oBAC7B,IAAI,EAAE,EAAE;iBACT,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,QAAQ,CAAC,EAAU;QACjB,qBAAqB;QACrB,IAAI,EAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;YAC1C,OAAO,WAAW,CAAC,EAAE,CAAC,CAAC;QACzB,CAAC;QAED,qBAAqB;QACrB,IAAI,EAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;YACrB,OAAO,WAAW,CAAC,EAAE,CAAC,CAAC;QACzB,CAAC;QAED,OAAO,KAAK,CAAC;IACf,CAAC;IAED,SAAS,CAAC,EAAU;QAClB,OAAO,EAAE,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;IACjC,CAAC;CACF,CAAC;AAEF;;GAEG;AACH,SAAS,WAAW,CAAC,EAAU;IAC7B,MAAM,KAAK,GAAG,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAE5B,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC;IAErC,IAAI,aAAa,GAAG,KAAK,CAAC;IAE1B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,GAAG,GAAG,QAAQ,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;QAC/B,IAAI,KAAK,CAAC,GAAG,CAAC,IAAI,GAAG,GAAG,CAAC,IAAI,GAAG,GAAG,GAAG;YAAE,OAAO,KAAK,CAAC;QACrD,gDAAgD;QAChD,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC;YAAE,OAAO,KAAK,CAAC;QAC1D,qEAAqE;QACrE,IAAI,GAAG,GAAG,CAAC;YAAE,aAAa,GAAG,IAAI,CAAC;IACpC,CAAC;IAED,gFAAgF;IAChF,OAAO,aAAa,CAAC;AACvB,CAAC;AAED;;GAEG;AACH,SAAS,WAAW,CAAC,EAAU;IAC7B,+BAA+B;IAC/B,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;QAC/D,MAAM,QAAQ,GAAG,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAC7B,OAAO,WAAW,CAAC,QAAQ,CAAC,CAAC;IAC/B,CAAC;IAED,gCAAgC;IAChC,MAAM,UAAU,GAAG,CAAC,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;IACjD,MAAM,cAAc,GAAG,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;IAEzC,
0CAA0C;IAC1C,IAAI,cAAc,EAAE,CAAC;QACnB,uBAAuB;QACvB,IAAI,CAAC,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,GAAG,CAAC;YAAE,OAAO,KAAK,CAAC;QACrD,mDAAmD;QACnD,IAAI,UAAU,GAAG,CAAC;YAAE,OAAO,KAAK,CAAC;IACnC,CAAC;SAAM,CAAC;QACN,0CAA0C;QAC1C,IAAI,UAAU,KAAK,CAAC;YAAE,OAAO,KAAK,CAAC;IACrC,CAAC;IAED,+BAA+B;IAC/B,MAAM,KAAK,GAAG,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAC5B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,IAAI,KAAK,EAAE;YAAE,SAAS,CAAC,6BAA6B;QACxD,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC;YAAE,OAAO,KAAK,CAAC;QAClC,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC;YAAE,OAAO,KAAK,CAAC;IACjD,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,EAAU;IACvC,OAAO,oBAAoB,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;AAClE,CAAC"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Phone Number Recognizer
|
|
3
|
+
* Country-aware patterns for DE, EN, FR with support for various formats
|
|
4
|
+
*/
|
|
5
|
+
import type { Recognizer } from './base.js';
|
|
6
|
+
/**
|
|
7
|
+
* Phone number recognizer with multi-region support
|
|
8
|
+
*/
|
|
9
|
+
export declare const phoneRecognizer: Recognizer;
|
|
10
|
+
//# sourceMappingURL=phone.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"phone.d.ts","sourceRoot":"","sources":["../../src/recognizers/phone.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,WAAW,CAAC;AAyC5C;;GAEG;AACH,eAAO,MAAM,eAAe,EAAE,UAqE7B,CAAC"}
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Phone Number Recognizer
|
|
3
|
+
* Country-aware patterns for DE, EN, FR with support for various formats
|
|
4
|
+
*/
|
|
5
|
+
import { PIIType, DetectionSource } from '../types/index.js';
|
|
6
|
+
/**
|
|
7
|
+
* Phone number patterns organized by region
|
|
8
|
+
* All patterns use word boundaries and allow common separators
|
|
9
|
+
*/
|
|
10
|
+
const PHONE_PATTERNS = {
|
|
11
|
+
// International format: +XX or 00XX followed by digits
|
|
12
|
+
international: /(?<![0-9])(?:\+|00)[1-9][0-9]{6,14}(?![0-9])/g,
|
|
13
|
+
// International with separators
|
|
14
|
+
internationalFormatted: /(?<![0-9])(?:\+|00)[1-9][0-9]{0,2}[\s.-]?(?:\([0-9]{1,4}\)|[0-9]{1,4})[\s.-]?[0-9]{2,4}[\s.-]?[0-9]{2,4}(?:[\s.-]?[0-9]{2,4})?(?![0-9])/g,
|
|
15
|
+
// German formats
|
|
16
|
+
// Standard: 0XX XXXXXXX or 0XXXX XXXXX
|
|
17
|
+
german: /(?<![0-9])0[1-9][0-9]{1,4}[\s/-]?[0-9]{3,8}(?![0-9])/g,
|
|
18
|
+
// With area code in parentheses: (0XX) XXXXXXX
|
|
19
|
+
germanParens: /(?<![0-9])\(0[1-9][0-9]{1,4}\)[\s]?[0-9]{3,8}(?![0-9])/g,
|
|
20
|
+
// US/UK formats
|
|
21
|
+
// US: (XXX) XXX-XXXX or XXX-XXX-XXXX
|
|
22
|
+
usFormat: /(?<![0-9])(?:\([0-9]{3}\)[\s.-]?|[0-9]{3}[\s.-])[0-9]{3}[\s.-][0-9]{4}(?![0-9])/g,
|
|
23
|
+
// UK: 0XXXX XXXXXX
|
|
24
|
+
ukFormat: /(?<![0-9])0[1-9][0-9]{2,4}[\s][0-9]{5,6}(?![0-9])/g,
|
|
25
|
+
// French formats: 0X XX XX XX XX or 0XXXXXXXXX
|
|
26
|
+
french: /(?<![0-9])0[1-9][0-9]{8}(?![0-9])/g,
|
|
27
|
+
frenchFormatted: /(?<![0-9])0[1-9](?:[\s.-]?[0-9]{2}){4}(?![0-9])/g,
|
|
28
|
+
};
|
|
29
|
+
/**
|
|
30
|
+
* Minimum number of digits for a valid phone number
|
|
31
|
+
*/
|
|
32
|
+
// Compared in phoneRecognizer.validate against the digit count left after stripping every non-digit character.
const MIN_DIGITS = 7;
|
|
33
|
+
/**
|
|
34
|
+
* Maximum number of digits for a valid phone number
|
|
35
|
+
*/
|
|
36
|
+
// Upper bound for the same stripped digit count; note that a leading "00" prefix counts toward it.
const MAX_DIGITS = 15;
|
|
37
|
+
/**
 * Recognizer for phone numbers in several regional formats.
 *
 * `find` tries every entry of PHONE_PATTERNS, keeps the candidates accepted
 * by `validate`, and hands the result to deduplicateOverlapping.
 */
export const phoneRecognizer = {
    type: PIIType.PHONE,
    name: 'Phone Number',
    defaultConfidence: 0.9,
    /**
     * Collects phone-number spans from `text`.
     * @param text - Text to scan
     * @returns Non-overlapping spans as returned by deduplicateOverlapping
     */
    find(text) {
        const found = [];
        const seenSpans = new Set();
        Object.values(PHONE_PATTERNS).forEach((pattern) => {
            // Fresh regex per pass so no lastIndex state is shared.
            const scanner = new RegExp(pattern.source, 'g');
            for (const hit of text.matchAll(scanner)) {
                const start = hit.index;
                if (start === undefined) {
                    continue;
                }
                const phone = hit[0];
                const end = start + phone.length;
                const spanKey = `${start}:${end}`;
                // Same span may be produced by more than one pattern; report it once.
                if (seenSpans.has(spanKey) || !this.validate(phone)) {
                    continue;
                }
                seenSpans.add(spanKey);
                found.push({
                    type: PIIType.PHONE,
                    start,
                    end,
                    confidence: this.defaultConfidence,
                    source: DetectionSource.REGEX,
                    text: phone,
                });
            }
        });
        return deduplicateOverlapping(found);
    },
    /**
     * Accepts a candidate when its digits (everything else stripped) number
     * between MIN_DIGITS and MAX_DIGITS, are not one digit repeated, and are
     * not a purely ascending/descending run.
     */
    validate(phone) {
        const digits = phone.replace(/\D/g, '');
        const lengthOk = digits.length >= MIN_DIGITS && digits.length <= MAX_DIGITS;
        if (!lengthOk) {
            return false;
        }
        const allSameDigit = /^(\d)\1+$/.test(digits);
        return !allSameDigit && !isSequential(digits);
    },
    /**
     * Reduces the number to its digits, keeping a leading "+" if present.
     */
    normalize(phone) {
        const digits = phone.replace(/\D/g, '');
        if (phone.startsWith('+')) {
            return '+' + digits;
        }
        return digits;
    },
};
|
|
99
|
+
/**
 * Tells whether a digit string is one unbroken run that rises by exactly 1
 * per position (e.g. 123456789) or falls by exactly 1 (e.g. 987654321).
 * Strings shorter than 5 characters are never considered sequential.
 *
 * @param digits - String of decimal digits
 * @returns true for a strictly +1 or strictly -1 run
 */
function isSequential(digits) {
    if (digits.length < 5) {
        return false;
    }
    const valueAt = (i) => parseInt(digits[i], 10);
    // The first pair fixes the direction; every later pair must repeat it.
    const step = valueAt(1) - valueAt(0);
    if (step !== 1 && step !== -1) {
        return false;
    }
    for (let i = 2; i < digits.length; i++) {
        if (valueAt(i) - valueAt(i - 1) !== step) {
            return false;
        }
    }
    return true;
}
|
|
119
|
+
/**
|
|
120
|
+
* Remove overlapping matches, keeping longer ones
|
|
121
|
+
*/
|
|
122
|
+
function deduplicateOverlapping(matches) {
|
|
123
|
+
if (matches.length <= 1)
|
|
124
|
+
return matches;
|
|
125
|
+
// Sort by start position
|
|
126
|
+
const sorted = [...matches].sort((a, b) => a.start - b.start);
|
|
127
|
+
const result = [];
|
|
128
|
+
for (const match of sorted) {
|
|
129
|
+
// Check if this overlaps with the last added match
|
|
130
|
+
const last = result[result.length - 1];
|
|
131
|
+
if (last !== undefined && match.start < last.end) {
|
|
132
|
+
// Overlapping - keep the longer one
|
|
133
|
+
if (match.end - match.start > last.end - last.start) {
|
|
134
|
+
result.pop();
|
|
135
|
+
result.push(match);
|
|
136
|
+
}
|
|
137
|
+
// Otherwise keep the existing one
|
|
138
|
+
}
|
|
139
|
+
else {
|
|
140
|
+
result.push(match);
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
return result;
|
|
144
|
+
}
|
|
145
|
+
//# sourceMappingURL=phone.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"phone.js","sourceRoot":"","sources":["../../src/recognizers/phone.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,OAAO,EAAa,eAAe,EAAE,MAAM,mBAAmB,CAAC;AAGxE;;;GAGG;AACH,MAAM,cAAc,GAAG;IACrB,uDAAuD;IACvD,aAAa,EAAE,+CAA+C;IAE9D,gCAAgC;IAChC,sBAAsB,EACpB,0IAA0I;IAE5I,iBAAiB;IACjB,uCAAuC;IACvC,MAAM,EAAE,uDAAuD;IAC/D,+CAA+C;IAC/C,YAAY,EAAE,yDAAyD;IAEvE,gBAAgB;IAChB,qCAAqC;IACrC,QAAQ,EAAE,kFAAkF;IAC5F,mBAAmB;IACnB,QAAQ,EAAE,oDAAoD;IAE9D,+CAA+C;IAC/C,MAAM,EAAE,oCAAoC;IAC5C,eAAe,EAAE,kDAAkD;CACpE,CAAC;AAEF;;GAEG;AACH,MAAM,UAAU,GAAG,CAAC,CAAC;AAErB;;GAEG;AACH,MAAM,UAAU,GAAG,EAAE,CAAC;AAEtB;;GAEG;AACH,MAAM,CAAC,MAAM,eAAe,GAAe;IACzC,IAAI,EAAE,OAAO,CAAC,KAAK;IACnB,IAAI,EAAE,cAAc;IACpB,iBAAiB,EAAE,GAAG;IAEtB,IAAI,CAAC,IAAY;QACf,MAAM,OAAO,GAAgB,EAAE,CAAC;QAChC,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;QAE/B,mBAAmB;QACnB,KAAK,MAAM,OAAO,IAAI,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,EAAE,CAAC;YACpD,MAAM,aAAa,GAAG,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;YAEtD,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;gBACjD,IAAI,KAAK,CAAC,KAAK,KAAK,SAAS;oBAAE,SAAS;gBAExC,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;gBACvB,MAAM,GAAG,GAAG,GAAG,KAAK,CAAC,KAAK,IAAI,KAAK,CAAC,KAAK,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;gBAE3D,8CAA8C;gBAC9C,IAAI,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;oBAAE,SAAS;gBAE5B,qBAAqB;gBACrB,IAAI,CAAC,IAAI,CAAC,QAAS,CAAC,KAAK,CAAC;oBAAE,SAAS;gBAErC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;gBACd,OAAO,CAAC,IAAI,CAAC;oBACX,IAAI,EAAE,OAAO,CAAC,KAAK;oBACnB,KAAK,EAAE,KAAK,CAAC,KAAK;oBAClB,GAAG,EAAE,KAAK,CAAC,KAAK,GAAG,KAAK,CAAC,MAAM;oBAC/B,UAAU,EAAE,IAAI,CAAC,iBAAiB;oBAClC,MAAM,EAAE,eAAe,CAAC,KAAK;oBAC7B,IAAI,EAAE,KAAK;iBACZ,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,kDAAkD;QAClD,OAAO,sBAAsB,CAAC,OAAO,CAAC,CAAC;IACzC,CAAC;IAED,QAAQ,CAAC,KAAa;QACpB,eAAe;QACf,MAAM,MAAM,GAAG,KAAK,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;QAExC,oBAAoB;QACpB,IAAI,MAAM,CAAC,MAAM,GAAG,UAAU,IAAI,MAAM,CAAC,MAAM,GAAG,UAAU,EAAE,CAAC;YAC7D,OAAO,KAAK,CAAC;QACf,CAAC;QAED,kDAAkD;QAClD,IAAI,WAAW,CAAC,IAAI,CAAC,MAAM,CAAC,E
AAE,CAAC;YAC7B,OAAO,KAAK,CAAC;QACf,CAAC;QAED,2BAA2B;QAC3B,IAAI,YAAY,CAAC,MAAM,CAAC,EAAE,CAAC;YACzB,OAAO,KAAK,CAAC;QACf,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC;IAED,SAAS,CAAC,KAAa;QACrB,mDAAmD;QACnD,MAAM,OAAO,GAAG,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC;QACtC,MAAM,MAAM,GAAG,KAAK,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;QACxC,OAAO,OAAO,CAAC,CAAC,CAAC,GAAG,GAAG,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC;IACzC,CAAC;CACF,CAAC;AAEF;;GAEG;AACH,SAAS,YAAY,CAAC,MAAc;IAClC,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,KAAK,CAAC;IAEpC,IAAI,SAAS,GAAG,IAAI,CAAC;IACrB,IAAI,UAAU,GAAG,IAAI,CAAC;IAEtB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACvC,MAAM,IAAI,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAE,EAAE,EAAE,CAAC,CAAC;QAC1C,MAAM,IAAI,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAE,EAAE,EAAE,CAAC,CAAC;QAEtC,IAAI,IAAI,KAAK,IAAI,GAAG,CAAC;YAAE,SAAS,GAAG,KAAK,CAAC;QACzC,IAAI,IAAI,KAAK,IAAI,GAAG,CAAC;YAAE,UAAU,GAAG,KAAK,CAAC;QAE1C,IAAI,CAAC,SAAS,IAAI,CAAC,UAAU;YAAE,OAAO,KAAK,CAAC;IAC9C,CAAC;IAED,OAAO,SAAS,IAAI,UAAU,CAAC;AACjC,CAAC;AAED;;GAEG;AACH,SAAS,sBAAsB,CAAC,OAAoB;IAClD,IAAI,OAAO,CAAC,MAAM,IAAI,CAAC;QAAE,OAAO,OAAO,CAAC;IAExC,yBAAyB;IACzB,MAAM,MAAM,GAAG,CAAC,GAAG,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;IAC9D,MAAM,MAAM,GAAgB,EAAE,CAAC;IAE/B,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,mDAAmD;QACnD,MAAM,IAAI,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAEvC,IAAI,IAAI,KAAK,SAAS,IAAI,KAAK,CAAC,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YACjD,oCAAoC;YACpC,IAAI,KAAK,CAAC,GAAG,GAAG,KAAK,CAAC,KAAK,GAAG,IAAI,CAAC,GAAG,GAAG,IAAI,CAAC,KAAK,EAAE,CAAC;gBACpD,MAAM,CAAC,GAAG,EAAE,CAAC;gBACb,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACrB,CAAC;YACD,kCAAkC;QACpC,CAAC;aAAM,CAAC;YACN,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACrB,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Recognizer Registry
|
|
3
|
+
* Central registry for all PII recognizers
|
|
4
|
+
*/
|
|
5
|
+
import { PIIType, SpanMatch, AnonymizationPolicy } from '../types/index.js';
|
|
6
|
+
import type { Recognizer } from './base.js';
|
|
7
|
+
/**
 * Holds PII recognizers grouped by the PII type they report, and runs them
 * against text according to an anonymization policy.
 */
export declare class RecognizerRegistry {
    private recognizers;
    /**
     * Adds a recognizer to the group for `recognizer.type`.
     * Earlier registrations for the same type are kept.
     */
    register(recognizer: Recognizer): void;
    /**
     * Registers each recognizer of the list, in order.
     */
    registerAll(recognizers: Recognizer[]): void;
    /**
     * Returns the recognizers registered for `type` (empty array if none).
     */
    getRecognizers(type: PIIType): Recognizer[];
    /**
     * Returns the recognizers of every type in one flat array.
     */
    getAllRecognizers(): Recognizer[];
    /**
     * Returns the PII types that currently have an entry in the registry.
     */
    getRegisteredTypes(): PIIType[];
    /**
     * Tells whether at least one recognizer is registered for `type`.
     */
    hasRecognizer(type: PIIType): boolean;
    /**
     * Drops every recognizer registered for `type`.
     */
    unregister(type: PIIType): void;
    /**
     * Empties the registry.
     */
    clear(): void;
    /**
     * Runs the registered recognizers over `text` and collects their matches.
     * A type is processed only when the policy lists it in both `enabledTypes`
     * and `regexEnabledTypes`; matches below the type's confidence threshold
     * (0.5 when the policy sets none) are discarded.
     * @param text - Text to analyze
     * @param policy - Anonymization policy to determine which types to detect
     */
    findAll(text: string, policy: AnonymizationPolicy): SpanMatch[];
}
|
|
51
|
+
/**
|
|
52
|
+
* Gets the global recognizer registry (singleton)
|
|
53
|
+
*/
|
|
54
|
+
export declare function getGlobalRegistry(): RecognizerRegistry;
|
|
55
|
+
/**
|
|
56
|
+
* Creates a new isolated registry (useful for testing)
|
|
57
|
+
*/
|
|
58
|
+
export declare function createRegistry(): RecognizerRegistry;
|
|
59
|
+
//# sourceMappingURL=registry.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"registry.d.ts","sourceRoot":"","sources":["../../src/recognizers/registry.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,mBAAmB,EAAE,MAAM,mBAAmB,CAAC;AAC5E,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,WAAW,CAAC;AAE5C;;GAEG;AACH,qBAAa,kBAAkB;IAC7B,OAAO,CAAC,WAAW,CAAyC;IAE5D;;OAEG;IACH,QAAQ,CAAC,UAAU,EAAE,UAAU,GAAG,IAAI;IAMtC;;OAEG;IACH,WAAW,CAAC,WAAW,EAAE,UAAU,EAAE,GAAG,IAAI;IAM5C;;OAEG;IACH,cAAc,CAAC,IAAI,EAAE,OAAO,GAAG,UAAU,EAAE;IAI3C;;OAEG;IACH,iBAAiB,IAAI,UAAU,EAAE;IAQjC;;OAEG;IACH,kBAAkB,IAAI,OAAO,EAAE;IAI/B;;OAEG;IACH,aAAa,CAAC,IAAI,EAAE,OAAO,GAAG,OAAO;IAKrC;;OAEG;IACH,UAAU,CAAC,IAAI,EAAE,OAAO,GAAG,IAAI;IAI/B;;OAEG;IACH,KAAK,IAAI,IAAI;IAIb;;;;OAIG;IACH,OAAO,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,mBAAmB,GAAG,SAAS,EAAE;CA0BhE;AAOD;;GAEG;AACH,wBAAgB,iBAAiB,IAAI,kBAAkB,CAKtD;AAED;;GAEG;AACH,wBAAgB,cAAc,IAAI,kBAAkB,CAEnD"}
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Recognizer Registry
|
|
3
|
+
* Central registry for all PII recognizers
|
|
4
|
+
*/
|
|
5
|
+
/**
|
|
6
|
+
* Registry for managing PII recognizers
|
|
7
|
+
*/
|
|
8
|
+
export class RecognizerRegistry {
|
|
9
|
+
recognizers = new Map();
|
|
10
|
+
/**
|
|
11
|
+
* Registers a recognizer for a PII type
|
|
12
|
+
*/
|
|
13
|
+
register(recognizer) {
|
|
14
|
+
const existing = this.recognizers.get(recognizer.type) ?? [];
|
|
15
|
+
existing.push(recognizer);
|
|
16
|
+
this.recognizers.set(recognizer.type, existing);
|
|
17
|
+
}
|
|
18
|
+
/**
|
|
19
|
+
* Registers multiple recognizers
|
|
20
|
+
*/
|
|
21
|
+
registerAll(recognizers) {
|
|
22
|
+
for (const recognizer of recognizers) {
|
|
23
|
+
this.register(recognizer);
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
/**
|
|
27
|
+
* Gets all recognizers for a specific type
|
|
28
|
+
*/
|
|
29
|
+
getRecognizers(type) {
|
|
30
|
+
return this.recognizers.get(type) ?? [];
|
|
31
|
+
}
|
|
32
|
+
/**
|
|
33
|
+
* Gets all registered recognizers
|
|
34
|
+
*/
|
|
35
|
+
getAllRecognizers() {
|
|
36
|
+
const all = [];
|
|
37
|
+
for (const recognizers of this.recognizers.values()) {
|
|
38
|
+
all.push(...recognizers);
|
|
39
|
+
}
|
|
40
|
+
return all;
|
|
41
|
+
}
|
|
42
|
+
/**
|
|
43
|
+
* Gets all registered PII types
|
|
44
|
+
*/
|
|
45
|
+
getRegisteredTypes() {
|
|
46
|
+
return Array.from(this.recognizers.keys());
|
|
47
|
+
}
|
|
48
|
+
/**
|
|
49
|
+
* Checks if a recognizer is registered for a type
|
|
50
|
+
*/
|
|
51
|
+
hasRecognizer(type) {
|
|
52
|
+
const recognizers = this.recognizers.get(type);
|
|
53
|
+
return recognizers !== undefined && recognizers.length > 0;
|
|
54
|
+
}
|
|
55
|
+
/**
|
|
56
|
+
* Removes all recognizers for a type
|
|
57
|
+
*/
|
|
58
|
+
unregister(type) {
|
|
59
|
+
this.recognizers.delete(type);
|
|
60
|
+
}
|
|
61
|
+
/**
|
|
62
|
+
* Clears all recognizers
|
|
63
|
+
*/
|
|
64
|
+
clear() {
|
|
65
|
+
this.recognizers.clear();
|
|
66
|
+
}
|
|
67
|
+
/**
|
|
68
|
+
* Runs all enabled recognizers on text and returns matches
|
|
69
|
+
* @param text - Text to analyze
|
|
70
|
+
* @param policy - Anonymization policy to determine which types to detect
|
|
71
|
+
*/
|
|
72
|
+
findAll(text, policy) {
|
|
73
|
+
const matches = [];
|
|
74
|
+
for (const [type, recognizers] of this.recognizers) {
|
|
75
|
+
// Skip types not enabled in policy
|
|
76
|
+
if (!policy.enabledTypes.has(type) || !policy.regexEnabledTypes.has(type)) {
|
|
77
|
+
continue;
|
|
78
|
+
}
|
|
79
|
+
// Get confidence threshold for this type
|
|
80
|
+
const threshold = policy.confidenceThresholds.get(type) ?? 0.5;
|
|
81
|
+
for (const recognizer of recognizers) {
|
|
82
|
+
const typeMatches = recognizer.find(text);
|
|
83
|
+
// Filter by confidence threshold
|
|
84
|
+
for (const match of typeMatches) {
|
|
85
|
+
if (match.confidence >= threshold) {
|
|
86
|
+
matches.push(match);
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
return matches;
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
/**
 * Module-level holder for the shared registry; stays `null` until
 * getGlobalRegistry() creates the instance.
 */
let globalRegistry = null;
/**
 * Returns the shared recognizer registry, creating it on the first call
 * (singleton).
 */
export function getGlobalRegistry() {
    // Fast path: the instance was already created by an earlier call.
    if (globalRegistry !== null) {
        return globalRegistry;
    }
    globalRegistry = new RecognizerRegistry();
    return globalRegistry;
}
|
|
107
|
+
/**
 * Builds a brand-new registry instance on every call, independent of the
 * one returned by getGlobalRegistry() (useful for testing).
 */
export function createRegistry() {
    const isolatedRegistry = new RecognizerRegistry();
    return isolatedRegistry;
}
|
|
113
|
+
//# sourceMappingURL=registry.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"registry.js","sourceRoot":"","sources":["../../src/recognizers/registry.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAKH;;GAEG;AACH,MAAM,OAAO,kBAAkB;IACrB,WAAW,GAA+B,IAAI,GAAG,EAAE,CAAC;IAE5D;;OAEG;IACH,QAAQ,CAAC,UAAsB;QAC7B,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;QAC7D,QAAQ,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QAC1B,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,UAAU,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;IAClD,CAAC;IAED;;OAEG;IACH,WAAW,CAAC,WAAyB;QACnC,KAAK,MAAM,UAAU,IAAI,WAAW,EAAE,CAAC;YACrC,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC;QAC5B,CAAC;IACH,CAAC;IAED;;OAEG;IACH,cAAc,CAAC,IAAa;QAC1B,OAAO,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;IAC1C,CAAC;IAED;;OAEG;IACH,iBAAiB;QACf,MAAM,GAAG,GAAiB,EAAE,CAAC;QAC7B,KAAK,MAAM,WAAW,IAAI,IAAI,CAAC,WAAW,CAAC,MAAM,EAAE,EAAE,CAAC;YACpD,GAAG,CAAC,IAAI,CAAC,GAAG,WAAW,CAAC,CAAC;QAC3B,CAAC;QACD,OAAO,GAAG,CAAC;IACb,CAAC;IAED;;OAEG;IACH,kBAAkB;QAChB,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,CAAC;IAC7C,CAAC;IAED;;OAEG;IACH,aAAa,CAAC,IAAa;QACzB,MAAM,WAAW,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QAC/C,OAAO,WAAW,KAAK,SAAS,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC;IAC7D,CAAC;IAED;;OAEG;IACH,UAAU,CAAC,IAAa;QACtB,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IAChC,CAAC;IAED;;OAEG;IACH,KAAK;QACH,IAAI,CAAC,WAAW,CAAC,KAAK,EAAE,CAAC;IAC3B,CAAC;IAED;;;;OAIG;IACH,OAAO,CAAC,IAAY,EAAE,MAA2B;QAC/C,MAAM,OAAO,GAAgB,EAAE,CAAC;QAEhC,KAAK,MAAM,CAAC,IAAI,EAAE,WAAW,CAAC,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YACnD,mCAAmC;YACnC,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,iBAAiB,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC1E,SAAS;YACX,CAAC;YAED,yCAAyC;YACzC,MAAM,SAAS,GAAG,MAAM,CAAC,oBAAoB,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC;YAE/D,KAAK,MAAM,UAAU,IAAI,WAAW,EAAE,CAAC;gBACrC,MAAM,WAAW,GAAG,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBAE1C,iCAAiC;gBACjC,KAAK,MAAM,KAAK,IAAI,WAAW,EAAE,CAAC;oBAChC,IAAI,KAAK,CAAC,UAAU,IAAI,SAAS,EAAE,CAAC;wBAClC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;oBACtB,CAAC;gBACH,CAAC;YACH,CAAC;
QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;CACF;AAED;;GAEG;AACH,IAAI,cAAc,GAA8B,IAAI,CAAC;AAErD;;GAEG;AACH,MAAM,UAAU,iBAAiB;IAC/B,IAAI,cAAc,KAAK,IAAI,EAAE,CAAC;QAC5B,cAAc,GAAG,IAAI,kBAAkB,EAAE,CAAC;IAC5C,CAAC;IACD,OAAO,cAAc,CAAC;AACxB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,cAAc;IAC5B,OAAO,IAAI,kBAAkB,EAAE,CAAC;AAClC,CAAC"}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* URL Recognizer
|
|
3
|
+
* Detects URLs with various protocols
|
|
4
|
+
*/
|
|
5
|
+
import type { Recognizer } from './base.js';
|
|
6
|
+
/**
 * URL recognizer.
 *
 * Exported recognizer object for URLs, typed with the `Recognizer` interface
 * imported from './base.js'.
 */
|
|
9
|
+
export declare const urlRecognizer: Recognizer;
|
|
10
|
+
/**
 * Extracts the domain from a URL.
 *
 * @param url - URL string to extract the domain from.
 * @returns The domain as a string, or `null`.
 *   NOTE(review): `null` presumably means no domain could be extracted from
 *   `url`; confirm against the implementation, which this declaration file
 *   does not show.
 */
|
|
13
|
+
export declare function extractDomain(url: string): string | null;
|
|
14
|
+
//# sourceMappingURL=url.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"url.d.ts","sourceRoot":"","sources":["../../src/recognizers/url.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,WAAW,CAAC;AAoB5C;;GAEG;AACH,eAAO,MAAM,aAAa,EAAE,UA6D3B,CAAC;AA4BF;;GAEG;AACH,wBAAgB,aAAa,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAgBxD"}
|