@elanlanguages/bridge-anonymization 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +382 -0
- package/dist/crypto/index.d.ts +6 -0
- package/dist/crypto/index.d.ts.map +1 -0
- package/dist/crypto/index.js +6 -0
- package/dist/crypto/index.js.map +1 -0
- package/dist/crypto/pii-map-crypto.d.ts +100 -0
- package/dist/crypto/pii-map-crypto.d.ts.map +1 -0
- package/dist/crypto/pii-map-crypto.js +163 -0
- package/dist/crypto/pii-map-crypto.js.map +1 -0
- package/dist/index.d.ts +173 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +294 -0
- package/dist/index.js.map +1 -0
- package/dist/ner/bio-decoder.d.ts +64 -0
- package/dist/ner/bio-decoder.d.ts.map +1 -0
- package/dist/ner/bio-decoder.js +216 -0
- package/dist/ner/bio-decoder.js.map +1 -0
- package/dist/ner/index.d.ts +10 -0
- package/dist/ner/index.d.ts.map +1 -0
- package/dist/ner/index.js +10 -0
- package/dist/ner/index.js.map +1 -0
- package/dist/ner/model-manager.d.ts +102 -0
- package/dist/ner/model-manager.d.ts.map +1 -0
- package/dist/ner/model-manager.js +253 -0
- package/dist/ner/model-manager.js.map +1 -0
- package/dist/ner/ner-model.d.ts +114 -0
- package/dist/ner/ner-model.d.ts.map +1 -0
- package/dist/ner/ner-model.js +240 -0
- package/dist/ner/ner-model.js.map +1 -0
- package/dist/ner/onnx-runtime.d.ts +45 -0
- package/dist/ner/onnx-runtime.d.ts.map +1 -0
- package/dist/ner/onnx-runtime.js +99 -0
- package/dist/ner/onnx-runtime.js.map +1 -0
- package/dist/ner/tokenizer.d.ts +140 -0
- package/dist/ner/tokenizer.d.ts.map +1 -0
- package/dist/ner/tokenizer.js +341 -0
- package/dist/ner/tokenizer.js.map +1 -0
- package/dist/pipeline/index.d.ts +9 -0
- package/dist/pipeline/index.d.ts.map +1 -0
- package/dist/pipeline/index.js +9 -0
- package/dist/pipeline/index.js.map +1 -0
- package/dist/pipeline/prenormalize.d.ts +48 -0
- package/dist/pipeline/prenormalize.d.ts.map +1 -0
- package/dist/pipeline/prenormalize.js +94 -0
- package/dist/pipeline/prenormalize.js.map +1 -0
- package/dist/pipeline/resolver.d.ts +56 -0
- package/dist/pipeline/resolver.d.ts.map +1 -0
- package/dist/pipeline/resolver.js +238 -0
- package/dist/pipeline/resolver.js.map +1 -0
- package/dist/pipeline/tagger.d.ts +74 -0
- package/dist/pipeline/tagger.d.ts.map +1 -0
- package/dist/pipeline/tagger.js +169 -0
- package/dist/pipeline/tagger.js.map +1 -0
- package/dist/pipeline/validator.d.ts +65 -0
- package/dist/pipeline/validator.d.ts.map +1 -0
- package/dist/pipeline/validator.js +264 -0
- package/dist/pipeline/validator.js.map +1 -0
- package/dist/recognizers/base.d.ts +78 -0
- package/dist/recognizers/base.d.ts.map +1 -0
- package/dist/recognizers/base.js +100 -0
- package/dist/recognizers/base.js.map +1 -0
- package/dist/recognizers/bic-swift.d.ts +10 -0
- package/dist/recognizers/bic-swift.d.ts.map +1 -0
- package/dist/recognizers/bic-swift.js +107 -0
- package/dist/recognizers/bic-swift.js.map +1 -0
- package/dist/recognizers/credit-card.d.ts +32 -0
- package/dist/recognizers/credit-card.d.ts.map +1 -0
- package/dist/recognizers/credit-card.js +160 -0
- package/dist/recognizers/credit-card.js.map +1 -0
- package/dist/recognizers/custom-id.d.ts +28 -0
- package/dist/recognizers/custom-id.d.ts.map +1 -0
- package/dist/recognizers/custom-id.js +116 -0
- package/dist/recognizers/custom-id.js.map +1 -0
- package/dist/recognizers/email.d.ts +10 -0
- package/dist/recognizers/email.d.ts.map +1 -0
- package/dist/recognizers/email.js +75 -0
- package/dist/recognizers/email.js.map +1 -0
- package/dist/recognizers/iban.d.ts +14 -0
- package/dist/recognizers/iban.d.ts.map +1 -0
- package/dist/recognizers/iban.js +67 -0
- package/dist/recognizers/iban.js.map +1 -0
- package/dist/recognizers/index.d.ts +20 -0
- package/dist/recognizers/index.d.ts.map +1 -0
- package/dist/recognizers/index.js +42 -0
- package/dist/recognizers/index.js.map +1 -0
- package/dist/recognizers/ip-address.d.ts +14 -0
- package/dist/recognizers/ip-address.d.ts.map +1 -0
- package/dist/recognizers/ip-address.js +183 -0
- package/dist/recognizers/ip-address.js.map +1 -0
- package/dist/recognizers/phone.d.ts +10 -0
- package/dist/recognizers/phone.d.ts.map +1 -0
- package/dist/recognizers/phone.js +145 -0
- package/dist/recognizers/phone.js.map +1 -0
- package/dist/recognizers/registry.d.ts +59 -0
- package/dist/recognizers/registry.d.ts.map +1 -0
- package/dist/recognizers/registry.js +113 -0
- package/dist/recognizers/registry.js.map +1 -0
- package/dist/recognizers/url.d.ts +14 -0
- package/dist/recognizers/url.d.ts.map +1 -0
- package/dist/recognizers/url.js +121 -0
- package/dist/recognizers/url.js.map +1 -0
- package/dist/types/index.d.ts +134 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +69 -0
- package/dist/types/index.js.map +1 -0
- package/dist/types/pii-types.d.ts +50 -0
- package/dist/types/pii-types.d.ts.map +1 -0
- package/dist/types/pii-types.js +114 -0
- package/dist/types/pii-types.js.map +1 -0
- package/dist/utils/iban-checksum.d.ts +23 -0
- package/dist/utils/iban-checksum.d.ts.map +1 -0
- package/dist/utils/iban-checksum.js +106 -0
- package/dist/utils/iban-checksum.js.map +1 -0
- package/dist/utils/index.d.ts +8 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +8 -0
- package/dist/utils/index.js.map +1 -0
- package/dist/utils/luhn.d.ts +17 -0
- package/dist/utils/luhn.d.ts.map +1 -0
- package/dist/utils/luhn.js +55 -0
- package/dist/utils/luhn.js.map +1 -0
- package/dist/utils/offsets.d.ts +86 -0
- package/dist/utils/offsets.d.ts.map +1 -0
- package/dist/utils/offsets.js +124 -0
- package/dist/utils/offsets.js.map +1 -0
- package/package.json +62 -0
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Credit Card Number Recognizer
|
|
3
|
+
* Detects major card types with Luhn checksum validation
|
|
4
|
+
*/
|
|
5
|
+
import { PIIType, DetectionSource } from '../types/index.js';
|
|
6
|
+
import { validateLuhn } from '../utils/luhn.js';
|
|
7
|
+
/**
|
|
8
|
+
* Credit card patterns for major card types
|
|
9
|
+
* All patterns allow optional separators (spaces, dashes)
|
|
10
|
+
*/
|
|
11
|
+
const CARD_PATTERNS = {
|
|
12
|
+
// Visa: 13 or 16 digits, starts with 4
|
|
13
|
+
visa: /\b4[0-9]{3}[\s-]?[0-9]{4}[\s-]?[0-9]{4}[\s-]?[0-9]{1,4}\b/g,
|
|
14
|
+
// Mastercard: 16 digits, starts with 51-55 or 2221-2720
|
|
15
|
+
mastercard: /\b(?:5[1-5][0-9]{2}|222[1-9]|22[3-9][0-9]|2[3-6][0-9]{2}|27[01][0-9]|2720)[\s-]?[0-9]{4}[\s-]?[0-9]{4}[\s-]?[0-9]{4}\b/g,
|
|
16
|
+
// American Express: 15 digits, starts with 34 or 37
|
|
17
|
+
amex: /\b3[47][0-9]{2}[\s-]?[0-9]{6}[\s-]?[0-9]{5}\b/g,
|
|
18
|
+
// Discover: 16 digits, starts with 6011, 644-649, 65
|
|
19
|
+
discover: /\b(?:6011|64[4-9][0-9]|65[0-9]{2})[\s-]?[0-9]{4}[\s-]?[0-9]{4}[\s-]?[0-9]{4}\b/g,
|
|
20
|
+
// Diners Club: 14 digits, starts with 36, 38, or 300-305
|
|
21
|
+
diners: /\b(?:36[0-9]{2}|38[0-9]{2}|30[0-5][0-9])[\s-]?[0-9]{4}[\s-]?[0-9]{4}[\s-]?[0-9]{2}\b/g,
|
|
22
|
+
// JCB: 16 digits, starts with 3528-3589
|
|
23
|
+
jcb: /\b35(?:2[89]|[3-8][0-9])[\s-]?[0-9]{4}[\s-]?[0-9]{4}[\s-]?[0-9]{4}\b/g,
|
|
24
|
+
// Generic 16-digit pattern (fallback)
|
|
25
|
+
generic: /\b[0-9]{4}[\s-]?[0-9]{4}[\s-]?[0-9]{4}[\s-]?[0-9]{4}\b/g,
|
|
26
|
+
};
|
|
27
|
+
/**
|
|
28
|
+
* Credit card recognizer with Luhn validation
|
|
29
|
+
*/
|
|
30
|
+
export const creditCardRecognizer = {
    type: PIIType.CREDIT_CARD,
    name: 'Credit Card Number',
    defaultConfidence: 0.98,
    // Scans `text` with every pattern in CARD_PATTERNS (in declaration order),
    // keeps hits that pass validate(), and finally collapses overlapping hits.
    find(text) {
        const accepted = [];
        const acceptedSpans = new Set();
        for (const [brand, brandPattern] of Object.entries(CARD_PATTERNS)) {
            // A fresh RegExp per scan; only the pattern source is reused.
            const scanner = new RegExp(brandPattern.source, 'g');
            for (const hit of text.matchAll(scanner)) {
                const start = hit.index;
                if (start === undefined) {
                    continue;
                }
                const candidate = hit[0];
                const end = start + candidate.length;
                const spanKey = `${start}:${end}`;
                // Checks run in this order: span already accepted, checksum
                // validation, then the extra heuristic for the generic fallback.
                const isAcceptable = !acceptedSpans.has(spanKey)
                    && this.validate(candidate)
                    && !(brand === 'generic' && !looksLikeCreditCard(candidate));
                if (isAcceptable) {
                    acceptedSpans.add(spanKey);
                    accepted.push({
                        type: PIIType.CREDIT_CARD,
                        start: start,
                        end: end,
                        confidence: this.defaultConfidence,
                        source: DetectionSource.REGEX,
                        text: candidate,
                    });
                }
            }
        }
        return deduplicateOverlapping(accepted);
    },
    // True when the digits of `cardNumber` number 13-19, pass validateLuhn(),
    // and are not one digit repeated.
    validate(cardNumber) {
        const digitsOnly = cardNumber.replace(/\D/g, '');
        const lengthInRange = digitsOnly.length >= 13 && digitsOnly.length <= 19;
        if (!lengthInRange) {
            return false;
        }
        if (!validateLuhn(digitsOnly)) {
            return false;
        }
        const isSingleRepeatedDigit = /^(\d)\1+$/.test(digitsOnly);
        return !isSingleRepeatedDigit;
    },
    // Strips every non-digit character, leaving the bare card number.
    normalize(cardNumber) {
        const nonDigits = /\D/g;
        return cardNumber.replace(nonDigits, '');
    },
};
|
|
90
|
+
/**
|
|
91
|
+
* Additional heuristics for generic 16-digit sequences
|
|
92
|
+
*/
|
|
93
|
+
function looksLikeCreditCard(number) {
    // Heuristic for generic 16-digit hits: accept when the digits begin with a
    // listed issuer prefix, or when the raw text is written as four groups of
    // four digits separated by whitespace or a dash.
    const digits = number.replace(/\D/g, '');
    const knownPrefixes = [
        '4', // Visa
        '5', // Mastercard
        '34',
        '37', // Amex
        '6011',
        '65', // Discover
        '36',
        '38', // Diners
        '35', // JCB
    ];
    const hasKnownPrefix = knownPrefixes.some((prefix) => digits.startsWith(prefix));
    if (hasKnownPrefix) {
        return true;
    }
    const groupedLikeCard = /\d{4}[\s-]\d{4}[\s-]\d{4}[\s-]\d{4}/;
    return groupedLikeCard.test(number);
}
|
|
118
|
+
/**
|
|
119
|
+
* Remove overlapping matches
|
|
120
|
+
*/
|
|
121
|
+
function deduplicateOverlapping(matches) {
    // Zero or one match cannot overlap; the input array itself is returned.
    if (matches.length <= 1) {
        return matches;
    }
    // Work on a copy ordered by start offset so the caller's array is untouched.
    const ordered = matches.slice().sort((left, right) => left.start - right.start);
    const kept = [];
    for (let i = 0; i < ordered.length; i++) {
        const candidate = ordered[i];
        const previous = kept[kept.length - 1];
        if (previous === undefined || candidate.start >= previous.end) {
            // No overlap with the most recently kept match.
            kept.push(candidate);
            continue;
        }
        // Overlap: the candidate wins only if it is more confident or longer;
        // otherwise it is dropped and the earlier match stays.
        const candidateLength = candidate.end - candidate.start;
        const previousLength = previous.end - previous.start;
        if (candidate.confidence > previous.confidence || candidateLength > previousLength) {
            kept[kept.length - 1] = candidate;
        }
    }
    return kept;
}
|
|
141
|
+
/**
|
|
142
|
+
* Identifies the card type from a card number
|
|
143
|
+
*/
|
|
144
|
+
export function identifyCardType(cardNumber) {
    // Separators are ignored; only the leading digits decide the brand.
    const digits = cardNumber.replace(/\D/g, '');
    // Rules are evaluated top to bottom and the first matching prefix wins.
    const prefixRules = [
        { brand: 'visa', prefix: /^4/ },
        { brand: 'mastercard', prefix: /^5[1-5]/ },
        { brand: 'mastercard', prefix: /^2(?:2[2-9][1-9]|2[3-9]|[3-6]|7[01]|720)/ },
        { brand: 'amex', prefix: /^3[47]/ },
        { brand: 'discover', prefix: /^6(?:011|4[4-9]|5)/ },
        { brand: 'diners', prefix: /^3(?:6|8|0[0-5])/ },
        { brand: 'jcb', prefix: /^35(?:2[89]|[3-8])/ },
    ];
    for (const rule of prefixRules) {
        if (rule.prefix.test(digits)) {
            return rule.brand;
        }
    }
    return 'unknown';
}
|
|
160
|
+
//# sourceMappingURL=credit-card.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"credit-card.js","sourceRoot":"","sources":["../../src/recognizers/credit-card.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,OAAO,EAAa,eAAe,EAAE,MAAM,mBAAmB,CAAC;AAExE,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAEhD;;;GAGG;AACH,MAAM,aAAa,GAAG;IACpB,uCAAuC;IACvC,IAAI,EAAE,4DAA4D;IAElE,wDAAwD;IACxD,UAAU,EACR,yHAAyH;IAE3H,oDAAoD;IACpD,IAAI,EAAE,gDAAgD;IAEtD,qDAAqD;IACrD,QAAQ,EACN,iFAAiF;IAEnF,yDAAyD;IACzD,MAAM,EAAE,uFAAuF;IAE/F,wCAAwC;IACxC,GAAG,EAAE,uEAAuE;IAE5E,sCAAsC;IACtC,OAAO,EAAE,yDAAyD;CACnE,CAAC;AAOF;;GAEG;AACH,MAAM,CAAC,MAAM,oBAAoB,GAAe;IAC9C,IAAI,EAAE,OAAO,CAAC,WAAW;IACzB,IAAI,EAAE,oBAAoB;IAC1B,iBAAiB,EAAE,IAAI;IAEvB,IAAI,CAAC,IAAY;QACf,MAAM,OAAO,GAAgB,EAAE,CAAC;QAChC,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;QAE/B,6BAA6B;QAC7B,KAAK,MAAM,CAAC,QAAQ,EAAE,OAAO,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,aAAa,CAAC,EAAE,CAAC;YAChE,MAAM,aAAa,GAAG,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;YAEtD,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;gBACjD,IAAI,KAAK,CAAC,KAAK,KAAK,SAAS;oBAAE,SAAS;gBAExC,MAAM,UAAU,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;gBAC5B,MAAM,GAAG,GAAG,GAAG,KAAK,CAAC,KAAK,IAAI,KAAK,CAAC,KAAK,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC;gBAEhE,IAAI,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;oBAAE,SAAS;gBAE5B,8BAA8B;gBAC9B,IAAI,CAAC,IAAI,CAAC,QAAS,CAAC,UAAU,CAAC;oBAAE,SAAS;gBAE1C,6DAA6D;gBAC7D,IAAI,QAAQ,KAAK,SAAS,IAAI,CAAC,mBAAmB,CAAC,UAAU,CAAC,EAAE,CAAC;oBAC/D,SAAS;gBACX,CAAC;gBAED,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;gBACd,OAAO,CAAC,IAAI,CAAC;oBACX,IAAI,EAAE,OAAO,CAAC,WAAW;oBACzB,KAAK,EAAE,KAAK,CAAC,KAAK;oBAClB,GAAG,EAAE,KAAK,CAAC,KAAK,GAAG,UAAU,CAAC,MAAM;oBACpC,UAAU,EAAE,IAAI,CAAC,iBAAiB;oBAClC,MAAM,EAAE,eAAe,CAAC,KAAK;oBAC7B,IAAI,EAAE,UAAU;iBACjB,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,6BAA6B;QAC7B,OAAO,sBAAsB,CAAC,OAAO,CAAC,CAAC;IACzC,CAAC;IAED,QAAQ,CAAC,UAAkB;QACzB,sBAAsB;QACtB,MAAM,MAAM,GAAG,UAAU,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;QAE7C,8BAA8B;QAC9B,IAAI,MAAM,CAAC,MAAM,GAAG,EAAE,IAAI,MAAM,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;YAC7C,OAAO,KAAK,CAAC;QACf,CAAC;QAED,yBAAyB;QAC
zB,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,EAAE,CAAC;YAC1B,OAAO,KAAK,CAAC;QACf,CAAC;QAED,+BAA+B;QAC/B,IAAI,WAAW,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC;YAC7B,OAAO,KAAK,CAAC;QACf,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC;IAED,SAAS,CAAC,UAAkB;QAC1B,wCAAwC;QACxC,OAAO,UAAU,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;IACvC,CAAC;CACF,CAAC;AAEF;;GAEG;AACH,SAAS,mBAAmB,CAAC,MAAc;IACzC,MAAM,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;IAEzC,8CAA8C;IAC9C,MAAM,aAAa,GAAG;QACpB,GAAG,EAAE,OAAO;QACZ,GAAG,EAAE,aAAa;QAClB,IAAI;QACJ,IAAI,EAAE,OAAO;QACb,MAAM;QACN,IAAI,EAAE,WAAW;QACjB,IAAI;QACJ,IAAI,EAAE,SAAS;QACf,IAAI,EAAE,MAAM;KACb,CAAC;IAEF,KAAK,MAAM,MAAM,IAAI,aAAa,EAAE,CAAC;QACnC,IAAI,MAAM,CAAC,UAAU,CAAC,MAAM,CAAC,EAAE,CAAC;YAC9B,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED,8DAA8D;IAC9D,IAAI,qCAAqC,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC;QACvD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;GAEG;AACH,SAAS,sBAAsB,CAAC,OAAoB;IAClD,IAAI,OAAO,CAAC,MAAM,IAAI,CAAC;QAAE,OAAO,OAAO,CAAC;IAExC,MAAM,MAAM,GAAG,CAAC,GAAG,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;IAC9D,MAAM,MAAM,GAAgB,EAAE,CAAC;IAE/B,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,MAAM,IAAI,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAEvC,IAAI,IAAI,KAAK,SAAS,IAAI,KAAK,CAAC,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YACjD,sDAAsD;YACtD,IAAI,KAAK,CAAC,UAAU,GAAG,IAAI,CAAC,UAAU,IAAI,KAAK,CAAC,GAAG,GAAG,KAAK,CAAC,KAAK,GAAG,IAAI,CAAC,GAAG,GAAG,IAAI,CAAC,KAAK,EAAE,CAAC;gBAC1F,MAAM,CAAC,GAAG,EAAE,CAAC;gBACb,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACrB,CAAC;QACH,CAAC;aAAM,CAAC;YACN,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACrB,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,gBAAgB,CAAC,UAAkB;IACjD,MAAM,MAAM,GAAG,UAAU,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;IAE7C,IAAI,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC;QAAE,OAAO,MAAM,CAAC;IACrC,IAAI,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,0CAA0C,CAAC,IAAI,CAAC,MAAM,CAAC;QAAE,OAAO,YAAY,CAAC;IAC3G,IAAI,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC;QAAE,OAAO,MAAM,CAAC;IACzC,IAAI,
oBAAoB,CAAC,IAAI,CAAC,MAAM,CAAC;QAAE,OAAO,UAAU,CAAC;IACzD,IAAI,kBAAkB,CAAC,IAAI,CAAC,MAAM,CAAC;QAAE,OAAO,QAAQ,CAAC;IACrD,IAAI,oBAAoB,CAAC,IAAI,CAAC,MAAM,CAAC;QAAE,OAAO,KAAK,CAAC;IAEpD,OAAO,SAAS,CAAC;AACnB,CAAC"}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Custom ID Recognizer
|
|
3
|
+
* Configurable recognizer for domain-specific identifiers
|
|
4
|
+
* (Case IDs, Customer IDs, File References, etc.)
|
|
5
|
+
*/
|
|
6
|
+
import { CustomIdPattern } from '../types/index.js';
|
|
7
|
+
import type { Recognizer } from './base.js';
|
|
8
|
+
/**
|
|
9
|
+
* Creates a custom ID recognizer from patterns defined in policy
|
|
10
|
+
*/
|
|
11
|
+
export declare function createCustomIdRecognizer(patterns: CustomIdPattern[]): Recognizer;
|
|
12
|
+
/**
|
|
13
|
+
* Common patterns for banking/government IDs that can be used as templates
|
|
14
|
+
*/
|
|
15
|
+
export declare const COMMON_ID_PATTERNS: Record<string, RegExp>;
|
|
16
|
+
/**
|
|
17
|
+
* Creates a standard case ID recognizer
|
|
18
|
+
*/
|
|
19
|
+
export declare function createCaseIdRecognizer(): Recognizer;
|
|
20
|
+
/**
|
|
21
|
+
* Creates a standard customer ID recognizer
|
|
22
|
+
*/
|
|
23
|
+
export declare function createCustomerIdRecognizer(): Recognizer;
|
|
24
|
+
/**
|
|
25
|
+
* Validates that a string looks like a structured ID (not just random text)
|
|
26
|
+
*/
|
|
27
|
+
export declare function isStructuredId(text: string): boolean;
|
|
28
|
+
//# sourceMappingURL=custom-id.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"custom-id.d.ts","sourceRoot":"","sources":["../../src/recognizers/custom-id.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAuC,eAAe,EAAE,MAAM,mBAAmB,CAAC;AACzF,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,WAAW,CAAC;AAE5C;;GAEG;AACH,wBAAgB,wBAAwB,CAAC,QAAQ,EAAE,eAAe,EAAE,GAAG,UAAU,CA8ChF;AAED;;GAEG;AACH,eAAO,MAAM,kBAAkB,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAsBrD,CAAC;AAEF;;GAEG;AACH,wBAAgB,sBAAsB,IAAI,UAAU,CAanD;AAED;;GAEG;AACH,wBAAgB,0BAA0B,IAAI,UAAU,CAQvD;AAED;;GAEG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAcpD"}
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Custom ID Recognizer
|
|
3
|
+
* Configurable recognizer for domain-specific identifiers
|
|
4
|
+
* (Case IDs, Customer IDs, File References, etc.)
|
|
5
|
+
*/
|
|
6
|
+
import { PIIType, DetectionSource } from '../types/index.js';
|
|
7
|
+
/**
|
|
8
|
+
* Creates a custom ID recognizer from patterns defined in policy
|
|
9
|
+
*/
|
|
10
|
+
export function createCustomIdRecognizer(patterns) {
|
|
11
|
+
return {
|
|
12
|
+
type: PIIType.CASE_ID, // Default type, will be overridden per pattern
|
|
13
|
+
name: 'Custom ID',
|
|
14
|
+
defaultConfidence: 0.9,
|
|
15
|
+
find(text) {
|
|
16
|
+
const matches = [];
|
|
17
|
+
const seen = new Set();
|
|
18
|
+
for (const patternConfig of patterns) {
|
|
19
|
+
const { pattern, type, validate } = patternConfig;
|
|
20
|
+
// Ensure pattern has global flag
|
|
21
|
+
const globalPattern = pattern.global
|
|
22
|
+
? pattern
|
|
23
|
+
: new RegExp(pattern.source, pattern.flags + 'g');
|
|
24
|
+
for (const match of text.matchAll(globalPattern)) {
|
|
25
|
+
if (match.index === undefined)
|
|
26
|
+
continue;
|
|
27
|
+
const idText = match[0];
|
|
28
|
+
const key = `${match.index}:${match.index + idText.length}`;
|
|
29
|
+
if (seen.has(key))
|
|
30
|
+
continue;
|
|
31
|
+
// Run custom validation if provided
|
|
32
|
+
if (validate !== undefined && !validate(idText)) {
|
|
33
|
+
continue;
|
|
34
|
+
}
|
|
35
|
+
seen.add(key);
|
|
36
|
+
matches.push({
|
|
37
|
+
type,
|
|
38
|
+
start: match.index,
|
|
39
|
+
end: match.index + idText.length,
|
|
40
|
+
confidence: 0.9,
|
|
41
|
+
source: DetectionSource.REGEX,
|
|
42
|
+
text: idText,
|
|
43
|
+
});
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
return matches;
|
|
47
|
+
},
|
|
48
|
+
};
|
|
49
|
+
}
|
|
50
|
+
/**
|
|
51
|
+
* Common patterns for banking/government IDs that can be used as templates
|
|
52
|
+
*/
|
|
53
|
+
export const COMMON_ID_PATTERNS = {
|
|
54
|
+
// German case/file reference: 2-3 uppercase letters, 4-10 digits, then up to 4 trailing uppercase letters/digits, each part optionally separated by one space or hyphen. No `i` flag, so letters must be uppercase.
|
|
55
|
+
germanCaseId: /\b[A-Z]{2,3}[\s-]?[0-9]{4,10}[\s-]?[A-Z0-9]{0,4}\b/g,
|
|
56
|
+
// Generic case number (e.g., CASE-12345): keyword CASE/FILE/REF/TICKET followed by 4-12 alphanumerics; case-insensitive.
|
|
57
|
+
genericCaseNumber: /\b(?:CASE|FILE|REF|TICKET)[\s-]?[A-Z0-9]{4,12}\b/gi,
|
|
58
|
+
// Customer number: keyword CUST/CUSTOMER/KD/KUNDEN, optional NR/NO/NUM/NUMBER, then 4-12 digits; case-insensitive.
|
|
59
|
+
customerNumber: /\b(?:CUST|CUSTOMER|KD|KUNDEN)[\s-]?(?:NR|NO|NUM|NUMBER)?[\s-]?[0-9]{4,12}\b/gi,
|
|
60
|
+
// Account reference (non-IBAN): keyword ACC/ACCT/KONTO followed by 6-12 digits; case-insensitive.
|
|
61
|
+
accountReference: /\b(?:ACC|ACCT|KONTO)[\s-]?[0-9]{6,12}\b/gi,
|
|
62
|
+
// Invoice/Order numbers: keyword (INV/INVOICE/RG/RECHNUNG or ORD/ORDER/BESTELLUNG) followed by 4-12 alphanumerics; case-insensitive.
|
|
63
|
+
invoiceNumber: /\b(?:INV|INVOICE|RG|RECHNUNG)[\s-]?[A-Z0-9]{4,12}\b/gi,
|
|
64
|
+
orderNumber: /\b(?:ORD|ORDER|BESTELLUNG)[\s-]?[A-Z0-9]{4,12}\b/gi,
|
|
65
|
+
// Policy/Contract numbers: keyword POL/POLICY/VERTRAG followed by 6-15 alphanumerics; case-insensitive.
|
|
66
|
+
policyNumber: /\b(?:POL|POLICY|VERTRAG)[\s-]?[A-Z0-9]{6,15}\b/gi,
|
|
67
|
+
// Social Security / Tax ID patterns (generic): four digit groups (2-3, 2-4, 2-4 and 1-4 digits) optionally separated by whitespace, "/" or "-". There is no keyword anchor, so any 7-15 digit run of that shape matches.
|
|
68
|
+
taxIdGeneric: /\b[0-9]{2,3}[\s/-]?[0-9]{2,4}[\s/-]?[0-9]{2,4}[\s/-]?[0-9]{1,4}\b/g,
|
|
69
|
+
};
|
|
70
|
+
/**
|
|
71
|
+
* Creates a standard case ID recognizer
|
|
72
|
+
*/
|
|
73
|
+
export function createCaseIdRecognizer() {
|
|
74
|
+
return createCustomIdRecognizer([
|
|
75
|
+
{
|
|
76
|
+
name: 'German Case ID',
|
|
77
|
+
pattern: COMMON_ID_PATTERNS['germanCaseId'],
|
|
78
|
+
type: PIIType.CASE_ID,
|
|
79
|
+
},
|
|
80
|
+
{
|
|
81
|
+
name: 'Generic Case Number',
|
|
82
|
+
pattern: COMMON_ID_PATTERNS['genericCaseNumber'],
|
|
83
|
+
type: PIIType.CASE_ID,
|
|
84
|
+
},
|
|
85
|
+
]);
|
|
86
|
+
}
|
|
87
|
+
/**
|
|
88
|
+
* Creates a standard customer ID recognizer
|
|
89
|
+
*/
|
|
90
|
+
export function createCustomerIdRecognizer() {
|
|
91
|
+
return createCustomIdRecognizer([
|
|
92
|
+
{
|
|
93
|
+
name: 'Customer Number',
|
|
94
|
+
pattern: COMMON_ID_PATTERNS['customerNumber'],
|
|
95
|
+
type: PIIType.CUSTOMER_ID,
|
|
96
|
+
},
|
|
97
|
+
]);
|
|
98
|
+
}
|
|
99
|
+
/**
|
|
100
|
+
* Validates that a string looks like a structured ID (not just random text)
|
|
101
|
+
*/
|
|
102
|
+
export function isStructuredId(text) {
    // Requirement 1: at least three digits.
    const digitMatches = text.match(/\d/g);
    const digitCount = digitMatches === null ? 0 : digitMatches.length;
    if (digitCount < 3) {
        return false;
    }
    // Requirement 2: once whitespace, hyphens and underscores are removed, at
    // least 90% of what remains must be ASCII letters or digits. All-digit
    // input is accepted; only ASCII letters are counted as letters.
    const letterMatches = text.match(/[a-zA-Z]/g);
    const letterCount = letterMatches === null ? 0 : letterMatches.length;
    const significantLength = text.replace(/[\s\-_]/g, '').length;
    const alphanumericShare = (digitCount + letterCount) / significantLength;
    return !(alphanumericShare < 0.9);
}
|
|
116
|
+
//# sourceMappingURL=custom-id.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"custom-id.js","sourceRoot":"","sources":["../../src/recognizers/custom-id.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,OAAO,EAAa,eAAe,EAAmB,MAAM,mBAAmB,CAAC;AAGzF;;GAEG;AACH,MAAM,UAAU,wBAAwB,CAAC,QAA2B;IAClE,OAAO;QACL,IAAI,EAAE,OAAO,CAAC,OAAO,EAAE,+CAA+C;QACtE,IAAI,EAAE,WAAW;QACjB,iBAAiB,EAAE,GAAG;QAEtB,IAAI,CAAC,IAAY;YACf,MAAM,OAAO,GAAgB,EAAE,CAAC;YAChC,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;YAE/B,KAAK,MAAM,aAAa,IAAI,QAAQ,EAAE,CAAC;gBACrC,MAAM,EAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,GAAG,aAAa,CAAC;gBAElD,iCAAiC;gBACjC,MAAM,aAAa,GAAG,OAAO,CAAC,MAAM;oBAClC,CAAC,CAAC,OAAO;oBACT,CAAC,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,EAAE,OAAO,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC;gBAEpD,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;oBACjD,IAAI,KAAK,CAAC,KAAK,KAAK,SAAS;wBAAE,SAAS;oBAExC,MAAM,MAAM,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;oBACxB,MAAM,GAAG,GAAG,GAAG,KAAK,CAAC,KAAK,IAAI,KAAK,CAAC,KAAK,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC;oBAE5D,IAAI,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;wBAAE,SAAS;oBAE5B,oCAAoC;oBACpC,IAAI,QAAQ,KAAK,SAAS,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;wBAChD,SAAS;oBACX,CAAC;oBAED,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;oBACd,OAAO,CAAC,IAAI,CAAC;wBACX,IAAI;wBACJ,KAAK,EAAE,KAAK,CAAC,KAAK;wBAClB,GAAG,EAAE,KAAK,CAAC,KAAK,GAAG,MAAM,CAAC,MAAM;wBAChC,UAAU,EAAE,GAAG;wBACf,MAAM,EAAE,eAAe,CAAC,KAAK;wBAC7B,IAAI,EAAE,MAAM;qBACb,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;YAED,OAAO,OAAO,CAAC;QACjB,CAAC;KACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,MAAM,kBAAkB,GAA2B;IACxD,sCAAsC;IACtC,YAAY,EAAE,qDAAqD;IAEnE,yCAAyC;IACzC,iBAAiB,EAAE,oDAAoD;IAEvE,2BAA2B;IAC3B,cAAc,EAAE,+EAA+E;IAE/F,+BAA+B;IAC/B,gBAAgB,EAAE,2CAA2C;IAE7D,wBAAwB;IACxB,aAAa,EAAE,uDAAuD;IACtE,WAAW,EAAE,oDAAoD;IAEjE,0BAA0B;IAC1B,YAAY,EAAE,kDAAkD;IAEhE,8CAA8C;IAC9C,YAAY,EAAE,oEAAoE;CACnF,CAAC;AAEF;;GAEG;AACH,MAAM,UAAU,sBAAsB;IACpC,OAAO,wBAAwB,CAAC;QAC9B;YACE,IAAI,EAAE,gBAAgB;YACtB,OAAO,EAAE,kBAAkB,CAAC,cAAc,CAAE;YAC5C,IAAI,EAAE,OAAO,CAAC,OAAO;SACtB;QACD;YACE,IAAI,EAAE,qBAAqB;YAC3B,OAAO,EAAE,kBAAkB,CAAC,mBAAmB,CAAE;YACjD,IAAI,EAAE,OAA
O,CAAC,OAAO;SACtB;KACF,CAAC,CAAC;AACL,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,0BAA0B;IACxC,OAAO,wBAAwB,CAAC;QAC9B;YACE,IAAI,EAAE,iBAAiB;YACvB,OAAO,EAAE,kBAAkB,CAAC,gBAAgB,CAAE;YAC9C,IAAI,EAAE,OAAO,CAAC,WAAW;SAC1B;KACF,CAAC,CAAC;AACL,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,mCAAmC;IACnC,MAAM,UAAU,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;IACpD,IAAI,UAAU,GAAG,CAAC;QAAE,OAAO,KAAK,CAAC;IAEjC,0CAA0C;IAC1C,MAAM,WAAW,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;IAC3D,MAAM,iBAAiB,GAAG,UAAU,GAAG,WAAW,CAAC;IAEnD,gDAAgD;IAChD,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC;IAC/C,IAAI,iBAAiB,GAAG,SAAS,CAAC,MAAM,GAAG,GAAG;QAAE,OAAO,KAAK,CAAC;IAE7D,OAAO,IAAI,CAAC;AACd,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"email.d.ts","sourceRoot":"","sources":["../../src/recognizers/email.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,WAAW,CAAC;AAW5C;;GAEG;AACH,eAAO,MAAM,eAAe,EAAE,UA6D7B,CAAC"}
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Email Address Recognizer
|
|
3
|
+
* RFC-like pattern with boundary awareness
|
|
4
|
+
*/
|
|
5
|
+
import { PIIType, DetectionSource } from '../types/index.js';
|
|
6
|
+
/**
|
|
7
|
+
* Email regex pattern
|
|
8
|
+
* - Local part: alphanumeric, dots, underscores, hyphens, plus signs
|
|
9
|
+
* - Domain: alphanumeric with dots and hyphens
|
|
10
|
+
* - TLD: 2-10 characters
|
|
11
|
+
* - Word boundaries to avoid matching code/variables
|
|
12
|
+
*/
|
|
13
|
+
// NOTE: emailRecognizer.find() rebuilds this pattern from `.source` with its own `g` flag, so only the pattern text written here affects scanning.
const EMAIL_PATTERN = /\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?)*\.[a-zA-Z]{2,10}\b/g;
|
|
14
|
+
/**
|
|
15
|
+
* Email address recognizer
|
|
16
|
+
*/
|
|
17
|
+
export const emailRecognizer = {
    type: PIIType.EMAIL,
    name: 'Email Address',
    defaultConfidence: 0.98,
    // Scans `text` with EMAIL_PATTERN and returns every hit that also passes
    // the structural checks in validate().
    find(text) {
        const scanner = new RegExp(EMAIL_PATTERN.source, 'g');
        const accepted = [];
        for (const hit of text.matchAll(scanner)) {
            const start = hit.index;
            const candidate = hit[0];
            if (start === undefined || !this.validate(candidate)) {
                continue;
            }
            accepted.push({
                type: PIIType.EMAIL,
                start: start,
                end: start + candidate.length,
                confidence: this.defaultConfidence,
                source: DetectionSource.REGEX,
                text: candidate,
            });
        }
        return accepted;
    },
    // Structural sanity checks on a candidate address: overall length, no
    // consecutive dots, exactly one "@", bounded local part and domain, and a
    // final domain label of at least two characters.
    validate(email) {
        // Whole-address limits: 254 characters at most, no "..".
        if (email.length > 254 || email.includes('..')) {
            return false;
        }
        const pieces = email.split('@');
        if (pieces.length !== 2) {
            return false;
        }
        const local = pieces[0];
        const domain = pieces[1];
        if (local === undefined || domain === undefined) {
            return false;
        }
        // Local part: 1-64 characters, not starting or ending with a dot.
        const localOk = local.length >= 1
            && local.length <= 64
            && !local.startsWith('.')
            && !local.endsWith('.');
        if (!localOk) {
            return false;
        }
        // Domain: 1-253 characters and at least one dot.
        const domainOk = domain.length >= 1 && domain.length <= 253 && domain.includes('.');
        if (!domainOk) {
            return false;
        }
        // The last dot-separated label is treated as the TLD (at least 2 chars).
        const labels = domain.split('.');
        const tld = labels[labels.length - 1];
        return tld !== undefined && tld.length >= 2;
    },
    // Lower-cases the address and trims surrounding whitespace.
    normalize(email) {
        const lowered = email.toLowerCase();
        return lowered.trim();
    },
};
|
|
75
|
+
//# sourceMappingURL=email.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"email.js","sourceRoot":"","sources":["../../src/recognizers/email.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,OAAO,EAAa,eAAe,EAAE,MAAM,mBAAmB,CAAC;AAGxE;;;;;;GAMG;AACH,MAAM,aAAa,GAAG,gIAAgI,CAAC;AAEvJ;;GAEG;AACH,MAAM,CAAC,MAAM,eAAe,GAAe;IACzC,IAAI,EAAE,OAAO,CAAC,KAAK;IACnB,IAAI,EAAE,eAAe;IACrB,iBAAiB,EAAE,IAAI;IAEvB,IAAI,CAAC,IAAY;QACf,MAAM,OAAO,GAAgB,EAAE,CAAC;QAChC,MAAM,OAAO,GAAG,IAAI,MAAM,CAAC,aAAa,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QAEtD,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;YAC3C,IAAI,KAAK,CAAC,KAAK,KAAK,SAAS;gBAAE,SAAS;YAExC,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YAEvB,uEAAuE;YACvE,IAAI,CAAC,IAAI,CAAC,QAAS,CAAC,KAAK,CAAC,EAAE,CAAC;gBAC3B,SAAS;YACX,CAAC;YAED,OAAO,CAAC,IAAI,CAAC;gBACX,IAAI,EAAE,OAAO,CAAC,KAAK;gBACnB,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,GAAG,EAAE,KAAK,CAAC,KAAK,GAAG,KAAK,CAAC,MAAM;gBAC/B,UAAU,EAAE,IAAI,CAAC,iBAAiB;gBAClC,MAAM,EAAE,eAAe,CAAC,KAAK;gBAC7B,IAAI,EAAE,KAAK;aACZ,CAAC,CAAC;QACL,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,QAAQ,CAAC,KAAa;QACpB,mBAAmB;QACnB,IAAI,KAAK,CAAC,MAAM,GAAG,GAAG;YAAE,OAAO,KAAK,CAAC,CAAC,2BAA2B;QACjE,IAAI,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC;YAAE,OAAO,KAAK,CAAC,CAAC,sBAAsB;QAE9D,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAC/B,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,KAAK,CAAC;QAErC,MAAM,CAAC,KAAK,EAAE,MAAM,CAAC,GAAG,KAAK,CAAC;QAC9B,IAAI,KAAK,KAAK,SAAS,IAAI,MAAM,KAAK,SAAS;YAAE,OAAO,KAAK,CAAC;QAE9D,wBAAwB;QACxB,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,IAAI,KAAK,CAAC,MAAM,GAAG,EAAE;YAAE,OAAO,KAAK,CAAC;QAC1D,IAAI,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC;YAAE,OAAO,KAAK,CAAC;QAE/D,oBAAoB;QACpB,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,IAAI,MAAM,CAAC,MAAM,GAAG,GAAG;YAAE,OAAO,KAAK,CAAC;QAC7D,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC;YAAE,OAAO,KAAK,CAAC;QAExC,yCAAyC;QACzC,MAAM,GAAG,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,CAAC;QACpC,IAAI,GAAG,KAAK,SAAS,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC;YAAE,OAAO,KAAK,CAAC;QAEtD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,SAAS,CAAC,KAAa;QACrB,OAAO,KAAK,CAAC,WAAW
,EAAE,CAAC,IAAI,EAAE,CAAC;IACpC,CAAC;CACF,CAAC"}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* IBAN Recognizer
|
|
3
|
+
* International Bank Account Number with mod-97 checksum validation
|
|
4
|
+
*/
|
|
5
|
+
import type { Recognizer } from './base.js';
|
|
6
|
+
/**
|
|
7
|
+
* IBAN recognizer with checksum validation
|
|
8
|
+
*/
|
|
9
|
+
export declare const ibanRecognizer: Recognizer;
|
|
10
|
+
/**
|
|
11
|
+
* Gets expected IBAN length for a country code
|
|
12
|
+
*/
|
|
13
|
+
export declare function getExpectedIBANLength(countryCode: string): number | undefined;
|
|
14
|
+
//# sourceMappingURL=iban.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"iban.d.ts","sourceRoot":"","sources":["../../src/recognizers/iban.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,WAAW,CAAC;AAgB5C;;GAEG;AACH,eAAO,MAAM,cAAc,EAAE,UAiD5B,CAAC;AAEF;;GAEG;AACH,wBAAgB,qBAAqB,CAAC,WAAW,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CAE7E"}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* IBAN Recognizer
|
|
3
|
+
* International Bank Account Number with mod-97 checksum validation
|
|
4
|
+
*/
|
|
5
|
+
import { PIIType, DetectionSource } from '../types/index.js';
|
|
6
|
+
import { validateIBAN, normalizeIBAN, IBAN_LENGTHS } from '../utils/iban-checksum.js';
|
|
7
|
+
/**
|
|
8
|
+
* IBAN pattern - matches standard format with optional spaces
|
|
9
|
+
* Format: 2 letters (country) + 2 digits (check) + BBAN (11-30 alphanumeric per the standard; this pattern accepts 9-32, optionally grouped in fours)
|
|
10
|
+
*/
|
|
11
|
+
const IBAN_PATTERN = /\b[A-Z]{2}[0-9]{2}[\s]?(?:[A-Z0-9]{4}[\s]?){2,7}[A-Z0-9]{1,4}\b/gi;
|
|
12
|
+
/**
|
|
13
|
+
* More permissive pattern for IBANs with various separators
|
|
14
|
+
*/
|
|
15
|
+
const IBAN_PATTERN_WITH_SEPARATORS = /\b[A-Z]{2}[0-9]{2}[\s.-]?(?:[A-Z0-9]{4}[\s.-]?){2,7}[A-Z0-9]{1,4}\b/gi;
|
|
16
|
+
/**
 * IBAN recognizer with checksum validation.
 *
 * Candidates are located with IBAN_PATTERN and IBAN_PATTERN_WITH_SEPARATORS
 * and are only reported once `validate` accepts them.
 */
export const ibanRecognizer = {
    type: PIIType.IBAN,
    name: 'IBAN',
    defaultConfidence: 0.99, // High confidence when checksum passes
    /**
     * Finds every validated IBAN in `text`.
     *
     * The patterns are greedy: when an IBAN ends on a full four-character
     * group and is followed by ordinary words (e.g. "... 7034 with reference"),
     * the raw match swallows those words and fails validation. To avoid
     * silently missing such IBANs, a failing candidate is retried after
     * dropping its trailing separator-delimited chunks one at a time.
     *
     * @param text - Text to scan.
     * @returns One entry per distinct validated span, with `start`/`end`
     *          offsets into `text` and the matched substring in `text`.
     */
    find(text) {
        const matches = [];
        const seen = new Set();
        // Shortest IBAN in the official registry (Norway) has 15 characters;
        // never trim a candidate below that.
        const minCompactLength = 15;
        // Last separator run plus the alphanumeric chunk that follows it.
        const trailingChunk = /[\s.-]+[A-Z0-9]+$/i;
        // Try both patterns
        for (const pattern of [IBAN_PATTERN, IBAN_PATTERN_WITH_SEPARATORS]) {
            // Fresh regex per pass so no lastIndex state is shared.
            const globalPattern = new RegExp(pattern.source, 'gi');
            for (const match of text.matchAll(globalPattern)) {
                if (match.index === undefined)
                    continue;
                let iban = match[0];
                let valid = this.validate(iban);
                // Over-long greedy match: back off chunk by chunk from the right.
                while (!valid) {
                    const shorter = iban.replace(trailingChunk, '');
                    if (shorter === iban)
                        break; // nothing left to trim
                    if (shorter.replace(/[\s.-]/g, '').length < minCompactLength)
                        break; // too short to be an IBAN
                    iban = shorter;
                    valid = this.validate(iban);
                }
                if (!valid)
                    continue;
                const end = match.index + iban.length;
                // Both patterns can report the same span; keep it once.
                const key = `${match.index}:${end}`;
                if (seen.has(key))
                    continue;
                seen.add(key);
                matches.push({
                    type: PIIType.IBAN,
                    start: match.index,
                    end,
                    confidence: this.defaultConfidence,
                    source: DetectionSource.REGEX,
                    text: iban,
                });
            }
        }
        return matches;
    },
    /**
     * Checks a candidate by delegating to `validateIBAN`
     * (described in this module as full validation with mod-97 checksum).
     *
     * @param iban - Candidate string, possibly containing separators.
     * @returns Whatever `validateIBAN` reports for the candidate.
     */
    validate(iban) {
        return validateIBAN(iban);
    },
    /**
     * Returns the canonical form of an IBAN by delegating to `normalizeIBAN`.
     *
     * @param iban - IBAN string to normalize.
     */
    normalize(iban) {
        return normalizeIBAN(iban);
    },
};
|
|
61
|
+
/**
 * Looks up the expected IBAN length for a country code.
 *
 * The code is upper-cased before the lookup in IBAN_LENGTHS, so the
 * lookup is case-insensitive. Yields `undefined` when the table has no
 * entry for the code.
 */
export function getExpectedIBANLength(countryCode) {
    const tableKey = countryCode.toUpperCase();
    return IBAN_LENGTHS[tableKey];
}
|
|
67
|
+
//# sourceMappingURL=iban.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"iban.js","sourceRoot":"","sources":["../../src/recognizers/iban.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,OAAO,EAAa,eAAe,EAAE,MAAM,mBAAmB,CAAC;AAExE,OAAO,EAAE,YAAY,EAAE,aAAa,EAAE,YAAY,EAAE,MAAM,2BAA2B,CAAC;AAEtF;;;GAGG;AACH,MAAM,YAAY,GAChB,mEAAmE,CAAC;AAEtE;;GAEG;AACH,MAAM,4BAA4B,GAChC,uEAAuE,CAAC;AAE1E;;GAEG;AACH,MAAM,CAAC,MAAM,cAAc,GAAe;IACxC,IAAI,EAAE,OAAO,CAAC,IAAI;IAClB,IAAI,EAAE,MAAM;IACZ,iBAAiB,EAAE,IAAI,EAAE,uCAAuC;IAEhE,IAAI,CAAC,IAAY;QACf,MAAM,OAAO,GAAgB,EAAE,CAAC;QAChC,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;QAE/B,oBAAoB;QACpB,MAAM,QAAQ,GAAG,CAAC,YAAY,EAAE,4BAA4B,CAAC,CAAC;QAE9D,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAC/B,MAAM,aAAa,GAAG,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;YAEvD,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;gBACjD,IAAI,KAAK,CAAC,KAAK,KAAK,SAAS;oBAAE,SAAS;gBAExC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;gBACtB,MAAM,GAAG,GAAG,GAAG,KAAK,CAAC,KAAK,IAAI,KAAK,CAAC,KAAK,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;gBAE1D,IAAI,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;oBAAE,SAAS;gBAE5B,yBAAyB;gBACzB,IAAI,CAAC,IAAI,CAAC,QAAS,CAAC,IAAI,CAAC;oBAAE,SAAS;gBAEpC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;gBACd,OAAO,CAAC,IAAI,CAAC;oBACX,IAAI,EAAE,OAAO,CAAC,IAAI;oBAClB,KAAK,EAAE,KAAK,CAAC,KAAK;oBAClB,GAAG,EAAE,KAAK,CAAC,KAAK,GAAG,IAAI,CAAC,MAAM;oBAC9B,UAAU,EAAE,IAAI,CAAC,iBAAiB;oBAClC,MAAM,EAAE,eAAe,CAAC,KAAK;oBAC7B,IAAI,EAAE,IAAI;iBACX,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,QAAQ,CAAC,IAAY;QACnB,oDAAoD;QACpD,OAAO,YAAY,CAAC,IAAI,CAAC,CAAC;IAC5B,CAAC;IAED,SAAS,CAAC,IAAY;QACpB,OAAO,aAAa,CAAC,IAAI,CAAC,CAAC;IAC7B,CAAC;CACF,CAAC;AAEF;;GAEG;AACH,MAAM,UAAU,qBAAqB,CAAC,WAAmB;IACvD,OAAO,YAAY,CAAC,WAAW,CAAC,WAAW,EAAE,CAAC,CAAC;AACjD,CAAC"}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Recognizers Module
|
|
3
|
+
* Exports all recognizers and registry utilities
|
|
4
|
+
*/
|
|
5
|
+
export * from './base.js';
|
|
6
|
+
export * from './registry.js';
|
|
7
|
+
export { emailRecognizer } from './email.js';
|
|
8
|
+
export { phoneRecognizer } from './phone.js';
|
|
9
|
+
export { ibanRecognizer, getExpectedIBANLength } from './iban.js';
|
|
10
|
+
export { bicSwiftRecognizer } from './bic-swift.js';
|
|
11
|
+
export { creditCardRecognizer, identifyCardType } from './credit-card.js';
|
|
12
|
+
export { ipAddressRecognizer, isInternalIPv4 } from './ip-address.js';
|
|
13
|
+
export { urlRecognizer, extractDomain } from './url.js';
|
|
14
|
+
export { createCustomIdRecognizer, createCaseIdRecognizer, createCustomerIdRecognizer, COMMON_ID_PATTERNS, isStructuredId, } from './custom-id.js';
|
|
15
|
+
import { RecognizerRegistry } from './registry.js';
|
|
16
|
+
/**
|
|
17
|
+
* Creates a registry with all default recognizers registered
|
|
18
|
+
*/
|
|
19
|
+
export declare function createDefaultRegistry(): RecognizerRegistry;
|
|
20
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/recognizers/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,cAAc,WAAW,CAAC;AAC1B,cAAc,eAAe,CAAC;AAC9B,OAAO,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAC7C,OAAO,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAC7C,OAAO,EAAE,cAAc,EAAE,qBAAqB,EAAE,MAAM,WAAW,CAAC;AAClE,OAAO,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAC;AACpD,OAAO,EAAE,oBAAoB,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAC;AAC1E,OAAO,EAAE,mBAAmB,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AACtE,OAAO,EAAE,aAAa,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AACxD,OAAO,EACL,wBAAwB,EACxB,sBAAsB,EACtB,0BAA0B,EAC1B,kBAAkB,EAClB,cAAc,GACf,MAAM,gBAAgB,CAAC;AAExB,OAAO,EAAE,kBAAkB,EAAE,MAAM,eAAe,CAAC;AAUnD;;GAEG;AACH,wBAAgB,qBAAqB,IAAI,kBAAkB,CAiB1D"}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Recognizers Module
|
|
3
|
+
* Exports all recognizers and registry utilities
|
|
4
|
+
*/
|
|
5
|
+
export * from './base.js';
|
|
6
|
+
export * from './registry.js';
|
|
7
|
+
export { emailRecognizer } from './email.js';
|
|
8
|
+
export { phoneRecognizer } from './phone.js';
|
|
9
|
+
export { ibanRecognizer, getExpectedIBANLength } from './iban.js';
|
|
10
|
+
export { bicSwiftRecognizer } from './bic-swift.js';
|
|
11
|
+
export { creditCardRecognizer, identifyCardType } from './credit-card.js';
|
|
12
|
+
export { ipAddressRecognizer, isInternalIPv4 } from './ip-address.js';
|
|
13
|
+
export { urlRecognizer, extractDomain } from './url.js';
|
|
14
|
+
export { createCustomIdRecognizer, createCaseIdRecognizer, createCustomerIdRecognizer, COMMON_ID_PATTERNS, isStructuredId, } from './custom-id.js';
|
|
15
|
+
import { RecognizerRegistry } from './registry.js';
|
|
16
|
+
import { emailRecognizer } from './email.js';
|
|
17
|
+
import { phoneRecognizer } from './phone.js';
|
|
18
|
+
import { ibanRecognizer } from './iban.js';
|
|
19
|
+
import { bicSwiftRecognizer } from './bic-swift.js';
|
|
20
|
+
import { creditCardRecognizer } from './credit-card.js';
|
|
21
|
+
import { ipAddressRecognizer } from './ip-address.js';
|
|
22
|
+
import { urlRecognizer } from './url.js';
|
|
23
|
+
import { createCaseIdRecognizer, createCustomerIdRecognizer } from './custom-id.js';
|
|
24
|
+
/**
 * Builds a RecognizerRegistry pre-populated with the default recognizers.
 *
 * Registration order: email, phone, IBAN, BIC/SWIFT, credit card,
 * IP address, URL, then the case-ID and customer-ID recognizers.
 */
export function createDefaultRegistry() {
    const defaults = new RecognizerRegistry();
    // Built-in recognizers, registered in this exact order.
    const builtIns = [
        emailRecognizer,
        phoneRecognizer,
        ibanRecognizer,
        bicSwiftRecognizer,
        creditCardRecognizer,
        ipAddressRecognizer,
        urlRecognizer,
    ];
    for (const recognizer of builtIns) {
        defaults.register(recognizer);
    }
    // Default custom ID recognizers: each factory is invoked right before
    // its result is registered.
    const customIdFactories = [createCaseIdRecognizer, createCustomerIdRecognizer];
    for (const makeRecognizer of customIdFactories) {
        defaults.register(makeRecognizer());
    }
    return defaults;
}
|
|
42
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/recognizers/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,cAAc,WAAW,CAAC;AAC1B,cAAc,eAAe,CAAC;AAC9B,OAAO,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAC7C,OAAO,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAC7C,OAAO,EAAE,cAAc,EAAE,qBAAqB,EAAE,MAAM,WAAW,CAAC;AAClE,OAAO,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAC;AACpD,OAAO,EAAE,oBAAoB,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAC;AAC1E,OAAO,EAAE,mBAAmB,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AACtE,OAAO,EAAE,aAAa,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AACxD,OAAO,EACL,wBAAwB,EACxB,sBAAsB,EACtB,0BAA0B,EAC1B,kBAAkB,EAClB,cAAc,GACf,MAAM,gBAAgB,CAAC;AAExB,OAAO,EAAE,kBAAkB,EAAE,MAAM,eAAe,CAAC;AACnD,OAAO,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAC7C,OAAO,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAC7C,OAAO,EAAE,cAAc,EAAE,MAAM,WAAW,CAAC;AAC3C,OAAO,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAC;AACpD,OAAO,EAAE,oBAAoB,EAAE,MAAM,kBAAkB,CAAC;AACxD,OAAO,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAC;AACtD,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AACzC,OAAO,EAAE,sBAAsB,EAAE,0BAA0B,EAAE,MAAM,gBAAgB,CAAC;AAEpF;;GAEG;AACH,MAAM,UAAU,qBAAqB;IACnC,MAAM,QAAQ,GAAG,IAAI,kBAAkB,EAAE,CAAC;IAE1C,oCAAoC;IACpC,QAAQ,CAAC,QAAQ,CAAC,eAAe,CAAC,CAAC;IACnC,QAAQ,CAAC,QAAQ,CAAC,eAAe,CAAC,CAAC;IACnC,QAAQ,CAAC,QAAQ,CAAC,cAAc,CAAC,CAAC;IAClC,QAAQ,CAAC,QAAQ,CAAC,kBAAkB,CAAC,CAAC;IACtC,QAAQ,CAAC,QAAQ,CAAC,oBAAoB,CAAC,CAAC;IACxC,QAAQ,CAAC,QAAQ,CAAC,mBAAmB,CAAC,CAAC;IACvC,QAAQ,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;IAEjC,yCAAyC;IACzC,QAAQ,CAAC,QAAQ,CAAC,sBAAsB,EAAE,CAAC,CAAC;IAC5C,QAAQ,CAAC,QAAQ,CAAC,0BAA0B,EAAE,CAAC,CAAC;IAEhD,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* IP Address Recognizer
|
|
3
|
+
* Detects IPv4 and IPv6 addresses
|
|
4
|
+
*/
|
|
5
|
+
import type { Recognizer } from './base.js';
|
|
6
|
+
/**
|
|
7
|
+
* IP Address recognizer (IPv4 and IPv6)
|
|
8
|
+
*/
|
|
9
|
+
export declare const ipAddressRecognizer: Recognizer;
|
|
10
|
+
/**
|
|
11
|
+
* Checks if an IPv4 address is in a private/internal range
|
|
12
|
+
*/
|
|
13
|
+
export declare function isInternalIPv4(ip: string): boolean;
|
|
14
|
+
//# sourceMappingURL=ip-address.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ip-address.d.ts","sourceRoot":"","sources":["../../src/recognizers/ip-address.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,WAAW,CAAC;AA4C5C;;GAEG;AACH,eAAO,MAAM,mBAAmB,EAAE,UAkFjC,CAAC;AA6DF;;GAEG;AACH,wBAAgB,cAAc,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAElD"}
|