@flexorch/audit 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +230 -11
- package/dist/index.js +230 -11
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -33,8 +33,9 @@ module.exports = __toCommonJS(index_exports);
|
|
|
33
33
|
|
|
34
34
|
// src/pii.ts
|
|
35
35
|
var EMAIL_RE = /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g;
|
|
36
|
-
var PHONE_INTL_RE =
|
|
36
|
+
var PHONE_INTL_RE = /(?<![+\d])(\+[1-9][\d\s\-.()]{5,18}\d)(?!\d)/g;
|
|
37
37
|
var IBAN_RE = /\b[A-Z]{2}\d{2}[0-9A-Z]{11,30}\b/g;
|
|
38
|
+
var IBAN_INTL_RE = /\b([A-Z]{2}\d{2}[0-9A-Z]{11,30})\b/g;
|
|
38
39
|
var CC_RE = /\b\d{4}[ \-]\d{4}[ \-]\d{4}[ \-]\d{4}\b/g;
|
|
39
40
|
var IPV4_RE = /\b(?:(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\b/g;
|
|
40
41
|
var _H = "[0-9a-fA-F]{1,4}";
|
|
@@ -45,6 +46,102 @@ var IPV6_RE = new RegExp(
|
|
|
45
46
|
var PHONE_TR_RE = /\b(?:\+90|0)?\s*5\d{2}\s*\d{3}\s*\d{2}\s*\d{2}\b/g;
|
|
46
47
|
var TCKN_RE = /\b([1-9]\d{10})\b/g;
|
|
47
48
|
var VKN_RE = /\b([1-9]\d{9})\b/g;
|
|
49
|
+
var IBAN_TR_RE = /\bTR\d{2}[0-9A-Z]{22}\b/g;
|
|
50
|
+
var _TR_COMPANY_SUFFIX = "(?:A\\.\u015E\\.|Ltd\\.\\s*\u015Eti\\.|Koll\\.\\s*\u015Eti\\.|Koop\\.|T\\.A\\.\u015E\\.)";
|
|
51
|
+
var _TR_NAME_TOKEN = "(?:ve|ile|[A-Z\xC7\u011E\u0130\xD6\u015E\xDC][A-Za-z\xC7\u011E\u0130\xD6\u015E\xDC\xE7\u011F\u0131\u015F\xF6\u015F\xFC]*\\.?)";
|
|
52
|
+
var COMPANY_NAME_TR_RE = new RegExp(
|
|
53
|
+
`(?<![A-Za-z\xC7\u011E\u0130\xD6\u015E\xDC\xE7\u011F\u0131\u015F\xF6\u015F\xFC])([A-Z\xC7\u011E\u0130\xD6\u015E\xDC][A-Za-z\xC7\u011E\u0130\xD6\u015E\xDC\xE7\u011F\u0131\u015F\xF6\u015F\xFC]*(?:\\s+${_TR_NAME_TOKEN}){0,6}\\s+${_TR_COMPANY_SUFFIX})`,
|
|
54
|
+
"gu"
|
|
55
|
+
);
|
|
56
|
+
var MERSIS_RE = /\b([1-9]\d{15})\b/g;
|
|
57
|
+
var POSTAL_CODE_TR_RE = /\b((?:0[1-9]|[1-7]\d|80|81)\d{3})\b/g;
|
|
58
|
+
var _TR_PROVINCES_SORTED = [
|
|
59
|
+
"Afyonkarahisar",
|
|
60
|
+
"Kahramanmara\u015F",
|
|
61
|
+
"K\u0131r\u0131kkale",
|
|
62
|
+
"K\u0131rklareli",
|
|
63
|
+
"Diyarbak\u0131r",
|
|
64
|
+
"Gaziantep",
|
|
65
|
+
"\u015Eanl\u0131urfa",
|
|
66
|
+
"Nev\u015Fehir",
|
|
67
|
+
"Kastamonu",
|
|
68
|
+
"G\xFCm\xFC\u015Fhane",
|
|
69
|
+
"Eski\u015Fehir",
|
|
70
|
+
"Erzincan",
|
|
71
|
+
"Erzurum",
|
|
72
|
+
"Denizli",
|
|
73
|
+
"\xC7anakkale",
|
|
74
|
+
"Ad\u0131yaman",
|
|
75
|
+
"Zonguldak",
|
|
76
|
+
"Tekirda\u011F",
|
|
77
|
+
"Trabzon",
|
|
78
|
+
"Tunceli",
|
|
79
|
+
"Karaman",
|
|
80
|
+
"Karab\xFCk",
|
|
81
|
+
"Aksaray",
|
|
82
|
+
"Antalya",
|
|
83
|
+
"K\u0131r\u015Fehir",
|
|
84
|
+
"Osmaniye",
|
|
85
|
+
"Kocaeli",
|
|
86
|
+
"Sakarya",
|
|
87
|
+
"Bart\u0131n",
|
|
88
|
+
"Bayburt",
|
|
89
|
+
"Ardahan",
|
|
90
|
+
"Yozgat",
|
|
91
|
+
"Ankara",
|
|
92
|
+
"Amasya",
|
|
93
|
+
"Artvin",
|
|
94
|
+
"Bal\u0131kesir",
|
|
95
|
+
"Bilecik",
|
|
96
|
+
"Bing\xF6l",
|
|
97
|
+
"Bitlis",
|
|
98
|
+
"Burdur",
|
|
99
|
+
"\xC7ank\u0131r\u0131",
|
|
100
|
+
"Edirne",
|
|
101
|
+
"Elaz\u0131\u011F",
|
|
102
|
+
"Giresun",
|
|
103
|
+
"Hakkari",
|
|
104
|
+
"Isparta",
|
|
105
|
+
"\u0130stanbul",
|
|
106
|
+
"\u0130zmir",
|
|
107
|
+
"Kayseri",
|
|
108
|
+
"K\xFCtahya",
|
|
109
|
+
"Malatya",
|
|
110
|
+
"Manisa",
|
|
111
|
+
"Mardin",
|
|
112
|
+
"Samsun",
|
|
113
|
+
"\u015E\u0131rnak",
|
|
114
|
+
"Sinop",
|
|
115
|
+
"Tokat",
|
|
116
|
+
"Hatay",
|
|
117
|
+
"Konya",
|
|
118
|
+
"Mu\u011Fla",
|
|
119
|
+
"Ni\u011Fde",
|
|
120
|
+
"Rize",
|
|
121
|
+
"Siirt",
|
|
122
|
+
"Sivas",
|
|
123
|
+
"Adana",
|
|
124
|
+
"Ayd\u0131n",
|
|
125
|
+
"Bursa",
|
|
126
|
+
"\xC7orum",
|
|
127
|
+
"I\u011Fd\u0131r",
|
|
128
|
+
"Kilis",
|
|
129
|
+
"Mersin",
|
|
130
|
+
"Batman",
|
|
131
|
+
"Yalova",
|
|
132
|
+
"D\xFCzce",
|
|
133
|
+
"Ordu",
|
|
134
|
+
"Kars",
|
|
135
|
+
"A\u011Fr\u0131",
|
|
136
|
+
"Bolu",
|
|
137
|
+
"Van",
|
|
138
|
+
"U\u015Fak",
|
|
139
|
+
"Mu\u015F"
|
|
140
|
+
].sort((a, b) => b.length - a.length);
|
|
141
|
+
var PROVINCE_TR_RE = new RegExp(
|
|
142
|
+
`(?<!\\w)(${_TR_PROVINCES_SORTED.map((p) => p.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")).join("|")})(?!\\w)`,
|
|
143
|
+
"gu"
|
|
144
|
+
);
|
|
48
145
|
var NAME_PREFIX_TR = "(?:Ad[\u0131i]\\s*(?:Soyad[\u0131i])?|Soyad[\u0131i]|\u0130sim|M\xFC\u015Fteri\\s+Ad[\u0131i]|Yetkili(?:\\s+Ki\u015Fi)?|\xC7al\u0131\u015Fan\\s+Ad[\u0131i]|Personel\\s+Ad[\u0131i]|Ki\u015Fi\\s+Ad[\u0131i]|Sat\u0131c\u0131\\s+Ad[\u0131i]|Al\u0131c\u0131\\s+Ad[\u0131i]|\u0130lgili\\s+Ki\u015Fi|Hesap\\s+Sahibi)";
|
|
49
146
|
var NAME_PREFIX_EN = "(?:Full\\s+Name|Customer\\s+Name|Employee\\s+Name|Contact\\s+Name|Authorized\\s+(?:By|Person)|Account\\s+Holder|(?<!\\bUser\\s)Name)";
|
|
50
147
|
var NAME_VALUE = "([A-Z\xC7\u011E\u0130\xD6\u015E\xDC][a-z\xE7\u011F\u0131\u015F\xF6\u015F\xFC]+(?:\\s+[A-Z\xC7\u011E\u0130\xD6\u015E\xDC][a-z\xE7\u011F\u0131\u015F\xF6\u015F\xFC]+){0,2})";
|
|
@@ -52,6 +149,46 @@ var NAME_RE = new RegExp(
|
|
|
52
149
|
`(?:${NAME_PREFIX_TR}|${NAME_PREFIX_EN})\\s*[:\\-]\\s*${NAME_VALUE}`,
|
|
53
150
|
"gu"
|
|
54
151
|
);
|
|
152
|
+
var _IBAN_INTL_LENGTHS = {
|
|
153
|
+
AT: 20,
|
|
154
|
+
BE: 16,
|
|
155
|
+
BG: 22,
|
|
156
|
+
HR: 21,
|
|
157
|
+
CY: 28,
|
|
158
|
+
CZ: 24,
|
|
159
|
+
DK: 18,
|
|
160
|
+
EE: 20,
|
|
161
|
+
FI: 18,
|
|
162
|
+
FR: 27,
|
|
163
|
+
DE: 22,
|
|
164
|
+
GR: 27,
|
|
165
|
+
HU: 28,
|
|
166
|
+
IE: 22,
|
|
167
|
+
IT: 27,
|
|
168
|
+
LV: 21,
|
|
169
|
+
LT: 20,
|
|
170
|
+
LU: 20,
|
|
171
|
+
MT: 31,
|
|
172
|
+
NL: 18,
|
|
173
|
+
PL: 28,
|
|
174
|
+
PT: 25,
|
|
175
|
+
RO: 24,
|
|
176
|
+
SK: 24,
|
|
177
|
+
SI: 19,
|
|
178
|
+
ES: 24,
|
|
179
|
+
SE: 24,
|
|
180
|
+
GB: 22,
|
|
181
|
+
CH: 21,
|
|
182
|
+
NO: 15
|
|
183
|
+
};
|
|
184
|
+
var _INTL_SUFFIX = "(?:KGaA|GmbH|OHG|GbR|SARL|EURL|S\\.p\\.A\\.|S\\.r\\.l\\.|S\\.n\\.c\\.|S\\.a\\.s\\.|B\\.V\\.|N\\.V\\.|S\\.A\\.|S\\.L\\.|Corp\\.|Inc\\.|Ltd\\.|LLP|LLC|PLC|SpA|Srl|SNC|SAS|BV|NV|SL|SA|Corp|Inc|Ltd|KG|AG|UG)";
|
|
185
|
+
var _UC = "[A-Z\xC0-\u024F]";
|
|
186
|
+
var _WC = "[A-Za-z0-9\xC0-\u024F\\-]";
|
|
187
|
+
var _INTL_NAME_TOKEN = `(?:and|&|${_UC}${_WC}*\\.?)`;
|
|
188
|
+
var COMPANY_NAME_INTL_RE = new RegExp(
|
|
189
|
+
`(?<![A-Za-z\xC0-\u024F])(${_UC}${_WC}*(?:\\s+${_INTL_NAME_TOKEN}){0,6}\\s+${_INTL_SUFFIX})`,
|
|
190
|
+
"gu"
|
|
191
|
+
);
|
|
55
192
|
var SSN_RE = /\b(?!000|666|9\d{2})\d{3}-(?!00)\d{2}-(?!0000)\d{4}\b/g;
|
|
56
193
|
function validTckn(s) {
|
|
57
194
|
if (s.length !== 11 || s[0] === "0") return false;
|
|
@@ -103,10 +240,30 @@ function validIban(s) {
|
|
|
103
240
|
}
|
|
104
241
|
return remainder === 1;
|
|
105
242
|
}
|
|
243
|
+
function validIbanIntl(s) {
|
|
244
|
+
const country = s.slice(0, 2);
|
|
245
|
+
if (country === "TR" || !(country in _IBAN_INTL_LENGTHS)) return false;
|
|
246
|
+
if (s.length !== _IBAN_INTL_LENGTHS[country]) return false;
|
|
247
|
+
return validIban(s);
|
|
248
|
+
}
|
|
249
|
+
function validPhoneIntl(raw) {
|
|
250
|
+
const digits = raw.replace(/\D/g, "");
|
|
251
|
+
return digits.length >= 7 && digits.length <= 15 && digits.slice(0, 2) !== "90";
|
|
252
|
+
}
|
|
106
253
|
var LOCALE_DETECTORS = {
|
|
107
|
-
tr: /* @__PURE__ */ new Set([
|
|
108
|
-
|
|
109
|
-
|
|
254
|
+
tr: /* @__PURE__ */ new Set([
|
|
255
|
+
"national_id_tr",
|
|
256
|
+
"tax_id_tr",
|
|
257
|
+
"phone_tr",
|
|
258
|
+
"name",
|
|
259
|
+
"iban_tr",
|
|
260
|
+
"company_name_tr",
|
|
261
|
+
"mersis_no",
|
|
262
|
+
"postal_code_tr",
|
|
263
|
+
"province_tr"
|
|
264
|
+
]),
|
|
265
|
+
us: /* @__PURE__ */ new Set(["ssn", "phone_intl", "company_name_intl"]),
|
|
266
|
+
eu: /* @__PURE__ */ new Set(["phone_intl", "iban_intl", "company_name_intl"])
|
|
110
267
|
};
|
|
111
268
|
var UNIVERSAL = /* @__PURE__ */ new Set(["email", "iban", "credit_card", "ip", "ip_v6"]);
|
|
112
269
|
function activeDetectors(locale) {
|
|
@@ -115,7 +272,6 @@ function activeDetectors(locale) {
|
|
|
115
272
|
for (const detectors of Object.values(LOCALE_DETECTORS)) {
|
|
116
273
|
detectors.forEach((d) => active2.add(d));
|
|
117
274
|
}
|
|
118
|
-
if (active2.has("phone_tr")) active2.delete("phone");
|
|
119
275
|
return active2;
|
|
120
276
|
}
|
|
121
277
|
const active = new Set(UNIVERSAL);
|
|
@@ -134,15 +290,15 @@ function findAll(re, text, type) {
|
|
|
134
290
|
function detectPii(text, locale = "tr") {
|
|
135
291
|
const active = activeDetectors(locale);
|
|
136
292
|
const t = text ?? "";
|
|
137
|
-
|
|
293
|
+
let findings = [];
|
|
138
294
|
if (active.has("email")) findings.push(...findAll(EMAIL_RE, t, "email"));
|
|
139
|
-
if (active.has("
|
|
295
|
+
if (active.has("phone_intl")) {
|
|
140
296
|
PHONE_INTL_RE.lastIndex = 0;
|
|
141
297
|
let m;
|
|
142
298
|
while ((m = PHONE_INTL_RE.exec(t)) !== null) {
|
|
143
|
-
const
|
|
144
|
-
if (
|
|
145
|
-
findings.push({ type: "
|
|
299
|
+
const candidate = m[1];
|
|
300
|
+
if (validPhoneIntl(candidate)) {
|
|
301
|
+
findings.push({ type: "phone_intl", value: candidate, start: m.index, end: m.index + candidate.length });
|
|
146
302
|
}
|
|
147
303
|
}
|
|
148
304
|
}
|
|
@@ -195,8 +351,71 @@ function detectPii(text, locale = "tr") {
|
|
|
195
351
|
findings.push({ type: "name", value, start, end: start + value.length });
|
|
196
352
|
}
|
|
197
353
|
}
|
|
354
|
+
if (active.has("iban_tr")) {
|
|
355
|
+
IBAN_TR_RE.lastIndex = 0;
|
|
356
|
+
let m;
|
|
357
|
+
while ((m = IBAN_TR_RE.exec(t)) !== null) {
|
|
358
|
+
if (validIban(m[0])) {
|
|
359
|
+
findings.push({ type: "iban_tr", value: m[0], start: m.index, end: m.index + m[0].length });
|
|
360
|
+
}
|
|
361
|
+
}
|
|
362
|
+
}
|
|
363
|
+
if (active.has("company_name_tr")) {
|
|
364
|
+
COMPANY_NAME_TR_RE.lastIndex = 0;
|
|
365
|
+
let m;
|
|
366
|
+
while ((m = COMPANY_NAME_TR_RE.exec(t)) !== null) {
|
|
367
|
+
findings.push({ type: "company_name_tr", value: m[1], start: m.index, end: m.index + m[1].length });
|
|
368
|
+
}
|
|
369
|
+
}
|
|
370
|
+
if (active.has("mersis_no")) {
|
|
371
|
+
MERSIS_RE.lastIndex = 0;
|
|
372
|
+
let m;
|
|
373
|
+
while ((m = MERSIS_RE.exec(t)) !== null) {
|
|
374
|
+
findings.push({ type: "mersis_no", value: m[1], start: m.index, end: m.index + m[1].length });
|
|
375
|
+
}
|
|
376
|
+
}
|
|
377
|
+
if (active.has("postal_code_tr")) {
|
|
378
|
+
POSTAL_CODE_TR_RE.lastIndex = 0;
|
|
379
|
+
let m;
|
|
380
|
+
while ((m = POSTAL_CODE_TR_RE.exec(t)) !== null) {
|
|
381
|
+
findings.push({ type: "postal_code_tr", value: m[1], start: m.index, end: m.index + m[1].length });
|
|
382
|
+
}
|
|
383
|
+
}
|
|
384
|
+
if (active.has("province_tr")) {
|
|
385
|
+
PROVINCE_TR_RE.lastIndex = 0;
|
|
386
|
+
let m;
|
|
387
|
+
while ((m = PROVINCE_TR_RE.exec(t)) !== null) {
|
|
388
|
+
findings.push({ type: "province_tr", value: m[1], start: m.index, end: m.index + m[1].length });
|
|
389
|
+
}
|
|
390
|
+
}
|
|
198
391
|
if (active.has("ssn")) findings.push(...findAll(SSN_RE, t, "ssn"));
|
|
199
|
-
|
|
392
|
+
if (active.has("iban_intl")) {
|
|
393
|
+
IBAN_INTL_RE.lastIndex = 0;
|
|
394
|
+
let m;
|
|
395
|
+
while ((m = IBAN_INTL_RE.exec(t)) !== null) {
|
|
396
|
+
const candidate = m[1];
|
|
397
|
+
if (validIbanIntl(candidate)) {
|
|
398
|
+
findings.push({ type: "iban_intl", value: candidate, start: m.index, end: m.index + candidate.length });
|
|
399
|
+
}
|
|
400
|
+
}
|
|
401
|
+
}
|
|
402
|
+
if (active.has("company_name_intl")) {
|
|
403
|
+
COMPANY_NAME_INTL_RE.lastIndex = 0;
|
|
404
|
+
let m;
|
|
405
|
+
while ((m = COMPANY_NAME_INTL_RE.exec(t)) !== null) {
|
|
406
|
+
findings.push({ type: "company_name_intl", value: m[1], start: m.index, end: m.index + m[1].length });
|
|
407
|
+
}
|
|
408
|
+
}
|
|
409
|
+
findings.sort((a, b) => a.start - b.start);
|
|
410
|
+
const specificIbanSpans = new Set(
|
|
411
|
+
findings.filter((f) => f.type === "iban_tr" || f.type === "iban_intl").map((f) => `${f.start}:${f.end}`)
|
|
412
|
+
);
|
|
413
|
+
if (specificIbanSpans.size > 0) {
|
|
414
|
+
findings = findings.filter(
|
|
415
|
+
(f) => !(f.type === "iban" && specificIbanSpans.has(`${f.start}:${f.end}`))
|
|
416
|
+
);
|
|
417
|
+
}
|
|
418
|
+
return findings;
|
|
200
419
|
}
|
|
201
420
|
|
|
202
421
|
// src/quality.ts
|
package/dist/index.js
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
// src/pii.ts
|
|
2
2
|
var EMAIL_RE = /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g;
|
|
3
|
-
var PHONE_INTL_RE =
|
|
3
|
+
var PHONE_INTL_RE = /(?<![+\d])(\+[1-9][\d\s\-.()]{5,18}\d)(?!\d)/g;
|
|
4
4
|
var IBAN_RE = /\b[A-Z]{2}\d{2}[0-9A-Z]{11,30}\b/g;
|
|
5
|
+
var IBAN_INTL_RE = /\b([A-Z]{2}\d{2}[0-9A-Z]{11,30})\b/g;
|
|
5
6
|
var CC_RE = /\b\d{4}[ \-]\d{4}[ \-]\d{4}[ \-]\d{4}\b/g;
|
|
6
7
|
var IPV4_RE = /\b(?:(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\b/g;
|
|
7
8
|
var _H = "[0-9a-fA-F]{1,4}";
|
|
@@ -12,6 +13,102 @@ var IPV6_RE = new RegExp(
|
|
|
12
13
|
var PHONE_TR_RE = /\b(?:\+90|0)?\s*5\d{2}\s*\d{3}\s*\d{2}\s*\d{2}\b/g;
|
|
13
14
|
var TCKN_RE = /\b([1-9]\d{10})\b/g;
|
|
14
15
|
var VKN_RE = /\b([1-9]\d{9})\b/g;
|
|
16
|
+
var IBAN_TR_RE = /\bTR\d{2}[0-9A-Z]{22}\b/g;
|
|
17
|
+
var _TR_COMPANY_SUFFIX = "(?:A\\.\u015E\\.|Ltd\\.\\s*\u015Eti\\.|Koll\\.\\s*\u015Eti\\.|Koop\\.|T\\.A\\.\u015E\\.)";
|
|
18
|
+
var _TR_NAME_TOKEN = "(?:ve|ile|[A-Z\xC7\u011E\u0130\xD6\u015E\xDC][A-Za-z\xC7\u011E\u0130\xD6\u015E\xDC\xE7\u011F\u0131\u015F\xF6\u015F\xFC]*\\.?)";
|
|
19
|
+
var COMPANY_NAME_TR_RE = new RegExp(
|
|
20
|
+
`(?<![A-Za-z\xC7\u011E\u0130\xD6\u015E\xDC\xE7\u011F\u0131\u015F\xF6\u015F\xFC])([A-Z\xC7\u011E\u0130\xD6\u015E\xDC][A-Za-z\xC7\u011E\u0130\xD6\u015E\xDC\xE7\u011F\u0131\u015F\xF6\u015F\xFC]*(?:\\s+${_TR_NAME_TOKEN}){0,6}\\s+${_TR_COMPANY_SUFFIX})`,
|
|
21
|
+
"gu"
|
|
22
|
+
);
|
|
23
|
+
var MERSIS_RE = /\b([1-9]\d{15})\b/g;
|
|
24
|
+
var POSTAL_CODE_TR_RE = /\b((?:0[1-9]|[1-7]\d|80|81)\d{3})\b/g;
|
|
25
|
+
var _TR_PROVINCES_SORTED = [
|
|
26
|
+
"Afyonkarahisar",
|
|
27
|
+
"Kahramanmara\u015F",
|
|
28
|
+
"K\u0131r\u0131kkale",
|
|
29
|
+
"K\u0131rklareli",
|
|
30
|
+
"Diyarbak\u0131r",
|
|
31
|
+
"Gaziantep",
|
|
32
|
+
"\u015Eanl\u0131urfa",
|
|
33
|
+
"Nev\u015Fehir",
|
|
34
|
+
"Kastamonu",
|
|
35
|
+
"G\xFCm\xFC\u015Fhane",
|
|
36
|
+
"Eski\u015Fehir",
|
|
37
|
+
"Erzincan",
|
|
38
|
+
"Erzurum",
|
|
39
|
+
"Denizli",
|
|
40
|
+
"\xC7anakkale",
|
|
41
|
+
"Ad\u0131yaman",
|
|
42
|
+
"Zonguldak",
|
|
43
|
+
"Tekirda\u011F",
|
|
44
|
+
"Trabzon",
|
|
45
|
+
"Tunceli",
|
|
46
|
+
"Karaman",
|
|
47
|
+
"Karab\xFCk",
|
|
48
|
+
"Aksaray",
|
|
49
|
+
"Antalya",
|
|
50
|
+
"K\u0131r\u015Fehir",
|
|
51
|
+
"Osmaniye",
|
|
52
|
+
"Kocaeli",
|
|
53
|
+
"Sakarya",
|
|
54
|
+
"Bart\u0131n",
|
|
55
|
+
"Bayburt",
|
|
56
|
+
"Ardahan",
|
|
57
|
+
"Yozgat",
|
|
58
|
+
"Ankara",
|
|
59
|
+
"Amasya",
|
|
60
|
+
"Artvin",
|
|
61
|
+
"Bal\u0131kesir",
|
|
62
|
+
"Bilecik",
|
|
63
|
+
"Bing\xF6l",
|
|
64
|
+
"Bitlis",
|
|
65
|
+
"Burdur",
|
|
66
|
+
"\xC7ank\u0131r\u0131",
|
|
67
|
+
"Edirne",
|
|
68
|
+
"Elaz\u0131\u011F",
|
|
69
|
+
"Giresun",
|
|
70
|
+
"Hakkari",
|
|
71
|
+
"Isparta",
|
|
72
|
+
"\u0130stanbul",
|
|
73
|
+
"\u0130zmir",
|
|
74
|
+
"Kayseri",
|
|
75
|
+
"K\xFCtahya",
|
|
76
|
+
"Malatya",
|
|
77
|
+
"Manisa",
|
|
78
|
+
"Mardin",
|
|
79
|
+
"Samsun",
|
|
80
|
+
"\u015E\u0131rnak",
|
|
81
|
+
"Sinop",
|
|
82
|
+
"Tokat",
|
|
83
|
+
"Hatay",
|
|
84
|
+
"Konya",
|
|
85
|
+
"Mu\u011Fla",
|
|
86
|
+
"Ni\u011Fde",
|
|
87
|
+
"Rize",
|
|
88
|
+
"Siirt",
|
|
89
|
+
"Sivas",
|
|
90
|
+
"Adana",
|
|
91
|
+
"Ayd\u0131n",
|
|
92
|
+
"Bursa",
|
|
93
|
+
"\xC7orum",
|
|
94
|
+
"I\u011Fd\u0131r",
|
|
95
|
+
"Kilis",
|
|
96
|
+
"Mersin",
|
|
97
|
+
"Batman",
|
|
98
|
+
"Yalova",
|
|
99
|
+
"D\xFCzce",
|
|
100
|
+
"Ordu",
|
|
101
|
+
"Kars",
|
|
102
|
+
"A\u011Fr\u0131",
|
|
103
|
+
"Bolu",
|
|
104
|
+
"Van",
|
|
105
|
+
"U\u015Fak",
|
|
106
|
+
"Mu\u015F"
|
|
107
|
+
].sort((a, b) => b.length - a.length);
|
|
108
|
+
var PROVINCE_TR_RE = new RegExp(
|
|
109
|
+
`(?<!\\w)(${_TR_PROVINCES_SORTED.map((p) => p.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")).join("|")})(?!\\w)`,
|
|
110
|
+
"gu"
|
|
111
|
+
);
|
|
15
112
|
var NAME_PREFIX_TR = "(?:Ad[\u0131i]\\s*(?:Soyad[\u0131i])?|Soyad[\u0131i]|\u0130sim|M\xFC\u015Fteri\\s+Ad[\u0131i]|Yetkili(?:\\s+Ki\u015Fi)?|\xC7al\u0131\u015Fan\\s+Ad[\u0131i]|Personel\\s+Ad[\u0131i]|Ki\u015Fi\\s+Ad[\u0131i]|Sat\u0131c\u0131\\s+Ad[\u0131i]|Al\u0131c\u0131\\s+Ad[\u0131i]|\u0130lgili\\s+Ki\u015Fi|Hesap\\s+Sahibi)";
|
|
16
113
|
var NAME_PREFIX_EN = "(?:Full\\s+Name|Customer\\s+Name|Employee\\s+Name|Contact\\s+Name|Authorized\\s+(?:By|Person)|Account\\s+Holder|(?<!\\bUser\\s)Name)";
|
|
17
114
|
var NAME_VALUE = "([A-Z\xC7\u011E\u0130\xD6\u015E\xDC][a-z\xE7\u011F\u0131\u015F\xF6\u015F\xFC]+(?:\\s+[A-Z\xC7\u011E\u0130\xD6\u015E\xDC][a-z\xE7\u011F\u0131\u015F\xF6\u015F\xFC]+){0,2})";
|
|
@@ -19,6 +116,46 @@ var NAME_RE = new RegExp(
|
|
|
19
116
|
`(?:${NAME_PREFIX_TR}|${NAME_PREFIX_EN})\\s*[:\\-]\\s*${NAME_VALUE}`,
|
|
20
117
|
"gu"
|
|
21
118
|
);
|
|
119
|
+
var _IBAN_INTL_LENGTHS = {
|
|
120
|
+
AT: 20,
|
|
121
|
+
BE: 16,
|
|
122
|
+
BG: 22,
|
|
123
|
+
HR: 21,
|
|
124
|
+
CY: 28,
|
|
125
|
+
CZ: 24,
|
|
126
|
+
DK: 18,
|
|
127
|
+
EE: 20,
|
|
128
|
+
FI: 18,
|
|
129
|
+
FR: 27,
|
|
130
|
+
DE: 22,
|
|
131
|
+
GR: 27,
|
|
132
|
+
HU: 28,
|
|
133
|
+
IE: 22,
|
|
134
|
+
IT: 27,
|
|
135
|
+
LV: 21,
|
|
136
|
+
LT: 20,
|
|
137
|
+
LU: 20,
|
|
138
|
+
MT: 31,
|
|
139
|
+
NL: 18,
|
|
140
|
+
PL: 28,
|
|
141
|
+
PT: 25,
|
|
142
|
+
RO: 24,
|
|
143
|
+
SK: 24,
|
|
144
|
+
SI: 19,
|
|
145
|
+
ES: 24,
|
|
146
|
+
SE: 24,
|
|
147
|
+
GB: 22,
|
|
148
|
+
CH: 21,
|
|
149
|
+
NO: 15
|
|
150
|
+
};
|
|
151
|
+
var _INTL_SUFFIX = "(?:KGaA|GmbH|OHG|GbR|SARL|EURL|S\\.p\\.A\\.|S\\.r\\.l\\.|S\\.n\\.c\\.|S\\.a\\.s\\.|B\\.V\\.|N\\.V\\.|S\\.A\\.|S\\.L\\.|Corp\\.|Inc\\.|Ltd\\.|LLP|LLC|PLC|SpA|Srl|SNC|SAS|BV|NV|SL|SA|Corp|Inc|Ltd|KG|AG|UG)";
|
|
152
|
+
var _UC = "[A-Z\xC0-\u024F]";
|
|
153
|
+
var _WC = "[A-Za-z0-9\xC0-\u024F\\-]";
|
|
154
|
+
var _INTL_NAME_TOKEN = `(?:and|&|${_UC}${_WC}*\\.?)`;
|
|
155
|
+
var COMPANY_NAME_INTL_RE = new RegExp(
|
|
156
|
+
`(?<![A-Za-z\xC0-\u024F])(${_UC}${_WC}*(?:\\s+${_INTL_NAME_TOKEN}){0,6}\\s+${_INTL_SUFFIX})`,
|
|
157
|
+
"gu"
|
|
158
|
+
);
|
|
22
159
|
var SSN_RE = /\b(?!000|666|9\d{2})\d{3}-(?!00)\d{2}-(?!0000)\d{4}\b/g;
|
|
23
160
|
function validTckn(s) {
|
|
24
161
|
if (s.length !== 11 || s[0] === "0") return false;
|
|
@@ -70,10 +207,30 @@ function validIban(s) {
|
|
|
70
207
|
}
|
|
71
208
|
return remainder === 1;
|
|
72
209
|
}
|
|
210
|
+
function validIbanIntl(s) {
|
|
211
|
+
const country = s.slice(0, 2);
|
|
212
|
+
if (country === "TR" || !(country in _IBAN_INTL_LENGTHS)) return false;
|
|
213
|
+
if (s.length !== _IBAN_INTL_LENGTHS[country]) return false;
|
|
214
|
+
return validIban(s);
|
|
215
|
+
}
|
|
216
|
+
function validPhoneIntl(raw) {
|
|
217
|
+
const digits = raw.replace(/\D/g, "");
|
|
218
|
+
return digits.length >= 7 && digits.length <= 15 && digits.slice(0, 2) !== "90";
|
|
219
|
+
}
|
|
73
220
|
var LOCALE_DETECTORS = {
|
|
74
|
-
tr: /* @__PURE__ */ new Set([
|
|
75
|
-
|
|
76
|
-
|
|
221
|
+
tr: /* @__PURE__ */ new Set([
|
|
222
|
+
"national_id_tr",
|
|
223
|
+
"tax_id_tr",
|
|
224
|
+
"phone_tr",
|
|
225
|
+
"name",
|
|
226
|
+
"iban_tr",
|
|
227
|
+
"company_name_tr",
|
|
228
|
+
"mersis_no",
|
|
229
|
+
"postal_code_tr",
|
|
230
|
+
"province_tr"
|
|
231
|
+
]),
|
|
232
|
+
us: /* @__PURE__ */ new Set(["ssn", "phone_intl", "company_name_intl"]),
|
|
233
|
+
eu: /* @__PURE__ */ new Set(["phone_intl", "iban_intl", "company_name_intl"])
|
|
77
234
|
};
|
|
78
235
|
var UNIVERSAL = /* @__PURE__ */ new Set(["email", "iban", "credit_card", "ip", "ip_v6"]);
|
|
79
236
|
function activeDetectors(locale) {
|
|
@@ -82,7 +239,6 @@ function activeDetectors(locale) {
|
|
|
82
239
|
for (const detectors of Object.values(LOCALE_DETECTORS)) {
|
|
83
240
|
detectors.forEach((d) => active2.add(d));
|
|
84
241
|
}
|
|
85
|
-
if (active2.has("phone_tr")) active2.delete("phone");
|
|
86
242
|
return active2;
|
|
87
243
|
}
|
|
88
244
|
const active = new Set(UNIVERSAL);
|
|
@@ -101,15 +257,15 @@ function findAll(re, text, type) {
|
|
|
101
257
|
function detectPii(text, locale = "tr") {
|
|
102
258
|
const active = activeDetectors(locale);
|
|
103
259
|
const t = text ?? "";
|
|
104
|
-
|
|
260
|
+
let findings = [];
|
|
105
261
|
if (active.has("email")) findings.push(...findAll(EMAIL_RE, t, "email"));
|
|
106
|
-
if (active.has("
|
|
262
|
+
if (active.has("phone_intl")) {
|
|
107
263
|
PHONE_INTL_RE.lastIndex = 0;
|
|
108
264
|
let m;
|
|
109
265
|
while ((m = PHONE_INTL_RE.exec(t)) !== null) {
|
|
110
|
-
const
|
|
111
|
-
if (
|
|
112
|
-
findings.push({ type: "
|
|
266
|
+
const candidate = m[1];
|
|
267
|
+
if (validPhoneIntl(candidate)) {
|
|
268
|
+
findings.push({ type: "phone_intl", value: candidate, start: m.index, end: m.index + candidate.length });
|
|
113
269
|
}
|
|
114
270
|
}
|
|
115
271
|
}
|
|
@@ -162,8 +318,71 @@ function detectPii(text, locale = "tr") {
|
|
|
162
318
|
findings.push({ type: "name", value, start, end: start + value.length });
|
|
163
319
|
}
|
|
164
320
|
}
|
|
321
|
+
if (active.has("iban_tr")) {
|
|
322
|
+
IBAN_TR_RE.lastIndex = 0;
|
|
323
|
+
let m;
|
|
324
|
+
while ((m = IBAN_TR_RE.exec(t)) !== null) {
|
|
325
|
+
if (validIban(m[0])) {
|
|
326
|
+
findings.push({ type: "iban_tr", value: m[0], start: m.index, end: m.index + m[0].length });
|
|
327
|
+
}
|
|
328
|
+
}
|
|
329
|
+
}
|
|
330
|
+
if (active.has("company_name_tr")) {
|
|
331
|
+
COMPANY_NAME_TR_RE.lastIndex = 0;
|
|
332
|
+
let m;
|
|
333
|
+
while ((m = COMPANY_NAME_TR_RE.exec(t)) !== null) {
|
|
334
|
+
findings.push({ type: "company_name_tr", value: m[1], start: m.index, end: m.index + m[1].length });
|
|
335
|
+
}
|
|
336
|
+
}
|
|
337
|
+
if (active.has("mersis_no")) {
|
|
338
|
+
MERSIS_RE.lastIndex = 0;
|
|
339
|
+
let m;
|
|
340
|
+
while ((m = MERSIS_RE.exec(t)) !== null) {
|
|
341
|
+
findings.push({ type: "mersis_no", value: m[1], start: m.index, end: m.index + m[1].length });
|
|
342
|
+
}
|
|
343
|
+
}
|
|
344
|
+
if (active.has("postal_code_tr")) {
|
|
345
|
+
POSTAL_CODE_TR_RE.lastIndex = 0;
|
|
346
|
+
let m;
|
|
347
|
+
while ((m = POSTAL_CODE_TR_RE.exec(t)) !== null) {
|
|
348
|
+
findings.push({ type: "postal_code_tr", value: m[1], start: m.index, end: m.index + m[1].length });
|
|
349
|
+
}
|
|
350
|
+
}
|
|
351
|
+
if (active.has("province_tr")) {
|
|
352
|
+
PROVINCE_TR_RE.lastIndex = 0;
|
|
353
|
+
let m;
|
|
354
|
+
while ((m = PROVINCE_TR_RE.exec(t)) !== null) {
|
|
355
|
+
findings.push({ type: "province_tr", value: m[1], start: m.index, end: m.index + m[1].length });
|
|
356
|
+
}
|
|
357
|
+
}
|
|
165
358
|
if (active.has("ssn")) findings.push(...findAll(SSN_RE, t, "ssn"));
|
|
166
|
-
|
|
359
|
+
if (active.has("iban_intl")) {
|
|
360
|
+
IBAN_INTL_RE.lastIndex = 0;
|
|
361
|
+
let m;
|
|
362
|
+
while ((m = IBAN_INTL_RE.exec(t)) !== null) {
|
|
363
|
+
const candidate = m[1];
|
|
364
|
+
if (validIbanIntl(candidate)) {
|
|
365
|
+
findings.push({ type: "iban_intl", value: candidate, start: m.index, end: m.index + candidate.length });
|
|
366
|
+
}
|
|
367
|
+
}
|
|
368
|
+
}
|
|
369
|
+
if (active.has("company_name_intl")) {
|
|
370
|
+
COMPANY_NAME_INTL_RE.lastIndex = 0;
|
|
371
|
+
let m;
|
|
372
|
+
while ((m = COMPANY_NAME_INTL_RE.exec(t)) !== null) {
|
|
373
|
+
findings.push({ type: "company_name_intl", value: m[1], start: m.index, end: m.index + m[1].length });
|
|
374
|
+
}
|
|
375
|
+
}
|
|
376
|
+
findings.sort((a, b) => a.start - b.start);
|
|
377
|
+
const specificIbanSpans = new Set(
|
|
378
|
+
findings.filter((f) => f.type === "iban_tr" || f.type === "iban_intl").map((f) => `${f.start}:${f.end}`)
|
|
379
|
+
);
|
|
380
|
+
if (specificIbanSpans.size > 0) {
|
|
381
|
+
findings = findings.filter(
|
|
382
|
+
(f) => !(f.type === "iban" && specificIbanSpans.has(`${f.start}:${f.end}`))
|
|
383
|
+
);
|
|
384
|
+
}
|
|
385
|
+
return findings;
|
|
167
386
|
}
|
|
168
387
|
|
|
169
388
|
// src/quality.ts
|