@flexorch/audit 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/dist/index.cjs +230 -11
  2. package/dist/index.js +230 -11
  3. package/package.json +1 -1
package/dist/index.cjs CHANGED
@@ -33,8 +33,9 @@ module.exports = __toCommonJS(index_exports);
33
33
 
34
34
  // src/pii.ts
35
35
  var EMAIL_RE = /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g;
36
- var PHONE_INTL_RE = /\+\d{1,3}[\s\-.]?\(?\d{1,4}\)?[\s\-.]?\d{3,4}[\s\-.]?\d{4}\b/g;
36
+ var PHONE_INTL_RE = /(?<![+\d])(\+[1-9][\d\s\-.()]{5,18}\d)(?!\d)/g;
37
37
  var IBAN_RE = /\b[A-Z]{2}\d{2}[0-9A-Z]{11,30}\b/g;
38
+ var IBAN_INTL_RE = /\b([A-Z]{2}\d{2}[0-9A-Z]{11,30})\b/g;
38
39
  var CC_RE = /\b\d{4}[ \-]\d{4}[ \-]\d{4}[ \-]\d{4}\b/g;
39
40
  var IPV4_RE = /\b(?:(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\b/g;
40
41
  var _H = "[0-9a-fA-F]{1,4}";
@@ -45,6 +46,102 @@ var IPV6_RE = new RegExp(
45
46
  var PHONE_TR_RE = /\b(?:\+90|0)?\s*5\d{2}\s*\d{3}\s*\d{2}\s*\d{2}\b/g;
46
47
  var TCKN_RE = /\b([1-9]\d{10})\b/g;
47
48
  var VKN_RE = /\b([1-9]\d{9})\b/g;
49
+ var IBAN_TR_RE = /\bTR\d{2}[0-9A-Z]{22}\b/g;
50
+ var _TR_COMPANY_SUFFIX = "(?:A\\.\u015E\\.|Ltd\\.\\s*\u015Eti\\.|Koll\\.\\s*\u015Eti\\.|Koop\\.|T\\.A\\.\u015E\\.)";
51
+ var _TR_NAME_TOKEN = "(?:ve|ile|[A-Z\xC7\u011E\u0130\xD6\u015E\xDC][A-Za-z\xC7\u011E\u0130\xD6\u015E\xDC\xE7\u011F\u0131\u015F\xF6\u015F\xFC]*\\.?)";
52
+ var COMPANY_NAME_TR_RE = new RegExp(
53
+ `(?<![A-Za-z\xC7\u011E\u0130\xD6\u015E\xDC\xE7\u011F\u0131\u015F\xF6\u015F\xFC])([A-Z\xC7\u011E\u0130\xD6\u015E\xDC][A-Za-z\xC7\u011E\u0130\xD6\u015E\xDC\xE7\u011F\u0131\u015F\xF6\u015F\xFC]*(?:\\s+${_TR_NAME_TOKEN}){0,6}\\s+${_TR_COMPANY_SUFFIX})`,
54
+ "gu"
55
+ );
56
+ var MERSIS_RE = /\b([1-9]\d{15})\b/g;
57
+ var POSTAL_CODE_TR_RE = /\b((?:0[1-9]|[1-7]\d|80|81)\d{3})\b/g;
58
+ var _TR_PROVINCES_SORTED = [
59
+ "Afyonkarahisar",
60
+ "Kahramanmara\u015F",
61
+ "K\u0131r\u0131kkale",
62
+ "K\u0131rklareli",
63
+ "Diyarbak\u0131r",
64
+ "Gaziantep",
65
+ "\u015Eanl\u0131urfa",
66
+ "Nev\u015Fehir",
67
+ "Kastamonu",
68
+ "G\xFCm\xFC\u015Fhane",
69
+ "Eski\u015Fehir",
70
+ "Erzincan",
71
+ "Erzurum",
72
+ "Denizli",
73
+ "\xC7anakkale",
74
+ "Ad\u0131yaman",
75
+ "Zonguldak",
76
+ "Tekirda\u011F",
77
+ "Trabzon",
78
+ "Tunceli",
79
+ "Karaman",
80
+ "Karab\xFCk",
81
+ "Aksaray",
82
+ "Antalya",
83
+ "K\u0131r\u015Fehir",
84
+ "Osmaniye",
85
+ "Kocaeli",
86
+ "Sakarya",
87
+ "Bart\u0131n",
88
+ "Bayburt",
89
+ "Ardahan",
90
+ "Yozgat",
91
+ "Ankara",
92
+ "Amasya",
93
+ "Artvin",
94
+ "Bal\u0131kesir",
95
+ "Bilecik",
96
+ "Bing\xF6l",
97
+ "Bitlis",
98
+ "Burdur",
99
+ "\xC7ank\u0131r\u0131",
100
+ "Edirne",
101
+ "Elaz\u0131\u011F",
102
+ "Giresun",
103
+ "Hakkari",
104
+ "Isparta",
105
+ "\u0130stanbul",
106
+ "\u0130zmir",
107
+ "Kayseri",
108
+ "K\xFCtahya",
109
+ "Malatya",
110
+ "Manisa",
111
+ "Mardin",
112
+ "Samsun",
113
+ "\u015E\u0131rnak",
114
+ "Sinop",
115
+ "Tokat",
116
+ "Hatay",
117
+ "Konya",
118
+ "Mu\u011Fla",
119
+ "Ni\u011Fde",
120
+ "Rize",
121
+ "Siirt",
122
+ "Sivas",
123
+ "Adana",
124
+ "Ayd\u0131n",
125
+ "Bursa",
126
+ "\xC7orum",
127
+ "I\u011Fd\u0131r",
128
+ "Kilis",
129
+ "Mersin",
130
+ "Batman",
131
+ "Yalova",
132
+ "D\xFCzce",
133
+ "Ordu",
134
+ "Kars",
135
+ "A\u011Fr\u0131",
136
+ "Bolu",
137
+ "Van",
138
+ "U\u015Fak",
139
+ "Mu\u015F"
140
+ ].sort((a, b) => b.length - a.length);
141
+ var PROVINCE_TR_RE = new RegExp(
142
+ `(?<!\\w)(${_TR_PROVINCES_SORTED.map((p) => p.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")).join("|")})(?!\\w)`,
143
+ "gu"
144
+ );
48
145
  var NAME_PREFIX_TR = "(?:Ad[\u0131i]\\s*(?:Soyad[\u0131i])?|Soyad[\u0131i]|\u0130sim|M\xFC\u015Fteri\\s+Ad[\u0131i]|Yetkili(?:\\s+Ki\u015Fi)?|\xC7al\u0131\u015Fan\\s+Ad[\u0131i]|Personel\\s+Ad[\u0131i]|Ki\u015Fi\\s+Ad[\u0131i]|Sat\u0131c\u0131\\s+Ad[\u0131i]|Al\u0131c\u0131\\s+Ad[\u0131i]|\u0130lgili\\s+Ki\u015Fi|Hesap\\s+Sahibi)";
49
146
  var NAME_PREFIX_EN = "(?:Full\\s+Name|Customer\\s+Name|Employee\\s+Name|Contact\\s+Name|Authorized\\s+(?:By|Person)|Account\\s+Holder|(?<!\\bUser\\s)Name)";
50
147
  var NAME_VALUE = "([A-Z\xC7\u011E\u0130\xD6\u015E\xDC][a-z\xE7\u011F\u0131\u015F\xF6\u015F\xFC]+(?:\\s+[A-Z\xC7\u011E\u0130\xD6\u015E\xDC][a-z\xE7\u011F\u0131\u015F\xF6\u015F\xFC]+){0,2})";
@@ -52,6 +149,46 @@ var NAME_RE = new RegExp(
52
149
  `(?:${NAME_PREFIX_TR}|${NAME_PREFIX_EN})\\s*[:\\-]\\s*${NAME_VALUE}`,
53
150
  "gu"
54
151
  );
152
+ var _IBAN_INTL_LENGTHS = {
153
+ AT: 20,
154
+ BE: 16,
155
+ BG: 22,
156
+ HR: 21,
157
+ CY: 28,
158
+ CZ: 24,
159
+ DK: 18,
160
+ EE: 20,
161
+ FI: 18,
162
+ FR: 27,
163
+ DE: 22,
164
+ GR: 27,
165
+ HU: 28,
166
+ IE: 22,
167
+ IT: 27,
168
+ LV: 21,
169
+ LT: 20,
170
+ LU: 20,
171
+ MT: 31,
172
+ NL: 18,
173
+ PL: 28,
174
+ PT: 25,
175
+ RO: 24,
176
+ SK: 24,
177
+ SI: 19,
178
+ ES: 24,
179
+ SE: 24,
180
+ GB: 22,
181
+ CH: 21,
182
+ NO: 15
183
+ };
184
+ var _INTL_SUFFIX = "(?:KGaA|GmbH|OHG|GbR|SARL|EURL|S\\.p\\.A\\.|S\\.r\\.l\\.|S\\.n\\.c\\.|S\\.a\\.s\\.|B\\.V\\.|N\\.V\\.|S\\.A\\.|S\\.L\\.|Corp\\.|Inc\\.|Ltd\\.|LLP|LLC|PLC|SpA|Srl|SNC|SAS|BV|NV|SL|SA|Corp|Inc|Ltd|KG|AG|UG)";
185
+ var _UC = "[A-Z\xC0-\u024F]";
186
+ var _WC = "[A-Za-z0-9\xC0-\u024F\\-]";
187
+ var _INTL_NAME_TOKEN = `(?:and|&|${_UC}${_WC}*\\.?)`;
188
+ var COMPANY_NAME_INTL_RE = new RegExp(
189
+ `(?<![A-Za-z\xC0-\u024F])(${_UC}${_WC}*(?:\\s+${_INTL_NAME_TOKEN}){0,6}\\s+${_INTL_SUFFIX})`,
190
+ "gu"
191
+ );
55
192
  var SSN_RE = /\b(?!000|666|9\d{2})\d{3}-(?!00)\d{2}-(?!0000)\d{4}\b/g;
56
193
  function validTckn(s) {
57
194
  if (s.length !== 11 || s[0] === "0") return false;
@@ -103,10 +240,30 @@ function validIban(s) {
103
240
  }
104
241
  return remainder === 1;
105
242
  }
243
+ function validIbanIntl(s) {
244
+ const country = s.slice(0, 2);
245
+ if (country === "TR" || !(country in _IBAN_INTL_LENGTHS)) return false;
246
+ if (s.length !== _IBAN_INTL_LENGTHS[country]) return false;
247
+ return validIban(s);
248
+ }
249
+ function validPhoneIntl(raw) {
250
+ const digits = raw.replace(/\D/g, "");
251
+ return digits.length >= 7 && digits.length <= 15 && digits.slice(0, 2) !== "90";
252
+ }
106
253
  var LOCALE_DETECTORS = {
107
- tr: /* @__PURE__ */ new Set(["national_id_tr", "tax_id_tr", "phone_tr", "name"]),
108
- us: /* @__PURE__ */ new Set(["ssn", "phone"]),
109
- eu: /* @__PURE__ */ new Set(["phone"])
254
+ tr: /* @__PURE__ */ new Set([
255
+ "national_id_tr",
256
+ "tax_id_tr",
257
+ "phone_tr",
258
+ "name",
259
+ "iban_tr",
260
+ "company_name_tr",
261
+ "mersis_no",
262
+ "postal_code_tr",
263
+ "province_tr"
264
+ ]),
265
+ us: /* @__PURE__ */ new Set(["ssn", "phone_intl", "company_name_intl"]),
266
+ eu: /* @__PURE__ */ new Set(["phone_intl", "iban_intl", "company_name_intl"])
110
267
  };
111
268
  var UNIVERSAL = /* @__PURE__ */ new Set(["email", "iban", "credit_card", "ip", "ip_v6"]);
112
269
  function activeDetectors(locale) {
@@ -115,7 +272,6 @@ function activeDetectors(locale) {
115
272
  for (const detectors of Object.values(LOCALE_DETECTORS)) {
116
273
  detectors.forEach((d) => active2.add(d));
117
274
  }
118
- if (active2.has("phone_tr")) active2.delete("phone");
119
275
  return active2;
120
276
  }
121
277
  const active = new Set(UNIVERSAL);
@@ -134,15 +290,15 @@ function findAll(re, text, type) {
134
290
  function detectPii(text, locale = "tr") {
135
291
  const active = activeDetectors(locale);
136
292
  const t = text ?? "";
137
- const findings = [];
293
+ let findings = [];
138
294
  if (active.has("email")) findings.push(...findAll(EMAIL_RE, t, "email"));
139
- if (active.has("phone")) {
295
+ if (active.has("phone_intl")) {
140
296
  PHONE_INTL_RE.lastIndex = 0;
141
297
  let m;
142
298
  while ((m = PHONE_INTL_RE.exec(t)) !== null) {
143
- const digits = m[0].replace(/\D/g, "").length;
144
- if (digits >= 10) {
145
- findings.push({ type: "phone", value: m[0], start: m.index, end: m.index + m[0].length });
299
+ const candidate = m[1];
300
+ if (validPhoneIntl(candidate)) {
301
+ findings.push({ type: "phone_intl", value: candidate, start: m.index, end: m.index + candidate.length });
146
302
  }
147
303
  }
148
304
  }
@@ -195,8 +351,71 @@ function detectPii(text, locale = "tr") {
195
351
  findings.push({ type: "name", value, start, end: start + value.length });
196
352
  }
197
353
  }
354
+ if (active.has("iban_tr")) {
355
+ IBAN_TR_RE.lastIndex = 0;
356
+ let m;
357
+ while ((m = IBAN_TR_RE.exec(t)) !== null) {
358
+ if (validIban(m[0])) {
359
+ findings.push({ type: "iban_tr", value: m[0], start: m.index, end: m.index + m[0].length });
360
+ }
361
+ }
362
+ }
363
+ if (active.has("company_name_tr")) {
364
+ COMPANY_NAME_TR_RE.lastIndex = 0;
365
+ let m;
366
+ while ((m = COMPANY_NAME_TR_RE.exec(t)) !== null) {
367
+ findings.push({ type: "company_name_tr", value: m[1], start: m.index, end: m.index + m[1].length });
368
+ }
369
+ }
370
+ if (active.has("mersis_no")) {
371
+ MERSIS_RE.lastIndex = 0;
372
+ let m;
373
+ while ((m = MERSIS_RE.exec(t)) !== null) {
374
+ findings.push({ type: "mersis_no", value: m[1], start: m.index, end: m.index + m[1].length });
375
+ }
376
+ }
377
+ if (active.has("postal_code_tr")) {
378
+ POSTAL_CODE_TR_RE.lastIndex = 0;
379
+ let m;
380
+ while ((m = POSTAL_CODE_TR_RE.exec(t)) !== null) {
381
+ findings.push({ type: "postal_code_tr", value: m[1], start: m.index, end: m.index + m[1].length });
382
+ }
383
+ }
384
+ if (active.has("province_tr")) {
385
+ PROVINCE_TR_RE.lastIndex = 0;
386
+ let m;
387
+ while ((m = PROVINCE_TR_RE.exec(t)) !== null) {
388
+ findings.push({ type: "province_tr", value: m[1], start: m.index, end: m.index + m[1].length });
389
+ }
390
+ }
198
391
  if (active.has("ssn")) findings.push(...findAll(SSN_RE, t, "ssn"));
199
- return findings.sort((a, b) => a.start - b.start);
392
+ if (active.has("iban_intl")) {
393
+ IBAN_INTL_RE.lastIndex = 0;
394
+ let m;
395
+ while ((m = IBAN_INTL_RE.exec(t)) !== null) {
396
+ const candidate = m[1];
397
+ if (validIbanIntl(candidate)) {
398
+ findings.push({ type: "iban_intl", value: candidate, start: m.index, end: m.index + candidate.length });
399
+ }
400
+ }
401
+ }
402
+ if (active.has("company_name_intl")) {
403
+ COMPANY_NAME_INTL_RE.lastIndex = 0;
404
+ let m;
405
+ while ((m = COMPANY_NAME_INTL_RE.exec(t)) !== null) {
406
+ findings.push({ type: "company_name_intl", value: m[1], start: m.index, end: m.index + m[1].length });
407
+ }
408
+ }
409
+ findings.sort((a, b) => a.start - b.start);
410
+ const specificIbanSpans = new Set(
411
+ findings.filter((f) => f.type === "iban_tr" || f.type === "iban_intl").map((f) => `${f.start}:${f.end}`)
412
+ );
413
+ if (specificIbanSpans.size > 0) {
414
+ findings = findings.filter(
415
+ (f) => !(f.type === "iban" && specificIbanSpans.has(`${f.start}:${f.end}`))
416
+ );
417
+ }
418
+ return findings;
200
419
  }
201
420
 
202
421
  // src/quality.ts
package/dist/index.js CHANGED
@@ -1,7 +1,8 @@
1
1
  // src/pii.ts
2
2
  var EMAIL_RE = /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g;
3
- var PHONE_INTL_RE = /\+\d{1,3}[\s\-.]?\(?\d{1,4}\)?[\s\-.]?\d{3,4}[\s\-.]?\d{4}\b/g;
3
+ var PHONE_INTL_RE = /(?<![+\d])(\+[1-9][\d\s\-.()]{5,18}\d)(?!\d)/g;
4
4
  var IBAN_RE = /\b[A-Z]{2}\d{2}[0-9A-Z]{11,30}\b/g;
5
+ var IBAN_INTL_RE = /\b([A-Z]{2}\d{2}[0-9A-Z]{11,30})\b/g;
5
6
  var CC_RE = /\b\d{4}[ \-]\d{4}[ \-]\d{4}[ \-]\d{4}\b/g;
6
7
  var IPV4_RE = /\b(?:(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\b/g;
7
8
  var _H = "[0-9a-fA-F]{1,4}";
@@ -12,6 +13,102 @@ var IPV6_RE = new RegExp(
12
13
  var PHONE_TR_RE = /\b(?:\+90|0)?\s*5\d{2}\s*\d{3}\s*\d{2}\s*\d{2}\b/g;
13
14
  var TCKN_RE = /\b([1-9]\d{10})\b/g;
14
15
  var VKN_RE = /\b([1-9]\d{9})\b/g;
16
+ var IBAN_TR_RE = /\bTR\d{2}[0-9A-Z]{22}\b/g;
17
+ var _TR_COMPANY_SUFFIX = "(?:A\\.\u015E\\.|Ltd\\.\\s*\u015Eti\\.|Koll\\.\\s*\u015Eti\\.|Koop\\.|T\\.A\\.\u015E\\.)";
18
+ var _TR_NAME_TOKEN = "(?:ve|ile|[A-Z\xC7\u011E\u0130\xD6\u015E\xDC][A-Za-z\xC7\u011E\u0130\xD6\u015E\xDC\xE7\u011F\u0131\u015F\xF6\u015F\xFC]*\\.?)";
19
+ var COMPANY_NAME_TR_RE = new RegExp(
20
+ `(?<![A-Za-z\xC7\u011E\u0130\xD6\u015E\xDC\xE7\u011F\u0131\u015F\xF6\u015F\xFC])([A-Z\xC7\u011E\u0130\xD6\u015E\xDC][A-Za-z\xC7\u011E\u0130\xD6\u015E\xDC\xE7\u011F\u0131\u015F\xF6\u015F\xFC]*(?:\\s+${_TR_NAME_TOKEN}){0,6}\\s+${_TR_COMPANY_SUFFIX})`,
21
+ "gu"
22
+ );
23
+ var MERSIS_RE = /\b([1-9]\d{15})\b/g;
24
+ var POSTAL_CODE_TR_RE = /\b((?:0[1-9]|[1-7]\d|80|81)\d{3})\b/g;
25
+ var _TR_PROVINCES_SORTED = [
26
+ "Afyonkarahisar",
27
+ "Kahramanmara\u015F",
28
+ "K\u0131r\u0131kkale",
29
+ "K\u0131rklareli",
30
+ "Diyarbak\u0131r",
31
+ "Gaziantep",
32
+ "\u015Eanl\u0131urfa",
33
+ "Nev\u015Fehir",
34
+ "Kastamonu",
35
+ "G\xFCm\xFC\u015Fhane",
36
+ "Eski\u015Fehir",
37
+ "Erzincan",
38
+ "Erzurum",
39
+ "Denizli",
40
+ "\xC7anakkale",
41
+ "Ad\u0131yaman",
42
+ "Zonguldak",
43
+ "Tekirda\u011F",
44
+ "Trabzon",
45
+ "Tunceli",
46
+ "Karaman",
47
+ "Karab\xFCk",
48
+ "Aksaray",
49
+ "Antalya",
50
+ "K\u0131r\u015Fehir",
51
+ "Osmaniye",
52
+ "Kocaeli",
53
+ "Sakarya",
54
+ "Bart\u0131n",
55
+ "Bayburt",
56
+ "Ardahan",
57
+ "Yozgat",
58
+ "Ankara",
59
+ "Amasya",
60
+ "Artvin",
61
+ "Bal\u0131kesir",
62
+ "Bilecik",
63
+ "Bing\xF6l",
64
+ "Bitlis",
65
+ "Burdur",
66
+ "\xC7ank\u0131r\u0131",
67
+ "Edirne",
68
+ "Elaz\u0131\u011F",
69
+ "Giresun",
70
+ "Hakkari",
71
+ "Isparta",
72
+ "\u0130stanbul",
73
+ "\u0130zmir",
74
+ "Kayseri",
75
+ "K\xFCtahya",
76
+ "Malatya",
77
+ "Manisa",
78
+ "Mardin",
79
+ "Samsun",
80
+ "\u015E\u0131rnak",
81
+ "Sinop",
82
+ "Tokat",
83
+ "Hatay",
84
+ "Konya",
85
+ "Mu\u011Fla",
86
+ "Ni\u011Fde",
87
+ "Rize",
88
+ "Siirt",
89
+ "Sivas",
90
+ "Adana",
91
+ "Ayd\u0131n",
92
+ "Bursa",
93
+ "\xC7orum",
94
+ "I\u011Fd\u0131r",
95
+ "Kilis",
96
+ "Mersin",
97
+ "Batman",
98
+ "Yalova",
99
+ "D\xFCzce",
100
+ "Ordu",
101
+ "Kars",
102
+ "A\u011Fr\u0131",
103
+ "Bolu",
104
+ "Van",
105
+ "U\u015Fak",
106
+ "Mu\u015F"
107
+ ].sort((a, b) => b.length - a.length);
108
+ var PROVINCE_TR_RE = new RegExp(
109
+ `(?<!\\w)(${_TR_PROVINCES_SORTED.map((p) => p.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")).join("|")})(?!\\w)`,
110
+ "gu"
111
+ );
15
112
  var NAME_PREFIX_TR = "(?:Ad[\u0131i]\\s*(?:Soyad[\u0131i])?|Soyad[\u0131i]|\u0130sim|M\xFC\u015Fteri\\s+Ad[\u0131i]|Yetkili(?:\\s+Ki\u015Fi)?|\xC7al\u0131\u015Fan\\s+Ad[\u0131i]|Personel\\s+Ad[\u0131i]|Ki\u015Fi\\s+Ad[\u0131i]|Sat\u0131c\u0131\\s+Ad[\u0131i]|Al\u0131c\u0131\\s+Ad[\u0131i]|\u0130lgili\\s+Ki\u015Fi|Hesap\\s+Sahibi)";
16
113
  var NAME_PREFIX_EN = "(?:Full\\s+Name|Customer\\s+Name|Employee\\s+Name|Contact\\s+Name|Authorized\\s+(?:By|Person)|Account\\s+Holder|(?<!\\bUser\\s)Name)";
17
114
  var NAME_VALUE = "([A-Z\xC7\u011E\u0130\xD6\u015E\xDC][a-z\xE7\u011F\u0131\u015F\xF6\u015F\xFC]+(?:\\s+[A-Z\xC7\u011E\u0130\xD6\u015E\xDC][a-z\xE7\u011F\u0131\u015F\xF6\u015F\xFC]+){0,2})";
@@ -19,6 +116,46 @@ var NAME_RE = new RegExp(
19
116
  `(?:${NAME_PREFIX_TR}|${NAME_PREFIX_EN})\\s*[:\\-]\\s*${NAME_VALUE}`,
20
117
  "gu"
21
118
  );
119
+ var _IBAN_INTL_LENGTHS = {
120
+ AT: 20,
121
+ BE: 16,
122
+ BG: 22,
123
+ HR: 21,
124
+ CY: 28,
125
+ CZ: 24,
126
+ DK: 18,
127
+ EE: 20,
128
+ FI: 18,
129
+ FR: 27,
130
+ DE: 22,
131
+ GR: 27,
132
+ HU: 28,
133
+ IE: 22,
134
+ IT: 27,
135
+ LV: 21,
136
+ LT: 20,
137
+ LU: 20,
138
+ MT: 31,
139
+ NL: 18,
140
+ PL: 28,
141
+ PT: 25,
142
+ RO: 24,
143
+ SK: 24,
144
+ SI: 19,
145
+ ES: 24,
146
+ SE: 24,
147
+ GB: 22,
148
+ CH: 21,
149
+ NO: 15
150
+ };
151
+ var _INTL_SUFFIX = "(?:KGaA|GmbH|OHG|GbR|SARL|EURL|S\\.p\\.A\\.|S\\.r\\.l\\.|S\\.n\\.c\\.|S\\.a\\.s\\.|B\\.V\\.|N\\.V\\.|S\\.A\\.|S\\.L\\.|Corp\\.|Inc\\.|Ltd\\.|LLP|LLC|PLC|SpA|Srl|SNC|SAS|BV|NV|SL|SA|Corp|Inc|Ltd|KG|AG|UG)";
152
+ var _UC = "[A-Z\xC0-\u024F]";
153
+ var _WC = "[A-Za-z0-9\xC0-\u024F\\-]";
154
+ var _INTL_NAME_TOKEN = `(?:and|&|${_UC}${_WC}*\\.?)`;
155
+ var COMPANY_NAME_INTL_RE = new RegExp(
156
+ `(?<![A-Za-z\xC0-\u024F])(${_UC}${_WC}*(?:\\s+${_INTL_NAME_TOKEN}){0,6}\\s+${_INTL_SUFFIX})`,
157
+ "gu"
158
+ );
22
159
  var SSN_RE = /\b(?!000|666|9\d{2})\d{3}-(?!00)\d{2}-(?!0000)\d{4}\b/g;
23
160
  function validTckn(s) {
24
161
  if (s.length !== 11 || s[0] === "0") return false;
@@ -70,10 +207,30 @@ function validIban(s) {
70
207
  }
71
208
  return remainder === 1;
72
209
  }
210
+ function validIbanIntl(s) {
211
+ const country = s.slice(0, 2);
212
+ if (country === "TR" || !(country in _IBAN_INTL_LENGTHS)) return false;
213
+ if (s.length !== _IBAN_INTL_LENGTHS[country]) return false;
214
+ return validIban(s);
215
+ }
216
+ function validPhoneIntl(raw) {
217
+ const digits = raw.replace(/\D/g, "");
218
+ return digits.length >= 7 && digits.length <= 15 && digits.slice(0, 2) !== "90";
219
+ }
73
220
  var LOCALE_DETECTORS = {
74
- tr: /* @__PURE__ */ new Set(["national_id_tr", "tax_id_tr", "phone_tr", "name"]),
75
- us: /* @__PURE__ */ new Set(["ssn", "phone"]),
76
- eu: /* @__PURE__ */ new Set(["phone"])
221
+ tr: /* @__PURE__ */ new Set([
222
+ "national_id_tr",
223
+ "tax_id_tr",
224
+ "phone_tr",
225
+ "name",
226
+ "iban_tr",
227
+ "company_name_tr",
228
+ "mersis_no",
229
+ "postal_code_tr",
230
+ "province_tr"
231
+ ]),
232
+ us: /* @__PURE__ */ new Set(["ssn", "phone_intl", "company_name_intl"]),
233
+ eu: /* @__PURE__ */ new Set(["phone_intl", "iban_intl", "company_name_intl"])
77
234
  };
78
235
  var UNIVERSAL = /* @__PURE__ */ new Set(["email", "iban", "credit_card", "ip", "ip_v6"]);
79
236
  function activeDetectors(locale) {
@@ -82,7 +239,6 @@ function activeDetectors(locale) {
82
239
  for (const detectors of Object.values(LOCALE_DETECTORS)) {
83
240
  detectors.forEach((d) => active2.add(d));
84
241
  }
85
- if (active2.has("phone_tr")) active2.delete("phone");
86
242
  return active2;
87
243
  }
88
244
  const active = new Set(UNIVERSAL);
@@ -101,15 +257,15 @@ function findAll(re, text, type) {
101
257
  function detectPii(text, locale = "tr") {
102
258
  const active = activeDetectors(locale);
103
259
  const t = text ?? "";
104
- const findings = [];
260
+ let findings = [];
105
261
  if (active.has("email")) findings.push(...findAll(EMAIL_RE, t, "email"));
106
- if (active.has("phone")) {
262
+ if (active.has("phone_intl")) {
107
263
  PHONE_INTL_RE.lastIndex = 0;
108
264
  let m;
109
265
  while ((m = PHONE_INTL_RE.exec(t)) !== null) {
110
- const digits = m[0].replace(/\D/g, "").length;
111
- if (digits >= 10) {
112
- findings.push({ type: "phone", value: m[0], start: m.index, end: m.index + m[0].length });
266
+ const candidate = m[1];
267
+ if (validPhoneIntl(candidate)) {
268
+ findings.push({ type: "phone_intl", value: candidate, start: m.index, end: m.index + candidate.length });
113
269
  }
114
270
  }
115
271
  }
@@ -162,8 +318,71 @@ function detectPii(text, locale = "tr") {
162
318
  findings.push({ type: "name", value, start, end: start + value.length });
163
319
  }
164
320
  }
321
+ if (active.has("iban_tr")) {
322
+ IBAN_TR_RE.lastIndex = 0;
323
+ let m;
324
+ while ((m = IBAN_TR_RE.exec(t)) !== null) {
325
+ if (validIban(m[0])) {
326
+ findings.push({ type: "iban_tr", value: m[0], start: m.index, end: m.index + m[0].length });
327
+ }
328
+ }
329
+ }
330
+ if (active.has("company_name_tr")) {
331
+ COMPANY_NAME_TR_RE.lastIndex = 0;
332
+ let m;
333
+ while ((m = COMPANY_NAME_TR_RE.exec(t)) !== null) {
334
+ findings.push({ type: "company_name_tr", value: m[1], start: m.index, end: m.index + m[1].length });
335
+ }
336
+ }
337
+ if (active.has("mersis_no")) {
338
+ MERSIS_RE.lastIndex = 0;
339
+ let m;
340
+ while ((m = MERSIS_RE.exec(t)) !== null) {
341
+ findings.push({ type: "mersis_no", value: m[1], start: m.index, end: m.index + m[1].length });
342
+ }
343
+ }
344
+ if (active.has("postal_code_tr")) {
345
+ POSTAL_CODE_TR_RE.lastIndex = 0;
346
+ let m;
347
+ while ((m = POSTAL_CODE_TR_RE.exec(t)) !== null) {
348
+ findings.push({ type: "postal_code_tr", value: m[1], start: m.index, end: m.index + m[1].length });
349
+ }
350
+ }
351
+ if (active.has("province_tr")) {
352
+ PROVINCE_TR_RE.lastIndex = 0;
353
+ let m;
354
+ while ((m = PROVINCE_TR_RE.exec(t)) !== null) {
355
+ findings.push({ type: "province_tr", value: m[1], start: m.index, end: m.index + m[1].length });
356
+ }
357
+ }
165
358
  if (active.has("ssn")) findings.push(...findAll(SSN_RE, t, "ssn"));
166
- return findings.sort((a, b) => a.start - b.start);
359
+ if (active.has("iban_intl")) {
360
+ IBAN_INTL_RE.lastIndex = 0;
361
+ let m;
362
+ while ((m = IBAN_INTL_RE.exec(t)) !== null) {
363
+ const candidate = m[1];
364
+ if (validIbanIntl(candidate)) {
365
+ findings.push({ type: "iban_intl", value: candidate, start: m.index, end: m.index + candidate.length });
366
+ }
367
+ }
368
+ }
369
+ if (active.has("company_name_intl")) {
370
+ COMPANY_NAME_INTL_RE.lastIndex = 0;
371
+ let m;
372
+ while ((m = COMPANY_NAME_INTL_RE.exec(t)) !== null) {
373
+ findings.push({ type: "company_name_intl", value: m[1], start: m.index, end: m.index + m[1].length });
374
+ }
375
+ }
376
+ findings.sort((a, b) => a.start - b.start);
377
+ const specificIbanSpans = new Set(
378
+ findings.filter((f) => f.type === "iban_tr" || f.type === "iban_intl").map((f) => `${f.start}:${f.end}`)
379
+ );
380
+ if (specificIbanSpans.size > 0) {
381
+ findings = findings.filter(
382
+ (f) => !(f.type === "iban" && specificIbanSpans.has(`${f.start}:${f.end}`))
383
+ );
384
+ }
385
+ return findings;
167
386
  }
168
387
 
169
388
  // src/quality.ts
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@flexorch/audit",
3
- "version": "0.3.1",
3
+ "version": "0.4.0",
4
4
  "description": "Zero-dependency PII + quality + noise audit for LLM datasets (TR/EU/US)",
5
5
  "keywords": [
6
6
  "pii",