@flexorch/audit 0.8.2 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -60,6 +60,9 @@ var COMPANY_NAME_TR_RE = new RegExp(
60
60
  );
61
61
  var MERSIS_RE = /\b([1-9]\d{15})\b/g;
62
62
  var SGK_RE = /(?:SGK\s*(?:Sicil\s*No(?:su)?|No(?:su)?|Numara(?:s[ıi])?)?|Sigortal[ıi]\s*(?:Sicil\s*)?(?:No|Numara(?:s[ıi])?)|SSK\s*(?:No|Numara(?:s[ıi])?|Sicil))\s*[:#]*\s*(\d{10,11})\b/giu;
63
+ var EMEKLILIK_NO_RE = /(?:Emekli(?:lik)?\s*(?:Sicil\s*)?(?:No(?:su)?|Numara(?:s[ıi])?)|E\s*Sicil|Emekli\s*Maaş[ıi]\s*No(?:su)?)\s*[:#]*\s*(\d{9,11})\b/giu;
64
+ var ISYERI_SICIL_NO_RE = /(?:İşyeri\s*(?:SGK\s*)?(?:Sicil\s*)?(?:Kodu?|No(?:su)?|Numara(?:s[ıi])?)|İşyeri\s*Tescil\s*No(?:su)?|SGK\s*İşyeri\s*Kodu?)\s*[:#]*\s*(\d{8,9})\b/giu;
65
+ var BAGKUR_NO_RE = /(?:Bağ[-\s]?[Kk]ur\s*(?:Sicil\s*)?(?:No(?:su)?|Numara(?:s[ıi])?)|4\s*\/\s*b\s*(?:Sicil\s*)?No(?:su)?|Kendi\s*Nam[ıi]na\s*(?:Çalışan\s*)?SGK\s*No(?:su)?)\s*[:#]*\s*(\d{10,11})\b/giu;
63
66
  var POSTAL_CODE_TR_RE = /\b((?:0[1-9]|[1-7]\d|80|81)\d{3})\b/g;
64
67
  var _TR_PROVINCES_SORTED = [
65
68
  "Afyonkarahisar",
@@ -383,7 +386,10 @@ var LOCALE_DETECTORS = {
383
386
  "mersis_no",
384
387
  "postal_code_tr",
385
388
  "province_tr",
386
- "sgk_no"
389
+ "sgk_no",
390
+ "emeklilik_no",
391
+ "isyeri_sicil_no",
392
+ "bagkur_no"
387
393
  ]),
388
394
  us: /* @__PURE__ */ new Set(["ssn", "tax_id_us", "national_id_us", "phone_intl", "company_name_intl"]),
389
395
  eu: /* @__PURE__ */ new Set(["phone_intl", "iban_intl", "company_name_intl"]),
@@ -518,6 +524,30 @@ function detectPii(text, locale = "und") {
518
524
  findings.push({ type: "sgk_no", value: m[1], start, end: start + m[1].length });
519
525
  }
520
526
  }
527
+ if (active.has("emeklilik_no")) {
528
+ EMEKLILIK_NO_RE.lastIndex = 0;
529
+ let m;
530
+ while ((m = EMEKLILIK_NO_RE.exec(t)) !== null) {
531
+ const start = m.index + m[0].lastIndexOf(m[1]);
532
+ findings.push({ type: "emeklilik_no", value: m[1], start, end: start + m[1].length });
533
+ }
534
+ }
535
+ if (active.has("isyeri_sicil_no")) {
536
+ ISYERI_SICIL_NO_RE.lastIndex = 0;
537
+ let m;
538
+ while ((m = ISYERI_SICIL_NO_RE.exec(t)) !== null) {
539
+ const start = m.index + m[0].lastIndexOf(m[1]);
540
+ findings.push({ type: "isyeri_sicil_no", value: m[1], start, end: start + m[1].length });
541
+ }
542
+ }
543
+ if (active.has("bagkur_no")) {
544
+ BAGKUR_NO_RE.lastIndex = 0;
545
+ let m;
546
+ while ((m = BAGKUR_NO_RE.exec(t)) !== null) {
547
+ const start = m.index + m[0].lastIndexOf(m[1]);
548
+ findings.push({ type: "bagkur_no", value: m[1], start, end: start + m[1].length });
549
+ }
550
+ }
521
551
  if (active.has("postal_code_tr")) {
522
552
  POSTAL_CODE_TR_RE.lastIndex = 0;
523
553
  let m;
package/dist/index.js CHANGED
@@ -22,6 +22,9 @@ var COMPANY_NAME_TR_RE = new RegExp(
22
22
  );
23
23
  var MERSIS_RE = /\b([1-9]\d{15})\b/g;
24
24
  var SGK_RE = /(?:SGK\s*(?:Sicil\s*No(?:su)?|No(?:su)?|Numara(?:s[ıi])?)?|Sigortal[ıi]\s*(?:Sicil\s*)?(?:No|Numara(?:s[ıi])?)|SSK\s*(?:No|Numara(?:s[ıi])?|Sicil))\s*[:#]*\s*(\d{10,11})\b/giu;
25
+ var EMEKLILIK_NO_RE = /(?:Emekli(?:lik)?\s*(?:Sicil\s*)?(?:No(?:su)?|Numara(?:s[ıi])?)|E\s*Sicil|Emekli\s*Maaş[ıi]\s*No(?:su)?)\s*[:#]*\s*(\d{9,11})\b/giu;
26
+ var ISYERI_SICIL_NO_RE = /(?:İşyeri\s*(?:SGK\s*)?(?:Sicil\s*)?(?:Kodu?|No(?:su)?|Numara(?:s[ıi])?)|İşyeri\s*Tescil\s*No(?:su)?|SGK\s*İşyeri\s*Kodu?)\s*[:#]*\s*(\d{8,9})\b/giu;
27
+ var BAGKUR_NO_RE = /(?:Bağ[-\s]?[Kk]ur\s*(?:Sicil\s*)?(?:No(?:su)?|Numara(?:s[ıi])?)|4\s*\/\s*b\s*(?:Sicil\s*)?No(?:su)?|Kendi\s*Nam[ıi]na\s*(?:Çalışan\s*)?SGK\s*No(?:su)?)\s*[:#]*\s*(\d{10,11})\b/giu;
25
28
  var POSTAL_CODE_TR_RE = /\b((?:0[1-9]|[1-7]\d|80|81)\d{3})\b/g;
26
29
  var _TR_PROVINCES_SORTED = [
27
30
  "Afyonkarahisar",
@@ -345,7 +348,10 @@ var LOCALE_DETECTORS = {
345
348
  "mersis_no",
346
349
  "postal_code_tr",
347
350
  "province_tr",
348
- "sgk_no"
351
+ "sgk_no",
352
+ "emeklilik_no",
353
+ "isyeri_sicil_no",
354
+ "bagkur_no"
349
355
  ]),
350
356
  us: /* @__PURE__ */ new Set(["ssn", "tax_id_us", "national_id_us", "phone_intl", "company_name_intl"]),
351
357
  eu: /* @__PURE__ */ new Set(["phone_intl", "iban_intl", "company_name_intl"]),
@@ -480,6 +486,30 @@ function detectPii(text, locale = "und") {
480
486
  findings.push({ type: "sgk_no", value: m[1], start, end: start + m[1].length });
481
487
  }
482
488
  }
489
+ if (active.has("emeklilik_no")) {
490
+ EMEKLILIK_NO_RE.lastIndex = 0;
491
+ let m;
492
+ while ((m = EMEKLILIK_NO_RE.exec(t)) !== null) {
493
+ const start = m.index + m[0].lastIndexOf(m[1]);
494
+ findings.push({ type: "emeklilik_no", value: m[1], start, end: start + m[1].length });
495
+ }
496
+ }
497
+ if (active.has("isyeri_sicil_no")) {
498
+ ISYERI_SICIL_NO_RE.lastIndex = 0;
499
+ let m;
500
+ while ((m = ISYERI_SICIL_NO_RE.exec(t)) !== null) {
501
+ const start = m.index + m[0].lastIndexOf(m[1]);
502
+ findings.push({ type: "isyeri_sicil_no", value: m[1], start, end: start + m[1].length });
503
+ }
504
+ }
505
+ if (active.has("bagkur_no")) {
506
+ BAGKUR_NO_RE.lastIndex = 0;
507
+ let m;
508
+ while ((m = BAGKUR_NO_RE.exec(t)) !== null) {
509
+ const start = m.index + m[0].lastIndexOf(m[1]);
510
+ findings.push({ type: "bagkur_no", value: m[1], start, end: start + m[1].length });
511
+ }
512
+ }
483
513
  if (active.has("postal_code_tr")) {
484
514
  POSTAL_CODE_TR_RE.lastIndex = 0;
485
515
  let m;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@flexorch/audit",
3
- "version": "0.8.2",
3
+ "version": "0.9.0",
4
4
  "description": "Zero-dependency PII + quality + noise audit for LLM datasets (TR/EU/US)",
5
5
  "keywords": [
6
6
  "pii",