@flexorch/audit 0.8.1 → 0.9.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -60,6 +60,9 @@ var COMPANY_NAME_TR_RE = new RegExp(
60
60
  );
61
61
  var MERSIS_RE = /\b([1-9]\d{15})\b/g;
62
62
  var SGK_RE = /(?:SGK\s*(?:Sicil\s*No(?:su)?|No(?:su)?|Numara(?:s[ıi])?)?|Sigortal[ıi]\s*(?:Sicil\s*)?(?:No|Numara(?:s[ıi])?)|SSK\s*(?:No|Numara(?:s[ıi])?|Sicil))\s*[:#]*\s*(\d{10,11})\b/giu;
63
+ var EMEKLILIK_NO_RE = /(?:Emekli(?:lik)?\s*(?:Sicil\s*)?(?:No(?:su)?|Numara(?:s[ıi])?)|E\s*Sicil|Emekli\s*Maaş[ıi]\s*No(?:su)?)\s*[:#]*\s*(\d{9,11})\b/giu;
64
+ var ISYERI_SICIL_NO_RE = /(?:İşyeri\s*(?:SGK\s*)?(?:Sicil\s*)?(?:Kodu?|No(?:su)?|Numara(?:s[ıi])?)|İşyeri\s*Tescil\s*No(?:su)?|SGK\s*İşyeri\s*Kodu?)\s*[:#]*\s*(\d{8,9})\b/giu;
65
+ var BAGKUR_NO_RE = /(?:Bağ[-\s]?[Kk]ur\s*(?:Sicil\s*)?(?:No(?:su)?|Numara(?:s[ıi])?)|4\s*\/\s*b\s*(?:Sicil\s*)?No(?:su)?|Kendi\s*Nam[ıi]na\s*(?:Çalışan\s*)?SGK\s*No(?:su)?)\s*[:#]*\s*(\d{10,11})\b/giu;
63
66
  var POSTAL_CODE_TR_RE = /\b((?:0[1-9]|[1-7]\d|80|81)\d{3})\b/g;
64
67
  var _TR_PROVINCES_SORTED = [
65
68
  "Afyonkarahisar",
@@ -383,7 +386,10 @@ var LOCALE_DETECTORS = {
383
386
  "mersis_no",
384
387
  "postal_code_tr",
385
388
  "province_tr",
386
- "sgk_no"
389
+ "sgk_no",
390
+ "emeklilik_no",
391
+ "isyeri_sicil_no",
392
+ "bagkur_no"
387
393
  ]),
388
394
  us: /* @__PURE__ */ new Set(["ssn", "tax_id_us", "national_id_us", "phone_intl", "company_name_intl"]),
389
395
  eu: /* @__PURE__ */ new Set(["phone_intl", "iban_intl", "company_name_intl"]),
@@ -518,6 +524,30 @@ function detectPii(text, locale = "und") {
518
524
  findings.push({ type: "sgk_no", value: m[1], start, end: start + m[1].length });
519
525
  }
520
526
  }
527
+ if (active.has("emeklilik_no")) {
528
+ EMEKLILIK_NO_RE.lastIndex = 0;
529
+ let m;
530
+ while ((m = EMEKLILIK_NO_RE.exec(t)) !== null) {
531
+ const start = m.index + m[0].lastIndexOf(m[1]);
532
+ findings.push({ type: "emeklilik_no", value: m[1], start, end: start + m[1].length });
533
+ }
534
+ }
535
+ if (active.has("isyeri_sicil_no")) {
536
+ ISYERI_SICIL_NO_RE.lastIndex = 0;
537
+ let m;
538
+ while ((m = ISYERI_SICIL_NO_RE.exec(t)) !== null) {
539
+ const start = m.index + m[0].lastIndexOf(m[1]);
540
+ findings.push({ type: "isyeri_sicil_no", value: m[1], start, end: start + m[1].length });
541
+ }
542
+ }
543
+ if (active.has("bagkur_no")) {
544
+ BAGKUR_NO_RE.lastIndex = 0;
545
+ let m;
546
+ while ((m = BAGKUR_NO_RE.exec(t)) !== null) {
547
+ const start = m.index + m[0].lastIndexOf(m[1]);
548
+ findings.push({ type: "bagkur_no", value: m[1], start, end: start + m[1].length });
549
+ }
550
+ }
521
551
  if (active.has("postal_code_tr")) {
522
552
  POSTAL_CODE_TR_RE.lastIndex = 0;
523
553
  let m;
@@ -838,7 +868,7 @@ function applyMask(text, findings, strategy = "redact") {
838
868
  const tag = type.toUpperCase();
839
869
  let replacement;
840
870
  if (strategy === "redact") {
841
- replacement = `[REDACTED_${tag}]`;
871
+ replacement = `[MASKED_${tag}]`;
842
872
  } else if (strategy === "replace") {
843
873
  replacement = synthetic(type, value);
844
874
  } else if (strategy === "token") {
@@ -853,7 +883,7 @@ function applyMask(text, findings, strategy = "redact") {
853
883
  }
854
884
 
855
885
  // src/index.ts
856
- var version = "0.7.0";
886
+ var version = "0.8.2";
857
887
  function computeQualityScore(completeness, avgLength, garbageRatio) {
858
888
  const lengthScore = Math.min(avgLength / 500, 1);
859
889
  const noiseScore = Math.max(0, 1 - garbageRatio * 10);
package/dist/index.d.cts CHANGED
@@ -51,10 +51,10 @@ declare function applyMask(text: string, findings: PiiFinding[], strategy?: Mask
51
51
  * result.noise // { garbage_ratio, encoding_ok }
52
52
  *
53
53
  * const clean = mask(text, result.pii, { strategy: "redact" })
54
- * // "Contact: [REDACTED_EMAIL]"
54
+ * // "Contact: [MASKED_EMAIL]"
55
55
  */
56
56
 
57
- declare const version = "0.7.0";
57
+ declare const version = "0.8.2";
58
58
  type QualityGrade = "A" | "B" | "C" | "D";
59
59
  interface PiiSummaryEntry {
60
60
  type: string;
@@ -139,7 +139,7 @@ declare function auditStream(texts: AsyncIterable<string>, options?: AuditOption
139
139
  *
140
140
  * @example
141
141
  * const clean = redactForLlm("TCKN: 12345678950, email: ali@example.com", { locale: "tr" })
142
- * // "TCKN: [REDACTED_NATIONAL_ID_TR], email: [REDACTED_EMAIL]"
142
+ * // "TCKN: [MASKED_NATIONAL_ID_TR], email: [MASKED_EMAIL]"
143
143
  */
144
144
  declare function redactForLlm(text: string, options?: AuditOptions & MaskOptions): string;
145
145
  /**
package/dist/index.d.ts CHANGED
@@ -51,10 +51,10 @@ declare function applyMask(text: string, findings: PiiFinding[], strategy?: Mask
51
51
  * result.noise // { garbage_ratio, encoding_ok }
52
52
  *
53
53
  * const clean = mask(text, result.pii, { strategy: "redact" })
54
- * // "Contact: [REDACTED_EMAIL]"
54
+ * // "Contact: [MASKED_EMAIL]"
55
55
  */
56
56
 
57
- declare const version = "0.7.0";
57
+ declare const version = "0.8.2";
58
58
  type QualityGrade = "A" | "B" | "C" | "D";
59
59
  interface PiiSummaryEntry {
60
60
  type: string;
@@ -139,7 +139,7 @@ declare function auditStream(texts: AsyncIterable<string>, options?: AuditOption
139
139
  *
140
140
  * @example
141
141
  * const clean = redactForLlm("TCKN: 12345678950, email: ali@example.com", { locale: "tr" })
142
- * // "TCKN: [REDACTED_NATIONAL_ID_TR], email: [REDACTED_EMAIL]"
142
+ * // "TCKN: [MASKED_NATIONAL_ID_TR], email: [MASKED_EMAIL]"
143
143
  */
144
144
  declare function redactForLlm(text: string, options?: AuditOptions & MaskOptions): string;
145
145
  /**
package/dist/index.js CHANGED
@@ -22,6 +22,9 @@ var COMPANY_NAME_TR_RE = new RegExp(
22
22
  );
23
23
  var MERSIS_RE = /\b([1-9]\d{15})\b/g;
24
24
  var SGK_RE = /(?:SGK\s*(?:Sicil\s*No(?:su)?|No(?:su)?|Numara(?:s[ıi])?)?|Sigortal[ıi]\s*(?:Sicil\s*)?(?:No|Numara(?:s[ıi])?)|SSK\s*(?:No|Numara(?:s[ıi])?|Sicil))\s*[:#]*\s*(\d{10,11})\b/giu;
25
+ var EMEKLILIK_NO_RE = /(?:Emekli(?:lik)?\s*(?:Sicil\s*)?(?:No(?:su)?|Numara(?:s[ıi])?)|E\s*Sicil|Emekli\s*Maaş[ıi]\s*No(?:su)?)\s*[:#]*\s*(\d{9,11})\b/giu;
26
+ var ISYERI_SICIL_NO_RE = /(?:İşyeri\s*(?:SGK\s*)?(?:Sicil\s*)?(?:Kodu?|No(?:su)?|Numara(?:s[ıi])?)|İşyeri\s*Tescil\s*No(?:su)?|SGK\s*İşyeri\s*Kodu?)\s*[:#]*\s*(\d{8,9})\b/giu;
27
+ var BAGKUR_NO_RE = /(?:Bağ[-\s]?[Kk]ur\s*(?:Sicil\s*)?(?:No(?:su)?|Numara(?:s[ıi])?)|4\s*\/\s*b\s*(?:Sicil\s*)?No(?:su)?|Kendi\s*Nam[ıi]na\s*(?:Çalışan\s*)?SGK\s*No(?:su)?)\s*[:#]*\s*(\d{10,11})\b/giu;
25
28
  var POSTAL_CODE_TR_RE = /\b((?:0[1-9]|[1-7]\d|80|81)\d{3})\b/g;
26
29
  var _TR_PROVINCES_SORTED = [
27
30
  "Afyonkarahisar",
@@ -345,7 +348,10 @@ var LOCALE_DETECTORS = {
345
348
  "mersis_no",
346
349
  "postal_code_tr",
347
350
  "province_tr",
348
- "sgk_no"
351
+ "sgk_no",
352
+ "emeklilik_no",
353
+ "isyeri_sicil_no",
354
+ "bagkur_no"
349
355
  ]),
350
356
  us: /* @__PURE__ */ new Set(["ssn", "tax_id_us", "national_id_us", "phone_intl", "company_name_intl"]),
351
357
  eu: /* @__PURE__ */ new Set(["phone_intl", "iban_intl", "company_name_intl"]),
@@ -480,6 +486,30 @@ function detectPii(text, locale = "und") {
480
486
  findings.push({ type: "sgk_no", value: m[1], start, end: start + m[1].length });
481
487
  }
482
488
  }
489
+ if (active.has("emeklilik_no")) {
490
+ EMEKLILIK_NO_RE.lastIndex = 0;
491
+ let m;
492
+ while ((m = EMEKLILIK_NO_RE.exec(t)) !== null) {
493
+ const start = m.index + m[0].lastIndexOf(m[1]);
494
+ findings.push({ type: "emeklilik_no", value: m[1], start, end: start + m[1].length });
495
+ }
496
+ }
497
+ if (active.has("isyeri_sicil_no")) {
498
+ ISYERI_SICIL_NO_RE.lastIndex = 0;
499
+ let m;
500
+ while ((m = ISYERI_SICIL_NO_RE.exec(t)) !== null) {
501
+ const start = m.index + m[0].lastIndexOf(m[1]);
502
+ findings.push({ type: "isyeri_sicil_no", value: m[1], start, end: start + m[1].length });
503
+ }
504
+ }
505
+ if (active.has("bagkur_no")) {
506
+ BAGKUR_NO_RE.lastIndex = 0;
507
+ let m;
508
+ while ((m = BAGKUR_NO_RE.exec(t)) !== null) {
509
+ const start = m.index + m[0].lastIndexOf(m[1]);
510
+ findings.push({ type: "bagkur_no", value: m[1], start, end: start + m[1].length });
511
+ }
512
+ }
483
513
  if (active.has("postal_code_tr")) {
484
514
  POSTAL_CODE_TR_RE.lastIndex = 0;
485
515
  let m;
@@ -800,7 +830,7 @@ function applyMask(text, findings, strategy = "redact") {
800
830
  const tag = type.toUpperCase();
801
831
  let replacement;
802
832
  if (strategy === "redact") {
803
- replacement = `[REDACTED_${tag}]`;
833
+ replacement = `[MASKED_${tag}]`;
804
834
  } else if (strategy === "replace") {
805
835
  replacement = synthetic(type, value);
806
836
  } else if (strategy === "token") {
@@ -815,7 +845,7 @@ function applyMask(text, findings, strategy = "redact") {
815
845
  }
816
846
 
817
847
  // src/index.ts
818
- var version = "0.7.0";
848
+ var version = "0.8.2";
819
849
  function computeQualityScore(completeness, avgLength, garbageRatio) {
820
850
  const lengthScore = Math.min(avgLength / 500, 1);
821
851
  const noiseScore = Math.max(0, 1 - garbageRatio * 10);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@flexorch/audit",
3
- "version": "0.8.1",
3
+ "version": "0.9.0",
4
4
  "description": "Zero-dependency PII + quality + noise audit for LLM datasets (TR/EU/US)",
5
5
  "keywords": [
6
6
  "pii",