@flexorch/audit 0.8.1 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +33 -3
- package/dist/index.d.cts +3 -3
- package/dist/index.d.ts +3 -3
- package/dist/index.js +33 -3
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -60,6 +60,9 @@ var COMPANY_NAME_TR_RE = new RegExp(
|
|
|
60
60
|
);
|
|
61
61
|
// Matches a standalone run of exactly 16 ASCII digits whose first digit is 1-9
// (word boundaries on both sides). Capture group 1 is the full number.
// No label is required, so any such 16-digit run matches.
// NOTE(review): presumably a MERSIS registry number, judging by the name — confirm.
// The `g` flag makes `lastIndex` stateful across `exec` calls on this shared object.
var MERSIS_RE = /\b([1-9]\d{15})\b/g;
|
|
62
62
|
// Matches a labelled SGK / Sigortalı / SSK number: one of the label variants,
// optional whitespace, optional ':' or '#' characters, then 10-11 digits.
// Capture group 1 is the digit run only (the label is not captured).
//
// Under the `iu` flags, `i` matches `i` and `I` but NOT the Turkish dotted
// capital `İ` (U+0130), which has no simple case folding. Every `i` that becomes
// `İ` in Turkish upper-casing is therefore written as `[iİ]`, so all-caps labels
// such as "SİGORTALI SİCİL NO" are detected too. `[ıi]` already covers ı/i/I.
//
// The `g` flag makes `lastIndex` stateful; reset it to 0 before each scan.
var SGK_RE = /(?:SGK\s*(?:S[iİ]c[iİ]l\s*No(?:su)?|No(?:su)?|Numara(?:s[ıi])?)?|S[iİ]gortal[ıi]\s*(?:S[iİ]c[iİ]l\s*)?(?:No|Numara(?:s[ıi])?)|SSK\s*(?:No|Numara(?:s[ıi])?|S[iİ]c[iİ]l))\s*[:#]*\s*(\d{10,11})\b/giu;
|
|
63
|
+
// Matches a labelled "Emekli/Emeklilik (Sicil) No/Numarası", "E Sicil" or
// "Emekli Maaşı No" label, followed by optional whitespace, optional ':' or '#'
// characters, then 9-11 digits. Capture group 1 is the digit run only.
//
// Under the `iu` flags, `i` matches `i` and `I` but NOT the Turkish dotted
// capital `İ` (U+0130), which has no simple case folding. Every `i` that becomes
// `İ` in Turkish upper-casing is therefore written as `[iİ]`, so all-caps labels
// such as "EMEKLİ SİCİL NO" are detected too. `[ıi]` already covers ı/i/I.
//
// The `g` flag makes `lastIndex` stateful; reset it to 0 before each scan.
var EMEKLILIK_NO_RE = /(?:Emekl[iİ](?:l[iİ]k)?\s*(?:S[iİ]c[iİ]l\s*)?(?:No(?:su)?|Numara(?:s[ıi])?)|E\s*S[iİ]c[iİ]l|Emekl[iİ]\s*Maaş[ıi]\s*No(?:su)?)\s*[:#]*\s*(\d{9,11})\b/giu;
|
|
64
|
+
// Matches a labelled "İşyeri (SGK) (Sicil) Kod/Kodu/No/Numarası",
// "İşyeri Tescil No" or "SGK İşyeri Kod/Kodu" label, followed by optional
// whitespace, optional ':' or '#' characters, then 8-9 digits.
// Capture group 1 is the digit run only.
//
// Under the `iu` flags the Turkish dotted capital `İ` (U+0130) has no simple
// case folding: a literal `İ` matches only `İ`, and a literal `i` matches only
// `i`/`I`. The leading letter is therefore written `[İi]` (so lowercase
// "işyeri" and ASCII "Isyeri"-style capitals match), and every other `i` that
// upper-cases to `İ` is written `[iİ]` (so "İŞYERİ SİCİL NO" matches).
//
// The `g` flag makes `lastIndex` stateful; reset it to 0 before each scan.
var ISYERI_SICIL_NO_RE = /(?:[İi]şyer[iİ]\s*(?:SGK\s*)?(?:S[iİ]c[iİ]l\s*)?(?:Kodu?|No(?:su)?|Numara(?:s[ıi])?)|[İi]şyer[iİ]\s*Tesc[iİ]l\s*No(?:su)?|SGK\s*[İi]şyer[iİ]\s*Kodu?)\s*[:#]*\s*(\d{8,9})\b/giu;
|
|
65
|
+
// Matches a labelled "Bağ-Kur / Bağ Kur / Bağkur (Sicil) No/Numarası",
// "4/b (Sicil) No" or "Kendi Namına (Çalışan) SGK No" label, followed by
// optional whitespace, optional ':' or '#' characters, then 10-11 digits.
// Capture group 1 is the digit run only.
//
// Under the `iu` flags, `i` does not match the Turkish dotted capital `İ`
// (U+0130) and a literal dotless `ı` does not match `I`. Letters affected by
// Turkish upper-casing are therefore written as `[iİ]` / `[ıi]`, so all-caps
// labels such as "BAĞ-KUR SİCİL NO" and "KENDİ NAMINA ÇALIŞAN SGK NO" match.
//
// The `g` flag makes `lastIndex` stateful; reset it to 0 before each scan.
var BAGKUR_NO_RE = /(?:Bağ[-\s]?[Kk]ur\s*(?:S[iİ]c[iİ]l\s*)?(?:No(?:su)?|Numara(?:s[ıi])?)|4\s*\/\s*b\s*(?:S[iİ]c[iİ]l\s*)?No(?:su)?|Kend[iİ]\s*Nam[ıi]na\s*(?:Çal[ıi]şan\s*)?SGK\s*No(?:su)?)\s*[:#]*\s*(\d{10,11})\b/giu;
|
|
63
66
|
// Matches a standalone 5-digit run (word boundaries on both sides) whose first
// two digits are in the range 01-81; capture group 1 is the full 5-digit code.
// No label is required, so any such 5-digit run matches.
// NOTE(review): the 01-81 prefix presumably corresponds to Turkish province codes — confirm.
// The `g` flag makes `lastIndex` stateful across `exec` calls on this shared object.
var POSTAL_CODE_TR_RE = /\b((?:0[1-9]|[1-7]\d|80|81)\d{3})\b/g;
|
|
64
67
|
var _TR_PROVINCES_SORTED = [
|
|
65
68
|
"Afyonkarahisar",
|
|
@@ -383,7 +386,10 @@ var LOCALE_DETECTORS = {
|
|
|
383
386
|
"mersis_no",
|
|
384
387
|
"postal_code_tr",
|
|
385
388
|
"province_tr",
|
|
386
|
-
"sgk_no"
|
|
389
|
+
"sgk_no",
|
|
390
|
+
"emeklilik_no",
|
|
391
|
+
"isyeri_sicil_no",
|
|
392
|
+
"bagkur_no"
|
|
387
393
|
]),
|
|
388
394
|
us: /* @__PURE__ */ new Set(["ssn", "tax_id_us", "national_id_us", "phone_intl", "company_name_intl"]),
|
|
389
395
|
eu: /* @__PURE__ */ new Set(["phone_intl", "iban_intl", "company_name_intl"]),
|
|
@@ -518,6 +524,30 @@ function detectPii(text, locale = "und") {
|
|
|
518
524
|
findings.push({ type: "sgk_no", value: m[1], start, end: start + m[1].length });
|
|
519
525
|
}
|
|
520
526
|
}
|
|
527
|
+
if (active.has("emeklilik_no")) {
|
|
528
|
+
EMEKLILIK_NO_RE.lastIndex = 0;
|
|
529
|
+
let m;
|
|
530
|
+
while ((m = EMEKLILIK_NO_RE.exec(t)) !== null) {
|
|
531
|
+
const start = m.index + m[0].lastIndexOf(m[1]);
|
|
532
|
+
findings.push({ type: "emeklilik_no", value: m[1], start, end: start + m[1].length });
|
|
533
|
+
}
|
|
534
|
+
}
|
|
535
|
+
if (active.has("isyeri_sicil_no")) {
|
|
536
|
+
ISYERI_SICIL_NO_RE.lastIndex = 0;
|
|
537
|
+
let m;
|
|
538
|
+
while ((m = ISYERI_SICIL_NO_RE.exec(t)) !== null) {
|
|
539
|
+
const start = m.index + m[0].lastIndexOf(m[1]);
|
|
540
|
+
findings.push({ type: "isyeri_sicil_no", value: m[1], start, end: start + m[1].length });
|
|
541
|
+
}
|
|
542
|
+
}
|
|
543
|
+
if (active.has("bagkur_no")) {
|
|
544
|
+
BAGKUR_NO_RE.lastIndex = 0;
|
|
545
|
+
let m;
|
|
546
|
+
while ((m = BAGKUR_NO_RE.exec(t)) !== null) {
|
|
547
|
+
const start = m.index + m[0].lastIndexOf(m[1]);
|
|
548
|
+
findings.push({ type: "bagkur_no", value: m[1], start, end: start + m[1].length });
|
|
549
|
+
}
|
|
550
|
+
}
|
|
521
551
|
if (active.has("postal_code_tr")) {
|
|
522
552
|
POSTAL_CODE_TR_RE.lastIndex = 0;
|
|
523
553
|
let m;
|
|
@@ -838,7 +868,7 @@ function applyMask(text, findings, strategy = "redact") {
|
|
|
838
868
|
const tag = type.toUpperCase();
|
|
839
869
|
let replacement;
|
|
840
870
|
if (strategy === "redact") {
|
|
841
|
-
replacement = `[
|
|
871
|
+
replacement = `[MASKED_${tag}]`;
|
|
842
872
|
} else if (strategy === "replace") {
|
|
843
873
|
replacement = synthetic(type, value);
|
|
844
874
|
} else if (strategy === "token") {
|
|
@@ -853,7 +883,7 @@ function applyMask(text, findings, strategy = "redact") {
|
|
|
853
883
|
}
|
|
854
884
|
|
|
855
885
|
// src/index.ts
|
|
856
|
-
var version = "0.
|
|
886
|
+
var version = "0.8.2";
|
|
857
887
|
function computeQualityScore(completeness, avgLength, garbageRatio) {
|
|
858
888
|
const lengthScore = Math.min(avgLength / 500, 1);
|
|
859
889
|
const noiseScore = Math.max(0, 1 - garbageRatio * 10);
|
package/dist/index.d.cts
CHANGED
|
@@ -51,10 +51,10 @@ declare function applyMask(text: string, findings: PiiFinding[], strategy?: Mask
|
|
|
51
51
|
* result.noise // { garbage_ratio, encoding_ok }
|
|
52
52
|
*
|
|
53
53
|
* const clean = mask(text, result.pii, { strategy: "redact" })
|
|
54
|
-
* // "Contact: [
|
|
54
|
+
* // "Contact: [MASKED_EMAIL]"
|
|
55
55
|
*/
|
|
56
56
|
|
|
57
|
-
declare const version = "0.
|
|
57
|
+
declare const version = "0.8.2";
|
|
58
58
|
type QualityGrade = "A" | "B" | "C" | "D";
|
|
59
59
|
interface PiiSummaryEntry {
|
|
60
60
|
type: string;
|
|
@@ -139,7 +139,7 @@ declare function auditStream(texts: AsyncIterable<string>, options?: AuditOption
|
|
|
139
139
|
*
|
|
140
140
|
* @example
|
|
141
141
|
* const clean = redactForLlm("TCKN: 12345678950, email: ali@example.com", { locale: "tr" })
|
|
142
|
-
* // "TCKN: [
|
|
142
|
+
* // "TCKN: [MASKED_NATIONAL_ID_TR], email: [MASKED_EMAIL]"
|
|
143
143
|
*/
|
|
144
144
|
declare function redactForLlm(text: string, options?: AuditOptions & MaskOptions): string;
|
|
145
145
|
/**
|
package/dist/index.d.ts
CHANGED
|
@@ -51,10 +51,10 @@ declare function applyMask(text: string, findings: PiiFinding[], strategy?: Mask
|
|
|
51
51
|
* result.noise // { garbage_ratio, encoding_ok }
|
|
52
52
|
*
|
|
53
53
|
* const clean = mask(text, result.pii, { strategy: "redact" })
|
|
54
|
-
* // "Contact: [
|
|
54
|
+
* // "Contact: [MASKED_EMAIL]"
|
|
55
55
|
*/
|
|
56
56
|
|
|
57
|
-
declare const version = "0.
|
|
57
|
+
declare const version = "0.8.2";
|
|
58
58
|
type QualityGrade = "A" | "B" | "C" | "D";
|
|
59
59
|
interface PiiSummaryEntry {
|
|
60
60
|
type: string;
|
|
@@ -139,7 +139,7 @@ declare function auditStream(texts: AsyncIterable<string>, options?: AuditOption
|
|
|
139
139
|
*
|
|
140
140
|
* @example
|
|
141
141
|
* const clean = redactForLlm("TCKN: 12345678950, email: ali@example.com", { locale: "tr" })
|
|
142
|
-
* // "TCKN: [
|
|
142
|
+
* // "TCKN: [MASKED_NATIONAL_ID_TR], email: [MASKED_EMAIL]"
|
|
143
143
|
*/
|
|
144
144
|
declare function redactForLlm(text: string, options?: AuditOptions & MaskOptions): string;
|
|
145
145
|
/**
|
package/dist/index.js
CHANGED
|
@@ -22,6 +22,9 @@ var COMPANY_NAME_TR_RE = new RegExp(
|
|
|
22
22
|
);
|
|
23
23
|
// Matches a standalone run of exactly 16 ASCII digits whose first digit is 1-9
// (word boundaries on both sides). Capture group 1 is the full number.
// No label is required, so any such 16-digit run matches.
// NOTE(review): presumably a MERSIS registry number, judging by the name — confirm.
// The `g` flag makes `lastIndex` stateful across `exec` calls on this shared object.
var MERSIS_RE = /\b([1-9]\d{15})\b/g;
|
|
24
24
|
// Matches a labelled SGK / Sigortalı / SSK number: one of the label variants,
// optional whitespace, optional ':' or '#' characters, then 10-11 digits.
// Capture group 1 is the digit run only (the label is not captured).
//
// Under the `iu` flags, `i` matches `i` and `I` but NOT the Turkish dotted
// capital `İ` (U+0130), which has no simple case folding. Every `i` that becomes
// `İ` in Turkish upper-casing is therefore written as `[iİ]`, so all-caps labels
// such as "SİGORTALI SİCİL NO" are detected too. `[ıi]` already covers ı/i/I.
//
// The `g` flag makes `lastIndex` stateful; reset it to 0 before each scan.
var SGK_RE = /(?:SGK\s*(?:S[iİ]c[iİ]l\s*No(?:su)?|No(?:su)?|Numara(?:s[ıi])?)?|S[iİ]gortal[ıi]\s*(?:S[iİ]c[iİ]l\s*)?(?:No|Numara(?:s[ıi])?)|SSK\s*(?:No|Numara(?:s[ıi])?|S[iİ]c[iİ]l))\s*[:#]*\s*(\d{10,11})\b/giu;
|
|
25
|
+
// Matches a labelled "Emekli/Emeklilik (Sicil) No/Numarası", "E Sicil" or
// "Emekli Maaşı No" label, followed by optional whitespace, optional ':' or '#'
// characters, then 9-11 digits. Capture group 1 is the digit run only.
//
// Under the `iu` flags, `i` matches `i` and `I` but NOT the Turkish dotted
// capital `İ` (U+0130), which has no simple case folding. Every `i` that becomes
// `İ` in Turkish upper-casing is therefore written as `[iİ]`, so all-caps labels
// such as "EMEKLİ SİCİL NO" are detected too. `[ıi]` already covers ı/i/I.
//
// The `g` flag makes `lastIndex` stateful; reset it to 0 before each scan.
var EMEKLILIK_NO_RE = /(?:Emekl[iİ](?:l[iİ]k)?\s*(?:S[iİ]c[iİ]l\s*)?(?:No(?:su)?|Numara(?:s[ıi])?)|E\s*S[iİ]c[iİ]l|Emekl[iİ]\s*Maaş[ıi]\s*No(?:su)?)\s*[:#]*\s*(\d{9,11})\b/giu;
|
|
26
|
+
// Matches a labelled "İşyeri (SGK) (Sicil) Kod/Kodu/No/Numarası",
// "İşyeri Tescil No" or "SGK İşyeri Kod/Kodu" label, followed by optional
// whitespace, optional ':' or '#' characters, then 8-9 digits.
// Capture group 1 is the digit run only.
//
// Under the `iu` flags the Turkish dotted capital `İ` (U+0130) has no simple
// case folding: a literal `İ` matches only `İ`, and a literal `i` matches only
// `i`/`I`. The leading letter is therefore written `[İi]` (so lowercase
// "işyeri" and ASCII "Isyeri"-style capitals match), and every other `i` that
// upper-cases to `İ` is written `[iİ]` (so "İŞYERİ SİCİL NO" matches).
//
// The `g` flag makes `lastIndex` stateful; reset it to 0 before each scan.
var ISYERI_SICIL_NO_RE = /(?:[İi]şyer[iİ]\s*(?:SGK\s*)?(?:S[iİ]c[iİ]l\s*)?(?:Kodu?|No(?:su)?|Numara(?:s[ıi])?)|[İi]şyer[iİ]\s*Tesc[iİ]l\s*No(?:su)?|SGK\s*[İi]şyer[iİ]\s*Kodu?)\s*[:#]*\s*(\d{8,9})\b/giu;
|
|
27
|
+
// Matches a labelled "Bağ-Kur / Bağ Kur / Bağkur (Sicil) No/Numarası",
// "4/b (Sicil) No" or "Kendi Namına (Çalışan) SGK No" label, followed by
// optional whitespace, optional ':' or '#' characters, then 10-11 digits.
// Capture group 1 is the digit run only.
//
// Under the `iu` flags, `i` does not match the Turkish dotted capital `İ`
// (U+0130) and a literal dotless `ı` does not match `I`. Letters affected by
// Turkish upper-casing are therefore written as `[iİ]` / `[ıi]`, so all-caps
// labels such as "BAĞ-KUR SİCİL NO" and "KENDİ NAMINA ÇALIŞAN SGK NO" match.
//
// The `g` flag makes `lastIndex` stateful; reset it to 0 before each scan.
var BAGKUR_NO_RE = /(?:Bağ[-\s]?[Kk]ur\s*(?:S[iİ]c[iİ]l\s*)?(?:No(?:su)?|Numara(?:s[ıi])?)|4\s*\/\s*b\s*(?:S[iİ]c[iİ]l\s*)?No(?:su)?|Kend[iİ]\s*Nam[ıi]na\s*(?:Çal[ıi]şan\s*)?SGK\s*No(?:su)?)\s*[:#]*\s*(\d{10,11})\b/giu;
|
|
25
28
|
// Matches a standalone 5-digit run (word boundaries on both sides) whose first
// two digits are in the range 01-81; capture group 1 is the full 5-digit code.
// No label is required, so any such 5-digit run matches.
// NOTE(review): the 01-81 prefix presumably corresponds to Turkish province codes — confirm.
// The `g` flag makes `lastIndex` stateful across `exec` calls on this shared object.
var POSTAL_CODE_TR_RE = /\b((?:0[1-9]|[1-7]\d|80|81)\d{3})\b/g;
|
|
26
29
|
var _TR_PROVINCES_SORTED = [
|
|
27
30
|
"Afyonkarahisar",
|
|
@@ -345,7 +348,10 @@ var LOCALE_DETECTORS = {
|
|
|
345
348
|
"mersis_no",
|
|
346
349
|
"postal_code_tr",
|
|
347
350
|
"province_tr",
|
|
348
|
-
"sgk_no"
|
|
351
|
+
"sgk_no",
|
|
352
|
+
"emeklilik_no",
|
|
353
|
+
"isyeri_sicil_no",
|
|
354
|
+
"bagkur_no"
|
|
349
355
|
]),
|
|
350
356
|
us: /* @__PURE__ */ new Set(["ssn", "tax_id_us", "national_id_us", "phone_intl", "company_name_intl"]),
|
|
351
357
|
eu: /* @__PURE__ */ new Set(["phone_intl", "iban_intl", "company_name_intl"]),
|
|
@@ -480,6 +486,30 @@ function detectPii(text, locale = "und") {
|
|
|
480
486
|
findings.push({ type: "sgk_no", value: m[1], start, end: start + m[1].length });
|
|
481
487
|
}
|
|
482
488
|
}
|
|
489
|
+
if (active.has("emeklilik_no")) {
|
|
490
|
+
EMEKLILIK_NO_RE.lastIndex = 0;
|
|
491
|
+
let m;
|
|
492
|
+
while ((m = EMEKLILIK_NO_RE.exec(t)) !== null) {
|
|
493
|
+
const start = m.index + m[0].lastIndexOf(m[1]);
|
|
494
|
+
findings.push({ type: "emeklilik_no", value: m[1], start, end: start + m[1].length });
|
|
495
|
+
}
|
|
496
|
+
}
|
|
497
|
+
if (active.has("isyeri_sicil_no")) {
|
|
498
|
+
ISYERI_SICIL_NO_RE.lastIndex = 0;
|
|
499
|
+
let m;
|
|
500
|
+
while ((m = ISYERI_SICIL_NO_RE.exec(t)) !== null) {
|
|
501
|
+
const start = m.index + m[0].lastIndexOf(m[1]);
|
|
502
|
+
findings.push({ type: "isyeri_sicil_no", value: m[1], start, end: start + m[1].length });
|
|
503
|
+
}
|
|
504
|
+
}
|
|
505
|
+
if (active.has("bagkur_no")) {
|
|
506
|
+
BAGKUR_NO_RE.lastIndex = 0;
|
|
507
|
+
let m;
|
|
508
|
+
while ((m = BAGKUR_NO_RE.exec(t)) !== null) {
|
|
509
|
+
const start = m.index + m[0].lastIndexOf(m[1]);
|
|
510
|
+
findings.push({ type: "bagkur_no", value: m[1], start, end: start + m[1].length });
|
|
511
|
+
}
|
|
512
|
+
}
|
|
483
513
|
if (active.has("postal_code_tr")) {
|
|
484
514
|
POSTAL_CODE_TR_RE.lastIndex = 0;
|
|
485
515
|
let m;
|
|
@@ -800,7 +830,7 @@ function applyMask(text, findings, strategy = "redact") {
|
|
|
800
830
|
const tag = type.toUpperCase();
|
|
801
831
|
let replacement;
|
|
802
832
|
if (strategy === "redact") {
|
|
803
|
-
replacement = `[
|
|
833
|
+
replacement = `[MASKED_${tag}]`;
|
|
804
834
|
} else if (strategy === "replace") {
|
|
805
835
|
replacement = synthetic(type, value);
|
|
806
836
|
} else if (strategy === "token") {
|
|
@@ -815,7 +845,7 @@ function applyMask(text, findings, strategy = "redact") {
|
|
|
815
845
|
}
|
|
816
846
|
|
|
817
847
|
// src/index.ts
|
|
818
|
-
var version = "0.
|
|
848
|
+
var version = "0.8.2";
|
|
819
849
|
function computeQualityScore(completeness, avgLength, garbageRatio) {
|
|
820
850
|
const lengthScore = Math.min(avgLength / 500, 1);
|
|
821
851
|
const noiseScore = Math.max(0, 1 - garbageRatio * 10);
|