@flexorch/audit 0.5.1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +152 -6
- package/dist/index.d.cts +30 -2
- package/dist/index.d.ts +30 -2
- package/dist/index.js +150 -6
- package/package.json +2 -2
package/dist/index.cjs
CHANGED
|
@@ -23,6 +23,8 @@ __export(index_exports, {
|
|
|
23
23
|
applyMask: () => applyMask,
|
|
24
24
|
audit: () => audit,
|
|
25
25
|
auditBatch: () => auditBatch,
|
|
26
|
+
auditStream: () => auditStream,
|
|
27
|
+
complianceReport: () => complianceReport,
|
|
26
28
|
detectPii: () => detectPii,
|
|
27
29
|
mask: () => mask,
|
|
28
30
|
noiseMetrics: () => noiseMetrics,
|
|
@@ -333,6 +335,28 @@ var _EIN_INVALID_PREFIXES = /* @__PURE__ */ new Set([
|
|
|
333
335
|
function validEinUs(s) {
|
|
334
336
|
return !_EIN_INVALID_PREFIXES.has(s.slice(0, 2));
|
|
335
337
|
}
|
|
338
|
+
// Candidate matcher: any standalone run of exactly 11 digits.
var PESEL_PL_RE = /\b(\d{11})\b/g;

/**
 * Checks whether a value is a checksum-valid Polish PESEL number.
 *
 * Only the format (exactly 11 ASCII digits) and the check digit are verified;
 * the other digits are not interpreted in any way.
 *
 * @param {string} s - Candidate value.
 * @returns {boolean} True when the 11th digit matches the weighted checksum
 *   of the first ten digits; false for any non-string or malformed input.
 */
function validPeselPl(s) {
  if (typeof s !== "string" || !/^\d{11}$/.test(s)) return false;
  const weights = [1, 3, 7, 9, 1, 3, 7, 9, 1, 3];
  const total = weights.reduce(
    (sum, w, i) => sum + w * Number.parseInt(s[i], 10),
    0
  );
  // The check digit is the tens' complement of the weighted sum; the outer
  // `% 10` maps a complement of 10 back to 0.
  return (10 - (total % 10)) % 10 === Number.parseInt(s[10], 10);
}
|
|
345
|
+
// Candidate matcher: any standalone run of exactly 10 digits.
var SVNR_AT_RE = /\b(\d{10})\b/g;

/**
 * Checks whether a value is a checksum-valid Austrian social insurance
 * number (Sozialversicherungsnummer, 10 digits).
 *
 * The check digit is the 4th digit (index 3). It must equal the weighted sum
 * of the other nine digits modulo 11.
 *
 * @param {string} s - Candidate value.
 * @returns {boolean} True when the 4th digit matches the checksum; false for
 *   any non-string or malformed input.
 */
function validSvnrAt(s) {
  if (typeof s !== "string" || !/^\d{10}$/.test(s)) return false;
  // Weight 0 at index 3 excludes the check digit itself from the sum.
  const weights = [3, 7, 9, 0, 5, 8, 4, 2, 1, 6];
  const total = weights.reduce(
    (sum, w, i) => sum + w * Number.parseInt(s[i], 10),
    0
  );
  // The scheme is modulo 11 (not 10). A remainder of 10 can never equal a
  // single digit, so such numbers are rejected by the comparison itself.
  return total % 11 === Number.parseInt(s[3], 10);
}
|
|
352
|
+
// Candidate matcher: any standalone run of exactly 11 digits.
var NRRNISS_BE_RE = /\b(\d{11})\b/g;

/**
 * Checks whether a value is a checksum-valid Belgian national register
 * number (11 digits: 9-digit body followed by a 2-digit check number).
 *
 * The check number must equal `97 - (body mod 97)`, computed either on the
 * body as written or on the body prefixed with a leading "2" (i.e. body + 2e9).
 *
 * @param {string} s - Candidate value.
 * @returns {boolean} True when either checksum variant matches; false for
 *   any non-string or malformed input.
 */
function validNrrnissBe(s) {
  if (typeof s !== "string" || !/^\d{11}$/.test(s)) return false;
  const body = Number.parseInt(s.slice(0, 9), 10);
  const check = Number.parseInt(s.slice(9), 10);
  // Plain variant (presumably birth years before 2000 — see the Belgian
  // national number specification).
  if (97 - (body % 97) === check) return true;
  // Variant with a "2" prefixed to the body (presumably births from 2000 on).
  return 97 - ((2e9 + body) % 97) === check;
}
|
|
336
360
|
var LOCALE_DETECTORS = {
|
|
337
361
|
tr: /* @__PURE__ */ new Set([
|
|
338
362
|
"national_id_tr",
|
|
@@ -352,7 +376,10 @@ var LOCALE_DETECTORS = {
|
|
|
352
376
|
it: /* @__PURE__ */ new Set(["national_id_it", "tax_id_it"]),
|
|
353
377
|
nl: /* @__PURE__ */ new Set(["national_id_nl", "company_id_nl"]),
|
|
354
378
|
es: /* @__PURE__ */ new Set(["national_id_es", "tax_id_es"]),
|
|
355
|
-
uk: /* @__PURE__ */ new Set(["social_id_uk", "tax_id_uk"])
|
|
379
|
+
uk: /* @__PURE__ */ new Set(["social_id_uk", "tax_id_uk"]),
|
|
380
|
+
pl: /* @__PURE__ */ new Set(["national_id_pl"]),
|
|
381
|
+
at: /* @__PURE__ */ new Set(["social_id_at"]),
|
|
382
|
+
be: /* @__PURE__ */ new Set(["national_id_be"])
|
|
356
383
|
};
|
|
357
384
|
var UNIVERSAL = /* @__PURE__ */ new Set(["email", "iban", "credit_card", "ip", "ip_v6"]);
|
|
358
385
|
function activeDetectors(locale) {
|
|
@@ -604,6 +631,27 @@ function detectPii(text, locale = "und") {
|
|
|
604
631
|
findings.push({ type: "company_name_intl", value: m[1], start: m.index, end: m.index + m[1].length });
|
|
605
632
|
}
|
|
606
633
|
}
|
|
634
|
+
if (active.has("national_id_pl")) {
|
|
635
|
+
PESEL_PL_RE.lastIndex = 0;
|
|
636
|
+
let m;
|
|
637
|
+
while ((m = PESEL_PL_RE.exec(t)) !== null) {
|
|
638
|
+
if (validPeselPl(m[1])) findings.push({ type: "national_id_pl", value: m[1], start: m.index, end: m.index + m[1].length });
|
|
639
|
+
}
|
|
640
|
+
}
|
|
641
|
+
if (active.has("social_id_at")) {
|
|
642
|
+
SVNR_AT_RE.lastIndex = 0;
|
|
643
|
+
let m;
|
|
644
|
+
while ((m = SVNR_AT_RE.exec(t)) !== null) {
|
|
645
|
+
if (validSvnrAt(m[1])) findings.push({ type: "social_id_at", value: m[1], start: m.index, end: m.index + m[1].length });
|
|
646
|
+
}
|
|
647
|
+
}
|
|
648
|
+
if (active.has("national_id_be")) {
|
|
649
|
+
NRRNISS_BE_RE.lastIndex = 0;
|
|
650
|
+
let m;
|
|
651
|
+
while ((m = NRRNISS_BE_RE.exec(t)) !== null) {
|
|
652
|
+
if (validNrrnissBe(m[1])) findings.push({ type: "national_id_be", value: m[1], start: m.index, end: m.index + m[1].length });
|
|
653
|
+
}
|
|
654
|
+
}
|
|
607
655
|
findings.sort((a, b) => a.start - b.start);
|
|
608
656
|
const specificIbanSpans = new Set(
|
|
609
657
|
findings.filter((f) => f.type === "iban_tr" || f.type === "iban_intl").map((f) => `${f.start}:${f.end}`)
|
|
@@ -659,18 +707,56 @@ function noiseMetrics(text) {
|
|
|
659
707
|
|
|
660
708
|
// src/mask.ts
|
|
661
709
|
var import_crypto = require("crypto");
|
|
662
|
-
var
|
|
710
|
+
// Placeholder pools used by the "replace" masking strategy. One entry is
// chosen per original value by pick(), so the same input always maps to the
// same placeholder.
var TCKN_POOL = ["12345678950", "10000000146", "23456789060"];
var IBAN_TR_POOL = ["TR330006100519786457841326", "TR390006199999888888888813"];
var NAME_POOL = [
  "Ahmet Yilmaz",
  "Mehmet Demir",
  "Ayse Kaya",
  "Fatma Celik",
  "Ali Sahin",
  "Zeynep Arslan",
  "Mustafa Ozturk",
  "Emine Dogan",
  "Ibrahim Kurt",
  "Hatice Aydin",
  "Hasan Yildiz",
  "Elif Gunes",
  "Huseyin Cetin",
  "Meryem Polat",
  "Omer Koc",
  "Busra Tekin",
  "Yusuf Erdogan",
  "Selin Bozkurt",
  "Kemal Akin",
  "Derya Ucar"
];

// Fixed placeholder per PII type for types that have no pool.
var STATIC_SYNTHETIC = {
  email: "user@example.com",
  phone: "+1 000 000 0000",
  phone_tr: "0500 000 00 00",
  phone_intl: "+1 000 000 0000",
  ssn: "000-00-0000",
  iban: "XX00 0000 0000 0000 0000 00",
  credit_card: "0000 0000 0000 0000",
  ip: "0.0.0.0",
  ip_v6: "2001:db8::1",
  national_id_pl: "00000000000",
  social_id_at: "0000000000",
  national_id_be: "00000000000"
};
var VALID_STRATEGIES = /* @__PURE__ */ new Set(["redact", "replace", "token", "hash"]);

/**
 * Deterministically selects one entry of `pool` based on `seed`.
 *
 * The index is the first 32 bits of SHA-256(seed) modulo the pool length.
 * An empty pool yields `undefined`.
 *
 * @param {string[]} pool - Candidate replacement values.
 * @param {string} seed - Value hashed to choose the entry.
 * @returns {string|undefined} The selected pool entry.
 */
function pick(pool, seed) {
  const h = (0, import_crypto.createHash)("sha256").update(seed).digest("hex");
  const idx = parseInt(h.slice(0, 8), 16) % pool.length;
  return pool[idx];
}

/**
 * Returns the synthetic replacement for a finding of type `ptype`.
 *
 * Pooled types get a deterministic pick seeded by the original value; types
 * listed in STATIC_SYNTHETIC get their fixed placeholder; everything else
 * falls back to `[TYPE]` in upper case.
 *
 * @param {string} ptype - PII type identifier of the finding.
 * @param {string} original - Original matched value (used only as a seed).
 * @returns {string} Replacement text.
 */
function synthetic(ptype, original) {
  if (ptype === "national_id_tr") return pick(TCKN_POOL, original);
  // NOTE(review): "iban_intl" is also served from the TR IBAN pool — confirm
  // that a Turkish placeholder is intended for non-Turkish IBANs.
  if (ptype === "iban_tr" || ptype === "iban_intl") return pick(IBAN_TR_POOL, original);
  if (ptype === "name") return pick(NAME_POOL, original);
  // Own-property check: a plain lookup would resolve inherited members such
  // as "constructor" or "toString" and return a function instead of a string.
  if (Object.prototype.hasOwnProperty.call(STATIC_SYNTHETIC, ptype)) {
    return STATIC_SYNTHETIC[ptype];
  }
  return `[${ptype.toUpperCase()}]`;
}
|
|
674
760
|
function applyMask(text, findings, strategy = "redact") {
|
|
675
761
|
if (!VALID_STRATEGIES.has(strategy)) {
|
|
676
762
|
throw new Error(`Unknown strategy "${strategy}". Use: redact, replace, token, hash`);
|
|
@@ -687,7 +773,7 @@ function applyMask(text, findings, strategy = "redact") {
|
|
|
687
773
|
if (strategy === "redact") {
|
|
688
774
|
replacement = `[REDACTED_${tag}]`;
|
|
689
775
|
} else if (strategy === "replace") {
|
|
690
|
-
replacement =
|
|
776
|
+
replacement = synthetic(type, value);
|
|
691
777
|
} else if (strategy === "token") {
|
|
692
778
|
replacement = `<PII_${tag}_${counter[type]}>`;
|
|
693
779
|
} else {
|
|
@@ -700,7 +786,7 @@ function applyMask(text, findings, strategy = "redact") {
|
|
|
700
786
|
}
|
|
701
787
|
|
|
702
788
|
// src/index.ts
|
|
703
|
-
var version = "0.5.1";
|
|
789
|
+
var version = "0.6.0";
|
|
704
790
|
function computeQualityScore(completeness, avgLength, garbageRatio) {
|
|
705
791
|
const lengthScore = Math.min(avgLength / 500, 1);
|
|
706
792
|
const noiseScore = Math.max(0, 1 - garbageRatio * 10);
|
|
@@ -751,11 +837,71 @@ function auditBatch(texts, options = {}) {
|
|
|
751
837
|
function mask(text, findings, options = {}) {
|
|
752
838
|
return applyMask(text, findings, options.strategy ?? "redact");
|
|
753
839
|
}
|
|
840
|
+
/**
 * Audits texts lazily, one at a time, as they arrive from `texts`.
 *
 * Each item is passed to `audit` together with the same `options` object and
 * the resulting report is yielded before the next item is requested.
 *
 * @param {AsyncIterable<string>} texts - Source of texts to audit.
 * @param {object} [options={}] - Options forwarded unchanged to `audit`.
 * @yields One audit result per input text, in input order.
 */
async function* auditStream(texts, options = {}) {
  for await (const entry of texts) {
    const report = audit(entry, options);
    yield report;
  }
}
|
|
845
|
+
// PII types whose presence alone makes a text "high" risk.
var HIGH_RISK_TYPES = /* @__PURE__ */ new Set([
  "national_id_tr",
  "ssn",
  "credit_card",
  "national_id_pl",
  "national_id_be",
  "social_id_at",
  "social_id_de",
  "social_id_uk",
  "national_id_it",
  "national_id_nl",
  "national_id_es",
  "national_id_us",
  "tax_id_tr",
  "tax_id_de"
]);

// PII types that make a text "medium" risk when no high-risk type is present.
var MEDIUM_RISK_TYPES = /* @__PURE__ */ new Set([
  "email",
  "phone_tr",
  "phone_intl",
  "iban",
  "iban_tr",
  "iban_intl",
  "name"
]);

/**
 * Builds a compliance summary from the PII findings of an audit result.
 *
 * The risk level is "high" if any found type is in HIGH_RISK_TYPES,
 * otherwise "medium" if any is in MEDIUM_RISK_TYPES, otherwise "low" when
 * some other PII type was found, and "none" when nothing was found.
 *
 * @param {{ pii: Array<{ type: string }> }} result - Audit result; only the
 *   `pii` findings are read.
 * @returns {{ has_pii: boolean, pii_types: string[], risk_level: string,
 *   masking_required: boolean, recommendations: string[] }} Summary with the
 *   sorted, de-duplicated PII types and textual recommendations.
 */
function complianceReport(result) {
  const types = [...new Set(result.pii.map((f) => f.type))].sort();
  const hasPii = types.length > 0;

  let risk_level = "none";
  if (hasPii) {
    if (types.some((t) => HIGH_RISK_TYPES.has(t))) risk_level = "high";
    else if (types.some((t) => MEDIUM_RISK_TYPES.has(t))) risk_level = "medium";
    else risk_level = "low";
  }

  const recommendations = [];
  // Any detected PII (including "low" risk) gets the masking recommendation,
  // matching `masking_required`. Previously a low-risk result fell through
  // to the "No PII detected" message although PII had been found.
  if (hasPii) {
    recommendations.push("Apply mask({ strategy: 'redact' }) before storing or sharing this text.");
  }
  if (risk_level === "high") {
    recommendations.push(
      "Review applicable regulations (KVKK Art. 6, GDPR Art. 9) for special category data handling."
    );
  }
  if (!hasPii) {
    recommendations.push("No PII detected \u2014 text is safe for LLM processing.");
  }

  return {
    has_pii: hasPii,
    pii_types: types,
    risk_level,
    masking_required: hasPii,
    recommendations
  };
}
|
|
754
898
|
// Annotate the CommonJS export names for ESM import in node:
|
|
755
899
|
0 && (module.exports = {
|
|
756
900
|
applyMask,
|
|
757
901
|
audit,
|
|
758
902
|
auditBatch,
|
|
903
|
+
auditStream,
|
|
904
|
+
complianceReport,
|
|
759
905
|
detectPii,
|
|
760
906
|
mask,
|
|
761
907
|
noiseMetrics,
|
package/dist/index.d.cts
CHANGED
|
@@ -54,7 +54,7 @@ declare function applyMask(text: string, findings: PiiFinding[], strategy?: Mask
|
|
|
54
54
|
* // "Contact: [REDACTED_EMAIL]"
|
|
55
55
|
*/
|
|
56
56
|
|
|
57
|
-
declare const version = "0.5.1";
|
|
57
|
+
declare const version = "0.6.0";
|
|
58
58
|
type QualityGrade = "A" | "B" | "C" | "D";
|
|
59
59
|
interface PiiSummaryEntry {
|
|
60
60
|
type: string;
|
|
@@ -116,5 +116,33 @@ declare function auditBatch(texts: string[], options?: AuditOptions): BatchAudit
|
|
|
116
116
|
* Apply masking to PII findings in *text*.
|
|
117
117
|
*/
|
|
118
118
|
declare function mask(text: string, findings: PiiFinding[], options?: MaskOptions): string;
|
|
119
|
+
/**
 * Audits a stream of texts lazily.
 *
 * Texts are pulled from the async iterable one by one; for each text a single
 * AuditResult is yielded, in input order.
 *
 * @example
 * async function* lines() {
 *   for (const line of data) yield line;
 * }
 * for await (const result of auditStream(lines())) {
 *   console.log(result.quality_grade, result.pii_summary);
 * }
 */
declare function auditStream(texts: AsyncIterable<string>, options?: AuditOptions): AsyncGenerator<AuditResult>;
/** Overall risk classification reported by complianceReport. */
type RiskLevel = "none" | "low" | "medium" | "high";
/** Summary object returned by complianceReport. */
interface ComplianceReport {
    /** True when at least one PII finding is present. */
    has_pii: boolean;
    /** Distinct PII type identifiers that were found. */
    pii_types: Array<string>;
    /** Risk classification derived from the found PII types. */
    risk_level: RiskLevel;
    /** True when at least one PII finding is present. */
    masking_required: boolean;
    /** Human-readable follow-up suggestions. */
    recommendations: Array<string>;
}
/**
 * Produce a KVKK/GDPR-oriented compliance summary for an AuditResult.
 *
 * The output is a technical summary only; it is not a legal document or a
 * regulatory opinion.
 */
declare function complianceReport(result: AuditResult): ComplianceReport;
|
|
119
147
|
|
|
120
|
-
export { type AuditOptions, type AuditResult, type BatchAuditResult, type MaskOptions, type MaskStrategy, type NoiseMetrics, type PiiFinding, type PiiSummaryEntry, type QualityGrade, type QualityMetrics, applyMask, audit, auditBatch, detectPii, mask, noiseMetrics, noiseRatio, qualityMetrics, version };
|
|
148
|
+
export { type AuditOptions, type AuditResult, type BatchAuditResult, type ComplianceReport, type MaskOptions, type MaskStrategy, type NoiseMetrics, type PiiFinding, type PiiSummaryEntry, type QualityGrade, type QualityMetrics, type RiskLevel, applyMask, audit, auditBatch, auditStream, complianceReport, detectPii, mask, noiseMetrics, noiseRatio, qualityMetrics, version };
|
package/dist/index.d.ts
CHANGED
|
@@ -54,7 +54,7 @@ declare function applyMask(text: string, findings: PiiFinding[], strategy?: Mask
|
|
|
54
54
|
* // "Contact: [REDACTED_EMAIL]"
|
|
55
55
|
*/
|
|
56
56
|
|
|
57
|
-
declare const version = "0.5.1";
|
|
57
|
+
declare const version = "0.6.0";
|
|
58
58
|
type QualityGrade = "A" | "B" | "C" | "D";
|
|
59
59
|
interface PiiSummaryEntry {
|
|
60
60
|
type: string;
|
|
@@ -116,5 +116,33 @@ declare function auditBatch(texts: string[], options?: AuditOptions): BatchAudit
|
|
|
116
116
|
* Apply masking to PII findings in *text*.
|
|
117
117
|
*/
|
|
118
118
|
declare function mask(text: string, findings: PiiFinding[], options?: MaskOptions): string;
|
|
119
|
+
/**
 * Audits a stream of texts lazily.
 *
 * Texts are pulled from the async iterable one by one; for each text a single
 * AuditResult is yielded, in input order.
 *
 * @example
 * async function* lines() {
 *   for (const line of data) yield line;
 * }
 * for await (const result of auditStream(lines())) {
 *   console.log(result.quality_grade, result.pii_summary);
 * }
 */
declare function auditStream(texts: AsyncIterable<string>, options?: AuditOptions): AsyncGenerator<AuditResult>;
/** Overall risk classification reported by complianceReport. */
type RiskLevel = "none" | "low" | "medium" | "high";
/** Summary object returned by complianceReport. */
interface ComplianceReport {
    /** True when at least one PII finding is present. */
    has_pii: boolean;
    /** Distinct PII type identifiers that were found. */
    pii_types: Array<string>;
    /** Risk classification derived from the found PII types. */
    risk_level: RiskLevel;
    /** True when at least one PII finding is present. */
    masking_required: boolean;
    /** Human-readable follow-up suggestions. */
    recommendations: Array<string>;
}
/**
 * Produce a KVKK/GDPR-oriented compliance summary for an AuditResult.
 *
 * The output is a technical summary only; it is not a legal document or a
 * regulatory opinion.
 */
declare function complianceReport(result: AuditResult): ComplianceReport;
|
|
119
147
|
|
|
120
|
-
export { type AuditOptions, type AuditResult, type BatchAuditResult, type MaskOptions, type MaskStrategy, type NoiseMetrics, type PiiFinding, type PiiSummaryEntry, type QualityGrade, type QualityMetrics, applyMask, audit, auditBatch, detectPii, mask, noiseMetrics, noiseRatio, qualityMetrics, version };
|
|
148
|
+
export { type AuditOptions, type AuditResult, type BatchAuditResult, type ComplianceReport, type MaskOptions, type MaskStrategy, type NoiseMetrics, type PiiFinding, type PiiSummaryEntry, type QualityGrade, type QualityMetrics, type RiskLevel, applyMask, audit, auditBatch, auditStream, complianceReport, detectPii, mask, noiseMetrics, noiseRatio, qualityMetrics, version };
|
package/dist/index.js
CHANGED
|
@@ -299,6 +299,28 @@ var _EIN_INVALID_PREFIXES = /* @__PURE__ */ new Set([
|
|
|
299
299
|
function validEinUs(s) {
|
|
300
300
|
return !_EIN_INVALID_PREFIXES.has(s.slice(0, 2));
|
|
301
301
|
}
|
|
302
|
+
// Candidate matcher: any standalone run of exactly 11 digits.
var PESEL_PL_RE = /\b(\d{11})\b/g;

/**
 * Checks whether a value is a checksum-valid Polish PESEL number.
 *
 * Only the format (exactly 11 ASCII digits) and the check digit are verified;
 * the other digits are not interpreted in any way.
 *
 * @param {string} s - Candidate value.
 * @returns {boolean} True when the 11th digit matches the weighted checksum
 *   of the first ten digits; false for any non-string or malformed input.
 */
function validPeselPl(s) {
  if (typeof s !== "string" || !/^\d{11}$/.test(s)) return false;
  const weights = [1, 3, 7, 9, 1, 3, 7, 9, 1, 3];
  const total = weights.reduce(
    (sum, w, i) => sum + w * Number.parseInt(s[i], 10),
    0
  );
  // The check digit is the tens' complement of the weighted sum; the outer
  // `% 10` maps a complement of 10 back to 0.
  return (10 - (total % 10)) % 10 === Number.parseInt(s[10], 10);
}
|
|
309
|
+
// Candidate matcher: any standalone run of exactly 10 digits.
var SVNR_AT_RE = /\b(\d{10})\b/g;

/**
 * Checks whether a value is a checksum-valid Austrian social insurance
 * number (Sozialversicherungsnummer, 10 digits).
 *
 * The check digit is the 4th digit (index 3). It must equal the weighted sum
 * of the other nine digits modulo 11.
 *
 * @param {string} s - Candidate value.
 * @returns {boolean} True when the 4th digit matches the checksum; false for
 *   any non-string or malformed input.
 */
function validSvnrAt(s) {
  if (typeof s !== "string" || !/^\d{10}$/.test(s)) return false;
  // Weight 0 at index 3 excludes the check digit itself from the sum.
  const weights = [3, 7, 9, 0, 5, 8, 4, 2, 1, 6];
  const total = weights.reduce(
    (sum, w, i) => sum + w * Number.parseInt(s[i], 10),
    0
  );
  // The scheme is modulo 11 (not 10). A remainder of 10 can never equal a
  // single digit, so such numbers are rejected by the comparison itself.
  return total % 11 === Number.parseInt(s[3], 10);
}
|
|
316
|
+
// Candidate matcher: any standalone run of exactly 11 digits.
var NRRNISS_BE_RE = /\b(\d{11})\b/g;

/**
 * Checks whether a value is a checksum-valid Belgian national register
 * number (11 digits: 9-digit body followed by a 2-digit check number).
 *
 * The check number must equal `97 - (body mod 97)`, computed either on the
 * body as written or on the body prefixed with a leading "2" (i.e. body + 2e9).
 *
 * @param {string} s - Candidate value.
 * @returns {boolean} True when either checksum variant matches; false for
 *   any non-string or malformed input.
 */
function validNrrnissBe(s) {
  if (typeof s !== "string" || !/^\d{11}$/.test(s)) return false;
  const body = Number.parseInt(s.slice(0, 9), 10);
  const check = Number.parseInt(s.slice(9), 10);
  // Plain variant (presumably birth years before 2000 — see the Belgian
  // national number specification).
  if (97 - (body % 97) === check) return true;
  // Variant with a "2" prefixed to the body (presumably births from 2000 on).
  return 97 - ((2e9 + body) % 97) === check;
}
|
|
302
324
|
var LOCALE_DETECTORS = {
|
|
303
325
|
tr: /* @__PURE__ */ new Set([
|
|
304
326
|
"national_id_tr",
|
|
@@ -318,7 +340,10 @@ var LOCALE_DETECTORS = {
|
|
|
318
340
|
it: /* @__PURE__ */ new Set(["national_id_it", "tax_id_it"]),
|
|
319
341
|
nl: /* @__PURE__ */ new Set(["national_id_nl", "company_id_nl"]),
|
|
320
342
|
es: /* @__PURE__ */ new Set(["national_id_es", "tax_id_es"]),
|
|
321
|
-
uk: /* @__PURE__ */ new Set(["social_id_uk", "tax_id_uk"])
|
|
343
|
+
uk: /* @__PURE__ */ new Set(["social_id_uk", "tax_id_uk"]),
|
|
344
|
+
pl: /* @__PURE__ */ new Set(["national_id_pl"]),
|
|
345
|
+
at: /* @__PURE__ */ new Set(["social_id_at"]),
|
|
346
|
+
be: /* @__PURE__ */ new Set(["national_id_be"])
|
|
322
347
|
};
|
|
323
348
|
var UNIVERSAL = /* @__PURE__ */ new Set(["email", "iban", "credit_card", "ip", "ip_v6"]);
|
|
324
349
|
function activeDetectors(locale) {
|
|
@@ -570,6 +595,27 @@ function detectPii(text, locale = "und") {
|
|
|
570
595
|
findings.push({ type: "company_name_intl", value: m[1], start: m.index, end: m.index + m[1].length });
|
|
571
596
|
}
|
|
572
597
|
}
|
|
598
|
+
if (active.has("national_id_pl")) {
|
|
599
|
+
PESEL_PL_RE.lastIndex = 0;
|
|
600
|
+
let m;
|
|
601
|
+
while ((m = PESEL_PL_RE.exec(t)) !== null) {
|
|
602
|
+
if (validPeselPl(m[1])) findings.push({ type: "national_id_pl", value: m[1], start: m.index, end: m.index + m[1].length });
|
|
603
|
+
}
|
|
604
|
+
}
|
|
605
|
+
if (active.has("social_id_at")) {
|
|
606
|
+
SVNR_AT_RE.lastIndex = 0;
|
|
607
|
+
let m;
|
|
608
|
+
while ((m = SVNR_AT_RE.exec(t)) !== null) {
|
|
609
|
+
if (validSvnrAt(m[1])) findings.push({ type: "social_id_at", value: m[1], start: m.index, end: m.index + m[1].length });
|
|
610
|
+
}
|
|
611
|
+
}
|
|
612
|
+
if (active.has("national_id_be")) {
|
|
613
|
+
NRRNISS_BE_RE.lastIndex = 0;
|
|
614
|
+
let m;
|
|
615
|
+
while ((m = NRRNISS_BE_RE.exec(t)) !== null) {
|
|
616
|
+
if (validNrrnissBe(m[1])) findings.push({ type: "national_id_be", value: m[1], start: m.index, end: m.index + m[1].length });
|
|
617
|
+
}
|
|
618
|
+
}
|
|
573
619
|
findings.sort((a, b) => a.start - b.start);
|
|
574
620
|
const specificIbanSpans = new Set(
|
|
575
621
|
findings.filter((f) => f.type === "iban_tr" || f.type === "iban_intl").map((f) => `${f.start}:${f.end}`)
|
|
@@ -625,18 +671,56 @@ function noiseMetrics(text) {
|
|
|
625
671
|
|
|
626
672
|
// src/mask.ts
|
|
627
673
|
import { createHash } from "crypto";
|
|
628
|
-
var
|
|
674
|
+
// Placeholder pools used by the "replace" masking strategy. One entry is
// chosen per original value by pick(), so the same input always maps to the
// same placeholder.
var TCKN_POOL = ["12345678950", "10000000146", "23456789060"];
var IBAN_TR_POOL = ["TR330006100519786457841326", "TR390006199999888888888813"];
var NAME_POOL = [
  "Ahmet Yilmaz",
  "Mehmet Demir",
  "Ayse Kaya",
  "Fatma Celik",
  "Ali Sahin",
  "Zeynep Arslan",
  "Mustafa Ozturk",
  "Emine Dogan",
  "Ibrahim Kurt",
  "Hatice Aydin",
  "Hasan Yildiz",
  "Elif Gunes",
  "Huseyin Cetin",
  "Meryem Polat",
  "Omer Koc",
  "Busra Tekin",
  "Yusuf Erdogan",
  "Selin Bozkurt",
  "Kemal Akin",
  "Derya Ucar"
];

// Fixed placeholder per PII type for types that have no pool.
var STATIC_SYNTHETIC = {
  email: "user@example.com",
  phone: "+1 000 000 0000",
  phone_tr: "0500 000 00 00",
  phone_intl: "+1 000 000 0000",
  ssn: "000-00-0000",
  iban: "XX00 0000 0000 0000 0000 00",
  credit_card: "0000 0000 0000 0000",
  ip: "0.0.0.0",
  ip_v6: "2001:db8::1",
  national_id_pl: "00000000000",
  social_id_at: "0000000000",
  national_id_be: "00000000000"
};
var VALID_STRATEGIES = /* @__PURE__ */ new Set(["redact", "replace", "token", "hash"]);

/**
 * Deterministically selects one entry of `pool` based on `seed`.
 *
 * The index is the first 32 bits of SHA-256(seed) modulo the pool length.
 * An empty pool yields `undefined`.
 *
 * @param {string[]} pool - Candidate replacement values.
 * @param {string} seed - Value hashed to choose the entry.
 * @returns {string|undefined} The selected pool entry.
 */
function pick(pool, seed) {
  const h = createHash("sha256").update(seed).digest("hex");
  const idx = parseInt(h.slice(0, 8), 16) % pool.length;
  return pool[idx];
}

/**
 * Returns the synthetic replacement for a finding of type `ptype`.
 *
 * Pooled types get a deterministic pick seeded by the original value; types
 * listed in STATIC_SYNTHETIC get their fixed placeholder; everything else
 * falls back to `[TYPE]` in upper case.
 *
 * @param {string} ptype - PII type identifier of the finding.
 * @param {string} original - Original matched value (used only as a seed).
 * @returns {string} Replacement text.
 */
function synthetic(ptype, original) {
  if (ptype === "national_id_tr") return pick(TCKN_POOL, original);
  // NOTE(review): "iban_intl" is also served from the TR IBAN pool — confirm
  // that a Turkish placeholder is intended for non-Turkish IBANs.
  if (ptype === "iban_tr" || ptype === "iban_intl") return pick(IBAN_TR_POOL, original);
  if (ptype === "name") return pick(NAME_POOL, original);
  // Own-property check: a plain lookup would resolve inherited members such
  // as "constructor" or "toString" and return a function instead of a string.
  if (Object.prototype.hasOwnProperty.call(STATIC_SYNTHETIC, ptype)) {
    return STATIC_SYNTHETIC[ptype];
  }
  return `[${ptype.toUpperCase()}]`;
}
|
|
640
724
|
function applyMask(text, findings, strategy = "redact") {
|
|
641
725
|
if (!VALID_STRATEGIES.has(strategy)) {
|
|
642
726
|
throw new Error(`Unknown strategy "${strategy}". Use: redact, replace, token, hash`);
|
|
@@ -653,7 +737,7 @@ function applyMask(text, findings, strategy = "redact") {
|
|
|
653
737
|
if (strategy === "redact") {
|
|
654
738
|
replacement = `[REDACTED_${tag}]`;
|
|
655
739
|
} else if (strategy === "replace") {
|
|
656
|
-
replacement =
|
|
740
|
+
replacement = synthetic(type, value);
|
|
657
741
|
} else if (strategy === "token") {
|
|
658
742
|
replacement = `<PII_${tag}_${counter[type]}>`;
|
|
659
743
|
} else {
|
|
@@ -666,7 +750,7 @@ function applyMask(text, findings, strategy = "redact") {
|
|
|
666
750
|
}
|
|
667
751
|
|
|
668
752
|
// src/index.ts
|
|
669
|
-
var version = "0.5.1";
|
|
753
|
+
var version = "0.6.0";
|
|
670
754
|
function computeQualityScore(completeness, avgLength, garbageRatio) {
|
|
671
755
|
const lengthScore = Math.min(avgLength / 500, 1);
|
|
672
756
|
const noiseScore = Math.max(0, 1 - garbageRatio * 10);
|
|
@@ -717,10 +801,70 @@ function auditBatch(texts, options = {}) {
|
|
|
717
801
|
function mask(text, findings, options = {}) {
|
|
718
802
|
return applyMask(text, findings, options.strategy ?? "redact");
|
|
719
803
|
}
|
|
804
|
+
/**
 * Audits texts lazily, one at a time, as they arrive from `texts`.
 *
 * Each item is passed to `audit` together with the same `options` object and
 * the resulting report is yielded before the next item is requested.
 *
 * @param {AsyncIterable<string>} texts - Source of texts to audit.
 * @param {object} [options={}] - Options forwarded unchanged to `audit`.
 * @yields One audit result per input text, in input order.
 */
async function* auditStream(texts, options = {}) {
  for await (const entry of texts) {
    const report = audit(entry, options);
    yield report;
  }
}
|
|
809
|
+
// PII types whose presence alone makes a text "high" risk.
var HIGH_RISK_TYPES = /* @__PURE__ */ new Set([
  "national_id_tr",
  "ssn",
  "credit_card",
  "national_id_pl",
  "national_id_be",
  "social_id_at",
  "social_id_de",
  "social_id_uk",
  "national_id_it",
  "national_id_nl",
  "national_id_es",
  "national_id_us",
  "tax_id_tr",
  "tax_id_de"
]);

// PII types that make a text "medium" risk when no high-risk type is present.
var MEDIUM_RISK_TYPES = /* @__PURE__ */ new Set([
  "email",
  "phone_tr",
  "phone_intl",
  "iban",
  "iban_tr",
  "iban_intl",
  "name"
]);

/**
 * Builds a compliance summary from the PII findings of an audit result.
 *
 * The risk level is "high" if any found type is in HIGH_RISK_TYPES,
 * otherwise "medium" if any is in MEDIUM_RISK_TYPES, otherwise "low" when
 * some other PII type was found, and "none" when nothing was found.
 *
 * @param {{ pii: Array<{ type: string }> }} result - Audit result; only the
 *   `pii` findings are read.
 * @returns {{ has_pii: boolean, pii_types: string[], risk_level: string,
 *   masking_required: boolean, recommendations: string[] }} Summary with the
 *   sorted, de-duplicated PII types and textual recommendations.
 */
function complianceReport(result) {
  const types = [...new Set(result.pii.map((f) => f.type))].sort();
  const hasPii = types.length > 0;

  let risk_level = "none";
  if (hasPii) {
    if (types.some((t) => HIGH_RISK_TYPES.has(t))) risk_level = "high";
    else if (types.some((t) => MEDIUM_RISK_TYPES.has(t))) risk_level = "medium";
    else risk_level = "low";
  }

  const recommendations = [];
  // Any detected PII (including "low" risk) gets the masking recommendation,
  // matching `masking_required`. Previously a low-risk result fell through
  // to the "No PII detected" message although PII had been found.
  if (hasPii) {
    recommendations.push("Apply mask({ strategy: 'redact' }) before storing or sharing this text.");
  }
  if (risk_level === "high") {
    recommendations.push(
      "Review applicable regulations (KVKK Art. 6, GDPR Art. 9) for special category data handling."
    );
  }
  if (!hasPii) {
    recommendations.push("No PII detected \u2014 text is safe for LLM processing.");
  }

  return {
    has_pii: hasPii,
    pii_types: types,
    risk_level,
    masking_required: hasPii,
    recommendations
  };
}
|
|
720
862
|
export {
|
|
721
863
|
applyMask,
|
|
722
864
|
audit,
|
|
723
865
|
auditBatch,
|
|
866
|
+
auditStream,
|
|
867
|
+
complianceReport,
|
|
724
868
|
detectPii,
|
|
725
869
|
mask,
|
|
726
870
|
noiseMetrics,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@flexorch/audit",
|
|
3
|
-
"version": "0.5.1",
|
|
3
|
+
"version": "0.6.0",
|
|
4
4
|
"description": "Zero-dependency PII + quality + noise audit for LLM datasets (TR/EU/US)",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"pii",
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
],
|
|
15
15
|
"license": "MIT",
|
|
16
16
|
"author": "FlexOrch",
|
|
17
|
-
"homepage": "https://
|
|
17
|
+
"homepage": "https://flexorch.com",
|
|
18
18
|
"repository": {
|
|
19
19
|
"type": "git",
|
|
20
20
|
"url": "git+https://github.com/flexorch/flexorch-audit-js.git"
|