@flexorch/audit 0.5.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -23,6 +23,8 @@ __export(index_exports, {
23
23
  applyMask: () => applyMask,
24
24
  audit: () => audit,
25
25
  auditBatch: () => auditBatch,
26
+ auditStream: () => auditStream,
27
+ complianceReport: () => complianceReport,
26
28
  detectPii: () => detectPii,
27
29
  mask: () => mask,
28
30
  noiseMetrics: () => noiseMetrics,
@@ -333,6 +335,28 @@ var _EIN_INVALID_PREFIXES = /* @__PURE__ */ new Set([
333
335
  function validEinUs(s) {
334
336
  return !_EIN_INVALID_PREFIXES.has(s.slice(0, 2));
335
337
  }
338
+ var PESEL_PL_RE = /\b(\d{11})\b/g;
339
+ function validPeselPl(s) {
340
+ if (s.length !== 11 || !/^\d+$/.test(s)) return false;
341
+ const weights = [1, 3, 7, 9, 1, 3, 7, 9, 1, 3];
342
+ const total = weights.reduce((sum, w, i) => sum + w * parseInt(s[i]), 0);
343
+ return (10 - total % 10) % 10 === parseInt(s[10]);
344
+ }
345
+ var SVNR_AT_RE = /\b(\d{10})\b/g;
346
+ function validSvnrAt(s) {
347
+ if (s.length !== 10 || !/^\d+$/.test(s)) return false;
348
+ const weights = [3, 7, 9, 0, 5, 8, 4, 2, 1, 6];
349
+ const total = weights.reduce((sum, w, i) => sum + w * parseInt(s[i]), 0);
350
+ return total % 10 === parseInt(s[3]);
351
+ }
352
+ var NRRNISS_BE_RE = /\b(\d{11})\b/g;
353
+ function validNrrnissBe(s) {
354
+ if (s.length !== 11 || !/^\d+$/.test(s)) return false;
355
+ const body = parseInt(s.slice(0, 9));
356
+ const check = parseInt(s.slice(9));
357
+ if (97 - body % 97 === check) return true;
358
+ return 97 - (2e9 + body) % 97 === check;
359
+ }
336
360
  var LOCALE_DETECTORS = {
337
361
  tr: /* @__PURE__ */ new Set([
338
362
  "national_id_tr",
@@ -352,7 +376,10 @@ var LOCALE_DETECTORS = {
352
376
  it: /* @__PURE__ */ new Set(["national_id_it", "tax_id_it"]),
353
377
  nl: /* @__PURE__ */ new Set(["national_id_nl", "company_id_nl"]),
354
378
  es: /* @__PURE__ */ new Set(["national_id_es", "tax_id_es"]),
355
- uk: /* @__PURE__ */ new Set(["social_id_uk", "tax_id_uk"])
379
+ uk: /* @__PURE__ */ new Set(["social_id_uk", "tax_id_uk"]),
380
+ pl: /* @__PURE__ */ new Set(["national_id_pl"]),
381
+ at: /* @__PURE__ */ new Set(["social_id_at"]),
382
+ be: /* @__PURE__ */ new Set(["national_id_be"])
356
383
  };
357
384
  var UNIVERSAL = /* @__PURE__ */ new Set(["email", "iban", "credit_card", "ip", "ip_v6"]);
358
385
  function activeDetectors(locale) {
@@ -604,6 +631,27 @@ function detectPii(text, locale = "und") {
604
631
  findings.push({ type: "company_name_intl", value: m[1], start: m.index, end: m.index + m[1].length });
605
632
  }
606
633
  }
634
+ if (active.has("national_id_pl")) {
635
+ PESEL_PL_RE.lastIndex = 0;
636
+ let m;
637
+ while ((m = PESEL_PL_RE.exec(t)) !== null) {
638
+ if (validPeselPl(m[1])) findings.push({ type: "national_id_pl", value: m[1], start: m.index, end: m.index + m[1].length });
639
+ }
640
+ }
641
+ if (active.has("social_id_at")) {
642
+ SVNR_AT_RE.lastIndex = 0;
643
+ let m;
644
+ while ((m = SVNR_AT_RE.exec(t)) !== null) {
645
+ if (validSvnrAt(m[1])) findings.push({ type: "social_id_at", value: m[1], start: m.index, end: m.index + m[1].length });
646
+ }
647
+ }
648
+ if (active.has("national_id_be")) {
649
+ NRRNISS_BE_RE.lastIndex = 0;
650
+ let m;
651
+ while ((m = NRRNISS_BE_RE.exec(t)) !== null) {
652
+ if (validNrrnissBe(m[1])) findings.push({ type: "national_id_be", value: m[1], start: m.index, end: m.index + m[1].length });
653
+ }
654
+ }
607
655
  findings.sort((a, b) => a.start - b.start);
608
656
  const specificIbanSpans = new Set(
609
657
  findings.filter((f) => f.type === "iban_tr" || f.type === "iban_intl").map((f) => `${f.start}:${f.end}`)
@@ -659,18 +707,56 @@ function noiseMetrics(text) {
659
707
 
660
708
  // src/mask.ts
661
709
  var import_crypto = require("crypto");
662
- var SYNTHETIC = {
710
+ var TCKN_POOL = ["12345678950", "10000000146", "23456789060"];
711
+ var IBAN_TR_POOL = ["TR330006100519786457841326", "TR390006199999888888888813"];
712
+ var NAME_POOL = [
713
+ "Ahmet Yilmaz",
714
+ "Mehmet Demir",
715
+ "Ayse Kaya",
716
+ "Fatma Celik",
717
+ "Ali Sahin",
718
+ "Zeynep Arslan",
719
+ "Mustafa Ozturk",
720
+ "Emine Dogan",
721
+ "Ibrahim Kurt",
722
+ "Hatice Aydin",
723
+ "Hasan Yildiz",
724
+ "Elif Gunes",
725
+ "Huseyin Cetin",
726
+ "Meryem Polat",
727
+ "Omer Koc",
728
+ "Busra Tekin",
729
+ "Yusuf Erdogan",
730
+ "Selin Bozkurt",
731
+ "Kemal Akin",
732
+ "Derya Ucar"
733
+ ];
734
+ var STATIC_SYNTHETIC = {
663
735
  email: "user@example.com",
664
736
  phone: "+1 000 000 0000",
665
737
  phone_tr: "0500 000 00 00",
666
- national_id_tr: "00000000000",
738
+ phone_intl: "+1 000 000 0000",
667
739
  ssn: "000-00-0000",
668
740
  iban: "XX00 0000 0000 0000 0000 00",
669
741
  credit_card: "0000 0000 0000 0000",
670
742
  ip: "0.0.0.0",
671
- name: "AD SOYAD"
743
+ ip_v6: "2001:db8::1",
744
+ national_id_pl: "00000000000",
745
+ social_id_at: "0000000000",
746
+ national_id_be: "00000000000"
672
747
  };
673
748
  var VALID_STRATEGIES = /* @__PURE__ */ new Set(["redact", "replace", "token", "hash"]);
749
+ function pick(pool, seed) {
750
+ const h = (0, import_crypto.createHash)("sha256").update(seed).digest("hex");
751
+ const idx = parseInt(h.slice(0, 8), 16) % pool.length;
752
+ return pool[idx];
753
+ }
754
+ function synthetic(ptype, original) {
755
+ if (ptype === "national_id_tr") return pick(TCKN_POOL, original);
756
+ if (ptype === "iban_tr" || ptype === "iban_intl") return pick(IBAN_TR_POOL, original);
757
+ if (ptype === "name") return pick(NAME_POOL, original);
758
+ return STATIC_SYNTHETIC[ptype] ?? `[${ptype.toUpperCase()}]`;
759
+ }
674
760
  function applyMask(text, findings, strategy = "redact") {
675
761
  if (!VALID_STRATEGIES.has(strategy)) {
676
762
  throw new Error(`Unknown strategy "${strategy}". Use: redact, replace, token, hash`);
@@ -687,7 +773,7 @@ function applyMask(text, findings, strategy = "redact") {
687
773
  if (strategy === "redact") {
688
774
  replacement = `[REDACTED_${tag}]`;
689
775
  } else if (strategy === "replace") {
690
- replacement = SYNTHETIC[type] ?? `[${tag}]`;
776
+ replacement = synthetic(type, value);
691
777
  } else if (strategy === "token") {
692
778
  replacement = `<PII_${tag}_${counter[type]}>`;
693
779
  } else {
@@ -700,7 +786,7 @@ function applyMask(text, findings, strategy = "redact") {
700
786
  }
701
787
 
702
788
  // src/index.ts
703
- var version = "0.5.1";
789
+ var version = "0.6.0";
704
790
  function computeQualityScore(completeness, avgLength, garbageRatio) {
705
791
  const lengthScore = Math.min(avgLength / 500, 1);
706
792
  const noiseScore = Math.max(0, 1 - garbageRatio * 10);
@@ -751,11 +837,71 @@ function auditBatch(texts, options = {}) {
751
837
  function mask(text, findings, options = {}) {
752
838
  return applyMask(text, findings, options.strategy ?? "redact");
753
839
  }
840
+ async function* auditStream(texts, options = {}) {
841
+ for await (const text of texts) {
842
+ yield audit(text, options);
843
+ }
844
+ }
845
+ var HIGH_RISK_TYPES = /* @__PURE__ */ new Set([
846
+ "national_id_tr",
847
+ "ssn",
848
+ "credit_card",
849
+ "national_id_pl",
850
+ "national_id_be",
851
+ "social_id_at",
852
+ "social_id_de",
853
+ "social_id_uk",
854
+ "national_id_it",
855
+ "national_id_nl",
856
+ "national_id_es",
857
+ "national_id_us",
858
+ "tax_id_tr",
859
+ "tax_id_de"
860
+ ]);
861
+ var MEDIUM_RISK_TYPES = /* @__PURE__ */ new Set([
862
+ "email",
863
+ "phone_tr",
864
+ "phone_intl",
865
+ "iban",
866
+ "iban_tr",
867
+ "iban_intl",
868
+ "name"
869
+ ]);
870
+ function complianceReport(result) {
871
+ const types = [...new Set(result.pii.map((f) => f.type))].sort();
872
+ let risk_level = "none";
873
+ if (types.length > 0) {
874
+ if (types.some((t) => HIGH_RISK_TYPES.has(t))) risk_level = "high";
875
+ else if (types.some((t) => MEDIUM_RISK_TYPES.has(t))) risk_level = "medium";
876
+ else risk_level = "low";
877
+ }
878
+ const recommendations = [];
879
+ if (risk_level === "high" || risk_level === "medium") {
880
+ recommendations.push("Apply mask({ strategy: 'redact' }) before storing or sharing this text.");
881
+ }
882
+ if (risk_level === "high") {
883
+ recommendations.push(
884
+ "Review applicable regulations (KVKK Art. 6, GDPR Art. 9) for special category data handling."
885
+ );
886
+ }
887
+ if (recommendations.length === 0) {
888
+ recommendations.push("No PII detected \u2014 text is safe for LLM processing.");
889
+ }
890
+ return {
891
+ has_pii: types.length > 0,
892
+ pii_types: types,
893
+ risk_level,
894
+ masking_required: types.length > 0,
895
+ recommendations
896
+ };
897
+ }
754
898
  // Annotate the CommonJS export names for ESM import in node:
755
899
  0 && (module.exports = {
756
900
  applyMask,
757
901
  audit,
758
902
  auditBatch,
903
+ auditStream,
904
+ complianceReport,
759
905
  detectPii,
760
906
  mask,
761
907
  noiseMetrics,
package/dist/index.d.cts CHANGED
@@ -54,7 +54,7 @@ declare function applyMask(text: string, findings: PiiFinding[], strategy?: Mask
54
54
  * // "Contact: [REDACTED_EMAIL]"
55
55
  */
56
56
 
57
- declare const version = "0.5.1";
57
+ declare const version = "0.6.0";
58
58
  type QualityGrade = "A" | "B" | "C" | "D";
59
59
  interface PiiSummaryEntry {
60
60
  type: string;
@@ -116,5 +116,33 @@ declare function auditBatch(texts: string[], options?: AuditOptions): BatchAudit
116
116
  * Apply masking to PII findings in *text*.
117
117
  */
118
118
  declare function mask(text: string, findings: PiiFinding[], options?: MaskOptions): string;
119
+ /**
120
+ * Async generator that audits texts one at a time from an async iterable.
121
+ *
122
+ * Yields one AuditResult per input text. Processing is sequential.
123
+ *
124
+ * @example
125
+ * async function* lines() {
126
+ * for (const line of data) yield line;
127
+ * }
128
+ * for await (const result of auditStream(lines())) {
129
+ * console.log(result.quality_grade, result.pii_summary);
130
+ * }
131
+ */
132
+ declare function auditStream(texts: AsyncIterable<string>, options?: AuditOptions): AsyncGenerator<AuditResult>;
133
+ type RiskLevel = "none" | "low" | "medium" | "high";
134
+ interface ComplianceReport {
135
+ has_pii: boolean;
136
+ pii_types: string[];
137
+ risk_level: RiskLevel;
138
+ masking_required: boolean;
139
+ recommendations: string[];
140
+ }
141
+ /**
142
+ * Generate a KVKK/GDPR compliance summary for an AuditResult.
143
+ *
144
+ * This is a technical summary only — not a legal document or regulatory opinion.
145
+ */
146
+ declare function complianceReport(result: AuditResult): ComplianceReport;
119
147
 
120
- export { type AuditOptions, type AuditResult, type BatchAuditResult, type MaskOptions, type MaskStrategy, type NoiseMetrics, type PiiFinding, type PiiSummaryEntry, type QualityGrade, type QualityMetrics, applyMask, audit, auditBatch, detectPii, mask, noiseMetrics, noiseRatio, qualityMetrics, version };
148
+ export { type AuditOptions, type AuditResult, type BatchAuditResult, type ComplianceReport, type MaskOptions, type MaskStrategy, type NoiseMetrics, type PiiFinding, type PiiSummaryEntry, type QualityGrade, type QualityMetrics, type RiskLevel, applyMask, audit, auditBatch, auditStream, complianceReport, detectPii, mask, noiseMetrics, noiseRatio, qualityMetrics, version };
package/dist/index.d.ts CHANGED
@@ -54,7 +54,7 @@ declare function applyMask(text: string, findings: PiiFinding[], strategy?: Mask
54
54
  * // "Contact: [REDACTED_EMAIL]"
55
55
  */
56
56
 
57
- declare const version = "0.5.1";
57
+ declare const version = "0.6.0";
58
58
  type QualityGrade = "A" | "B" | "C" | "D";
59
59
  interface PiiSummaryEntry {
60
60
  type: string;
@@ -116,5 +116,33 @@ declare function auditBatch(texts: string[], options?: AuditOptions): BatchAudit
116
116
  * Apply masking to PII findings in *text*.
117
117
  */
118
118
  declare function mask(text: string, findings: PiiFinding[], options?: MaskOptions): string;
119
+ /**
120
+ * Async generator that audits texts one at a time from an async iterable.
121
+ *
122
+ * Yields one AuditResult per input text. Processing is sequential.
123
+ *
124
+ * @example
125
+ * async function* lines() {
126
+ * for (const line of data) yield line;
127
+ * }
128
+ * for await (const result of auditStream(lines())) {
129
+ * console.log(result.quality_grade, result.pii_summary);
130
+ * }
131
+ */
132
+ declare function auditStream(texts: AsyncIterable<string>, options?: AuditOptions): AsyncGenerator<AuditResult>;
133
+ type RiskLevel = "none" | "low" | "medium" | "high";
134
+ interface ComplianceReport {
135
+ has_pii: boolean;
136
+ pii_types: string[];
137
+ risk_level: RiskLevel;
138
+ masking_required: boolean;
139
+ recommendations: string[];
140
+ }
141
+ /**
142
+ * Generate a KVKK/GDPR compliance summary for an AuditResult.
143
+ *
144
+ * This is a technical summary only — not a legal document or regulatory opinion.
145
+ */
146
+ declare function complianceReport(result: AuditResult): ComplianceReport;
119
147
 
120
- export { type AuditOptions, type AuditResult, type BatchAuditResult, type MaskOptions, type MaskStrategy, type NoiseMetrics, type PiiFinding, type PiiSummaryEntry, type QualityGrade, type QualityMetrics, applyMask, audit, auditBatch, detectPii, mask, noiseMetrics, noiseRatio, qualityMetrics, version };
148
+ export { type AuditOptions, type AuditResult, type BatchAuditResult, type ComplianceReport, type MaskOptions, type MaskStrategy, type NoiseMetrics, type PiiFinding, type PiiSummaryEntry, type QualityGrade, type QualityMetrics, type RiskLevel, applyMask, audit, auditBatch, auditStream, complianceReport, detectPii, mask, noiseMetrics, noiseRatio, qualityMetrics, version };
package/dist/index.js CHANGED
@@ -299,6 +299,28 @@ var _EIN_INVALID_PREFIXES = /* @__PURE__ */ new Set([
299
299
  function validEinUs(s) {
300
300
  return !_EIN_INVALID_PREFIXES.has(s.slice(0, 2));
301
301
  }
302
+ var PESEL_PL_RE = /\b(\d{11})\b/g;
303
+ function validPeselPl(s) {
304
+ if (s.length !== 11 || !/^\d+$/.test(s)) return false;
305
+ const weights = [1, 3, 7, 9, 1, 3, 7, 9, 1, 3];
306
+ const total = weights.reduce((sum, w, i) => sum + w * parseInt(s[i]), 0);
307
+ return (10 - total % 10) % 10 === parseInt(s[10]);
308
+ }
309
+ var SVNR_AT_RE = /\b(\d{10})\b/g;
310
+ function validSvnrAt(s) {
311
+ if (s.length !== 10 || !/^\d+$/.test(s)) return false;
312
+ const weights = [3, 7, 9, 0, 5, 8, 4, 2, 1, 6];
313
+ const total = weights.reduce((sum, w, i) => sum + w * parseInt(s[i]), 0);
314
+ return total % 10 === parseInt(s[3]);
315
+ }
316
+ var NRRNISS_BE_RE = /\b(\d{11})\b/g;
317
+ function validNrrnissBe(s) {
318
+ if (s.length !== 11 || !/^\d+$/.test(s)) return false;
319
+ const body = parseInt(s.slice(0, 9));
320
+ const check = parseInt(s.slice(9));
321
+ if (97 - body % 97 === check) return true;
322
+ return 97 - (2e9 + body) % 97 === check;
323
+ }
302
324
  var LOCALE_DETECTORS = {
303
325
  tr: /* @__PURE__ */ new Set([
304
326
  "national_id_tr",
@@ -318,7 +340,10 @@ var LOCALE_DETECTORS = {
318
340
  it: /* @__PURE__ */ new Set(["national_id_it", "tax_id_it"]),
319
341
  nl: /* @__PURE__ */ new Set(["national_id_nl", "company_id_nl"]),
320
342
  es: /* @__PURE__ */ new Set(["national_id_es", "tax_id_es"]),
321
- uk: /* @__PURE__ */ new Set(["social_id_uk", "tax_id_uk"])
343
+ uk: /* @__PURE__ */ new Set(["social_id_uk", "tax_id_uk"]),
344
+ pl: /* @__PURE__ */ new Set(["national_id_pl"]),
345
+ at: /* @__PURE__ */ new Set(["social_id_at"]),
346
+ be: /* @__PURE__ */ new Set(["national_id_be"])
322
347
  };
323
348
  var UNIVERSAL = /* @__PURE__ */ new Set(["email", "iban", "credit_card", "ip", "ip_v6"]);
324
349
  function activeDetectors(locale) {
@@ -570,6 +595,27 @@ function detectPii(text, locale = "und") {
570
595
  findings.push({ type: "company_name_intl", value: m[1], start: m.index, end: m.index + m[1].length });
571
596
  }
572
597
  }
598
+ if (active.has("national_id_pl")) {
599
+ PESEL_PL_RE.lastIndex = 0;
600
+ let m;
601
+ while ((m = PESEL_PL_RE.exec(t)) !== null) {
602
+ if (validPeselPl(m[1])) findings.push({ type: "national_id_pl", value: m[1], start: m.index, end: m.index + m[1].length });
603
+ }
604
+ }
605
+ if (active.has("social_id_at")) {
606
+ SVNR_AT_RE.lastIndex = 0;
607
+ let m;
608
+ while ((m = SVNR_AT_RE.exec(t)) !== null) {
609
+ if (validSvnrAt(m[1])) findings.push({ type: "social_id_at", value: m[1], start: m.index, end: m.index + m[1].length });
610
+ }
611
+ }
612
+ if (active.has("national_id_be")) {
613
+ NRRNISS_BE_RE.lastIndex = 0;
614
+ let m;
615
+ while ((m = NRRNISS_BE_RE.exec(t)) !== null) {
616
+ if (validNrrnissBe(m[1])) findings.push({ type: "national_id_be", value: m[1], start: m.index, end: m.index + m[1].length });
617
+ }
618
+ }
573
619
  findings.sort((a, b) => a.start - b.start);
574
620
  const specificIbanSpans = new Set(
575
621
  findings.filter((f) => f.type === "iban_tr" || f.type === "iban_intl").map((f) => `${f.start}:${f.end}`)
@@ -625,18 +671,56 @@ function noiseMetrics(text) {
625
671
 
626
672
  // src/mask.ts
627
673
  import { createHash } from "crypto";
628
- var SYNTHETIC = {
674
+ var TCKN_POOL = ["12345678950", "10000000146", "23456789060"];
675
+ var IBAN_TR_POOL = ["TR330006100519786457841326", "TR390006199999888888888813"];
676
+ var NAME_POOL = [
677
+ "Ahmet Yilmaz",
678
+ "Mehmet Demir",
679
+ "Ayse Kaya",
680
+ "Fatma Celik",
681
+ "Ali Sahin",
682
+ "Zeynep Arslan",
683
+ "Mustafa Ozturk",
684
+ "Emine Dogan",
685
+ "Ibrahim Kurt",
686
+ "Hatice Aydin",
687
+ "Hasan Yildiz",
688
+ "Elif Gunes",
689
+ "Huseyin Cetin",
690
+ "Meryem Polat",
691
+ "Omer Koc",
692
+ "Busra Tekin",
693
+ "Yusuf Erdogan",
694
+ "Selin Bozkurt",
695
+ "Kemal Akin",
696
+ "Derya Ucar"
697
+ ];
698
+ var STATIC_SYNTHETIC = {
629
699
  email: "user@example.com",
630
700
  phone: "+1 000 000 0000",
631
701
  phone_tr: "0500 000 00 00",
632
- national_id_tr: "00000000000",
702
+ phone_intl: "+1 000 000 0000",
633
703
  ssn: "000-00-0000",
634
704
  iban: "XX00 0000 0000 0000 0000 00",
635
705
  credit_card: "0000 0000 0000 0000",
636
706
  ip: "0.0.0.0",
637
- name: "AD SOYAD"
707
+ ip_v6: "2001:db8::1",
708
+ national_id_pl: "00000000000",
709
+ social_id_at: "0000000000",
710
+ national_id_be: "00000000000"
638
711
  };
639
712
  var VALID_STRATEGIES = /* @__PURE__ */ new Set(["redact", "replace", "token", "hash"]);
713
+ function pick(pool, seed) {
714
+ const h = createHash("sha256").update(seed).digest("hex");
715
+ const idx = parseInt(h.slice(0, 8), 16) % pool.length;
716
+ return pool[idx];
717
+ }
718
+ function synthetic(ptype, original) {
719
+ if (ptype === "national_id_tr") return pick(TCKN_POOL, original);
720
+ if (ptype === "iban_tr" || ptype === "iban_intl") return pick(IBAN_TR_POOL, original);
721
+ if (ptype === "name") return pick(NAME_POOL, original);
722
+ return STATIC_SYNTHETIC[ptype] ?? `[${ptype.toUpperCase()}]`;
723
+ }
640
724
  function applyMask(text, findings, strategy = "redact") {
641
725
  if (!VALID_STRATEGIES.has(strategy)) {
642
726
  throw new Error(`Unknown strategy "${strategy}". Use: redact, replace, token, hash`);
@@ -653,7 +737,7 @@ function applyMask(text, findings, strategy = "redact") {
653
737
  if (strategy === "redact") {
654
738
  replacement = `[REDACTED_${tag}]`;
655
739
  } else if (strategy === "replace") {
656
- replacement = SYNTHETIC[type] ?? `[${tag}]`;
740
+ replacement = synthetic(type, value);
657
741
  } else if (strategy === "token") {
658
742
  replacement = `<PII_${tag}_${counter[type]}>`;
659
743
  } else {
@@ -666,7 +750,7 @@ function applyMask(text, findings, strategy = "redact") {
666
750
  }
667
751
 
668
752
  // src/index.ts
669
- var version = "0.5.1";
753
+ var version = "0.6.0";
670
754
  function computeQualityScore(completeness, avgLength, garbageRatio) {
671
755
  const lengthScore = Math.min(avgLength / 500, 1);
672
756
  const noiseScore = Math.max(0, 1 - garbageRatio * 10);
@@ -717,10 +801,70 @@ function auditBatch(texts, options = {}) {
717
801
  function mask(text, findings, options = {}) {
718
802
  return applyMask(text, findings, options.strategy ?? "redact");
719
803
  }
804
+ async function* auditStream(texts, options = {}) {
805
+ for await (const text of texts) {
806
+ yield audit(text, options);
807
+ }
808
+ }
809
+ var HIGH_RISK_TYPES = /* @__PURE__ */ new Set([
810
+ "national_id_tr",
811
+ "ssn",
812
+ "credit_card",
813
+ "national_id_pl",
814
+ "national_id_be",
815
+ "social_id_at",
816
+ "social_id_de",
817
+ "social_id_uk",
818
+ "national_id_it",
819
+ "national_id_nl",
820
+ "national_id_es",
821
+ "national_id_us",
822
+ "tax_id_tr",
823
+ "tax_id_de"
824
+ ]);
825
+ var MEDIUM_RISK_TYPES = /* @__PURE__ */ new Set([
826
+ "email",
827
+ "phone_tr",
828
+ "phone_intl",
829
+ "iban",
830
+ "iban_tr",
831
+ "iban_intl",
832
+ "name"
833
+ ]);
834
+ function complianceReport(result) {
835
+ const types = [...new Set(result.pii.map((f) => f.type))].sort();
836
+ let risk_level = "none";
837
+ if (types.length > 0) {
838
+ if (types.some((t) => HIGH_RISK_TYPES.has(t))) risk_level = "high";
839
+ else if (types.some((t) => MEDIUM_RISK_TYPES.has(t))) risk_level = "medium";
840
+ else risk_level = "low";
841
+ }
842
+ const recommendations = [];
843
+ if (risk_level === "high" || risk_level === "medium") {
844
+ recommendations.push("Apply mask({ strategy: 'redact' }) before storing or sharing this text.");
845
+ }
846
+ if (risk_level === "high") {
847
+ recommendations.push(
848
+ "Review applicable regulations (KVKK Art. 6, GDPR Art. 9) for special category data handling."
849
+ );
850
+ }
851
+ if (recommendations.length === 0) {
852
+ recommendations.push("No PII detected \u2014 text is safe for LLM processing.");
853
+ }
854
+ return {
855
+ has_pii: types.length > 0,
856
+ pii_types: types,
857
+ risk_level,
858
+ masking_required: types.length > 0,
859
+ recommendations
860
+ };
861
+ }
720
862
  export {
721
863
  applyMask,
722
864
  audit,
723
865
  auditBatch,
866
+ auditStream,
867
+ complianceReport,
724
868
  detectPii,
725
869
  mask,
726
870
  noiseMetrics,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@flexorch/audit",
3
- "version": "0.5.1",
3
+ "version": "0.6.0",
4
4
  "description": "Zero-dependency PII + quality + noise audit for LLM datasets (TR/EU/US)",
5
5
  "keywords": [
6
6
  "pii",
@@ -14,7 +14,7 @@
14
14
  ],
15
15
  "license": "MIT",
16
16
  "author": "FlexOrch",
17
- "homepage": "https://github.com/flexorch/flexorch-audit-js",
17
+ "homepage": "https://flexorch.com",
18
18
  "repository": {
19
19
  "type": "git",
20
20
  "url": "git+https://github.com/flexorch/flexorch-audit-js.git"