@flexorch/audit 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -35,6 +35,8 @@ const clean = mask(text, result.pii, { strategy: "redact" })
35
35
  npm install @flexorch/audit
36
36
  ```
37
37
 
38
+ ![demo](assets/demo.svg)
39
+
38
40
  ## Locale support
39
41
 
40
42
  | `locale` | Active detectors |
package/dist/index.cjs CHANGED
@@ -22,6 +22,7 @@ var index_exports = {};
22
22
  __export(index_exports, {
23
23
  applyMask: () => applyMask,
24
24
  audit: () => audit,
25
+ auditBatch: () => auditBatch,
25
26
  detectPii: () => detectPii,
26
27
  mask: () => mask,
27
28
  noiseMetrics: () => noiseMetrics,
@@ -36,8 +37,14 @@ var PHONE_INTL_RE = /\+\d{1,3}[\s\-.]?\(?\d{1,4}\)?[\s\-.]?\d{3,4}[\s\-.]?\d{4}\
36
37
  var IBAN_RE = /\b[A-Z]{2}\d{2}[0-9A-Z]{11,30}\b/g;
37
38
  var CC_RE = /\b\d{4}[ \-]\d{4}[ \-]\d{4}[ \-]\d{4}\b/g;
38
39
  var IPV4_RE = /\b(?:(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\b/g;
40
+ var _H = "[0-9a-fA-F]{1,4}";
41
+ var IPV6_RE = new RegExp(
42
+ `(?<![:\\.\\w])(?:(?:${_H}:){7}${_H}|(?:${_H}:){1,7}:|::(?:(?:${_H}:){0,6}${_H})?|(?:${_H}:){1,6}:${_H}|(?:${_H}:){1,5}(?::${_H}){1,2}|(?:${_H}:){1,4}(?::${_H}){1,3}|(?:${_H}:){1,3}(?::${_H}){1,4}|(?:${_H}:){1,2}(?::${_H}){1,5}|${_H}:(?::${_H}){1,6})(?![:\\.\\w])`,
43
+ "gi"
44
+ );
39
45
  var PHONE_TR_RE = /\b(?:\+90|0)?\s*5\d{2}\s*\d{3}\s*\d{2}\s*\d{2}\b/g;
40
46
  var TCKN_RE = /\b([1-9]\d{10})\b/g;
47
+ var VKN_RE = /\b([1-9]\d{9})\b/g;
41
48
  var NAME_PREFIX_TR = "(?:Ad[\u0131i]\\s*(?:Soyad[\u0131i])?|Soyad[\u0131i]|\u0130sim|M\xFC\u015Fteri\\s+Ad[\u0131i]|Yetkili(?:\\s+Ki\u015Fi)?|\xC7al\u0131\u015Fan\\s+Ad[\u0131i]|Personel\\s+Ad[\u0131i]|Ki\u015Fi\\s+Ad[\u0131i]|Sat\u0131c\u0131\\s+Ad[\u0131i]|Al\u0131c\u0131\\s+Ad[\u0131i]|\u0130lgili\\s+Ki\u015Fi|Hesap\\s+Sahibi)";
42
49
  var NAME_PREFIX_EN = "(?:Full\\s+Name|Customer\\s+Name|Employee\\s+Name|Contact\\s+Name|Authorized\\s+(?:By|Person)|Account\\s+Holder|(?<!\\bUser\\s)Name)";
43
50
  var NAME_VALUE = "([A-Z\xC7\u011E\u0130\xD6\u015E\xDC][a-z\xE7\u011F\u0131\u015F\xF6\u015F\xFC]+(?:\\s+[A-Z\xC7\u011E\u0130\xD6\u015E\xDC][a-z\xE7\u011F\u0131\u015F\xF6\u015F\xFC]+){0,2})";
@@ -54,6 +61,20 @@ function validTckn(s) {
54
61
  if ((sumOdd * 7 - sumEven) % 10 !== d[9]) return false;
55
62
  return d.slice(0, 10).reduce((a, b) => a + b, 0) % 10 === d[10];
56
63
  }
64
/**
 * Verify the check digit of a Turkish tax identification number (VKN).
 *
 * @param {string} s - Candidate made of exactly ten ASCII digits.
 * @returns {boolean} `true` when the tenth digit equals the check digit
 *   derived from the first nine; `false` for any malformed candidate.
 */
function validVkn(s) {
  const isTenDigits = s.length === 10 && /^\d+$/.test(s);
  if (!isTenDigits || s[0] === "0") return false;
  const digits = [...s].map((ch) => Number(ch));
  const total = digits.slice(0, 9).reduce((acc, digit, idx) => {
    // Position weight runs from 9 (first digit) down to 1 (ninth digit).
    const weight = 9 - idx;
    const shifted = (digit + weight) % 10;
    if (shifted === 0) return acc;
    const folded = (shifted * 2 ** weight) % 9;
    // A zero residue counts as 9.
    return acc + (folded === 0 ? 9 : folded);
  }, 0);
  const expected = (10 - (total % 10)) % 10;
  return expected === digits[9];
}
57
78
  function luhn(number) {
58
79
  const digits = number.replace(/\D/g, "");
59
80
  if (digits.length < 13 || digits.length > 19) return false;
@@ -68,12 +89,26 @@ function luhn(number) {
68
89
  }
69
90
  return total % 10 === 0;
70
91
  }
92
/**
 * Check whether a candidate string has the shape of an IBAN and passes the
 * mod-97 checksum (remainder must be exactly 1).
 *
 * @param {string} s - Candidate IBAN without separators; letters may be in
 *   either case.
 * @returns {boolean} `true` only when both the shape and the checksum are
 *   valid; `false` otherwise, including for non-string input.
 */
function validIban(s) {
  // Guard the shape first (same shape as IBAN_RE, anchored). Without this,
  // short inputs such as "1" or "98" reduce to 1 modulo 97 and would be
  // reported as valid.
  if (typeof s !== "string" || !/^[A-Z]{2}\d{2}[0-9A-Z]{11,30}$/i.test(s)) {
    return false;
  }
  // Move the country code and check digits to the end of the string.
  const rearranged = s.slice(4) + s.slice(0, 4);
  // Expand each letter to its two-digit value: A=10 ... Z=35.
  const numeric = rearranged
    .toUpperCase()
    .replace(/[A-Z]/g, (c) => String(c.charCodeAt(0) - 55));
  // Fold in 9-digit chunks so every intermediate value stays a safe integer.
  let remainder = 0;
  for (let i = 0; i < numeric.length; i += 9) {
    remainder = Number(String(remainder) + numeric.slice(i, i + 9)) % 97;
  }
  return remainder === 1;
}
71
106
  var LOCALE_DETECTORS = {
72
- tr: /* @__PURE__ */ new Set(["national_id_tr", "phone_tr", "name"]),
107
+ tr: /* @__PURE__ */ new Set(["national_id_tr", "tax_id_tr", "phone_tr", "name"]),
73
108
  us: /* @__PURE__ */ new Set(["ssn", "phone"]),
74
109
  eu: /* @__PURE__ */ new Set(["phone"])
75
110
  };
76
- var UNIVERSAL = /* @__PURE__ */ new Set(["email", "iban", "credit_card", "ip"]);
111
+ var UNIVERSAL = /* @__PURE__ */ new Set(["email", "iban", "credit_card", "ip", "ip_v6"]);
77
112
  function activeDetectors(locale) {
78
113
  if (locale === "all") {
79
114
  const active2 = new Set(UNIVERSAL);
@@ -111,7 +146,15 @@ function detectPii(text, locale = "tr") {
111
146
  }
112
147
  }
113
148
  }
114
- if (active.has("iban")) findings.push(...findAll(IBAN_RE, t, "iban"));
149
+ if (active.has("iban")) {
150
+ IBAN_RE.lastIndex = 0;
151
+ let m;
152
+ while ((m = IBAN_RE.exec(t)) !== null) {
153
+ if (validIban(m[0])) {
154
+ findings.push({ type: "iban", value: m[0], start: m.index, end: m.index + m[0].length });
155
+ }
156
+ }
157
+ }
115
158
  if (active.has("credit_card")) {
116
159
  CC_RE.lastIndex = 0;
117
160
  let m;
@@ -122,6 +165,7 @@ function detectPii(text, locale = "tr") {
122
165
  }
123
166
  }
124
167
  if (active.has("ip")) findings.push(...findAll(IPV4_RE, t, "ip"));
168
+ if (active.has("ip_v6")) findings.push(...findAll(IPV6_RE, t, "ip_v6"));
125
169
  if (active.has("phone_tr")) findings.push(...findAll(PHONE_TR_RE, t, "phone_tr"));
126
170
  if (active.has("national_id_tr")) {
127
171
  TCKN_RE.lastIndex = 0;
@@ -132,6 +176,15 @@ function detectPii(text, locale = "tr") {
132
176
  }
133
177
  }
134
178
  }
179
+ if (active.has("tax_id_tr")) {
180
+ VKN_RE.lastIndex = 0;
181
+ let m;
182
+ while ((m = VKN_RE.exec(t)) !== null) {
183
+ if (validVkn(m[1])) {
184
+ findings.push({ type: "tax_id_tr", value: m[1], start: m.index, end: m.index + m[0].length });
185
+ }
186
+ }
187
+ }
135
188
  if (active.has("name")) {
136
189
  NAME_RE.lastIndex = 0;
137
190
  let m;
@@ -221,7 +274,7 @@ function applyMask(text, findings, strategy = "redact") {
221
274
  }
222
275
 
223
276
  // src/index.ts
224
- var version = "0.2.0";
277
+ var version = "0.3.0";
225
278
  function computeQualityScore(completeness, avgLength, garbageRatio) {
226
279
  const lengthScore = Math.min(avgLength / 500, 1);
227
280
  const noiseScore = Math.max(0, 1 - garbageRatio * 10);
@@ -249,6 +302,25 @@ function audit(text, options = {}) {
249
302
  const pii_summary = Array.from(counts.entries()).sort(([a], [b]) => a.localeCompare(b)).map(([type, count]) => ({ type, count }));
250
303
  return { quality_grade, quality_score, pii_summary, pii, quality, noise };
251
304
  }
305
/**
 * Run `audit` over every text and aggregate the per-text results.
 *
 * @param {string[]} texts - Texts to audit, in order.
 * @param {object} [options] - Passed unchanged to each `audit` call.
 * @returns {{results: object[], duplicate_ratio: number, pii_summary: {type: string, count: number}[], avg_quality_score: number}}
 *   The per-text results, the share of texts that repeat an earlier text,
 *   PII counts per type sorted by type name, and the mean quality score.
 *   Both ratios are rounded to four decimals.
 */
function auditBatch(texts, options = {}) {
  if (texts.length === 0) {
    return { results: [], duplicate_ratio: 0, pii_summary: [], avg_quality_score: 0 };
  }
  const round4 = (value) => Math.round(value * 1e4) / 1e4;
  const results = texts.map((text) => audit(text, options));

  // Every occurrence after the first of an identical text is a duplicate.
  const distinctCount = new Set(texts).size;
  const duplicate_ratio = round4((texts.length - distinctCount) / texts.length);

  const tally = new Map();
  let scoreSum = 0;
  for (const { pii, quality_score } of results) {
    scoreSum += quality_score;
    for (const { type } of pii) {
      tally.set(type, (tally.get(type) ?? 0) + 1);
    }
  }
  const pii_summary = [...tally]
    .sort(([left], [right]) => left.localeCompare(right))
    .map(([type, count]) => ({ type, count }));
  const avg_quality_score = round4(scoreSum / results.length);

  return { results, duplicate_ratio, pii_summary, avg_quality_score };
}
252
324
  function mask(text, findings, options = {}) {
253
325
  return applyMask(text, findings, options.strategy ?? "redact");
254
326
  }
@@ -256,6 +328,7 @@ function mask(text, findings, options = {}) {
256
328
  0 && (module.exports = {
257
329
  applyMask,
258
330
  audit,
331
+ auditBatch,
259
332
  detectPii,
260
333
  mask,
261
334
  noiseMetrics,
package/dist/index.d.cts CHANGED
@@ -45,7 +45,7 @@ declare function applyMask(text: string, findings: PiiFinding[], strategy?: Mask
45
45
  * // "Contact: [REDACTED_EMAIL]"
46
46
  */
47
47
 
48
- declare const version = "0.2.0";
48
+ declare const version = "0.3.0";
49
49
  type QualityGrade = "A" | "B" | "C" | "D";
50
50
  interface PiiSummaryEntry {
51
51
  type: string;
@@ -54,12 +54,12 @@ interface PiiSummaryEntry {
54
54
  interface AuditOptions {
55
55
  /**
56
56
  * Active locale-specific detectors.
57
- * - "tr" — Turkish: TCKN, phone_tr, name (default)
57
+ * - "tr" — Turkish: TCKN, VKN, phone_tr, name (default)
58
58
  * - "us" — US: SSN, E.164 phone
59
59
  * - "eu" — EU: E.164 phone
60
60
  * - "all" — All detectors
61
61
  *
62
- * Universal detectors (email, iban, credit_card, ip) are always active.
62
+ * Universal detectors (email, iban, credit_card, ip, ip_v6) are always active.
63
63
  */
64
64
  locale?: string;
65
65
  }
@@ -83,9 +83,23 @@ interface MaskOptions {
83
83
  * Audit *text* for LLM dataset readiness.
84
84
  */
85
85
  declare function audit(text: string, options?: AuditOptions): AuditResult;
86
+ interface BatchAuditResult {
87
+ /** One AuditResult per input text, in order. */
88
+ results: AuditResult[];
89
+ /** Fraction of texts that are exact duplicates (0.0–1.0). */
90
+ duplicate_ratio: number;
91
+ /** PII counts aggregated across all texts. */
92
+ pii_summary: PiiSummaryEntry[];
93
+ /** Mean quality_score across all texts. */
94
+ avg_quality_score: number;
95
+ }
96
+ /**
97
+ * Audit a list of texts and aggregate metrics — including duplicate_ratio.
98
+ */
99
+ declare function auditBatch(texts: string[], options?: AuditOptions): BatchAuditResult;
86
100
  /**
87
101
  * Apply masking to PII findings in *text*.
88
102
  */
89
103
  declare function mask(text: string, findings: PiiFinding[], options?: MaskOptions): string;
90
104
 
91
- export { type AuditOptions, type AuditResult, type MaskOptions, type MaskStrategy, type NoiseMetrics, type PiiFinding, type PiiSummaryEntry, type QualityGrade, type QualityMetrics, applyMask, audit, detectPii, mask, noiseMetrics, qualityMetrics, version };
105
+ export { type AuditOptions, type AuditResult, type BatchAuditResult, type MaskOptions, type MaskStrategy, type NoiseMetrics, type PiiFinding, type PiiSummaryEntry, type QualityGrade, type QualityMetrics, applyMask, audit, auditBatch, detectPii, mask, noiseMetrics, qualityMetrics, version };
package/dist/index.d.ts CHANGED
@@ -45,7 +45,7 @@ declare function applyMask(text: string, findings: PiiFinding[], strategy?: Mask
45
45
  * // "Contact: [REDACTED_EMAIL]"
46
46
  */
47
47
 
48
- declare const version = "0.2.0";
48
+ declare const version = "0.3.0";
49
49
  type QualityGrade = "A" | "B" | "C" | "D";
50
50
  interface PiiSummaryEntry {
51
51
  type: string;
@@ -54,12 +54,12 @@ interface PiiSummaryEntry {
54
54
  interface AuditOptions {
55
55
  /**
56
56
  * Active locale-specific detectors.
57
- * - "tr" — Turkish: TCKN, phone_tr, name (default)
57
+ * - "tr" — Turkish: TCKN, VKN, phone_tr, name (default)
58
58
  * - "us" — US: SSN, E.164 phone
59
59
  * - "eu" — EU: E.164 phone
60
60
  * - "all" — All detectors
61
61
  *
62
- * Universal detectors (email, iban, credit_card, ip) are always active.
62
+ * Universal detectors (email, iban, credit_card, ip, ip_v6) are always active.
63
63
  */
64
64
  locale?: string;
65
65
  }
@@ -83,9 +83,23 @@ interface MaskOptions {
83
83
  * Audit *text* for LLM dataset readiness.
84
84
  */
85
85
  declare function audit(text: string, options?: AuditOptions): AuditResult;
86
+ interface BatchAuditResult {
87
+ /** One AuditResult per input text, in order. */
88
+ results: AuditResult[];
89
+ /** Fraction of texts that are exact duplicates (0.0–1.0). */
90
+ duplicate_ratio: number;
91
+ /** PII counts aggregated across all texts. */
92
+ pii_summary: PiiSummaryEntry[];
93
+ /** Mean quality_score across all texts. */
94
+ avg_quality_score: number;
95
+ }
96
+ /**
97
+ * Audit a list of texts and aggregate metrics — including duplicate_ratio.
98
+ */
99
+ declare function auditBatch(texts: string[], options?: AuditOptions): BatchAuditResult;
86
100
  /**
87
101
  * Apply masking to PII findings in *text*.
88
102
  */
89
103
  declare function mask(text: string, findings: PiiFinding[], options?: MaskOptions): string;
90
104
 
91
- export { type AuditOptions, type AuditResult, type MaskOptions, type MaskStrategy, type NoiseMetrics, type PiiFinding, type PiiSummaryEntry, type QualityGrade, type QualityMetrics, applyMask, audit, detectPii, mask, noiseMetrics, qualityMetrics, version };
105
+ export { type AuditOptions, type AuditResult, type BatchAuditResult, type MaskOptions, type MaskStrategy, type NoiseMetrics, type PiiFinding, type PiiSummaryEntry, type QualityGrade, type QualityMetrics, applyMask, audit, auditBatch, detectPii, mask, noiseMetrics, qualityMetrics, version };
package/dist/index.js CHANGED
@@ -4,8 +4,14 @@ var PHONE_INTL_RE = /\+\d{1,3}[\s\-.]?\(?\d{1,4}\)?[\s\-.]?\d{3,4}[\s\-.]?\d{4}\
4
4
  var IBAN_RE = /\b[A-Z]{2}\d{2}[0-9A-Z]{11,30}\b/g;
5
5
  var CC_RE = /\b\d{4}[ \-]\d{4}[ \-]\d{4}[ \-]\d{4}\b/g;
6
6
  var IPV4_RE = /\b(?:(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\b/g;
7
+ var _H = "[0-9a-fA-F]{1,4}";
8
+ var IPV6_RE = new RegExp(
9
+ `(?<![:\\.\\w])(?:(?:${_H}:){7}${_H}|(?:${_H}:){1,7}:|::(?:(?:${_H}:){0,6}${_H})?|(?:${_H}:){1,6}:${_H}|(?:${_H}:){1,5}(?::${_H}){1,2}|(?:${_H}:){1,4}(?::${_H}){1,3}|(?:${_H}:){1,3}(?::${_H}){1,4}|(?:${_H}:){1,2}(?::${_H}){1,5}|${_H}:(?::${_H}){1,6})(?![:\\.\\w])`,
10
+ "gi"
11
+ );
7
12
  var PHONE_TR_RE = /\b(?:\+90|0)?\s*5\d{2}\s*\d{3}\s*\d{2}\s*\d{2}\b/g;
8
13
  var TCKN_RE = /\b([1-9]\d{10})\b/g;
14
+ var VKN_RE = /\b([1-9]\d{9})\b/g;
9
15
  var NAME_PREFIX_TR = "(?:Ad[\u0131i]\\s*(?:Soyad[\u0131i])?|Soyad[\u0131i]|\u0130sim|M\xFC\u015Fteri\\s+Ad[\u0131i]|Yetkili(?:\\s+Ki\u015Fi)?|\xC7al\u0131\u015Fan\\s+Ad[\u0131i]|Personel\\s+Ad[\u0131i]|Ki\u015Fi\\s+Ad[\u0131i]|Sat\u0131c\u0131\\s+Ad[\u0131i]|Al\u0131c\u0131\\s+Ad[\u0131i]|\u0130lgili\\s+Ki\u015Fi|Hesap\\s+Sahibi)";
10
16
  var NAME_PREFIX_EN = "(?:Full\\s+Name|Customer\\s+Name|Employee\\s+Name|Contact\\s+Name|Authorized\\s+(?:By|Person)|Account\\s+Holder|(?<!\\bUser\\s)Name)";
11
17
  var NAME_VALUE = "([A-Z\xC7\u011E\u0130\xD6\u015E\xDC][a-z\xE7\u011F\u0131\u015F\xF6\u015F\xFC]+(?:\\s+[A-Z\xC7\u011E\u0130\xD6\u015E\xDC][a-z\xE7\u011F\u0131\u015F\xF6\u015F\xFC]+){0,2})";
@@ -22,6 +28,20 @@ function validTckn(s) {
22
28
  if ((sumOdd * 7 - sumEven) % 10 !== d[9]) return false;
23
29
  return d.slice(0, 10).reduce((a, b) => a + b, 0) % 10 === d[10];
24
30
  }
31
/**
 * Verify the check digit of a Turkish tax identification number (VKN).
 *
 * @param {string} s - Candidate made of exactly ten ASCII digits.
 * @returns {boolean} `true` when the tenth digit equals the check digit
 *   derived from the first nine; `false` for any malformed candidate.
 */
function validVkn(s) {
  const isTenDigits = s.length === 10 && /^\d+$/.test(s);
  if (!isTenDigits || s[0] === "0") return false;
  const digits = [...s].map((ch) => Number(ch));
  const total = digits.slice(0, 9).reduce((acc, digit, idx) => {
    // Position weight runs from 9 (first digit) down to 1 (ninth digit).
    const weight = 9 - idx;
    const shifted = (digit + weight) % 10;
    if (shifted === 0) return acc;
    const folded = (shifted * 2 ** weight) % 9;
    // A zero residue counts as 9.
    return acc + (folded === 0 ? 9 : folded);
  }, 0);
  const expected = (10 - (total % 10)) % 10;
  return expected === digits[9];
}
25
45
  function luhn(number) {
26
46
  const digits = number.replace(/\D/g, "");
27
47
  if (digits.length < 13 || digits.length > 19) return false;
@@ -36,12 +56,26 @@ function luhn(number) {
36
56
  }
37
57
  return total % 10 === 0;
38
58
  }
59
/**
 * Check whether a candidate string has the shape of an IBAN and passes the
 * mod-97 checksum (remainder must be exactly 1).
 *
 * @param {string} s - Candidate IBAN without separators; letters may be in
 *   either case.
 * @returns {boolean} `true` only when both the shape and the checksum are
 *   valid; `false` otherwise, including for non-string input.
 */
function validIban(s) {
  // Guard the shape first (same shape as IBAN_RE, anchored). Without this,
  // short inputs such as "1" or "98" reduce to 1 modulo 97 and would be
  // reported as valid.
  if (typeof s !== "string" || !/^[A-Z]{2}\d{2}[0-9A-Z]{11,30}$/i.test(s)) {
    return false;
  }
  // Move the country code and check digits to the end of the string.
  const rearranged = s.slice(4) + s.slice(0, 4);
  // Expand each letter to its two-digit value: A=10 ... Z=35.
  const numeric = rearranged
    .toUpperCase()
    .replace(/[A-Z]/g, (c) => String(c.charCodeAt(0) - 55));
  // Fold in 9-digit chunks so every intermediate value stays a safe integer.
  let remainder = 0;
  for (let i = 0; i < numeric.length; i += 9) {
    remainder = Number(String(remainder) + numeric.slice(i, i + 9)) % 97;
  }
  return remainder === 1;
}
39
73
  var LOCALE_DETECTORS = {
40
- tr: /* @__PURE__ */ new Set(["national_id_tr", "phone_tr", "name"]),
74
+ tr: /* @__PURE__ */ new Set(["national_id_tr", "tax_id_tr", "phone_tr", "name"]),
41
75
  us: /* @__PURE__ */ new Set(["ssn", "phone"]),
42
76
  eu: /* @__PURE__ */ new Set(["phone"])
43
77
  };
44
- var UNIVERSAL = /* @__PURE__ */ new Set(["email", "iban", "credit_card", "ip"]);
78
+ var UNIVERSAL = /* @__PURE__ */ new Set(["email", "iban", "credit_card", "ip", "ip_v6"]);
45
79
  function activeDetectors(locale) {
46
80
  if (locale === "all") {
47
81
  const active2 = new Set(UNIVERSAL);
@@ -79,7 +113,15 @@ function detectPii(text, locale = "tr") {
79
113
  }
80
114
  }
81
115
  }
82
- if (active.has("iban")) findings.push(...findAll(IBAN_RE, t, "iban"));
116
+ if (active.has("iban")) {
117
+ IBAN_RE.lastIndex = 0;
118
+ let m;
119
+ while ((m = IBAN_RE.exec(t)) !== null) {
120
+ if (validIban(m[0])) {
121
+ findings.push({ type: "iban", value: m[0], start: m.index, end: m.index + m[0].length });
122
+ }
123
+ }
124
+ }
83
125
  if (active.has("credit_card")) {
84
126
  CC_RE.lastIndex = 0;
85
127
  let m;
@@ -90,6 +132,7 @@ function detectPii(text, locale = "tr") {
90
132
  }
91
133
  }
92
134
  if (active.has("ip")) findings.push(...findAll(IPV4_RE, t, "ip"));
135
+ if (active.has("ip_v6")) findings.push(...findAll(IPV6_RE, t, "ip_v6"));
93
136
  if (active.has("phone_tr")) findings.push(...findAll(PHONE_TR_RE, t, "phone_tr"));
94
137
  if (active.has("national_id_tr")) {
95
138
  TCKN_RE.lastIndex = 0;
@@ -100,6 +143,15 @@ function detectPii(text, locale = "tr") {
100
143
  }
101
144
  }
102
145
  }
146
+ if (active.has("tax_id_tr")) {
147
+ VKN_RE.lastIndex = 0;
148
+ let m;
149
+ while ((m = VKN_RE.exec(t)) !== null) {
150
+ if (validVkn(m[1])) {
151
+ findings.push({ type: "tax_id_tr", value: m[1], start: m.index, end: m.index + m[0].length });
152
+ }
153
+ }
154
+ }
103
155
  if (active.has("name")) {
104
156
  NAME_RE.lastIndex = 0;
105
157
  let m;
@@ -189,7 +241,7 @@ function applyMask(text, findings, strategy = "redact") {
189
241
  }
190
242
 
191
243
  // src/index.ts
192
- var version = "0.2.0";
244
+ var version = "0.3.0";
193
245
  function computeQualityScore(completeness, avgLength, garbageRatio) {
194
246
  const lengthScore = Math.min(avgLength / 500, 1);
195
247
  const noiseScore = Math.max(0, 1 - garbageRatio * 10);
@@ -217,12 +269,32 @@ function audit(text, options = {}) {
217
269
  const pii_summary = Array.from(counts.entries()).sort(([a], [b]) => a.localeCompare(b)).map(([type, count]) => ({ type, count }));
218
270
  return { quality_grade, quality_score, pii_summary, pii, quality, noise };
219
271
  }
272
/**
 * Run `audit` over every text and aggregate the per-text results.
 *
 * @param {string[]} texts - Texts to audit, in order.
 * @param {object} [options] - Passed unchanged to each `audit` call.
 * @returns {{results: object[], duplicate_ratio: number, pii_summary: {type: string, count: number}[], avg_quality_score: number}}
 *   The per-text results, the share of texts that repeat an earlier text,
 *   PII counts per type sorted by type name, and the mean quality score.
 *   Both ratios are rounded to four decimals.
 */
function auditBatch(texts, options = {}) {
  if (texts.length === 0) {
    return { results: [], duplicate_ratio: 0, pii_summary: [], avg_quality_score: 0 };
  }
  const round4 = (value) => Math.round(value * 1e4) / 1e4;
  const results = texts.map((text) => audit(text, options));

  // Every occurrence after the first of an identical text is a duplicate.
  const distinctCount = new Set(texts).size;
  const duplicate_ratio = round4((texts.length - distinctCount) / texts.length);

  const tally = new Map();
  let scoreSum = 0;
  for (const { pii, quality_score } of results) {
    scoreSum += quality_score;
    for (const { type } of pii) {
      tally.set(type, (tally.get(type) ?? 0) + 1);
    }
  }
  const pii_summary = [...tally]
    .sort(([left], [right]) => left.localeCompare(right))
    .map(([type, count]) => ({ type, count }));
  const avg_quality_score = round4(scoreSum / results.length);

  return { results, duplicate_ratio, pii_summary, avg_quality_score };
}
220
291
  function mask(text, findings, options = {}) {
221
292
  return applyMask(text, findings, options.strategy ?? "redact");
222
293
  }
223
294
  export {
224
295
  applyMask,
225
296
  audit,
297
+ auditBatch,
226
298
  detectPii,
227
299
  mask,
228
300
  noiseMetrics,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@flexorch/audit",
3
- "version": "0.2.0",
3
+ "version": "0.3.0",
4
4
  "description": "Zero-dependency PII + quality + noise audit for LLM datasets (TR/EU/US)",
5
5
  "keywords": [
6
6
  "pii",
@@ -17,7 +17,7 @@
17
17
  "homepage": "https://github.com/flexorch/flexorch-audit-js",
18
18
  "repository": {
19
19
  "type": "git",
20
- "url": "https://github.com/flexorch/flexorch-audit-js.git"
20
+ "url": "git+https://github.com/flexorch/flexorch-audit-js.git"
21
21
  },
22
22
  "bugs": {
23
23
  "url": "https://github.com/flexorch/flexorch-audit-js/issues"