@flexorch/audit 0.6.0 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -26,10 +26,12 @@ __export(index_exports, {
26
26
  auditStream: () => auditStream,
27
27
  complianceReport: () => complianceReport,
28
28
  detectPii: () => detectPii,
29
+ estimateTokens: () => estimateTokens,
29
30
  mask: () => mask,
30
31
  noiseMetrics: () => noiseMetrics,
31
32
  noiseRatio: () => noiseRatio,
32
33
  qualityMetrics: () => qualityMetrics,
34
+ redactForLlm: () => redactForLlm,
33
35
  version: () => version
34
36
  });
35
37
  module.exports = __toCommonJS(index_exports);
@@ -57,6 +59,7 @@ var COMPANY_NAME_TR_RE = new RegExp(
57
59
  "gu"
58
60
  );
59
61
  var MERSIS_RE = /\b([1-9]\d{15})\b/g;
62
+ var SGK_RE = /(?:SGK\s*(?:Sicil\s*No(?:su)?|No(?:su)?|Numara(?:s[ıi])?)?|Sigortal[ıi]\s*(?:Sicil\s*)?(?:No|Numara(?:s[ıi])?)|SSK\s*(?:No|Numara(?:s[ıi])?|Sicil))\s*[:#]*\s*(\d{10,11})\b/giu;
60
63
  var POSTAL_CODE_TR_RE = /\b((?:0[1-9]|[1-7]\d|80|81)\d{3})\b/g;
61
64
  var _TR_PROVINCES_SORTED = [
62
65
  "Afyonkarahisar",
@@ -336,12 +339,24 @@ function validEinUs(s) {
336
339
  return !_EIN_INVALID_PREFIXES.has(s.slice(0, 2));
337
340
  }
338
341
  var PESEL_PL_RE = /\b(\d{11})\b/g;
342
+ var NIP_PL_RE = /(?:NIP|Numer\s+NIP|Numer\s+Identyfikacji\s+Podatkowej)\s*[:#]*\s*(\d{10})\b/gi;
339
343
  function validPeselPl(s) {
340
344
  if (s.length !== 11 || !/^\d+$/.test(s)) return false;
341
345
  const weights = [1, 3, 7, 9, 1, 3, 7, 9, 1, 3];
342
346
  const total = weights.reduce((sum, w, i) => sum + w * parseInt(s[i]), 0);
343
347
  return (10 - total % 10) % 10 === parseInt(s[10]);
344
348
  }
349
+ function validNifPt(s) {
350
+ if (s.length !== 9 || !/^\d+$/.test(s) || s[0] === "0") return false;
351
+ let total = 0;
352
+ for (let i = 0; i < 8; i++) total += (9 - i) * parseInt(s[i]);
353
+ const check = (11 - total % 11) % 11;
354
+ return (check >= 10 ? 0 : check) === parseInt(s[8]);
355
+ }
356
+ var NIF_PT_RE = /(?:NIF|N[uú]mero\s+de\s+Contribuinte|Contribuinte)\s*[:#]*\s*(\d{9})\b/gi;
357
+ var PERSONNUMMER_SE_RE = /\b(\d{6,8}[-+]\d{4})\b/g;
358
+ var CPR_DK_RE = /\b(\d{6}-\d{4})\b/g;
359
+ var HETU_FI_RE = /\b(\d{6}[+\-A]\d{3}[0-9A-FHJ-NPR-Y])\b/g;
345
360
  var SVNR_AT_RE = /\b(\d{10})\b/g;
346
361
  function validSvnrAt(s) {
347
362
  if (s.length !== 10 || !/^\d+$/.test(s)) return false;
@@ -367,17 +382,22 @@ var LOCALE_DETECTORS = {
367
382
  "company_name_tr",
368
383
  "mersis_no",
369
384
  "postal_code_tr",
370
- "province_tr"
385
+ "province_tr",
386
+ "sgk_no"
371
387
  ]),
372
388
  us: /* @__PURE__ */ new Set(["ssn", "tax_id_us", "national_id_us", "phone_intl", "company_name_intl"]),
373
389
  eu: /* @__PURE__ */ new Set(["phone_intl", "iban_intl", "company_name_intl"]),
374
- de: /* @__PURE__ */ new Set(["tax_id_de", "social_id_de"]),
375
- fr: /* @__PURE__ */ new Set(["siret_fr", "company_id_fr", "social_id_fr"]),
390
+ de: /* @__PURE__ */ new Set(["tax_id_de", "social_id_de", "social_id_at"]),
391
+ fr: /* @__PURE__ */ new Set(["siret_fr", "company_id_fr", "social_id_fr", "national_id_be"]),
376
392
  it: /* @__PURE__ */ new Set(["national_id_it", "tax_id_it"]),
377
- nl: /* @__PURE__ */ new Set(["national_id_nl", "company_id_nl"]),
393
+ nl: /* @__PURE__ */ new Set(["national_id_nl", "company_id_nl", "national_id_be"]),
378
394
  es: /* @__PURE__ */ new Set(["national_id_es", "tax_id_es"]),
379
395
  uk: /* @__PURE__ */ new Set(["social_id_uk", "tax_id_uk"]),
380
- pl: /* @__PURE__ */ new Set(["national_id_pl"]),
396
+ pl: /* @__PURE__ */ new Set(["national_id_pl", "tax_id_pl"]),
397
+ pt: /* @__PURE__ */ new Set(["tax_id_pt"]),
398
+ sv: /* @__PURE__ */ new Set(["national_id_se"]),
399
+ da: /* @__PURE__ */ new Set(["national_id_dk"]),
400
+ fi: /* @__PURE__ */ new Set(["national_id_fi"]),
381
401
  at: /* @__PURE__ */ new Set(["social_id_at"]),
382
402
  be: /* @__PURE__ */ new Set(["national_id_be"])
383
403
  };
@@ -490,6 +510,14 @@ function detectPii(text, locale = "und") {
490
510
  findings.push({ type: "mersis_no", value: m[1], start: m.index, end: m.index + m[1].length });
491
511
  }
492
512
  }
513
+ if (active.has("sgk_no")) {
514
+ SGK_RE.lastIndex = 0;
515
+ let m;
516
+ while ((m = SGK_RE.exec(t)) !== null) {
517
+ const start = m.index + m[0].lastIndexOf(m[1]);
518
+ findings.push({ type: "sgk_no", value: m[1], start, end: start + m[1].length });
519
+ }
520
+ }
493
521
  if (active.has("postal_code_tr")) {
494
522
  POSTAL_CODE_TR_RE.lastIndex = 0;
495
523
  let m;
@@ -638,6 +666,45 @@ function detectPii(text, locale = "und") {
638
666
  if (validPeselPl(m[1])) findings.push({ type: "national_id_pl", value: m[1], start: m.index, end: m.index + m[1].length });
639
667
  }
640
668
  }
669
+ if (active.has("tax_id_pl")) {
670
+ NIP_PL_RE.lastIndex = 0;
671
+ let m;
672
+ while ((m = NIP_PL_RE.exec(t)) !== null) {
673
+ const start = m.index + m[0].lastIndexOf(m[1]);
674
+ findings.push({ type: "tax_id_pl", value: m[1], start, end: start + m[1].length });
675
+ }
676
+ }
677
+ if (active.has("tax_id_pt")) {
678
+ NIF_PT_RE.lastIndex = 0;
679
+ let m;
680
+ while ((m = NIF_PT_RE.exec(t)) !== null) {
681
+ if (validNifPt(m[1])) {
682
+ const start = m.index + m[0].lastIndexOf(m[1]);
683
+ findings.push({ type: "tax_id_pt", value: m[1], start, end: start + m[1].length });
684
+ }
685
+ }
686
+ }
687
+ if (active.has("national_id_se")) {
688
+ PERSONNUMMER_SE_RE.lastIndex = 0;
689
+ let m;
690
+ while ((m = PERSONNUMMER_SE_RE.exec(t)) !== null) {
691
+ findings.push({ type: "national_id_se", value: m[1], start: m.index, end: m.index + m[1].length });
692
+ }
693
+ }
694
+ if (active.has("national_id_dk")) {
695
+ CPR_DK_RE.lastIndex = 0;
696
+ let m;
697
+ while ((m = CPR_DK_RE.exec(t)) !== null) {
698
+ findings.push({ type: "national_id_dk", value: m[1], start: m.index, end: m.index + m[1].length });
699
+ }
700
+ }
701
+ if (active.has("national_id_fi")) {
702
+ HETU_FI_RE.lastIndex = 0;
703
+ let m;
704
+ while ((m = HETU_FI_RE.exec(t)) !== null) {
705
+ findings.push({ type: "national_id_fi", value: m[1], start: m.index, end: m.index + m[1].length });
706
+ }
707
+ }
641
708
  if (active.has("social_id_at")) {
642
709
  SVNR_AT_RE.lastIndex = 0;
643
710
  let m;
@@ -786,7 +853,7 @@ function applyMask(text, findings, strategy = "redact") {
786
853
  }
787
854
 
788
855
  // src/index.ts
789
- var version = "0.6.0";
856
+ var version = "0.7.0";
790
857
  function computeQualityScore(completeness, avgLength, garbageRatio) {
791
858
  const lengthScore = Math.min(avgLength / 500, 1);
792
859
  const noiseScore = Math.max(0, 1 - garbageRatio * 10);
@@ -842,6 +909,16 @@ async function* auditStream(texts, options = {}) {
842
909
  yield audit(text, options);
843
910
  }
844
911
  }
912
+ function redactForLlm(text, options = {}) {
913
+ const { strategy, ...auditOptions } = options;
914
+ const result = audit(text, auditOptions);
915
+ return mask(text, result.pii, { strategy });
916
+ }
917
+ function estimateTokens(text) {
918
+ if (!text || !text.trim()) return 0;
919
+ const words = text.trim().split(/\s+/).length;
920
+ return Math.max(1, Math.round(words * 4 / 3));
921
+ }
845
922
  var HIGH_RISK_TYPES = /* @__PURE__ */ new Set([
846
923
  "national_id_tr",
847
924
  "ssn",
@@ -903,9 +980,11 @@ function complianceReport(result) {
903
980
  auditStream,
904
981
  complianceReport,
905
982
  detectPii,
983
+ estimateTokens,
906
984
  mask,
907
985
  noiseMetrics,
908
986
  noiseRatio,
909
987
  qualityMetrics,
988
+ redactForLlm,
910
989
  version
911
990
  });
package/dist/index.d.cts CHANGED
@@ -54,7 +54,7 @@ declare function applyMask(text: string, findings: PiiFinding[], strategy?: Mask
54
54
  * // "Contact: [REDACTED_EMAIL]"
55
55
  */
56
56
 
57
- declare const version = "0.6.0";
57
+ declare const version = "0.7.0";
58
58
  type QualityGrade = "A" | "B" | "C" | "D";
59
59
  interface PiiSummaryEntry {
60
60
  type: string;
@@ -130,6 +130,30 @@ declare function mask(text: string, findings: PiiFinding[], options?: MaskOption
130
130
  * }
131
131
  */
132
132
  declare function auditStream(texts: AsyncIterable<string>, options?: AuditOptions): AsyncGenerator<AuditResult>;
133
+ /**
134
+ * Audit *text* and return a PII-free version ready for LLM processing.
135
+ *
136
+ * One-shot convenience wrapper around audit() + mask(). Equivalent to:
137
+ * const result = audit(text, { locale })
138
+ * return mask(text, result.pii, { strategy })
139
+ *
140
+ * @example
141
+ * const clean = redactForLlm("TCKN: 12345678950, email: ali@example.com", { locale: "tr" })
142
+ * // "TCKN: [REDACTED_NATIONAL_ID_TR], email: [REDACTED_EMAIL]"
143
+ */
144
+ declare function redactForLlm(text: string, options?: AuditOptions & MaskOptions): string;
145
+ /**
146
+ * Estimate the token count of *text* using a word-based heuristic.
147
+ *
148
+ * Uses the standard approximation: 1 token ≈ 0.75 words (words × 4/3).
149
+ * No external dependencies — accuracy within ~15% of real tokenizers for
150
+ * English and most European languages. Treat as a planning estimate.
151
+ *
152
+ * @example
153
+ * estimateTokens("The quick brown fox") // → 5
154
+ * estimateTokens("") // → 0
155
+ */
156
+ declare function estimateTokens(text: string): number;
133
157
  type RiskLevel = "none" | "low" | "medium" | "high";
134
158
  interface ComplianceReport {
135
159
  has_pii: boolean;
@@ -145,4 +169,4 @@ interface ComplianceReport {
145
169
  */
146
170
  declare function complianceReport(result: AuditResult): ComplianceReport;
147
171
 
148
- export { type AuditOptions, type AuditResult, type BatchAuditResult, type ComplianceReport, type MaskOptions, type MaskStrategy, type NoiseMetrics, type PiiFinding, type PiiSummaryEntry, type QualityGrade, type QualityMetrics, type RiskLevel, applyMask, audit, auditBatch, auditStream, complianceReport, detectPii, mask, noiseMetrics, noiseRatio, qualityMetrics, version };
172
+ export { type AuditOptions, type AuditResult, type BatchAuditResult, type ComplianceReport, type MaskOptions, type MaskStrategy, type NoiseMetrics, type PiiFinding, type PiiSummaryEntry, type QualityGrade, type QualityMetrics, type RiskLevel, applyMask, audit, auditBatch, auditStream, complianceReport, detectPii, estimateTokens, mask, noiseMetrics, noiseRatio, qualityMetrics, redactForLlm, version };
package/dist/index.d.ts CHANGED
@@ -54,7 +54,7 @@ declare function applyMask(text: string, findings: PiiFinding[], strategy?: Mask
54
54
  * // "Contact: [REDACTED_EMAIL]"
55
55
  */
56
56
 
57
- declare const version = "0.6.0";
57
+ declare const version = "0.7.0";
58
58
  type QualityGrade = "A" | "B" | "C" | "D";
59
59
  interface PiiSummaryEntry {
60
60
  type: string;
@@ -130,6 +130,30 @@ declare function mask(text: string, findings: PiiFinding[], options?: MaskOption
130
130
  * }
131
131
  */
132
132
  declare function auditStream(texts: AsyncIterable<string>, options?: AuditOptions): AsyncGenerator<AuditResult>;
133
+ /**
134
+ * Audit *text* and return a PII-free version ready for LLM processing.
135
+ *
136
+ * One-shot convenience wrapper around audit() + mask(). Equivalent to:
137
+ * const result = audit(text, { locale })
138
+ * return mask(text, result.pii, { strategy })
139
+ *
140
+ * @example
141
+ * const clean = redactForLlm("TCKN: 12345678950, email: ali@example.com", { locale: "tr" })
142
+ * // "TCKN: [REDACTED_NATIONAL_ID_TR], email: [REDACTED_EMAIL]"
143
+ */
144
+ declare function redactForLlm(text: string, options?: AuditOptions & MaskOptions): string;
145
+ /**
146
+ * Estimate the token count of *text* using a word-based heuristic.
147
+ *
148
+ * Uses the standard approximation: 1 token ≈ 0.75 words (words × 4/3).
149
+ * No external dependencies — accuracy within ~15% of real tokenizers for
150
+ * English and most European languages. Treat as a planning estimate.
151
+ *
152
+ * @example
153
+ * estimateTokens("The quick brown fox") // → 5
154
+ * estimateTokens("") // → 0
155
+ */
156
+ declare function estimateTokens(text: string): number;
133
157
  type RiskLevel = "none" | "low" | "medium" | "high";
134
158
  interface ComplianceReport {
135
159
  has_pii: boolean;
@@ -145,4 +169,4 @@ interface ComplianceReport {
145
169
  */
146
170
  declare function complianceReport(result: AuditResult): ComplianceReport;
147
171
 
148
- export { type AuditOptions, type AuditResult, type BatchAuditResult, type ComplianceReport, type MaskOptions, type MaskStrategy, type NoiseMetrics, type PiiFinding, type PiiSummaryEntry, type QualityGrade, type QualityMetrics, type RiskLevel, applyMask, audit, auditBatch, auditStream, complianceReport, detectPii, mask, noiseMetrics, noiseRatio, qualityMetrics, version };
172
+ export { type AuditOptions, type AuditResult, type BatchAuditResult, type ComplianceReport, type MaskOptions, type MaskStrategy, type NoiseMetrics, type PiiFinding, type PiiSummaryEntry, type QualityGrade, type QualityMetrics, type RiskLevel, applyMask, audit, auditBatch, auditStream, complianceReport, detectPii, estimateTokens, mask, noiseMetrics, noiseRatio, qualityMetrics, redactForLlm, version };
package/dist/index.js CHANGED
@@ -21,6 +21,7 @@ var COMPANY_NAME_TR_RE = new RegExp(
21
21
  "gu"
22
22
  );
23
23
  var MERSIS_RE = /\b([1-9]\d{15})\b/g;
24
+ var SGK_RE = /(?:SGK\s*(?:Sicil\s*No(?:su)?|No(?:su)?|Numara(?:s[ıi])?)?|Sigortal[ıi]\s*(?:Sicil\s*)?(?:No|Numara(?:s[ıi])?)|SSK\s*(?:No|Numara(?:s[ıi])?|Sicil))\s*[:#]*\s*(\d{10,11})\b/giu;
24
25
  var POSTAL_CODE_TR_RE = /\b((?:0[1-9]|[1-7]\d|80|81)\d{3})\b/g;
25
26
  var _TR_PROVINCES_SORTED = [
26
27
  "Afyonkarahisar",
@@ -300,12 +301,24 @@ function validEinUs(s) {
300
301
  return !_EIN_INVALID_PREFIXES.has(s.slice(0, 2));
301
302
  }
302
303
  var PESEL_PL_RE = /\b(\d{11})\b/g;
304
+ var NIP_PL_RE = /(?:NIP|Numer\s+NIP|Numer\s+Identyfikacji\s+Podatkowej)\s*[:#]*\s*(\d{10})\b/gi;
303
305
  function validPeselPl(s) {
304
306
  if (s.length !== 11 || !/^\d+$/.test(s)) return false;
305
307
  const weights = [1, 3, 7, 9, 1, 3, 7, 9, 1, 3];
306
308
  const total = weights.reduce((sum, w, i) => sum + w * parseInt(s[i]), 0);
307
309
  return (10 - total % 10) % 10 === parseInt(s[10]);
308
310
  }
311
+ function validNifPt(s) {
312
+ if (s.length !== 9 || !/^\d+$/.test(s) || s[0] === "0") return false;
313
+ let total = 0;
314
+ for (let i = 0; i < 8; i++) total += (9 - i) * parseInt(s[i]);
315
+ const check = (11 - total % 11) % 11;
316
+ return (check >= 10 ? 0 : check) === parseInt(s[8]);
317
+ }
318
+ var NIF_PT_RE = /(?:NIF|N[uú]mero\s+de\s+Contribuinte|Contribuinte)\s*[:#]*\s*(\d{9})\b/gi;
319
+ var PERSONNUMMER_SE_RE = /\b(\d{6,8}[-+]\d{4})\b/g;
320
+ var CPR_DK_RE = /\b(\d{6}-\d{4})\b/g;
321
+ var HETU_FI_RE = /\b(\d{6}[+\-A]\d{3}[0-9A-FHJ-NPR-Y])\b/g;
309
322
  var SVNR_AT_RE = /\b(\d{10})\b/g;
310
323
  function validSvnrAt(s) {
311
324
  if (s.length !== 10 || !/^\d+$/.test(s)) return false;
@@ -331,17 +344,22 @@ var LOCALE_DETECTORS = {
331
344
  "company_name_tr",
332
345
  "mersis_no",
333
346
  "postal_code_tr",
334
- "province_tr"
347
+ "province_tr",
348
+ "sgk_no"
335
349
  ]),
336
350
  us: /* @__PURE__ */ new Set(["ssn", "tax_id_us", "national_id_us", "phone_intl", "company_name_intl"]),
337
351
  eu: /* @__PURE__ */ new Set(["phone_intl", "iban_intl", "company_name_intl"]),
338
- de: /* @__PURE__ */ new Set(["tax_id_de", "social_id_de"]),
339
- fr: /* @__PURE__ */ new Set(["siret_fr", "company_id_fr", "social_id_fr"]),
352
+ de: /* @__PURE__ */ new Set(["tax_id_de", "social_id_de", "social_id_at"]),
353
+ fr: /* @__PURE__ */ new Set(["siret_fr", "company_id_fr", "social_id_fr", "national_id_be"]),
340
354
  it: /* @__PURE__ */ new Set(["national_id_it", "tax_id_it"]),
341
- nl: /* @__PURE__ */ new Set(["national_id_nl", "company_id_nl"]),
355
+ nl: /* @__PURE__ */ new Set(["national_id_nl", "company_id_nl", "national_id_be"]),
342
356
  es: /* @__PURE__ */ new Set(["national_id_es", "tax_id_es"]),
343
357
  uk: /* @__PURE__ */ new Set(["social_id_uk", "tax_id_uk"]),
344
- pl: /* @__PURE__ */ new Set(["national_id_pl"]),
358
+ pl: /* @__PURE__ */ new Set(["national_id_pl", "tax_id_pl"]),
359
+ pt: /* @__PURE__ */ new Set(["tax_id_pt"]),
360
+ sv: /* @__PURE__ */ new Set(["national_id_se"]),
361
+ da: /* @__PURE__ */ new Set(["national_id_dk"]),
362
+ fi: /* @__PURE__ */ new Set(["national_id_fi"]),
345
363
  at: /* @__PURE__ */ new Set(["social_id_at"]),
346
364
  be: /* @__PURE__ */ new Set(["national_id_be"])
347
365
  };
@@ -454,6 +472,14 @@ function detectPii(text, locale = "und") {
454
472
  findings.push({ type: "mersis_no", value: m[1], start: m.index, end: m.index + m[1].length });
455
473
  }
456
474
  }
475
+ if (active.has("sgk_no")) {
476
+ SGK_RE.lastIndex = 0;
477
+ let m;
478
+ while ((m = SGK_RE.exec(t)) !== null) {
479
+ const start = m.index + m[0].lastIndexOf(m[1]);
480
+ findings.push({ type: "sgk_no", value: m[1], start, end: start + m[1].length });
481
+ }
482
+ }
457
483
  if (active.has("postal_code_tr")) {
458
484
  POSTAL_CODE_TR_RE.lastIndex = 0;
459
485
  let m;
@@ -602,6 +628,45 @@ function detectPii(text, locale = "und") {
602
628
  if (validPeselPl(m[1])) findings.push({ type: "national_id_pl", value: m[1], start: m.index, end: m.index + m[1].length });
603
629
  }
604
630
  }
631
+ if (active.has("tax_id_pl")) {
632
+ NIP_PL_RE.lastIndex = 0;
633
+ let m;
634
+ while ((m = NIP_PL_RE.exec(t)) !== null) {
635
+ const start = m.index + m[0].lastIndexOf(m[1]);
636
+ findings.push({ type: "tax_id_pl", value: m[1], start, end: start + m[1].length });
637
+ }
638
+ }
639
+ if (active.has("tax_id_pt")) {
640
+ NIF_PT_RE.lastIndex = 0;
641
+ let m;
642
+ while ((m = NIF_PT_RE.exec(t)) !== null) {
643
+ if (validNifPt(m[1])) {
644
+ const start = m.index + m[0].lastIndexOf(m[1]);
645
+ findings.push({ type: "tax_id_pt", value: m[1], start, end: start + m[1].length });
646
+ }
647
+ }
648
+ }
649
+ if (active.has("national_id_se")) {
650
+ PERSONNUMMER_SE_RE.lastIndex = 0;
651
+ let m;
652
+ while ((m = PERSONNUMMER_SE_RE.exec(t)) !== null) {
653
+ findings.push({ type: "national_id_se", value: m[1], start: m.index, end: m.index + m[1].length });
654
+ }
655
+ }
656
+ if (active.has("national_id_dk")) {
657
+ CPR_DK_RE.lastIndex = 0;
658
+ let m;
659
+ while ((m = CPR_DK_RE.exec(t)) !== null) {
660
+ findings.push({ type: "national_id_dk", value: m[1], start: m.index, end: m.index + m[1].length });
661
+ }
662
+ }
663
+ if (active.has("national_id_fi")) {
664
+ HETU_FI_RE.lastIndex = 0;
665
+ let m;
666
+ while ((m = HETU_FI_RE.exec(t)) !== null) {
667
+ findings.push({ type: "national_id_fi", value: m[1], start: m.index, end: m.index + m[1].length });
668
+ }
669
+ }
605
670
  if (active.has("social_id_at")) {
606
671
  SVNR_AT_RE.lastIndex = 0;
607
672
  let m;
@@ -750,7 +815,7 @@ function applyMask(text, findings, strategy = "redact") {
750
815
  }
751
816
 
752
817
  // src/index.ts
753
- var version = "0.6.0";
818
+ var version = "0.7.0";
754
819
  function computeQualityScore(completeness, avgLength, garbageRatio) {
755
820
  const lengthScore = Math.min(avgLength / 500, 1);
756
821
  const noiseScore = Math.max(0, 1 - garbageRatio * 10);
@@ -806,6 +871,16 @@ async function* auditStream(texts, options = {}) {
806
871
  yield audit(text, options);
807
872
  }
808
873
  }
874
+ function redactForLlm(text, options = {}) {
875
+ const { strategy, ...auditOptions } = options;
876
+ const result = audit(text, auditOptions);
877
+ return mask(text, result.pii, { strategy });
878
+ }
879
+ function estimateTokens(text) {
880
+ if (!text || !text.trim()) return 0;
881
+ const words = text.trim().split(/\s+/).length;
882
+ return Math.max(1, Math.round(words * 4 / 3));
883
+ }
809
884
  var HIGH_RISK_TYPES = /* @__PURE__ */ new Set([
810
885
  "national_id_tr",
811
886
  "ssn",
@@ -866,9 +941,11 @@ export {
866
941
  auditStream,
867
942
  complianceReport,
868
943
  detectPii,
944
+ estimateTokens,
869
945
  mask,
870
946
  noiseMetrics,
871
947
  noiseRatio,
872
948
  qualityMetrics,
949
+ redactForLlm,
873
950
  version
874
951
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@flexorch/audit",
3
- "version": "0.6.0",
3
+ "version": "0.8.1",
4
4
  "description": "Zero-dependency PII + quality + noise audit for LLM datasets (TR/EU/US)",
5
5
  "keywords": [
6
6
  "pii",