npm - @flexorch/audit - Versions diffs - 0.6.0 → 0.7.0 - Mend

@flexorch/audit 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/dist/index.cjs CHANGED Viewed

@@ -26,10 +26,12 @@ __export(index_exports, {
   auditStream: () => auditStream,
   complianceReport: () => complianceReport,
   detectPii: () => detectPii,
+  estimateTokens: () => estimateTokens,
   mask: () => mask,
   noiseMetrics: () => noiseMetrics,
   noiseRatio: () => noiseRatio,
   qualityMetrics: () => qualityMetrics,
+  redactForLlm: () => redactForLlm,
   version: () => version
 });
 module.exports = __toCommonJS(index_exports);
@@ -786,7 +788,7 @@ function applyMask(text, findings, strategy = "redact") {
 }
 // src/index.ts
-var version = "0.6.0";
+var version = "0.7.0";
 function computeQualityScore(completeness, avgLength, garbageRatio) {
   const lengthScore = Math.min(avgLength / 500, 1);
   const noiseScore = Math.max(0, 1 - garbageRatio * 10);
@@ -842,6 +844,16 @@ async function* auditStream(texts, options = {}) {
     yield audit(text, options);
   }
 }
+function redactForLlm(text, options = {}) {
+  const { strategy, ...auditOptions } = options;
+  const result = audit(text, auditOptions);
+  return mask(text, result.pii, { strategy });
+}
+function estimateTokens(text) {
+  if (!text || !text.trim()) return 0;
+  const words = text.trim().split(/\s+/).length;
+  return Math.max(1, Math.round(words * 4 / 3));
+}
 var HIGH_RISK_TYPES = /* @__PURE__ */ new Set([
   "national_id_tr",
   "ssn",
@@ -903,9 +915,11 @@ function complianceReport(result) {
   auditStream,
   complianceReport,
   detectPii,
+  estimateTokens,
   mask,
   noiseMetrics,
   noiseRatio,
   qualityMetrics,
+  redactForLlm,
   version
 });

package/dist/index.d.cts CHANGED Viewed

@@ -54,7 +54,7 @@ declare function applyMask(text: string, findings: PiiFinding[], strategy?: Mask
  * // "Contact: [REDACTED_EMAIL]"
  */
-declare const version = "0.6.0";
+declare const version = "0.7.0";
 type QualityGrade = "A" | "B" | "C" | "D";
 interface PiiSummaryEntry {
     type: string;
@@ -130,6 +130,30 @@ declare function mask(text: string, findings: PiiFinding[], options?: MaskOption
  * }
  */
 declare function auditStream(texts: AsyncIterable<string>, options?: AuditOptions): AsyncGenerator<AuditResult>;
+/**
+ * Audit *text* and return a PII-free version ready for LLM processing.
+ *
+ * One-shot convenience wrapper around audit() + mask(). Equivalent to:
+ *   const result = audit(text, { locale })
+ *   return mask(text, result.pii, { strategy })
+ *
+ * @example
+ * const clean = redactForLlm("TCKN: 12345678950, email: ali@example.com", { locale: "tr" })
+ * // "TCKN: [REDACTED_NATIONAL_ID_TR], email: [REDACTED_EMAIL]"
+ */
+declare function redactForLlm(text: string, options?: AuditOptions & MaskOptions): string;
+/**
+ * Estimate the token count of *text* using a word-based heuristic.
+ *
+ * Uses the standard approximation: 1 token ≈ 0.75 words (words × 4/3).
+ * No external dependencies — accuracy within ~15% of real tokenizers for
+ * English and most European languages. Treat as a planning estimate.
+ *
+ * @example
+ * estimateTokens("The quick brown fox")   // → 5
+ * estimateTokens("")                       // → 0
+ */
+declare function estimateTokens(text: string): number;
 type RiskLevel = "none" | "low" | "medium" | "high";
 interface ComplianceReport {
     has_pii: boolean;
@@ -145,4 +169,4 @@ interface ComplianceReport {
  */
 declare function complianceReport(result: AuditResult): ComplianceReport;
-export { type AuditOptions, type AuditResult, type BatchAuditResult, type ComplianceReport, type MaskOptions, type MaskStrategy, type NoiseMetrics, type PiiFinding, type PiiSummaryEntry, type QualityGrade, type QualityMetrics, type RiskLevel, applyMask, audit, auditBatch, auditStream, complianceReport, detectPii, mask, noiseMetrics, noiseRatio, qualityMetrics, version };
+export { type AuditOptions, type AuditResult, type BatchAuditResult, type ComplianceReport, type MaskOptions, type MaskStrategy, type NoiseMetrics, type PiiFinding, type PiiSummaryEntry, type QualityGrade, type QualityMetrics, type RiskLevel, applyMask, audit, auditBatch, auditStream, complianceReport, detectPii, estimateTokens, mask, noiseMetrics, noiseRatio, qualityMetrics, redactForLlm, version };

package/dist/index.d.ts CHANGED Viewed

@@ -54,7 +54,7 @@ declare function applyMask(text: string, findings: PiiFinding[], strategy?: Mask
  * // "Contact: [REDACTED_EMAIL]"
  */
-declare const version = "0.6.0";
+declare const version = "0.7.0";
 type QualityGrade = "A" | "B" | "C" | "D";
 interface PiiSummaryEntry {
     type: string;
@@ -130,6 +130,30 @@ declare function mask(text: string, findings: PiiFinding[], options?: MaskOption
  * }
  */
 declare function auditStream(texts: AsyncIterable<string>, options?: AuditOptions): AsyncGenerator<AuditResult>;
+/**
+ * Audit *text* and return a PII-free version ready for LLM processing.
+ *
+ * One-shot convenience wrapper around audit() + mask(). Equivalent to:
+ *   const result = audit(text, { locale })
+ *   return mask(text, result.pii, { strategy })
+ *
+ * @example
+ * const clean = redactForLlm("TCKN: 12345678950, email: ali@example.com", { locale: "tr" })
+ * // "TCKN: [REDACTED_NATIONAL_ID_TR], email: [REDACTED_EMAIL]"
+ */
+declare function redactForLlm(text: string, options?: AuditOptions & MaskOptions): string;
+/**
+ * Estimate the token count of *text* using a word-based heuristic.
+ *
+ * Uses the standard approximation: 1 token ≈ 0.75 words (words × 4/3).
+ * No external dependencies — accuracy within ~15% of real tokenizers for
+ * English and most European languages. Treat as a planning estimate.
+ *
+ * @example
+ * estimateTokens("The quick brown fox")   // → 5
+ * estimateTokens("")                       // → 0
+ */
+declare function estimateTokens(text: string): number;
 type RiskLevel = "none" | "low" | "medium" | "high";
 interface ComplianceReport {
     has_pii: boolean;
@@ -145,4 +169,4 @@ interface ComplianceReport {
  */
 declare function complianceReport(result: AuditResult): ComplianceReport;
-export { type AuditOptions, type AuditResult, type BatchAuditResult, type ComplianceReport, type MaskOptions, type MaskStrategy, type NoiseMetrics, type PiiFinding, type PiiSummaryEntry, type QualityGrade, type QualityMetrics, type RiskLevel, applyMask, audit, auditBatch, auditStream, complianceReport, detectPii, mask, noiseMetrics, noiseRatio, qualityMetrics, version };
+export { type AuditOptions, type AuditResult, type BatchAuditResult, type ComplianceReport, type MaskOptions, type MaskStrategy, type NoiseMetrics, type PiiFinding, type PiiSummaryEntry, type QualityGrade, type QualityMetrics, type RiskLevel, applyMask, audit, auditBatch, auditStream, complianceReport, detectPii, estimateTokens, mask, noiseMetrics, noiseRatio, qualityMetrics, redactForLlm, version };

package/dist/index.js CHANGED Viewed

@@ -750,7 +750,7 @@ function applyMask(text, findings, strategy = "redact") {
 }
 // src/index.ts
-var version = "0.6.0";
+var version = "0.7.0";
 function computeQualityScore(completeness, avgLength, garbageRatio) {
   const lengthScore = Math.min(avgLength / 500, 1);
   const noiseScore = Math.max(0, 1 - garbageRatio * 10);
@@ -806,6 +806,16 @@ async function* auditStream(texts, options = {}) {
     yield audit(text, options);
   }
 }
+function redactForLlm(text, options = {}) {
+  const { strategy, ...auditOptions } = options;
+  const result = audit(text, auditOptions);
+  return mask(text, result.pii, { strategy });
+}
+function estimateTokens(text) {
+  if (!text || !text.trim()) return 0;
+  const words = text.trim().split(/\s+/).length;
+  return Math.max(1, Math.round(words * 4 / 3));
+}
 var HIGH_RISK_TYPES = /* @__PURE__ */ new Set([
   "national_id_tr",
   "ssn",
@@ -866,9 +876,11 @@ export {
   auditStream,
   complianceReport,
   detectPii,
+  estimateTokens,
   mask,
   noiseMetrics,
   noiseRatio,
   qualityMetrics,
+  redactForLlm,
   version
 };

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@flexorch/audit",
-  "version": "0.6.0",
+  "version": "0.7.0",
   "description": "Zero-dependency PII + quality + noise audit for LLM datasets (TR/EU/US)",
   "keywords": [
     "pii",