@flexorch/audit 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +15 -1
- package/dist/index.d.cts +26 -2
- package/dist/index.d.ts +26 -2
- package/dist/index.js +13 -1
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -26,10 +26,12 @@ __export(index_exports, {
|
|
|
26
26
|
auditStream: () => auditStream,
|
|
27
27
|
complianceReport: () => complianceReport,
|
|
28
28
|
detectPii: () => detectPii,
|
|
29
|
+
estimateTokens: () => estimateTokens,
|
|
29
30
|
mask: () => mask,
|
|
30
31
|
noiseMetrics: () => noiseMetrics,
|
|
31
32
|
noiseRatio: () => noiseRatio,
|
|
32
33
|
qualityMetrics: () => qualityMetrics,
|
|
34
|
+
redactForLlm: () => redactForLlm,
|
|
33
35
|
version: () => version
|
|
34
36
|
});
|
|
35
37
|
module.exports = __toCommonJS(index_exports);
|
|
@@ -786,7 +788,7 @@ function applyMask(text, findings, strategy = "redact") {
|
|
|
786
788
|
}
|
|
787
789
|
|
|
788
790
|
// src/index.ts
|
|
789
|
-
var version = "0.6.0";
|
|
791
|
+
var version = "0.7.0";
|
|
790
792
|
function computeQualityScore(completeness, avgLength, garbageRatio) {
|
|
791
793
|
const lengthScore = Math.min(avgLength / 500, 1);
|
|
792
794
|
const noiseScore = Math.max(0, 1 - garbageRatio * 10);
|
|
@@ -842,6 +844,16 @@ async function* auditStream(texts, options = {}) {
|
|
|
842
844
|
yield audit(text, options);
|
|
843
845
|
}
|
|
844
846
|
}
|
|
847
|
+
function redactForLlm(text, options = {}) {
|
|
848
|
+
const { strategy, ...auditOptions } = options;
|
|
849
|
+
const result = audit(text, auditOptions);
|
|
850
|
+
return mask(text, result.pii, { strategy });
|
|
851
|
+
}
|
|
852
|
+
function estimateTokens(text) {
|
|
853
|
+
if (!text || !text.trim()) return 0;
|
|
854
|
+
const words = text.trim().split(/\s+/).length;
|
|
855
|
+
return Math.max(1, Math.round(words * 4 / 3));
|
|
856
|
+
}
|
|
845
857
|
var HIGH_RISK_TYPES = /* @__PURE__ */ new Set([
|
|
846
858
|
"national_id_tr",
|
|
847
859
|
"ssn",
|
|
@@ -903,9 +915,11 @@ function complianceReport(result) {
|
|
|
903
915
|
auditStream,
|
|
904
916
|
complianceReport,
|
|
905
917
|
detectPii,
|
|
918
|
+
estimateTokens,
|
|
906
919
|
mask,
|
|
907
920
|
noiseMetrics,
|
|
908
921
|
noiseRatio,
|
|
909
922
|
qualityMetrics,
|
|
923
|
+
redactForLlm,
|
|
910
924
|
version
|
|
911
925
|
});
|
package/dist/index.d.cts
CHANGED
|
@@ -54,7 +54,7 @@ declare function applyMask(text: string, findings: PiiFinding[], strategy?: Mask
|
|
|
54
54
|
* // "Contact: [REDACTED_EMAIL]"
|
|
55
55
|
*/
|
|
56
56
|
|
|
57
|
-
declare const version = "0.6.0";
|
|
57
|
+
declare const version = "0.7.0";
|
|
58
58
|
type QualityGrade = "A" | "B" | "C" | "D";
|
|
59
59
|
interface PiiSummaryEntry {
|
|
60
60
|
type: string;
|
|
@@ -130,6 +130,30 @@ declare function mask(text: string, findings: PiiFinding[], options?: MaskOption
|
|
|
130
130
|
* }
|
|
131
131
|
*/
|
|
132
132
|
declare function auditStream(texts: AsyncIterable<string>, options?: AuditOptions): AsyncGenerator<AuditResult>;
|
|
133
|
+
/**
|
|
134
|
+
* Audit *text* and return a PII-free version ready for LLM processing.
|
|
135
|
+
*
|
|
136
|
+
* One-shot convenience wrapper around audit() + mask(). Equivalent to:
|
|
137
|
+
* const result = audit(text, { locale })
|
|
138
|
+
* return mask(text, result.pii, { strategy })
|
|
139
|
+
*
|
|
140
|
+
* @example
|
|
141
|
+
* const clean = redactForLlm("TCKN: 12345678950, email: ali@example.com", { locale: "tr" })
|
|
142
|
+
* // "TCKN: [REDACTED_NATIONAL_ID_TR], email: [REDACTED_EMAIL]"
|
|
143
|
+
*/
|
|
144
|
+
declare function redactForLlm(text: string, options?: AuditOptions & MaskOptions): string;
|
|
145
|
+
/**
|
|
146
|
+
* Estimate the token count of *text* using a word-based heuristic.
|
|
147
|
+
*
|
|
148
|
+
* Uses the standard approximation: 1 token ≈ 0.75 words (words × 4/3).
|
|
149
|
+
* No external dependencies — accuracy within ~15% of real tokenizers for
|
|
150
|
+
* English and most European languages. Treat as a planning estimate.
|
|
151
|
+
*
|
|
152
|
+
* @example
|
|
153
|
+
* estimateTokens("The quick brown fox") // → 5
|
|
154
|
+
* estimateTokens("") // → 0
|
|
155
|
+
*/
|
|
156
|
+
declare function estimateTokens(text: string): number;
|
|
133
157
|
type RiskLevel = "none" | "low" | "medium" | "high";
|
|
134
158
|
interface ComplianceReport {
|
|
135
159
|
has_pii: boolean;
|
|
@@ -145,4 +169,4 @@ interface ComplianceReport {
|
|
|
145
169
|
*/
|
|
146
170
|
declare function complianceReport(result: AuditResult): ComplianceReport;
|
|
147
171
|
|
|
148
|
-
export { type AuditOptions, type AuditResult, type BatchAuditResult, type ComplianceReport, type MaskOptions, type MaskStrategy, type NoiseMetrics, type PiiFinding, type PiiSummaryEntry, type QualityGrade, type QualityMetrics, type RiskLevel, applyMask, audit, auditBatch, auditStream, complianceReport, detectPii, mask, noiseMetrics, noiseRatio, qualityMetrics, version };
|
|
172
|
+
export { type AuditOptions, type AuditResult, type BatchAuditResult, type ComplianceReport, type MaskOptions, type MaskStrategy, type NoiseMetrics, type PiiFinding, type PiiSummaryEntry, type QualityGrade, type QualityMetrics, type RiskLevel, applyMask, audit, auditBatch, auditStream, complianceReport, detectPii, estimateTokens, mask, noiseMetrics, noiseRatio, qualityMetrics, redactForLlm, version };
|
package/dist/index.d.ts
CHANGED
|
@@ -54,7 +54,7 @@ declare function applyMask(text: string, findings: PiiFinding[], strategy?: Mask
|
|
|
54
54
|
* // "Contact: [REDACTED_EMAIL]"
|
|
55
55
|
*/
|
|
56
56
|
|
|
57
|
-
declare const version = "0.6.0";
|
|
57
|
+
declare const version = "0.7.0";
|
|
58
58
|
type QualityGrade = "A" | "B" | "C" | "D";
|
|
59
59
|
interface PiiSummaryEntry {
|
|
60
60
|
type: string;
|
|
@@ -130,6 +130,30 @@ declare function mask(text: string, findings: PiiFinding[], options?: MaskOption
|
|
|
130
130
|
* }
|
|
131
131
|
*/
|
|
132
132
|
declare function auditStream(texts: AsyncIterable<string>, options?: AuditOptions): AsyncGenerator<AuditResult>;
|
|
133
|
+
/**
|
|
134
|
+
* Audit *text* and return a PII-free version ready for LLM processing.
|
|
135
|
+
*
|
|
136
|
+
* One-shot convenience wrapper around audit() + mask(). Equivalent to:
|
|
137
|
+
* const result = audit(text, { locale })
|
|
138
|
+
* return mask(text, result.pii, { strategy })
|
|
139
|
+
*
|
|
140
|
+
* @example
|
|
141
|
+
* const clean = redactForLlm("TCKN: 12345678950, email: ali@example.com", { locale: "tr" })
|
|
142
|
+
* // "TCKN: [REDACTED_NATIONAL_ID_TR], email: [REDACTED_EMAIL]"
|
|
143
|
+
*/
|
|
144
|
+
declare function redactForLlm(text: string, options?: AuditOptions & MaskOptions): string;
|
|
145
|
+
/**
|
|
146
|
+
* Estimate the token count of *text* using a word-based heuristic.
|
|
147
|
+
*
|
|
148
|
+
* Uses the standard approximation: 1 token ≈ 0.75 words (words × 4/3).
|
|
149
|
+
* No external dependencies — accuracy within ~15% of real tokenizers for
|
|
150
|
+
* English and most European languages. Treat as a planning estimate.
|
|
151
|
+
*
|
|
152
|
+
* @example
|
|
153
|
+
* estimateTokens("The quick brown fox") // → 5
|
|
154
|
+
* estimateTokens("") // → 0
|
|
155
|
+
*/
|
|
156
|
+
declare function estimateTokens(text: string): number;
|
|
133
157
|
type RiskLevel = "none" | "low" | "medium" | "high";
|
|
134
158
|
interface ComplianceReport {
|
|
135
159
|
has_pii: boolean;
|
|
@@ -145,4 +169,4 @@ interface ComplianceReport {
|
|
|
145
169
|
*/
|
|
146
170
|
declare function complianceReport(result: AuditResult): ComplianceReport;
|
|
147
171
|
|
|
148
|
-
export { type AuditOptions, type AuditResult, type BatchAuditResult, type ComplianceReport, type MaskOptions, type MaskStrategy, type NoiseMetrics, type PiiFinding, type PiiSummaryEntry, type QualityGrade, type QualityMetrics, type RiskLevel, applyMask, audit, auditBatch, auditStream, complianceReport, detectPii, mask, noiseMetrics, noiseRatio, qualityMetrics, version };
|
|
172
|
+
export { type AuditOptions, type AuditResult, type BatchAuditResult, type ComplianceReport, type MaskOptions, type MaskStrategy, type NoiseMetrics, type PiiFinding, type PiiSummaryEntry, type QualityGrade, type QualityMetrics, type RiskLevel, applyMask, audit, auditBatch, auditStream, complianceReport, detectPii, estimateTokens, mask, noiseMetrics, noiseRatio, qualityMetrics, redactForLlm, version };
|
package/dist/index.js
CHANGED
|
@@ -750,7 +750,7 @@ function applyMask(text, findings, strategy = "redact") {
|
|
|
750
750
|
}
|
|
751
751
|
|
|
752
752
|
// src/index.ts
|
|
753
|
-
var version = "0.6.0";
|
|
753
|
+
var version = "0.7.0";
|
|
754
754
|
function computeQualityScore(completeness, avgLength, garbageRatio) {
|
|
755
755
|
const lengthScore = Math.min(avgLength / 500, 1);
|
|
756
756
|
const noiseScore = Math.max(0, 1 - garbageRatio * 10);
|
|
@@ -806,6 +806,16 @@ async function* auditStream(texts, options = {}) {
|
|
|
806
806
|
yield audit(text, options);
|
|
807
807
|
}
|
|
808
808
|
}
|
|
809
|
+
function redactForLlm(text, options = {}) {
|
|
810
|
+
const { strategy, ...auditOptions } = options;
|
|
811
|
+
const result = audit(text, auditOptions);
|
|
812
|
+
return mask(text, result.pii, { strategy });
|
|
813
|
+
}
|
|
814
|
+
function estimateTokens(text) {
|
|
815
|
+
if (!text || !text.trim()) return 0;
|
|
816
|
+
const words = text.trim().split(/\s+/).length;
|
|
817
|
+
return Math.max(1, Math.round(words * 4 / 3));
|
|
818
|
+
}
|
|
809
819
|
var HIGH_RISK_TYPES = /* @__PURE__ */ new Set([
|
|
810
820
|
"national_id_tr",
|
|
811
821
|
"ssn",
|
|
@@ -866,9 +876,11 @@ export {
|
|
|
866
876
|
auditStream,
|
|
867
877
|
complianceReport,
|
|
868
878
|
detectPii,
|
|
879
|
+
estimateTokens,
|
|
869
880
|
mask,
|
|
870
881
|
noiseMetrics,
|
|
871
882
|
noiseRatio,
|
|
872
883
|
qualityMetrics,
|
|
884
|
+
redactForLlm,
|
|
873
885
|
version
|
|
874
886
|
};
|