@stackone/redaction 1.5.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ import{t as e}from"./chunk-Cfxk5zVN.mjs";const t=[/https?:\/\/\S+/,/[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}/i,/[a-f0-9]{40}/,/(?:^|\/)[\w._-]+\.[\w._-]+(?:\/|$)/],n=[/[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/,/(?:\+?1[-.\s]?)?\(?[0-9]{3}\)?[-.\s]?[0-9]{3}[-.\s]?[0-9]{4}/,/(?:AKIA|ABIA|ACCA|ASIA)[A-Z0-9]{16}/,/sk-[a-zA-Z0-9]{20,}/,/ghp_[a-zA-Z0-9]{36}/,/gho_[a-zA-Z0-9]{36}/,/github_pat_[a-zA-Z0-9_]{22,}/,/xox[bpras]-[a-zA-Z0-9-]+/,/sk_live_[a-zA-Z0-9]{24,}/,/pk_live_[a-zA-Z0-9]{24,}/];function buildSkipRanges(e,t){let n=[];for(let r of t){let t=new RegExp(r.source,r.flags.includes(`g`)?r.flags:`${r.flags}g`),i;for(;(i=t.exec(e))!==null;)n.push([i.index,i.index+i[0].length])}return n}function isInSkipRange(e,t,n){return n.some(([n,r])=>e<r&&t>n)}function mergeSpans(e){if(e.length<=1)return e;let t=[...e].sort((e,t)=>e.start-t.start),n=[t[0]];for(let e=1;e<t.length;e++){let r=t[e],i=n[n.length-1];r.start<=i.end?r.end>i.end&&(i.end=r.end):n.push(r)}return n}function applyRedactions(e,t){let n=e;for(let e of[...t].sort((e,t)=>t.start-e.start))n=n.slice(0,e.start)+e.replacement+n.slice(e.end);return n}const r={threshold:4,minLength:20,candidatePattern:/[A-Za-z0-9_\-./+=:~!]{20,}/g,skipPatterns:t,piiPatterns:n},i={};function createConfig(e){let t=e?.config;return{entropy:{...r,...t?.entropy},model:{...i,...t?.model,...e?.modelPath?{modelPath:e.modelPath}:{}},redactionToken:e?.redactionToken??t?.redactionToken??`[REDACTED]`}}const a=new Set([`secret`]);export{buildSkipRanges as a,applyRedactions as i,a as n,isInSkipRange as o,createConfig as r,mergeSpans as s,r as t};
@@ -0,0 +1 @@
1
+ var e=Object.create,t=Object.defineProperty,__name=(e,n)=>t(e,`name`,{value:n,configurable:!0}),n=Object.getOwnPropertyDescriptor,r=Object.getOwnPropertyNames,i=Object.getPrototypeOf,a=Object.prototype.hasOwnProperty,__copyProps=(e,i,o,s)=>{if(i&&typeof i==`object`||typeof i==`function`)for(var c=r(i),l=0,u=c.length,d;l<u;l++)d=c[l],!a.call(e,d)&&d!==o&&t(e,d,{get:(e=>i[e]).bind(null,d),enumerable:!(s=n(i,d))||s.enumerable});return e},__toESM=(n,r,a)=>(a=n==null?{}:e(i(n)),__copyProps(r||!n||!n.__esModule?t(a,`default`,{value:n,enumerable:!0}):a,n));const o=[/https?:\/\/\S+/,/[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}/i,/[a-f0-9]{40}/,/(?:^|\/)[\w._-]+\.[\w._-]+(?:\/|$)/],s=[/[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/,/(?:\+?1[-.\s]?)?\(?[0-9]{3}\)?[-.\s]?[0-9]{3}[-.\s]?[0-9]{4}/,/(?:AKIA|ABIA|ACCA|ASIA)[A-Z0-9]{16}/,/sk-[a-zA-Z0-9]{20,}/,/ghp_[a-zA-Z0-9]{36}/,/gho_[a-zA-Z0-9]{36}/,/github_pat_[a-zA-Z0-9_]{22,}/,/xox[bpras]-[a-zA-Z0-9-]+/,/sk_live_[a-zA-Z0-9]{24,}/,/pk_live_[a-zA-Z0-9]{24,}/];function buildSkipRanges(e,t){let n=[];for(let r of t){let t=new RegExp(r.source,r.flags.includes(`g`)?r.flags:`${r.flags}g`),i;for(;(i=t.exec(e))!==null;)n.push([i.index,i.index+i[0].length])}return n}function isInSkipRange(e,t,n){return n.some(([n,r])=>e<r&&t>n)}function mergeSpans(e){if(e.length<=1)return e;let t=[...e].sort((e,t)=>e.start-t.start),n=[t[0]];for(let e=1;e<t.length;e++){let r=t[e],i=n[n.length-1];r.start<=i.end?r.end>i.end&&(i.end=r.end):n.push(r)}return n}function applyRedactions(e,t){let n=e;for(let e of[...t].sort((e,t)=>t.start-e.start))n=n.slice(0,e.start)+e.replacement+n.slice(e.end);return n}const c={threshold:4,minLength:20,candidatePattern:/[A-Za-z0-9_\-./+=:~!]{20,}/g,skipPatterns:o,piiPatterns:s},l={},u=`[REDACTED]`;function createConfig(e){let 
t=e?.config;return{entropy:{...c,...t?.entropy},model:{...l,...t?.model,...e?.modelPath?{modelPath:e.modelPath}:{}},redactionToken:e?.redactionToken??t?.redactionToken??`[REDACTED]`}}const d=new Set([`secret`]);Object.defineProperty(exports,`a`,{enumerable:!0,get:function(){return buildSkipRanges}}),Object.defineProperty(exports,`c`,{enumerable:!0,get:function(){return __name}}),Object.defineProperty(exports,`i`,{enumerable:!0,get:function(){return applyRedactions}}),Object.defineProperty(exports,`l`,{enumerable:!0,get:function(){return __toESM}}),Object.defineProperty(exports,`n`,{enumerable:!0,get:function(){return d}}),Object.defineProperty(exports,`o`,{enumerable:!0,get:function(){return isInSkipRange}}),Object.defineProperty(exports,`r`,{enumerable:!0,get:function(){return createConfig}}),Object.defineProperty(exports,`s`,{enumerable:!0,get:function(){return mergeSpans}}),Object.defineProperty(exports,`t`,{enumerable:!0,get:function(){return c}});
@@ -1 +1 @@
1
- var e=Object.create,t=Object.defineProperty,__name=(e,n)=>t(e,`name`,{value:n,configurable:!0}),n=Object.getOwnPropertyDescriptor,r=Object.getOwnPropertyNames,i=Object.getPrototypeOf,a=Object.prototype.hasOwnProperty,__copyProps=(e,i,o,s)=>{if(i&&typeof i==`object`||typeof i==`function`)for(var c=r(i),l=0,u=c.length,d;l<u;l++)d=c[l],!a.call(e,d)&&d!==o&&t(e,d,{get:(e=>i[e]).bind(null,d),enumerable:!(s=n(i,d))||s.enumerable});return e},__toESM=(n,r,a)=>(a=n==null?{}:e(i(n)),__copyProps(r||!n||!n.__esModule?t(a,`default`,{value:n,enumerable:!0}):a,n));let o=require(`@stackone/utils`),s=require(`fast-redact`);s=__toESM(s);const c=[`client_secret`,`access_token`,`refresh_token`,`api_key`,`password`,`job_board_token`,`private_key`,`certificate`,`service_user_token`,`key_id`,`secret_key`,`provhash`,`admin_key`,`session_key`,`id_token`,`authorization`,`bh_rest_token`,`external_trigger_token`,`tempauth`],l=`req.headers.authorization,req.headers.cookie,req.headers.cookies,req.headers["set-cookie"],req.headers["set-cookies"],req.headers.httpsAgent.options.cert,req.headers.httpsAgent.options.key,req.headers["x-stackone-external-trigger-token"],error.config.headers.authorization,error.config.data,error.config.headers.cookie,error.config.headers.cookies,error.config.headers["set-cookie"],error.config.headers["set-cookies"],error.config.httpsAgent.options.cert,error.config.httpsAgent.options.key,err.config.headers.authorization,err.config.data,err.config.headers.cookie,err.config.headers.cookies,err.config.headers["set-cookie"],err.config.headers["set-cookies"],err.config.httpsAgent.options.cert,err.config.httpsAgent.options.key,res.headers.authorization,res.headers.cookie,res.headers.cookies,res.headers["set-cookie"],res.headers["set-cookies"],res.headers.httpsAgent.options.cert,res.headers.httpsAgent.options.key,context.credentials`.split(`,`),u=`**redacted**`;let d=function(e){return e.FULL=`FULL`,e.PARTIAL=`PARTIAL`,e}({});const censorFn=e=>u,partialCensorFn=e=>typeof 
e==`string`?e.startsWith(u)?e:e.length<=10?u:`${u}${e.slice(-5)}`:u,capitalizeWord=e=>`${e.charAt(0).toUpperCase()}${e.slice(1)}`,capitalizeLastWordAfterLastDot=e=>{let t=e.split(`.`);if(t.length<2)return capitalizeWord(e);let n=capitalizeWord(t.pop()??``);return[...t,n].join(`.`)},capitalizeWordInsideLastBrackets=e=>{let t=e.lastIndexOf(`[`),n=e.indexOf(`]`,t);if(t===-1||n===-1)return;let r=e.slice(t,n+1).replace(/(["'])([^"']*)(["'])/g,(e,t,n)=>n?t+capitalizeWord(n)+t:t+t);return`${e.slice(0,t)}${r}${e.slice(n+1)}`},capitalizeKey=e=>capitalizeWordInsideLastBrackets(e)||capitalizeLastWordAfterLastDot(e),getPathsToRedact=(e=[],t=!0)=>{if(!t)return[];let n=e.map(e=>capitalizeKey(e));return[...e,...n]},getKeysToRedact=(e=[],t=!0)=>{if(!t)return new Set;let n=new Set;return e.forEach(e=>{n.add(e);let t;t=e.includes(`_`)?e.replace(/_([a-z])/g,(e,t)=>t.toUpperCase()):/^[A-Z]/.test(e)?e.charAt(0).toLowerCase()+e.slice(1):e;let r=t.charAt(0).toUpperCase()+t.slice(1),i=t.replace(/([A-Z])/g,`_$1`).toLowerCase();n.add(t),n.add(r),n.add(i)}),n},f={[d.FULL]:censorFn,[d.PARTIAL]:partialCensorFn},p=getKeysToRedact(c),m=[...getPathsToRedact(l),...Array.from(p)],redactUrl=(e,t=d.FULL,n)=>{if(!(0,o.isMissing)(e))try{let n=new URL(e),r=[...n.searchParams].reduce((e,[n,r])=>(e.set(n,p.has(n)?f[t](r):r),e),new URLSearchParams);return`${n.origin}${n.pathname}${r.toString()?`?`+r:``}${n.hash}`}catch(t){return n?.warning({message:`Invalid URL provided, unable to redact`,context:{value:e,error:t},category:`redactUrl`}),e}},redactPath=(e,t=d.FULL,n)=>{if(!(0,o.isMissing)(e))try{let[n,r]=e.split(`?`);return r?`${n}?${[...new URLSearchParams(r)].reduce((e,[n,r])=>(e.set(n,p.has(n)?f[t](r):r),e),new URLSearchParams)}`:e}catch(t){return n?.warning({message:`Invalid path provided, unable to redact`,context:{value:e,error:t},category:`redactPath`}),e}},redactFields=(e,t=d.FULL,n=!1)=>typeof e!=`object`||!e?e:Array.isArray(e)?n?e.map(e=>typeof 
e==`object`&&e?redactFields(e,t,n):e):[...e]:e.constructor!==Object&&e.constructor!==void 0?e:Object.entries(e).reduce((e,[r,i])=>(p.has(r)?e[r]=f[t](i):n&&typeof i==`object`&&i?e[r]=redactFields(i,t,n):e[r]=i,e),{});let h;const getRedactors=()=>((0,o.isMissing)(h)&&(h={[d.FULL]:(0,s.default)({paths:m,serialize:!1,censor:f[d.FULL]}),[d.PARTIAL]:(0,s.default)({paths:m,serialize:!1,censor:f[d.PARTIAL]})}),h),redactObject=(e,t=d.FULL)=>(0,o.isMissing)(e)||typeof e!=`object`||!e?e:getRedactors()[t]((0,o.deepCopy)(e)),g=!1;exports.CensorType=d,exports.I_AM_EDGE=!1,exports.redactFields=redactFields,exports.redactObject=redactObject,exports.redactPath=redactPath,exports.redactUrl=redactUrl;
1
+ const e=require(`./config-D8tiH_wn.cjs`);let t=require(`@stackone/utils`),n=require(`fast-redact`);n=e.l(n);const r=[`client_secret`,`access_token`,`refresh_token`,`api_key`,`password`,`job_board_token`,`private_key`,`certificate`,`service_user_token`,`key_id`,`secret_key`,`provhash`,`admin_key`,`session_key`,`id_token`,`authorization`,`bh_rest_token`,`external_trigger_token`,`tempauth`],i=`req.headers.authorization,req.headers.cookie,req.headers.cookies,req.headers["set-cookie"],req.headers["set-cookies"],req.headers.httpsAgent.options.cert,req.headers.httpsAgent.options.key,req.headers["x-stackone-external-trigger-token"],error.config.headers.authorization,error.config.data,error.config.headers.cookie,error.config.headers.cookies,error.config.headers["set-cookie"],error.config.headers["set-cookies"],error.config.httpsAgent.options.cert,error.config.httpsAgent.options.key,err.config.headers.authorization,err.config.data,err.config.headers.cookie,err.config.headers.cookies,err.config.headers["set-cookie"],err.config.headers["set-cookies"],err.config.httpsAgent.options.cert,err.config.httpsAgent.options.key,res.headers.authorization,res.headers.cookie,res.headers.cookies,res.headers["set-cookie"],res.headers["set-cookies"],res.headers.httpsAgent.options.cert,res.headers.httpsAgent.options.key,context.credentials`.split(`,`),a=`**redacted**`;let o=function(e){return e.FULL=`FULL`,e.PARTIAL=`PARTIAL`,e}({});const censorFn=e=>a,partialCensorFn=e=>typeof e==`string`?e.startsWith(a)?e:e.length<=10?a:`${a}${e.slice(-5)}`:a,capitalizeWord=e=>`${e.charAt(0).toUpperCase()}${e.slice(1)}`,capitalizeLastWordAfterLastDot=e=>{let t=e.split(`.`);if(t.length<2)return capitalizeWord(e);let n=capitalizeWord(t.pop()??``);return[...t,n].join(`.`)},capitalizeWordInsideLastBrackets=e=>{let t=e.lastIndexOf(`[`),n=e.indexOf(`]`,t);if(t===-1||n===-1)return;let 
r=e.slice(t,n+1).replace(/(["'])([^"']*)(["'])/g,(e,t,n)=>n?t+capitalizeWord(n)+t:t+t);return`${e.slice(0,t)}${r}${e.slice(n+1)}`},capitalizeKey=e=>capitalizeWordInsideLastBrackets(e)||capitalizeLastWordAfterLastDot(e),getPathsToRedact=(e=[],t=!0)=>{if(!t)return[];let n=e.map(e=>capitalizeKey(e));return[...e,...n]},getKeysToRedact=(e=[],t=!0)=>{if(!t)return new Set;let n=new Set;return e.forEach(e=>{n.add(e);let t;t=e.includes(`_`)?e.replace(/_([a-z])/g,(e,t)=>t.toUpperCase()):/^[A-Z]/.test(e)?e.charAt(0).toLowerCase()+e.slice(1):e;let r=t.charAt(0).toUpperCase()+t.slice(1),i=t.replace(/([A-Z])/g,`_$1`).toLowerCase();n.add(t),n.add(r),n.add(i)}),n},s={[o.FULL]:censorFn,[o.PARTIAL]:partialCensorFn},c=getKeysToRedact(r),l=[...getPathsToRedact(i),...Array.from(c)],redactUrl=(e,n=o.FULL,r)=>{if(!(0,t.isMissing)(e))try{let t=new URL(e),r=[...t.searchParams].reduce((e,[t,r])=>(e.set(t,c.has(t)?s[n](r):r),e),new URLSearchParams);return`${t.origin}${t.pathname}${r.toString()?`?`+r:``}${t.hash}`}catch(t){return r?.warning({message:`Invalid URL provided, unable to redact`,context:{value:e,error:t},category:`redactUrl`}),e}},redactPath=(e,n=o.FULL,r)=>{if(!(0,t.isMissing)(e))try{let[t,r]=e.split(`?`);return r?`${t}?${[...new URLSearchParams(r)].reduce((e,[t,r])=>(e.set(t,c.has(t)?s[n](r):r),e),new URLSearchParams)}`:e}catch(t){return r?.warning({message:`Invalid path provided, unable to redact`,context:{value:e,error:t},category:`redactPath`}),e}},redactFields=(e,t=o.FULL,n=!1)=>typeof e!=`object`||!e?e:Array.isArray(e)?n?e.map(e=>typeof e==`object`&&e?redactFields(e,t,n):e):[...e]:e.constructor!==Object&&e.constructor!==void 0?e:Object.entries(e).reduce((e,[r,i])=>(c.has(r)?e[r]=s[t](i):n&&typeof i==`object`&&i?e[r]=redactFields(i,t,n):e[r]=i,e),{});function shannonEntropy(e){if(!e)return 0;let t=new Map;for(let n of e)t.set(n,(t.get(n)??0)+1);let n=e.length,r=0;for(let e of t.values()){let t=e/n;r-=t*Math.log2(t)}return r}function entropyScrub(t,n=`[REDACTED]`,r=e.t){let 
i=e.a(t,r.skipPatterns),a=[],o=r.candidatePattern.flags.includes(`g`)?r.candidatePattern.flags:`${r.candidatePattern.flags}g`,s=new RegExp(r.candidatePattern.source,o),c;for(;(c=s.exec(t))!==null;){let t=c[0];t.length<r.minLength||e.o(c.index,c.index+t.length,i)||shannonEntropy(t)>=r.threshold&&a.push({start:c.index,end:c.index+t.length,original:t,replacement:n,source:`entropy`})}for(let o of r.piiPatterns??[]){let r=new RegExp(o.source,o.flags.includes(`g`)?o.flags:`${o.flags}g`),s;for(;(s=r.exec(t))!==null;){let t=s.index,r=t+s[0].length;e.o(t,r,i)||a.push({start:t,end:r,original:s[0],replacement:n,source:`entropy`})}}let l=e.s(a);return{scrubbed:e.i(t,l),redactions:l}}var PiiRedaction=class{constructor(t){this.tier2=null,this.tier2LoadPromise=null,this.config=e.r(t),this.enableTier2=t?.enableTier2??!1,this.logger=t?.logger}async warmupTier2(){this.enableTier2&&(await this.loadTier2(),this.tier2&&await this.tier2.warmup())}isTier2Ready(){return this.tier2?.isReady()??!1}scrub(e){let t=performance.now(),{scrubbed:n,redactions:r}=entropyScrub(e,this.config.redactionToken,this.config.entropy);return{scrubbed:n,redactions:r,tier:`entropy`,modelAvailable:!1,latencyMs:performance.now()-t}}async scrubAsync(t){let n=performance.now();if(!this.enableTier2)return this.scrub(t);let r=!1,i=[];try{if(await this.loadTier2(),!this.tier2)throw Error(`Tier 2 classifier not loaded`);let e=await this.tier2.detectSecrets(t);for(let t of e)i.push({start:t.start,end:t.end,original:t.text,replacement:this.config.redactionToken,source:`model`,label:t.label});r=!0}catch(e){this.logger?.warning({category:`pii-redaction`,message:`Tier 2 model detection failed, falling back to entropy only`,error:e instanceof Error?e:void 0,code:`TIER2_FALLBACK`})}let{redactions:a}=entropyScrub(t,this.config.redactionToken,this.config.entropy);i.push(...a);let 
o=e.s(i);return{scrubbed:e.i(t,o),redactions:o,tier:r?`model+entropy`:`entropy`,modelAvailable:r,latencyMs:performance.now()-n}}detect(e){let{redactions:t}=entropyScrub(e,this.config.redactionToken,this.config.entropy);return t}getConfig(){return structuredClone(this.config)}async loadTier2(){if(!this.tier2){if(this.tier2LoadPromise)return this.tier2LoadPromise;this.tier2LoadPromise=(async()=>{let{Tier2Classifier:e}=await Promise.resolve().then(()=>require(`./tier2-classifier-D2oIFZNi.cjs`));this.tier2=new e({onnxModelPath:this.config.model.modelPath})})();try{await this.tier2LoadPromise}catch(e){throw this.tier2LoadPromise=null,Error(`Failed to load Tier 2 classifier: ${e instanceof Error?e.message:String(e)}. Ensure onnxruntime-node and @huggingface/transformers are installed, and the model has been downloaded via \`npm run download-model\`.`)}}}};function createPiiRedaction(e){return new PiiRedaction(e)}let u;const getRedactors=()=>((0,t.isMissing)(u)&&(u={[o.FULL]:(0,n.default)({paths:l,serialize:!1,censor:s[o.FULL]}),[o.PARTIAL]:(0,n.default)({paths:l,serialize:!1,censor:s[o.PARTIAL]})}),u),redactObject=(e,n=o.FULL)=>(0,t.isMissing)(e)||typeof e!=`object`||!e?e:getRedactors()[n]((0,t.deepCopy)(e)),d=!1;exports.CensorType=o,exports.I_AM_EDGE=!1,exports.PiiRedaction=PiiRedaction,exports.createPiiRedaction=createPiiRedaction,exports.entropyScrub=entropyScrub,exports.redactFields=redactFields,exports.redactObject=redactObject,exports.redactPath=redactPath,exports.redactUrl=redactUrl,exports.shannonEntropy=shannonEntropy;
@@ -71,8 +71,79 @@ declare const redactUrl: (value: string | undefined, censorType?: CensorType, lo
71
71
  declare const redactPath: (value: string | undefined, censorType?: CensorType, logger?: ILogger) => string | undefined;
72
72
  declare const redactFields: <T = unknown>(value: T, censorType?: CensorType, recursive?: boolean) => T;
73
73
  //#endregion
74
+ //#region src/pii/types.d.ts
75
+ interface EntropyConfig {
76
+ threshold: number;
77
+ minLength: number;
78
+ candidatePattern: RegExp;
79
+ skipPatterns: RegExp[];
80
+ piiPatterns: RegExp[];
81
+ }
82
+ interface ModelConfig {
83
+ modelPath?: string;
84
+ }
85
+ interface PiiRedactionConfig {
86
+ entropy: EntropyConfig;
87
+ model: ModelConfig;
88
+ redactionToken: string;
89
+ }
90
+ interface PiiRedactionOptions {
91
+ config?: Partial<PiiRedactionConfig>;
92
+ enableTier2?: boolean;
93
+ redactionToken?: string;
94
+ modelPath?: string;
95
+ logger?: ILogger;
96
+ }
97
+ interface RedactionSpan {
98
+ start: number;
99
+ end: number;
100
+ original: string;
101
+ replacement: string;
102
+ source: 'entropy' | 'model';
103
+ label?: string;
104
+ }
105
+ interface RedactionResult {
106
+ scrubbed: string;
107
+ redactions: RedactionSpan[];
108
+ tier: 'entropy' | 'model+entropy';
109
+ modelAvailable: boolean;
110
+ latencyMs: number;
111
+ }
112
+ interface EntropyScrubResult {
113
+ scrubbed: string;
114
+ redactions: RedactionSpan[];
115
+ }
116
+ interface ModelDetection {
117
+ label: string;
118
+ start: number;
119
+ end: number;
120
+ text: string;
121
+ }
122
+ //#endregion
123
+ //#region src/pii/core/pii-redaction.d.ts
124
+ declare class PiiRedaction {
125
+ private config;
126
+ private enableTier2;
127
+ private logger?;
128
+ private tier2;
129
+ private tier2LoadPromise;
130
+ constructor(options?: PiiRedactionOptions);
131
+ warmupTier2(): Promise<void>;
132
+ isTier2Ready(): boolean;
133
+ scrub(text: string): RedactionResult;
134
+ scrubAsync(text: string): Promise<RedactionResult>;
135
+ detect(text: string): RedactionSpan[];
136
+ getConfig(): PiiRedactionConfig;
137
+ private loadTier2;
138
+ }
139
+ declare function createPiiRedaction(options?: PiiRedactionOptions): PiiRedaction;
140
+ //#endregion
141
+ //#region src/pii/detectors/entropy-detector.d.ts
142
+ declare function shannonEntropy(s: string): number;
143
+ declare function entropyScrub(text: string, redactionToken?: string, config?: EntropyConfig): EntropyScrubResult;
144
+ //#endregion
74
145
  //#region src/node/index.d.ts
75
146
  declare const redactObject: <T>(obj: T, censorType?: CensorType) => T;
76
147
  declare const I_AM_EDGE = false;
77
148
  //#endregion
78
- export { CensorType, I_AM_EDGE, redactFields, redactObject, redactPath, redactUrl };
149
+ export { CensorType, type EntropyConfig, type EntropyScrubResult, I_AM_EDGE, type ModelConfig, type ModelDetection, PiiRedaction, type PiiRedactionConfig, type PiiRedactionOptions, type RedactionResult, type RedactionSpan, createPiiRedaction, entropyScrub, redactFields, redactObject, redactPath, redactUrl, shannonEntropy };
@@ -72,8 +72,79 @@ declare const redactUrl: (value: string | undefined, censorType?: CensorType, lo
72
72
  declare const redactPath: (value: string | undefined, censorType?: CensorType, logger?: ILogger) => string | undefined;
73
73
  declare const redactFields: <T = unknown>(value: T, censorType?: CensorType, recursive?: boolean) => T;
74
74
  //#endregion
75
+ //#region src/pii/types.d.ts
76
+ interface EntropyConfig {
77
+ threshold: number;
78
+ minLength: number;
79
+ candidatePattern: RegExp;
80
+ skipPatterns: RegExp[];
81
+ piiPatterns: RegExp[];
82
+ }
83
+ interface ModelConfig {
84
+ modelPath?: string;
85
+ }
86
+ interface PiiRedactionConfig {
87
+ entropy: EntropyConfig;
88
+ model: ModelConfig;
89
+ redactionToken: string;
90
+ }
91
+ interface PiiRedactionOptions {
92
+ config?: Partial<PiiRedactionConfig>;
93
+ enableTier2?: boolean;
94
+ redactionToken?: string;
95
+ modelPath?: string;
96
+ logger?: ILogger;
97
+ }
98
+ interface RedactionSpan {
99
+ start: number;
100
+ end: number;
101
+ original: string;
102
+ replacement: string;
103
+ source: 'entropy' | 'model';
104
+ label?: string;
105
+ }
106
+ interface RedactionResult {
107
+ scrubbed: string;
108
+ redactions: RedactionSpan[];
109
+ tier: 'entropy' | 'model+entropy';
110
+ modelAvailable: boolean;
111
+ latencyMs: number;
112
+ }
113
+ interface EntropyScrubResult {
114
+ scrubbed: string;
115
+ redactions: RedactionSpan[];
116
+ }
117
+ interface ModelDetection {
118
+ label: string;
119
+ start: number;
120
+ end: number;
121
+ text: string;
122
+ }
123
+ //#endregion
124
+ //#region src/pii/core/pii-redaction.d.ts
125
+ declare class PiiRedaction {
126
+ private config;
127
+ private enableTier2;
128
+ private logger?;
129
+ private tier2;
130
+ private tier2LoadPromise;
131
+ constructor(options?: PiiRedactionOptions);
132
+ warmupTier2(): Promise<void>;
133
+ isTier2Ready(): boolean;
134
+ scrub(text: string): RedactionResult;
135
+ scrubAsync(text: string): Promise<RedactionResult>;
136
+ detect(text: string): RedactionSpan[];
137
+ getConfig(): PiiRedactionConfig;
138
+ private loadTier2;
139
+ }
140
+ declare function createPiiRedaction(options?: PiiRedactionOptions): PiiRedaction;
141
+ //#endregion
142
+ //#region src/pii/detectors/entropy-detector.d.ts
143
+ declare function shannonEntropy(s: string): number;
144
+ declare function entropyScrub(text: string, redactionToken?: string, config?: EntropyConfig): EntropyScrubResult;
145
+ //#endregion
75
146
  //#region src/node/index.d.ts
76
147
  declare const redactObject: <T>(obj: T, censorType?: CensorType) => T;
77
148
  declare const I_AM_EDGE = false;
78
149
  //#endregion
79
- export { CensorType, I_AM_EDGE, redactFields, redactObject, redactPath, redactUrl };
150
+ export { CensorType, type EntropyConfig, type EntropyScrubResult, I_AM_EDGE, type ModelConfig, type ModelDetection, PiiRedaction, type PiiRedactionConfig, type PiiRedactionOptions, type RedactionResult, type RedactionSpan, createPiiRedaction, entropyScrub, redactFields, redactObject, redactPath, redactUrl, shannonEntropy };
@@ -1 +1 @@
1
- import{t as e}from"./chunk-Cfxk5zVN.mjs";import{deepCopy as t,isMissing as n}from"@stackone/utils";import r from"fast-redact";const i=[`client_secret`,`access_token`,`refresh_token`,`api_key`,`password`,`job_board_token`,`private_key`,`certificate`,`service_user_token`,`key_id`,`secret_key`,`provhash`,`admin_key`,`session_key`,`id_token`,`authorization`,`bh_rest_token`,`external_trigger_token`,`tempauth`],a=`req.headers.authorization,req.headers.cookie,req.headers.cookies,req.headers["set-cookie"],req.headers["set-cookies"],req.headers.httpsAgent.options.cert,req.headers.httpsAgent.options.key,req.headers["x-stackone-external-trigger-token"],error.config.headers.authorization,error.config.data,error.config.headers.cookie,error.config.headers.cookies,error.config.headers["set-cookie"],error.config.headers["set-cookies"],error.config.httpsAgent.options.cert,error.config.httpsAgent.options.key,err.config.headers.authorization,err.config.data,err.config.headers.cookie,err.config.headers.cookies,err.config.headers["set-cookie"],err.config.headers["set-cookies"],err.config.httpsAgent.options.cert,err.config.httpsAgent.options.key,res.headers.authorization,res.headers.cookie,res.headers.cookies,res.headers["set-cookie"],res.headers["set-cookies"],res.headers.httpsAgent.options.cert,res.headers.httpsAgent.options.key,context.credentials`.split(`,`),o=`**redacted**`;let s=function(e){return e.FULL=`FULL`,e.PARTIAL=`PARTIAL`,e}({});const censorFn=e=>o,partialCensorFn=e=>typeof e==`string`?e.startsWith(o)?e:e.length<=10?o:`${o}${e.slice(-5)}`:o,capitalizeWord=e=>`${e.charAt(0).toUpperCase()}${e.slice(1)}`,capitalizeLastWordAfterLastDot=e=>{let t=e.split(`.`);if(t.length<2)return capitalizeWord(e);let n=capitalizeWord(t.pop()??``);return[...t,n].join(`.`)},capitalizeWordInsideLastBrackets=e=>{let t=e.lastIndexOf(`[`),n=e.indexOf(`]`,t);if(t===-1||n===-1)return;let 
r=e.slice(t,n+1).replace(/(["'])([^"']*)(["'])/g,(e,t,n)=>n?t+capitalizeWord(n)+t:t+t);return`${e.slice(0,t)}${r}${e.slice(n+1)}`},capitalizeKey=e=>capitalizeWordInsideLastBrackets(e)||capitalizeLastWordAfterLastDot(e),getPathsToRedact=(e=[],t=!0)=>{if(!t)return[];let n=e.map(e=>capitalizeKey(e));return[...e,...n]},getKeysToRedact=(e=[],t=!0)=>{if(!t)return new Set;let n=new Set;return e.forEach(e=>{n.add(e);let t;t=e.includes(`_`)?e.replace(/_([a-z])/g,(e,t)=>t.toUpperCase()):/^[A-Z]/.test(e)?e.charAt(0).toLowerCase()+e.slice(1):e;let r=t.charAt(0).toUpperCase()+t.slice(1),i=t.replace(/([A-Z])/g,`_$1`).toLowerCase();n.add(t),n.add(r),n.add(i)}),n},c={[s.FULL]:censorFn,[s.PARTIAL]:partialCensorFn},l=getKeysToRedact(i),u=[...getPathsToRedact(a),...Array.from(l)],redactUrl=(e,t=s.FULL,r)=>{if(!n(e))try{let n=new URL(e),r=[...n.searchParams].reduce((e,[n,r])=>(e.set(n,l.has(n)?c[t](r):r),e),new URLSearchParams);return`${n.origin}${n.pathname}${r.toString()?`?`+r:``}${n.hash}`}catch(t){return r?.warning({message:`Invalid URL provided, unable to redact`,context:{value:e,error:t},category:`redactUrl`}),e}},redactPath=(e,t=s.FULL,r)=>{if(!n(e))try{let[n,r]=e.split(`?`);return r?`${n}?${[...new URLSearchParams(r)].reduce((e,[n,r])=>(e.set(n,l.has(n)?c[t](r):r),e),new URLSearchParams)}`:e}catch(t){return r?.warning({message:`Invalid path provided, unable to redact`,context:{value:e,error:t},category:`redactPath`}),e}},redactFields=(e,t=s.FULL,n=!1)=>typeof e!=`object`||!e?e:Array.isArray(e)?n?e.map(e=>typeof e==`object`&&e?redactFields(e,t,n):e):[...e]:e.constructor!==Object&&e.constructor!==void 0?e:Object.entries(e).reduce((e,[r,i])=>(l.has(r)?e[r]=c[t](i):n&&typeof i==`object`&&i?e[r]=redactFields(i,t,n):e[r]=i,e),{});let d;const getRedactors=()=>(n(d)&&(d={[s.FULL]:r({paths:u,serialize:!1,censor:c[s.FULL]}),[s.PARTIAL]:r({paths:u,serialize:!1,censor:c[s.PARTIAL]})}),d),redactObject=(e,r=s.FULL)=>n(e)||typeof e!=`object`||!e?e:getRedactors()[r](t(e)),f=!1;export{s as 
CensorType,f as I_AM_EDGE,redactFields,redactObject,redactPath,redactUrl};
1
+ import{t as e}from"./chunk-Cfxk5zVN.mjs";import{a as t,i as n,o as r,r as i,s as a,t as o}from"./config-ConbNbEF.mjs";import{deepCopy as s,isMissing as c}from"@stackone/utils";import l from"fast-redact";const u=[`client_secret`,`access_token`,`refresh_token`,`api_key`,`password`,`job_board_token`,`private_key`,`certificate`,`service_user_token`,`key_id`,`secret_key`,`provhash`,`admin_key`,`session_key`,`id_token`,`authorization`,`bh_rest_token`,`external_trigger_token`,`tempauth`],d=`req.headers.authorization,req.headers.cookie,req.headers.cookies,req.headers["set-cookie"],req.headers["set-cookies"],req.headers.httpsAgent.options.cert,req.headers.httpsAgent.options.key,req.headers["x-stackone-external-trigger-token"],error.config.headers.authorization,error.config.data,error.config.headers.cookie,error.config.headers.cookies,error.config.headers["set-cookie"],error.config.headers["set-cookies"],error.config.httpsAgent.options.cert,error.config.httpsAgent.options.key,err.config.headers.authorization,err.config.data,err.config.headers.cookie,err.config.headers.cookies,err.config.headers["set-cookie"],err.config.headers["set-cookies"],err.config.httpsAgent.options.cert,err.config.httpsAgent.options.key,res.headers.authorization,res.headers.cookie,res.headers.cookies,res.headers["set-cookie"],res.headers["set-cookies"],res.headers.httpsAgent.options.cert,res.headers.httpsAgent.options.key,context.credentials`.split(`,`),f=`**redacted**`;let p=function(e){return e.FULL=`FULL`,e.PARTIAL=`PARTIAL`,e}({});const censorFn=e=>f,partialCensorFn=e=>typeof e==`string`?e.startsWith(f)?e:e.length<=10?f:`${f}${e.slice(-5)}`:f,capitalizeWord=e=>`${e.charAt(0).toUpperCase()}${e.slice(1)}`,capitalizeLastWordAfterLastDot=e=>{let t=e.split(`.`);if(t.length<2)return capitalizeWord(e);let n=capitalizeWord(t.pop()??``);return[...t,n].join(`.`)},capitalizeWordInsideLastBrackets=e=>{let t=e.lastIndexOf(`[`),n=e.indexOf(`]`,t);if(t===-1||n===-1)return;let 
r=e.slice(t,n+1).replace(/(["'])([^"']*)(["'])/g,(e,t,n)=>n?t+capitalizeWord(n)+t:t+t);return`${e.slice(0,t)}${r}${e.slice(n+1)}`},capitalizeKey=e=>capitalizeWordInsideLastBrackets(e)||capitalizeLastWordAfterLastDot(e),getPathsToRedact=(e=[],t=!0)=>{if(!t)return[];let n=e.map(e=>capitalizeKey(e));return[...e,...n]},getKeysToRedact=(e=[],t=!0)=>{if(!t)return new Set;let n=new Set;return e.forEach(e=>{n.add(e);let t;t=e.includes(`_`)?e.replace(/_([a-z])/g,(e,t)=>t.toUpperCase()):/^[A-Z]/.test(e)?e.charAt(0).toLowerCase()+e.slice(1):e;let r=t.charAt(0).toUpperCase()+t.slice(1),i=t.replace(/([A-Z])/g,`_$1`).toLowerCase();n.add(t),n.add(r),n.add(i)}),n},m={[p.FULL]:censorFn,[p.PARTIAL]:partialCensorFn},h=getKeysToRedact(u),g=[...getPathsToRedact(d),...Array.from(h)],redactUrl=(e,t=p.FULL,n)=>{if(!c(e))try{let n=new URL(e),r=[...n.searchParams].reduce((e,[n,r])=>(e.set(n,h.has(n)?m[t](r):r),e),new URLSearchParams);return`${n.origin}${n.pathname}${r.toString()?`?`+r:``}${n.hash}`}catch(t){return n?.warning({message:`Invalid URL provided, unable to redact`,context:{value:e,error:t},category:`redactUrl`}),e}},redactPath=(e,t=p.FULL,n)=>{if(!c(e))try{let[n,r]=e.split(`?`);return r?`${n}?${[...new URLSearchParams(r)].reduce((e,[n,r])=>(e.set(n,h.has(n)?m[t](r):r),e),new URLSearchParams)}`:e}catch(t){return n?.warning({message:`Invalid path provided, unable to redact`,context:{value:e,error:t},category:`redactPath`}),e}},redactFields=(e,t=p.FULL,n=!1)=>typeof e!=`object`||!e?e:Array.isArray(e)?n?e.map(e=>typeof e==`object`&&e?redactFields(e,t,n):e):[...e]:e.constructor!==Object&&e.constructor!==void 0?e:Object.entries(e).reduce((e,[r,i])=>(h.has(r)?e[r]=m[t](i):n&&typeof i==`object`&&i?e[r]=redactFields(i,t,n):e[r]=i,e),{});function shannonEntropy(e){if(!e)return 0;let t=new Map;for(let n of e)t.set(n,(t.get(n)??0)+1);let n=e.length,r=0;for(let e of t.values()){let t=e/n;r-=t*Math.log2(t)}return r}function entropyScrub(e,i=`[REDACTED]`,s=o){let 
c=t(e,s.skipPatterns),l=[],u=s.candidatePattern.flags.includes(`g`)?s.candidatePattern.flags:`${s.candidatePattern.flags}g`,d=new RegExp(s.candidatePattern.source,u),f;for(;(f=d.exec(e))!==null;){let e=f[0];e.length<s.minLength||r(f.index,f.index+e.length,c)||shannonEntropy(e)>=s.threshold&&l.push({start:f.index,end:f.index+e.length,original:e,replacement:i,source:`entropy`})}for(let t of s.piiPatterns??[]){let n=new RegExp(t.source,t.flags.includes(`g`)?t.flags:`${t.flags}g`),a;for(;(a=n.exec(e))!==null;){let e=a.index,t=e+a[0].length;r(e,t,c)||l.push({start:e,end:t,original:a[0],replacement:i,source:`entropy`})}}let p=a(l);return{scrubbed:n(e,p),redactions:p}}var PiiRedaction=class{constructor(e){this.tier2=null,this.tier2LoadPromise=null,this.config=i(e),this.enableTier2=e?.enableTier2??!1,this.logger=e?.logger}async warmupTier2(){this.enableTier2&&(await this.loadTier2(),this.tier2&&await this.tier2.warmup())}isTier2Ready(){return this.tier2?.isReady()??!1}scrub(e){let t=performance.now(),{scrubbed:n,redactions:r}=entropyScrub(e,this.config.redactionToken,this.config.entropy);return{scrubbed:n,redactions:r,tier:`entropy`,modelAvailable:!1,latencyMs:performance.now()-t}}async scrubAsync(e){let t=performance.now();if(!this.enableTier2)return this.scrub(e);let r=!1,i=[];try{if(await this.loadTier2(),!this.tier2)throw Error(`Tier 2 classifier not loaded`);let t=await this.tier2.detectSecrets(e);for(let e of t)i.push({start:e.start,end:e.end,original:e.text,replacement:this.config.redactionToken,source:`model`,label:e.label});r=!0}catch(e){this.logger?.warning({category:`pii-redaction`,message:`Tier 2 model detection failed, falling back to entropy only`,error:e instanceof Error?e:void 0,code:`TIER2_FALLBACK`})}let{redactions:o}=entropyScrub(e,this.config.redactionToken,this.config.entropy);i.push(...o);let 
s=a(i);return{scrubbed:n(e,s),redactions:s,tier:r?`model+entropy`:`entropy`,modelAvailable:r,latencyMs:performance.now()-t}}detect(e){let{redactions:t}=entropyScrub(e,this.config.redactionToken,this.config.entropy);return t}getConfig(){return structuredClone(this.config)}async loadTier2(){if(!this.tier2){if(this.tier2LoadPromise)return this.tier2LoadPromise;this.tier2LoadPromise=(async()=>{let{Tier2Classifier:e}=await import(`./tier2-classifier-DbSeja9q.mjs`);this.tier2=new e({onnxModelPath:this.config.model.modelPath})})();try{await this.tier2LoadPromise}catch(e){throw this.tier2LoadPromise=null,Error(`Failed to load Tier 2 classifier: ${e instanceof Error?e.message:String(e)}. Ensure onnxruntime-node and @huggingface/transformers are installed, and the model has been downloaded via \`npm run download-model\`.`)}}}};function createPiiRedaction(e){return new PiiRedaction(e)}let _;const getRedactors=()=>(c(_)&&(_={[p.FULL]:l({paths:g,serialize:!1,censor:m[p.FULL]}),[p.PARTIAL]:l({paths:g,serialize:!1,censor:m[p.PARTIAL]})}),_),redactObject=(e,t=p.FULL)=>c(e)||typeof e!=`object`||!e?e:getRedactors()[t](s(e)),v=!1;export{p as CensorType,v as I_AM_EDGE,PiiRedaction,createPiiRedaction,entropyScrub,redactFields,redactObject,redactPath,redactUrl,shannonEntropy};
@@ -0,0 +1 @@
1
/**
 * Tier 2 classifier (CommonJS build).
 *
 * Wraps a token-classification ONNX model: text is tokenized, run through the
 * model, the highest-scoring label is picked per token, and the per-token
 * tags are folded into character spans.
 */
// Exports `n` and `i` of this chunk are used below.
// NOTE(review): presumably `n` is the set of secret labels and `i` is
// applyRedactions, as in the ESM config chunk - confirm.
const redactionConfig = require(`./config-D8tiH_wn.cjs`);
const fs = require(`fs`);
const os = require(`os`);
const path = require(`path`);

/** Files, relative to the model directory, that must all exist before loading. */
const REQUIRED_MODEL_FILES = [
  `onnx/model_quantized.onnx`,
  `onnx/model_quantized.onnx_data`,
  `tokenizer.json`,
  `config.json`,
];

/**
 * @returns {string} `<home>/.cache/stackone/pii-redaction/privacy-filter`.
 */
function getDefaultModelCacheDir() {
  return path.join(os.homedir(), `.cache`, `stackone`, `pii-redaction`, `privacy-filter`);
}

/**
 * @param {string} modelDir Directory expected to contain the model files.
 * @returns {boolean} True only when every required file exists under `modelDir`.
 */
function isModelPresent(modelDir) {
  for (const relativeFile of REQUIRED_MODEL_FILES) {
    if (!fs.existsSync(path.join(modelDir, relativeFile))) {
      return false;
    }
  }
  return true;
}

/**
 * Lazily loaded tokenizer + ONNX inference session for token classification.
 */
class OnnxClassifier {
  /**
   * @param {string} [modelPath] Model directory; defaults to
   *   `getDefaultModelCacheDir()` when null or undefined.
   */
  constructor(modelPath) {
    this.session = null;
    this.tokenizer = null;
    this.OrtTensor = null;
    this.id2label = {};
    this.loadingPromise = null;
    this.modelPath = modelPath ?? getDefaultModelCacheDir();
  }

  /**
   * Loads the model once. A truthy `modelPath` replaces the stored path even
   * when the model is already loaded (in which case nothing is reloaded).
   * Overlapping calls share the in-flight load; a failed load clears the
   * stored promise so a later call can retry.
   *
   * @param {string} [modelPath] Optional replacement model directory.
   */
  async loadModel(modelPath) {
    if (modelPath) {
      this.modelPath = modelPath;
    }
    if (this.session && this.tokenizer) {
      return;
    }
    if (this.loadingPromise) {
      return this.loadingPromise;
    }
    this.loadingPromise = this._loadModel();
    try {
      await this.loadingPromise;
    } catch (loadError) {
      this.loadingPromise = null;
      throw loadError;
    }
  }

  /**
   * Performs the actual load: verifies the files, creates the tokenizer and
   * the inference session, then reads `id2label` from `config.json`.
   *
   * @throws {Error} When any required model file is missing (the message
   *   lists the missing files).
   */
  async _loadModel() {
    if (!isModelPresent(this.modelPath)) {
      const missingFiles = REQUIRED_MODEL_FILES.filter(
        (relativeFile) => !fs.existsSync(path.join(this.modelPath, relativeFile)),
      );
      throw new Error(`Tier 2 model not found at ${this.modelPath}. Missing: ${missingFiles.join(`, `)}. Run \`npm run download-model\` in the redaction package, or provide a modelPath.`);
    }

    const { Tokenizer } = await import(`@huggingface/transformers`);
    this.tokenizer = await Tokenizer.from_pretrained(this.modelPath, { local_files_only: true });

    const ort = await import(`onnxruntime-node`);
    this.OrtTensor = ort.Tensor;
    const onnxFile = path.resolve(this.modelPath, `onnx`, `model_quantized.onnx`);
    this.session = await ort.InferenceSession.create(onnxFile);

    const configFile = path.resolve(this.modelPath, `config.json`);
    const modelConfig = JSON.parse(fs.readFileSync(configFile, `utf-8`));
    // Keys of a JSON object are strings; store them under numeric ids.
    this.id2label = {};
    for (const [labelId, labelName] of Object.entries(modelConfig.id2label)) {
      this.id2label[Number(labelId)] = labelName;
    }
  }

  /**
   * Tokenizes `text`, runs the model with an all-ones attention mask, and
   * converts the per-token best label into spans.
   *
   * @param {string} text Input text.
   * @returns {Promise<Array<{label: string, start: number, end: number, text: string}>>}
   *   Detected spans; empty when the tokenizer yields no tokens.
   * @throws {Error} When the model is still not loaded after `ensureLoaded()`
   *   or the session returns no `logits` output.
   */
  async detect(text) {
    await this.ensureLoaded();
    if (!this.tokenizer || !this.OrtTensor || !this.session) {
      throw new Error(`Model not loaded. \nCall loadModel() first.`);
    }

    const encoding = this.tokenizer.encode(text);
    const tokenIds = encoding.ids;
    const tokenOffsets = encoding.offsets;
    if (tokenIds.length === 0) {
      return [];
    }

    const inputIds = new BigInt64Array(tokenIds.map((tokenId) => BigInt(tokenId)));
    const attentionMask = new BigInt64Array(tokenIds.length).fill(1n);
    const inputIdsTensor = new this.OrtTensor(`int64`, inputIds, [1, tokenIds.length]);
    const attentionMaskTensor = new this.OrtTensor(`int64`, attentionMask, [1, tokenIds.length]);

    const outputs = await this.session.run({
      input_ids: inputIdsTensor,
      attention_mask: attentionMaskTensor,
    });
    const logits = outputs.logits;
    if (!logits) {
      throw new Error(`ONNX model returned no logits`);
    }

    // The flat logits buffer is indexed as token * labelCount + label, with
    // labelCount taken from the third dimension.
    const tokenCount = tokenIds.length;
    const labelCount = logits.dims[2] ?? 0;
    const predictedLabelIds = [];
    for (let tokenIndex = 0; tokenIndex < tokenCount; tokenIndex++) {
      let bestScore = -Infinity;
      let bestLabelId = 0;
      for (let labelIndex = 0; labelIndex < labelCount; labelIndex++) {
        const score = Number(logits.data[tokenIndex * labelCount + labelIndex]);
        if (score > bestScore) {
          bestScore = score;
          bestLabelId = labelIndex;
        }
      }
      predictedLabelIds.push(bestLabelId);
    }

    return bioesToSpans(predictedLabelIds, tokenOffsets, text, this.id2label);
  }

  /** Loads the model ahead of the first `detect()` call. */
  async warmup() {
    await this.loadModel();
  }

  /** @returns {boolean} True when both the session and the tokenizer are set. */
  isLoaded() {
    return this.session !== null && this.tokenizer !== null;
  }

  /** Triggers `loadModel()` when the session or the tokenizer is missing. */
  async ensureLoaded() {
    if (!this.session || !this.tokenizer) {
      await this.loadModel();
    }
  }
}

/**
 * Folds per-token tags into character spans.
 *
 * Each tag is looked up in `id2label`; missing ids count as `O`. A tag is
 * split at its first `-` into a prefix (`S`, `B`, `I`, `E`) and an entity
 * label:
 *  - `O` closes any open span.
 *  - `S` closes any open span and emits a one-token span.
 *  - `B` closes any open span and opens a new one.
 *  - `I` extends the open span when the entity matches, otherwise closes it
 *    and opens a new one.
 *  - `E` extends and closes the open span when the entity matches, otherwise
 *    closes it and emits a one-token span.
 * Tags without a `-` (other than `O`), tokens with no offset entry, and
 * unknown prefixes leave the open span untouched.
 *
 * @param {number[]} labelIds Predicted label id per token.
 * @param {Array<[number, number]>} offsets `[start, end]` per token.
 * @param {string} text Text the offsets refer to.
 * @param {Object<number, string>} id2label Label id to tag mapping.
 * @returns {Array<{label: string, start: number, end: number, text: string}>}
 */
function bioesToSpans(labelIds, offsets, text, id2label) {
  const spans = [];
  let openLabel = null;
  let openStart = -1;
  let openEnd = -1;

  const emit = (label, start, end) => {
    spans.push({ label, start, end, text: text.slice(start, end) });
  };
  const flushOpenSpan = () => {
    if (openLabel !== null) {
      emit(openLabel, openStart, openEnd);
    }
  };
  const openSpan = (label, start, end) => {
    openLabel = label;
    openStart = start;
    openEnd = end;
  };

  for (let tokenIndex = 0; tokenIndex < labelIds.length; tokenIndex++) {
    const labelId = labelIds[tokenIndex];
    const tag = labelId === undefined ? `O` : (id2label[labelId] ?? `O`);

    if (tag === `O`) {
      flushOpenSpan();
      openLabel = null;
      continue;
    }

    const dashIndex = tag.indexOf(`-`);
    if (dashIndex === -1) {
      continue;
    }
    const prefix = tag.slice(0, dashIndex);
    const entity = tag.slice(dashIndex + 1);

    const offset = offsets[tokenIndex];
    if (!offset) {
      continue;
    }
    const [tokenStart, tokenEnd] = offset;

    switch (prefix) {
      case `S`:
        flushOpenSpan();
        emit(entity, tokenStart, tokenEnd);
        openLabel = null;
        break;
      case `B`:
        flushOpenSpan();
        openSpan(entity, tokenStart, tokenEnd);
        break;
      case `I`:
        if (openLabel === entity) {
          openEnd = tokenEnd;
        } else {
          flushOpenSpan();
          openSpan(entity, tokenStart, tokenEnd);
        }
        break;
      case `E`:
        if (openLabel === entity) {
          openEnd = tokenEnd;
          flushOpenSpan();
        } else {
          flushOpenSpan();
          emit(entity, tokenStart, tokenEnd);
        }
        openLabel = null;
        break;
      default:
        break;
    }
  }

  // A span still open at the end of the sequence is emitted as-is.
  flushOpenSpan();
  return spans;
}

/**
 * Facade over `OnnxClassifier` that narrows detections to secret labels and
 * can redact them.
 */
class Tier2Classifier {
  /**
   * @param {{onnxModelPath?: string}} [options] `onnxModelPath` is forwarded
   *   to `OnnxClassifier`.
   */
  constructor(options = {}) {
    this.onnxClassifier = new OnnxClassifier(options.onnxModelPath);
  }

  /** @returns {boolean} Whether the underlying classifier is loaded. */
  isReady() {
    return this.onnxClassifier.isLoaded();
  }

  /** Loads the underlying model. */
  async warmup() {
    await this.onnxClassifier.warmup();
  }

  /**
   * @param {string} text Input text.
   * @returns {Promise<Array>} All detected spans; empty for blank text
   *   (the model is not invoked in that case).
   */
  async detectAll(text) {
    if (!text.trim()) {
      return [];
    }
    return this.onnxClassifier.detect(text);
  }

  /**
   * @param {string} text Input text.
   * @returns {Promise<Array>} Spans whose label is contained in the config
   *   chunk's `n` export.
   */
  async detectSecrets(text) {
    const spans = await this.detectAll(text);
    return spans.filter((span) => redactionConfig.n.has(span.label));
  }

  /**
   * @param {string} text Input text.
   * @param {string} [replacement] Replacement token for each secret span.
   * @returns {Promise<string>} Blank text is returned unchanged; otherwise the
   *   result of the config chunk's `i` export applied to the text and the
   *   secret spans.
   */
  async scrub(text, replacement = `[REDACTED]`) {
    if (!text.trim()) {
      return text;
    }
    const secrets = await this.detectSecrets(text);
    const redactions = secrets.map((secret) => ({
      start: secret.start,
      end: secret.end,
      original: secret.text,
      replacement,
      source: `model`,
      label: secret.label,
    }));
    return redactionConfig.i(text, redactions);
  }
}

exports.Tier2Classifier = Tier2Classifier;
@@ -0,0 +1 @@
1
/**
 * Tier 2 classifier (ES module build).
 *
 * Wraps a token-classification ONNX model: text is tokenized, run through the
 * model, the highest-scoring label is picked per token, and the per-token
 * tags are folded into character spans.
 */
// The binding from this chunk is not referenced below; the import is kept as-is.
import { t as e } from "./chunk-Cfxk5zVN.mjs";
// NOTE(review): per the config chunk's export list, `i` is applyRedactions and
// `n` is the set of secret labels - confirm that this is the same chunk.
import { i as applyRedactions, n as secretLabels } from "./config-ConbNbEF.mjs";
import { existsSync, readFileSync } from "fs";
import { homedir } from "os";
import { join, resolve } from "path";

/** Files, relative to the model directory, that must all exist before loading. */
const REQUIRED_MODEL_FILES = [
  `onnx/model_quantized.onnx`,
  `onnx/model_quantized.onnx_data`,
  `tokenizer.json`,
  `config.json`,
];

/**
 * @returns {string} `<home>/.cache/stackone/pii-redaction/privacy-filter`.
 */
function getDefaultModelCacheDir() {
  return join(homedir(), `.cache`, `stackone`, `pii-redaction`, `privacy-filter`);
}

/**
 * @param {string} modelDir Directory expected to contain the model files.
 * @returns {boolean} True only when every required file exists under `modelDir`.
 */
function isModelPresent(modelDir) {
  for (const relativeFile of REQUIRED_MODEL_FILES) {
    if (!existsSync(join(modelDir, relativeFile))) {
      return false;
    }
  }
  return true;
}

/**
 * Lazily loaded tokenizer + ONNX inference session for token classification.
 */
class OnnxClassifier {
  /**
   * @param {string} [modelPath] Model directory; defaults to
   *   `getDefaultModelCacheDir()` when null or undefined.
   */
  constructor(modelPath) {
    this.session = null;
    this.tokenizer = null;
    this.OrtTensor = null;
    this.id2label = {};
    this.loadingPromise = null;
    this.modelPath = modelPath ?? getDefaultModelCacheDir();
  }

  /**
   * Loads the model once. A truthy `modelPath` replaces the stored path even
   * when the model is already loaded (in which case nothing is reloaded).
   * Overlapping calls share the in-flight load; a failed load clears the
   * stored promise so a later call can retry.
   *
   * @param {string} [modelPath] Optional replacement model directory.
   */
  async loadModel(modelPath) {
    if (modelPath) {
      this.modelPath = modelPath;
    }
    if (this.session && this.tokenizer) {
      return;
    }
    if (this.loadingPromise) {
      return this.loadingPromise;
    }
    this.loadingPromise = this._loadModel();
    try {
      await this.loadingPromise;
    } catch (loadError) {
      this.loadingPromise = null;
      throw loadError;
    }
  }

  /**
   * Performs the actual load: verifies the files, creates the tokenizer and
   * the inference session, then reads `id2label` from `config.json`.
   *
   * @throws {Error} When any required model file is missing (the message
   *   lists the missing files).
   */
  async _loadModel() {
    if (!isModelPresent(this.modelPath)) {
      const missingFiles = REQUIRED_MODEL_FILES.filter(
        (relativeFile) => !existsSync(join(this.modelPath, relativeFile)),
      );
      throw new Error(`Tier 2 model not found at ${this.modelPath}. Missing: ${missingFiles.join(`, `)}. Run \`npm run download-model\` in the redaction package, or provide a modelPath.`);
    }

    const { Tokenizer } = await import(`@huggingface/transformers`);
    this.tokenizer = await Tokenizer.from_pretrained(this.modelPath, { local_files_only: true });

    const ort = await import(`onnxruntime-node`);
    this.OrtTensor = ort.Tensor;
    const onnxFile = resolve(this.modelPath, `onnx`, `model_quantized.onnx`);
    this.session = await ort.InferenceSession.create(onnxFile);

    const configFile = resolve(this.modelPath, `config.json`);
    const modelConfig = JSON.parse(readFileSync(configFile, `utf-8`));
    // Keys of a JSON object are strings; store them under numeric ids.
    this.id2label = {};
    for (const [labelId, labelName] of Object.entries(modelConfig.id2label)) {
      this.id2label[Number(labelId)] = labelName;
    }
  }

  /**
   * Tokenizes `text`, runs the model with an all-ones attention mask, and
   * converts the per-token best label into spans.
   *
   * @param {string} text Input text.
   * @returns {Promise<Array<{label: string, start: number, end: number, text: string}>>}
   *   Detected spans; empty when the tokenizer yields no tokens.
   * @throws {Error} When the model is still not loaded after `ensureLoaded()`
   *   or the session returns no `logits` output.
   */
  async detect(text) {
    await this.ensureLoaded();
    if (!this.tokenizer || !this.OrtTensor || !this.session) {
      throw new Error(`Model not loaded. \nCall loadModel() first.`);
    }

    const encoding = this.tokenizer.encode(text);
    const tokenIds = encoding.ids;
    const tokenOffsets = encoding.offsets;
    if (tokenIds.length === 0) {
      return [];
    }

    const inputIds = new BigInt64Array(tokenIds.map((tokenId) => BigInt(tokenId)));
    const attentionMask = new BigInt64Array(tokenIds.length).fill(1n);
    const inputIdsTensor = new this.OrtTensor(`int64`, inputIds, [1, tokenIds.length]);
    const attentionMaskTensor = new this.OrtTensor(`int64`, attentionMask, [1, tokenIds.length]);

    const outputs = await this.session.run({
      input_ids: inputIdsTensor,
      attention_mask: attentionMaskTensor,
    });
    const logits = outputs.logits;
    if (!logits) {
      throw new Error(`ONNX model returned no logits`);
    }

    // The flat logits buffer is indexed as token * labelCount + label, with
    // labelCount taken from the third dimension.
    const tokenCount = tokenIds.length;
    const labelCount = logits.dims[2] ?? 0;
    const predictedLabelIds = [];
    for (let tokenIndex = 0; tokenIndex < tokenCount; tokenIndex++) {
      let bestScore = -Infinity;
      let bestLabelId = 0;
      for (let labelIndex = 0; labelIndex < labelCount; labelIndex++) {
        const score = Number(logits.data[tokenIndex * labelCount + labelIndex]);
        if (score > bestScore) {
          bestScore = score;
          bestLabelId = labelIndex;
        }
      }
      predictedLabelIds.push(bestLabelId);
    }

    return bioesToSpans(predictedLabelIds, tokenOffsets, text, this.id2label);
  }

  /** Loads the model ahead of the first `detect()` call. */
  async warmup() {
    await this.loadModel();
  }

  /** @returns {boolean} True when both the session and the tokenizer are set. */
  isLoaded() {
    return this.session !== null && this.tokenizer !== null;
  }

  /** Triggers `loadModel()` when the session or the tokenizer is missing. */
  async ensureLoaded() {
    if (!this.session || !this.tokenizer) {
      await this.loadModel();
    }
  }
}

/**
 * Folds per-token tags into character spans.
 *
 * Each tag is looked up in `id2label`; missing ids count as `O`. A tag is
 * split at its first `-` into a prefix (`S`, `B`, `I`, `E`) and an entity
 * label:
 *  - `O` closes any open span.
 *  - `S` closes any open span and emits a one-token span.
 *  - `B` closes any open span and opens a new one.
 *  - `I` extends the open span when the entity matches, otherwise closes it
 *    and opens a new one.
 *  - `E` extends and closes the open span when the entity matches, otherwise
 *    closes it and emits a one-token span.
 * Tags without a `-` (other than `O`), tokens with no offset entry, and
 * unknown prefixes leave the open span untouched.
 *
 * @param {number[]} labelIds Predicted label id per token.
 * @param {Array<[number, number]>} offsets `[start, end]` per token.
 * @param {string} text Text the offsets refer to.
 * @param {Object<number, string>} id2label Label id to tag mapping.
 * @returns {Array<{label: string, start: number, end: number, text: string}>}
 */
function bioesToSpans(labelIds, offsets, text, id2label) {
  const spans = [];
  let openLabel = null;
  let openStart = -1;
  let openEnd = -1;

  const emit = (label, start, end) => {
    spans.push({ label, start, end, text: text.slice(start, end) });
  };
  const flushOpenSpan = () => {
    if (openLabel !== null) {
      emit(openLabel, openStart, openEnd);
    }
  };
  const openSpan = (label, start, end) => {
    openLabel = label;
    openStart = start;
    openEnd = end;
  };

  for (let tokenIndex = 0; tokenIndex < labelIds.length; tokenIndex++) {
    const labelId = labelIds[tokenIndex];
    const tag = labelId === undefined ? `O` : (id2label[labelId] ?? `O`);

    if (tag === `O`) {
      flushOpenSpan();
      openLabel = null;
      continue;
    }

    const dashIndex = tag.indexOf(`-`);
    if (dashIndex === -1) {
      continue;
    }
    const prefix = tag.slice(0, dashIndex);
    const entity = tag.slice(dashIndex + 1);

    const offset = offsets[tokenIndex];
    if (!offset) {
      continue;
    }
    const [tokenStart, tokenEnd] = offset;

    switch (prefix) {
      case `S`:
        flushOpenSpan();
        emit(entity, tokenStart, tokenEnd);
        openLabel = null;
        break;
      case `B`:
        flushOpenSpan();
        openSpan(entity, tokenStart, tokenEnd);
        break;
      case `I`:
        if (openLabel === entity) {
          openEnd = tokenEnd;
        } else {
          flushOpenSpan();
          openSpan(entity, tokenStart, tokenEnd);
        }
        break;
      case `E`:
        if (openLabel === entity) {
          openEnd = tokenEnd;
          flushOpenSpan();
        } else {
          flushOpenSpan();
          emit(entity, tokenStart, tokenEnd);
        }
        openLabel = null;
        break;
      default:
        break;
    }
  }

  // A span still open at the end of the sequence is emitted as-is.
  flushOpenSpan();
  return spans;
}

/**
 * Facade over `OnnxClassifier` that narrows detections to secret labels and
 * can redact them.
 */
class Tier2Classifier {
  /**
   * @param {{onnxModelPath?: string}} [options] `onnxModelPath` is forwarded
   *   to `OnnxClassifier`.
   */
  constructor(options = {}) {
    this.onnxClassifier = new OnnxClassifier(options.onnxModelPath);
  }

  /** @returns {boolean} Whether the underlying classifier is loaded. */
  isReady() {
    return this.onnxClassifier.isLoaded();
  }

  /** Loads the underlying model. */
  async warmup() {
    await this.onnxClassifier.warmup();
  }

  /**
   * @param {string} text Input text.
   * @returns {Promise<Array>} All detected spans; empty for blank text
   *   (the model is not invoked in that case).
   */
  async detectAll(text) {
    if (!text.trim()) {
      return [];
    }
    return this.onnxClassifier.detect(text);
  }

  /**
   * @param {string} text Input text.
   * @returns {Promise<Array>} Spans whose label is contained in the imported
   *   label set.
   */
  async detectSecrets(text) {
    const spans = await this.detectAll(text);
    return spans.filter((span) => secretLabels.has(span.label));
  }

  /**
   * @param {string} text Input text.
   * @param {string} [replacement] Replacement token for each secret span.
   * @returns {Promise<string>} Blank text is returned unchanged; otherwise the
   *   result of the imported redaction helper applied to the text and the
   *   secret spans.
   */
  async scrub(text, replacement = `[REDACTED]`) {
    if (!text.trim()) {
      return text;
    }
    const secrets = await this.detectSecrets(text);
    const redactions = secrets.map((secret) => ({
      start: secret.start,
      end: secret.end,
      original: secret.text,
      replacement,
      source: `model`,
      label: secret.label,
    }));
    return applyRedactions(text, redactions);
  }
}

export { Tier2Classifier };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@stackone/redaction",
3
- "version": "1.5.0",
3
+ "version": "1.6.0",
4
4
  "description": "",
5
5
  "main": "dist/node/index.cjs",
6
6
  "files": [
@@ -43,6 +43,7 @@
43
43
  "lint:fix": "npm run code:check:fix",
44
44
  "test": "vitest run",
45
45
  "test:watch": "vitest watch",
46
+ "download-model": "node scripts/download-model.mjs",
46
47
  "publish-release": "npm publish --access=public"
47
48
  },
48
49
  "keywords": [],
@@ -52,5 +53,17 @@
52
53
  "@stackone/logger": "*",
53
54
  "@stackone/utils": "*",
54
55
  "fast-redact": "3.3.0"
56
+ },
57
+ "peerDependencies": {
58
+ "@huggingface/transformers": "^3.0.0",
59
+ "onnxruntime-node": ">=1.16.0"
60
+ },
61
+ "peerDependenciesMeta": {
62
+ "@huggingface/transformers": {
63
+ "optional": true
64
+ },
65
+ "onnxruntime-node": {
66
+ "optional": true
67
+ }
55
68
  }
56
69
  }