@stackone/redaction 1.6.0 → 1.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/edge/index.d.mts +1 -1
- package/dist/edge/index.mjs +1 -1
- package/dist/node/config-M7RWfWu6.mjs +1 -0
- package/dist/node/config-tfC_1JL3.cjs +1 -0
- package/dist/node/index.cjs +1 -1
- package/dist/node/index.d.cts +7 -2
- package/dist/node/index.d.mts +7 -2
- package/dist/node/index.mjs +1 -1
- package/dist/node/tier2-classifier-CehRVVU3.mjs +1 -0
- package/dist/node/tier2-classifier-uX1K94UJ.cjs +1 -0
- package/package.json +1 -1
- package/dist/node/config-ConbNbEF.mjs +0 -1
- package/dist/node/config-D8tiH_wn.cjs +0 -1
- package/dist/node/tier2-classifier-D2oIFZNi.cjs +0 -1
- package/dist/node/tier2-classifier-DbSeja9q.mjs +0 -1
package/dist/edge/index.d.mts
CHANGED
|
@@ -68,7 +68,7 @@ declare enum CensorType {
|
|
|
68
68
|
FULL = "FULL",
|
|
69
69
|
PARTIAL = "PARTIAL",
|
|
70
70
|
}
|
|
71
|
-
declare const redactUrl: (value: string | undefined, censorType?: CensorType, logger?: ILogger) => string | undefined;
|
|
71
|
+
/**
 * Censors the values of sensitive query parameters in a URL.
 *
 * The result is rebuilt from the parsed URL's origin, pathname, (redacted) query
 * string and hash. If `value` cannot be parsed as a URL, a warning is sent to
 * `logger` (when provided) and the input is returned as a string, unredacted.
 *
 * @param value - URL string or `URL` object to redact.
 * @param censorType - Censoring mode; the implementation defaults to `CensorType.FULL`.
 * @param logger - Optional logger that receives the parse-failure warning.
 * @returns The redacted URL string, or `undefined` when `value` is missing.
 */
declare const redactUrl: (value: string | URL | undefined, censorType?: CensorType, logger?: ILogger) => string | undefined;
|
|
72
72
|
/**
 * Censors the values of sensitive query parameters in a path such as `/a/b?token=x`.
 *
 * The value is split on `?`; when there is no query part the input is returned
 * unchanged. On failure a warning is sent to `logger` (when provided) and the
 * input is returned as-is.
 *
 * @param value - Path, optionally followed by a query string.
 * @param censorType - Censoring mode; the implementation defaults to `CensorType.FULL`.
 * @param logger - Optional logger that receives the failure warning.
 * @returns The path with redacted query values, or `undefined` when `value` is missing.
 */
declare const redactPath: (value: string | undefined, censorType?: CensorType, logger?: ILogger) => string | undefined;
|
|
73
73
|
/**
 * Returns a copy of a plain object in which the values of sensitive keys are censored.
 *
 * Non-objects and objects whose constructor is not `Object` are returned as-is.
 * Arrays are shallow-copied; their object elements are only processed when
 * `recursive` is true. Nested plain objects are likewise only processed when
 * `recursive` is true.
 *
 * @param value - Value to redact.
 * @param censorType - Censoring mode; the implementation defaults to `CensorType.FULL`.
 * @param recursive - Whether to descend into nested objects and arrays (default `false`).
 * @returns The redacted copy, or the input itself when it is not a plain object or array.
 */
declare const redactFields: <T = unknown>(value: T, censorType?: CensorType, recursive?: boolean) => T;
|
|
74
74
|
//#endregion
|
package/dist/edge/index.mjs
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
import{t as e}from"./chunk-Cfxk5zVN.mjs";import{isMissing as t}from"@stackone/utils";const n=[`client_secret`,`access_token`,`refresh_token`,`api_key`,`password`,`job_board_token`,`private_key`,`certificate`,`service_user_token`,`key_id`,`secret_key`,`provhash`,`admin_key`,`session_key`,`id_token`,`authorization`,`bh_rest_token`,`external_trigger_token`,`tempauth`],r=`req.headers.authorization,req.headers.cookie,req.headers.cookies,req.headers["set-cookie"],req.headers["set-cookies"],req.headers.httpsAgent.options.cert,req.headers.httpsAgent.options.key,req.headers["x-stackone-external-trigger-token"],error.config.headers.authorization,error.config.data,error.config.headers.cookie,error.config.headers.cookies,error.config.headers["set-cookie"],error.config.headers["set-cookies"],error.config.httpsAgent.options.cert,error.config.httpsAgent.options.key,err.config.headers.authorization,err.config.data,err.config.headers.cookie,err.config.headers.cookies,err.config.headers["set-cookie"],err.config.headers["set-cookies"],err.config.httpsAgent.options.cert,err.config.httpsAgent.options.key,res.headers.authorization,res.headers.cookie,res.headers.cookies,res.headers["set-cookie"],res.headers["set-cookies"],res.headers.httpsAgent.options.cert,res.headers.httpsAgent.options.key,context.credentials`.split(`,`),i=`**redacted**`;let a=function(e){return e.FULL=`FULL`,e.PARTIAL=`PARTIAL`,e}({});const censorFn=e=>i,partialCensorFn=e=>typeof e==`string`?e.startsWith(i)?e:e.length<=10?i:`${i}${e.slice(-5)}`:i,capitalizeWord=e=>`${e.charAt(0).toUpperCase()}${e.slice(1)}`,capitalizeLastWordAfterLastDot=e=>{let t=e.split(`.`);if(t.length<2)return capitalizeWord(e);let n=capitalizeWord(t.pop()??``);return[...t,n].join(`.`)},capitalizeWordInsideLastBrackets=e=>{let t=e.lastIndexOf(`[`),n=e.indexOf(`]`,t);if(t===-1||n===-1)return;let 
r=e.slice(t,n+1).replace(/(["'])([^"']*)(["'])/g,(e,t,n)=>n?t+capitalizeWord(n)+t:t+t);return`${e.slice(0,t)}${r}${e.slice(n+1)}`},capitalizeKey=e=>capitalizeWordInsideLastBrackets(e)||capitalizeLastWordAfterLastDot(e),getPathsToRedact=(e=[],t=!0)=>{if(!t)return[];let n=e.map(e=>capitalizeKey(e));return[...e,...n]},getKeysToRedact=(e=[],t=!0)=>{if(!t)return new Set;let n=new Set;return e.forEach(e=>{n.add(e);let t;t=e.includes(`_`)?e.replace(/_([a-z])/g,(e,t)=>t.toUpperCase()):/^[A-Z]/.test(e)?e.charAt(0).toLowerCase()+e.slice(1):e;let r=t.charAt(0).toUpperCase()+t.slice(1),i=t.replace(/([A-Z])/g,`_$1`).toLowerCase();n.add(t),n.add(r),n.add(i)}),n},o={[a.FULL]:censorFn,[a.PARTIAL]:partialCensorFn},s=getKeysToRedact(n),c=[...getPathsToRedact(r),...Array.from(s)],redactUrl=(e,n=a.FULL,r)=>{if(!t(e))try{let t=new URL(e),r=[...t.searchParams].reduce((e,[t,r])=>(e.set(t,s.has(t)?o[n](r):r),e),new URLSearchParams);return`${t.origin}${t.pathname}${r.toString()?`?`+r:``}${t.hash}`}catch(t){return r?.warning({message:`Invalid URL provided, unable to redact`,context:{value:e,error:t},category:`redactUrl`}),e}},redactPath=(e,n=a.FULL,r)=>{if(!t(e))try{let[t,r]=e.split(`?`);return r?`${t}?${[...new URLSearchParams(r)].reduce((e,[t,r])=>(e.set(t,s.has(t)?o[n](r):r),e),new URLSearchParams)}`:e}catch(t){return r?.warning({message:`Invalid path provided, unable to redact`,context:{value:e,error:t},category:`redactPath`}),e}},redactFields=(e,t=a.FULL,n=!1)=>typeof e!=`object`||!e?e:Array.isArray(e)?n?e.map(e=>typeof e==`object`&&e?redactFields(e,t,n):e):[...e]:e.constructor!==Object&&e.constructor!==void 0?e:Object.entries(e).reduce((e,[r,i])=>(s.has(r)?e[r]=o[t](i):n&&typeof i==`object`&&i?e[r]=redactFields(i,t,n):e[r]=i,e),{});let l;const 
getRedactors=()=>(t(l)&&(l={[a.FULL]:edgeRedact({paths:c,serialize:!1,censor:o[a.FULL]}),[a.PARTIAL]:edgeRedact({paths:c,serialize:!1,censor:o[a.PARTIAL]})}),l),split=e=>e.replace(/\[(\d+)\]/g,`.$1`).split(`.`).filter(Boolean),fallbackCensor=()=>`***`,edgeRedact=e=>{let t=(e.paths??[]).map(split),n=typeof e.censor==`function`?e.censor:fallbackCensor,r=!!e.serialize,redact=e=>{if(typeof e!=`object`||!e)return r?String(e):e;for(let r of t){let t=e;for(let e=0;e<r.length-1&&t&&typeof t==`object`;e++)t=t[r[e]];let i=r[r.length-1];t&&Object.prototype.hasOwnProperty.call(t,i)&&(t[i]=n(t[i]))}return r?JSON.stringify(e):e};return redact.restore=()=>{},redact},safeClone=e=>{let t=globalThis.structuredClone;return typeof t==`function`?t(e):JSON.parse(JSON.stringify(e))},redactObject=(e,t=a.FULL)=>{if(typeof e!=`object`||!e)return e;let n=safeClone(e);return getRedactors()[t](n)},u=!0;export{a as CensorType,u as I_AM_EDGE,redactFields,redactObject,redactPath,redactUrl,safeClone};
|
|
1
|
+
// The imported binding is unused in this module; the import statement is kept
// so the shared chunk is still loaded exactly as before.
import { t as e } from "./chunk-Cfxk5zVN.mjs";
import { isMissing } from "@stackone/utils";

/** Key names whose values are always treated as secrets. */
const SENSITIVE_KEYS = [
  `client_secret`,
  `access_token`,
  `refresh_token`,
  `api_key`,
  `password`,
  `job_board_token`,
  `private_key`,
  `certificate`,
  `service_user_token`,
  `key_id`,
  `secret_key`,
  `provhash`,
  `admin_key`,
  `session_key`,
  `id_token`,
  `authorization`,
  `bh_rest_token`,
  `external_trigger_token`,
  `tempauth`,
  `token`,
];

/** Object paths (dot / bracket notation) that `redactObject` censors. */
const SENSITIVE_PATHS = [
  `req.headers.authorization`,
  `req.headers.cookie`,
  `req.headers.cookies`,
  `req.headers["set-cookie"]`,
  `req.headers["set-cookies"]`,
  `req.headers.httpsAgent.options.cert`,
  `req.headers.httpsAgent.options.key`,
  `req.headers["x-stackone-external-trigger-token"]`,
  `error.config.headers.authorization`,
  `error.config.data`,
  `error.config.headers.cookie`,
  `error.config.headers.cookies`,
  `error.config.headers["set-cookie"]`,
  `error.config.headers["set-cookies"]`,
  `error.config.httpsAgent.options.cert`,
  `error.config.httpsAgent.options.key`,
  `err.config.headers.authorization`,
  `err.config.data`,
  `err.config.headers.cookie`,
  `err.config.headers.cookies`,
  `err.config.headers["set-cookie"]`,
  `err.config.headers["set-cookies"]`,
  `err.config.httpsAgent.options.cert`,
  `err.config.httpsAgent.options.key`,
  `res.headers.authorization`,
  `res.headers.cookie`,
  `res.headers.cookies`,
  `res.headers["set-cookie"]`,
  `res.headers["set-cookies"]`,
  `res.headers.httpsAgent.options.cert`,
  `res.headers.httpsAgent.options.key`,
  `context.credentials`,
];

/** Replacement text written in place of a secret. */
const REDACTED = `**redacted**`;

/** Censoring modes (exported as `CensorType`). */
const CensorType = { FULL: `FULL`, PARTIAL: `PARTIAL` };

/** Full censor: every value becomes the redaction marker. */
const censorFn = (_value) => REDACTED;

/**
 * Partial censor: strings longer than 10 characters keep their last 5
 * characters after the marker; already-censored strings pass through;
 * everything else is fully censored.
 */
const partialCensorFn = (value) => {
  if (typeof value !== `string`) return REDACTED;
  if (value.startsWith(REDACTED)) return value;
  return value.length <= 10 ? REDACTED : `${REDACTED}${value.slice(-5)}`;
};

/** Upper-cases the first character of `word`. */
const capitalizeWord = (word) => `${word.charAt(0).toUpperCase()}${word.slice(1)}`;

/** Capitalizes the segment after the last dot (or the whole string if there is no dot). */
const capitalizeLastWordAfterLastDot = (path) => {
  const segments = path.split(`.`);
  if (segments.length < 2) return capitalizeWord(path);
  const last = capitalizeWord(segments.pop() ?? ``);
  return [...segments, last].join(`.`);
};

/**
 * Capitalizes the quoted word inside the last `[...]` of `path`.
 * Returns `undefined` when the path has no bracket pair.
 */
const capitalizeWordInsideLastBrackets = (path) => {
  const open = path.lastIndexOf(`[`);
  const close = path.indexOf(`]`, open);
  if (open === -1 || close === -1) return undefined;
  const bracket = path
    .slice(open, close + 1)
    .replace(/(["'])([^"']*)(["'])/g, (_match, quote, word) =>
      word ? quote + capitalizeWord(word) + quote : quote + quote,
    );
  return `${path.slice(0, open)}${bracket}${path.slice(close + 1)}`;
};

/** Capitalized variant of the last key of a path. */
const capitalizeKey = (path) =>
  capitalizeWordInsideLastBrackets(path) || capitalizeLastWordAfterLastDot(path);

/** Returns the given paths followed by their capitalized variants. */
const getPathsToRedact = (paths = [], enabled = true) => {
  if (!enabled) return [];
  const capitalized = paths.map((path) => capitalizeKey(path));
  return [...paths, ...capitalized];
};

/**
 * Expands each key into the set of its original, camelCase, PascalCase and
 * snake_case spellings.
 */
const getKeysToRedact = (keys = [], enabled = true) => {
  const variants = new Set();
  if (!enabled) return variants;
  for (const key of keys) {
    variants.add(key);
    let camel;
    if (key.includes(`_`)) {
      camel = key.replace(/_([a-z])/g, (_match, letter) => letter.toUpperCase());
    } else if (/^[A-Z]/.test(key)) {
      camel = key.charAt(0).toLowerCase() + key.slice(1);
    } else {
      camel = key;
    }
    const pascal = camel.charAt(0).toUpperCase() + camel.slice(1);
    const snake = camel.replace(/([A-Z])/g, `_$1`).toLowerCase();
    variants.add(camel);
    variants.add(pascal);
    variants.add(snake);
  }
  return variants;
};

/** Censor function per censoring mode. */
const CENSORS = {
  [CensorType.FULL]: censorFn,
  [CensorType.PARTIAL]: partialCensorFn,
};

const SENSITIVE_KEY_SET = getKeysToRedact(SENSITIVE_KEYS);
const REDACT_PATHS = [...getPathsToRedact(SENSITIVE_PATHS), ...Array.from(SENSITIVE_KEY_SET)];

/**
 * Copies query parameters into a new `URLSearchParams`, censoring the values
 * of sensitive names. Uses `set`, so repeated names collapse to the last value.
 */
const redactSearchParams = (params, censorType) => {
  const redacted = new URLSearchParams();
  for (const [name, value] of params) {
    redacted.set(name, SENSITIVE_KEY_SET.has(name) ? CENSORS[censorType](value) : value);
  }
  return redacted;
};

/**
 * Censors sensitive query parameters of a URL (string or `URL`).
 *
 * @returns the rebuilt URL string; `undefined` for a missing value; the input
 *   as a string (unredacted) when it cannot be parsed, after warning `logger`.
 */
const redactUrl = (value, censorType = CensorType.FULL, logger) => {
  if (isMissing(value)) return undefined;
  try {
    const url = new URL(value);
    const query = redactSearchParams(url.searchParams, censorType);
    const suffix = query.toString() ? `?` + query : ``;
    return `${url.origin}${url.pathname}${suffix}${url.hash}`;
  } catch (error) {
    // NOTE(review): the raw value is handed to the logger and returned unredacted.
    logger?.warning({
      message: `Invalid URL provided, unable to redact`,
      context: { value, error },
      category: `redactUrl`,
    });
    return typeof value === `string` ? value : value?.toString();
  }
};

/**
 * Censors sensitive query parameters of a path such as `/a?token=x`.
 *
 * @returns the redacted path; the input unchanged when it has no query part;
 *   `undefined` for a missing value.
 */
const redactPath = (value, censorType = CensorType.FULL, logger) => {
  if (isMissing(value)) return undefined;
  try {
    const [path, query] = value.split(`?`);
    if (!query) return value;
    return `${path}?${redactSearchParams(new URLSearchParams(query), censorType)}`;
  } catch (error) {
    logger?.warning({
      message: `Invalid path provided, unable to redact`,
      context: { value, error },
      category: `redactPath`,
    });
    return value;
  }
};

/**
 * Returns a copy of a plain object with sensitive keys censored.
 * Non-plain objects and primitives are returned as-is; arrays are
 * shallow-copied unless `recursive` is set.
 */
const redactFields = (value, censorType = CensorType.FULL, recursive = false) => {
  if (typeof value !== `object` || !value) return value;
  if (Array.isArray(value)) {
    if (!recursive) return [...value];
    return value.map((item) =>
      typeof item === `object` && item ? redactFields(item, censorType, recursive) : item,
    );
  }
  // Class instances (Date, Map, custom classes, ...) are left untouched.
  if (value.constructor !== Object && value.constructor !== undefined) return value;
  const result = {};
  for (const [key, field] of Object.entries(value)) {
    if (SENSITIVE_KEY_SET.has(key)) {
      result[key] = CENSORS[censorType](field);
    } else if (recursive && typeof field === `object` && field) {
      result[key] = redactFields(field, censorType, recursive);
    } else {
      result[key] = field;
    }
  }
  return result;
};

/**
 * Splits a redaction path into its key segments.
 *
 * Understands dot notation (`a.b`), numeric indices (`a[0]`) and quoted
 * bracket keys (`a["set-cookie"]`, `a['x']`). Previously quoted bracket keys
 * were kept verbatim inside the preceding segment (e.g. `headers["set-cookie"]`),
 * so such paths never matched any property and were silently not redacted.
 */
const split = (path) => {
  const segments = [];
  const tokenPattern = /\[(?:(\d+)|(["'])(.*?)\2)\]|[^.[\]]+/g;
  let token;
  while ((token = tokenPattern.exec(path)) !== null) {
    const segment = token[3] ?? token[1] ?? token[0];
    if (segment) segments.push(segment);
  }
  return segments;
};

/** Censor used when the caller does not supply a function. */
const fallbackCensor = () => `***`;

/**
 * Minimal in-place path redactor used by the edge build.
 *
 * @param options - `paths` to censor, a `censor` function and `serialize`
 *   (when true the redactor returns a string instead of the object).
 * @returns a function that censors the configured paths on the object it is
 *   given (mutating it) and carries a no-op `restore` method.
 */
const edgeRedact = (options) => {
  const paths = (options.paths ?? []).map(split);
  const censor = typeof options.censor === `function` ? options.censor : fallbackCensor;
  const serialize = !!options.serialize;
  const redact = (target) => {
    if (typeof target !== `object` || !target) return serialize ? String(target) : target;
    for (const segments of paths) {
      if (segments.length === 0) continue;
      let node = target;
      for (let i = 0; i < segments.length - 1 && node && typeof node === `object`; i++) {
        node = node[segments[i]];
      }
      const leaf = segments[segments.length - 1];
      // Only objects can be assigned to; a primitive parent is skipped.
      if (node && typeof node === `object` && Object.prototype.hasOwnProperty.call(node, leaf)) {
        node[leaf] = censor(node[leaf]);
      }
    }
    return serialize ? JSON.stringify(target) : target;
  };
  redact.restore = () => {};
  return redact;
};

let cachedRedactors;

/** Lazily builds one redactor per censoring mode. */
const getRedactors = () => {
  if (isMissing(cachedRedactors)) {
    cachedRedactors = {
      [CensorType.FULL]: edgeRedact({
        paths: REDACT_PATHS,
        serialize: false,
        censor: CENSORS[CensorType.FULL],
      }),
      [CensorType.PARTIAL]: edgeRedact({
        paths: REDACT_PATHS,
        serialize: false,
        censor: CENSORS[CensorType.PARTIAL],
      }),
    };
  }
  return cachedRedactors;
};

/** Deep-copies `value` with `structuredClone` when available, else via JSON. */
const safeClone = (value) => {
  const clone = globalThis.structuredClone;
  return typeof clone === `function` ? clone(value) : JSON.parse(JSON.stringify(value));
};

/** Returns a redacted deep copy of `value`; non-objects are returned as-is. */
const redactObject = (value, censorType = CensorType.FULL) => {
  if (typeof value !== `object` || !value) return value;
  const copy = safeClone(value);
  return getRedactors()[censorType](copy);
};

/** Marker telling consumers that this is the edge build. */
const I_AM_EDGE = true;

export { CensorType, I_AM_EDGE, redactFields, redactObject, redactPath, redactUrl, safeClone };
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
import{t as e}from"./chunk-Cfxk5zVN.mjs";import{existsSync as t,readFileSync as n}from"fs";import{homedir as r}from"os";import{join as i,resolve as a}from"path";const o=[`onnx/model_quantized.onnx`,`onnx/model_quantized.onnx_data`,`tokenizer.json`,`config.json`];function getDefaultModelCacheDir(){return i(r(),`.cache`,`stackone`,`pii-redaction`,`privacy-filter`)}function isModelPresent(e){return o.every(n=>t(i(e,n)))}var OnnxClassifier=class{constructor(e){this.session=null,this.tokenizer=null,this.OrtTensor=null,this.id2label={},this.loadingPromise=null,this.modelPath=e??getDefaultModelCacheDir()}async loadModel(e){if(e&&(this.modelPath=e),!(this.session&&this.tokenizer)){if(this.loadingPromise)return this.loadingPromise;this.loadingPromise=this._loadModel();try{await this.loadingPromise}catch(e){throw this.loadingPromise=null,e}}}async _loadModel(){if(!isModelPresent(this.modelPath)){let e=o.filter(e=>!t(i(this.modelPath,e)));throw Error(`Tier 2 model not found at ${this.modelPath}. Missing: ${e.join(`, `)}. Run \`npm run download-model\` in the redaction package, or provide a modelPath.`)}let{Tokenizer:e}=await import(`@huggingface/transformers`);this.tokenizer=await e.from_pretrained(this.modelPath,{local_files_only:!0});let r=await import(`onnxruntime-node`);this.OrtTensor=r.Tensor;let s=a(this.modelPath,`onnx`,`model_quantized.onnx`);this.session=await r.InferenceSession.create(s);let c=a(this.modelPath,`config.json`),l=JSON.parse(n(c,`utf-8`));this.id2label={};for(let[e,t]of Object.entries(l.id2label))this.id2label[Number(e)]=t}async detect(e){if(await this.ensureLoaded(),!this.tokenizer||!this.OrtTensor||!this.session)throw Error(`Model not loaded. 
Call loadModel() first.`);let t=this.tokenizer.encode(e),n=t.ids,r=t.offsets;if(n.length===0)return[];let i=new BigInt64Array(n.map(e=>BigInt(e))),a=new BigInt64Array(n.length).fill(1n),o=new this.OrtTensor(`int64`,i,[1,n.length]),s=new this.OrtTensor(`int64`,a,[1,n.length]),c=(await this.session.run({input_ids:o,attention_mask:s})).logits;if(!c)throw Error(`ONNX model returned no logits`);let l=n.length,u=c.dims[2]??0,d=[];for(let e=0;e<l;e++){let t=-1/0,n=0;for(let r=0;r<u;r++){let i=Number(c.data[e*u+r]);i>t&&(t=i,n=r)}d.push(n)}return bioesToSpans(d,r,e,this.id2label)}async warmup(){await this.loadModel()}isLoaded(){return this.session!==null&&this.tokenizer!==null}async ensureLoaded(){(!this.session||!this.tokenizer)&&await this.loadModel()}};function bioesToSpans(e,t,n,r){let i=[],a=null,o=-1,s=-1;for(let c=0;c<e.length;c++){let l=e[c],u=l===void 0?`O`:r[l]??`O`;if(u===`O`){a!==null&&(i.push({label:a,start:o,end:s,text:n.slice(o,s)}),a=null);continue}let d=u.indexOf(`-`);if(d===-1)continue;let f=u.slice(0,d),p=u.slice(d+1),m=t[c];if(!m)continue;let[h,g]=m;f===`S`?(a!==null&&i.push({label:a,start:o,end:s,text:n.slice(o,s)}),i.push({label:p,start:h,end:g,text:n.slice(h,g)}),a=null):f===`B`?(a!==null&&i.push({label:a,start:o,end:s,text:n.slice(o,s)}),a=p,o=h,s=g):f===`I`?a===p?s=g:(a!==null&&i.push({label:a,start:o,end:s,text:n.slice(o,s)}),a=p,o=h,s=g):f===`E`&&(a===p?(s=g,i.push({label:a,start:o,end:s,text:n.slice(o,s)}),a=null):(a!==null&&i.push({label:a,start:o,end:s,text:n.slice(o,s)}),i.push({label:p,start:h,end:g,text:n.slice(h,g)}),a=null))}return a!==null&&i.push({label:a,start:o,end:s,text:n.slice(o,s)}),i}const 
s=[/https?:\/\/\S+/,/[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}/i,/[a-f0-9]{40}/,/(?:^|\/)[\w._-]+\.[\w._-]+(?:\/|$)/],c=[/[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/,/(?:\+?1[-.\s]?)?\(?[0-9]{3}\)?[-.\s]?[0-9]{3}[-.\s]?[0-9]{4}/,/(?:AKIA|ABIA|ACCA|ASIA)[A-Z0-9]{16}/,/sk-[a-zA-Z0-9]{20,}/,/ghp_[a-zA-Z0-9]{36}/,/gho_[a-zA-Z0-9]{36}/,/github_pat_[a-zA-Z0-9_]{22,}/,/xox[bpras]-[a-zA-Z0-9-]+/,/sk_live_[a-zA-Z0-9]{24,}/,/pk_live_[a-zA-Z0-9]{24,}/];function buildSkipRanges(e,t){let n=[];for(let r of t){let t=new RegExp(r.source,r.flags.includes(`g`)?r.flags:`${r.flags}g`),i;for(;(i=t.exec(e))!==null;)n.push([i.index,i.index+i[0].length])}return n}function isInSkipRange(e,t,n){return n.some(([n,r])=>e<r&&t>n)}function mergeSpans(e){if(e.length<=1)return e;let t=[...e].sort((e,t)=>e.start-t.start),n=[t[0]];for(let e=1;e<t.length;e++){let r=t[e],i=n[n.length-1];r.start<=i.end?r.end>i.end&&(i.end=r.end):n.push(r)}return n}function applyRedactions(e,t){let n=e;for(let e of[...t].sort((e,t)=>t.start-e.start))n=n.slice(0,e.start)+e.replacement+n.slice(e.end);return n}const l={threshold:4,minLength:20,candidatePattern:/[A-Za-z0-9_\-./+=:~!]{20,}/g,skipPatterns:s,piiPatterns:c},u={};function createConfig(e){let t=e?.config;return{entropy:{...l,...t?.entropy},model:{...u,...t?.model,...e?.modelPath?{modelPath:e.modelPath}:{}},redactionToken:e?.redactionToken??t?.redactionToken??`[REDACTED]`}}const d=new Set([`secret`]);export{buildSkipRanges as a,o as c,isModelPresent as d,applyRedactions as i,OnnxClassifier as l,d as n,isInSkipRange as o,createConfig as r,mergeSpans as s,l as t,getDefaultModelCacheDir as u};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
var e=Object.create,t=Object.defineProperty,__name=(e,n)=>t(e,`name`,{value:n,configurable:!0}),n=Object.getOwnPropertyDescriptor,r=Object.getOwnPropertyNames,i=Object.getPrototypeOf,a=Object.prototype.hasOwnProperty,__copyProps=(e,i,o,s)=>{if(i&&typeof i==`object`||typeof i==`function`)for(var c=r(i),l=0,u=c.length,d;l<u;l++)d=c[l],!a.call(e,d)&&d!==o&&t(e,d,{get:(e=>i[e]).bind(null,d),enumerable:!(s=n(i,d))||s.enumerable});return e},__toESM=(n,r,a)=>(a=n==null?{}:e(i(n)),__copyProps(r||!n||!n.__esModule?t(a,`default`,{value:n,enumerable:!0}):a,n));let o=require(`fs`),s=require(`os`),c=require(`path`);const l=[`onnx/model_quantized.onnx`,`onnx/model_quantized.onnx_data`,`tokenizer.json`,`config.json`];function getDefaultModelCacheDir(){return(0,c.join)((0,s.homedir)(),`.cache`,`stackone`,`pii-redaction`,`privacy-filter`)}function isModelPresent(e){return l.every(t=>(0,o.existsSync)((0,c.join)(e,t)))}var OnnxClassifier=class{constructor(e){this.session=null,this.tokenizer=null,this.OrtTensor=null,this.id2label={},this.loadingPromise=null,this.modelPath=e??getDefaultModelCacheDir()}async loadModel(e){if(e&&(this.modelPath=e),!(this.session&&this.tokenizer)){if(this.loadingPromise)return this.loadingPromise;this.loadingPromise=this._loadModel();try{await this.loadingPromise}catch(e){throw this.loadingPromise=null,e}}}async _loadModel(){if(!isModelPresent(this.modelPath)){let e=l.filter(e=>!(0,o.existsSync)((0,c.join)(this.modelPath,e)));throw Error(`Tier 2 model not found at ${this.modelPath}. Missing: ${e.join(`, `)}. 
Run \`npm run download-model\` in the redaction package, or provide a modelPath.`)}let{Tokenizer:e}=await import(`@huggingface/transformers`);this.tokenizer=await e.from_pretrained(this.modelPath,{local_files_only:!0});let t=await import(`onnxruntime-node`);this.OrtTensor=t.Tensor;let n=(0,c.resolve)(this.modelPath,`onnx`,`model_quantized.onnx`);this.session=await t.InferenceSession.create(n);let r=(0,c.resolve)(this.modelPath,`config.json`),i=JSON.parse((0,o.readFileSync)(r,`utf-8`));this.id2label={};for(let[e,t]of Object.entries(i.id2label))this.id2label[Number(e)]=t}async detect(e){if(await this.ensureLoaded(),!this.tokenizer||!this.OrtTensor||!this.session)throw Error(`Model not loaded. Call loadModel() first.`);let t=this.tokenizer.encode(e),n=t.ids,r=t.offsets;if(n.length===0)return[];let i=new BigInt64Array(n.map(e=>BigInt(e))),a=new BigInt64Array(n.length).fill(1n),o=new this.OrtTensor(`int64`,i,[1,n.length]),s=new this.OrtTensor(`int64`,a,[1,n.length]),c=(await this.session.run({input_ids:o,attention_mask:s})).logits;if(!c)throw Error(`ONNX model returned no logits`);let l=n.length,u=c.dims[2]??0,d=[];for(let e=0;e<l;e++){let t=-1/0,n=0;for(let r=0;r<u;r++){let i=Number(c.data[e*u+r]);i>t&&(t=i,n=r)}d.push(n)}return bioesToSpans(d,r,e,this.id2label)}async warmup(){await this.loadModel()}isLoaded(){return this.session!==null&&this.tokenizer!==null}async ensureLoaded(){(!this.session||!this.tokenizer)&&await this.loadModel()}};function bioesToSpans(e,t,n,r){let i=[],a=null,o=-1,s=-1;for(let c=0;c<e.length;c++){let l=e[c],u=l===void 0?`O`:r[l]??`O`;if(u===`O`){a!==null&&(i.push({label:a,start:o,end:s,text:n.slice(o,s)}),a=null);continue}let d=u.indexOf(`-`);if(d===-1)continue;let 
f=u.slice(0,d),p=u.slice(d+1),m=t[c];if(!m)continue;let[h,g]=m;f===`S`?(a!==null&&i.push({label:a,start:o,end:s,text:n.slice(o,s)}),i.push({label:p,start:h,end:g,text:n.slice(h,g)}),a=null):f===`B`?(a!==null&&i.push({label:a,start:o,end:s,text:n.slice(o,s)}),a=p,o=h,s=g):f===`I`?a===p?s=g:(a!==null&&i.push({label:a,start:o,end:s,text:n.slice(o,s)}),a=p,o=h,s=g):f===`E`&&(a===p?(s=g,i.push({label:a,start:o,end:s,text:n.slice(o,s)}),a=null):(a!==null&&i.push({label:a,start:o,end:s,text:n.slice(o,s)}),i.push({label:p,start:h,end:g,text:n.slice(h,g)}),a=null))}return a!==null&&i.push({label:a,start:o,end:s,text:n.slice(o,s)}),i}const u=[/https?:\/\/\S+/,/[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}/i,/[a-f0-9]{40}/,/(?:^|\/)[\w._-]+\.[\w._-]+(?:\/|$)/],d=[/[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/,/(?:\+?1[-.\s]?)?\(?[0-9]{3}\)?[-.\s]?[0-9]{3}[-.\s]?[0-9]{4}/,/(?:AKIA|ABIA|ACCA|ASIA)[A-Z0-9]{16}/,/sk-[a-zA-Z0-9]{20,}/,/ghp_[a-zA-Z0-9]{36}/,/gho_[a-zA-Z0-9]{36}/,/github_pat_[a-zA-Z0-9_]{22,}/,/xox[bpras]-[a-zA-Z0-9-]+/,/sk_live_[a-zA-Z0-9]{24,}/,/pk_live_[a-zA-Z0-9]{24,}/];function buildSkipRanges(e,t){let n=[];for(let r of t){let t=new RegExp(r.source,r.flags.includes(`g`)?r.flags:`${r.flags}g`),i;for(;(i=t.exec(e))!==null;)n.push([i.index,i.index+i[0].length])}return n}function isInSkipRange(e,t,n){return n.some(([n,r])=>e<r&&t>n)}function mergeSpans(e){if(e.length<=1)return e;let t=[...e].sort((e,t)=>e.start-t.start),n=[t[0]];for(let e=1;e<t.length;e++){let r=t[e],i=n[n.length-1];r.start<=i.end?r.end>i.end&&(i.end=r.end):n.push(r)}return n}function applyRedactions(e,t){let n=e;for(let e of[...t].sort((e,t)=>t.start-e.start))n=n.slice(0,e.start)+e.replacement+n.slice(e.end);return n}const f={threshold:4,minLength:20,candidatePattern:/[A-Za-z0-9_\-./+=:~!]{20,}/g,skipPatterns:u,piiPatterns:d},p={},m=`[REDACTED]`;function createConfig(e){let 
t=e?.config;return{entropy:{...f,...t?.entropy},model:{...p,...t?.model,...e?.modelPath?{modelPath:e.modelPath}:{}},redactionToken:e?.redactionToken??t?.redactionToken??`[REDACTED]`}}const h=new Set([`secret`]);Object.defineProperty(exports,`a`,{enumerable:!0,get:function(){return buildSkipRanges}}),Object.defineProperty(exports,`c`,{enumerable:!0,get:function(){return l}}),Object.defineProperty(exports,`d`,{enumerable:!0,get:function(){return isModelPresent}}),Object.defineProperty(exports,`f`,{enumerable:!0,get:function(){return __name}}),Object.defineProperty(exports,`i`,{enumerable:!0,get:function(){return applyRedactions}}),Object.defineProperty(exports,`l`,{enumerable:!0,get:function(){return OnnxClassifier}}),Object.defineProperty(exports,`n`,{enumerable:!0,get:function(){return h}}),Object.defineProperty(exports,`o`,{enumerable:!0,get:function(){return isInSkipRange}}),Object.defineProperty(exports,`p`,{enumerable:!0,get:function(){return __toESM}}),Object.defineProperty(exports,`r`,{enumerable:!0,get:function(){return createConfig}}),Object.defineProperty(exports,`s`,{enumerable:!0,get:function(){return mergeSpans}}),Object.defineProperty(exports,`t`,{enumerable:!0,get:function(){return f}}),Object.defineProperty(exports,`u`,{enumerable:!0,get:function(){return getDefaultModelCacheDir}});
|
package/dist/node/index.cjs
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
const e=require(`./config-
|
|
1
|
+
// Shared chunk: a=buildSkipRanges, c=MODEL_FILES, d=isModelPresent,
// i=applyRedactions, o=isInSkipRange, p=__toESM, r=createConfig,
// s=mergeSpans, t=default entropy config, u=getDefaultModelCacheDir.
const config = require(`./config-tfC_1JL3.cjs`);
const utils = require(`@stackone/utils`);
const fastRedact = config.p(require(`fast-redact`));

/** Key names whose values are always treated as secrets. */
const SENSITIVE_KEYS = [
  `client_secret`,
  `access_token`,
  `refresh_token`,
  `api_key`,
  `password`,
  `job_board_token`,
  `private_key`,
  `certificate`,
  `service_user_token`,
  `key_id`,
  `secret_key`,
  `provhash`,
  `admin_key`,
  `session_key`,
  `id_token`,
  `authorization`,
  `bh_rest_token`,
  `external_trigger_token`,
  `tempauth`,
  `token`,
];

/** Object paths (dot / bracket notation) that `redactObject` censors. */
const SENSITIVE_PATHS = [
  `req.headers.authorization`,
  `req.headers.cookie`,
  `req.headers.cookies`,
  `req.headers["set-cookie"]`,
  `req.headers["set-cookies"]`,
  `req.headers.httpsAgent.options.cert`,
  `req.headers.httpsAgent.options.key`,
  `req.headers["x-stackone-external-trigger-token"]`,
  `error.config.headers.authorization`,
  `error.config.data`,
  `error.config.headers.cookie`,
  `error.config.headers.cookies`,
  `error.config.headers["set-cookie"]`,
  `error.config.headers["set-cookies"]`,
  `error.config.httpsAgent.options.cert`,
  `error.config.httpsAgent.options.key`,
  `err.config.headers.authorization`,
  `err.config.data`,
  `err.config.headers.cookie`,
  `err.config.headers.cookies`,
  `err.config.headers["set-cookie"]`,
  `err.config.headers["set-cookies"]`,
  `err.config.httpsAgent.options.cert`,
  `err.config.httpsAgent.options.key`,
  `res.headers.authorization`,
  `res.headers.cookie`,
  `res.headers.cookies`,
  `res.headers["set-cookie"]`,
  `res.headers["set-cookies"]`,
  `res.headers.httpsAgent.options.cert`,
  `res.headers.httpsAgent.options.key`,
  `context.credentials`,
];

/** Replacement text written in place of a secret. */
const REDACTED = `**redacted**`;

/** Censoring modes (exported as `CensorType`). */
const CensorType = { FULL: `FULL`, PARTIAL: `PARTIAL` };

/** Full censor: every value becomes the redaction marker. */
const censorFn = (_value) => REDACTED;

/**
 * Partial censor: strings longer than 10 characters keep their last 5
 * characters after the marker; already-censored strings pass through;
 * everything else is fully censored.
 */
const partialCensorFn = (value) => {
  if (typeof value !== `string`) return REDACTED;
  if (value.startsWith(REDACTED)) return value;
  return value.length <= 10 ? REDACTED : `${REDACTED}${value.slice(-5)}`;
};

/** Upper-cases the first character of `word`. */
const capitalizeWord = (word) => `${word.charAt(0).toUpperCase()}${word.slice(1)}`;

/** Capitalizes the segment after the last dot (or the whole string if there is no dot). */
const capitalizeLastWordAfterLastDot = (path) => {
  const segments = path.split(`.`);
  if (segments.length < 2) return capitalizeWord(path);
  const last = capitalizeWord(segments.pop() ?? ``);
  return [...segments, last].join(`.`);
};

/**
 * Capitalizes the quoted word inside the last `[...]` of `path`.
 * Returns `undefined` when the path has no bracket pair.
 */
const capitalizeWordInsideLastBrackets = (path) => {
  const open = path.lastIndexOf(`[`);
  const close = path.indexOf(`]`, open);
  if (open === -1 || close === -1) return undefined;
  const bracket = path
    .slice(open, close + 1)
    .replace(/(["'])([^"']*)(["'])/g, (_match, quote, word) =>
      word ? quote + capitalizeWord(word) + quote : quote + quote,
    );
  return `${path.slice(0, open)}${bracket}${path.slice(close + 1)}`;
};

/** Capitalized variant of the last key of a path. */
const capitalizeKey = (path) =>
  capitalizeWordInsideLastBrackets(path) || capitalizeLastWordAfterLastDot(path);

/** Returns the given paths followed by their capitalized variants. */
const getPathsToRedact = (paths = [], enabled = true) => {
  if (!enabled) return [];
  const capitalized = paths.map((path) => capitalizeKey(path));
  return [...paths, ...capitalized];
};

/**
 * Expands each key into the set of its original, camelCase, PascalCase and
 * snake_case spellings.
 */
const getKeysToRedact = (keys = [], enabled = true) => {
  const variants = new Set();
  if (!enabled) return variants;
  for (const key of keys) {
    variants.add(key);
    let camel;
    if (key.includes(`_`)) {
      camel = key.replace(/_([a-z])/g, (_match, letter) => letter.toUpperCase());
    } else if (/^[A-Z]/.test(key)) {
      camel = key.charAt(0).toLowerCase() + key.slice(1);
    } else {
      camel = key;
    }
    const pascal = camel.charAt(0).toUpperCase() + camel.slice(1);
    const snake = camel.replace(/([A-Z])/g, `_$1`).toLowerCase();
    variants.add(camel);
    variants.add(pascal);
    variants.add(snake);
  }
  return variants;
};

/** Censor function per censoring mode. */
const CENSORS = {
  [CensorType.FULL]: censorFn,
  [CensorType.PARTIAL]: partialCensorFn,
};

const SENSITIVE_KEY_SET = getKeysToRedact(SENSITIVE_KEYS);
const REDACT_PATHS = [...getPathsToRedact(SENSITIVE_PATHS), ...Array.from(SENSITIVE_KEY_SET)];

/**
 * Copies query parameters into a new `URLSearchParams`, censoring the values
 * of sensitive names. Uses `set`, so repeated names collapse to the last value.
 */
const redactSearchParams = (params, censorType) => {
  const redacted = new URLSearchParams();
  for (const [name, value] of params) {
    redacted.set(name, SENSITIVE_KEY_SET.has(name) ? CENSORS[censorType](value) : value);
  }
  return redacted;
};

/**
 * Censors sensitive query parameters of a URL (string or `URL`).
 *
 * @returns the rebuilt URL string; `undefined` for a missing value; the input
 *   as a string (unredacted) when it cannot be parsed, after warning `logger`.
 */
const redactUrl = (value, censorType = CensorType.FULL, logger) => {
  if ((0, utils.isMissing)(value)) return undefined;
  try {
    const url = new URL(value);
    const query = redactSearchParams(url.searchParams, censorType);
    const suffix = query.toString() ? `?` + query : ``;
    return `${url.origin}${url.pathname}${suffix}${url.hash}`;
  } catch (error) {
    // NOTE(review): the raw value is handed to the logger and returned unredacted.
    logger?.warning({
      message: `Invalid URL provided, unable to redact`,
      context: { value, error },
      category: `redactUrl`,
    });
    return typeof value === `string` ? value : value?.toString();
  }
};

/**
 * Censors sensitive query parameters of a path such as `/a?token=x`.
 *
 * @returns the redacted path; the input unchanged when it has no query part;
 *   `undefined` for a missing value.
 */
const redactPath = (value, censorType = CensorType.FULL, logger) => {
  if ((0, utils.isMissing)(value)) return undefined;
  try {
    const [path, query] = value.split(`?`);
    if (!query) return value;
    return `${path}?${redactSearchParams(new URLSearchParams(query), censorType)}`;
  } catch (error) {
    logger?.warning({
      message: `Invalid path provided, unable to redact`,
      context: { value, error },
      category: `redactPath`,
    });
    return value;
  }
};

/**
 * Returns a copy of a plain object with sensitive keys censored.
 * Non-plain objects and primitives are returned as-is; arrays are
 * shallow-copied unless `recursive` is set.
 */
const redactFields = (value, censorType = CensorType.FULL, recursive = false) => {
  if (typeof value !== `object` || !value) return value;
  if (Array.isArray(value)) {
    if (!recursive) return [...value];
    return value.map((item) =>
      typeof item === `object` && item ? redactFields(item, censorType, recursive) : item,
    );
  }
  // Class instances (Date, Map, custom classes, ...) are left untouched.
  if (value.constructor !== Object && value.constructor !== undefined) return value;
  const result = {};
  for (const [key, field] of Object.entries(value)) {
    if (SENSITIVE_KEY_SET.has(key)) {
      result[key] = CENSORS[censorType](field);
    } else if (recursive && typeof field === `object` && field) {
      result[key] = redactFields(field, censorType, recursive);
    } else {
      result[key] = field;
    }
  }
  return result;
};

/**
 * Shannon entropy (bits per character) of `text`; 0 for an empty string.
 *
 * Characters are counted by Unicode code point and the total uses the same
 * unit. The previous version divided by the UTF-16 length, so strings with
 * astral characters produced probabilities that did not sum to 1.
 */
function shannonEntropy(text) {
  if (!text) return 0;
  const counts = new Map();
  let total = 0;
  for (const char of text) {
    counts.set(char, (counts.get(char) ?? 0) + 1);
    total++;
  }
  let entropy = 0;
  for (const count of counts.values()) {
    const probability = count / total;
    entropy -= probability * Math.log2(probability);
  }
  return entropy;
}

/** Returns a fresh copy of `pattern` that is guaranteed to carry the `g` flag. */
const toGlobalRegExp = (pattern) =>
  new RegExp(pattern.source, pattern.flags.includes(`g`) ? pattern.flags : `${pattern.flags}g`);

/**
 * Finds high-entropy candidates and PII-pattern matches in `text` and replaces
 * them with `redactionToken`.
 *
 * Candidates are skipped when shorter than `minLength`, when overlapping a
 * skip range, or when their entropy is below `threshold`. Empty regex matches
 * are stepped over so a pattern that can match the empty string cannot stall
 * the scan.
 *
 * @returns `{ scrubbed, redactions }` where `redactions` are the merged spans.
 */
function entropyScrub(text, redactionToken = `[REDACTED]`, entropyConfig = config.t) {
  const skipRanges = config.a(text, entropyConfig.skipPatterns);
  const found = [];

  const candidatePattern = toGlobalRegExp(entropyConfig.candidatePattern);
  let candidate;
  while ((candidate = candidatePattern.exec(text)) !== null) {
    const token = candidate[0];
    if (token.length === 0) {
      candidatePattern.lastIndex++;
      continue;
    }
    if (token.length < entropyConfig.minLength) continue;
    if (config.o(candidate.index, candidate.index + token.length, skipRanges)) continue;
    if (shannonEntropy(token) >= entropyConfig.threshold) {
      found.push({
        start: candidate.index,
        end: candidate.index + token.length,
        original: token,
        replacement: redactionToken,
        source: `entropy`,
      });
    }
  }

  for (const pattern of entropyConfig.piiPatterns ?? []) {
    const piiPattern = toGlobalRegExp(pattern);
    let hit;
    while ((hit = piiPattern.exec(text)) !== null) {
      if (hit[0].length === 0) {
        piiPattern.lastIndex++;
        continue;
      }
      const start = hit.index;
      const end = start + hit[0].length;
      if (config.o(start, end, skipRanges)) continue;
      found.push({ start, end, original: hit[0], replacement: redactionToken, source: `entropy` });
    }
  }

  const redactions = config.s(found);
  return { scrubbed: config.i(text, redactions), redactions };
}

/**
 * Text scrubber combining the entropy scan with an optional, lazily loaded
 * "tier 2" model classifier.
 */
class PiiRedaction {
  /** @param options - options passed to the shared `createConfig`, plus `enableTier2` and `logger`. */
  constructor(options) {
    this.tier2 = null;
    this.tier2LoadPromise = null;
    this.config = config.r(options);
    this.enableTier2 = options?.enableTier2 ?? false;
    this.logger = options?.logger;
  }

  /** Loads and warms up the tier 2 classifier when it is enabled. */
  async warmupTier2() {
    if (!this.enableTier2) return;
    await this.loadTier2();
    if (this.tier2) await this.tier2.warmup();
  }

  /** True when the tier 2 classifier exists and reports ready. */
  isTier2Ready() {
    return this.tier2?.isReady() ?? false;
  }

  /** Synchronous, entropy-only scrub. */
  scrub(text) {
    const startedAt = performance.now();
    const { scrubbed, redactions } = entropyScrub(text, this.config.redactionToken, this.config.entropy);
    return {
      scrubbed,
      redactions,
      tier: `entropy`,
      modelAvailable: false,
      latencyMs: performance.now() - startedAt,
    };
  }

  /**
   * Scrub using the model (when enabled) plus the entropy scan. If the model
   * cannot be loaded or fails, a warning is logged and only the entropy
   * results are used.
   */
  async scrubAsync(text) {
    const startedAt = performance.now();
    if (!this.enableTier2) return this.scrub(text);
    let modelAvailable = false;
    const spans = [];
    try {
      await this.loadTier2();
      if (!this.tier2) throw Error(`Tier 2 classifier not loaded`);
      const detections = await this.tier2.detectSecrets(text);
      for (const detection of detections) {
        spans.push({
          start: detection.start,
          end: detection.end,
          original: detection.text,
          replacement: this.config.redactionToken,
          source: `model`,
          label: detection.label,
        });
      }
      modelAvailable = true;
    } catch (error) {
      this.logger?.warning({
        category: `pii-redaction`,
        message: `Tier 2 model detection failed, falling back to entropy only`,
        error: error instanceof Error ? error : undefined,
        code: `TIER2_FALLBACK`,
      });
    }
    const { redactions: entropySpans } = entropyScrub(text, this.config.redactionToken, this.config.entropy);
    spans.push(...entropySpans);
    const redactions = config.s(spans);
    return {
      scrubbed: config.i(text, redactions),
      redactions,
      tier: modelAvailable ? `model+entropy` : `entropy`,
      modelAvailable,
      latencyMs: performance.now() - startedAt,
    };
  }

  /** Returns the entropy-scan redaction spans without modifying the text. */
  detect(text) {
    const { redactions } = entropyScrub(text, this.config.redactionToken, this.config.entropy);
    return redactions;
  }

  /** Returns a structured clone of the effective configuration. */
  getConfig() {
    return structuredClone(this.config);
  }

  /**
   * Loads the tier 2 classifier chunk once; concurrent callers share the same
   * in-flight promise. A failed load clears the promise so it can be retried.
   */
  async loadTier2() {
    if (this.tier2) return;
    if (this.tier2LoadPromise) return this.tier2LoadPromise;
    this.tier2LoadPromise = (async () => {
      const { Tier2Classifier } = await Promise.resolve().then(() =>
        require(`./tier2-classifier-uX1K94UJ.cjs`),
      );
      this.tier2 = new Tier2Classifier({ onnxModelPath: this.config.model.modelPath });
    })();
    try {
      await this.tier2LoadPromise;
    } catch (error) {
      this.tier2LoadPromise = null;
      throw Error(`Failed to load Tier 2 classifier: ${error instanceof Error ? error.message : String(error)}. Ensure onnxruntime-node and @huggingface/transformers are installed, and the model has been downloaded via \`npm run download-model\`.`);
    }
  }
}

/** Convenience factory for `PiiRedaction`. */
function createPiiRedaction(options) {
  return new PiiRedaction(options);
}

let cachedRedactors;

/** Lazily builds one fast-redact redactor per censoring mode. */
const getRedactors = () => {
  if ((0, utils.isMissing)(cachedRedactors)) {
    cachedRedactors = {
      [CensorType.FULL]: (0, fastRedact.default)({
        paths: REDACT_PATHS,
        serialize: false,
        censor: CENSORS[CensorType.FULL],
      }),
      [CensorType.PARTIAL]: (0, fastRedact.default)({
        paths: REDACT_PATHS,
        serialize: false,
        censor: CENSORS[CensorType.PARTIAL],
      }),
    };
  }
  return cachedRedactors;
};

/** Returns a redacted deep copy of `value`; missing values and non-objects are returned as-is. */
const redactObject = (value, censorType = CensorType.FULL) => {
  if ((0, utils.isMissing)(value) || typeof value !== `object` || !value) return value;
  return getRedactors()[censorType]((0, utils.deepCopy)(value));
};

exports.CensorType = CensorType;
exports.I_AM_EDGE = false;
exports.MODEL_FILES = config.c;
exports.PiiRedaction = PiiRedaction;
exports.createPiiRedaction = createPiiRedaction;
exports.entropyScrub = entropyScrub;
exports.getDefaultModelCacheDir = config.u;
exports.isModelPresent = config.d;
exports.redactFields = redactFields;
exports.redactObject = redactObject;
exports.redactPath = redactPath;
exports.redactUrl = redactUrl;
exports.shannonEntropy = shannonEntropy;
|
package/dist/node/index.d.cts
CHANGED
|
@@ -67,7 +67,7 @@ declare enum CensorType {
|
|
|
67
67
|
FULL = "FULL",
|
|
68
68
|
PARTIAL = "PARTIAL",
|
|
69
69
|
}
|
|
70
|
-
declare const redactUrl: (value: string | undefined, censorType?: CensorType, logger?: ILogger) => string | undefined;
|
|
70
|
+
/**
 * Censors the values of sensitive query parameters in a URL.
 *
 * The result is rebuilt from the parsed URL's origin, pathname, (redacted) query
 * string and hash. If `value` cannot be parsed as a URL, a warning is sent to
 * `logger` (when provided) and the input is returned as a string, unredacted.
 *
 * @param value - URL string or `URL` object to redact.
 * @param censorType - Censoring mode; the implementation defaults to `CensorType.FULL`.
 * @param logger - Optional logger that receives the parse-failure warning.
 * @returns The redacted URL string, or `undefined` when `value` is missing.
 */
declare const redactUrl: (value: string | URL | undefined, censorType?: CensorType, logger?: ILogger) => string | undefined;
|
|
71
71
|
/**
 * Censors the values of sensitive query parameters in a path such as `/a/b?token=x`.
 *
 * The value is split on `?`; when there is no query part the input is returned
 * unchanged. On failure a warning is sent to `logger` (when provided) and the
 * input is returned as-is.
 *
 * @param value - Path, optionally followed by a query string.
 * @param censorType - Censoring mode; the implementation defaults to `CensorType.FULL`.
 * @param logger - Optional logger that receives the failure warning.
 * @returns The path with redacted query values, or `undefined` when `value` is missing.
 */
declare const redactPath: (value: string | undefined, censorType?: CensorType, logger?: ILogger) => string | undefined;
|
|
72
72
|
/**
 * Returns a copy of a plain object in which the values of sensitive keys are censored.
 *
 * Non-objects and objects whose constructor is not `Object` are returned as-is.
 * Arrays are shallow-copied; their object elements are only processed when
 * `recursive` is true. Nested plain objects are likewise only processed when
 * `recursive` is true.
 *
 * @param value - Value to redact.
 * @param censorType - Censoring mode; the implementation defaults to `CensorType.FULL`.
 * @param recursive - Whether to descend into nested objects and arrays (default `false`).
 * @returns The redacted copy, or the input itself when it is not a plain object or array.
 */
declare const redactFields: <T = unknown>(value: T, censorType?: CensorType, recursive?: boolean) => T;
|
|
73
73
|
//#endregion
|
|
@@ -120,6 +120,11 @@ interface ModelDetection {
|
|
|
120
120
|
text: string;
|
|
121
121
|
}
|
|
122
122
|
//#endregion
|
|
123
|
+
//#region src/pii/classifiers/onnx-classifier.d.ts
|
|
124
|
+
/**
 * Relative paths of the files that must exist inside a model directory
 * (ONNX model, its data file, `tokenizer.json` and `config.json`).
 */
declare const MODEL_FILES: string[];
|
|
125
|
+
/**
 * Returns the default model directory:
 * `<home directory>/.cache/stackone/pii-redaction/privacy-filter`.
 */
declare function getDefaultModelCacheDir(): string;
|
|
126
|
+
/**
 * Checks whether every file listed in `MODEL_FILES` exists under `modelPath`.
 *
 * @param modelPath - Directory expected to contain the model files.
 * @returns `true` only when all files are present.
 */
declare function isModelPresent(modelPath: string): boolean;
|
|
127
|
+
//#endregion
|
|
123
128
|
//#region src/pii/core/pii-redaction.d.ts
|
|
124
129
|
declare class PiiRedaction {
|
|
125
130
|
private config;
|
|
@@ -146,4 +151,4 @@ declare function entropyScrub(text: string, redactionToken?: string, config?: En
|
|
|
146
151
|
/**
 * Returns a deep copy of `obj` with the configured sensitive paths and keys censored.
 * Missing values and non-objects are returned unchanged.
 *
 * @param obj - Value to redact; the input itself is not modified.
 * @param censorType - Censoring mode; the implementation defaults to `CensorType.FULL`.
 */
declare const redactObject: <T>(obj: T, censorType?: CensorType) => T;
|
|
147
152
|
/** Build marker: `false` in this (Node) build. */
declare const I_AM_EDGE = false;
|
|
148
153
|
//#endregion
|
|
149
|
-
export { CensorType, type EntropyConfig, type EntropyScrubResult, I_AM_EDGE, type ModelConfig, type ModelDetection, PiiRedaction, type PiiRedactionConfig, type PiiRedactionOptions, type RedactionResult, type RedactionSpan, createPiiRedaction, entropyScrub, redactFields, redactObject, redactPath, redactUrl, shannonEntropy };
|
|
154
|
+
export { CensorType, type EntropyConfig, type EntropyScrubResult, I_AM_EDGE, MODEL_FILES, type ModelConfig, type ModelDetection, PiiRedaction, type PiiRedactionConfig, type PiiRedactionOptions, type RedactionResult, type RedactionSpan, createPiiRedaction, entropyScrub, getDefaultModelCacheDir, isModelPresent, redactFields, redactObject, redactPath, redactUrl, shannonEntropy };
|
package/dist/node/index.d.mts
CHANGED
|
@@ -68,7 +68,7 @@ declare enum CensorType {
|
|
|
68
68
|
FULL = "FULL",
|
|
69
69
|
PARTIAL = "PARTIAL",
|
|
70
70
|
}
|
|
71
|
-
declare const redactUrl: (value: string | undefined, censorType?: CensorType, logger?: ILogger) => string | undefined;
|
|
71
|
+
declare const redactUrl: (value: string | URL | undefined, censorType?: CensorType, logger?: ILogger) => string | undefined;
|
|
72
72
|
declare const redactPath: (value: string | undefined, censorType?: CensorType, logger?: ILogger) => string | undefined;
|
|
73
73
|
declare const redactFields: <T = unknown>(value: T, censorType?: CensorType, recursive?: boolean) => T;
|
|
74
74
|
//#endregion
|
|
@@ -121,6 +121,11 @@ interface ModelDetection {
|
|
|
121
121
|
text: string;
|
|
122
122
|
}
|
|
123
123
|
//#endregion
|
|
124
|
+
//#region src/pii/classifiers/onnx-classifier.d.ts
|
|
125
|
+
declare const MODEL_FILES: string[];
|
|
126
|
+
declare function getDefaultModelCacheDir(): string;
|
|
127
|
+
declare function isModelPresent(modelPath: string): boolean;
|
|
128
|
+
//#endregion
|
|
124
129
|
//#region src/pii/core/pii-redaction.d.ts
|
|
125
130
|
declare class PiiRedaction {
|
|
126
131
|
private config;
|
|
@@ -147,4 +152,4 @@ declare function entropyScrub(text: string, redactionToken?: string, config?: En
|
|
|
147
152
|
declare const redactObject: <T>(obj: T, censorType?: CensorType) => T;
|
|
148
153
|
declare const I_AM_EDGE = false;
|
|
149
154
|
//#endregion
|
|
150
|
-
export { CensorType, type EntropyConfig, type EntropyScrubResult, I_AM_EDGE, type ModelConfig, type ModelDetection, PiiRedaction, type PiiRedactionConfig, type PiiRedactionOptions, type RedactionResult, type RedactionSpan, createPiiRedaction, entropyScrub, redactFields, redactObject, redactPath, redactUrl, shannonEntropy };
|
|
155
|
+
export { CensorType, type EntropyConfig, type EntropyScrubResult, I_AM_EDGE, MODEL_FILES, type ModelConfig, type ModelDetection, PiiRedaction, type PiiRedactionConfig, type PiiRedactionOptions, type RedactionResult, type RedactionSpan, createPiiRedaction, entropyScrub, getDefaultModelCacheDir, isModelPresent, redactFields, redactObject, redactPath, redactUrl, shannonEntropy };
|
package/dist/node/index.mjs
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
import{t as e}from"./chunk-Cfxk5zVN.mjs";import{a as t,
|
|
1
|
+
import{t as e}from"./chunk-Cfxk5zVN.mjs";import{a as t,c as n,d as r,i,o as a,r as o,s,t as c,u as l}from"./config-M7RWfWu6.mjs";import{deepCopy as u,isMissing as d}from"@stackone/utils";import f from"fast-redact";const p=[`client_secret`,`access_token`,`refresh_token`,`api_key`,`password`,`job_board_token`,`private_key`,`certificate`,`service_user_token`,`key_id`,`secret_key`,`provhash`,`admin_key`,`session_key`,`id_token`,`authorization`,`bh_rest_token`,`external_trigger_token`,`tempauth`,`token`],m=`req.headers.authorization,req.headers.cookie,req.headers.cookies,req.headers["set-cookie"],req.headers["set-cookies"],req.headers.httpsAgent.options.cert,req.headers.httpsAgent.options.key,req.headers["x-stackone-external-trigger-token"],error.config.headers.authorization,error.config.data,error.config.headers.cookie,error.config.headers.cookies,error.config.headers["set-cookie"],error.config.headers["set-cookies"],error.config.httpsAgent.options.cert,error.config.httpsAgent.options.key,err.config.headers.authorization,err.config.data,err.config.headers.cookie,err.config.headers.cookies,err.config.headers["set-cookie"],err.config.headers["set-cookies"],err.config.httpsAgent.options.cert,err.config.httpsAgent.options.key,res.headers.authorization,res.headers.cookie,res.headers.cookies,res.headers["set-cookie"],res.headers["set-cookies"],res.headers.httpsAgent.options.cert,res.headers.httpsAgent.options.key,context.credentials`.split(`,`),h=`**redacted**`;let g=function(e){return e.FULL=`FULL`,e.PARTIAL=`PARTIAL`,e}({});const censorFn=e=>h,partialCensorFn=e=>typeof e==`string`?e.startsWith(h)?e:e.length<=10?h:`${h}${e.slice(-5)}`:h,capitalizeWord=e=>`${e.charAt(0).toUpperCase()}${e.slice(1)}`,capitalizeLastWordAfterLastDot=e=>{let t=e.split(`.`);if(t.length<2)return capitalizeWord(e);let n=capitalizeWord(t.pop()??``);return[...t,n].join(`.`)},capitalizeWordInsideLastBrackets=e=>{let t=e.lastIndexOf(`[`),n=e.indexOf(`]`,t);if(t===-1||n===-1)return;let 
r=e.slice(t,n+1).replace(/(["'])([^"']*)(["'])/g,(e,t,n)=>n?t+capitalizeWord(n)+t:t+t);return`${e.slice(0,t)}${r}${e.slice(n+1)}`},capitalizeKey=e=>capitalizeWordInsideLastBrackets(e)||capitalizeLastWordAfterLastDot(e),getPathsToRedact=(e=[],t=!0)=>{if(!t)return[];let n=e.map(e=>capitalizeKey(e));return[...e,...n]},getKeysToRedact=(e=[],t=!0)=>{if(!t)return new Set;let n=new Set;return e.forEach(e=>{n.add(e);let t;t=e.includes(`_`)?e.replace(/_([a-z])/g,(e,t)=>t.toUpperCase()):/^[A-Z]/.test(e)?e.charAt(0).toLowerCase()+e.slice(1):e;let r=t.charAt(0).toUpperCase()+t.slice(1),i=t.replace(/([A-Z])/g,`_$1`).toLowerCase();n.add(t),n.add(r),n.add(i)}),n},_={[g.FULL]:censorFn,[g.PARTIAL]:partialCensorFn},v=getKeysToRedact(p),y=[...getPathsToRedact(m),...Array.from(v)],redactUrl=(e,t=g.FULL,n)=>{if(!d(e))try{let n=new URL(e),r=[...n.searchParams].reduce((e,[n,r])=>(e.set(n,v.has(n)?_[t](r):r),e),new URLSearchParams);return`${n.origin}${n.pathname}${r.toString()?`?`+r:``}${n.hash}`}catch(t){return n?.warning({message:`Invalid URL provided, unable to redact`,context:{value:e,error:t},category:`redactUrl`}),typeof e==`string`?e:e?.toString()}},redactPath=(e,t=g.FULL,n)=>{if(!d(e))try{let[n,r]=e.split(`?`);return r?`${n}?${[...new URLSearchParams(r)].reduce((e,[n,r])=>(e.set(n,v.has(n)?_[t](r):r),e),new URLSearchParams)}`:e}catch(t){return n?.warning({message:`Invalid path provided, unable to redact`,context:{value:e,error:t},category:`redactPath`}),e}},redactFields=(e,t=g.FULL,n=!1)=>typeof e!=`object`||!e?e:Array.isArray(e)?n?e.map(e=>typeof e==`object`&&e?redactFields(e,t,n):e):[...e]:e.constructor!==Object&&e.constructor!==void 0?e:Object.entries(e).reduce((e,[r,i])=>(v.has(r)?e[r]=_[t](i):n&&typeof i==`object`&&i?e[r]=redactFields(i,t,n):e[r]=i,e),{});function shannonEntropy(e){if(!e)return 0;let t=new Map;for(let n of e)t.set(n,(t.get(n)??0)+1);let n=e.length,r=0;for(let e of t.values()){let t=e/n;r-=t*Math.log2(t)}return r}function entropyScrub(e,n=`[REDACTED]`,r=c){let 
o=t(e,r.skipPatterns),l=[],u=r.candidatePattern.flags.includes(`g`)?r.candidatePattern.flags:`${r.candidatePattern.flags}g`,d=new RegExp(r.candidatePattern.source,u),f;for(;(f=d.exec(e))!==null;){let e=f[0];e.length<r.minLength||a(f.index,f.index+e.length,o)||shannonEntropy(e)>=r.threshold&&l.push({start:f.index,end:f.index+e.length,original:e,replacement:n,source:`entropy`})}for(let t of r.piiPatterns??[]){let r=new RegExp(t.source,t.flags.includes(`g`)?t.flags:`${t.flags}g`),i;for(;(i=r.exec(e))!==null;){let e=i.index,t=e+i[0].length;a(e,t,o)||l.push({start:e,end:t,original:i[0],replacement:n,source:`entropy`})}}let p=s(l);return{scrubbed:i(e,p),redactions:p}}var PiiRedaction=class{constructor(e){this.tier2=null,this.tier2LoadPromise=null,this.config=o(e),this.enableTier2=e?.enableTier2??!1,this.logger=e?.logger}async warmupTier2(){this.enableTier2&&(await this.loadTier2(),this.tier2&&await this.tier2.warmup())}isTier2Ready(){return this.tier2?.isReady()??!1}scrub(e){let t=performance.now(),{scrubbed:n,redactions:r}=entropyScrub(e,this.config.redactionToken,this.config.entropy);return{scrubbed:n,redactions:r,tier:`entropy`,modelAvailable:!1,latencyMs:performance.now()-t}}async scrubAsync(e){let t=performance.now();if(!this.enableTier2)return this.scrub(e);let n=!1,r=[];try{if(await this.loadTier2(),!this.tier2)throw Error(`Tier 2 classifier not loaded`);let t=await this.tier2.detectSecrets(e);for(let e of t)r.push({start:e.start,end:e.end,original:e.text,replacement:this.config.redactionToken,source:`model`,label:e.label});n=!0}catch(e){this.logger?.warning({category:`pii-redaction`,message:`Tier 2 model detection failed, falling back to entropy only`,error:e instanceof Error?e:void 0,code:`TIER2_FALLBACK`})}let{redactions:a}=entropyScrub(e,this.config.redactionToken,this.config.entropy);r.push(...a);let 
o=s(r);return{scrubbed:i(e,o),redactions:o,tier:n?`model+entropy`:`entropy`,modelAvailable:n,latencyMs:performance.now()-t}}detect(e){let{redactions:t}=entropyScrub(e,this.config.redactionToken,this.config.entropy);return t}getConfig(){return structuredClone(this.config)}async loadTier2(){if(!this.tier2){if(this.tier2LoadPromise)return this.tier2LoadPromise;this.tier2LoadPromise=(async()=>{let{Tier2Classifier:e}=await import(`./tier2-classifier-CehRVVU3.mjs`);this.tier2=new e({onnxModelPath:this.config.model.modelPath})})();try{await this.tier2LoadPromise}catch(e){throw this.tier2LoadPromise=null,Error(`Failed to load Tier 2 classifier: ${e instanceof Error?e.message:String(e)}. Ensure onnxruntime-node and @huggingface/transformers are installed, and the model has been downloaded via \`npm run download-model\`.`)}}}};function createPiiRedaction(e){return new PiiRedaction(e)}let b;const getRedactors=()=>(d(b)&&(b={[g.FULL]:f({paths:y,serialize:!1,censor:_[g.FULL]}),[g.PARTIAL]:f({paths:y,serialize:!1,censor:_[g.PARTIAL]})}),b),redactObject=(e,t=g.FULL)=>d(e)||typeof e!=`object`||!e?e:getRedactors()[t](u(e)),x=!1;export{g as CensorType,x as I_AM_EDGE,n as MODEL_FILES,PiiRedaction,createPiiRedaction,entropyScrub,l as getDefaultModelCacheDir,r as isModelPresent,redactFields,redactObject,redactPath,redactUrl,shannonEntropy};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
import{t as e}from"./chunk-Cfxk5zVN.mjs";import{i as t,l as n,n as r}from"./config-M7RWfWu6.mjs";var Tier2Classifier=class{constructor(e={}){this.onnxClassifier=new n(e.onnxModelPath)}isReady(){return this.onnxClassifier.isLoaded()}async warmup(){await this.onnxClassifier.warmup()}async detectAll(e){return e.trim()?this.onnxClassifier.detect(e):[]}async detectSecrets(e){return(await this.detectAll(e)).filter(e=>r.has(e.label))}async scrub(e,n=`[REDACTED]`){return e.trim()?t(e,(await this.detectSecrets(e)).map(e=>({start:e.start,end:e.end,original:e.text,replacement:n,source:`model`,label:e.label}))):e}};export{Tier2Classifier};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
const e=require(`./config-tfC_1JL3.cjs`);var Tier2Classifier=class{constructor(t={}){this.onnxClassifier=new e.l(t.onnxModelPath)}isReady(){return this.onnxClassifier.isLoaded()}async warmup(){await this.onnxClassifier.warmup()}async detectAll(e){return e.trim()?this.onnxClassifier.detect(e):[]}async detectSecrets(t){return(await this.detectAll(t)).filter(t=>e.n.has(t.label))}async scrub(t,n=`[REDACTED]`){return t.trim()?e.i(t,(await this.detectSecrets(t)).map(e=>({start:e.start,end:e.end,original:e.text,replacement:n,source:`model`,label:e.label}))):t}};exports.Tier2Classifier=Tier2Classifier;
|
package/package.json
CHANGED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
import{t as e}from"./chunk-Cfxk5zVN.mjs";const t=[/https?:\/\/\S+/,/[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}/i,/[a-f0-9]{40}/,/(?:^|\/)[\w._-]+\.[\w._-]+(?:\/|$)/],n=[/[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/,/(?:\+?1[-.\s]?)?\(?[0-9]{3}\)?[-.\s]?[0-9]{3}[-.\s]?[0-9]{4}/,/(?:AKIA|ABIA|ACCA|ASIA)[A-Z0-9]{16}/,/sk-[a-zA-Z0-9]{20,}/,/ghp_[a-zA-Z0-9]{36}/,/gho_[a-zA-Z0-9]{36}/,/github_pat_[a-zA-Z0-9_]{22,}/,/xox[bpras]-[a-zA-Z0-9-]+/,/sk_live_[a-zA-Z0-9]{24,}/,/pk_live_[a-zA-Z0-9]{24,}/];function buildSkipRanges(e,t){let n=[];for(let r of t){let t=new RegExp(r.source,r.flags.includes(`g`)?r.flags:`${r.flags}g`),i;for(;(i=t.exec(e))!==null;)n.push([i.index,i.index+i[0].length])}return n}function isInSkipRange(e,t,n){return n.some(([n,r])=>e<r&&t>n)}function mergeSpans(e){if(e.length<=1)return e;let t=[...e].sort((e,t)=>e.start-t.start),n=[t[0]];for(let e=1;e<t.length;e++){let r=t[e],i=n[n.length-1];r.start<=i.end?r.end>i.end&&(i.end=r.end):n.push(r)}return n}function applyRedactions(e,t){let n=e;for(let e of[...t].sort((e,t)=>t.start-e.start))n=n.slice(0,e.start)+e.replacement+n.slice(e.end);return n}const r={threshold:4,minLength:20,candidatePattern:/[A-Za-z0-9_\-./+=:~!]{20,}/g,skipPatterns:t,piiPatterns:n},i={};function createConfig(e){let t=e?.config;return{entropy:{...r,...t?.entropy},model:{...i,...t?.model,...e?.modelPath?{modelPath:e.modelPath}:{}},redactionToken:e?.redactionToken??t?.redactionToken??`[REDACTED]`}}const a=new Set([`secret`]);export{buildSkipRanges as a,applyRedactions as i,a as n,isInSkipRange as o,createConfig as r,mergeSpans as s,r as t};
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
var e=Object.create,t=Object.defineProperty,__name=(e,n)=>t(e,`name`,{value:n,configurable:!0}),n=Object.getOwnPropertyDescriptor,r=Object.getOwnPropertyNames,i=Object.getPrototypeOf,a=Object.prototype.hasOwnProperty,__copyProps=(e,i,o,s)=>{if(i&&typeof i==`object`||typeof i==`function`)for(var c=r(i),l=0,u=c.length,d;l<u;l++)d=c[l],!a.call(e,d)&&d!==o&&t(e,d,{get:(e=>i[e]).bind(null,d),enumerable:!(s=n(i,d))||s.enumerable});return e},__toESM=(n,r,a)=>(a=n==null?{}:e(i(n)),__copyProps(r||!n||!n.__esModule?t(a,`default`,{value:n,enumerable:!0}):a,n));const o=[/https?:\/\/\S+/,/[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}/i,/[a-f0-9]{40}/,/(?:^|\/)[\w._-]+\.[\w._-]+(?:\/|$)/],s=[/[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/,/(?:\+?1[-.\s]?)?\(?[0-9]{3}\)?[-.\s]?[0-9]{3}[-.\s]?[0-9]{4}/,/(?:AKIA|ABIA|ACCA|ASIA)[A-Z0-9]{16}/,/sk-[a-zA-Z0-9]{20,}/,/ghp_[a-zA-Z0-9]{36}/,/gho_[a-zA-Z0-9]{36}/,/github_pat_[a-zA-Z0-9_]{22,}/,/xox[bpras]-[a-zA-Z0-9-]+/,/sk_live_[a-zA-Z0-9]{24,}/,/pk_live_[a-zA-Z0-9]{24,}/];function buildSkipRanges(e,t){let n=[];for(let r of t){let t=new RegExp(r.source,r.flags.includes(`g`)?r.flags:`${r.flags}g`),i;for(;(i=t.exec(e))!==null;)n.push([i.index,i.index+i[0].length])}return n}function isInSkipRange(e,t,n){return n.some(([n,r])=>e<r&&t>n)}function mergeSpans(e){if(e.length<=1)return e;let t=[...e].sort((e,t)=>e.start-t.start),n=[t[0]];for(let e=1;e<t.length;e++){let r=t[e],i=n[n.length-1];r.start<=i.end?r.end>i.end&&(i.end=r.end):n.push(r)}return n}function applyRedactions(e,t){let n=e;for(let e of[...t].sort((e,t)=>t.start-e.start))n=n.slice(0,e.start)+e.replacement+n.slice(e.end);return n}const c={threshold:4,minLength:20,candidatePattern:/[A-Za-z0-9_\-./+=:~!]{20,}/g,skipPatterns:o,piiPatterns:s},l={},u=`[REDACTED]`;function createConfig(e){let 
t=e?.config;return{entropy:{...c,...t?.entropy},model:{...l,...t?.model,...e?.modelPath?{modelPath:e.modelPath}:{}},redactionToken:e?.redactionToken??t?.redactionToken??`[REDACTED]`}}const d=new Set([`secret`]);Object.defineProperty(exports,`a`,{enumerable:!0,get:function(){return buildSkipRanges}}),Object.defineProperty(exports,`c`,{enumerable:!0,get:function(){return __name}}),Object.defineProperty(exports,`i`,{enumerable:!0,get:function(){return applyRedactions}}),Object.defineProperty(exports,`l`,{enumerable:!0,get:function(){return __toESM}}),Object.defineProperty(exports,`n`,{enumerable:!0,get:function(){return d}}),Object.defineProperty(exports,`o`,{enumerable:!0,get:function(){return isInSkipRange}}),Object.defineProperty(exports,`r`,{enumerable:!0,get:function(){return createConfig}}),Object.defineProperty(exports,`s`,{enumerable:!0,get:function(){return mergeSpans}}),Object.defineProperty(exports,`t`,{enumerable:!0,get:function(){return c}});
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
const e=require(`./config-D8tiH_wn.cjs`);let t=require(`fs`),n=require(`os`),r=require(`path`);const i=[`onnx/model_quantized.onnx`,`onnx/model_quantized.onnx_data`,`tokenizer.json`,`config.json`];function getDefaultModelCacheDir(){return(0,r.join)((0,n.homedir)(),`.cache`,`stackone`,`pii-redaction`,`privacy-filter`)}function isModelPresent(e){return i.every(n=>(0,t.existsSync)((0,r.join)(e,n)))}var OnnxClassifier=class{constructor(e){this.session=null,this.tokenizer=null,this.OrtTensor=null,this.id2label={},this.loadingPromise=null,this.modelPath=e??getDefaultModelCacheDir()}async loadModel(e){if(e&&(this.modelPath=e),!(this.session&&this.tokenizer)){if(this.loadingPromise)return this.loadingPromise;this.loadingPromise=this._loadModel();try{await this.loadingPromise}catch(e){throw this.loadingPromise=null,e}}}async _loadModel(){if(!isModelPresent(this.modelPath)){let e=i.filter(e=>!(0,t.existsSync)((0,r.join)(this.modelPath,e)));throw Error(`Tier 2 model not found at ${this.modelPath}. Missing: ${e.join(`, `)}. Run \`npm run download-model\` in the redaction package, or provide a modelPath.`)}let{Tokenizer:e}=await import(`@huggingface/transformers`);this.tokenizer=await e.from_pretrained(this.modelPath,{local_files_only:!0});let n=await import(`onnxruntime-node`);this.OrtTensor=n.Tensor;let a=(0,r.resolve)(this.modelPath,`onnx`,`model_quantized.onnx`);this.session=await n.InferenceSession.create(a);let o=(0,r.resolve)(this.modelPath,`config.json`),s=JSON.parse((0,t.readFileSync)(o,`utf-8`));this.id2label={};for(let[e,t]of Object.entries(s.id2label))this.id2label[Number(e)]=t}async detect(e){if(await this.ensureLoaded(),!this.tokenizer||!this.OrtTensor||!this.session)throw Error(`Model not loaded. 
Call loadModel() first.`);let t=this.tokenizer.encode(e),n=t.ids,r=t.offsets;if(n.length===0)return[];let i=new BigInt64Array(n.map(e=>BigInt(e))),a=new BigInt64Array(n.length).fill(1n),o=new this.OrtTensor(`int64`,i,[1,n.length]),s=new this.OrtTensor(`int64`,a,[1,n.length]),c=(await this.session.run({input_ids:o,attention_mask:s})).logits;if(!c)throw Error(`ONNX model returned no logits`);let l=n.length,u=c.dims[2]??0,d=[];for(let e=0;e<l;e++){let t=-1/0,n=0;for(let r=0;r<u;r++){let i=Number(c.data[e*u+r]);i>t&&(t=i,n=r)}d.push(n)}return bioesToSpans(d,r,e,this.id2label)}async warmup(){await this.loadModel()}isLoaded(){return this.session!==null&&this.tokenizer!==null}async ensureLoaded(){(!this.session||!this.tokenizer)&&await this.loadModel()}};function bioesToSpans(e,t,n,r){let i=[],a=null,o=-1,s=-1;for(let c=0;c<e.length;c++){let l=e[c],u=l===void 0?`O`:r[l]??`O`;if(u===`O`){a!==null&&(i.push({label:a,start:o,end:s,text:n.slice(o,s)}),a=null);continue}let d=u.indexOf(`-`);if(d===-1)continue;let f=u.slice(0,d),p=u.slice(d+1),m=t[c];if(!m)continue;let[h,g]=m;f===`S`?(a!==null&&i.push({label:a,start:o,end:s,text:n.slice(o,s)}),i.push({label:p,start:h,end:g,text:n.slice(h,g)}),a=null):f===`B`?(a!==null&&i.push({label:a,start:o,end:s,text:n.slice(o,s)}),a=p,o=h,s=g):f===`I`?a===p?s=g:(a!==null&&i.push({label:a,start:o,end:s,text:n.slice(o,s)}),a=p,o=h,s=g):f===`E`&&(a===p?(s=g,i.push({label:a,start:o,end:s,text:n.slice(o,s)}),a=null):(a!==null&&i.push({label:a,start:o,end:s,text:n.slice(o,s)}),i.push({label:p,start:h,end:g,text:n.slice(h,g)}),a=null))}return a!==null&&i.push({label:a,start:o,end:s,text:n.slice(o,s)}),i}var Tier2Classifier=class{constructor(e={}){this.onnxClassifier=new OnnxClassifier(e.onnxModelPath)}isReady(){return this.onnxClassifier.isLoaded()}async warmup(){await this.onnxClassifier.warmup()}async detectAll(e){return e.trim()?this.onnxClassifier.detect(e):[]}async detectSecrets(t){return(await 
this.detectAll(t)).filter(t=>e.n.has(t.label))}async scrub(t,n=`[REDACTED]`){return t.trim()?e.i(t,(await this.detectSecrets(t)).map(e=>({start:e.start,end:e.end,original:e.text,replacement:n,source:`model`,label:e.label}))):t}};exports.Tier2Classifier=Tier2Classifier;
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
import{t as e}from"./chunk-Cfxk5zVN.mjs";import{i as t,n}from"./config-ConbNbEF.mjs";import{existsSync as r,readFileSync as i}from"fs";import{homedir as a}from"os";import{join as o,resolve as s}from"path";const c=[`onnx/model_quantized.onnx`,`onnx/model_quantized.onnx_data`,`tokenizer.json`,`config.json`];function getDefaultModelCacheDir(){return o(a(),`.cache`,`stackone`,`pii-redaction`,`privacy-filter`)}function isModelPresent(e){return c.every(t=>r(o(e,t)))}var OnnxClassifier=class{constructor(e){this.session=null,this.tokenizer=null,this.OrtTensor=null,this.id2label={},this.loadingPromise=null,this.modelPath=e??getDefaultModelCacheDir()}async loadModel(e){if(e&&(this.modelPath=e),!(this.session&&this.tokenizer)){if(this.loadingPromise)return this.loadingPromise;this.loadingPromise=this._loadModel();try{await this.loadingPromise}catch(e){throw this.loadingPromise=null,e}}}async _loadModel(){if(!isModelPresent(this.modelPath)){let e=c.filter(e=>!r(o(this.modelPath,e)));throw Error(`Tier 2 model not found at ${this.modelPath}. Missing: ${e.join(`, `)}. Run \`npm run download-model\` in the redaction package, or provide a modelPath.`)}let{Tokenizer:e}=await import(`@huggingface/transformers`);this.tokenizer=await e.from_pretrained(this.modelPath,{local_files_only:!0});let t=await import(`onnxruntime-node`);this.OrtTensor=t.Tensor;let n=s(this.modelPath,`onnx`,`model_quantized.onnx`);this.session=await t.InferenceSession.create(n);let a=s(this.modelPath,`config.json`),l=JSON.parse(i(a,`utf-8`));this.id2label={};for(let[e,t]of Object.entries(l.id2label))this.id2label[Number(e)]=t}async detect(e){if(await this.ensureLoaded(),!this.tokenizer||!this.OrtTensor||!this.session)throw Error(`Model not loaded. 
Call loadModel() first.`);let t=this.tokenizer.encode(e),n=t.ids,r=t.offsets;if(n.length===0)return[];let i=new BigInt64Array(n.map(e=>BigInt(e))),a=new BigInt64Array(n.length).fill(1n),o=new this.OrtTensor(`int64`,i,[1,n.length]),s=new this.OrtTensor(`int64`,a,[1,n.length]),c=(await this.session.run({input_ids:o,attention_mask:s})).logits;if(!c)throw Error(`ONNX model returned no logits`);let l=n.length,u=c.dims[2]??0,d=[];for(let e=0;e<l;e++){let t=-1/0,n=0;for(let r=0;r<u;r++){let i=Number(c.data[e*u+r]);i>t&&(t=i,n=r)}d.push(n)}return bioesToSpans(d,r,e,this.id2label)}async warmup(){await this.loadModel()}isLoaded(){return this.session!==null&&this.tokenizer!==null}async ensureLoaded(){(!this.session||!this.tokenizer)&&await this.loadModel()}};function bioesToSpans(e,t,n,r){let i=[],a=null,o=-1,s=-1;for(let c=0;c<e.length;c++){let l=e[c],u=l===void 0?`O`:r[l]??`O`;if(u===`O`){a!==null&&(i.push({label:a,start:o,end:s,text:n.slice(o,s)}),a=null);continue}let d=u.indexOf(`-`);if(d===-1)continue;let f=u.slice(0,d),p=u.slice(d+1),m=t[c];if(!m)continue;let[h,g]=m;f===`S`?(a!==null&&i.push({label:a,start:o,end:s,text:n.slice(o,s)}),i.push({label:p,start:h,end:g,text:n.slice(h,g)}),a=null):f===`B`?(a!==null&&i.push({label:a,start:o,end:s,text:n.slice(o,s)}),a=p,o=h,s=g):f===`I`?a===p?s=g:(a!==null&&i.push({label:a,start:o,end:s,text:n.slice(o,s)}),a=p,o=h,s=g):f===`E`&&(a===p?(s=g,i.push({label:a,start:o,end:s,text:n.slice(o,s)}),a=null):(a!==null&&i.push({label:a,start:o,end:s,text:n.slice(o,s)}),i.push({label:p,start:h,end:g,text:n.slice(h,g)}),a=null))}return a!==null&&i.push({label:a,start:o,end:s,text:n.slice(o,s)}),i}var Tier2Classifier=class{constructor(e={}){this.onnxClassifier=new OnnxClassifier(e.onnxModelPath)}isReady(){return this.onnxClassifier.isLoaded()}async warmup(){await this.onnxClassifier.warmup()}async detectAll(e){return e.trim()?this.onnxClassifier.detect(e):[]}async detectSecrets(e){return(await this.detectAll(e)).filter(e=>n.has(e.label))}async 
scrub(e,n=`[REDACTED]`){return e.trim()?t(e,(await this.detectSecrets(e)).map(e=>({start:e.start,end:e.end,original:e.text,replacement:n,source:`model`,label:e.label}))):e}};export{Tier2Classifier};
|