mellon 0.0.22 → 0.0.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +6 -0
- package/dist/mellon.cjs +1 -1
- package/dist/mellon.mjs +38 -33
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -20,6 +20,12 @@ export interface DetectorConfig {
|
|
|
20
20
|
audioProcessorPath?: string;
|
|
21
21
|
ortCdnUrl?: string;
|
|
22
22
|
audioUtils?: AudioUtils;
|
|
23
|
+
/** Enable console logging. Pass `true` for info+warn+error, or a custom logger. Defaults to silent. */
|
|
24
|
+
log?: boolean | {
|
|
25
|
+
info?: (...a: unknown[]) => void;
|
|
26
|
+
warn?: (...a: unknown[]) => void;
|
|
27
|
+
error?: (...a: unknown[]) => void;
|
|
28
|
+
};
|
|
23
29
|
}
|
|
24
30
|
export interface EnrollmentSessionConfig {
|
|
25
31
|
wasmPaths?: string;
|
package/dist/mellon.cjs
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
"use strict";Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});function Ut(m){return m&&m.__esModule&&Object.prototype.hasOwnProperty.call(m,"default")?m.default:m}var _t,pt;function Dt(){if(pt)return _t;pt=1;function m(o){if(this.size=o|0,this.size<=1||(this.size&this.size-1)!==0)throw new Error("FFT size must be a power of two and bigger than 1");this._csize=o<<1;for(var t=new Array(this.size*2),s=0;s<t.length;s+=2){const c=Math.PI*s/this.size;t[s]=Math.cos(c),t[s+1]=-Math.sin(c)}this.table=t;for(var n=0,e=1;this.size>e;e<<=1)n++;this._width=n%2===0?n-1:n,this._bitrev=new Array(1<<this._width);for(var i=0;i<this._bitrev.length;i++){this._bitrev[i]=0;for(var r=0;r<this._width;r+=2){var h=this._width-r-2;this._bitrev[i]|=(i>>>r&3)<<h}}this._out=null,this._data=null,this._inv=0}return _t=m,m.prototype.fromComplexArray=function(t,s){for(var n=s||new Array(t.length>>>1),e=0;e<t.length;e+=2)n[e>>>1]=t[e];return n},m.prototype.createComplexArray=function(){const t=new Array(this._csize);for(var s=0;s<t.length;s++)t[s]=0;return t},m.prototype.toComplexArray=function(t,s){for(var n=s||this.createComplexArray(),e=0;e<n.length;e+=2)n[e]=t[e>>>1],n[e+1]=0;return n},m.prototype.completeSpectrum=function(t){for(var s=this._csize,n=s>>>1,e=2;e<n;e+=2)t[s-e]=t[e],t[s-e+1]=-t[e+1]},m.prototype.transform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=0,this._transform4(),this._out=null,this._data=null},m.prototype.realTransform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=0,this._realTransform4(),this._out=null,this._data=null},m.prototype.inverseTransform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=1,this._transform4();for(var n=0;n<t.length;n++)t[n]/=this.size;this._out=null,this._data=null},m.prototype._transform4=function(){var t=this._out,s=this._csize,n=this._width,e=1<<n,i=s/e<<1,r,h,c=this._bitrev;if(i===4)for(r=0,h=0;r<s;r+=i,h++){const u=c[h];this._singleTransform2(r,u,e)}else for(r=0,h=0;r<s;r+=i,h++){const u=c[h];this._singleTransform4(r,u,e)}var d=this._inv?-1:1,a=this.table;for(e>>=2;e>=2;e>>=2){i=s/e<<1;var l=i>>>2;for(r=0;r<s;r+=i)for(var _=r+l,v=r,f=0;v<_;v+=2,f+=e){const u=v,p=u+l,w=p+l,g=w+l,T=t[u],b=t[u+1],A=t[p],y=t[p+1],F=t[w],S=t[w+1],C=t[g],R=t[g+1],P=T,E=b,M=a[f],U=d*a[f+1],D=A*M-y*U,z=A*U+y*M,k=a[2*f],N=d*a[2*f+1],B=F*k-S*N,j=F*N+S*k,K=a[3*f],O=d*a[3*f+1],$=C*K-R*O,G=C*O+R*K,J=P+B,x=E+j,I=P-B,Y=E-j,Q=D+$,L=z+G,W=d*(D-$),V=d*(z-G),tt=J+Q,ot=x+L,nt=J-Q,at=x-L,it=I+V,ct=Y-W,lt=I-V,ht=Y+W;t[u]=tt,t[u+1]=ot,t[p]=it,t[p+1]=ct,t[w]=nt,t[w+1]=at,t[g]=lt,t[g+1]=ht}}},m.prototype._singleTransform2=function(t,s,n){const e=this._out,i=this._data,r=i[s],h=i[s+1],c=i[s+n],d=i[s+n+1],a=r+c,l=h+d,_=r-c,v=h-d;e[t]=a,e[t+1]=l,e[t+2]=_,e[t+3]=v},m.prototype._singleTransform4=function(t,s,n){const e=this._out,i=this._data,r=this._inv?-1:1,h=n*2,c=n*3,d=i[s],a=i[s+1],l=i[s+n],_=i[s+n+1],v=i[s+h],f=i[s+h+1],u=i[s+c],p=i[s+c+1],w=d+v,g=a+f,T=d-v,b=a-f,A=l+u,y=_+p,F=r*(l-u),S=r*(_-p),C=w+A,R=g+y,P=T+S,E=b-F,M=w-A,U=g-y,D=T-S,z=b+F;e[t]=C,e[t+1]=R,e[t+2]=P,e[t+3]=E,e[t+4]=M,e[t+5]=U,e[t+6]=D,e[t+7]=z},m.prototype._realTransform4=function(){var t=this._out,s=this._csize,n=this._width,e=1<<n,i=s/e<<1,r,h,c=this._bitrev;if(i===4)for(r=0,h=0;r<s;r+=i,h++){const dt=c[h];this._singleRealTransform2(r,dt>>>1,e>>>1)}else for(r=0,h=0;r<s;r+=i,h++){const dt=c[h];this._singleRealTransform4(r,dt>>>1,e>>>1)}var d=this._inv?-1:1,a=this.table;for(e>>=2;e>=2;e>>=2){i=s/e<<1;var l=i>>>1,_=l>>>1,v=_>>>1;for(r=0;r<s;r+=i)for(var f=0,u=0;f<=v;f+=2,u+=e){var p=r+f,w=p+_,g=w+_,T=g+_,b=t[p],A=t[p+1],y=t[w],F=t[w+1],S=t[g],C=t[g+1],R=t[T],P=t[T+1],E=b,M=A,U=a[u],D=d*a[u+1],z=y*U-F*D,k=y*D+F*U,N=a[2*u],B=d*a[2*u+1],j=S*N-C*B,K=S*B+C*N,O=a[3*u],$=d*a[3*u+1],G=R*O-P*$,J=R*$+P*O,x=E+j,I=M+K,Y=E-j,Q=M-K,L=z+G,W=k+J,V=d*(z-G),tt=d*(k-J),ot=x+L,nt=I+W,at=Y+tt,it=Q-V;if(t[p]=ot,t[p+1]=nt,t[w]=at,t[w+1]=it,f===0){var ct=x-L,lt=I-W;t[g]=ct,t[g+1]=lt;continue}if(f!==v){var ht=Y,yt=-Q,Tt=x,At=-I,bt=-d*tt,Ft=-d*V,St=-d*W,Ct=-d*L,Rt=ht+bt,Pt=yt+Ft,Et=Tt+Ct,Mt=At-St,ut=r+_-f,vt=r+l-f;t[ut]=Rt,t[ut+1]=Pt,t[vt]=Et,t[vt+1]=Mt}}}},m.prototype._singleRealTransform2=function(t,s,n){const e=this._out,i=this._data,r=i[s],h=i[s+n],c=r+h,d=r-h;e[t]=c,e[t+1]=0,e[t+2]=d,e[t+3]=0},m.prototype._singleRealTransform4=function(t,s,n){const e=this._out,i=this._data,r=this._inv?-1:1,h=n*2,c=n*3,d=i[s],a=i[s+n],l=i[s+h],_=i[s+c],v=d+l,f=d-l,u=a+_,p=r*(a-_),w=v+u,g=f,T=-p,b=v-u,A=f,y=p;e[t]=w,e[t+1]=0,e[t+2]=g,e[t+3]=T,e[t+4]=b,e[t+5]=0,e[t+6]=A,e[t+7]=y},_t}var zt=Dt();const xt=Ut(zt);class ft{constructor(o=16e3,t=512,s=64){this._sampleRate=o,this._nfft=t,this._nfilt=s,this._fft=new xt(t),this._melFilters=this._createMelFilterbank()}_hzToMel(o){return 2595*Math.log10(1+o/700)}_melToHz(o){return 700*(10**(o/2595)-1)}_createMelFilterbank(){const t=this._sampleRate/2,s=this._hzToMel(0),n=this._hzToMel(t),e=new Float32Array(this._nfilt+2);for(let c=0;c<this._nfilt+2;c++)e[c]=s+c*(n-s)/(this._nfilt+1);const r=e.map(c=>this._melToHz(c)).map(c=>Math.floor((this._nfft+1)*c/this._sampleRate)),h=[];for(let c=0;c<this._nfilt;c++){const d=new Float32Array(Math.floor(this._nfft/2)+1);for(let a=r[c];a<r[c+1];a++)d[a]=(a-r[c])/(r[c+1]-r[c]);for(let a=r[c+1];a<r[c+2];a++)d[a]=(r[c+2]-a)/(r[c+2]-r[c+1]);h.push(d)}return h}logfbank(o){const t=Math.floor(.025*this._sampleRate),s=Math.floor(.01*this._sampleRate),n=1+Math.ceil((o.length-t)/s),e=new Float32Array(n*this._nfilt),i=new Float32Array(this._nfft),r=this._fft.createComplexArray();for(let h=0;h<n;h++){const c=h*s;i.fill(0);for(let l=0;l<t&&c+l<o.length;l++)i[l]=o[c+l];const d=this._fft.toComplexArray(i,null);this._fft.transform(r,d);const a=new Float32Array(Math.floor(this._nfft/2)+1);for(let l=0;l<a.length;l++){const _=r[2*l],v=r[2*l+1];a[l]=1/this._nfft*(_*_+v*v),a[l]===0&&(a[l]=1e-30)}for(let l=0;l<this._nfilt;l++){let _=0;const v=this._melFilters[l];for(let f=0;f<a.length;f++)_+=a[f]*v[f];_===0&&(_=1e-30),e[h*this._nfilt+l]=Math.log(_)}}return e}maxCosineSim(o,t){let s=0;for(const n of t){let e=0;for(let r=0;r<n.length;r++)e+=o[r]*n[r];const i=(e+1)/2;i>s&&(s=i)}return s}}async function et(m=Z,o=q){const t=await import(o);return t.env.wasm.wasmPaths=m,t.env.wasm.numThreads=1,t}let st=null;async function mt(m=rt,o=Z,t=q,s){return st||(st=et(o,t).then(n=>s?n.InferenceSession.create(new Uint8Array(s),{executionProviders:["wasm"],graphOptimizationLevel:"all"}):n.InferenceSession.create(m,{executionProviders:["wasm"],graphOptimizationLevel:"all"}))),st}function It(){return st!==null}class H{static loadWords(o=X){try{const t=localStorage.getItem(o);return t?JSON.parse(t):[]}catch{return[]}}static saveWord(o,t=X){const s=H.loadWords(t).filter(n=>n.word_name!==o.word_name);localStorage.setItem(t,JSON.stringify([...s,o]))}static deleteWord(o,t=X){try{const s=H.loadWords(t).filter(n=>n.word_name!==o);localStorage.setItem(t,JSON.stringify(s))}catch{}}}class Lt{constructor(o,t){this._started=!1,this._inferring=!1,this._audioCtx=null,this._stream=null,this._refEmbeddings=new Map,this._lastMatchAt=0,this._lastInferenceAt=0,this._initPromise=null;const{refsStorageKey:s=X,thresholdStorageKey:n=gt,wasmPaths:e=Z,modelPath:i=rt,audioProcessorPath:r=wt,ortCdnUrl:h=q,audioUtils:c=new ft}=t||{};this._audioUtils=c,this._commands=o,this._refsStorageKey=s,this._thresholdStorageKey=n,this._audioProcessorPath=r,this._wasmPaths=e,this._modelPath=i,this._ortCdnUrl=h;try{const d=localStorage.getItem(this._thresholdStorageKey);this._threshold=d!==null?Math.max(0,Math.min(1,Number(d))):.65}catch{this._threshold=.65}}get threshold(){return this._threshold}set threshold(o){this._threshold=Math.max(0,Math.min(1,o));try{localStorage.setItem(this._thresholdStorageKey,String(this._threshold))}catch{}}get listening(){return this._started}async _trackFetch(o,t,s){const n=await fetch(o);if(!n.ok)throw new Error(`HTTP ${n.status} fetching ${o}`);const e=Number(n.headers.get("content-length")??"0");if(e>0&&(s.total+=e),!n.body){const a=await n.arrayBuffer();return s.downloaded+=a.byteLength,e||(s.total+=a.byteLength),t==null||t(s.downloaded,s.total),a}const i=n.body.getReader(),r=[];let h=0;for(;;){const{done:a,value:l}=await i.read();if(a)break;r.push(l),h+=l.length,s.downloaded+=l.length,t==null||t(s.downloaded,s.total)}e||(s.total+=h);const c=new Uint8Array(h);let d=0;for(const a of r)c.set(a,d),d+=a.length;return c.buffer}async _init(o){const t={downloaded:0,total:0},s=H.loadWords(this._refsStorageKey),n=new Set(s.map(a=>a.word_name)),e=new Set,i=[];for(const a of this._commands)for(const l of a.triggers)!e.has(l.name)&&l.defaultRefPath&&!n.has(l.name)&&(e.add(l.name),i.push({name:l.name,path:l.defaultRefPath}));const r=et(this._wasmPaths,this._ortCdnUrl),h=It(),[c,...d]=await Promise.all([h?Promise.resolve(null):this._trackFetch(this._modelPath,o,t),...i.map(({path:a})=>this._trackFetch(a,o,t))]);await r,await mt(this._modelPath,this._wasmPaths,this._ortCdnUrl,h?void 0:c);for(let a=0;a<i.length;a++)try{const l=JSON.parse(new TextDecoder().decode(d[a]));this.addCustomWord(l),H.saveWord(l,this._refsStorageKey)}catch{console.warn(`[Mellon] failed to parse ref file: ${i[a].path}`)}for(const a of s)this._refEmbeddings.set(a.word_name,a.embeddings);console.info("[Mellon] init complete, loaded refs:",[...this._refEmbeddings.keys()])}async init(o){this._initPromise||(this._initPromise=this._init(o)),await this._initPromise}addCustomWord(o){if(!(Array.isArray(o.embeddings)&&o.embeddings.length>0))throw new Error("invalid ref file for : "+o.word_name);this._refEmbeddings.set(o.word_name,o.embeddings)}async start(){if(this._started)return;await this.init();let o;try{o=await navigator.mediaDevices.getUserMedia({audio:{noiseSuppression:!1,echoCancellation:!1,autoGainControl:!1,channelCount:1}})}catch{o=await navigator.mediaDevices.getUserMedia({audio:!0})}this._stream=o;const t=new AudioContext({sampleRate:16e3});this._audioCtx=t,await t.audioWorklet.addModule(this._audioProcessorPath);const s=t.createMediaStreamSource(o),n=new AudioWorkletNode(t,"audio-processor");n.port.onmessage=e=>{this._handleBuffer(e.data)},s.connect(n),n.connect(t.destination),this._started=!0}async stop(){if(this._started=!1,this._audioCtx&&(await this._audioCtx.close(),this._audioCtx=null),this._stream){for(const o of this._stream.getTracks())o.stop();this._stream=null}}async _handleBuffer(o){if(this._inferring)return;const t=Date.now();if(!(t-this._lastInferenceAt<300)){this._lastInferenceAt=t,this._inferring=!0;try{const[s,n]=await Promise.all([et(this._wasmPaths,this._ortCdnUrl),mt(this._modelPath,this._wasmPaths,this._ortCdnUrl)]),e=this._audioUtils.logfbank(o),i=new s.Tensor("float32",e,[1,1,149,64]),r=await n.run({input:i}),h=r[Object.keys(r)[0]].data;let c=!1;for(const d of this._commands){if(c)break;for(const a of d.triggers){const l=this._refEmbeddings.get(a.name);if(!l)continue;const _=this._audioUtils.maxCosineSim(h,l);if(_>=this._threshold&&t-this._lastMatchAt>2e3){this._lastMatchAt=t,console.info(`[Mellon] match: "${a}" sim=${_.toFixed(3)}`),typeof d.onMatch=="function"&&d.onMatch(a.name,_),c=!0;break}}}}catch(s){console.error("[Mellon] inference error:",s)}finally{this._inferring=!1}}}}class Wt{constructor(o,t){this._config={},this._samples=[],this._wordName=o,this._config.modelPath=(t==null?void 0:t.modelPath)||rt,this._config.wasmPaths=(t==null?void 0:t.wasmPaths)||Z,this._config.ortCdnUrl=(t==null?void 0:t.ortCdnUrl)||q,this._audioUtils=(t==null?void 0:t.audioUtils)??new ft}async recordSample(){const o=await navigator.mediaDevices.getUserMedia({audio:!0}),t=new AudioContext({sampleRate:16e3}),s=await new Promise((i,r)=>{const h=new MediaRecorder(o),c=[];h.ondataavailable=d=>{d.data.size>0&&c.push(d.data)},h.onstop=async()=>{var d;for(const a of o.getTracks())a.stop();try{const l=await new Blob(c,{type:((d=c[0])==null?void 0:d.type)||"audio/webm"}).arrayBuffer(),_=await t.decodeAudioData(l);await t.close(),i(_.getChannelData(0).slice())}catch(a){r(a)}},h.start(),setTimeout(()=>{try{h.stop()}catch{}},1500)}),n=24e3,e=new Float32Array(n);return e.set(s.slice(0,n)),this._samples.push(e),this._samples.length}deleteSample(o){if(o<0||o>=this._samples.length)throw new RangeError(`index ${o} out of bounds (${this._samples.length} samples)`);return this._samples.splice(o,1),this._samples.length}async generateRef(){const[o,t]=await Promise.all([et(this._config.wasmPaths,this._config.ortCdnUrl),mt(this._config.modelPath,this._config.wasmPaths,this._config.ortCdnUrl)]),s=[];for(const n of this._samples){const e=this._audioUtils.logfbank(n),i=new o.Tensor("float32",e,[1,1,149,64]),r=await t.run({input:i}),h=Array.from(r[Object.keys(r)[0]].data);s.push(h)}return{word_name:this._wordName,model_type:"resnet_50_arc",embeddings:s}}}const Z="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/",q="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/ort.wasm.min.mjs",rt="https://huggingface.co/ComicScrip/mellon/resolve/main/model.onnx",wt="https://cdn.jsdelivr.net/npm/mellon@0.0.14/dist/assets/audio-processor.js",X="mellon-refs",gt="mellon-threshold";exports.AudioUtils=ft;exports.DEFAULT_AUDIO_PROCESSOR_PATH=wt;exports.DEFAULT_MODEL_PATH=rt;exports.DEFAULT_ORT_CDN_URL=q;exports.DEFAULT_REFS_STORAGE_KEY=X;exports.DEFAULT_THRESHOLD_STORAGE_KEY=gt;exports.DEFAULT_WASM_PATHS=Z;exports.Detector=Lt;exports.EnrollmentSession=Wt;exports.Storage=H;
|
|
1
|
+
"use strict";Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});function Dt(m){return m&&m.__esModule&&Object.prototype.hasOwnProperty.call(m,"default")?m.default:m}var _t,pt;function zt(){if(pt)return _t;pt=1;function m(o){if(this.size=o|0,this.size<=1||(this.size&this.size-1)!==0)throw new Error("FFT size must be a power of two and bigger than 1");this._csize=o<<1;for(var t=new Array(this.size*2),s=0;s<t.length;s+=2){const l=Math.PI*s/this.size;t[s]=Math.cos(l),t[s+1]=-Math.sin(l)}this.table=t;for(var n=0,e=1;this.size>e;e<<=1)n++;this._width=n%2===0?n-1:n,this._bitrev=new Array(1<<this._width);for(var i=0;i<this._bitrev.length;i++){this._bitrev[i]=0;for(var r=0;r<this._width;r+=2){var h=this._width-r-2;this._bitrev[i]|=(i>>>r&3)<<h}}this._out=null,this._data=null,this._inv=0}return _t=m,m.prototype.fromComplexArray=function(t,s){for(var n=s||new Array(t.length>>>1),e=0;e<t.length;e+=2)n[e>>>1]=t[e];return n},m.prototype.createComplexArray=function(){const t=new Array(this._csize);for(var s=0;s<t.length;s++)t[s]=0;return t},m.prototype.toComplexArray=function(t,s){for(var n=s||this.createComplexArray(),e=0;e<n.length;e+=2)n[e]=t[e>>>1],n[e+1]=0;return n},m.prototype.completeSpectrum=function(t){for(var s=this._csize,n=s>>>1,e=2;e<n;e+=2)t[s-e]=t[e],t[s-e+1]=-t[e+1]},m.prototype.transform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=0,this._transform4(),this._out=null,this._data=null},m.prototype.realTransform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=0,this._realTransform4(),this._out=null,this._data=null},m.prototype.inverseTransform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=1,this._transform4();for(var n=0;n<t.length;n++)t[n]/=this.size;this._out=null,this._data=null},m.prototype._transform4=function(){var t=this._out,s=this._csize,n=this._width,e=1<<n,i=s/e<<1,r,h,l=this._bitrev;if(i===4)for(r=0,h=0;r<s;r+=i,h++){const u=l[h];this._singleTransform2(r,u,e)}else for(r=0,h=0;r<s;r+=i,h++){const u=l[h];this._singleTransform4(r,u,e)}var d=this._inv?-1:1,a=this.table;for(e>>=2;e>=2;e>>=2){i=s/e<<1;var c=i>>>2;for(r=0;r<s;r+=i)for(var _=r+c,v=r,f=0;v<_;v+=2,f+=e){const u=v,p=u+c,w=p+c,g=w+c,T=t[u],A=t[u+1],b=t[p],y=t[p+1],F=t[w],S=t[w+1],C=t[g],E=t[g+1],R=T,P=A,M=a[f],U=d*a[f+1],D=b*M-y*U,z=b*U+y*M,H=a[2*f],k=d*a[2*f+1],O=F*H-S*k,B=F*k+S*H,j=a[3*f],K=d*a[3*f+1],$=C*j-E*K,G=C*K+E*j,J=R+O,x=P+B,I=R-O,Y=P-B,Q=D+$,L=z+G,N=d*(D-$),V=d*(z-G),tt=J+Q,ot=x+L,nt=J-Q,at=x-L,it=I+V,lt=Y-N,ct=I-V,ht=Y+N;t[u]=tt,t[u+1]=ot,t[p]=it,t[p+1]=lt,t[w]=nt,t[w+1]=at,t[g]=ct,t[g+1]=ht}}},m.prototype._singleTransform2=function(t,s,n){const e=this._out,i=this._data,r=i[s],h=i[s+1],l=i[s+n],d=i[s+n+1],a=r+l,c=h+d,_=r-l,v=h-d;e[t]=a,e[t+1]=c,e[t+2]=_,e[t+3]=v},m.prototype._singleTransform4=function(t,s,n){const e=this._out,i=this._data,r=this._inv?-1:1,h=n*2,l=n*3,d=i[s],a=i[s+1],c=i[s+n],_=i[s+n+1],v=i[s+h],f=i[s+h+1],u=i[s+l],p=i[s+l+1],w=d+v,g=a+f,T=d-v,A=a-f,b=c+u,y=_+p,F=r*(c-u),S=r*(_-p),C=w+b,E=g+y,R=T+S,P=A-F,M=w-b,U=g-y,D=T-S,z=A+F;e[t]=C,e[t+1]=E,e[t+2]=R,e[t+3]=P,e[t+4]=M,e[t+5]=U,e[t+6]=D,e[t+7]=z},m.prototype._realTransform4=function(){var t=this._out,s=this._csize,n=this._width,e=1<<n,i=s/e<<1,r,h,l=this._bitrev;if(i===4)for(r=0,h=0;r<s;r+=i,h++){const dt=l[h];this._singleRealTransform2(r,dt>>>1,e>>>1)}else for(r=0,h=0;r<s;r+=i,h++){const dt=l[h];this._singleRealTransform4(r,dt>>>1,e>>>1)}var d=this._inv?-1:1,a=this.table;for(e>>=2;e>=2;e>>=2){i=s/e<<1;var c=i>>>1,_=c>>>1,v=_>>>1;for(r=0;r<s;r+=i)for(var f=0,u=0;f<=v;f+=2,u+=e){var p=r+f,w=p+_,g=w+_,T=g+_,A=t[p],b=t[p+1],y=t[w],F=t[w+1],S=t[g],C=t[g+1],E=t[T],R=t[T+1],P=A,M=b,U=a[u],D=d*a[u+1],z=y*U-F*D,H=y*D+F*U,k=a[2*u],O=d*a[2*u+1],B=S*k-C*O,j=S*O+C*k,K=a[3*u],$=d*a[3*u+1],G=E*K-R*$,J=E*$+R*K,x=P+B,I=M+j,Y=P-B,Q=M-j,L=z+G,N=H+J,V=d*(z-G),tt=d*(H-J),ot=x+L,nt=I+N,at=Y+tt,it=Q-V;if(t[p]=ot,t[p+1]=nt,t[w]=at,t[w+1]=it,f===0){var lt=x-L,ct=I-N;t[g]=lt,t[g+1]=ct;continue}if(f!==v){var ht=Y,Tt=-Q,bt=x,At=-I,Ft=-d*tt,St=-d*V,Ct=-d*N,Et=-d*L,Rt=ht+Ft,Pt=Tt+St,Mt=bt+Et,Ut=At-Ct,ut=r+_-f,vt=r+c-f;t[ut]=Rt,t[ut+1]=Pt,t[vt]=Mt,t[vt+1]=Ut}}}},m.prototype._singleRealTransform2=function(t,s,n){const e=this._out,i=this._data,r=i[s],h=i[s+n],l=r+h,d=r-h;e[t]=l,e[t+1]=0,e[t+2]=d,e[t+3]=0},m.prototype._singleRealTransform4=function(t,s,n){const e=this._out,i=this._data,r=this._inv?-1:1,h=n*2,l=n*3,d=i[s],a=i[s+n],c=i[s+h],_=i[s+l],v=d+c,f=d-c,u=a+_,p=r*(a-_),w=v+u,g=f,T=-p,A=v-u,b=f,y=p;e[t]=w,e[t+1]=0,e[t+2]=g,e[t+3]=T,e[t+4]=A,e[t+5]=0,e[t+6]=b,e[t+7]=y},_t}var xt=zt();const It=Dt(xt);class ft{constructor(o=16e3,t=512,s=64){this._sampleRate=o,this._nfft=t,this._nfilt=s,this._fft=new It(t),this._melFilters=this._createMelFilterbank()}_hzToMel(o){return 2595*Math.log10(1+o/700)}_melToHz(o){return 700*(10**(o/2595)-1)}_createMelFilterbank(){const t=this._sampleRate/2,s=this._hzToMel(0),n=this._hzToMel(t),e=new Float32Array(this._nfilt+2);for(let l=0;l<this._nfilt+2;l++)e[l]=s+l*(n-s)/(this._nfilt+1);const r=e.map(l=>this._melToHz(l)).map(l=>Math.floor((this._nfft+1)*l/this._sampleRate)),h=[];for(let l=0;l<this._nfilt;l++){const d=new Float32Array(Math.floor(this._nfft/2)+1);for(let a=r[l];a<r[l+1];a++)d[a]=(a-r[l])/(r[l+1]-r[l]);for(let a=r[l+1];a<r[l+2];a++)d[a]=(r[l+2]-a)/(r[l+2]-r[l+1]);h.push(d)}return h}logfbank(o){const t=Math.floor(.025*this._sampleRate),s=Math.floor(.01*this._sampleRate),n=1+Math.ceil((o.length-t)/s),e=new Float32Array(n*this._nfilt),i=new Float32Array(this._nfft),r=this._fft.createComplexArray();for(let h=0;h<n;h++){const l=h*s;i.fill(0);for(let c=0;c<t&&l+c<o.length;c++)i[c]=o[l+c];const d=this._fft.toComplexArray(i,null);this._fft.transform(r,d);const a=new Float32Array(Math.floor(this._nfft/2)+1);for(let c=0;c<a.length;c++){const _=r[2*c],v=r[2*c+1];a[c]=1/this._nfft*(_*_+v*v),a[c]===0&&(a[c]=1e-30)}for(let c=0;c<this._nfilt;c++){let _=0;const v=this._melFilters[c];for(let f=0;f<a.length;f++)_+=a[f]*v[f];_===0&&(_=1e-30),e[h*this._nfilt+c]=Math.log(_)}}return e}maxCosineSim(o,t){let s=0;for(const n of t){let e=0;for(let r=0;r<n.length;r++)e+=o[r]*n[r];const i=(e+1)/2;i>s&&(s=i)}return s}}async function et(m=Z,o=q){const t=await import(o);return t.env.wasm.wasmPaths=m,t.env.wasm.numThreads=1,t}let st=null;async function mt(m=rt,o=Z,t=q,s){return st||(st=et(o,t).then(n=>s?n.InferenceSession.create(new Uint8Array(s),{executionProviders:["wasm"],graphOptimizationLevel:"all"}):n.InferenceSession.create(m,{executionProviders:["wasm"],graphOptimizationLevel:"all"}))),st}function Lt(){return st!==null}class W{static loadWords(o=X){try{const t=localStorage.getItem(o);return t?JSON.parse(t):[]}catch{return[]}}static saveWord(o,t=X){const s=W.loadWords(t).filter(n=>n.word_name!==o.word_name);localStorage.setItem(t,JSON.stringify([...s,o]))}static deleteWord(o,t=X){try{const s=W.loadWords(t).filter(n=>n.word_name!==o);localStorage.setItem(t,JSON.stringify(s))}catch{}}}const wt={info:()=>{},warn:()=>{},error:()=>{}},Nt={info:console.info.bind(console),warn:console.warn.bind(console),error:console.error.bind(console)};class Wt{constructor(o,t){this._started=!1,this._inferring=!1,this._audioCtx=null,this._stream=null,this._refEmbeddings=new Map,this._lastMatchAt=0,this._lastInferenceAt=0,this._initPromise=null;const{refsStorageKey:s=X,thresholdStorageKey:n=yt,wasmPaths:e=Z,modelPath:i=rt,audioProcessorPath:r=gt,ortCdnUrl:h=q,audioUtils:l=new ft,log:d=!1}=t||{};this._log=d===!1?wt:d===!0?Nt:{...wt,...d},this._audioUtils=l,this._commands=o,this._refsStorageKey=s,this._thresholdStorageKey=n,this._audioProcessorPath=r,this._wasmPaths=e,this._modelPath=i,this._ortCdnUrl=h;try{const a=localStorage.getItem(this._thresholdStorageKey);this._threshold=a!==null?Math.max(0,Math.min(1,Number(a))):.65}catch{this._threshold=.65}}get threshold(){return this._threshold}set threshold(o){this._threshold=Math.max(0,Math.min(1,o));try{localStorage.setItem(this._thresholdStorageKey,String(this._threshold))}catch{}}get listening(){return this._started}async _trackFetch(o,t,s){const n=await fetch(o);if(!n.ok)throw new Error(`HTTP ${n.status} fetching ${o}`);const e=Number(n.headers.get("content-length")??"0");if(e>0&&(s.total+=e),!n.body){const a=await n.arrayBuffer();return s.downloaded+=a.byteLength,e||(s.total+=a.byteLength),t==null||t(s.downloaded,s.total),a}const i=n.body.getReader(),r=[];let h=0;for(;;){const{done:a,value:c}=await i.read();if(a)break;r.push(c),h+=c.length,s.downloaded+=c.length,t==null||t(s.downloaded,s.total)}e||(s.total+=h);const l=new Uint8Array(h);let d=0;for(const a of r)l.set(a,d),d+=a.length;return l.buffer}async _init(o){const t={downloaded:0,total:0},s=W.loadWords(this._refsStorageKey),n=new Set(s.map(a=>a.word_name)),e=new Set,i=[];for(const a of this._commands)for(const c of a.triggers)!e.has(c.name)&&c.defaultRefPath&&!n.has(c.name)&&(e.add(c.name),i.push({name:c.name,path:c.defaultRefPath}));const r=et(this._wasmPaths,this._ortCdnUrl),h=Lt(),[l,...d]=await Promise.all([h?Promise.resolve(null):this._trackFetch(this._modelPath,o,t),...i.map(({path:a})=>this._trackFetch(a,o,t))]);await r,await mt(this._modelPath,this._wasmPaths,this._ortCdnUrl,h?void 0:l);for(let a=0;a<i.length;a++)try{const c=JSON.parse(new TextDecoder().decode(d[a]));this.addCustomWord(c),W.saveWord(c,this._refsStorageKey)}catch{this._log.warn(`[Mellon] failed to parse ref file: ${i[a].path}`)}for(const a of s)this._refEmbeddings.set(a.word_name,a.embeddings);this._log.info("[Mellon] init complete, loaded refs:",[...this._refEmbeddings.keys()])}async init(o){this._initPromise||(this._initPromise=this._init(o)),await this._initPromise}addCustomWord(o){if(!(Array.isArray(o.embeddings)&&o.embeddings.length>0))throw new Error("invalid ref file for : "+o.word_name);this._refEmbeddings.set(o.word_name,o.embeddings)}async start(){if(this._started)return;await this.init();let o;try{o=await navigator.mediaDevices.getUserMedia({audio:{noiseSuppression:!1,echoCancellation:!1,autoGainControl:!1,channelCount:1}})}catch{o=await navigator.mediaDevices.getUserMedia({audio:!0})}this._stream=o;const t=new AudioContext({sampleRate:16e3});this._audioCtx=t,await t.audioWorklet.addModule(this._audioProcessorPath);const s=t.createMediaStreamSource(o),n=new AudioWorkletNode(t,"audio-processor");n.port.onmessage=e=>{this._handleBuffer(e.data)},s.connect(n),n.connect(t.destination),this._started=!0}async stop(){if(this._started=!1,this._audioCtx&&(await this._audioCtx.close(),this._audioCtx=null),this._stream){for(const o of this._stream.getTracks())o.stop();this._stream=null}}async _handleBuffer(o){if(this._inferring)return;const t=Date.now();if(!(t-this._lastInferenceAt<300)){this._lastInferenceAt=t,this._inferring=!0;try{const[s,n]=await Promise.all([et(this._wasmPaths,this._ortCdnUrl),mt(this._modelPath,this._wasmPaths,this._ortCdnUrl)]),e=this._audioUtils.logfbank(o),i=new s.Tensor("float32",e,[1,1,149,64]),r=await n.run({input:i}),h=r[Object.keys(r)[0]].data;let l=!1;for(const d of this._commands){if(l)break;for(const a of d.triggers){const c=this._refEmbeddings.get(a.name);if(!c)continue;const _=this._audioUtils.maxCosineSim(h,c);if(_>=this._threshold&&t-this._lastMatchAt>2e3){this._lastMatchAt=t,this._log.info(`[Mellon] match: "${a.name}" sim=${_.toFixed(3)}`),typeof d.onMatch=="function"&&d.onMatch(a.name,_),l=!0;break}}}}catch(s){this._log.error("[Mellon] inference error:",s)}finally{this._inferring=!1}}}}class Ht{constructor(o,t){this._config={},this._samples=[],this._wordName=o,this._config.modelPath=(t==null?void 0:t.modelPath)||rt,this._config.wasmPaths=(t==null?void 0:t.wasmPaths)||Z,this._config.ortCdnUrl=(t==null?void 0:t.ortCdnUrl)||q,this._audioUtils=(t==null?void 0:t.audioUtils)??new ft}async recordSample(){const o=await navigator.mediaDevices.getUserMedia({audio:!0}),t=new AudioContext({sampleRate:16e3}),s=await new Promise((i,r)=>{const h=new MediaRecorder(o),l=[];h.ondataavailable=d=>{d.data.size>0&&l.push(d.data)},h.onstop=async()=>{var d;for(const a of o.getTracks())a.stop();try{const c=await new Blob(l,{type:((d=l[0])==null?void 0:d.type)||"audio/webm"}).arrayBuffer(),_=await t.decodeAudioData(c);await t.close(),i(_.getChannelData(0).slice())}catch(a){r(a)}},h.start(),setTimeout(()=>{try{h.stop()}catch{}},1500)}),n=24e3,e=new Float32Array(n);return e.set(s.slice(0,n)),this._samples.push(e),this._samples.length}deleteSample(o){if(o<0||o>=this._samples.length)throw new RangeError(`index ${o} out of bounds (${this._samples.length} samples)`);return this._samples.splice(o,1),this._samples.length}async generateRef(){const[o,t]=await Promise.all([et(this._config.wasmPaths,this._config.ortCdnUrl),mt(this._config.modelPath,this._config.wasmPaths,this._config.ortCdnUrl)]),s=[];for(const n of this._samples){const e=this._audioUtils.logfbank(n),i=new o.Tensor("float32",e,[1,1,149,64]),r=await t.run({input:i}),h=Array.from(r[Object.keys(r)[0]].data);s.push(h)}return{word_name:this._wordName,model_type:"resnet_50_arc",embeddings:s}}}const Z="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/",q="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/ort.wasm.min.mjs",rt="https://huggingface.co/ComicScrip/mellon/resolve/main/model.onnx",gt="https://cdn.jsdelivr.net/npm/mellon@0.0.14/dist/assets/audio-processor.js",X="mellon-refs",yt="mellon-threshold";exports.AudioUtils=ft;exports.DEFAULT_AUDIO_PROCESSOR_PATH=gt;exports.DEFAULT_MODEL_PATH=rt;exports.DEFAULT_ORT_CDN_URL=q;exports.DEFAULT_REFS_STORAGE_KEY=X;exports.DEFAULT_THRESHOLD_STORAGE_KEY=yt;exports.DEFAULT_WASM_PATHS=Z;exports.Detector=Wt;exports.EnrollmentSession=Ht;exports.Storage=W;
|
package/dist/mellon.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
function
|
|
1
|
+
function Et(m) {
|
|
2
2
|
return m && m.__esModule && Object.prototype.hasOwnProperty.call(m, "default") ? m.default : m;
|
|
3
3
|
}
|
|
4
4
|
var dt, vt;
|
|
@@ -75,7 +75,7 @@ function Ut() {
|
|
|
75
75
|
var l = i >>> 2;
|
|
76
76
|
for (r = 0; r < s; r += i)
|
|
77
77
|
for (var _ = r + l, v = r, f = 0; v < _; v += 2, f += e) {
|
|
78
|
-
const u = v, p = u + l, w = p + l, g = w + l, b = t[u], T = t[u + 1], F = t[p], y = t[p + 1], A = t[w], C = t[w + 1],
|
|
78
|
+
const u = v, p = u + l, w = p + l, g = w + l, b = t[u], T = t[u + 1], F = t[p], y = t[p + 1], A = t[w], C = t[w + 1], S = t[g], M = t[g + 1], P = b, R = T, E = a[f], U = d * a[f + 1], D = F * E - y * U, x = F * U + y * E, k = a[2 * f], B = d * a[2 * f + 1], j = A * k - C * B, H = A * B + C * k, K = a[3 * f], L = d * a[3 * f + 1], $ = S * K - M * L, J = S * L + M * K, G = P + j, z = R + H, I = P - j, O = R - H, Y = D + $, N = x + J, W = d * (D - $), Q = d * (x - J), X = G + Y, rt = z + N, ot = G - Y, nt = z - N, at = I + Q, it = O - W, ct = I - Q, lt = O + W;
|
|
79
79
|
t[u] = X, t[u + 1] = rt, t[p] = at, t[p + 1] = it, t[w] = ot, t[w + 1] = nt, t[g] = ct, t[g + 1] = lt;
|
|
80
80
|
}
|
|
81
81
|
}
|
|
@@ -83,8 +83,8 @@ function Ut() {
|
|
|
83
83
|
const e = this._out, i = this._data, r = i[s], h = i[s + 1], c = i[s + n], d = i[s + n + 1], a = r + c, l = h + d, _ = r - c, v = h - d;
|
|
84
84
|
e[t] = a, e[t + 1] = l, e[t + 2] = _, e[t + 3] = v;
|
|
85
85
|
}, m.prototype._singleTransform4 = function(t, s, n) {
|
|
86
|
-
const e = this._out, i = this._data, r = this._inv ? -1 : 1, h = n * 2, c = n * 3, d = i[s], a = i[s + 1], l = i[s + n], _ = i[s + n + 1], v = i[s + h], f = i[s + h + 1], u = i[s + c], p = i[s + c + 1], w = d + v, g = a + f, b = d - v, T = a - f, F = l + u, y = _ + p, A = r * (l - u), C = r * (_ - p),
|
|
87
|
-
e[t] =
|
|
86
|
+
const e = this._out, i = this._data, r = this._inv ? -1 : 1, h = n * 2, c = n * 3, d = i[s], a = i[s + 1], l = i[s + n], _ = i[s + n + 1], v = i[s + h], f = i[s + h + 1], u = i[s + c], p = i[s + c + 1], w = d + v, g = a + f, b = d - v, T = a - f, F = l + u, y = _ + p, A = r * (l - u), C = r * (_ - p), S = w + F, M = g + y, P = b + C, R = T - A, E = w - F, U = g - y, D = b - C, x = T + A;
|
|
87
|
+
e[t] = S, e[t + 1] = M, e[t + 2] = P, e[t + 3] = R, e[t + 4] = E, e[t + 5] = U, e[t + 6] = D, e[t + 7] = x;
|
|
88
88
|
}, m.prototype._realTransform4 = function() {
|
|
89
89
|
var t = this._out, s = this._csize, n = this._width, e = 1 << n, i = s / e << 1, r, h, c = this._bitrev;
|
|
90
90
|
if (i === 4)
|
|
@@ -103,15 +103,15 @@ function Ut() {
|
|
|
103
103
|
var l = i >>> 1, _ = l >>> 1, v = _ >>> 1;
|
|
104
104
|
for (r = 0; r < s; r += i)
|
|
105
105
|
for (var f = 0, u = 0; f <= v; f += 2, u += e) {
|
|
106
|
-
var p = r + f, w = p + _, g = w + _, b = g + _, T = t[p], F = t[p + 1], y = t[w], A = t[w + 1], C = t[g],
|
|
106
|
+
var p = r + f, w = p + _, g = w + _, b = g + _, T = t[p], F = t[p + 1], y = t[w], A = t[w + 1], C = t[g], S = t[g + 1], M = t[b], P = t[b + 1], R = T, E = F, U = a[u], D = d * a[u + 1], x = y * U - A * D, k = y * D + A * U, B = a[2 * u], j = d * a[2 * u + 1], H = C * B - S * j, K = C * j + S * B, L = a[3 * u], $ = d * a[3 * u + 1], J = M * L - P * $, G = M * $ + P * L, z = R + H, I = E + K, O = R - H, Y = E - K, N = x + J, W = k + G, Q = d * (x - J), X = d * (k - G), rt = z + N, ot = I + W, nt = O + X, at = Y - Q;
|
|
107
107
|
if (t[p] = rt, t[p + 1] = ot, t[w] = nt, t[w + 1] = at, f === 0) {
|
|
108
|
-
var it = z -
|
|
108
|
+
var it = z - N, ct = I - W;
|
|
109
109
|
t[g] = it, t[g + 1] = ct;
|
|
110
110
|
continue;
|
|
111
111
|
}
|
|
112
112
|
if (f !== v) {
|
|
113
|
-
var lt =
|
|
114
|
-
t[ft] =
|
|
113
|
+
var lt = O, gt = -Y, yt = z, bt = -I, Ft = -d * X, Tt = -d * Q, At = -d * W, Ct = -d * N, St = lt + Ft, Mt = gt + Tt, Pt = yt + Ct, Rt = bt - At, ft = r + _ - f, ut = r + l - f;
|
|
114
|
+
t[ft] = St, t[ft + 1] = Mt, t[ut] = Pt, t[ut + 1] = Rt;
|
|
115
115
|
}
|
|
116
116
|
}
|
|
117
117
|
}
|
|
@@ -123,11 +123,11 @@ function Ut() {
|
|
|
123
123
|
e[t] = w, e[t + 1] = 0, e[t + 2] = g, e[t + 3] = b, e[t + 4] = T, e[t + 5] = 0, e[t + 6] = F, e[t + 7] = y;
|
|
124
124
|
}, dt;
|
|
125
125
|
}
|
|
126
|
-
var
|
|
127
|
-
const
|
|
128
|
-
class
|
|
126
|
+
var Dt = Ut();
|
|
127
|
+
const xt = /* @__PURE__ */ Et(Dt);
|
|
128
|
+
class wt {
|
|
129
129
|
constructor(o = 16e3, t = 512, s = 64) {
|
|
130
|
-
this._sampleRate = o, this._nfft = t, this._nfilt = s, this._fft = new
|
|
130
|
+
this._sampleRate = o, this._nfft = t, this._nfilt = s, this._fft = new xt(t), this._melFilters = this._createMelFilterbank();
|
|
131
131
|
}
|
|
132
132
|
_hzToMel(o) {
|
|
133
133
|
return 2595 * Math.log10(1 + o / 700);
|
|
@@ -205,7 +205,7 @@ async function _t(m = mt, o = st, t = et, s) {
|
|
|
205
205
|
})
|
|
206
206
|
)), Z;
|
|
207
207
|
}
|
|
208
|
-
function
|
|
208
|
+
function zt() {
|
|
209
209
|
return Z !== null;
|
|
210
210
|
}
|
|
211
211
|
class V {
|
|
@@ -229,22 +229,27 @@ class V {
|
|
|
229
229
|
}
|
|
230
230
|
}
|
|
231
231
|
}
|
|
232
|
-
|
|
232
|
+
const pt = { info: () => {
|
|
233
|
+
}, warn: () => {
|
|
234
|
+
}, error: () => {
|
|
235
|
+
} }, It = { info: console.info.bind(console), warn: console.warn.bind(console), error: console.error.bind(console) };
|
|
236
|
+
class kt {
|
|
233
237
|
constructor(o, t) {
|
|
234
238
|
this._started = !1, this._inferring = !1, this._audioCtx = null, this._stream = null, this._refEmbeddings = /* @__PURE__ */ new Map(), this._lastMatchAt = 0, this._lastInferenceAt = 0, this._initPromise = null;
|
|
235
239
|
const {
|
|
236
240
|
refsStorageKey: s = q,
|
|
237
|
-
thresholdStorageKey: n =
|
|
241
|
+
thresholdStorageKey: n = Wt,
|
|
238
242
|
wasmPaths: e = st,
|
|
239
243
|
modelPath: i = mt,
|
|
240
|
-
audioProcessorPath: r =
|
|
244
|
+
audioProcessorPath: r = Nt,
|
|
241
245
|
ortCdnUrl: h = et,
|
|
242
|
-
audioUtils: c = new
|
|
246
|
+
audioUtils: c = new wt(),
|
|
247
|
+
log: d = !1
|
|
243
248
|
} = t || {};
|
|
244
|
-
this._audioUtils = c, this._commands = o, this._refsStorageKey = s, this._thresholdStorageKey = n, this._audioProcessorPath = r, this._wasmPaths = e, this._modelPath = i, this._ortCdnUrl = h;
|
|
249
|
+
this._log = d === !1 ? pt : d === !0 ? It : { ...pt, ...d }, this._audioUtils = c, this._commands = o, this._refsStorageKey = s, this._thresholdStorageKey = n, this._audioProcessorPath = r, this._wasmPaths = e, this._modelPath = i, this._ortCdnUrl = h;
|
|
245
250
|
try {
|
|
246
|
-
const
|
|
247
|
-
this._threshold =
|
|
251
|
+
const a = localStorage.getItem(this._thresholdStorageKey);
|
|
252
|
+
this._threshold = a !== null ? Math.max(0, Math.min(1, Number(a))) : 0.65;
|
|
248
253
|
} catch {
|
|
249
254
|
this._threshold = 0.65;
|
|
250
255
|
}
|
|
@@ -293,7 +298,7 @@ class Wt {
|
|
|
293
298
|
for (const a of this._commands)
|
|
294
299
|
for (const l of a.triggers)
|
|
295
300
|
!e.has(l.name) && l.defaultRefPath && !n.has(l.name) && (e.add(l.name), i.push({ name: l.name, path: l.defaultRefPath }));
|
|
296
|
-
const r = tt(this._wasmPaths, this._ortCdnUrl), h =
|
|
301
|
+
const r = tt(this._wasmPaths, this._ortCdnUrl), h = zt(), [c, ...d] = await Promise.all([
|
|
297
302
|
h ? Promise.resolve(null) : this._trackFetch(this._modelPath, o, t),
|
|
298
303
|
...i.map(({ path: a }) => this._trackFetch(a, o, t))
|
|
299
304
|
]);
|
|
@@ -308,11 +313,11 @@ class Wt {
|
|
|
308
313
|
const l = JSON.parse(new TextDecoder().decode(d[a]));
|
|
309
314
|
this.addCustomWord(l), V.saveWord(l, this._refsStorageKey);
|
|
310
315
|
} catch {
|
|
311
|
-
|
|
316
|
+
this._log.warn(`[Mellon] failed to parse ref file: ${i[a].path}`);
|
|
312
317
|
}
|
|
313
318
|
for (const a of s)
|
|
314
319
|
this._refEmbeddings.set(a.word_name, a.embeddings);
|
|
315
|
-
|
|
320
|
+
this._log.info("[Mellon] init complete, loaded refs:", [...this._refEmbeddings.keys()]);
|
|
316
321
|
}
|
|
317
322
|
/**
|
|
318
323
|
* Loads the ONNX model and all reference embeddings.
|
|
@@ -375,22 +380,22 @@ class Wt {
|
|
|
375
380
|
if (!l) continue;
|
|
376
381
|
const _ = this._audioUtils.maxCosineSim(h, l);
|
|
377
382
|
if (_ >= this._threshold && t - this._lastMatchAt > 2e3) {
|
|
378
|
-
this._lastMatchAt = t,
|
|
383
|
+
this._lastMatchAt = t, this._log.info(`[Mellon] match: "${a.name}" sim=${_.toFixed(3)}`), typeof d.onMatch == "function" && d.onMatch(a.name, _), c = !0;
|
|
379
384
|
break;
|
|
380
385
|
}
|
|
381
386
|
}
|
|
382
387
|
}
|
|
383
388
|
} catch (s) {
|
|
384
|
-
|
|
389
|
+
this._log.error("[Mellon] inference error:", s);
|
|
385
390
|
} finally {
|
|
386
391
|
this._inferring = !1;
|
|
387
392
|
}
|
|
388
393
|
}
|
|
389
394
|
}
|
|
390
395
|
}
|
|
391
|
-
class
|
|
396
|
+
class Bt {
|
|
392
397
|
constructor(o, t) {
|
|
393
|
-
this._config = {}, this._samples = [], this._wordName = o, this._config.modelPath = (t == null ? void 0 : t.modelPath) || mt, this._config.wasmPaths = (t == null ? void 0 : t.wasmPaths) || st, this._config.ortCdnUrl = (t == null ? void 0 : t.ortCdnUrl) || et, this._audioUtils = (t == null ? void 0 : t.audioUtils) ?? new
|
|
398
|
+
this._config = {}, this._samples = [], this._wordName = o, this._config.modelPath = (t == null ? void 0 : t.modelPath) || mt, this._config.wasmPaths = (t == null ? void 0 : t.wasmPaths) || st, this._config.ortCdnUrl = (t == null ? void 0 : t.ortCdnUrl) || et, this._audioUtils = (t == null ? void 0 : t.audioUtils) ?? new wt();
|
|
394
399
|
}
|
|
395
400
|
/** Records 1.5 s of audio, stores the decoded PCM, returns new sample count. */
|
|
396
401
|
async recordSample() {
|
|
@@ -432,16 +437,16 @@ class kt {
|
|
|
432
437
|
return { word_name: this._wordName, model_type: "resnet_50_arc", embeddings: s };
|
|
433
438
|
}
|
|
434
439
|
}
|
|
435
|
-
const st = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/", et = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/ort.wasm.min.mjs", mt = "https://huggingface.co/ComicScrip/mellon/resolve/main/model.onnx",
|
|
440
|
+
const st = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/", et = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/ort.wasm.min.mjs", mt = "https://huggingface.co/ComicScrip/mellon/resolve/main/model.onnx", Nt = "https://cdn.jsdelivr.net/npm/mellon@0.0.14/dist/assets/audio-processor.js", q = "mellon-refs", Wt = "mellon-threshold";
|
|
436
441
|
export {
|
|
437
|
-
|
|
438
|
-
|
|
442
|
+
wt as AudioUtils,
|
|
443
|
+
Nt as DEFAULT_AUDIO_PROCESSOR_PATH,
|
|
439
444
|
mt as DEFAULT_MODEL_PATH,
|
|
440
445
|
et as DEFAULT_ORT_CDN_URL,
|
|
441
446
|
q as DEFAULT_REFS_STORAGE_KEY,
|
|
442
|
-
|
|
447
|
+
Wt as DEFAULT_THRESHOLD_STORAGE_KEY,
|
|
443
448
|
st as DEFAULT_WASM_PATHS,
|
|
444
|
-
|
|
445
|
-
|
|
449
|
+
kt as Detector,
|
|
450
|
+
Bt as EnrollmentSession,
|
|
446
451
|
V as Storage
|
|
447
452
|
};
|