mellon 0.0.20 → 0.0.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/mellon.cjs +1 -1
- package/dist/mellon.mjs +153 -146
- package/package.json +1 -1
package/dist/mellon.cjs
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
"use strict";Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});function Ut(m){return m&&m.__esModule&&Object.prototype.hasOwnProperty.call(m,"default")?m.default:m}var dt,pt;function Dt(){if(pt)return dt;pt=1;function m(o){if(this.size=o|0,this.size<=1||(this.size&this.size-1)!==0)throw new Error("FFT size must be a power of two and bigger than 1");this._csize=o<<1;for(var t=new Array(this.size*2),s=0;s<t.length;s+=2){const l=Math.PI*s/this.size;t[s]=Math.cos(l),t[s+1]=-Math.sin(l)}this.table=t;for(var n=0,e=1;this.size>e;e<<=1)n++;this._width=n%2===0?n-1:n,this._bitrev=new Array(1<<this._width);for(var c=0;c<this._bitrev.length;c++){this._bitrev[c]=0;for(var r=0;r<this._width;r+=2){var h=this._width-r-2;this._bitrev[c]|=(c>>>r&3)<<h}}this._out=null,this._data=null,this._inv=0}return dt=m,m.prototype.fromComplexArray=function(t,s){for(var n=s||new Array(t.length>>>1),e=0;e<t.length;e+=2)n[e>>>1]=t[e];return n},m.prototype.createComplexArray=function(){const t=new Array(this._csize);for(var s=0;s<t.length;s++)t[s]=0;return t},m.prototype.toComplexArray=function(t,s){for(var n=s||this.createComplexArray(),e=0;e<n.length;e+=2)n[e]=t[e>>>1],n[e+1]=0;return n},m.prototype.completeSpectrum=function(t){for(var s=this._csize,n=s>>>1,e=2;e<n;e+=2)t[s-e]=t[e],t[s-e+1]=-t[e+1]},m.prototype.transform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=0,this._transform4(),this._out=null,this._data=null},m.prototype.realTransform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=0,this._realTransform4(),this._out=null,this._data=null},m.prototype.inverseTransform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=1,this._transform4();for(var n=0;n<t.length;n++)t[n]/=this.size;this._out=null,this._data=null},m.prototype._transform4=function(){var 
t=this._out,s=this._csize,n=this._width,e=1<<n,c=s/e<<1,r,h,l=this._bitrev;if(c===4)for(r=0,h=0;r<s;r+=c,h++){const u=l[h];this._singleTransform2(r,u,e)}else for(r=0,h=0;r<s;r+=c,h++){const u=l[h];this._singleTransform4(r,u,e)}var i=this._inv?-1:1,a=this.table;for(e>>=2;e>=2;e>>=2){c=s/e<<1;var d=c>>>2;for(r=0;r<s;r+=c)for(var _=r+d,v=r,f=0;v<_;v+=2,f+=e){const u=v,p=u+d,w=p+d,g=w+d,T=t[u],A=t[u+1],b=t[p],y=t[p+1],F=t[w],S=t[w+1],C=t[g],E=t[g+1],M=T,P=A,R=a[f],U=i*a[f+1],D=b*R-y*U,z=b*U+y*R,k=a[2*f],N=i*a[2*f+1],B=F*k-S*N,j=F*N+S*k,K=a[3*f],O=i*a[3*f+1],$=C*K-E*O,G=C*O+E*K,J=M+B,x=P+j,I=M-B,Y=P-j,Q=D+$,L=z+G,W=i*(D-$),V=i*(z-G),tt=J+Q,rt=x+L,ot=J-Q,nt=x-L,at=I+V,it=Y-W,ct=I-V,lt=Y+W;t[u]=tt,t[u+1]=rt,t[p]=at,t[p+1]=it,t[w]=ot,t[w+1]=nt,t[g]=ct,t[g+1]=lt}}},m.prototype._singleTransform2=function(t,s,n){const e=this._out,c=this._data,r=c[s],h=c[s+1],l=c[s+n],i=c[s+n+1],a=r+l,d=h+i,_=r-l,v=h-i;e[t]=a,e[t+1]=d,e[t+2]=_,e[t+3]=v},m.prototype._singleTransform4=function(t,s,n){const e=this._out,c=this._data,r=this._inv?-1:1,h=n*2,l=n*3,i=c[s],a=c[s+1],d=c[s+n],_=c[s+n+1],v=c[s+h],f=c[s+h+1],u=c[s+l],p=c[s+l+1],w=i+v,g=a+f,T=i-v,A=a-f,b=d+u,y=_+p,F=r*(d-u),S=r*(_-p),C=w+b,E=g+y,M=T+S,P=A-F,R=w-b,U=g-y,D=T-S,z=A+F;e[t]=C,e[t+1]=E,e[t+2]=M,e[t+3]=P,e[t+4]=R,e[t+5]=U,e[t+6]=D,e[t+7]=z},m.prototype._realTransform4=function(){var t=this._out,s=this._csize,n=this._width,e=1<<n,c=s/e<<1,r,h,l=this._bitrev;if(c===4)for(r=0,h=0;r<s;r+=c,h++){const ht=l[h];this._singleRealTransform2(r,ht>>>1,e>>>1)}else for(r=0,h=0;r<s;r+=c,h++){const ht=l[h];this._singleRealTransform4(r,ht>>>1,e>>>1)}var i=this._inv?-1:1,a=this.table;for(e>>=2;e>=2;e>>=2){c=s/e<<1;var d=c>>>1,_=d>>>1,v=_>>>1;for(r=0;r<s;r+=c)for(var f=0,u=0;f<=v;f+=2,u+=e){var 
p=r+f,w=p+_,g=w+_,T=g+_,A=t[p],b=t[p+1],y=t[w],F=t[w+1],S=t[g],C=t[g+1],E=t[T],M=t[T+1],P=A,R=b,U=a[u],D=i*a[u+1],z=y*U-F*D,k=y*D+F*U,N=a[2*u],B=i*a[2*u+1],j=S*N-C*B,K=S*B+C*N,O=a[3*u],$=i*a[3*u+1],G=E*O-M*$,J=E*$+M*O,x=P+j,I=R+K,Y=P-j,Q=R-K,L=z+G,W=k+J,V=i*(z-G),tt=i*(k-J),rt=x+L,ot=I+W,nt=Y+tt,at=Q-V;if(t[p]=rt,t[p+1]=ot,t[w]=nt,t[w+1]=at,f===0){var it=x-L,ct=I-W;t[g]=it,t[g+1]=ct;continue}if(f!==v){var lt=Y,yt=-Q,Tt=x,bt=-I,At=-i*tt,Ft=-i*V,St=-i*W,Ct=-i*L,Et=lt+At,Mt=yt+Ft,Pt=Tt+Ct,Rt=bt-St,ut=r+_-f,vt=r+d-f;t[ut]=Et,t[ut+1]=Mt,t[vt]=Pt,t[vt+1]=Rt}}}},m.prototype._singleRealTransform2=function(t,s,n){const e=this._out,c=this._data,r=c[s],h=c[s+n],l=r+h,i=r-h;e[t]=l,e[t+1]=0,e[t+2]=i,e[t+3]=0},m.prototype._singleRealTransform4=function(t,s,n){const e=this._out,c=this._data,r=this._inv?-1:1,h=n*2,l=n*3,i=c[s],a=c[s+n],d=c[s+h],_=c[s+l],v=i+d,f=i-d,u=a+_,p=r*(a-_),w=v+u,g=f,T=-p,A=v-u,b=f,y=p;e[t]=w,e[t+1]=0,e[t+2]=g,e[t+3]=T,e[t+4]=A,e[t+5]=0,e[t+6]=b,e[t+7]=y},dt}var zt=Dt();const xt=Ut(zt);class ft{constructor(o=16e3,t=512,s=64){this._sampleRate=o,this._nfft=t,this._nfilt=s,this._fft=new xt(t),this._melFilters=this._createMelFilterbank()}_hzToMel(o){return 2595*Math.log10(1+o/700)}_melToHz(o){return 700*(10**(o/2595)-1)}_createMelFilterbank(){const t=this._sampleRate/2,s=this._hzToMel(0),n=this._hzToMel(t),e=new Float32Array(this._nfilt+2);for(let l=0;l<this._nfilt+2;l++)e[l]=s+l*(n-s)/(this._nfilt+1);const r=e.map(l=>this._melToHz(l)).map(l=>Math.floor((this._nfft+1)*l/this._sampleRate)),h=[];for(let l=0;l<this._nfilt;l++){const i=new Float32Array(Math.floor(this._nfft/2)+1);for(let a=r[l];a<r[l+1];a++)i[a]=(a-r[l])/(r[l+1]-r[l]);for(let a=r[l+1];a<r[l+2];a++)i[a]=(r[l+2]-a)/(r[l+2]-r[l+1]);h.push(i)}return h}logfbank(o){const t=Math.floor(.025*this._sampleRate),s=Math.floor(.01*this._sampleRate),n=1+Math.ceil((o.length-t)/s),e=new Float32Array(n*this._nfilt),c=new Float32Array(this._nfft),r=this._fft.createComplexArray();for(let h=0;h<n;h++){const 
l=h*s;c.fill(0);for(let d=0;d<t&&l+d<o.length;d++)c[d]=o[l+d];const i=this._fft.toComplexArray(c,null);this._fft.transform(r,i);const a=new Float32Array(Math.floor(this._nfft/2)+1);for(let d=0;d<a.length;d++){const _=r[2*d],v=r[2*d+1];a[d]=1/this._nfft*(_*_+v*v),a[d]===0&&(a[d]=1e-30)}for(let d=0;d<this._nfilt;d++){let _=0;const v=this._melFilters[d];for(let f=0;f<a.length;f++)_+=a[f]*v[f];_===0&&(_=1e-30),e[h*this._nfilt+d]=Math.log(_)}}return e}maxCosineSim(o,t){let s=0;for(const n of t){let e=0;for(let r=0;r<n.length;r++)e+=o[r]*n[r];const c=(e+1)/2;c>s&&(s=c)}return s}}async function st(m=Z,o=q){const t=await import(o);return t.env.wasm.wasmPaths=m,t.env.wasm.numThreads=1,t}let _t=null;async function mt(m=et,o=Z,t=q,s){return _t||(_t=st(o,t).then(n=>s?n.InferenceSession.create(new Uint8Array(s),{executionProviders:["wasm"],graphOptimizationLevel:"all"}):n.InferenceSession.create(m,{executionProviders:["wasm"],graphOptimizationLevel:"all"}))),_t}class H{static loadWords(o=X){try{const t=localStorage.getItem(o);return t?JSON.parse(t):[]}catch{return[]}}static saveWord(o,t=X){const s=H.loadWords(t).filter(n=>n.word_name!==o.word_name);localStorage.setItem(t,JSON.stringify([...s,o]))}static deleteWord(o,t=X){try{const s=H.loadWords(t).filter(n=>n.word_name!==o);localStorage.setItem(t,JSON.stringify(s))}catch{}}}class It{constructor(o,t){this._started=!1,this._inferring=!1,this._audioCtx=null,this._stream=null,this._refEmbeddings=new Map,this._lastMatchAt=0,this._lastInferenceAt=0,this._initPromise=null;const{refsStorageKey:s=X,thresholdStorageKey:n=gt,wasmPaths:e=Z,modelPath:c=et,audioProcessorPath:r=wt,ortCdnUrl:h=q,audioUtils:l=new ft}=t||{};this._audioUtils=l,this._commands=o,this._refsStorageKey=s,this._thresholdStorageKey=n,this._audioProcessorPath=r,this._wasmPaths=e,this._modelPath=c,this._ortCdnUrl=h;try{const i=localStorage.getItem(this._thresholdStorageKey);this._threshold=i!==null?Math.max(0,Math.min(1,Number(i))):.65}catch{this._threshold=.65}}get 
threshold(){return this._threshold}set threshold(o){this._threshold=Math.max(0,Math.min(1,o));try{localStorage.setItem(this._thresholdStorageKey,String(this._threshold))}catch{}}get listening(){return this._started}async _trackFetch(o,t,s){const n=await fetch(o);if(!n.ok)throw new Error(`HTTP ${n.status} fetching ${o}`);const e=Number(n.headers.get("content-length")??"0");if(e>0&&(s.total+=e),!n.body){const a=await n.arrayBuffer();return s.downloaded+=a.byteLength,e||(s.total+=a.byteLength),t==null||t(s.downloaded,s.total),a}const c=n.body.getReader(),r=[];let h=0;for(;;){const{done:a,value:d}=await c.read();if(a)break;r.push(d),h+=d.length,s.downloaded+=d.length,t==null||t(s.downloaded,s.total)}e||(s.total+=h);const l=new Uint8Array(h);let i=0;for(const a of r)l.set(a,i),i+=a.length;return l.buffer}async _init(o){const t={downloaded:0,total:0},s=new Set,n=[];for(const i of this._commands)for(const a of i.triggers)!s.has(a.name)&&a.defaultRefPath&&(s.add(a.name),n.push({name:a.name,path:a.defaultRefPath}));const e=st(this._wasmPaths,this._ortCdnUrl),[c,...r]=await Promise.all([this._trackFetch(this._modelPath,o,t),...n.map(({path:i})=>this._trackFetch(i,o,t))]);await e,await mt(this._modelPath,this._wasmPaths,this._ortCdnUrl,c);const h=H.loadWords(this._refsStorageKey),l=new Set(h.map(i=>i.word_name));for(let i=0;i<n.length;i++)try{const a=JSON.parse(new TextDecoder().decode(r[i]));this.addCustomWord(a),l.has(a.word_name)||H.saveWord(a,this._refsStorageKey)}catch{console.warn(`[Mellon] failed to parse ref file: ${n[i].path}`)}for(const i of h)this._refEmbeddings.set(i.word_name,i.embeddings);console.info("[Mellon] init complete, loaded refs:",[...this._refEmbeddings.keys()])}async init(o){this._initPromise||(this._initPromise=this._init(o)),await this._initPromise}addCustomWord(o){if(!(Array.isArray(o.embeddings)&&o.embeddings.length>0))throw new Error("invalid ref file for : "+o.word_name);this._refEmbeddings.set(o.word_name,o.embeddings)}async 
start(){if(this._started)return;await this.init();let o;try{o=await navigator.mediaDevices.getUserMedia({audio:{noiseSuppression:!1,echoCancellation:!1,autoGainControl:!1,channelCount:1}})}catch{o=await navigator.mediaDevices.getUserMedia({audio:!0})}this._stream=o;const t=new AudioContext({sampleRate:16e3});this._audioCtx=t,await t.audioWorklet.addModule(this._audioProcessorPath);const s=t.createMediaStreamSource(o),n=new AudioWorkletNode(t,"audio-processor");n.port.onmessage=e=>{this._handleBuffer(e.data)},s.connect(n),n.connect(t.destination),this._started=!0}async stop(){if(this._started=!1,this._audioCtx&&(await this._audioCtx.close(),this._audioCtx=null),this._stream){for(const o of this._stream.getTracks())o.stop();this._stream=null}}async _handleBuffer(o){if(this._inferring)return;const t=Date.now();if(!(t-this._lastInferenceAt<300)){this._lastInferenceAt=t,this._inferring=!0;try{const[s,n]=await Promise.all([st(this._wasmPaths,this._ortCdnUrl),mt(this._modelPath,this._wasmPaths,this._ortCdnUrl)]),e=this._audioUtils.logfbank(o),c=new s.Tensor("float32",e,[1,1,149,64]),r=await n.run({input:c}),h=r[Object.keys(r)[0]].data;let l=!1;for(const i of this._commands){if(l)break;for(const a of i.triggers){const d=this._refEmbeddings.get(a.name);if(!d)continue;const _=this._audioUtils.maxCosineSim(h,d);if(_>=this._threshold&&t-this._lastMatchAt>2e3){this._lastMatchAt=t,console.info(`[Mellon] match: "${a}" sim=${_.toFixed(3)}`),typeof i.onMatch=="function"&&i.onMatch(a.name,_),l=!0;break}}}}catch(s){console.error("[Mellon] inference error:",s)}finally{this._inferring=!1}}}}class Lt{constructor(o,t){this._config={},this._samples=[],this._wordName=o,this._config.modelPath=(t==null?void 0:t.modelPath)||et,this._config.wasmPaths=(t==null?void 0:t.wasmPaths)||Z,this._config.ortCdnUrl=(t==null?void 0:t.ortCdnUrl)||q,this._audioUtils=(t==null?void 0:t.audioUtils)??new ft}async recordSample(){const o=await navigator.mediaDevices.getUserMedia({audio:!0}),t=new 
AudioContext({sampleRate:16e3}),s=await new Promise((c,r)=>{const h=new MediaRecorder(o),l=[];h.ondataavailable=i=>{i.data.size>0&&l.push(i.data)},h.onstop=async()=>{var i;for(const a of o.getTracks())a.stop();try{const d=await new Blob(l,{type:((i=l[0])==null?void 0:i.type)||"audio/webm"}).arrayBuffer(),_=await t.decodeAudioData(d);await t.close(),c(_.getChannelData(0).slice())}catch(a){r(a)}},h.start(),setTimeout(()=>{try{h.stop()}catch{}},1500)}),n=24e3,e=new Float32Array(n);return e.set(s.slice(0,n)),this._samples.push(e),this._samples.length}deleteSample(o){if(o<0||o>=this._samples.length)throw new RangeError(`index ${o} out of bounds (${this._samples.length} samples)`);return this._samples.splice(o,1),this._samples.length}async generateRef(){const[o,t]=await Promise.all([st(this._config.wasmPaths,this._config.ortCdnUrl),mt(this._config.modelPath,this._config.wasmPaths,this._config.ortCdnUrl)]),s=[];for(const n of this._samples){const e=this._audioUtils.logfbank(n),c=new o.Tensor("float32",e,[1,1,149,64]),r=await t.run({input:c}),h=Array.from(r[Object.keys(r)[0]].data);s.push(h)}return{word_name:this._wordName,model_type:"resnet_50_arc",embeddings:s}}}const Z="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/",q="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/ort.wasm.min.mjs",et="https://huggingface.co/ComicScrip/mellon/resolve/main/model.onnx",wt="https://cdn.jsdelivr.net/npm/mellon@0.0.14/dist/assets/audio-processor.js",X="mellon-refs",gt="mellon-threshold";exports.AudioUtils=ft;exports.DEFAULT_AUDIO_PROCESSOR_PATH=wt;exports.DEFAULT_MODEL_PATH=et;exports.DEFAULT_ORT_CDN_URL=q;exports.DEFAULT_REFS_STORAGE_KEY=X;exports.DEFAULT_THRESHOLD_STORAGE_KEY=gt;exports.DEFAULT_WASM_PATHS=Z;exports.Detector=It;exports.EnrollmentSession=Lt;exports.Storage=H;
|
|
1
|
+
"use strict";Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});function Ut(m){return m&&m.__esModule&&Object.prototype.hasOwnProperty.call(m,"default")?m.default:m}var _t,pt;function Dt(){if(pt)return _t;pt=1;function m(o){if(this.size=o|0,this.size<=1||(this.size&this.size-1)!==0)throw new Error("FFT size must be a power of two and bigger than 1");this._csize=o<<1;for(var t=new Array(this.size*2),s=0;s<t.length;s+=2){const c=Math.PI*s/this.size;t[s]=Math.cos(c),t[s+1]=-Math.sin(c)}this.table=t;for(var n=0,e=1;this.size>e;e<<=1)n++;this._width=n%2===0?n-1:n,this._bitrev=new Array(1<<this._width);for(var i=0;i<this._bitrev.length;i++){this._bitrev[i]=0;for(var r=0;r<this._width;r+=2){var h=this._width-r-2;this._bitrev[i]|=(i>>>r&3)<<h}}this._out=null,this._data=null,this._inv=0}return _t=m,m.prototype.fromComplexArray=function(t,s){for(var n=s||new Array(t.length>>>1),e=0;e<t.length;e+=2)n[e>>>1]=t[e];return n},m.prototype.createComplexArray=function(){const t=new Array(this._csize);for(var s=0;s<t.length;s++)t[s]=0;return t},m.prototype.toComplexArray=function(t,s){for(var n=s||this.createComplexArray(),e=0;e<n.length;e+=2)n[e]=t[e>>>1],n[e+1]=0;return n},m.prototype.completeSpectrum=function(t){for(var s=this._csize,n=s>>>1,e=2;e<n;e+=2)t[s-e]=t[e],t[s-e+1]=-t[e+1]},m.prototype.transform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=0,this._transform4(),this._out=null,this._data=null},m.prototype.realTransform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=0,this._realTransform4(),this._out=null,this._data=null},m.prototype.inverseTransform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=1,this._transform4();for(var n=0;n<t.length;n++)t[n]/=this.size;this._out=null,this._data=null},m.prototype._transform4=function(){var 
t=this._out,s=this._csize,n=this._width,e=1<<n,i=s/e<<1,r,h,c=this._bitrev;if(i===4)for(r=0,h=0;r<s;r+=i,h++){const u=c[h];this._singleTransform2(r,u,e)}else for(r=0,h=0;r<s;r+=i,h++){const u=c[h];this._singleTransform4(r,u,e)}var d=this._inv?-1:1,a=this.table;for(e>>=2;e>=2;e>>=2){i=s/e<<1;var l=i>>>2;for(r=0;r<s;r+=i)for(var _=r+l,v=r,f=0;v<_;v+=2,f+=e){const u=v,p=u+l,w=p+l,g=w+l,T=t[u],b=t[u+1],A=t[p],y=t[p+1],F=t[w],S=t[w+1],C=t[g],R=t[g+1],P=T,E=b,M=a[f],U=d*a[f+1],D=A*M-y*U,z=A*U+y*M,k=a[2*f],N=d*a[2*f+1],B=F*k-S*N,j=F*N+S*k,K=a[3*f],O=d*a[3*f+1],$=C*K-R*O,G=C*O+R*K,J=P+B,x=E+j,I=P-B,Y=E-j,Q=D+$,L=z+G,W=d*(D-$),V=d*(z-G),tt=J+Q,ot=x+L,nt=J-Q,at=x-L,it=I+V,ct=Y-W,lt=I-V,ht=Y+W;t[u]=tt,t[u+1]=ot,t[p]=it,t[p+1]=ct,t[w]=nt,t[w+1]=at,t[g]=lt,t[g+1]=ht}}},m.prototype._singleTransform2=function(t,s,n){const e=this._out,i=this._data,r=i[s],h=i[s+1],c=i[s+n],d=i[s+n+1],a=r+c,l=h+d,_=r-c,v=h-d;e[t]=a,e[t+1]=l,e[t+2]=_,e[t+3]=v},m.prototype._singleTransform4=function(t,s,n){const e=this._out,i=this._data,r=this._inv?-1:1,h=n*2,c=n*3,d=i[s],a=i[s+1],l=i[s+n],_=i[s+n+1],v=i[s+h],f=i[s+h+1],u=i[s+c],p=i[s+c+1],w=d+v,g=a+f,T=d-v,b=a-f,A=l+u,y=_+p,F=r*(l-u),S=r*(_-p),C=w+A,R=g+y,P=T+S,E=b-F,M=w-A,U=g-y,D=T-S,z=b+F;e[t]=C,e[t+1]=R,e[t+2]=P,e[t+3]=E,e[t+4]=M,e[t+5]=U,e[t+6]=D,e[t+7]=z},m.prototype._realTransform4=function(){var t=this._out,s=this._csize,n=this._width,e=1<<n,i=s/e<<1,r,h,c=this._bitrev;if(i===4)for(r=0,h=0;r<s;r+=i,h++){const dt=c[h];this._singleRealTransform2(r,dt>>>1,e>>>1)}else for(r=0,h=0;r<s;r+=i,h++){const dt=c[h];this._singleRealTransform4(r,dt>>>1,e>>>1)}var d=this._inv?-1:1,a=this.table;for(e>>=2;e>=2;e>>=2){i=s/e<<1;var l=i>>>1,_=l>>>1,v=_>>>1;for(r=0;r<s;r+=i)for(var f=0,u=0;f<=v;f+=2,u+=e){var 
p=r+f,w=p+_,g=w+_,T=g+_,b=t[p],A=t[p+1],y=t[w],F=t[w+1],S=t[g],C=t[g+1],R=t[T],P=t[T+1],E=b,M=A,U=a[u],D=d*a[u+1],z=y*U-F*D,k=y*D+F*U,N=a[2*u],B=d*a[2*u+1],j=S*N-C*B,K=S*B+C*N,O=a[3*u],$=d*a[3*u+1],G=R*O-P*$,J=R*$+P*O,x=E+j,I=M+K,Y=E-j,Q=M-K,L=z+G,W=k+J,V=d*(z-G),tt=d*(k-J),ot=x+L,nt=I+W,at=Y+tt,it=Q-V;if(t[p]=ot,t[p+1]=nt,t[w]=at,t[w+1]=it,f===0){var ct=x-L,lt=I-W;t[g]=ct,t[g+1]=lt;continue}if(f!==v){var ht=Y,yt=-Q,Tt=x,At=-I,bt=-d*tt,Ft=-d*V,St=-d*W,Ct=-d*L,Rt=ht+bt,Pt=yt+Ft,Et=Tt+Ct,Mt=At-St,ut=r+_-f,vt=r+l-f;t[ut]=Rt,t[ut+1]=Pt,t[vt]=Et,t[vt+1]=Mt}}}},m.prototype._singleRealTransform2=function(t,s,n){const e=this._out,i=this._data,r=i[s],h=i[s+n],c=r+h,d=r-h;e[t]=c,e[t+1]=0,e[t+2]=d,e[t+3]=0},m.prototype._singleRealTransform4=function(t,s,n){const e=this._out,i=this._data,r=this._inv?-1:1,h=n*2,c=n*3,d=i[s],a=i[s+n],l=i[s+h],_=i[s+c],v=d+l,f=d-l,u=a+_,p=r*(a-_),w=v+u,g=f,T=-p,b=v-u,A=f,y=p;e[t]=w,e[t+1]=0,e[t+2]=g,e[t+3]=T,e[t+4]=b,e[t+5]=0,e[t+6]=A,e[t+7]=y},_t}var zt=Dt();const xt=Ut(zt);class ft{constructor(o=16e3,t=512,s=64){this._sampleRate=o,this._nfft=t,this._nfilt=s,this._fft=new xt(t),this._melFilters=this._createMelFilterbank()}_hzToMel(o){return 2595*Math.log10(1+o/700)}_melToHz(o){return 700*(10**(o/2595)-1)}_createMelFilterbank(){const t=this._sampleRate/2,s=this._hzToMel(0),n=this._hzToMel(t),e=new Float32Array(this._nfilt+2);for(let c=0;c<this._nfilt+2;c++)e[c]=s+c*(n-s)/(this._nfilt+1);const r=e.map(c=>this._melToHz(c)).map(c=>Math.floor((this._nfft+1)*c/this._sampleRate)),h=[];for(let c=0;c<this._nfilt;c++){const d=new Float32Array(Math.floor(this._nfft/2)+1);for(let a=r[c];a<r[c+1];a++)d[a]=(a-r[c])/(r[c+1]-r[c]);for(let a=r[c+1];a<r[c+2];a++)d[a]=(r[c+2]-a)/(r[c+2]-r[c+1]);h.push(d)}return h}logfbank(o){const t=Math.floor(.025*this._sampleRate),s=Math.floor(.01*this._sampleRate),n=1+Math.ceil((o.length-t)/s),e=new Float32Array(n*this._nfilt),i=new Float32Array(this._nfft),r=this._fft.createComplexArray();for(let h=0;h<n;h++){const 
c=h*s;i.fill(0);for(let l=0;l<t&&c+l<o.length;l++)i[l]=o[c+l];const d=this._fft.toComplexArray(i,null);this._fft.transform(r,d);const a=new Float32Array(Math.floor(this._nfft/2)+1);for(let l=0;l<a.length;l++){const _=r[2*l],v=r[2*l+1];a[l]=1/this._nfft*(_*_+v*v),a[l]===0&&(a[l]=1e-30)}for(let l=0;l<this._nfilt;l++){let _=0;const v=this._melFilters[l];for(let f=0;f<a.length;f++)_+=a[f]*v[f];_===0&&(_=1e-30),e[h*this._nfilt+l]=Math.log(_)}}return e}maxCosineSim(o,t){let s=0;for(const n of t){let e=0;for(let r=0;r<n.length;r++)e+=o[r]*n[r];const i=(e+1)/2;i>s&&(s=i)}return s}}async function et(m=Z,o=q){const t=await import(o);return t.env.wasm.wasmPaths=m,t.env.wasm.numThreads=1,t}let st=null;async function mt(m=rt,o=Z,t=q,s){return st||(st=et(o,t).then(n=>s?n.InferenceSession.create(new Uint8Array(s),{executionProviders:["wasm"],graphOptimizationLevel:"all"}):n.InferenceSession.create(m,{executionProviders:["wasm"],graphOptimizationLevel:"all"}))),st}function It(){return st!==null}class H{static loadWords(o=X){try{const t=localStorage.getItem(o);return t?JSON.parse(t):[]}catch{return[]}}static saveWord(o,t=X){const s=H.loadWords(t).filter(n=>n.word_name!==o.word_name);localStorage.setItem(t,JSON.stringify([...s,o]))}static deleteWord(o,t=X){try{const s=H.loadWords(t).filter(n=>n.word_name!==o);localStorage.setItem(t,JSON.stringify(s))}catch{}}}class Lt{constructor(o,t){this._started=!1,this._inferring=!1,this._audioCtx=null,this._stream=null,this._refEmbeddings=new Map,this._lastMatchAt=0,this._lastInferenceAt=0,this._initPromise=null;const{refsStorageKey:s=X,thresholdStorageKey:n=gt,wasmPaths:e=Z,modelPath:i=rt,audioProcessorPath:r=wt,ortCdnUrl:h=q,audioUtils:c=new ft}=t||{};this._audioUtils=c,this._commands=o,this._refsStorageKey=s,this._thresholdStorageKey=n,this._audioProcessorPath=r,this._wasmPaths=e,this._modelPath=i,this._ortCdnUrl=h;try{const 
d=localStorage.getItem(this._thresholdStorageKey);this._threshold=d!==null?Math.max(0,Math.min(1,Number(d))):.65}catch{this._threshold=.65}}get threshold(){return this._threshold}set threshold(o){this._threshold=Math.max(0,Math.min(1,o));try{localStorage.setItem(this._thresholdStorageKey,String(this._threshold))}catch{}}get listening(){return this._started}async _trackFetch(o,t,s){const n=await fetch(o);if(!n.ok)throw new Error(`HTTP ${n.status} fetching ${o}`);const e=Number(n.headers.get("content-length")??"0");if(e>0&&(s.total+=e),!n.body){const a=await n.arrayBuffer();return s.downloaded+=a.byteLength,e||(s.total+=a.byteLength),t==null||t(s.downloaded,s.total),a}const i=n.body.getReader(),r=[];let h=0;for(;;){const{done:a,value:l}=await i.read();if(a)break;r.push(l),h+=l.length,s.downloaded+=l.length,t==null||t(s.downloaded,s.total)}e||(s.total+=h);const c=new Uint8Array(h);let d=0;for(const a of r)c.set(a,d),d+=a.length;return c.buffer}async _init(o){const t={downloaded:0,total:0},s=H.loadWords(this._refsStorageKey),n=new Set(s.map(a=>a.word_name)),e=new Set,i=[];for(const a of this._commands)for(const l of a.triggers)!e.has(l.name)&&l.defaultRefPath&&!n.has(l.name)&&(e.add(l.name),i.push({name:l.name,path:l.defaultRefPath}));const r=et(this._wasmPaths,this._ortCdnUrl),h=It(),[c,...d]=await Promise.all([h?Promise.resolve(null):this._trackFetch(this._modelPath,o,t),...i.map(({path:a})=>this._trackFetch(a,o,t))]);await r,await mt(this._modelPath,this._wasmPaths,this._ortCdnUrl,h?void 0:c);for(let a=0;a<i.length;a++)try{const l=JSON.parse(new TextDecoder().decode(d[a]));this.addCustomWord(l),H.saveWord(l,this._refsStorageKey)}catch{console.warn(`[Mellon] failed to parse ref file: ${i[a].path}`)}for(const a of s)this._refEmbeddings.set(a.word_name,a.embeddings);console.info("[Mellon] init complete, loaded refs:",[...this._refEmbeddings.keys()])}async init(o){this._initPromise||(this._initPromise=this._init(o)),await 
this._initPromise}addCustomWord(o){if(!(Array.isArray(o.embeddings)&&o.embeddings.length>0))throw new Error("invalid ref file for : "+o.word_name);this._refEmbeddings.set(o.word_name,o.embeddings)}async start(){if(this._started)return;await this.init();let o;try{o=await navigator.mediaDevices.getUserMedia({audio:{noiseSuppression:!1,echoCancellation:!1,autoGainControl:!1,channelCount:1}})}catch{o=await navigator.mediaDevices.getUserMedia({audio:!0})}this._stream=o;const t=new AudioContext({sampleRate:16e3});this._audioCtx=t,await t.audioWorklet.addModule(this._audioProcessorPath);const s=t.createMediaStreamSource(o),n=new AudioWorkletNode(t,"audio-processor");n.port.onmessage=e=>{this._handleBuffer(e.data)},s.connect(n),n.connect(t.destination),this._started=!0}async stop(){if(this._started=!1,this._audioCtx&&(await this._audioCtx.close(),this._audioCtx=null),this._stream){for(const o of this._stream.getTracks())o.stop();this._stream=null}}async _handleBuffer(o){if(this._inferring)return;const t=Date.now();if(!(t-this._lastInferenceAt<300)){this._lastInferenceAt=t,this._inferring=!0;try{const[s,n]=await Promise.all([et(this._wasmPaths,this._ortCdnUrl),mt(this._modelPath,this._wasmPaths,this._ortCdnUrl)]),e=this._audioUtils.logfbank(o),i=new s.Tensor("float32",e,[1,1,149,64]),r=await n.run({input:i}),h=r[Object.keys(r)[0]].data;let c=!1;for(const d of this._commands){if(c)break;for(const a of d.triggers){const l=this._refEmbeddings.get(a.name);if(!l)continue;const _=this._audioUtils.maxCosineSim(h,l);if(_>=this._threshold&&t-this._lastMatchAt>2e3){this._lastMatchAt=t,console.info(`[Mellon] match: "${a}" sim=${_.toFixed(3)}`),typeof d.onMatch=="function"&&d.onMatch(a.name,_),c=!0;break}}}}catch(s){console.error("[Mellon] inference error:",s)}finally{this._inferring=!1}}}}class Wt{constructor(o,t){this._config={},this._samples=[],this._wordName=o,this._config.modelPath=(t==null?void 0:t.modelPath)||rt,this._config.wasmPaths=(t==null?void 
0:t.wasmPaths)||Z,this._config.ortCdnUrl=(t==null?void 0:t.ortCdnUrl)||q,this._audioUtils=(t==null?void 0:t.audioUtils)??new ft}async recordSample(){const o=await navigator.mediaDevices.getUserMedia({audio:!0}),t=new AudioContext({sampleRate:16e3}),s=await new Promise((i,r)=>{const h=new MediaRecorder(o),c=[];h.ondataavailable=d=>{d.data.size>0&&c.push(d.data)},h.onstop=async()=>{var d;for(const a of o.getTracks())a.stop();try{const l=await new Blob(c,{type:((d=c[0])==null?void 0:d.type)||"audio/webm"}).arrayBuffer(),_=await t.decodeAudioData(l);await t.close(),i(_.getChannelData(0).slice())}catch(a){r(a)}},h.start(),setTimeout(()=>{try{h.stop()}catch{}},1500)}),n=24e3,e=new Float32Array(n);return e.set(s.slice(0,n)),this._samples.push(e),this._samples.length}deleteSample(o){if(o<0||o>=this._samples.length)throw new RangeError(`index ${o} out of bounds (${this._samples.length} samples)`);return this._samples.splice(o,1),this._samples.length}async generateRef(){const[o,t]=await Promise.all([et(this._config.wasmPaths,this._config.ortCdnUrl),mt(this._config.modelPath,this._config.wasmPaths,this._config.ortCdnUrl)]),s=[];for(const n of this._samples){const e=this._audioUtils.logfbank(n),i=new o.Tensor("float32",e,[1,1,149,64]),r=await t.run({input:i}),h=Array.from(r[Object.keys(r)[0]].data);s.push(h)}return{word_name:this._wordName,model_type:"resnet_50_arc",embeddings:s}}}const 
Z="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/",q="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/ort.wasm.min.mjs",rt="https://huggingface.co/ComicScrip/mellon/resolve/main/model.onnx",wt="https://cdn.jsdelivr.net/npm/mellon@0.0.14/dist/assets/audio-processor.js",X="mellon-refs",gt="mellon-threshold";exports.AudioUtils=ft;exports.DEFAULT_AUDIO_PROCESSOR_PATH=wt;exports.DEFAULT_MODEL_PATH=rt;exports.DEFAULT_ORT_CDN_URL=q;exports.DEFAULT_REFS_STORAGE_KEY=X;exports.DEFAULT_THRESHOLD_STORAGE_KEY=gt;exports.DEFAULT_WASM_PATHS=Z;exports.Detector=Lt;exports.EnrollmentSession=Wt;exports.Storage=H;
|
package/dist/mellon.mjs
CHANGED
|
@@ -1,32 +1,32 @@
|
|
|
1
1
|
function Rt(m) {
|
|
2
2
|
return m && m.__esModule && Object.prototype.hasOwnProperty.call(m, "default") ? m.default : m;
|
|
3
3
|
}
|
|
4
|
-
var
|
|
4
|
+
var dt, vt;
|
|
5
5
|
function Ut() {
|
|
6
|
-
if (vt) return
|
|
6
|
+
if (vt) return dt;
|
|
7
7
|
vt = 1;
|
|
8
8
|
function m(o) {
|
|
9
9
|
if (this.size = o | 0, this.size <= 1 || (this.size & this.size - 1) !== 0)
|
|
10
10
|
throw new Error("FFT size must be a power of two and bigger than 1");
|
|
11
11
|
this._csize = o << 1;
|
|
12
12
|
for (var t = new Array(this.size * 2), s = 0; s < t.length; s += 2) {
|
|
13
|
-
const
|
|
14
|
-
t[s] = Math.cos(
|
|
13
|
+
const c = Math.PI * s / this.size;
|
|
14
|
+
t[s] = Math.cos(c), t[s + 1] = -Math.sin(c);
|
|
15
15
|
}
|
|
16
16
|
this.table = t;
|
|
17
17
|
for (var n = 0, e = 1; this.size > e; e <<= 1)
|
|
18
18
|
n++;
|
|
19
19
|
this._width = n % 2 === 0 ? n - 1 : n, this._bitrev = new Array(1 << this._width);
|
|
20
|
-
for (var
|
|
21
|
-
this._bitrev[
|
|
20
|
+
for (var i = 0; i < this._bitrev.length; i++) {
|
|
21
|
+
this._bitrev[i] = 0;
|
|
22
22
|
for (var r = 0; r < this._width; r += 2) {
|
|
23
|
-
var
|
|
24
|
-
this._bitrev[
|
|
23
|
+
var h = this._width - r - 2;
|
|
24
|
+
this._bitrev[i] |= (i >>> r & 3) << h;
|
|
25
25
|
}
|
|
26
26
|
}
|
|
27
27
|
this._out = null, this._data = null, this._inv = 0;
|
|
28
28
|
}
|
|
29
|
-
return
|
|
29
|
+
return dt = m, m.prototype.fromComplexArray = function(t, s) {
|
|
30
30
|
for (var n = s || new Array(t.length >>> 1), e = 0; e < t.length; e += 2)
|
|
31
31
|
n[e >>> 1] = t[e];
|
|
32
32
|
return n;
|
|
@@ -58,70 +58,70 @@ function Ut() {
|
|
|
58
58
|
t[n] /= this.size;
|
|
59
59
|
this._out = null, this._data = null;
|
|
60
60
|
}, m.prototype._transform4 = function() {
|
|
61
|
-
var t = this._out, s = this._csize, n = this._width, e = 1 << n,
|
|
62
|
-
if (
|
|
63
|
-
for (r = 0,
|
|
64
|
-
const u = h
|
|
61
|
+
var t = this._out, s = this._csize, n = this._width, e = 1 << n, i = s / e << 1, r, h, c = this._bitrev;
|
|
62
|
+
if (i === 4)
|
|
63
|
+
for (r = 0, h = 0; r < s; r += i, h++) {
|
|
64
|
+
const u = c[h];
|
|
65
65
|
this._singleTransform2(r, u, e);
|
|
66
66
|
}
|
|
67
67
|
else
|
|
68
|
-
for (r = 0,
|
|
69
|
-
const u = h
|
|
68
|
+
for (r = 0, h = 0; r < s; r += i, h++) {
|
|
69
|
+
const u = c[h];
|
|
70
70
|
this._singleTransform4(r, u, e);
|
|
71
71
|
}
|
|
72
|
-
var
|
|
72
|
+
var d = this._inv ? -1 : 1, a = this.table;
|
|
73
73
|
for (e >>= 2; e >= 2; e >>= 2) {
|
|
74
|
-
|
|
75
|
-
var
|
|
76
|
-
for (r = 0; r < s; r +=
|
|
77
|
-
for (var _ = r +
|
|
78
|
-
const u = v, p = u +
|
|
79
|
-
t[u] = X, t[u + 1] =
|
|
74
|
+
i = s / e << 1;
|
|
75
|
+
var l = i >>> 2;
|
|
76
|
+
for (r = 0; r < s; r += i)
|
|
77
|
+
for (var _ = r + l, v = r, f = 0; v < _; v += 2, f += e) {
|
|
78
|
+
const u = v, p = u + l, w = p + l, g = w + l, b = t[u], T = t[u + 1], F = t[p], y = t[p + 1], A = t[w], C = t[w + 1], M = t[g], S = t[g + 1], P = b, R = T, U = a[f], E = d * a[f + 1], D = F * U - y * E, x = F * E + y * U, B = a[2 * f], N = d * a[2 * f + 1], j = A * B - C * N, H = A * N + C * B, K = a[3 * f], $ = d * a[3 * f + 1], L = M * K - S * $, J = M * $ + S * K, G = P + j, z = R + H, I = P - j, Y = R - H, O = D + L, W = x + J, k = d * (D - L), Q = d * (x - J), X = G + O, rt = z + W, ot = G - O, nt = z - W, at = I + Q, it = Y - k, ct = I - Q, lt = Y + k;
|
|
79
|
+
t[u] = X, t[u + 1] = rt, t[p] = at, t[p + 1] = it, t[w] = ot, t[w + 1] = nt, t[g] = ct, t[g + 1] = lt;
|
|
80
80
|
}
|
|
81
81
|
}
|
|
82
82
|
}, m.prototype._singleTransform2 = function(t, s, n) {
|
|
83
|
-
const e = this._out,
|
|
84
|
-
e[t] = a, e[t + 1] =
|
|
83
|
+
const e = this._out, i = this._data, r = i[s], h = i[s + 1], c = i[s + n], d = i[s + n + 1], a = r + c, l = h + d, _ = r - c, v = h - d;
|
|
84
|
+
e[t] = a, e[t + 1] = l, e[t + 2] = _, e[t + 3] = v;
|
|
85
85
|
}, m.prototype._singleTransform4 = function(t, s, n) {
|
|
86
|
-
const e = this._out,
|
|
86
|
+
const e = this._out, i = this._data, r = this._inv ? -1 : 1, h = n * 2, c = n * 3, d = i[s], a = i[s + 1], l = i[s + n], _ = i[s + n + 1], v = i[s + h], f = i[s + h + 1], u = i[s + c], p = i[s + c + 1], w = d + v, g = a + f, b = d - v, T = a - f, F = l + u, y = _ + p, A = r * (l - u), C = r * (_ - p), M = w + F, S = g + y, P = b + C, R = T - A, U = w - F, E = g - y, D = b - C, x = T + A;
|
|
87
87
|
e[t] = M, e[t + 1] = S, e[t + 2] = P, e[t + 3] = R, e[t + 4] = U, e[t + 5] = E, e[t + 6] = D, e[t + 7] = x;
|
|
88
88
|
}, m.prototype._realTransform4 = function() {
|
|
89
|
-
var t = this._out, s = this._csize, n = this._width, e = 1 << n,
|
|
90
|
-
if (
|
|
91
|
-
for (r = 0,
|
|
92
|
-
const ht = h
|
|
89
|
+
var t = this._out, s = this._csize, n = this._width, e = 1 << n, i = s / e << 1, r, h, c = this._bitrev;
|
|
90
|
+
if (i === 4)
|
|
91
|
+
for (r = 0, h = 0; r < s; r += i, h++) {
|
|
92
|
+
const ht = c[h];
|
|
93
93
|
this._singleRealTransform2(r, ht >>> 1, e >>> 1);
|
|
94
94
|
}
|
|
95
95
|
else
|
|
96
|
-
for (r = 0,
|
|
97
|
-
const ht = h
|
|
96
|
+
for (r = 0, h = 0; r < s; r += i, h++) {
|
|
97
|
+
const ht = c[h];
|
|
98
98
|
this._singleRealTransform4(r, ht >>> 1, e >>> 1);
|
|
99
99
|
}
|
|
100
|
-
var
|
|
100
|
+
var d = this._inv ? -1 : 1, a = this.table;
|
|
101
101
|
for (e >>= 2; e >= 2; e >>= 2) {
|
|
102
|
-
|
|
103
|
-
var
|
|
104
|
-
for (r = 0; r < s; r +=
|
|
102
|
+
i = s / e << 1;
|
|
103
|
+
var l = i >>> 1, _ = l >>> 1, v = _ >>> 1;
|
|
104
|
+
for (r = 0; r < s; r += i)
|
|
105
105
|
for (var f = 0, u = 0; f <= v; f += 2, u += e) {
|
|
106
|
-
var p = r + f, w = p + _, g = w + _, b = g + _, T = t[p], F = t[p + 1], y = t[w], A = t[w + 1], C = t[g], M = t[g + 1], S = t[b], P = t[b + 1], R = T, U = F, E = a[u], D =
|
|
107
|
-
if (t[p] =
|
|
108
|
-
var
|
|
109
|
-
t[g] =
|
|
106
|
+
var p = r + f, w = p + _, g = w + _, b = g + _, T = t[p], F = t[p + 1], y = t[w], A = t[w + 1], C = t[g], M = t[g + 1], S = t[b], P = t[b + 1], R = T, U = F, E = a[u], D = d * a[u + 1], x = y * E - A * D, B = y * D + A * E, N = a[2 * u], j = d * a[2 * u + 1], H = C * N - M * j, K = C * j + M * N, $ = a[3 * u], L = d * a[3 * u + 1], J = S * $ - P * L, G = S * L + P * $, z = R + H, I = U + K, Y = R - H, O = U - K, W = x + J, k = B + G, Q = d * (x - J), X = d * (B - G), rt = z + W, ot = I + k, nt = Y + X, at = O - Q;
|
|
107
|
+
if (t[p] = rt, t[p + 1] = ot, t[w] = nt, t[w + 1] = at, f === 0) {
|
|
108
|
+
var it = z - W, ct = I - k;
|
|
109
|
+
t[g] = it, t[g + 1] = ct;
|
|
110
110
|
continue;
|
|
111
111
|
}
|
|
112
112
|
if (f !== v) {
|
|
113
|
-
var
|
|
113
|
+
var lt = Y, wt = -O, gt = z, yt = -I, bt = -d * X, Ft = -d * Q, Tt = -d * k, At = -d * W, Ct = lt + bt, Mt = wt + Ft, St = gt + At, Pt = yt - Tt, ft = r + _ - f, ut = r + l - f;
|
|
114
114
|
t[ft] = Ct, t[ft + 1] = Mt, t[ut] = St, t[ut + 1] = Pt;
|
|
115
115
|
}
|
|
116
116
|
}
|
|
117
117
|
}
|
|
118
118
|
}, m.prototype._singleRealTransform2 = function(t, s, n) {
|
|
119
|
-
const e = this._out,
|
|
120
|
-
e[t] =
|
|
119
|
+
const e = this._out, i = this._data, r = i[s], h = i[s + n], c = r + h, d = r - h;
|
|
120
|
+
e[t] = c, e[t + 1] = 0, e[t + 2] = d, e[t + 3] = 0;
|
|
121
121
|
}, m.prototype._singleRealTransform4 = function(t, s, n) {
|
|
122
|
-
const e = this._out,
|
|
122
|
+
const e = this._out, i = this._data, r = this._inv ? -1 : 1, h = n * 2, c = n * 3, d = i[s], a = i[s + n], l = i[s + h], _ = i[s + c], v = d + l, f = d - l, u = a + _, p = r * (a - _), w = v + u, g = f, b = -p, T = v - u, F = f, y = p;
|
|
123
123
|
e[t] = w, e[t + 1] = 0, e[t + 2] = g, e[t + 3] = b, e[t + 4] = T, e[t + 5] = 0, e[t + 6] = F, e[t + 7] = y;
|
|
124
|
-
},
|
|
124
|
+
}, dt;
|
|
125
125
|
}
|
|
126
126
|
// Run the wrapped FFT module once. The factory body above ends its comma
// expression with `dt`, which is presumably the value Ut() hands back here.
var Et = Ut();
|
|
127
127
|
// Rt is defined outside this view.
// NOTE(review): it appears to unwrap a CommonJS-style default export — confirm.
const Dt = /* @__PURE__ */ Rt(Et);
|
|
@@ -137,40 +137,40 @@ class pt {
|
|
|
137
137
|
}
|
|
138
138
|
_createMelFilterbank() {
|
|
139
139
|
const t = this._sampleRate / 2, s = this._hzToMel(0), n = this._hzToMel(t), e = new Float32Array(this._nfilt + 2);
|
|
140
|
-
for (let
|
|
141
|
-
e[
|
|
142
|
-
const r = e.map((
|
|
143
|
-
for (let
|
|
144
|
-
const
|
|
145
|
-
for (let a = r[
|
|
146
|
-
|
|
147
|
-
for (let a = r[
|
|
148
|
-
|
|
149
|
-
|
|
140
|
+
for (let c = 0; c < this._nfilt + 2; c++)
|
|
141
|
+
e[c] = s + c * (n - s) / (this._nfilt + 1);
|
|
142
|
+
const r = e.map((c) => this._melToHz(c)).map((c) => Math.floor((this._nfft + 1) * c / this._sampleRate)), h = [];
|
|
143
|
+
for (let c = 0; c < this._nfilt; c++) {
|
|
144
|
+
const d = new Float32Array(Math.floor(this._nfft / 2) + 1);
|
|
145
|
+
for (let a = r[c]; a < r[c + 1]; a++)
|
|
146
|
+
d[a] = (a - r[c]) / (r[c + 1] - r[c]);
|
|
147
|
+
for (let a = r[c + 1]; a < r[c + 2]; a++)
|
|
148
|
+
d[a] = (r[c + 2] - a) / (r[c + 2] - r[c + 1]);
|
|
149
|
+
h.push(d);
|
|
150
150
|
}
|
|
151
|
-
return
|
|
151
|
+
return h;
|
|
152
152
|
}
|
|
153
153
|
/** Returns a flat Float32Array of shape [numFrames × nfilt]. */
|
|
154
154
|
logfbank(o) {
|
|
155
|
-
const t = Math.floor(0.025 * this._sampleRate), s = Math.floor(0.01 * this._sampleRate), n = 1 + Math.ceil((o.length - t) / s), e = new Float32Array(n * this._nfilt),
|
|
156
|
-
for (let
|
|
157
|
-
const
|
|
158
|
-
|
|
159
|
-
for (let
|
|
160
|
-
|
|
161
|
-
const
|
|
162
|
-
this._fft.transform(r,
|
|
155
|
+
const t = Math.floor(0.025 * this._sampleRate), s = Math.floor(0.01 * this._sampleRate), n = 1 + Math.ceil((o.length - t) / s), e = new Float32Array(n * this._nfilt), i = new Float32Array(this._nfft), r = this._fft.createComplexArray();
|
|
156
|
+
for (let h = 0; h < n; h++) {
|
|
157
|
+
const c = h * s;
|
|
158
|
+
i.fill(0);
|
|
159
|
+
for (let l = 0; l < t && c + l < o.length; l++)
|
|
160
|
+
i[l] = o[c + l];
|
|
161
|
+
const d = this._fft.toComplexArray(i, null);
|
|
162
|
+
this._fft.transform(r, d);
|
|
163
163
|
const a = new Float32Array(Math.floor(this._nfft / 2) + 1);
|
|
164
|
-
for (let
|
|
165
|
-
const _ = r[2 *
|
|
166
|
-
a[
|
|
164
|
+
for (let l = 0; l < a.length; l++) {
|
|
165
|
+
const _ = r[2 * l], v = r[2 * l + 1];
|
|
166
|
+
a[l] = 1 / this._nfft * (_ * _ + v * v), a[l] === 0 && (a[l] = 1e-30);
|
|
167
167
|
}
|
|
168
|
-
for (let
|
|
168
|
+
for (let l = 0; l < this._nfilt; l++) {
|
|
169
169
|
let _ = 0;
|
|
170
|
-
const v = this._melFilters[
|
|
170
|
+
const v = this._melFilters[l];
|
|
171
171
|
for (let f = 0; f < a.length; f++)
|
|
172
172
|
_ += a[f] * v[f];
|
|
173
|
-
_ === 0 && (_ = 1e-30), e[
|
|
173
|
+
_ === 0 && (_ = 1e-30), e[h * this._nfilt + l] = Math.log(_);
|
|
174
174
|
}
|
|
175
175
|
}
|
|
176
176
|
return e;
|
|
@@ -180,22 +180,22 @@ class pt {
|
|
|
180
180
|
for (const n of t) {
|
|
181
181
|
let e = 0;
|
|
182
182
|
for (let r = 0; r < n.length; r++) e += o[r] * n[r];
|
|
183
|
-
const
|
|
184
|
-
|
|
183
|
+
const i = (e + 1) / 2;
|
|
184
|
+
i > s && (s = i);
|
|
185
185
|
}
|
|
186
186
|
return s;
|
|
187
187
|
}
|
|
188
188
|
}
|
|
189
|
-
async function
|
|
189
|
+
/**
 * Dynamically imports the ONNX Runtime module and configures its WASM backend.
 *
 * @param {string} [m] - Value assigned to `env.wasm.wasmPaths`.
 * @param {string} [o] - Module specifier/URL passed to `import()`.
 * @returns {Promise<object>} The imported module namespace, with
 *   `env.wasm.wasmPaths` set to `m` and `env.wasm.numThreads` set to 1.
 */
async function tt(m = st, o = et) {
  const runtime = await import(
    /* @vite-ignore */
    o
  );
  const wasmSettings = runtime.env.wasm;
  wasmSettings.wasmPaths = m;
  wasmSettings.numThreads = 1;
  return runtime;
}
|
|
196
|
-
let
|
|
197
|
-
async function _t(m = mt, o =
|
|
198
|
-
return
|
|
196
|
+
// Module-level cache for the session promise: _t() assigns it on its first call
// and returns the same promise afterwards; xt() reports whether it has been set.
let Z = null;
|
|
197
|
+
async function _t(m = mt, o = st, t = et, s) {
|
|
198
|
+
return Z || (Z = tt(o, t).then(
|
|
199
199
|
(n) => s ? n.InferenceSession.create(new Uint8Array(s), {
|
|
200
200
|
executionProviders: ["wasm"],
|
|
201
201
|
graphOptimizationLevel: "all"
|
|
@@ -203,10 +203,13 @@ async function _t(m = mt, o = tt, t = st, s) {
|
|
|
203
203
|
executionProviders: ["wasm"],
|
|
204
204
|
graphOptimizationLevel: "all"
|
|
205
205
|
})
|
|
206
|
-
)),
|
|
206
|
+
)), Z;
|
|
207
|
+
}
|
|
208
|
+
/**
 * Tells whether the module-level session cache `Z` has been populated.
 *
 * @returns {boolean} `true` once `Z` is anything other than `null`.
 */
function xt() {
  const sessionRequested = Z !== null;
  return sessionRequested;
}
|
|
208
211
|
class V {
|
|
209
|
-
static loadWords(o =
|
|
212
|
+
static loadWords(o = q) {
|
|
210
213
|
try {
|
|
211
214
|
const t = localStorage.getItem(o);
|
|
212
215
|
return t ? JSON.parse(t) : [];
|
|
@@ -214,11 +217,11 @@ class V {
|
|
|
214
217
|
return [];
|
|
215
218
|
}
|
|
216
219
|
}
|
|
217
|
-
static saveWord(o, t =
|
|
220
|
+
static saveWord(o, t = q) {
|
|
218
221
|
const s = V.loadWords(t).filter((n) => n.word_name !== o.word_name);
|
|
219
222
|
localStorage.setItem(t, JSON.stringify([...s, o]));
|
|
220
223
|
}
|
|
221
|
-
static deleteWord(o, t =
|
|
224
|
+
static deleteWord(o, t = q) {
|
|
222
225
|
try {
|
|
223
226
|
const s = V.loadWords(t).filter((n) => n.word_name !== o);
|
|
224
227
|
localStorage.setItem(t, JSON.stringify(s));
|
|
@@ -226,22 +229,22 @@ class V {
|
|
|
226
229
|
}
|
|
227
230
|
}
|
|
228
231
|
}
|
|
229
|
-
class
|
|
232
|
+
class Wt {
|
|
230
233
|
constructor(o, t) {
|
|
231
234
|
this._started = !1, this._inferring = !1, this._audioCtx = null, this._stream = null, this._refEmbeddings = /* @__PURE__ */ new Map(), this._lastMatchAt = 0, this._lastInferenceAt = 0, this._initPromise = null;
|
|
232
235
|
const {
|
|
233
|
-
refsStorageKey: s =
|
|
234
|
-
thresholdStorageKey: n =
|
|
235
|
-
wasmPaths: e =
|
|
236
|
-
modelPath:
|
|
237
|
-
audioProcessorPath: r =
|
|
238
|
-
ortCdnUrl:
|
|
239
|
-
audioUtils:
|
|
236
|
+
refsStorageKey: s = q,
|
|
237
|
+
thresholdStorageKey: n = It,
|
|
238
|
+
wasmPaths: e = st,
|
|
239
|
+
modelPath: i = mt,
|
|
240
|
+
audioProcessorPath: r = zt,
|
|
241
|
+
ortCdnUrl: h = et,
|
|
242
|
+
audioUtils: c = new pt()
|
|
240
243
|
} = t || {};
|
|
241
|
-
this._audioUtils =
|
|
244
|
+
this._audioUtils = c, this._commands = o, this._refsStorageKey = s, this._thresholdStorageKey = n, this._audioProcessorPath = r, this._wasmPaths = e, this._modelPath = i, this._ortCdnUrl = h;
|
|
242
245
|
try {
|
|
243
|
-
const
|
|
244
|
-
this._threshold =
|
|
246
|
+
const d = localStorage.getItem(this._thresholdStorageKey);
|
|
247
|
+
this._threshold = d !== null ? Math.max(0, Math.min(1, Number(d))) : 0.65;
|
|
245
248
|
} catch {
|
|
246
249
|
this._threshold = 0.65;
|
|
247
250
|
}
|
|
@@ -271,40 +274,44 @@ class It {
|
|
|
271
274
|
const a = await n.arrayBuffer();
|
|
272
275
|
return s.downloaded += a.byteLength, e || (s.total += a.byteLength), t == null || t(s.downloaded, s.total), a;
|
|
273
276
|
}
|
|
274
|
-
const
|
|
275
|
-
let
|
|
277
|
+
const i = n.body.getReader(), r = [];
|
|
278
|
+
let h = 0;
|
|
276
279
|
for (; ; ) {
|
|
277
|
-
const { done: a, value:
|
|
280
|
+
const { done: a, value: l } = await i.read();
|
|
278
281
|
if (a) break;
|
|
279
|
-
r.push(
|
|
282
|
+
r.push(l), h += l.length, s.downloaded += l.length, t == null || t(s.downloaded, s.total);
|
|
280
283
|
}
|
|
281
|
-
e || (s.total +=
|
|
282
|
-
const
|
|
283
|
-
let
|
|
284
|
+
e || (s.total += h);
|
|
285
|
+
const c = new Uint8Array(h);
|
|
286
|
+
let d = 0;
|
|
284
287
|
for (const a of r)
|
|
285
|
-
|
|
286
|
-
return
|
|
288
|
+
c.set(a, d), d += a.length;
|
|
289
|
+
return c.buffer;
|
|
287
290
|
}
|
|
288
291
|
async _init(o) {
|
|
289
|
-
const t = { downloaded: 0, total: 0 }, s = /* @__PURE__ */ new Set(),
|
|
290
|
-
for (const
|
|
291
|
-
for (const
|
|
292
|
-
!
|
|
293
|
-
const
|
|
294
|
-
this._trackFetch(this._modelPath, o, t),
|
|
295
|
-
...
|
|
292
|
+
const t = { downloaded: 0, total: 0 }, s = V.loadWords(this._refsStorageKey), n = new Set(s.map((a) => a.word_name)), e = /* @__PURE__ */ new Set(), i = [];
|
|
293
|
+
for (const a of this._commands)
|
|
294
|
+
for (const l of a.triggers)
|
|
295
|
+
!e.has(l.name) && l.defaultRefPath && !n.has(l.name) && (e.add(l.name), i.push({ name: l.name, path: l.defaultRefPath }));
|
|
296
|
+
const r = tt(this._wasmPaths, this._ortCdnUrl), h = xt(), [c, ...d] = await Promise.all([
|
|
297
|
+
h ? Promise.resolve(null) : this._trackFetch(this._modelPath, o, t),
|
|
298
|
+
...i.map(({ path: a }) => this._trackFetch(a, o, t))
|
|
296
299
|
]);
|
|
297
|
-
await
|
|
298
|
-
|
|
299
|
-
|
|
300
|
+
await r, await _t(
|
|
301
|
+
this._modelPath,
|
|
302
|
+
this._wasmPaths,
|
|
303
|
+
this._ortCdnUrl,
|
|
304
|
+
h ? void 0 : c
|
|
305
|
+
);
|
|
306
|
+
for (let a = 0; a < i.length; a++)
|
|
300
307
|
try {
|
|
301
|
-
const
|
|
302
|
-
this.addCustomWord(
|
|
308
|
+
const l = JSON.parse(new TextDecoder().decode(d[a]));
|
|
309
|
+
this.addCustomWord(l), V.saveWord(l, this._refsStorageKey);
|
|
303
310
|
} catch {
|
|
304
|
-
console.warn(`[Mellon] failed to parse ref file: ${
|
|
311
|
+
console.warn(`[Mellon] failed to parse ref file: ${i[a].path}`);
|
|
305
312
|
}
|
|
306
|
-
for (const
|
|
307
|
-
this._refEmbeddings.set(
|
|
313
|
+
for (const a of s)
|
|
314
|
+
this._refEmbeddings.set(a.word_name, a.embeddings);
|
|
308
315
|
console.info("[Mellon] init complete, loaded refs:", [...this._refEmbeddings.keys()]);
|
|
309
316
|
}
|
|
310
317
|
/**
|
|
@@ -359,16 +366,16 @@ class It {
|
|
|
359
366
|
if (!(t - this._lastInferenceAt < 300)) {
|
|
360
367
|
this._lastInferenceAt = t, this._inferring = !0;
|
|
361
368
|
try {
|
|
362
|
-
const [s, n] = await Promise.all([
|
|
363
|
-
let
|
|
364
|
-
for (const
|
|
365
|
-
if (
|
|
366
|
-
for (const a of
|
|
367
|
-
const
|
|
368
|
-
if (!
|
|
369
|
-
const _ = this._audioUtils.maxCosineSim(
|
|
369
|
+
const [s, n] = await Promise.all([tt(this._wasmPaths, this._ortCdnUrl), _t(this._modelPath, this._wasmPaths, this._ortCdnUrl)]), e = this._audioUtils.logfbank(o), i = new s.Tensor("float32", e, [1, 1, 149, 64]), r = await n.run({ input: i }), h = r[Object.keys(r)[0]].data;
|
|
370
|
+
let c = !1;
|
|
371
|
+
for (const d of this._commands) {
|
|
372
|
+
if (c) break;
|
|
373
|
+
for (const a of d.triggers) {
|
|
374
|
+
const l = this._refEmbeddings.get(a.name);
|
|
375
|
+
if (!l) continue;
|
|
376
|
+
const _ = this._audioUtils.maxCosineSim(h, l);
|
|
370
377
|
if (_ >= this._threshold && t - this._lastMatchAt > 2e3) {
|
|
371
|
-
this._lastMatchAt = t, console.info(`[Mellon] match: "${a}" sim=${_.toFixed(3)}`), typeof
|
|
378
|
+
this._lastMatchAt = t, console.info(`[Mellon] match: "${a}" sim=${_.toFixed(3)}`), typeof d.onMatch == "function" && d.onMatch(a.name, _), c = !0;
|
|
372
379
|
break;
|
|
373
380
|
}
|
|
374
381
|
}
|
|
@@ -381,28 +388,28 @@ class It {
|
|
|
381
388
|
}
|
|
382
389
|
}
|
|
383
390
|
}
|
|
384
|
-
class
|
|
391
|
+
class kt {
|
|
385
392
|
constructor(o, t) {
|
|
386
|
-
this._config = {}, this._samples = [], this._wordName = o, this._config.modelPath = (t == null ? void 0 : t.modelPath) || mt, this._config.wasmPaths = (t == null ? void 0 : t.wasmPaths) ||
|
|
393
|
+
this._config = {}, this._samples = [], this._wordName = o, this._config.modelPath = (t == null ? void 0 : t.modelPath) || mt, this._config.wasmPaths = (t == null ? void 0 : t.wasmPaths) || st, this._config.ortCdnUrl = (t == null ? void 0 : t.ortCdnUrl) || et, this._audioUtils = (t == null ? void 0 : t.audioUtils) ?? new pt();
|
|
387
394
|
}
|
|
388
395
|
/** Records 1.5 s of audio, stores the decoded PCM, returns new sample count. */
|
|
389
396
|
async recordSample() {
|
|
390
|
-
const o = await navigator.mediaDevices.getUserMedia({ audio: !0 }), t = new AudioContext({ sampleRate: 16e3 }), s = await new Promise((
|
|
391
|
-
const
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
},
|
|
395
|
-
var
|
|
397
|
+
const o = await navigator.mediaDevices.getUserMedia({ audio: !0 }), t = new AudioContext({ sampleRate: 16e3 }), s = await new Promise((i, r) => {
|
|
398
|
+
const h = new MediaRecorder(o), c = [];
|
|
399
|
+
h.ondataavailable = (d) => {
|
|
400
|
+
d.data.size > 0 && c.push(d.data);
|
|
401
|
+
}, h.onstop = async () => {
|
|
402
|
+
var d;
|
|
396
403
|
for (const a of o.getTracks()) a.stop();
|
|
397
404
|
try {
|
|
398
|
-
const
|
|
399
|
-
await t.close(),
|
|
405
|
+
const l = await new Blob(c, { type: ((d = c[0]) == null ? void 0 : d.type) || "audio/webm" }).arrayBuffer(), _ = await t.decodeAudioData(l);
|
|
406
|
+
await t.close(), i(_.getChannelData(0).slice());
|
|
400
407
|
} catch (a) {
|
|
401
408
|
r(a);
|
|
402
409
|
}
|
|
403
|
-
},
|
|
410
|
+
}, h.start(), setTimeout(() => {
|
|
404
411
|
try {
|
|
405
|
-
|
|
412
|
+
h.stop();
|
|
406
413
|
} catch {
|
|
407
414
|
}
|
|
408
415
|
}, 1500);
|
|
@@ -417,24 +424,24 @@ class Wt {
|
|
|
417
424
|
}
|
|
418
425
|
/** Runs ONNX inference on every recorded sample to produce reference embeddings. */
|
|
419
426
|
async generateRef() {
|
|
420
|
-
const [o, t] = await Promise.all([
|
|
427
|
+
const [o, t] = await Promise.all([tt(this._config.wasmPaths, this._config.ortCdnUrl), _t(this._config.modelPath, this._config.wasmPaths, this._config.ortCdnUrl)]), s = [];
|
|
421
428
|
for (const n of this._samples) {
|
|
422
|
-
const e = this._audioUtils.logfbank(n),
|
|
423
|
-
s.push(
|
|
429
|
+
const e = this._audioUtils.logfbank(n), i = new o.Tensor("float32", e, [1, 1, 149, 64]), r = await t.run({ input: i }), h = Array.from(r[Object.keys(r)[0]].data);
|
|
430
|
+
s.push(h);
|
|
424
431
|
}
|
|
425
432
|
return { word_name: this._wordName, model_type: "resnet_50_arc", embeddings: s };
|
|
426
433
|
}
|
|
427
434
|
}
|
|
428
|
-
const
|
|
435
|
+
// Module defaults, re-exported below under their DEFAULT_* names.

// Base URL assigned to the runtime's `env.wasm.wasmPaths`.
const st = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/";
// ES module entry point of the ONNX runtime, loaded via dynamic import.
const et = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/ort.wasm.min.mjs";
// Default model file.
const mt = "https://huggingface.co/ComicScrip/mellon/resolve/main/model.onnx";
// NOTE(review): pinned to mellon@0.0.14 although this bundle is a later
// release — confirm the audio processor is intentionally version-pinned.
const zt = "https://cdn.jsdelivr.net/npm/mellon@0.0.14/dist/assets/audio-processor.js";
// localStorage keys for saved reference words and for the match threshold.
const q = "mellon-refs";
const It = "mellon-threshold";
|
|
429
436
|
export {
|
|
430
437
|
pt as AudioUtils,
|
|
431
|
-
|
|
438
|
+
zt as DEFAULT_AUDIO_PROCESSOR_PATH,
|
|
432
439
|
mt as DEFAULT_MODEL_PATH,
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
440
|
+
et as DEFAULT_ORT_CDN_URL,
|
|
441
|
+
q as DEFAULT_REFS_STORAGE_KEY,
|
|
442
|
+
It as DEFAULT_THRESHOLD_STORAGE_KEY,
|
|
443
|
+
st as DEFAULT_WASM_PATHS,
|
|
444
|
+
Wt as Detector,
|
|
445
|
+
kt as EnrollmentSession,
|
|
439
446
|
V as Storage
|
|
440
447
|
};
|