mellon 0.0.23 → 0.0.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/mellon.cjs +1 -1
- package/dist/mellon.mjs +1 -1
- package/package.json +1 -1
package/dist/mellon.cjs
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
"use strict";Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});function Dt(m){return m&&m.__esModule&&Object.prototype.hasOwnProperty.call(m,"default")?m.default:m}var _t,pt;function zt(){if(pt)return _t;pt=1;function m(o){if(this.size=o|0,this.size<=1||(this.size&this.size-1)!==0)throw new Error("FFT size must be a power of two and bigger than 1");this._csize=o<<1;for(var t=new Array(this.size*2),s=0;s<t.length;s+=2){const l=Math.PI*s/this.size;t[s]=Math.cos(l),t[s+1]=-Math.sin(l)}this.table=t;for(var n=0,e=1;this.size>e;e<<=1)n++;this._width=n%2===0?n-1:n,this._bitrev=new Array(1<<this._width);for(var i=0;i<this._bitrev.length;i++){this._bitrev[i]=0;for(var r=0;r<this._width;r+=2){var h=this._width-r-2;this._bitrev[i]|=(i>>>r&3)<<h}}this._out=null,this._data=null,this._inv=0}return _t=m,m.prototype.fromComplexArray=function(t,s){for(var n=s||new Array(t.length>>>1),e=0;e<t.length;e+=2)n[e>>>1]=t[e];return n},m.prototype.createComplexArray=function(){const t=new Array(this._csize);for(var s=0;s<t.length;s++)t[s]=0;return t},m.prototype.toComplexArray=function(t,s){for(var n=s||this.createComplexArray(),e=0;e<n.length;e+=2)n[e]=t[e>>>1],n[e+1]=0;return n},m.prototype.completeSpectrum=function(t){for(var s=this._csize,n=s>>>1,e=2;e<n;e+=2)t[s-e]=t[e],t[s-e+1]=-t[e+1]},m.prototype.transform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=0,this._transform4(),this._out=null,this._data=null},m.prototype.realTransform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=0,this._realTransform4(),this._out=null,this._data=null},m.prototype.inverseTransform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=1,this._transform4();for(var n=0;n<t.length;n++)t[n]/=this.size;this._out=null,this._data=null},m.prototype._transform4=function(){var t=this._out,s=this._csize,n=this._width,e=1<<n,i=s/e<<1,r,h,l=this._bitrev;if(i===4)for(r=0,h=0;r<s;r+=i,h++){const u=l[h];this._singleTransform2(r,u,e)}else for(r=0,h=0;r<s;r+=i,h++){const u=l[h];this._singleTransform4(r,u,e)}var d=this._inv?-1:1,a=this.table;for(e>>=2;e>=2;e>>=2){i=s/e<<1;var c=i>>>2;for(r=0;r<s;r+=i)for(var _=r+c,v=r,f=0;v<_;v+=2,f+=e){const u=v,p=u+c,w=p+c,g=w+c,T=t[u],A=t[u+1],b=t[p],y=t[p+1],F=t[w],S=t[w+1],C=t[g],E=t[g+1],R=T,P=A,M=a[f],U=d*a[f+1],D=b*M-y*U,z=b*U+y*M,H=a[2*f],k=d*a[2*f+1],O=F*H-S*k,B=F*k+S*H,j=a[3*f],K=d*a[3*f+1],$=C*j-E*K,G=C*K+E*j,J=R+O,x=P+B,I=R-O,Y=P-B,Q=D+$,L=z+G,N=d*(D-$),V=d*(z-G),tt=J+Q,ot=x+L,nt=J-Q,at=x-L,it=I+V,lt=Y-N,ct=I-V,ht=Y+N;t[u]=tt,t[u+1]=ot,t[p]=it,t[p+1]=lt,t[w]=nt,t[w+1]=at,t[g]=ct,t[g+1]=ht}}},m.prototype._singleTransform2=function(t,s,n){const e=this._out,i=this._data,r=i[s],h=i[s+1],l=i[s+n],d=i[s+n+1],a=r+l,c=h+d,_=r-l,v=h-d;e[t]=a,e[t+1]=c,e[t+2]=_,e[t+3]=v},m.prototype._singleTransform4=function(t,s,n){const e=this._out,i=this._data,r=this._inv?-1:1,h=n*2,l=n*3,d=i[s],a=i[s+1],c=i[s+n],_=i[s+n+1],v=i[s+h],f=i[s+h+1],u=i[s+l],p=i[s+l+1],w=d+v,g=a+f,T=d-v,A=a-f,b=c+u,y=_+p,F=r*(c-u),S=r*(_-p),C=w+b,E=g+y,R=T+S,P=A-F,M=w-b,U=g-y,D=T-S,z=A+F;e[t]=C,e[t+1]=E,e[t+2]=R,e[t+3]=P,e[t+4]=M,e[t+5]=U,e[t+6]=D,e[t+7]=z},m.prototype._realTransform4=function(){var t=this._out,s=this._csize,n=this._width,e=1<<n,i=s/e<<1,r,h,l=this._bitrev;if(i===4)for(r=0,h=0;r<s;r+=i,h++){const dt=l[h];this._singleRealTransform2(r,dt>>>1,e>>>1)}else for(r=0,h=0;r<s;r+=i,h++){const dt=l[h];this._singleRealTransform4(r,dt>>>1,e>>>1)}var d=this._inv?-1:1,a=this.table;for(e>>=2;e>=2;e>>=2){i=s/e<<1;var c=i>>>1,_=c>>>1,v=_>>>1;for(r=0;r<s;r+=i)for(var f=0,u=0;f<=v;f+=2,u+=e){var p=r+f,w=p+_,g=w+_,T=g+_,A=t[p],b=t[p+1],y=t[w],F=t[w+1],S=t[g],C=t[g+1],E=t[T],R=t[T+1],P=A,M=b,U=a[u],D=d*a[u+1],z=y*U-F*D,H=y*D+F*U,k=a[2*u],O=d*a[2*u+1],B=S*k-C*O,j=S*O+C*k,K=a[3*u],$=d*a[3*u+1],G=E*K-R*$,J=E*$+R*K,x=P+B,I=M+j,Y=P-B,Q=M-j,L=z+G,N=H+J,V=d*(z-G),tt=d*(H-J),ot=x+L,nt=I+N,at=Y+tt,it=Q-V;if(t[p]=ot,t[p+1]=nt,t[w]=at,t[w+1]=it,f===0){var lt=x-L,ct=I-N;t[g]=lt,t[g+1]=ct;continue}if(f!==v){var ht=Y,Tt=-Q,bt=x,At=-I,Ft=-d*tt,St=-d*V,Ct=-d*N,Et=-d*L,Rt=ht+Ft,Pt=Tt+St,Mt=bt+Et,Ut=At-Ct,ut=r+_-f,vt=r+c-f;t[ut]=Rt,t[ut+1]=Pt,t[vt]=Mt,t[vt+1]=Ut}}}},m.prototype._singleRealTransform2=function(t,s,n){const e=this._out,i=this._data,r=i[s],h=i[s+n],l=r+h,d=r-h;e[t]=l,e[t+1]=0,e[t+2]=d,e[t+3]=0},m.prototype._singleRealTransform4=function(t,s,n){const e=this._out,i=this._data,r=this._inv?-1:1,h=n*2,l=n*3,d=i[s],a=i[s+n],c=i[s+h],_=i[s+l],v=d+c,f=d-c,u=a+_,p=r*(a-_),w=v+u,g=f,T=-p,A=v-u,b=f,y=p;e[t]=w,e[t+1]=0,e[t+2]=g,e[t+3]=T,e[t+4]=A,e[t+5]=0,e[t+6]=b,e[t+7]=y},_t}var xt=zt();const It=Dt(xt);class ft{constructor(o=16e3,t=512,s=64){this._sampleRate=o,this._nfft=t,this._nfilt=s,this._fft=new It(t),this._melFilters=this._createMelFilterbank()}_hzToMel(o){return 2595*Math.log10(1+o/700)}_melToHz(o){return 700*(10**(o/2595)-1)}_createMelFilterbank(){const t=this._sampleRate/2,s=this._hzToMel(0),n=this._hzToMel(t),e=new Float32Array(this._nfilt+2);for(let l=0;l<this._nfilt+2;l++)e[l]=s+l*(n-s)/(this._nfilt+1);const r=e.map(l=>this._melToHz(l)).map(l=>Math.floor((this._nfft+1)*l/this._sampleRate)),h=[];for(let l=0;l<this._nfilt;l++){const d=new Float32Array(Math.floor(this._nfft/2)+1);for(let a=r[l];a<r[l+1];a++)d[a]=(a-r[l])/(r[l+1]-r[l]);for(let a=r[l+1];a<r[l+2];a++)d[a]=(r[l+2]-a)/(r[l+2]-r[l+1]);h.push(d)}return h}logfbank(o){const t=Math.floor(.025*this._sampleRate),s=Math.floor(.01*this._sampleRate),n=1+Math.ceil((o.length-t)/s),e=new Float32Array(n*this._nfilt),i=new Float32Array(this._nfft),r=this._fft.createComplexArray();for(let h=0;h<n;h++){const l=h*s;i.fill(0);for(let c=0;c<t&&l+c<o.length;c++)i[c]=o[l+c];const d=this._fft.toComplexArray(i,null);this._fft.transform(r,d);const a=new Float32Array(Math.floor(this._nfft/2)+1);for(let c=0;c<a.length;c++){const _=r[2*c],v=r[2*c+1];a[c]=1/this._nfft*(_*_+v*v),a[c]===0&&(a[c]=1e-30)}for(let c=0;c<this._nfilt;c++){let _=0;const v=this._melFilters[c];for(let f=0;f<a.length;f++)_+=a[f]*v[f];_===0&&(_=1e-30),e[h*this._nfilt+c]=Math.log(_)}}return e}maxCosineSim(o,t){let s=0;for(const n of t){let e=0;for(let r=0;r<n.length;r++)e+=o[r]*n[r];const i=(e+1)/2;i>s&&(s=i)}return s}}async function et(m=Z,o=q){const t=await import(o);return t.env.wasm.wasmPaths=m,t.env.wasm.numThreads=1,t}let st=null;async function mt(m=rt,o=Z,t=q,s){return st||(st=et(o,t).then(n=>s?n.InferenceSession.create(new Uint8Array(s),{executionProviders:["wasm"],graphOptimizationLevel:"all"}):n.InferenceSession.create(m,{executionProviders:["wasm"],graphOptimizationLevel:"all"}))),st}function Lt(){return st!==null}class W{static loadWords(o=X){try{const t=localStorage.getItem(o);return t?JSON.parse(t):[]}catch{return[]}}static saveWord(o,t=X){const s=W.loadWords(t).filter(n=>n.word_name!==o.word_name);localStorage.setItem(t,JSON.stringify([...s,o]))}static deleteWord(o,t=X){try{const s=W.loadWords(t).filter(n=>n.word_name!==o);localStorage.setItem(t,JSON.stringify(s))}catch{}}}const wt={info:()=>{},warn:()=>{},error:()=>{}},Nt={info:console.info.bind(console),warn:console.warn.bind(console),error:console.error.bind(console)};class Wt{constructor(o,t){this._started=!1,this._inferring=!1,this._audioCtx=null,this._stream=null,this._refEmbeddings=new Map,this._lastMatchAt=0,this._lastInferenceAt=0,this._initPromise=null;const{refsStorageKey:s=X,thresholdStorageKey:n=yt,wasmPaths:e=Z,modelPath:i=rt,audioProcessorPath:r=gt,ortCdnUrl:h=q,audioUtils:l=new ft,log:d=!1}=t||{};this._log=d===!1?wt:d===!0?Nt:{...wt,...d},this._audioUtils=l,this._commands=o,this._refsStorageKey=s,this._thresholdStorageKey=n,this._audioProcessorPath=r,this._wasmPaths=e,this._modelPath=i,this._ortCdnUrl=h;try{const a=localStorage.getItem(this._thresholdStorageKey);this._threshold=a!==null?Math.max(0,Math.min(1,Number(a))):.65}catch{this._threshold=.65}}get threshold(){return this._threshold}set threshold(o){this._threshold=Math.max(0,Math.min(1,o));try{localStorage.setItem(this._thresholdStorageKey,String(this._threshold))}catch{}}get listening(){return this._started}async _trackFetch(o,t,s){const n=await fetch(o);if(!n.ok)throw new Error(`HTTP ${n.status} fetching ${o}`);const e=Number(n.headers.get("content-length")??"0");if(e>0&&(s.total+=e),!n.body){const a=await n.arrayBuffer();return s.downloaded+=a.byteLength,e||(s.total+=a.byteLength),t==null||t(s.downloaded,s.total),a}const i=n.body.getReader(),r=[];let h=0;for(;;){const{done:a,value:c}=await i.read();if(a)break;r.push(c),h+=c.length,s.downloaded+=c.length,t==null||t(s.downloaded,s.total)}e||(s.total+=h);const l=new Uint8Array(h);let d=0;for(const a of r)l.set(a,d),d+=a.length;return l.buffer}async _init(o){const t={downloaded:0,total:0},s=W.loadWords(this._refsStorageKey),n=new Set(s.map(a=>a.word_name)),e=new Set,i=[];for(const a of this._commands)for(const c of a.triggers)!e.has(c.name)&&c.defaultRefPath&&!n.has(c.name)&&(e.add(c.name),i.push({name:c.name,path:c.defaultRefPath}));const r=et(this._wasmPaths,this._ortCdnUrl),h=Lt(),[l,...d]=await Promise.all([h?Promise.resolve(null):this._trackFetch(this._modelPath,o,t),...i.map(({path:a})=>this._trackFetch(a,o,t))]);await r,await mt(this._modelPath,this._wasmPaths,this._ortCdnUrl,h?void 0:l);for(let a=0;a<i.length;a++)try{const c=JSON.parse(new TextDecoder().decode(d[a]));this.addCustomWord(c),W.saveWord(c,this._refsStorageKey)}catch{this._log.warn(`[Mellon] failed to parse ref file: ${i[a].path}`)}for(const a of s)this._refEmbeddings.set(a.word_name,a.embeddings);this._log.info("[Mellon] init complete, loaded refs:",[...this._refEmbeddings.keys()])}async init(o){this._initPromise||(this._initPromise=this._init(o)),await this._initPromise}addCustomWord(o){if(!(Array.isArray(o.embeddings)&&o.embeddings.length>0))throw new Error("invalid ref file for : "+o.word_name);this._refEmbeddings.set(o.word_name,o.embeddings)}async start(){if(this._started)return;await this.init();let o;try{o=await navigator.mediaDevices.getUserMedia({audio:{noiseSuppression:!1,echoCancellation:!1,autoGainControl:!1,channelCount:1}})}catch{o=await navigator.mediaDevices.getUserMedia({audio:!0})}this._stream=o;const t=new AudioContext({sampleRate:16e3});this._audioCtx=t,await t.audioWorklet.addModule(this._audioProcessorPath);const s=t.createMediaStreamSource(o),n=new AudioWorkletNode(t,"audio-processor");n.port.onmessage=e=>{this._handleBuffer(e.data)},s.connect(n),n.connect(t.destination),this._started=!0}async stop(){if(this._started=!1,this._audioCtx&&(await this._audioCtx.close(),this._audioCtx=null),this._stream){for(const o of this._stream.getTracks())o.stop();this._stream=null}}async _handleBuffer(o){if(this._inferring)return;const t=Date.now();if(!(t-this._lastInferenceAt<300)){this._lastInferenceAt=t,this._inferring=!0;try{const[s,n]=await Promise.all([et(this._wasmPaths,this._ortCdnUrl),mt(this._modelPath,this._wasmPaths,this._ortCdnUrl)]),e=this._audioUtils.logfbank(o),i=new s.Tensor("float32",e,[1,1,149,64]),r=await n.run({input:i}),h=r[Object.keys(r)[0]].data;let l=!1;for(const d of this._commands){if(l)break;for(const a of d.triggers){const c=this._refEmbeddings.get(a.name);if(!c)continue;const _=this._audioUtils.maxCosineSim(h,c);if(_>=this._threshold&&t-this._lastMatchAt>2e3){this._lastMatchAt=t,this._log.info(`[Mellon] match: "${a}" sim=${_.toFixed(3)}`),typeof d.onMatch=="function"&&d.onMatch(a.name,_),l=!0;break}}}}catch(s){this._log.error("[Mellon] inference error:",s)}finally{this._inferring=!1}}}}class Ht{constructor(o,t){this._config={},this._samples=[],this._wordName=o,this._config.modelPath=(t==null?void 0:t.modelPath)||rt,this._config.wasmPaths=(t==null?void 0:t.wasmPaths)||Z,this._config.ortCdnUrl=(t==null?void 0:t.ortCdnUrl)||q,this._audioUtils=(t==null?void 0:t.audioUtils)??new ft}async recordSample(){const o=await navigator.mediaDevices.getUserMedia({audio:!0}),t=new AudioContext({sampleRate:16e3}),s=await new Promise((i,r)=>{const h=new MediaRecorder(o),l=[];h.ondataavailable=d=>{d.data.size>0&&l.push(d.data)},h.onstop=async()=>{var d;for(const a of o.getTracks())a.stop();try{const c=await new Blob(l,{type:((d=l[0])==null?void 0:d.type)||"audio/webm"}).arrayBuffer(),_=await t.decodeAudioData(c);await t.close(),i(_.getChannelData(0).slice())}catch(a){r(a)}},h.start(),setTimeout(()=>{try{h.stop()}catch{}},1500)}),n=24e3,e=new Float32Array(n);return e.set(s.slice(0,n)),this._samples.push(e),this._samples.length}deleteSample(o){if(o<0||o>=this._samples.length)throw new RangeError(`index ${o} out of bounds (${this._samples.length} samples)`);return this._samples.splice(o,1),this._samples.length}async generateRef(){const[o,t]=await Promise.all([et(this._config.wasmPaths,this._config.ortCdnUrl),mt(this._config.modelPath,this._config.wasmPaths,this._config.ortCdnUrl)]),s=[];for(const n of this._samples){const e=this._audioUtils.logfbank(n),i=new o.Tensor("float32",e,[1,1,149,64]),r=await t.run({input:i}),h=Array.from(r[Object.keys(r)[0]].data);s.push(h)}return{word_name:this._wordName,model_type:"resnet_50_arc",embeddings:s}}}const Z="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/",q="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/ort.wasm.min.mjs",rt="https://huggingface.co/ComicScrip/mellon/resolve/main/model.onnx",gt="https://cdn.jsdelivr.net/npm/mellon@0.0.14/dist/assets/audio-processor.js",X="mellon-refs",yt="mellon-threshold";exports.AudioUtils=ft;exports.DEFAULT_AUDIO_PROCESSOR_PATH=gt;exports.DEFAULT_MODEL_PATH=rt;exports.DEFAULT_ORT_CDN_URL=q;exports.DEFAULT_REFS_STORAGE_KEY=X;exports.DEFAULT_THRESHOLD_STORAGE_KEY=yt;exports.DEFAULT_WASM_PATHS=Z;exports.Detector=Wt;exports.EnrollmentSession=Ht;exports.Storage=W;
|
|
1
|
+
"use strict";Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});function Dt(m){return m&&m.__esModule&&Object.prototype.hasOwnProperty.call(m,"default")?m.default:m}var _t,pt;function zt(){if(pt)return _t;pt=1;function m(o){if(this.size=o|0,this.size<=1||(this.size&this.size-1)!==0)throw new Error("FFT size must be a power of two and bigger than 1");this._csize=o<<1;for(var t=new Array(this.size*2),s=0;s<t.length;s+=2){const l=Math.PI*s/this.size;t[s]=Math.cos(l),t[s+1]=-Math.sin(l)}this.table=t;for(var n=0,e=1;this.size>e;e<<=1)n++;this._width=n%2===0?n-1:n,this._bitrev=new Array(1<<this._width);for(var i=0;i<this._bitrev.length;i++){this._bitrev[i]=0;for(var r=0;r<this._width;r+=2){var h=this._width-r-2;this._bitrev[i]|=(i>>>r&3)<<h}}this._out=null,this._data=null,this._inv=0}return _t=m,m.prototype.fromComplexArray=function(t,s){for(var n=s||new Array(t.length>>>1),e=0;e<t.length;e+=2)n[e>>>1]=t[e];return n},m.prototype.createComplexArray=function(){const t=new Array(this._csize);for(var s=0;s<t.length;s++)t[s]=0;return t},m.prototype.toComplexArray=function(t,s){for(var n=s||this.createComplexArray(),e=0;e<n.length;e+=2)n[e]=t[e>>>1],n[e+1]=0;return n},m.prototype.completeSpectrum=function(t){for(var s=this._csize,n=s>>>1,e=2;e<n;e+=2)t[s-e]=t[e],t[s-e+1]=-t[e+1]},m.prototype.transform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=0,this._transform4(),this._out=null,this._data=null},m.prototype.realTransform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=0,this._realTransform4(),this._out=null,this._data=null},m.prototype.inverseTransform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=1,this._transform4();for(var n=0;n<t.length;n++)t[n]/=this.size;this._out=null,this._data=null},m.prototype._transform4=function(){var t=this._out,s=this._csize,n=this._width,e=1<<n,i=s/e<<1,r,h,l=this._bitrev;if(i===4)for(r=0,h=0;r<s;r+=i,h++){const u=l[h];this._singleTransform2(r,u,e)}else for(r=0,h=0;r<s;r+=i,h++){const u=l[h];this._singleTransform4(r,u,e)}var d=this._inv?-1:1,a=this.table;for(e>>=2;e>=2;e>>=2){i=s/e<<1;var c=i>>>2;for(r=0;r<s;r+=i)for(var _=r+c,v=r,f=0;v<_;v+=2,f+=e){const u=v,p=u+c,w=p+c,g=w+c,T=t[u],A=t[u+1],b=t[p],y=t[p+1],F=t[w],S=t[w+1],C=t[g],E=t[g+1],R=T,P=A,M=a[f],U=d*a[f+1],D=b*M-y*U,z=b*U+y*M,H=a[2*f],k=d*a[2*f+1],O=F*H-S*k,B=F*k+S*H,j=a[3*f],K=d*a[3*f+1],$=C*j-E*K,G=C*K+E*j,J=R+O,x=P+B,I=R-O,Y=P-B,Q=D+$,L=z+G,N=d*(D-$),V=d*(z-G),tt=J+Q,ot=x+L,nt=J-Q,at=x-L,it=I+V,lt=Y-N,ct=I-V,ht=Y+N;t[u]=tt,t[u+1]=ot,t[p]=it,t[p+1]=lt,t[w]=nt,t[w+1]=at,t[g]=ct,t[g+1]=ht}}},m.prototype._singleTransform2=function(t,s,n){const e=this._out,i=this._data,r=i[s],h=i[s+1],l=i[s+n],d=i[s+n+1],a=r+l,c=h+d,_=r-l,v=h-d;e[t]=a,e[t+1]=c,e[t+2]=_,e[t+3]=v},m.prototype._singleTransform4=function(t,s,n){const e=this._out,i=this._data,r=this._inv?-1:1,h=n*2,l=n*3,d=i[s],a=i[s+1],c=i[s+n],_=i[s+n+1],v=i[s+h],f=i[s+h+1],u=i[s+l],p=i[s+l+1],w=d+v,g=a+f,T=d-v,A=a-f,b=c+u,y=_+p,F=r*(c-u),S=r*(_-p),C=w+b,E=g+y,R=T+S,P=A-F,M=w-b,U=g-y,D=T-S,z=A+F;e[t]=C,e[t+1]=E,e[t+2]=R,e[t+3]=P,e[t+4]=M,e[t+5]=U,e[t+6]=D,e[t+7]=z},m.prototype._realTransform4=function(){var t=this._out,s=this._csize,n=this._width,e=1<<n,i=s/e<<1,r,h,l=this._bitrev;if(i===4)for(r=0,h=0;r<s;r+=i,h++){const dt=l[h];this._singleRealTransform2(r,dt>>>1,e>>>1)}else for(r=0,h=0;r<s;r+=i,h++){const dt=l[h];this._singleRealTransform4(r,dt>>>1,e>>>1)}var d=this._inv?-1:1,a=this.table;for(e>>=2;e>=2;e>>=2){i=s/e<<1;var c=i>>>1,_=c>>>1,v=_>>>1;for(r=0;r<s;r+=i)for(var f=0,u=0;f<=v;f+=2,u+=e){var p=r+f,w=p+_,g=w+_,T=g+_,A=t[p],b=t[p+1],y=t[w],F=t[w+1],S=t[g],C=t[g+1],E=t[T],R=t[T+1],P=A,M=b,U=a[u],D=d*a[u+1],z=y*U-F*D,H=y*D+F*U,k=a[2*u],O=d*a[2*u+1],B=S*k-C*O,j=S*O+C*k,K=a[3*u],$=d*a[3*u+1],G=E*K-R*$,J=E*$+R*K,x=P+B,I=M+j,Y=P-B,Q=M-j,L=z+G,N=H+J,V=d*(z-G),tt=d*(H-J),ot=x+L,nt=I+N,at=Y+tt,it=Q-V;if(t[p]=ot,t[p+1]=nt,t[w]=at,t[w+1]=it,f===0){var lt=x-L,ct=I-N;t[g]=lt,t[g+1]=ct;continue}if(f!==v){var ht=Y,Tt=-Q,bt=x,At=-I,Ft=-d*tt,St=-d*V,Ct=-d*N,Et=-d*L,Rt=ht+Ft,Pt=Tt+St,Mt=bt+Et,Ut=At-Ct,ut=r+_-f,vt=r+c-f;t[ut]=Rt,t[ut+1]=Pt,t[vt]=Mt,t[vt+1]=Ut}}}},m.prototype._singleRealTransform2=function(t,s,n){const e=this._out,i=this._data,r=i[s],h=i[s+n],l=r+h,d=r-h;e[t]=l,e[t+1]=0,e[t+2]=d,e[t+3]=0},m.prototype._singleRealTransform4=function(t,s,n){const e=this._out,i=this._data,r=this._inv?-1:1,h=n*2,l=n*3,d=i[s],a=i[s+n],c=i[s+h],_=i[s+l],v=d+c,f=d-c,u=a+_,p=r*(a-_),w=v+u,g=f,T=-p,A=v-u,b=f,y=p;e[t]=w,e[t+1]=0,e[t+2]=g,e[t+3]=T,e[t+4]=A,e[t+5]=0,e[t+6]=b,e[t+7]=y},_t}var xt=zt();const It=Dt(xt);class ft{constructor(o=16e3,t=512,s=64){this._sampleRate=o,this._nfft=t,this._nfilt=s,this._fft=new It(t),this._melFilters=this._createMelFilterbank()}_hzToMel(o){return 2595*Math.log10(1+o/700)}_melToHz(o){return 700*(10**(o/2595)-1)}_createMelFilterbank(){const t=this._sampleRate/2,s=this._hzToMel(0),n=this._hzToMel(t),e=new Float32Array(this._nfilt+2);for(let l=0;l<this._nfilt+2;l++)e[l]=s+l*(n-s)/(this._nfilt+1);const r=e.map(l=>this._melToHz(l)).map(l=>Math.floor((this._nfft+1)*l/this._sampleRate)),h=[];for(let l=0;l<this._nfilt;l++){const d=new Float32Array(Math.floor(this._nfft/2)+1);for(let a=r[l];a<r[l+1];a++)d[a]=(a-r[l])/(r[l+1]-r[l]);for(let a=r[l+1];a<r[l+2];a++)d[a]=(r[l+2]-a)/(r[l+2]-r[l+1]);h.push(d)}return h}logfbank(o){const t=Math.floor(.025*this._sampleRate),s=Math.floor(.01*this._sampleRate),n=1+Math.ceil((o.length-t)/s),e=new Float32Array(n*this._nfilt),i=new Float32Array(this._nfft),r=this._fft.createComplexArray();for(let h=0;h<n;h++){const l=h*s;i.fill(0);for(let c=0;c<t&&l+c<o.length;c++)i[c]=o[l+c];const d=this._fft.toComplexArray(i,null);this._fft.transform(r,d);const a=new Float32Array(Math.floor(this._nfft/2)+1);for(let c=0;c<a.length;c++){const _=r[2*c],v=r[2*c+1];a[c]=1/this._nfft*(_*_+v*v),a[c]===0&&(a[c]=1e-30)}for(let c=0;c<this._nfilt;c++){let _=0;const v=this._melFilters[c];for(let f=0;f<a.length;f++)_+=a[f]*v[f];_===0&&(_=1e-30),e[h*this._nfilt+c]=Math.log(_)}}return e}maxCosineSim(o,t){let s=0;for(const n of t){let e=0;for(let r=0;r<n.length;r++)e+=o[r]*n[r];const i=(e+1)/2;i>s&&(s=i)}return s}}async function et(m=Z,o=q){const t=await import(o);return t.env.wasm.wasmPaths=m,t.env.wasm.numThreads=1,t}let st=null;async function mt(m=rt,o=Z,t=q,s){return st||(st=et(o,t).then(n=>s?n.InferenceSession.create(new Uint8Array(s),{executionProviders:["wasm"],graphOptimizationLevel:"all"}):n.InferenceSession.create(m,{executionProviders:["wasm"],graphOptimizationLevel:"all"}))),st}function Lt(){return st!==null}class W{static loadWords(o=X){try{const t=localStorage.getItem(o);return t?JSON.parse(t):[]}catch{return[]}}static saveWord(o,t=X){const s=W.loadWords(t).filter(n=>n.word_name!==o.word_name);localStorage.setItem(t,JSON.stringify([...s,o]))}static deleteWord(o,t=X){try{const s=W.loadWords(t).filter(n=>n.word_name!==o);localStorage.setItem(t,JSON.stringify(s))}catch{}}}const wt={info:()=>{},warn:()=>{},error:()=>{}},Nt={info:console.info.bind(console),warn:console.warn.bind(console),error:console.error.bind(console)};class Wt{constructor(o,t){this._started=!1,this._inferring=!1,this._audioCtx=null,this._stream=null,this._refEmbeddings=new Map,this._lastMatchAt=0,this._lastInferenceAt=0,this._initPromise=null;const{refsStorageKey:s=X,thresholdStorageKey:n=yt,wasmPaths:e=Z,modelPath:i=rt,audioProcessorPath:r=gt,ortCdnUrl:h=q,audioUtils:l=new ft,log:d=!1}=t||{};this._log=d===!1?wt:d===!0?Nt:{...wt,...d},this._audioUtils=l,this._commands=o,this._refsStorageKey=s,this._thresholdStorageKey=n,this._audioProcessorPath=r,this._wasmPaths=e,this._modelPath=i,this._ortCdnUrl=h;try{const a=localStorage.getItem(this._thresholdStorageKey);this._threshold=a!==null?Math.max(0,Math.min(1,Number(a))):.65}catch{this._threshold=.65}}get threshold(){return this._threshold}set threshold(o){this._threshold=Math.max(0,Math.min(1,o));try{localStorage.setItem(this._thresholdStorageKey,String(this._threshold))}catch{}}get listening(){return this._started}async _trackFetch(o,t,s){const n=await fetch(o);if(!n.ok)throw new Error(`HTTP ${n.status} fetching ${o}`);const e=Number(n.headers.get("content-length")??"0");if(e>0&&(s.total+=e),!n.body){const a=await n.arrayBuffer();return s.downloaded+=a.byteLength,e||(s.total+=a.byteLength),t==null||t(s.downloaded,s.total),a}const i=n.body.getReader(),r=[];let h=0;for(;;){const{done:a,value:c}=await i.read();if(a)break;r.push(c),h+=c.length,s.downloaded+=c.length,t==null||t(s.downloaded,s.total)}e||(s.total+=h);const l=new Uint8Array(h);let d=0;for(const a of r)l.set(a,d),d+=a.length;return l.buffer}async _init(o){const t={downloaded:0,total:0},s=W.loadWords(this._refsStorageKey),n=new Set(s.map(a=>a.word_name)),e=new Set,i=[];for(const a of this._commands)for(const c of a.triggers)!e.has(c.name)&&c.defaultRefPath&&!n.has(c.name)&&(e.add(c.name),i.push({name:c.name,path:c.defaultRefPath}));const r=et(this._wasmPaths,this._ortCdnUrl),h=Lt(),[l,...d]=await Promise.all([h?Promise.resolve(null):this._trackFetch(this._modelPath,o,t),...i.map(({path:a})=>this._trackFetch(a,o,t))]);await r,await mt(this._modelPath,this._wasmPaths,this._ortCdnUrl,h?void 0:l);for(let a=0;a<i.length;a++)try{const c=JSON.parse(new TextDecoder().decode(d[a]));this.addCustomWord(c),W.saveWord(c,this._refsStorageKey)}catch{this._log.warn(`[Mellon] failed to parse ref file: ${i[a].path}`)}for(const a of s)this._refEmbeddings.set(a.word_name,a.embeddings);this._log.info("[Mellon] init complete, loaded refs:",[...this._refEmbeddings.keys()])}async init(o){this._initPromise||(this._initPromise=this._init(o)),await this._initPromise}addCustomWord(o){if(!(Array.isArray(o.embeddings)&&o.embeddings.length>0))throw new Error("invalid ref file for : "+o.word_name);this._refEmbeddings.set(o.word_name,o.embeddings)}async start(){if(this._started)return;await this.init();let o;try{o=await navigator.mediaDevices.getUserMedia({audio:{noiseSuppression:!1,echoCancellation:!1,autoGainControl:!1,channelCount:1}})}catch{o=await navigator.mediaDevices.getUserMedia({audio:!0})}this._stream=o;const t=new AudioContext({sampleRate:16e3});this._audioCtx=t,await t.audioWorklet.addModule(this._audioProcessorPath);const s=t.createMediaStreamSource(o),n=new AudioWorkletNode(t,"audio-processor");n.port.onmessage=e=>{this._handleBuffer(e.data)},s.connect(n),n.connect(t.destination),this._started=!0}async stop(){if(this._started=!1,this._audioCtx&&(await this._audioCtx.close(),this._audioCtx=null),this._stream){for(const o of this._stream.getTracks())o.stop();this._stream=null}}async _handleBuffer(o){if(this._inferring)return;const t=Date.now();if(!(t-this._lastInferenceAt<300)){this._lastInferenceAt=t,this._inferring=!0;try{const[s,n]=await Promise.all([et(this._wasmPaths,this._ortCdnUrl),mt(this._modelPath,this._wasmPaths,this._ortCdnUrl)]),e=this._audioUtils.logfbank(o),i=new s.Tensor("float32",e,[1,1,149,64]),r=await n.run({input:i}),h=r[Object.keys(r)[0]].data;let l=!1;for(const d of this._commands){if(l)break;for(const a of d.triggers){const c=this._refEmbeddings.get(a.name);if(!c)continue;const _=this._audioUtils.maxCosineSim(h,c);if(_>=this._threshold&&t-this._lastMatchAt>2e3){this._lastMatchAt=t,this._log.info(`[Mellon] match: "${a.name}" sim=${_.toFixed(3)}`),typeof d.onMatch=="function"&&d.onMatch(a.name,_),l=!0;break}}}}catch(s){this._log.error("[Mellon] inference error:",s)}finally{this._inferring=!1}}}}class Ht{constructor(o,t){this._config={},this._samples=[],this._wordName=o,this._config.modelPath=(t==null?void 0:t.modelPath)||rt,this._config.wasmPaths=(t==null?void 0:t.wasmPaths)||Z,this._config.ortCdnUrl=(t==null?void 0:t.ortCdnUrl)||q,this._audioUtils=(t==null?void 0:t.audioUtils)??new ft}async recordSample(){const o=await navigator.mediaDevices.getUserMedia({audio:!0}),t=new AudioContext({sampleRate:16e3}),s=await new Promise((i,r)=>{const h=new MediaRecorder(o),l=[];h.ondataavailable=d=>{d.data.size>0&&l.push(d.data)},h.onstop=async()=>{var d;for(const a of o.getTracks())a.stop();try{const c=await new Blob(l,{type:((d=l[0])==null?void 0:d.type)||"audio/webm"}).arrayBuffer(),_=await t.decodeAudioData(c);await t.close(),i(_.getChannelData(0).slice())}catch(a){r(a)}},h.start(),setTimeout(()=>{try{h.stop()}catch{}},1500)}),n=24e3,e=new Float32Array(n);return e.set(s.slice(0,n)),this._samples.push(e),this._samples.length}deleteSample(o){if(o<0||o>=this._samples.length)throw new RangeError(`index ${o} out of bounds (${this._samples.length} samples)`);return this._samples.splice(o,1),this._samples.length}async generateRef(){const[o,t]=await Promise.all([et(this._config.wasmPaths,this._config.ortCdnUrl),mt(this._config.modelPath,this._config.wasmPaths,this._config.ortCdnUrl)]),s=[];for(const n of this._samples){const e=this._audioUtils.logfbank(n),i=new o.Tensor("float32",e,[1,1,149,64]),r=await t.run({input:i}),h=Array.from(r[Object.keys(r)[0]].data);s.push(h)}return{word_name:this._wordName,model_type:"resnet_50_arc",embeddings:s}}}const Z="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/",q="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/ort.wasm.min.mjs",rt="https://huggingface.co/ComicScrip/mellon/resolve/main/model.onnx",gt="https://cdn.jsdelivr.net/npm/mellon@0.0.14/dist/assets/audio-processor.js",X="mellon-refs",yt="mellon-threshold";exports.AudioUtils=ft;exports.DEFAULT_AUDIO_PROCESSOR_PATH=gt;exports.DEFAULT_MODEL_PATH=rt;exports.DEFAULT_ORT_CDN_URL=q;exports.DEFAULT_REFS_STORAGE_KEY=X;exports.DEFAULT_THRESHOLD_STORAGE_KEY=yt;exports.DEFAULT_WASM_PATHS=Z;exports.Detector=Wt;exports.EnrollmentSession=Ht;exports.Storage=W;
|
package/dist/mellon.mjs
CHANGED
|
@@ -380,7 +380,7 @@ class kt {
|
|
|
380
380
|
if (!l) continue;
|
|
381
381
|
const _ = this._audioUtils.maxCosineSim(h, l);
|
|
382
382
|
if (_ >= this._threshold && t - this._lastMatchAt > 2e3) {
|
|
383
|
-
this._lastMatchAt = t, this._log.info(`[Mellon] match: "${a}" sim=${_.toFixed(3)}`), typeof d.onMatch == "function" && d.onMatch(a.name, _), c = !0;
|
|
383
|
+
this._lastMatchAt = t, this._log.info(`[Mellon] match: "${a.name}" sim=${_.toFixed(3)}`), typeof d.onMatch == "function" && d.onMatch(a.name, _), c = !0;
|
|
384
384
|
break;
|
|
385
385
|
}
|
|
386
386
|
}
|