mellon 0.0.24 → 0.0.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/mellon.cjs +1 -1
- package/dist/mellon.mjs +147 -141
- package/package.json +1 -1
package/dist/mellon.cjs
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
"use strict";Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});function Dt(m){return m&&m.__esModule&&Object.prototype.hasOwnProperty.call(m,"default")?m.default:m}var _t,pt;function zt(){if(pt)return _t;pt=1;function m(o){if(this.size=o|0,this.size<=1||(this.size&this.size-1)!==0)throw new Error("FFT size must be a power of two and bigger than 1");this._csize=o<<1;for(var t=new Array(this.size*2),s=0;s<t.length;s+=2){const l=Math.PI*s/this.size;t[s]=Math.cos(l),t[s+1]=-Math.sin(l)}this.table=t;for(var n=0,e=1;this.size>e;e<<=1)n++;this._width=n%2===0?n-1:n,this._bitrev=new Array(1<<this._width);for(var i=0;i<this._bitrev.length;i++){this._bitrev[i]=0;for(var r=0;r<this._width;r+=2){var h=this._width-r-2;this._bitrev[i]|=(i>>>r&3)<<h}}this._out=null,this._data=null,this._inv=0}return _t=m,m.prototype.fromComplexArray=function(t,s){for(var n=s||new Array(t.length>>>1),e=0;e<t.length;e+=2)n[e>>>1]=t[e];return n},m.prototype.createComplexArray=function(){const t=new Array(this._csize);for(var s=0;s<t.length;s++)t[s]=0;return t},m.prototype.toComplexArray=function(t,s){for(var n=s||this.createComplexArray(),e=0;e<n.length;e+=2)n[e]=t[e>>>1],n[e+1]=0;return n},m.prototype.completeSpectrum=function(t){for(var s=this._csize,n=s>>>1,e=2;e<n;e+=2)t[s-e]=t[e],t[s-e+1]=-t[e+1]},m.prototype.transform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=0,this._transform4(),this._out=null,this._data=null},m.prototype.realTransform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=0,this._realTransform4(),this._out=null,this._data=null},m.prototype.inverseTransform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=1,this._transform4();for(var n=0;n<t.length;n++)t[n]/=this.size;this._out=null,this._data=null},m.prototype._transform4=function(){var t=this._out,s=this._csize,n=this._width,e=1<<n,i=s/e<<1,r,h,l=this._bitrev;if(i===4)for(r=0,h=0;r<s;r+=i,h++){const u=l[h];this._singleTransform2(r,u,e)}else for(r=0,h=0;r<s;r+=i,h++){const u=l[h];this._singleTransform4(r,u,e)}var d=this._inv?-1:1,a=this.table;for(e>>=2;e>=2;e>>=2){i=s/e<<1;var c=i>>>2;for(r=0;r<s;r+=i)for(var _=r+c,v=r,f=0;v<_;v+=2,f+=e){const u=v,p=u+c,w=p+c,g=w+c,T=t[u],A=t[u+1],b=t[p],y=t[p+1],F=t[w],S=t[w+1],C=t[g],E=t[g+1],R=T,P=A,M=a[f],U=d*a[f+1],D=b*M-y*U,z=b*U+y*M,H=a[2*f],k=d*a[2*f+1],O=F*H-S*k,B=F*k+S*H,j=a[3*f],K=d*a[3*f+1],$=C*j-E*K,G=C*K+E*j,J=R+O,x=P+B,I=R-O,Y=P-B,Q=D+$,L=z+G,N=d*(D-$),V=d*(z-G),tt=J+Q,ot=x+L,nt=J-Q,at=x-L,it=I+V,lt=Y-N,ct=I-V,ht=Y+N;t[u]=tt,t[u+1]=ot,t[p]=it,t[p+1]=lt,t[w]=nt,t[w+1]=at,t[g]=ct,t[g+1]=ht}}},m.prototype._singleTransform2=function(t,s,n){const e=this._out,i=this._data,r=i[s],h=i[s+1],l=i[s+n],d=i[s+n+1],a=r+l,c=h+d,_=r-l,v=h-d;e[t]=a,e[t+1]=c,e[t+2]=_,e[t+3]=v},m.prototype._singleTransform4=function(t,s,n){const e=this._out,i=this._data,r=this._inv?-1:1,h=n*2,l=n*3,d=i[s],a=i[s+1],c=i[s+n],_=i[s+n+1],v=i[s+h],f=i[s+h+1],u=i[s+l],p=i[s+l+1],w=d+v,g=a+f,T=d-v,A=a-f,b=c+u,y=_+p,F=r*(c-u),S=r*(_-p),C=w+b,E=g+y,R=T+S,P=A-F,M=w-b,U=g-y,D=T-S,z=A+F;e[t]=C,e[t+1]=E,e[t+2]=R,e[t+3]=P,e[t+4]=M,e[t+5]=U,e[t+6]=D,e[t+7]=z},m.prototype._realTransform4=function(){var t=this._out,s=this._csize,n=this._width,e=1<<n,i=s/e<<1,r,h,l=this._bitrev;if(i===4)for(r=0,h=0;r<s;r+=i,h++){const dt=l[h];this._singleRealTransform2(r,dt>>>1,e>>>1)}else for(r=0,h=0;r<s;r+=i,h++){const dt=l[h];this._singleRealTransform4(r,dt>>>1,e>>>1)}var d=this._inv?-1:1,a=this.table;for(e>>=2;e>=2;e>>=2){i=s/e<<1;var c=i>>>1,_=c>>>1,v=_>>>1;for(r=0;r<s;r+=i)for(var f=0,u=0;f<=v;f+=2,u+=e){var p=r+f,w=p+_,g=w+_,T=g+_,A=t[p],b=t[p+1],y=t[w],F=t[w+1],S=t[g],C=t[g+1],E=t[T],R=t[T+1],P=A,M=b,U=a[u],D=d*a[u+1],z=y*U-F*D,H=y*D+F*U,k=a[2*u],O=d*a[2*u+1],B=S*k-C*O,j=S*O+C*k,K=a[3*u],$=d*a[3*u+1],G=E*K-R*$,J=E*$+R*K,x=P+B,I=M+j,Y=P-B,Q=M-j,L=z+G,N=H+J,V=d*(z-G),tt=d*(H-J),ot=x+L,nt=I+N,at=Y+tt,it=Q-V;if(t[p]=ot,t[p+1]=nt,t[w]=at,t[w+1]=it,f===0){var lt=x-L,ct=I-N;t[g]=lt,t[g+1]=ct;continue}if(f!==v){var ht=Y,Tt=-Q,bt=x,At=-I,Ft=-d*tt,St=-d*V,Ct=-d*N,Et=-d*L,Rt=ht+Ft,Pt=Tt+St,Mt=bt+Et,Ut=At-Ct,ut=r+_-f,vt=r+c-f;t[ut]=Rt,t[ut+1]=Pt,t[vt]=Mt,t[vt+1]=Ut}}}},m.prototype._singleRealTransform2=function(t,s,n){const e=this._out,i=this._data,r=i[s],h=i[s+n],l=r+h,d=r-h;e[t]=l,e[t+1]=0,e[t+2]=d,e[t+3]=0},m.prototype._singleRealTransform4=function(t,s,n){const e=this._out,i=this._data,r=this._inv?-1:1,h=n*2,l=n*3,d=i[s],a=i[s+n],c=i[s+h],_=i[s+l],v=d+c,f=d-c,u=a+_,p=r*(a-_),w=v+u,g=f,T=-p,A=v-u,b=f,y=p;e[t]=w,e[t+1]=0,e[t+2]=g,e[t+3]=T,e[t+4]=A,e[t+5]=0,e[t+6]=b,e[t+7]=y},_t}var xt=zt();const It=Dt(xt);class ft{constructor(o=16e3,t=512,s=64){this._sampleRate=o,this._nfft=t,this._nfilt=s,this._fft=new It(t),this._melFilters=this._createMelFilterbank()}_hzToMel(o){return 2595*Math.log10(1+o/700)}_melToHz(o){return 700*(10**(o/2595)-1)}_createMelFilterbank(){const t=this._sampleRate/2,s=this._hzToMel(0),n=this._hzToMel(t),e=new Float32Array(this._nfilt+2);for(let l=0;l<this._nfilt+2;l++)e[l]=s+l*(n-s)/(this._nfilt+1);const r=e.map(l=>this._melToHz(l)).map(l=>Math.floor((this._nfft+1)*l/this._sampleRate)),h=[];for(let l=0;l<this._nfilt;l++){const d=new Float32Array(Math.floor(this._nfft/2)+1);for(let a=r[l];a<r[l+1];a++)d[a]=(a-r[l])/(r[l+1]-r[l]);for(let a=r[l+1];a<r[l+2];a++)d[a]=(r[l+2]-a)/(r[l+2]-r[l+1]);h.push(d)}return h}logfbank(o){const t=Math.floor(.025*this._sampleRate),s=Math.floor(.01*this._sampleRate),n=1+Math.ceil((o.length-t)/s),e=new Float32Array(n*this._nfilt),i=new Float32Array(this._nfft),r=this._fft.createComplexArray();for(let h=0;h<n;h++){const l=h*s;i.fill(0);for(let c=0;c<t&&l+c<o.length;c++)i[c]=o[l+c];const d=this._fft.toComplexArray(i,null);this._fft.transform(r,d);const a=new Float32Array(Math.floor(this._nfft/2)+1);for(let c=0;c<a.length;c++){const _=r[2*c],v=r[2*c+1];a[c]=1/this._nfft*(_*_+v*v),a[c]===0&&(a[c]=1e-30)}for(let c=0;c<this._nfilt;c++){let _=0;const v=this._melFilters[c];for(let f=0;f<a.length;f++)_+=a[f]*v[f];_===0&&(_=1e-30),e[h*this._nfilt+c]=Math.log(_)}}return e}maxCosineSim(o,t){let s=0;for(const n of t){let e=0;for(let r=0;r<n.length;r++)e+=o[r]*n[r];const i=(e+1)/2;i>s&&(s=i)}return s}}async function et(m=Z,o=q){const t=await import(o);return t.env.wasm.wasmPaths=m,t.env.wasm.numThreads=1,t}let st=null;async function mt(m=rt,o=Z,t=q,s){return st||(st=et(o,t).then(n=>s?n.InferenceSession.create(new Uint8Array(s),{executionProviders:["wasm"],graphOptimizationLevel:"all"}):n.InferenceSession.create(m,{executionProviders:["wasm"],graphOptimizationLevel:"all"}))),st}function Lt(){return st!==null}class W{static loadWords(o=X){try{const t=localStorage.getItem(o);return t?JSON.parse(t):[]}catch{return[]}}static saveWord(o,t=X){const s=W.loadWords(t).filter(n=>n.word_name!==o.word_name);localStorage.setItem(t,JSON.stringify([...s,o]))}static deleteWord(o,t=X){try{const s=W.loadWords(t).filter(n=>n.word_name!==o);localStorage.setItem(t,JSON.stringify(s))}catch{}}}const wt={info:()=>{},warn:()=>{},error:()=>{}},Nt={info:console.info.bind(console),warn:console.warn.bind(console),error:console.error.bind(console)};class Wt{constructor(o,t){this._started=!1,this._inferring=!1,this._audioCtx=null,this._stream=null,this._refEmbeddings=new Map,this._lastMatchAt=0,this._lastInferenceAt=0,this._initPromise=null;const{refsStorageKey:s=X,thresholdStorageKey:n=yt,wasmPaths:e=Z,modelPath:i=rt,audioProcessorPath:r=gt,ortCdnUrl:h=q,audioUtils:l=new ft,log:d=!1}=t||{};this._log=d===!1?wt:d===!0?Nt:{...wt,...d},this._audioUtils=l,this._commands=o,this._refsStorageKey=s,this._thresholdStorageKey=n,this._audioProcessorPath=r,this._wasmPaths=e,this._modelPath=i,this._ortCdnUrl=h;try{const a=localStorage.getItem(this._thresholdStorageKey);this._threshold=a!==null?Math.max(0,Math.min(1,Number(a))):.65}catch{this._threshold=.65}}get threshold(){return this._threshold}set threshold(o){this._threshold=Math.max(0,Math.min(1,o));try{localStorage.setItem(this._thresholdStorageKey,String(this._threshold))}catch{}}get listening(){return this._started}async _trackFetch(o,t,s){const n=await fetch(o);if(!n.ok)throw new Error(`HTTP ${n.status} fetching ${o}`);const e=Number(n.headers.get("content-length")??"0");if(e>0&&(s.total+=e),!n.body){const a=await n.arrayBuffer();return s.downloaded+=a.byteLength,e||(s.total+=a.byteLength),t==null||t(s.downloaded,s.total),a}const i=n.body.getReader(),r=[];let h=0;for(;;){const{done:a,value:c}=await i.read();if(a)break;r.push(c),h+=c.length,s.downloaded+=c.length,t==null||t(s.downloaded,s.total)}e||(s.total+=h);const l=new Uint8Array(h);let d=0;for(const a of r)l.set(a,d),d+=a.length;return l.buffer}async _init(o){const t={downloaded:0,total:0},s=W.loadWords(this._refsStorageKey),n=new Set(s.map(a=>a.word_name)),e=new Set,i=[];for(const a of this._commands)for(const c of a.triggers)!e.has(c.name)&&c.defaultRefPath&&!n.has(c.name)&&(e.add(c.name),i.push({name:c.name,path:c.defaultRefPath}));const r=et(this._wasmPaths,this._ortCdnUrl),h=Lt(),[l,...d]=await Promise.all([h?Promise.resolve(null):this._trackFetch(this._modelPath,o,t),...i.map(({path:a})=>this._trackFetch(a,o,t))]);await r,await mt(this._modelPath,this._wasmPaths,this._ortCdnUrl,h?void 0:l);for(let a=0;a<i.length;a++)try{const c=JSON.parse(new TextDecoder().decode(d[a]));this.addCustomWord(c),W.saveWord(c,this._refsStorageKey)}catch{this._log.warn(`[Mellon] failed to parse ref file: ${i[a].path}`)}for(const a of s)this._refEmbeddings.set(a.word_name,a.embeddings);this._log.info("[Mellon] init complete, loaded refs:",[...this._refEmbeddings.keys()])}async init(o){this._initPromise||(this._initPromise=this._init(o)),await this._initPromise}addCustomWord(o){if(!(Array.isArray(o.embeddings)&&o.embeddings.length>0))throw new Error("invalid ref file for : "+o.word_name);this._refEmbeddings.set(o.word_name,o.embeddings)}async start(){if(this._started)return;await this.init();let o;try{o=await navigator.mediaDevices.getUserMedia({audio:{noiseSuppression:!1,echoCancellation:!1,autoGainControl:!1,channelCount:1}})}catch{o=await navigator.mediaDevices.getUserMedia({audio:!0})}this._stream=o;const t=new AudioContext({sampleRate:16e3});this._audioCtx=t,await t.audioWorklet.addModule(this._audioProcessorPath);const s=t.createMediaStreamSource(o),n=new AudioWorkletNode(t,"audio-processor");n.port.onmessage=e=>{this._handleBuffer(e.data)},s.connect(n),n.connect(t.destination),this._started=!0}async stop(){if(this._started=!1,this._audioCtx&&(await this._audioCtx.close(),this._audioCtx=null),this._stream){for(const o of this._stream.getTracks())o.stop();this._stream=null}}async _handleBuffer(o){if(this._inferring)return;const t=Date.now();if(!(t-this._lastInferenceAt<300)){this._lastInferenceAt=t,this._inferring=!0;try{const[s,n]=await Promise.all([et(this._wasmPaths,this._ortCdnUrl),mt(this._modelPath,this._wasmPaths,this._ortCdnUrl)]),e=this._audioUtils.logfbank(o),i=new s.Tensor("float32",e,[1,1,149,64]),r=await n.run({input:i}),h=r[Object.keys(r)[0]].data;let l=!1;for(const d of this._commands){if(l)break;for(const a of d.triggers){const c=this._refEmbeddings.get(a.name);if(!c)continue;const _=this._audioUtils.maxCosineSim(h,c);if(_>=this._threshold&&t-this._lastMatchAt>2e3){this._lastMatchAt=t,this._log.info(`[Mellon] match: "${a.name}" sim=${_.toFixed(3)}`),typeof d.onMatch=="function"&&d.onMatch(a.name,_),l=!0;break}}}}catch(s){this._log.error("[Mellon] inference error:",s)}finally{this._inferring=!1}}}}class Ht{constructor(o,t){this._config={},this._samples=[],this._wordName=o,this._config.modelPath=(t==null?void 0:t.modelPath)||rt,this._config.wasmPaths=(t==null?void 0:t.wasmPaths)||Z,this._config.ortCdnUrl=(t==null?void 0:t.ortCdnUrl)||q,this._audioUtils=(t==null?void 0:t.audioUtils)??new ft}async recordSample(){const o=await navigator.mediaDevices.getUserMedia({audio:!0}),t=new AudioContext({sampleRate:16e3}),s=await new Promise((i,r)=>{const h=new MediaRecorder(o),l=[];h.ondataavailable=d=>{d.data.size>0&&l.push(d.data)},h.onstop=async()=>{var d;for(const a of o.getTracks())a.stop();try{const c=await new Blob(l,{type:((d=l[0])==null?void 0:d.type)||"audio/webm"}).arrayBuffer(),_=await t.decodeAudioData(c);await t.close(),i(_.getChannelData(0).slice())}catch(a){r(a)}},h.start(),setTimeout(()=>{try{h.stop()}catch{}},1500)}),n=24e3,e=new Float32Array(n);return e.set(s.slice(0,n)),this._samples.push(e),this._samples.length}deleteSample(o){if(o<0||o>=this._samples.length)throw new RangeError(`index ${o} out of bounds (${this._samples.length} samples)`);return this._samples.splice(o,1),this._samples.length}async generateRef(){const[o,t]=await Promise.all([et(this._config.wasmPaths,this._config.ortCdnUrl),mt(this._config.modelPath,this._config.wasmPaths,this._config.ortCdnUrl)]),s=[];for(const n of this._samples){const e=this._audioUtils.logfbank(n),i=new o.Tensor("float32",e,[1,1,149,64]),r=await t.run({input:i}),h=Array.from(r[Object.keys(r)[0]].data);s.push(h)}return{word_name:this._wordName,model_type:"resnet_50_arc",embeddings:s}}}const Z="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/",q="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/ort.wasm.min.mjs",rt="https://huggingface.co/ComicScrip/mellon/resolve/main/model.onnx",gt="https://cdn.jsdelivr.net/npm/mellon@0.0.14/dist/assets/audio-processor.js",X="mellon-refs",yt="mellon-threshold";exports.AudioUtils=ft;exports.DEFAULT_AUDIO_PROCESSOR_PATH=gt;exports.DEFAULT_MODEL_PATH=rt;exports.DEFAULT_ORT_CDN_URL=q;exports.DEFAULT_REFS_STORAGE_KEY=X;exports.DEFAULT_THRESHOLD_STORAGE_KEY=yt;exports.DEFAULT_WASM_PATHS=Z;exports.Detector=Wt;exports.EnrollmentSession=Ht;exports.Storage=W;
|
|
1
|
+
"use strict";Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});function Dt(m){return m&&m.__esModule&&Object.prototype.hasOwnProperty.call(m,"default")?m.default:m}var _t,pt;function zt(){if(pt)return _t;pt=1;function m(r){if(this.size=r|0,this.size<=1||(this.size&this.size-1)!==0)throw new Error("FFT size must be a power of two and bigger than 1");this._csize=r<<1;for(var t=new Array(this.size*2),s=0;s<t.length;s+=2){const l=Math.PI*s/this.size;t[s]=Math.cos(l),t[s+1]=-Math.sin(l)}this.table=t;for(var n=0,e=1;this.size>e;e<<=1)n++;this._width=n%2===0?n-1:n,this._bitrev=new Array(1<<this._width);for(var i=0;i<this._bitrev.length;i++){this._bitrev[i]=0;for(var o=0;o<this._width;o+=2){var h=this._width-o-2;this._bitrev[i]|=(i>>>o&3)<<h}}this._out=null,this._data=null,this._inv=0}return _t=m,m.prototype.fromComplexArray=function(t,s){for(var n=s||new Array(t.length>>>1),e=0;e<t.length;e+=2)n[e>>>1]=t[e];return n},m.prototype.createComplexArray=function(){const t=new Array(this._csize);for(var s=0;s<t.length;s++)t[s]=0;return t},m.prototype.toComplexArray=function(t,s){for(var n=s||this.createComplexArray(),e=0;e<n.length;e+=2)n[e]=t[e>>>1],n[e+1]=0;return n},m.prototype.completeSpectrum=function(t){for(var s=this._csize,n=s>>>1,e=2;e<n;e+=2)t[s-e]=t[e],t[s-e+1]=-t[e+1]},m.prototype.transform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=0,this._transform4(),this._out=null,this._data=null},m.prototype.realTransform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=0,this._realTransform4(),this._out=null,this._data=null},m.prototype.inverseTransform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=1,this._transform4();for(var n=0;n<t.length;n++)t[n]/=this.size;this._out=null,this._data=null},m.prototype._transform4=function(){var t=this._out,s=this._csize,n=this._width,e=1<<n,i=s/e<<1,o,h,l=this._bitrev;if(i===4)for(o=0,h=0;o<s;o+=i,h++){const u=l[h];this._singleTransform2(o,u,e)}else for(o=0,h=0;o<s;o+=i,h++){const u=l[h];this._singleTransform4(o,u,e)}var d=this._inv?-1:1,a=this.table;for(e>>=2;e>=2;e>>=2){i=s/e<<1;var c=i>>>2;for(o=0;o<s;o+=i)for(var _=o+c,v=o,f=0;v<_;v+=2,f+=e){const u=v,p=u+c,w=p+c,g=w+c,T=t[u],A=t[u+1],b=t[p],y=t[p+1],F=t[w],S=t[w+1],C=t[g],E=t[g+1],R=T,P=A,M=a[f],U=d*a[f+1],D=b*M-y*U,z=b*U+y*M,H=a[2*f],k=d*a[2*f+1],O=F*H-S*k,B=F*k+S*H,j=a[3*f],K=d*a[3*f+1],$=C*j-E*K,G=C*K+E*j,J=R+O,x=P+B,I=R-O,Y=P-B,Q=D+$,L=z+G,N=d*(D-$),V=d*(z-G),tt=J+Q,ot=x+L,nt=J-Q,at=x-L,it=I+V,lt=Y-N,ct=I-V,ht=Y+N;t[u]=tt,t[u+1]=ot,t[p]=it,t[p+1]=lt,t[w]=nt,t[w+1]=at,t[g]=ct,t[g+1]=ht}}},m.prototype._singleTransform2=function(t,s,n){const e=this._out,i=this._data,o=i[s],h=i[s+1],l=i[s+n],d=i[s+n+1],a=o+l,c=h+d,_=o-l,v=h-d;e[t]=a,e[t+1]=c,e[t+2]=_,e[t+3]=v},m.prototype._singleTransform4=function(t,s,n){const e=this._out,i=this._data,o=this._inv?-1:1,h=n*2,l=n*3,d=i[s],a=i[s+1],c=i[s+n],_=i[s+n+1],v=i[s+h],f=i[s+h+1],u=i[s+l],p=i[s+l+1],w=d+v,g=a+f,T=d-v,A=a-f,b=c+u,y=_+p,F=o*(c-u),S=o*(_-p),C=w+b,E=g+y,R=T+S,P=A-F,M=w-b,U=g-y,D=T-S,z=A+F;e[t]=C,e[t+1]=E,e[t+2]=R,e[t+3]=P,e[t+4]=M,e[t+5]=U,e[t+6]=D,e[t+7]=z},m.prototype._realTransform4=function(){var t=this._out,s=this._csize,n=this._width,e=1<<n,i=s/e<<1,o,h,l=this._bitrev;if(i===4)for(o=0,h=0;o<s;o+=i,h++){const dt=l[h];this._singleRealTransform2(o,dt>>>1,e>>>1)}else for(o=0,h=0;o<s;o+=i,h++){const dt=l[h];this._singleRealTransform4(o,dt>>>1,e>>>1)}var d=this._inv?-1:1,a=this.table;for(e>>=2;e>=2;e>>=2){i=s/e<<1;var c=i>>>1,_=c>>>1,v=_>>>1;for(o=0;o<s;o+=i)for(var f=0,u=0;f<=v;f+=2,u+=e){var p=o+f,w=p+_,g=w+_,T=g+_,A=t[p],b=t[p+1],y=t[w],F=t[w+1],S=t[g],C=t[g+1],E=t[T],R=t[T+1],P=A,M=b,U=a[u],D=d*a[u+1],z=y*U-F*D,H=y*D+F*U,k=a[2*u],O=d*a[2*u+1],B=S*k-C*O,j=S*O+C*k,K=a[3*u],$=d*a[3*u+1],G=E*K-R*$,J=E*$+R*K,x=P+B,I=M+j,Y=P-B,Q=M-j,L=z+G,N=H+J,V=d*(z-G),tt=d*(H-J),ot=x+L,nt=I+N,at=Y+tt,it=Q-V;if(t[p]=ot,t[p+1]=nt,t[w]=at,t[w+1]=it,f===0){var lt=x-L,ct=I-N;t[g]=lt,t[g+1]=ct;continue}if(f!==v){var ht=Y,Tt=-Q,bt=x,At=-I,Ft=-d*tt,St=-d*V,Ct=-d*N,Et=-d*L,Rt=ht+Ft,Pt=Tt+St,Mt=bt+Et,Ut=At-Ct,ut=o+_-f,vt=o+c-f;t[ut]=Rt,t[ut+1]=Pt,t[vt]=Mt,t[vt+1]=Ut}}}},m.prototype._singleRealTransform2=function(t,s,n){const e=this._out,i=this._data,o=i[s],h=i[s+n],l=o+h,d=o-h;e[t]=l,e[t+1]=0,e[t+2]=d,e[t+3]=0},m.prototype._singleRealTransform4=function(t,s,n){const e=this._out,i=this._data,o=this._inv?-1:1,h=n*2,l=n*3,d=i[s],a=i[s+n],c=i[s+h],_=i[s+l],v=d+c,f=d-c,u=a+_,p=o*(a-_),w=v+u,g=f,T=-p,A=v-u,b=f,y=p;e[t]=w,e[t+1]=0,e[t+2]=g,e[t+3]=T,e[t+4]=A,e[t+5]=0,e[t+6]=b,e[t+7]=y},_t}var xt=zt();const It=Dt(xt);class ft{constructor(r=16e3,t=512,s=64){this._sampleRate=r,this._nfft=t,this._nfilt=s,this._fft=new It(t),this._melFilters=this._createMelFilterbank()}_hzToMel(r){return 2595*Math.log10(1+r/700)}_melToHz(r){return 700*(10**(r/2595)-1)}_createMelFilterbank(){const t=this._sampleRate/2,s=this._hzToMel(0),n=this._hzToMel(t),e=new Float32Array(this._nfilt+2);for(let l=0;l<this._nfilt+2;l++)e[l]=s+l*(n-s)/(this._nfilt+1);const o=e.map(l=>this._melToHz(l)).map(l=>Math.floor((this._nfft+1)*l/this._sampleRate)),h=[];for(let l=0;l<this._nfilt;l++){const d=new Float32Array(Math.floor(this._nfft/2)+1);for(let a=o[l];a<o[l+1];a++)d[a]=(a-o[l])/(o[l+1]-o[l]);for(let a=o[l+1];a<o[l+2];a++)d[a]=(o[l+2]-a)/(o[l+2]-o[l+1]);h.push(d)}return h}logfbank(r){const t=Math.floor(.025*this._sampleRate),s=Math.floor(.01*this._sampleRate),n=1+Math.ceil((r.length-t)/s),e=new Float32Array(n*this._nfilt),i=new Float32Array(this._nfft),o=this._fft.createComplexArray();for(let h=0;h<n;h++){const l=h*s;i.fill(0);for(let c=0;c<t&&l+c<r.length;c++)i[c]=r[l+c];const d=this._fft.toComplexArray(i,null);this._fft.transform(o,d);const a=new Float32Array(Math.floor(this._nfft/2)+1);for(let c=0;c<a.length;c++){const _=o[2*c],v=o[2*c+1];a[c]=1/this._nfft*(_*_+v*v),a[c]===0&&(a[c]=1e-30)}for(let c=0;c<this._nfilt;c++){let _=0;const v=this._melFilters[c];for(let f=0;f<a.length;f++)_+=a[f]*v[f];_===0&&(_=1e-30),e[h*this._nfilt+c]=Math.log(_)}}return e}maxCosineSim(r,t){let s=0;for(const n of t){let e=0;for(let o=0;o<n.length;o++)e+=r[o]*n[o];const i=(e+1)/2;i>s&&(s=i)}return s}}async function et(m=Z,r=q){const t=await import(r);return t.env.wasm.wasmPaths=m,t.env.wasm.numThreads=1,t}let st=null;async function mt(m=rt,r=Z,t=q,s){return st||(st=et(r,t).then(n=>s?n.InferenceSession.create(new Uint8Array(s),{executionProviders:["wasm"],graphOptimizationLevel:"all"}):n.InferenceSession.create(m,{executionProviders:["wasm"],graphOptimizationLevel:"all"}))),st}function Lt(){return st!==null}class W{static loadWords(r=X){try{const t=localStorage.getItem(r);return t?JSON.parse(t):[]}catch{return[]}}static saveWord(r,t=X){const s=W.loadWords(t).filter(n=>n.word_name!==r.word_name);localStorage.setItem(t,JSON.stringify([...s,r]))}static deleteWord(r,t=X){try{const s=W.loadWords(t).filter(n=>n.word_name!==r);localStorage.setItem(t,JSON.stringify(s))}catch{}}}const wt={info:()=>{},warn:()=>{},error:()=>{}},Nt={info:console.info.bind(console),warn:console.warn.bind(console),error:console.error.bind(console)};class Wt{constructor(r,t){this._started=!1,this._inferring=!1,this._audioCtx=null,this._stream=null,this._refEmbeddings=new Map,this._lastMatchAt=0,this._lastInferenceAt=0,this._initPromise=null;const{refsStorageKey:s=X,thresholdStorageKey:n=yt,wasmPaths:e=Z,modelPath:i=rt,audioProcessorPath:o=gt,ortCdnUrl:h=q,audioUtils:l=new ft,log:d=!1}=t||{};this._log=d===!1?wt:d===!0?Nt:{...wt,...d},this._audioUtils=l,this._commands=r,this._refsStorageKey=s,this._thresholdStorageKey=n,this._audioProcessorPath=o,this._wasmPaths=e,this._modelPath=i,this._ortCdnUrl=h;try{const a=localStorage.getItem(this._thresholdStorageKey);this._threshold=a!==null?Math.max(0,Math.min(1,Number(a))):.65}catch{this._threshold=.65}}get threshold(){return this._threshold}set threshold(r){this._threshold=Math.max(0,Math.min(1,r));try{localStorage.setItem(this._thresholdStorageKey,String(this._threshold))}catch{}}get listening(){return this._started}async _trackFetch(r,t,s){const n=await fetch(r);if(!n.ok)throw new Error(`HTTP ${n.status} fetching ${r}`);const e=Number(n.headers.get("content-length")??"0");if(e>0&&(s.total+=e),!n.body){const a=await n.arrayBuffer();return s.downloaded+=a.byteLength,e||(s.total+=a.byteLength),t==null||t(s.downloaded,s.total),a}const i=n.body.getReader(),o=[];let h=0;for(;;){const{done:a,value:c}=await i.read();if(a)break;o.push(c),h+=c.length,s.downloaded+=c.length,t==null||t(s.downloaded,s.total)}e||(s.total+=h);const l=new Uint8Array(h);let d=0;for(const a of o)l.set(a,d),d+=a.length;return l.buffer}async _init(r){const t={downloaded:0,total:0},s=W.loadWords(this._refsStorageKey),n=new Set(s.map(a=>a.word_name)),e=new Set,i=[];for(const a of this._commands)for(const c of a.triggers)!e.has(c.name)&&c.defaultRefPath&&!n.has(c.name)&&(e.add(c.name),i.push({name:c.name,path:c.defaultRefPath}));const o=et(this._wasmPaths,this._ortCdnUrl),h=Lt(),[l,...d]=await Promise.all([h?Promise.resolve(null):this._trackFetch(this._modelPath,r,t),...i.map(({path:a})=>this._trackFetch(a,r,t))]);await o,await mt(this._modelPath,this._wasmPaths,this._ortCdnUrl,h?void 0:l);for(let a=0;a<i.length;a++)try{const c=JSON.parse(new TextDecoder().decode(d[a]));this.addCustomWord(c),W.saveWord(c,this._refsStorageKey)}catch{this._log.warn(`[Mellon] failed to parse ref file: ${i[a].path}`)}for(const a of s)this._refEmbeddings.set(a.word_name,a.embeddings);this._log.info("[Mellon] init complete, loaded refs:",[...this._refEmbeddings.keys()])}async init(r){this._initPromise||(this._initPromise=this._init(r)),await this._initPromise}addCustomWord(r){if(!(Array.isArray(r.embeddings)&&r.embeddings.length>0))throw new Error("invalid ref file for : "+r.word_name);this._refEmbeddings.set(r.word_name,r.embeddings)}async start(){if(this._started)return;await this.init();let r;try{r=await navigator.mediaDevices.getUserMedia({audio:{noiseSuppression:!1,echoCancellation:!1,autoGainControl:!1,channelCount:1}})}catch{r=await navigator.mediaDevices.getUserMedia({audio:!0})}this._stream=r;const t=new AudioContext({sampleRate:16e3});this._audioCtx=t,await t.audioWorklet.addModule(this._audioProcessorPath);const s=t.createMediaStreamSource(r),n=new AudioWorkletNode(t,"audio-processor");n.port.onmessage=e=>{this._handleBuffer(e.data)},s.connect(n),n.connect(t.destination),this._started=!0}async stop(){if(this._started=!1,this._audioCtx&&(await this._audioCtx.close(),this._audioCtx=null),this._stream){for(const r of this._stream.getTracks())r.stop();this._stream=null}}async _handleBuffer(r){if(this._inferring)return;const t=Date.now();if(!(t-this._lastInferenceAt<300)){this._lastInferenceAt=t,this._inferring=!0;try{const[s,n]=await Promise.all([et(this._wasmPaths,this._ortCdnUrl),mt(this._modelPath,this._wasmPaths,this._ortCdnUrl)]),e=this._audioUtils.logfbank(r),i=new s.Tensor("float32",e,[1,1,149,64]),o=await n.run({input:i}),h=o[Object.keys(o)[0]].data;let l=!1;for(const d of this._commands){if(l)break;for(const a of d.triggers){const c=this._refEmbeddings.get(a.name);if(!c)continue;const _=this._audioUtils.maxCosineSim(h,c);if(_>=this._threshold&&t-this._lastMatchAt>2e3){this._lastMatchAt=t,this._log.info(`[Mellon] match: "${a.name}" sim=${_.toFixed(3)}`),typeof d.onMatch=="function"&&d.onMatch(a.name,_),l=!0;break}}}}catch(s){this._log.error("[Mellon] inference error:",s)}finally{this._inferring=!1}}}}class Ht{constructor(r,t){this._config={},this._samples=[],this._wordName=r,this._config.modelPath=(t==null?void 0:t.modelPath)||rt,this._config.wasmPaths=(t==null?void 0:t.wasmPaths)||Z,this._config.ortCdnUrl=(t==null?void 0:t.ortCdnUrl)||q,this._audioUtils=(t==null?void 0:t.audioUtils)??new ft}async recordSample(){const r=await navigator.mediaDevices.getUserMedia({audio:!0}),t=new AudioContext({sampleRate:16e3}),s=await new Promise((i,o)=>{const h=new MediaRecorder(r),l=[];h.ondataavailable=d=>{d.data.size>0&&l.push(d.data)},h.onstop=async()=>{var d;for(const a of r.getTracks())a.stop();try{const c=await new Blob(l,{type:((d=l[0])==null?void 0:d.type)||"audio/webm"}).arrayBuffer(),_=await t.decodeAudioData(c);await t.close(),i(_.getChannelData(0).slice())}catch(a){o(a)}},h.start(),setTimeout(()=>{try{h.stop()}catch{}},1500)}),n=24e3,e=new Float32Array(n);return e.set(s.slice(0,n)),this._samples.push(e),this._samples.length}getSample(r){if(r<0||r>=this._samples.length)throw new RangeError(`index ${r} out of bounds (${this._samples.length} samples)`);return this._samples[r]}deleteSample(r){if(r<0||r>=this._samples.length)throw new RangeError(`index ${r} out of bounds (${this._samples.length} samples)`);return this._samples.splice(r,1),this._samples.length}async generateRef(){const[r,t]=await Promise.all([et(this._config.wasmPaths,this._config.ortCdnUrl),mt(this._config.modelPath,this._config.wasmPaths,this._config.ortCdnUrl)]),s=[];for(const n of this._samples){const e=this._audioUtils.logfbank(n),i=new r.Tensor("float32",e,[1,1,149,64]),o=await t.run({input:i}),h=Array.from(o[Object.keys(o)[0]].data);s.push(h)}return{word_name:this._wordName,model_type:"resnet_50_arc",embeddings:s}}}const Z="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/",q="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/ort.wasm.min.mjs",rt="https://huggingface.co/ComicScrip/mellon/resolve/main/model.onnx",gt="https://cdn.jsdelivr.net/npm/mellon@0.0.14/dist/assets/audio-processor.js",X="mellon-refs",yt="mellon-threshold";exports.AudioUtils=ft;exports.DEFAULT_AUDIO_PROCESSOR_PATH=gt;exports.DEFAULT_MODEL_PATH=rt;exports.DEFAULT_ORT_CDN_URL=q;exports.DEFAULT_REFS_STORAGE_KEY=X;exports.DEFAULT_THRESHOLD_STORAGE_KEY=yt;exports.DEFAULT_WASM_PATHS=Z;exports.Detector=Wt;exports.EnrollmentSession=Ht;exports.Storage=W;
|
package/dist/mellon.mjs
CHANGED
|
@@ -5,13 +5,13 @@ var dt, vt;
|
|
|
5
5
|
function Ut() {
|
|
6
6
|
if (vt) return dt;
|
|
7
7
|
vt = 1;
|
|
8
|
-
function m(
|
|
9
|
-
if (this.size =
|
|
8
|
+
function m(r) {
|
|
9
|
+
if (this.size = r | 0, this.size <= 1 || (this.size & this.size - 1) !== 0)
|
|
10
10
|
throw new Error("FFT size must be a power of two and bigger than 1");
|
|
11
|
-
this._csize =
|
|
11
|
+
this._csize = r << 1;
|
|
12
12
|
for (var t = new Array(this.size * 2), s = 0; s < t.length; s += 2) {
|
|
13
|
-
const
|
|
14
|
-
t[s] = Math.cos(
|
|
13
|
+
const l = Math.PI * s / this.size;
|
|
14
|
+
t[s] = Math.cos(l), t[s + 1] = -Math.sin(l);
|
|
15
15
|
}
|
|
16
16
|
this.table = t;
|
|
17
17
|
for (var n = 0, e = 1; this.size > e; e <<= 1)
|
|
@@ -19,9 +19,9 @@ function Ut() {
|
|
|
19
19
|
this._width = n % 2 === 0 ? n - 1 : n, this._bitrev = new Array(1 << this._width);
|
|
20
20
|
for (var i = 0; i < this._bitrev.length; i++) {
|
|
21
21
|
this._bitrev[i] = 0;
|
|
22
|
-
for (var
|
|
23
|
-
var h = this._width -
|
|
24
|
-
this._bitrev[i] |= (i >>>
|
|
22
|
+
for (var o = 0; o < this._width; o += 2) {
|
|
23
|
+
var h = this._width - o - 2;
|
|
24
|
+
this._bitrev[i] |= (i >>> o & 3) << h;
|
|
25
25
|
}
|
|
26
26
|
}
|
|
27
27
|
this._out = null, this._data = null, this._inv = 0;
|
|
@@ -58,144 +58,144 @@ function Ut() {
|
|
|
58
58
|
t[n] /= this.size;
|
|
59
59
|
this._out = null, this._data = null;
|
|
60
60
|
}, m.prototype._transform4 = function() {
|
|
61
|
-
var t = this._out, s = this._csize, n = this._width, e = 1 << n, i = s / e << 1,
|
|
61
|
+
var t = this._out, s = this._csize, n = this._width, e = 1 << n, i = s / e << 1, o, h, l = this._bitrev;
|
|
62
62
|
if (i === 4)
|
|
63
|
-
for (
|
|
64
|
-
const u =
|
|
65
|
-
this._singleTransform2(
|
|
63
|
+
for (o = 0, h = 0; o < s; o += i, h++) {
|
|
64
|
+
const u = l[h];
|
|
65
|
+
this._singleTransform2(o, u, e);
|
|
66
66
|
}
|
|
67
67
|
else
|
|
68
|
-
for (
|
|
69
|
-
const u =
|
|
70
|
-
this._singleTransform4(
|
|
68
|
+
for (o = 0, h = 0; o < s; o += i, h++) {
|
|
69
|
+
const u = l[h];
|
|
70
|
+
this._singleTransform4(o, u, e);
|
|
71
71
|
}
|
|
72
72
|
var d = this._inv ? -1 : 1, a = this.table;
|
|
73
73
|
for (e >>= 2; e >= 2; e >>= 2) {
|
|
74
74
|
i = s / e << 1;
|
|
75
|
-
var
|
|
76
|
-
for (
|
|
77
|
-
for (var _ =
|
|
78
|
-
const u = v, p = u +
|
|
79
|
-
t[u] = X, t[u + 1] = rt, t[p] = at, t[p + 1] = it, t[w] = ot, t[w + 1] = nt, t[g] =
|
|
75
|
+
var c = i >>> 2;
|
|
76
|
+
for (o = 0; o < s; o += i)
|
|
77
|
+
for (var _ = o + c, v = o, f = 0; v < _; v += 2, f += e) {
|
|
78
|
+
const u = v, p = u + c, w = p + c, g = w + c, b = t[u], T = t[u + 1], F = t[p], y = t[p + 1], A = t[w], C = t[w + 1], S = t[g], M = t[g + 1], P = b, R = T, E = a[f], U = d * a[f + 1], D = F * E - y * U, z = F * U + y * E, k = a[2 * f], B = d * a[2 * f + 1], j = A * k - C * B, $ = A * B + C * k, H = a[3 * f], K = d * a[3 * f + 1], L = S * H - M * K, J = S * K + M * H, G = P + j, x = R + $, I = P - j, O = R - $, Y = D + L, N = z + J, W = d * (D - L), Q = d * (z - J), X = G + Y, rt = x + N, ot = G - Y, nt = x - N, at = I + Q, it = O - W, lt = I - Q, ct = O + W;
|
|
79
|
+
t[u] = X, t[u + 1] = rt, t[p] = at, t[p + 1] = it, t[w] = ot, t[w + 1] = nt, t[g] = lt, t[g + 1] = ct;
|
|
80
80
|
}
|
|
81
81
|
}
|
|
82
82
|
}, m.prototype._singleTransform2 = function(t, s, n) {
|
|
83
|
-
const e = this._out, i = this._data,
|
|
84
|
-
e[t] = a, e[t + 1] =
|
|
83
|
+
const e = this._out, i = this._data, o = i[s], h = i[s + 1], l = i[s + n], d = i[s + n + 1], a = o + l, c = h + d, _ = o - l, v = h - d;
|
|
84
|
+
e[t] = a, e[t + 1] = c, e[t + 2] = _, e[t + 3] = v;
|
|
85
85
|
}, m.prototype._singleTransform4 = function(t, s, n) {
|
|
86
|
-
const e = this._out, i = this._data,
|
|
87
|
-
e[t] = S, e[t + 1] = M, e[t + 2] = P, e[t + 3] = R, e[t + 4] = E, e[t + 5] = U, e[t + 6] = D, e[t + 7] =
|
|
86
|
+
const e = this._out, i = this._data, o = this._inv ? -1 : 1, h = n * 2, l = n * 3, d = i[s], a = i[s + 1], c = i[s + n], _ = i[s + n + 1], v = i[s + h], f = i[s + h + 1], u = i[s + l], p = i[s + l + 1], w = d + v, g = a + f, b = d - v, T = a - f, F = c + u, y = _ + p, A = o * (c - u), C = o * (_ - p), S = w + F, M = g + y, P = b + C, R = T - A, E = w - F, U = g - y, D = b - C, z = T + A;
|
|
87
|
+
e[t] = S, e[t + 1] = M, e[t + 2] = P, e[t + 3] = R, e[t + 4] = E, e[t + 5] = U, e[t + 6] = D, e[t + 7] = z;
|
|
88
88
|
}, m.prototype._realTransform4 = function() {
|
|
89
|
-
var t = this._out, s = this._csize, n = this._width, e = 1 << n, i = s / e << 1,
|
|
89
|
+
var t = this._out, s = this._csize, n = this._width, e = 1 << n, i = s / e << 1, o, h, l = this._bitrev;
|
|
90
90
|
if (i === 4)
|
|
91
|
-
for (
|
|
92
|
-
const ht =
|
|
93
|
-
this._singleRealTransform2(
|
|
91
|
+
for (o = 0, h = 0; o < s; o += i, h++) {
|
|
92
|
+
const ht = l[h];
|
|
93
|
+
this._singleRealTransform2(o, ht >>> 1, e >>> 1);
|
|
94
94
|
}
|
|
95
95
|
else
|
|
96
|
-
for (
|
|
97
|
-
const ht =
|
|
98
|
-
this._singleRealTransform4(
|
|
96
|
+
for (o = 0, h = 0; o < s; o += i, h++) {
|
|
97
|
+
const ht = l[h];
|
|
98
|
+
this._singleRealTransform4(o, ht >>> 1, e >>> 1);
|
|
99
99
|
}
|
|
100
100
|
var d = this._inv ? -1 : 1, a = this.table;
|
|
101
101
|
for (e >>= 2; e >= 2; e >>= 2) {
|
|
102
102
|
i = s / e << 1;
|
|
103
|
-
var
|
|
104
|
-
for (
|
|
103
|
+
var c = i >>> 1, _ = c >>> 1, v = _ >>> 1;
|
|
104
|
+
for (o = 0; o < s; o += i)
|
|
105
105
|
for (var f = 0, u = 0; f <= v; f += 2, u += e) {
|
|
106
|
-
var p =
|
|
106
|
+
var p = o + f, w = p + _, g = w + _, b = g + _, T = t[p], F = t[p + 1], y = t[w], A = t[w + 1], C = t[g], S = t[g + 1], M = t[b], P = t[b + 1], R = T, E = F, U = a[u], D = d * a[u + 1], z = y * U - A * D, k = y * D + A * U, B = a[2 * u], j = d * a[2 * u + 1], $ = C * B - S * j, H = C * j + S * B, K = a[3 * u], L = d * a[3 * u + 1], J = M * K - P * L, G = M * L + P * K, x = R + $, I = E + H, O = R - $, Y = E - H, N = z + J, W = k + G, Q = d * (z - J), X = d * (k - G), rt = x + N, ot = I + W, nt = O + X, at = Y - Q;
|
|
107
107
|
if (t[p] = rt, t[p + 1] = ot, t[w] = nt, t[w + 1] = at, f === 0) {
|
|
108
|
-
var it =
|
|
109
|
-
t[g] = it, t[g + 1] =
|
|
108
|
+
var it = x - N, lt = I - W;
|
|
109
|
+
t[g] = it, t[g + 1] = lt;
|
|
110
110
|
continue;
|
|
111
111
|
}
|
|
112
112
|
if (f !== v) {
|
|
113
|
-
var
|
|
113
|
+
var ct = O, gt = -Y, yt = x, bt = -I, Ft = -d * X, Tt = -d * Q, At = -d * W, Ct = -d * N, St = ct + Ft, Mt = gt + Tt, Pt = yt + Ct, Rt = bt - At, ft = o + _ - f, ut = o + c - f;
|
|
114
114
|
t[ft] = St, t[ft + 1] = Mt, t[ut] = Pt, t[ut + 1] = Rt;
|
|
115
115
|
}
|
|
116
116
|
}
|
|
117
117
|
}
|
|
118
118
|
}, m.prototype._singleRealTransform2 = function(t, s, n) {
|
|
119
|
-
const e = this._out, i = this._data,
|
|
120
|
-
e[t] =
|
|
119
|
+
const e = this._out, i = this._data, o = i[s], h = i[s + n], l = o + h, d = o - h;
|
|
120
|
+
e[t] = l, e[t + 1] = 0, e[t + 2] = d, e[t + 3] = 0;
|
|
121
121
|
}, m.prototype._singleRealTransform4 = function(t, s, n) {
|
|
122
|
-
const e = this._out, i = this._data,
|
|
122
|
+
const e = this._out, i = this._data, o = this._inv ? -1 : 1, h = n * 2, l = n * 3, d = i[s], a = i[s + n], c = i[s + h], _ = i[s + l], v = d + c, f = d - c, u = a + _, p = o * (a - _), w = v + u, g = f, b = -p, T = v - u, F = f, y = p;
|
|
123
123
|
e[t] = w, e[t + 1] = 0, e[t + 2] = g, e[t + 3] = b, e[t + 4] = T, e[t + 5] = 0, e[t + 6] = F, e[t + 7] = y;
|
|
124
124
|
}, dt;
|
|
125
125
|
}
|
|
126
126
|
var Dt = Ut();
|
|
127
|
-
const
|
|
127
|
+
const zt = /* @__PURE__ */ Et(Dt);
|
|
128
128
|
class wt {
|
|
129
|
-
constructor(
|
|
130
|
-
this._sampleRate =
|
|
129
|
+
constructor(r = 16e3, t = 512, s = 64) {
|
|
130
|
+
this._sampleRate = r, this._nfft = t, this._nfilt = s, this._fft = new zt(t), this._melFilters = this._createMelFilterbank();
|
|
131
131
|
}
|
|
132
|
-
_hzToMel(
|
|
133
|
-
return 2595 * Math.log10(1 +
|
|
132
|
+
_hzToMel(r) {
|
|
133
|
+
return 2595 * Math.log10(1 + r / 700);
|
|
134
134
|
}
|
|
135
|
-
_melToHz(
|
|
136
|
-
return 700 * (10 ** (
|
|
135
|
+
_melToHz(r) {
|
|
136
|
+
return 700 * (10 ** (r / 2595) - 1);
|
|
137
137
|
}
|
|
138
138
|
_createMelFilterbank() {
|
|
139
139
|
const t = this._sampleRate / 2, s = this._hzToMel(0), n = this._hzToMel(t), e = new Float32Array(this._nfilt + 2);
|
|
140
|
-
for (let
|
|
141
|
-
e[
|
|
142
|
-
const
|
|
143
|
-
for (let
|
|
140
|
+
for (let l = 0; l < this._nfilt + 2; l++)
|
|
141
|
+
e[l] = s + l * (n - s) / (this._nfilt + 1);
|
|
142
|
+
const o = e.map((l) => this._melToHz(l)).map((l) => Math.floor((this._nfft + 1) * l / this._sampleRate)), h = [];
|
|
143
|
+
for (let l = 0; l < this._nfilt; l++) {
|
|
144
144
|
const d = new Float32Array(Math.floor(this._nfft / 2) + 1);
|
|
145
|
-
for (let a =
|
|
146
|
-
d[a] = (a -
|
|
147
|
-
for (let a =
|
|
148
|
-
d[a] = (
|
|
145
|
+
for (let a = o[l]; a < o[l + 1]; a++)
|
|
146
|
+
d[a] = (a - o[l]) / (o[l + 1] - o[l]);
|
|
147
|
+
for (let a = o[l + 1]; a < o[l + 2]; a++)
|
|
148
|
+
d[a] = (o[l + 2] - a) / (o[l + 2] - o[l + 1]);
|
|
149
149
|
h.push(d);
|
|
150
150
|
}
|
|
151
151
|
return h;
|
|
152
152
|
}
|
|
153
153
|
/** Returns a flat Float32Array of shape [numFrames × nfilt]. */
|
|
154
|
-
logfbank(
|
|
155
|
-
const t = Math.floor(0.025 * this._sampleRate), s = Math.floor(0.01 * this._sampleRate), n = 1 + Math.ceil((
|
|
154
|
+
logfbank(r) {
|
|
155
|
+
const t = Math.floor(0.025 * this._sampleRate), s = Math.floor(0.01 * this._sampleRate), n = 1 + Math.ceil((r.length - t) / s), e = new Float32Array(n * this._nfilt), i = new Float32Array(this._nfft), o = this._fft.createComplexArray();
|
|
156
156
|
for (let h = 0; h < n; h++) {
|
|
157
|
-
const
|
|
157
|
+
const l = h * s;
|
|
158
158
|
i.fill(0);
|
|
159
|
-
for (let
|
|
160
|
-
i[
|
|
159
|
+
for (let c = 0; c < t && l + c < r.length; c++)
|
|
160
|
+
i[c] = r[l + c];
|
|
161
161
|
const d = this._fft.toComplexArray(i, null);
|
|
162
|
-
this._fft.transform(
|
|
162
|
+
this._fft.transform(o, d);
|
|
163
163
|
const a = new Float32Array(Math.floor(this._nfft / 2) + 1);
|
|
164
|
-
for (let
|
|
165
|
-
const _ =
|
|
166
|
-
a[
|
|
164
|
+
for (let c = 0; c < a.length; c++) {
|
|
165
|
+
const _ = o[2 * c], v = o[2 * c + 1];
|
|
166
|
+
a[c] = 1 / this._nfft * (_ * _ + v * v), a[c] === 0 && (a[c] = 1e-30);
|
|
167
167
|
}
|
|
168
|
-
for (let
|
|
168
|
+
for (let c = 0; c < this._nfilt; c++) {
|
|
169
169
|
let _ = 0;
|
|
170
|
-
const v = this._melFilters[
|
|
170
|
+
const v = this._melFilters[c];
|
|
171
171
|
for (let f = 0; f < a.length; f++)
|
|
172
172
|
_ += a[f] * v[f];
|
|
173
|
-
_ === 0 && (_ = 1e-30), e[h * this._nfilt +
|
|
173
|
+
_ === 0 && (_ = 1e-30), e[h * this._nfilt + c] = Math.log(_);
|
|
174
174
|
}
|
|
175
175
|
}
|
|
176
176
|
return e;
|
|
177
177
|
}
|
|
178
|
-
maxCosineSim(
|
|
178
|
+
maxCosineSim(r, t) {
|
|
179
179
|
let s = 0;
|
|
180
180
|
for (const n of t) {
|
|
181
181
|
let e = 0;
|
|
182
|
-
for (let
|
|
182
|
+
for (let o = 0; o < n.length; o++) e += r[o] * n[o];
|
|
183
183
|
const i = (e + 1) / 2;
|
|
184
184
|
i > s && (s = i);
|
|
185
185
|
}
|
|
186
186
|
return s;
|
|
187
187
|
}
|
|
188
188
|
}
|
|
189
|
-
async function tt(m = st,
|
|
189
|
+
async function tt(m = st, r = et) {
|
|
190
190
|
const t = await import(
|
|
191
191
|
/* @vite-ignore */
|
|
192
|
-
|
|
192
|
+
r
|
|
193
193
|
);
|
|
194
194
|
return t.env.wasm.wasmPaths = m, t.env.wasm.numThreads = 1, t;
|
|
195
195
|
}
|
|
196
196
|
let Z = null;
|
|
197
|
-
async function _t(m = mt,
|
|
198
|
-
return Z || (Z = tt(
|
|
197
|
+
async function _t(m = mt, r = st, t = et, s) {
|
|
198
|
+
return Z || (Z = tt(r, t).then(
|
|
199
199
|
(n) => s ? n.InferenceSession.create(new Uint8Array(s), {
|
|
200
200
|
executionProviders: ["wasm"],
|
|
201
201
|
graphOptimizationLevel: "all"
|
|
@@ -205,25 +205,25 @@ async function _t(m = mt, o = st, t = et, s) {
|
|
|
205
205
|
})
|
|
206
206
|
)), Z;
|
|
207
207
|
}
|
|
208
|
-
function
|
|
208
|
+
function xt() {
|
|
209
209
|
return Z !== null;
|
|
210
210
|
}
|
|
211
211
|
class V {
|
|
212
|
-
static loadWords(
|
|
212
|
+
static loadWords(r = q) {
|
|
213
213
|
try {
|
|
214
|
-
const t = localStorage.getItem(
|
|
214
|
+
const t = localStorage.getItem(r);
|
|
215
215
|
return t ? JSON.parse(t) : [];
|
|
216
216
|
} catch {
|
|
217
217
|
return [];
|
|
218
218
|
}
|
|
219
219
|
}
|
|
220
|
-
static saveWord(
|
|
221
|
-
const s = V.loadWords(t).filter((n) => n.word_name !==
|
|
222
|
-
localStorage.setItem(t, JSON.stringify([...s,
|
|
220
|
+
static saveWord(r, t = q) {
|
|
221
|
+
const s = V.loadWords(t).filter((n) => n.word_name !== r.word_name);
|
|
222
|
+
localStorage.setItem(t, JSON.stringify([...s, r]));
|
|
223
223
|
}
|
|
224
|
-
static deleteWord(
|
|
224
|
+
static deleteWord(r, t = q) {
|
|
225
225
|
try {
|
|
226
|
-
const s = V.loadWords(t).filter((n) => n.word_name !==
|
|
226
|
+
const s = V.loadWords(t).filter((n) => n.word_name !== r);
|
|
227
227
|
localStorage.setItem(t, JSON.stringify(s));
|
|
228
228
|
} catch {
|
|
229
229
|
}
|
|
@@ -234,19 +234,19 @@ const pt = { info: () => {
|
|
|
234
234
|
}, error: () => {
|
|
235
235
|
} }, It = { info: console.info.bind(console), warn: console.warn.bind(console), error: console.error.bind(console) };
|
|
236
236
|
class kt {
|
|
237
|
-
constructor(
|
|
237
|
+
constructor(r, t) {
|
|
238
238
|
this._started = !1, this._inferring = !1, this._audioCtx = null, this._stream = null, this._refEmbeddings = /* @__PURE__ */ new Map(), this._lastMatchAt = 0, this._lastInferenceAt = 0, this._initPromise = null;
|
|
239
239
|
const {
|
|
240
240
|
refsStorageKey: s = q,
|
|
241
241
|
thresholdStorageKey: n = Wt,
|
|
242
242
|
wasmPaths: e = st,
|
|
243
243
|
modelPath: i = mt,
|
|
244
|
-
audioProcessorPath:
|
|
244
|
+
audioProcessorPath: o = Nt,
|
|
245
245
|
ortCdnUrl: h = et,
|
|
246
|
-
audioUtils:
|
|
246
|
+
audioUtils: l = new wt(),
|
|
247
247
|
log: d = !1
|
|
248
248
|
} = t || {};
|
|
249
|
-
this._log = d === !1 ? pt : d === !0 ? It : { ...pt, ...d }, this._audioUtils =
|
|
249
|
+
this._log = d === !1 ? pt : d === !0 ? It : { ...pt, ...d }, this._audioUtils = l, this._commands = r, this._refsStorageKey = s, this._thresholdStorageKey = n, this._audioProcessorPath = o, this._wasmPaths = e, this._modelPath = i, this._ortCdnUrl = h;
|
|
250
250
|
try {
|
|
251
251
|
const a = localStorage.getItem(this._thresholdStorageKey);
|
|
252
252
|
this._threshold = a !== null ? Math.max(0, Math.min(1, Number(a))) : 0.65;
|
|
@@ -257,8 +257,8 @@ class kt {
|
|
|
257
257
|
get threshold() {
|
|
258
258
|
return this._threshold;
|
|
259
259
|
}
|
|
260
|
-
set threshold(
|
|
261
|
-
this._threshold = Math.max(0, Math.min(1,
|
|
260
|
+
set threshold(r) {
|
|
261
|
+
this._threshold = Math.max(0, Math.min(1, r));
|
|
262
262
|
try {
|
|
263
263
|
localStorage.setItem(this._thresholdStorageKey, String(this._threshold));
|
|
264
264
|
} catch {
|
|
@@ -271,47 +271,47 @@ class kt {
|
|
|
271
271
|
* Streams `url`, calling `onProgress(downloaded, total)` after each chunk.
|
|
272
272
|
* Falls back to a single-shot fetch when the body stream is unavailable.
|
|
273
273
|
*/
|
|
274
|
-
async _trackFetch(
|
|
275
|
-
const n = await fetch(
|
|
276
|
-
if (!n.ok) throw new Error(`HTTP ${n.status} fetching ${
|
|
274
|
+
async _trackFetch(r, t, s) {
|
|
275
|
+
const n = await fetch(r);
|
|
276
|
+
if (!n.ok) throw new Error(`HTTP ${n.status} fetching ${r}`);
|
|
277
277
|
const e = Number(n.headers.get("content-length") ?? "0");
|
|
278
278
|
if (e > 0 && (s.total += e), !n.body) {
|
|
279
279
|
const a = await n.arrayBuffer();
|
|
280
280
|
return s.downloaded += a.byteLength, e || (s.total += a.byteLength), t == null || t(s.downloaded, s.total), a;
|
|
281
281
|
}
|
|
282
|
-
const i = n.body.getReader(),
|
|
282
|
+
const i = n.body.getReader(), o = [];
|
|
283
283
|
let h = 0;
|
|
284
284
|
for (; ; ) {
|
|
285
|
-
const { done: a, value:
|
|
285
|
+
const { done: a, value: c } = await i.read();
|
|
286
286
|
if (a) break;
|
|
287
|
-
|
|
287
|
+
o.push(c), h += c.length, s.downloaded += c.length, t == null || t(s.downloaded, s.total);
|
|
288
288
|
}
|
|
289
289
|
e || (s.total += h);
|
|
290
|
-
const
|
|
290
|
+
const l = new Uint8Array(h);
|
|
291
291
|
let d = 0;
|
|
292
|
-
for (const a of
|
|
293
|
-
|
|
294
|
-
return
|
|
292
|
+
for (const a of o)
|
|
293
|
+
l.set(a, d), d += a.length;
|
|
294
|
+
return l.buffer;
|
|
295
295
|
}
|
|
296
|
-
async _init(
|
|
296
|
+
async _init(r) {
|
|
297
297
|
const t = { downloaded: 0, total: 0 }, s = V.loadWords(this._refsStorageKey), n = new Set(s.map((a) => a.word_name)), e = /* @__PURE__ */ new Set(), i = [];
|
|
298
298
|
for (const a of this._commands)
|
|
299
|
-
for (const
|
|
300
|
-
!e.has(
|
|
301
|
-
const
|
|
302
|
-
h ? Promise.resolve(null) : this._trackFetch(this._modelPath,
|
|
303
|
-
...i.map(({ path: a }) => this._trackFetch(a,
|
|
299
|
+
for (const c of a.triggers)
|
|
300
|
+
!e.has(c.name) && c.defaultRefPath && !n.has(c.name) && (e.add(c.name), i.push({ name: c.name, path: c.defaultRefPath }));
|
|
301
|
+
const o = tt(this._wasmPaths, this._ortCdnUrl), h = xt(), [l, ...d] = await Promise.all([
|
|
302
|
+
h ? Promise.resolve(null) : this._trackFetch(this._modelPath, r, t),
|
|
303
|
+
...i.map(({ path: a }) => this._trackFetch(a, r, t))
|
|
304
304
|
]);
|
|
305
|
-
await
|
|
305
|
+
await o, await _t(
|
|
306
306
|
this._modelPath,
|
|
307
307
|
this._wasmPaths,
|
|
308
308
|
this._ortCdnUrl,
|
|
309
|
-
h ? void 0 :
|
|
309
|
+
h ? void 0 : l
|
|
310
310
|
);
|
|
311
311
|
for (let a = 0; a < i.length; a++)
|
|
312
312
|
try {
|
|
313
|
-
const
|
|
314
|
-
this.addCustomWord(
|
|
313
|
+
const c = JSON.parse(new TextDecoder().decode(d[a]));
|
|
314
|
+
this.addCustomWord(c), V.saveWord(c, this._refsStorageKey);
|
|
315
315
|
} catch {
|
|
316
316
|
this._log.warn(`[Mellon] failed to parse ref file: ${i[a].path}`);
|
|
317
317
|
}
|
|
@@ -326,21 +326,21 @@ class kt {
|
|
|
326
326
|
*
|
|
327
327
|
* @param onProgress - optional callback invoked as each asset is loaded
|
|
328
328
|
*/
|
|
329
|
-
async init(
|
|
330
|
-
this._initPromise || (this._initPromise = this._init(
|
|
329
|
+
async init(r) {
|
|
330
|
+
this._initPromise || (this._initPromise = this._init(r)), await this._initPromise;
|
|
331
331
|
}
|
|
332
332
|
/** Adds (or replaces) the reference embeddings for a word without restarting. */
|
|
333
|
-
addCustomWord(
|
|
334
|
-
if (!(Array.isArray(
|
|
335
|
-
throw new Error("invalid ref file for : " +
|
|
336
|
-
this._refEmbeddings.set(
|
|
333
|
+
addCustomWord(r) {
|
|
334
|
+
if (!(Array.isArray(r.embeddings) && r.embeddings.length > 0))
|
|
335
|
+
throw new Error("invalid ref file for : " + r.word_name);
|
|
336
|
+
this._refEmbeddings.set(r.word_name, r.embeddings);
|
|
337
337
|
}
|
|
338
338
|
async start() {
|
|
339
339
|
if (this._started) return;
|
|
340
340
|
await this.init();
|
|
341
|
-
let
|
|
341
|
+
let r;
|
|
342
342
|
try {
|
|
343
|
-
|
|
343
|
+
r = await navigator.mediaDevices.getUserMedia({
|
|
344
344
|
audio: {
|
|
345
345
|
noiseSuppression: !1,
|
|
346
346
|
echoCancellation: !1,
|
|
@@ -349,38 +349,38 @@ class kt {
|
|
|
349
349
|
}
|
|
350
350
|
});
|
|
351
351
|
} catch {
|
|
352
|
-
|
|
352
|
+
r = await navigator.mediaDevices.getUserMedia({ audio: !0 });
|
|
353
353
|
}
|
|
354
|
-
this._stream =
|
|
354
|
+
this._stream = r;
|
|
355
355
|
const t = new AudioContext({ sampleRate: 16e3 });
|
|
356
356
|
this._audioCtx = t, await t.audioWorklet.addModule(this._audioProcessorPath);
|
|
357
|
-
const s = t.createMediaStreamSource(
|
|
357
|
+
const s = t.createMediaStreamSource(r), n = new AudioWorkletNode(t, "audio-processor");
|
|
358
358
|
n.port.onmessage = (e) => {
|
|
359
359
|
this._handleBuffer(e.data);
|
|
360
360
|
}, s.connect(n), n.connect(t.destination), this._started = !0;
|
|
361
361
|
}
|
|
362
362
|
async stop() {
|
|
363
363
|
if (this._started = !1, this._audioCtx && (await this._audioCtx.close(), this._audioCtx = null), this._stream) {
|
|
364
|
-
for (const
|
|
364
|
+
for (const r of this._stream.getTracks()) r.stop();
|
|
365
365
|
this._stream = null;
|
|
366
366
|
}
|
|
367
367
|
}
|
|
368
|
-
async _handleBuffer(
|
|
368
|
+
async _handleBuffer(r) {
|
|
369
369
|
if (this._inferring) return;
|
|
370
370
|
const t = Date.now();
|
|
371
371
|
if (!(t - this._lastInferenceAt < 300)) {
|
|
372
372
|
this._lastInferenceAt = t, this._inferring = !0;
|
|
373
373
|
try {
|
|
374
|
-
const [s, n] = await Promise.all([tt(this._wasmPaths, this._ortCdnUrl), _t(this._modelPath, this._wasmPaths, this._ortCdnUrl)]), e = this._audioUtils.logfbank(
|
|
375
|
-
let
|
|
374
|
+
const [s, n] = await Promise.all([tt(this._wasmPaths, this._ortCdnUrl), _t(this._modelPath, this._wasmPaths, this._ortCdnUrl)]), e = this._audioUtils.logfbank(r), i = new s.Tensor("float32", e, [1, 1, 149, 64]), o = await n.run({ input: i }), h = o[Object.keys(o)[0]].data;
|
|
375
|
+
let l = !1;
|
|
376
376
|
for (const d of this._commands) {
|
|
377
|
-
if (
|
|
377
|
+
if (l) break;
|
|
378
378
|
for (const a of d.triggers) {
|
|
379
|
-
const
|
|
380
|
-
if (!
|
|
381
|
-
const _ = this._audioUtils.maxCosineSim(h,
|
|
379
|
+
const c = this._refEmbeddings.get(a.name);
|
|
380
|
+
if (!c) continue;
|
|
381
|
+
const _ = this._audioUtils.maxCosineSim(h, c);
|
|
382
382
|
if (_ >= this._threshold && t - this._lastMatchAt > 2e3) {
|
|
383
|
-
this._lastMatchAt = t, this._log.info(`[Mellon] match: "${a.name}" sim=${_.toFixed(3)}`), typeof d.onMatch == "function" && d.onMatch(a.name, _),
|
|
383
|
+
this._lastMatchAt = t, this._log.info(`[Mellon] match: "${a.name}" sim=${_.toFixed(3)}`), typeof d.onMatch == "function" && d.onMatch(a.name, _), l = !0;
|
|
384
384
|
break;
|
|
385
385
|
}
|
|
386
386
|
}
|
|
@@ -394,23 +394,23 @@ class kt {
|
|
|
394
394
|
}
|
|
395
395
|
}
|
|
396
396
|
class Bt {
|
|
397
|
-
constructor(
|
|
398
|
-
this._config = {}, this._samples = [], this._wordName =
|
|
397
|
+
constructor(r, t) {
|
|
398
|
+
this._config = {}, this._samples = [], this._wordName = r, this._config.modelPath = (t == null ? void 0 : t.modelPath) || mt, this._config.wasmPaths = (t == null ? void 0 : t.wasmPaths) || st, this._config.ortCdnUrl = (t == null ? void 0 : t.ortCdnUrl) || et, this._audioUtils = (t == null ? void 0 : t.audioUtils) ?? new wt();
|
|
399
399
|
}
|
|
400
400
|
/** Records 1.5 s of audio, stores the decoded PCM, returns new sample count. */
|
|
401
401
|
async recordSample() {
|
|
402
|
-
const
|
|
403
|
-
const h = new MediaRecorder(
|
|
402
|
+
const r = await navigator.mediaDevices.getUserMedia({ audio: !0 }), t = new AudioContext({ sampleRate: 16e3 }), s = await new Promise((i, o) => {
|
|
403
|
+
const h = new MediaRecorder(r), l = [];
|
|
404
404
|
h.ondataavailable = (d) => {
|
|
405
|
-
d.data.size > 0 &&
|
|
405
|
+
d.data.size > 0 && l.push(d.data);
|
|
406
406
|
}, h.onstop = async () => {
|
|
407
407
|
var d;
|
|
408
|
-
for (const a of
|
|
408
|
+
for (const a of r.getTracks()) a.stop();
|
|
409
409
|
try {
|
|
410
|
-
const
|
|
410
|
+
const c = await new Blob(l, { type: ((d = l[0]) == null ? void 0 : d.type) || "audio/webm" }).arrayBuffer(), _ = await t.decodeAudioData(c);
|
|
411
411
|
await t.close(), i(_.getChannelData(0).slice());
|
|
412
412
|
} catch (a) {
|
|
413
|
-
|
|
413
|
+
o(a);
|
|
414
414
|
}
|
|
415
415
|
}, h.start(), setTimeout(() => {
|
|
416
416
|
try {
|
|
@@ -421,17 +421,23 @@ class Bt {
|
|
|
421
421
|
}), n = 24e3, e = new Float32Array(n);
|
|
422
422
|
return e.set(s.slice(0, n)), this._samples.push(e), this._samples.length;
|
|
423
423
|
}
|
|
424
|
+
/** Returns the raw PCM Float32Array for the sample at the given index (16 kHz). */
|
|
425
|
+
getSample(r) {
|
|
426
|
+
if (r < 0 || r >= this._samples.length)
|
|
427
|
+
throw new RangeError(`index ${r} out of bounds (${this._samples.length} samples)`);
|
|
428
|
+
return this._samples[r];
|
|
429
|
+
}
|
|
424
430
|
/** Removes the sample at the given index. Returns the new sample count. */
|
|
425
|
-
deleteSample(
|
|
426
|
-
if (
|
|
427
|
-
throw new RangeError(`index ${
|
|
428
|
-
return this._samples.splice(
|
|
431
|
+
deleteSample(r) {
|
|
432
|
+
if (r < 0 || r >= this._samples.length)
|
|
433
|
+
throw new RangeError(`index ${r} out of bounds (${this._samples.length} samples)`);
|
|
434
|
+
return this._samples.splice(r, 1), this._samples.length;
|
|
429
435
|
}
|
|
430
436
|
/** Runs ONNX inference on every recorded sample to produce reference embeddings. */
|
|
431
437
|
async generateRef() {
|
|
432
|
-
const [
|
|
438
|
+
const [r, t] = await Promise.all([tt(this._config.wasmPaths, this._config.ortCdnUrl), _t(this._config.modelPath, this._config.wasmPaths, this._config.ortCdnUrl)]), s = [];
|
|
433
439
|
for (const n of this._samples) {
|
|
434
|
-
const e = this._audioUtils.logfbank(n), i = new
|
|
440
|
+
const e = this._audioUtils.logfbank(n), i = new r.Tensor("float32", e, [1, 1, 149, 64]), o = await t.run({ input: i }), h = Array.from(o[Object.keys(o)[0]].data);
|
|
435
441
|
s.push(h);
|
|
436
442
|
}
|
|
437
443
|
return { word_name: this._wordName, model_type: "resnet_50_arc", embeddings: s };
|