mellon 0.0.19 → 0.0.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { Detector } from './Mellon';
1
+ import { Detector } from './Detector';
2
2
  import { EnrollmentSession } from './EnrollmentSession';
3
3
  import { Storage } from './Storage';
4
4
  import { AudioUtils } from './AudioUtils';
@@ -12,7 +12,7 @@ export interface Command {
12
12
  triggers: Trigger[];
13
13
  onMatch?: (trigger: TriggerName, confidence: number) => any;
14
14
  }
15
- export interface MellonConfig {
15
+ export interface DetectorConfig {
16
16
  refsStorageKey?: string;
17
17
  thresholdStorageKey?: string;
18
18
  wasmPaths?: string;
@@ -32,6 +32,13 @@ export interface WordRef {
32
32
  model_type?: string;
33
33
  embeddings: number[][];
34
34
  }
35
+ /**
36
+ * Called during {@link Detector.init} to report real download progress.
37
+ * @param downloaded - total bytes received so far across all assets
38
+ * @param total - sum of known Content-Length values for all assets;
39
+ * may still be 0 early on (before first header is received)
40
+ */
41
+ export type ProgressCallback = (downloaded: number, total: number) => void;
35
42
  export declare const DEFAULT_WASM_PATHS = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/";
36
43
  export declare const DEFAULT_ORT_CDN_URL = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/ort.wasm.min.mjs";
37
44
  export declare const DEFAULT_MODEL_PATH = "https://huggingface.co/ComicScrip/mellon/resolve/main/model.onnx";
package/dist/mellon.cjs CHANGED
@@ -1 +1 @@
1
- "use strict";Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});function Dt(m){return m&&m.__esModule&&Object.prototype.hasOwnProperty.call(m,"default")?m.default:m}var ht,pt;function Ut(){if(pt)return ht;pt=1;function m(o){if(this.size=o|0,this.size<=1||(this.size&this.size-1)!==0)throw new Error("FFT size must be a power of two and bigger than 1");this._csize=o<<1;for(var t=new Array(this.size*2),s=0;s<t.length;s+=2){const i=Math.PI*s/this.size;t[s]=Math.cos(i),t[s+1]=-Math.sin(i)}this.table=t;for(var n=0,r=1;this.size>r;r<<=1)n++;this._width=n%2===0?n-1:n,this._bitrev=new Array(1<<this._width);for(var a=0;a<this._bitrev.length;a++){this._bitrev[a]=0;for(var e=0;e<this._width;e+=2){var l=this._width-e-2;this._bitrev[a]|=(a>>>e&3)<<l}}this._out=null,this._data=null,this._inv=0}return ht=m,m.prototype.fromComplexArray=function(t,s){for(var n=s||new Array(t.length>>>1),r=0;r<t.length;r+=2)n[r>>>1]=t[r];return n},m.prototype.createComplexArray=function(){const t=new Array(this._csize);for(var s=0;s<t.length;s++)t[s]=0;return t},m.prototype.toComplexArray=function(t,s){for(var n=s||this.createComplexArray(),r=0;r<n.length;r+=2)n[r]=t[r>>>1],n[r+1]=0;return n},m.prototype.completeSpectrum=function(t){for(var s=this._csize,n=s>>>1,r=2;r<n;r+=2)t[s-r]=t[r],t[s-r+1]=-t[r+1]},m.prototype.transform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=0,this._transform4(),this._out=null,this._data=null},m.prototype.realTransform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=0,this._realTransform4(),this._out=null,this._data=null},m.prototype.inverseTransform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=1,this._transform4();for(var n=0;n<t.length;n++)t[n]/=this.size;this._out=null,this._data=null},m.prototype._transform4=function(){var t=this._out,s=this._csize,n=this._width,r=1<<n,a=s/r<<1,e,l,i=this._bitrev;if(a===4)for(e=0,l=0;e<s;e+=a,l++){const u=i[l];this._singleTransform2(e,u,r)}else for(e=0,l=0;e<s;e+=a,l++){const u=i[l];this._singleTransform4(e,u,r)}var h=this._inv?-1:1,c=this.table;for(r>>=2;r>=2;r>>=2){a=s/r<<1;var _=a>>>2;for(e=0;e<s;e+=a)for(var d=e+_,v=e,f=0;v<d;v+=2,f+=r){const u=v,p=u+_,w=p+_,g=w+_,T=t[u],F=t[u+1],A=t[p],y=t[p+1],b=t[w],S=t[w+1],P=t[g],C=t[g+1],E=T,M=F,R=c[f],D=h*c[f+1],U=A*R-y*D,z=A*D+y*R,j=c[2*f],L=h*c[2*f+1],B=b*j-S*L,K=b*L+S*j,N=c[3*f],O=h*c[3*f+1],k=P*N-C*O,G=P*O+C*N,Y=E+B,x=M+K,I=E-B,$=M-K,J=U+k,H=z+G,W=h*(U-k),Q=h*(z-G),tt=Y+J,rt=x+H,et=Y-J,ot=x-H,nt=I+Q,at=$-W,it=I-Q,ct=$+W;t[u]=tt,t[u+1]=rt,t[p]=nt,t[p+1]=at,t[w]=et,t[w+1]=ot,t[g]=it,t[g+1]=ct}}},m.prototype._singleTransform2=function(t,s,n){const r=this._out,a=this._data,e=a[s],l=a[s+1],i=a[s+n],h=a[s+n+1],c=e+i,_=l+h,d=e-i,v=l-h;r[t]=c,r[t+1]=_,r[t+2]=d,r[t+3]=v},m.prototype._singleTransform4=function(t,s,n){const r=this._out,a=this._data,e=this._inv?-1:1,l=n*2,i=n*3,h=a[s],c=a[s+1],_=a[s+n],d=a[s+n+1],v=a[s+l],f=a[s+l+1],u=a[s+i],p=a[s+i+1],w=h+v,g=c+f,T=h-v,F=c-f,A=_+u,y=d+p,b=e*(_-u),S=e*(d-p),P=w+A,C=g+y,E=T+S,M=F-b,R=w-A,D=g-y,U=T-S,z=F+b;r[t]=P,r[t+1]=C,r[t+2]=E,r[t+3]=M,r[t+4]=R,r[t+5]=D,r[t+6]=U,r[t+7]=z},m.prototype._realTransform4=function(){var t=this._out,s=this._csize,n=this._width,r=1<<n,a=s/r<<1,e,l,i=this._bitrev;if(a===4)for(e=0,l=0;e<s;e+=a,l++){const lt=i[l];this._singleRealTransform2(e,lt>>>1,r>>>1)}else for(e=0,l=0;e<s;e+=a,l++){const lt=i[l];this._singleRealTransform4(e,lt>>>1,r>>>1)}var h=this._inv?-1:1,c=this.table;for(r>>=2;r>=2;r>>=2){a=s/r<<1;var _=a>>>1,d=_>>>1,v=d>>>1;for(e=0;e<s;e+=a)for(var f=0,u=0;f<=v;f+=2,u+=r){var p=e+f,w=p+d,g=w+d,T=g+d,F=t[p],A=t[p+1],y=t[w],b=t[w+1],S=t[g],P=t[g+1],C=t[T],E=t[T+1],M=F,R=A,D=c[u],U=h*c[u+1],z=y*D-b*U,j=y*U+b*D,L=c[2*u],B=h*c[2*u+1],K=S*L-P*B,N=S*B+P*L,O=c[3*u],k=h*c[3*u+1],G=C*O-E*k,Y=C*k+E*O,x=M+K,I=R+N,$=M-K,J=R-N,H=z+G,W=j+Y,Q=h*(z-G),tt=h*(j-Y),rt=x+H,et=I+W,ot=$+tt,nt=J-Q;if(t[p]=rt,t[p+1]=et,t[w]=ot,t[w+1]=nt,f===0){var at=x-H,it=I-W;t[g]=at,t[g+1]=it;continue}if(f!==v){var ct=$,yt=-J,Tt=x,At=-I,Ft=-h*tt,bt=-h*Q,St=-h*W,Pt=-h*H,Ct=ct+Ft,Et=yt+bt,Mt=Tt+Pt,Rt=At-St,ut=e+d-f,vt=e+_-f;t[ut]=Ct,t[ut+1]=Et,t[vt]=Mt,t[vt+1]=Rt}}}},m.prototype._singleRealTransform2=function(t,s,n){const r=this._out,a=this._data,e=a[s],l=a[s+n],i=e+l,h=e-l;r[t]=i,r[t+1]=0,r[t+2]=h,r[t+3]=0},m.prototype._singleRealTransform4=function(t,s,n){const r=this._out,a=this._data,e=this._inv?-1:1,l=n*2,i=n*3,h=a[s],c=a[s+n],_=a[s+l],d=a[s+i],v=h+_,f=h-_,u=c+d,p=e*(c-d),w=v+u,g=f,T=-p,F=v-u,A=f,y=p;r[t]=w,r[t+1]=0,r[t+2]=g,r[t+3]=T,r[t+4]=F,r[t+5]=0,r[t+6]=A,r[t+7]=y},ht}var zt=Ut();const xt=Dt(zt);class mt{constructor(o=16e3,t=512,s=64){this._sampleRate=o,this._nfft=t,this._nfilt=s,this._fft=new xt(t),this._melFilters=this._createMelFilterbank()}_hzToMel(o){return 2595*Math.log10(1+o/700)}_melToHz(o){return 700*(10**(o/2595)-1)}_createMelFilterbank(){const t=this._sampleRate/2,s=this._hzToMel(0),n=this._hzToMel(t),r=new Float32Array(this._nfilt+2);for(let i=0;i<this._nfilt+2;i++)r[i]=s+i*(n-s)/(this._nfilt+1);const e=r.map(i=>this._melToHz(i)).map(i=>Math.floor((this._nfft+1)*i/this._sampleRate)),l=[];for(let i=0;i<this._nfilt;i++){const h=new Float32Array(Math.floor(this._nfft/2)+1);for(let c=e[i];c<e[i+1];c++)h[c]=(c-e[i])/(e[i+1]-e[i]);for(let c=e[i+1];c<e[i+2];c++)h[c]=(e[i+2]-c)/(e[i+2]-e[i+1]);l.push(h)}return l}logfbank(o){const t=Math.floor(.025*this._sampleRate),s=Math.floor(.01*this._sampleRate),n=1+Math.ceil((o.length-t)/s),r=new Float32Array(n*this._nfilt),a=new Float32Array(this._nfft),e=this._fft.createComplexArray();for(let l=0;l<n;l++){const i=l*s;a.fill(0);for(let _=0;_<t&&i+_<o.length;_++)a[_]=o[i+_];const h=this._fft.toComplexArray(a,null);this._fft.transform(e,h);const c=new Float32Array(Math.floor(this._nfft/2)+1);for(let _=0;_<c.length;_++){const d=e[2*_],v=e[2*_+1];c[_]=1/this._nfft*(d*d+v*v),c[_]===0&&(c[_]=1e-30)}for(let _=0;_<this._nfilt;_++){let d=0;const v=this._melFilters[_];for(let f=0;f<c.length;f++)d+=c[f]*v[f];d===0&&(d=1e-30),r[l*this._nfilt+_]=Math.log(d)}}return r}maxCosineSim(o,t){let s=0;for(const n of t){let r=0;for(let e=0;e<n.length;e++)r+=o[e]*n[e];const a=(r+1)/2;a>s&&(s=a)}return s}}async function ft(m=Z,o=q){const t=await import(o);return t.env.wasm.wasmPaths=m,t.env.wasm.numThreads=1,t}let _t=null;async function dt(m=st,o=Z,t=q){return _t||(_t=ft(o,t).then(s=>s.InferenceSession.create(m,{executionProviders:["wasm"],graphOptimizationLevel:"all"}))),_t}class X{static loadWords(o=V){try{const t=localStorage.getItem(o);return t?JSON.parse(t):[]}catch{return[]}}static saveWord(o,t=V){const s=X.loadWords(t).filter(n=>n.word_name!==o.word_name);localStorage.setItem(t,JSON.stringify([...s,o]))}static deleteWord(o,t=V){try{const s=X.loadWords(t).filter(n=>n.word_name!==o);localStorage.setItem(t,JSON.stringify(s))}catch{}}}class It{constructor(o,t){this._started=!1,this._inferring=!1,this._audioCtx=null,this._stream=null,this._refEmbeddings=new Map,this._lastMatchAt=0,this._lastInferenceAt=0;const{refsStorageKey:s=V,thresholdStorageKey:n=gt,wasmPaths:r=Z,modelPath:a=st,audioProcessorPath:e=wt,ortCdnUrl:l=q,audioUtils:i=new mt}=t||{};this._audioUtils=i,this._commands=o,this._refsStorageKey=s,this._thresholdStorageKey=n,this._audioProcessorPath=e,this._wasmPaths=r,this._modelPath=a,this._ortCdnUrl=l;try{const h=localStorage.getItem(this._thresholdStorageKey);this._threshold=h!==null?Math.max(0,Math.min(1,Number(h))):.65}catch{this._threshold=.65}this._initPromise=this._init()}get threshold(){return this._threshold}set threshold(o){this._threshold=Math.max(0,Math.min(1,o));try{localStorage.setItem(this._thresholdStorageKey,String(this._threshold))}catch{}}get listening(){return this._started}async _init(){await dt(this._modelPath,this._wasmPaths,this._ortCdnUrl);const o=new Set;for(const t of this._commands)for(const s of t.triggers)if(!o.has(s.name)&&(o.add(s.name),s.defaultRefPath)){const n=await fetch(s.defaultRefPath);if(n.ok){const r=await n.json();this.addCustomWord(r)}}for(const t of X.loadWords(this._refsStorageKey))this._refEmbeddings.set(t.word_name,t.embeddings);console.info("[Mellon] init complete, loaded refs:",[...this._refEmbeddings.keys()])}async init(){await this._initPromise}addCustomWord(o){if(!(Array.isArray(o.embeddings)&&o.embeddings.length>0))throw new Error("invalid ref file for : "+o.word_name);this._refEmbeddings.set(o.word_name,o.embeddings)}async start(){if(this._started)return;await this._initPromise;let o;try{o=await navigator.mediaDevices.getUserMedia({audio:{noiseSuppression:!1,echoCancellation:!1,autoGainControl:!1,channelCount:1}})}catch{o=await navigator.mediaDevices.getUserMedia({audio:!0})}this._stream=o;const t=new AudioContext({sampleRate:16e3});this._audioCtx=t,await t.audioWorklet.addModule(this._audioProcessorPath);const s=t.createMediaStreamSource(o),n=new AudioWorkletNode(t,"audio-processor");n.port.onmessage=r=>{this._handleBuffer(r.data)},s.connect(n),n.connect(t.destination),this._started=!0}async stop(){if(this._started=!1,this._audioCtx&&(await this._audioCtx.close(),this._audioCtx=null),this._stream){for(const o of this._stream.getTracks())o.stop();this._stream=null}}async _handleBuffer(o){if(this._inferring)return;const t=Date.now();if(!(t-this._lastInferenceAt<300)){this._lastInferenceAt=t,this._inferring=!0;try{const[s,n]=await Promise.all([ft(this._wasmPaths,this._ortCdnUrl),dt(this._modelPath,this._wasmPaths,this._ortCdnUrl)]),r=this._audioUtils.logfbank(o),a=new s.Tensor("float32",r,[1,1,149,64]),e=await n.run({input:a}),l=e[Object.keys(e)[0]].data;let i=!1;for(const h of this._commands){if(i)break;for(const c of h.triggers){const _=this._refEmbeddings.get(c.name);if(!_)continue;const d=this._audioUtils.maxCosineSim(l,_);if(d>=this._threshold&&t-this._lastMatchAt>2e3){this._lastMatchAt=t,console.info(`[Mellon] match: "${c}" sim=${d.toFixed(3)}`),typeof h.onMatch=="function"&&h.onMatch(c.name,d),i=!0;break}}}}catch(s){console.error("[Mellon] inference error:",s)}finally{this._inferring=!1}}}}class Ht{constructor(o,t){this._config={},this._samples=[],this._wordName=o,this._config.modelPath=(t==null?void 0:t.modelPath)||st,this._config.wasmPaths=(t==null?void 0:t.wasmPaths)||Z,this._config.ortCdnUrl=(t==null?void 0:t.ortCdnUrl)||q,this._audioUtils=(t==null?void 0:t.audioUtils)??new mt}async recordSample(){const o=await navigator.mediaDevices.getUserMedia({audio:!0}),t=new AudioContext({sampleRate:16e3}),s=await new Promise((a,e)=>{const l=new MediaRecorder(o),i=[];l.ondataavailable=h=>{h.data.size>0&&i.push(h.data)},l.onstop=async()=>{var h;for(const c of o.getTracks())c.stop();try{const _=await new Blob(i,{type:((h=i[0])==null?void 0:h.type)||"audio/webm"}).arrayBuffer(),d=await t.decodeAudioData(_);await t.close(),a(d.getChannelData(0).slice())}catch(c){e(c)}},l.start(),setTimeout(()=>{try{l.stop()}catch{}},1500)}),n=24e3,r=new Float32Array(n);return r.set(s.slice(0,n)),this._samples.push(r),this._samples.length}deleteSample(o){if(o<0||o>=this._samples.length)throw new RangeError(`index ${o} out of bounds (${this._samples.length} samples)`);return this._samples.splice(o,1),this._samples.length}async generateRef(){const[o,t]=await Promise.all([ft(this._config.wasmPaths,this._config.ortCdnUrl),dt(this._config.modelPath,this._config.wasmPaths,this._config.ortCdnUrl)]),s=[];for(const n of this._samples){const r=this._audioUtils.logfbank(n),a=new o.Tensor("float32",r,[1,1,149,64]),e=await t.run({input:a}),l=Array.from(e[Object.keys(e)[0]].data);s.push(l)}return{word_name:this._wordName,model_type:"resnet_50_arc",embeddings:s}}}const Z="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/",q="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/ort.wasm.min.mjs",st="https://huggingface.co/ComicScrip/mellon/resolve/main/model.onnx",wt="https://cdn.jsdelivr.net/npm/mellon@0.0.14/dist/assets/audio-processor.js",V="mellon-refs",gt="mellon-threshold";exports.AudioUtils=mt;exports.DEFAULT_AUDIO_PROCESSOR_PATH=wt;exports.DEFAULT_MODEL_PATH=st;exports.DEFAULT_ORT_CDN_URL=q;exports.DEFAULT_REFS_STORAGE_KEY=V;exports.DEFAULT_THRESHOLD_STORAGE_KEY=gt;exports.DEFAULT_WASM_PATHS=Z;exports.Detector=It;exports.EnrollmentSession=Ht;exports.Storage=X;
1
+ "use strict";Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});function Ut(m){return m&&m.__esModule&&Object.prototype.hasOwnProperty.call(m,"default")?m.default:m}var dt,pt;function Dt(){if(pt)return dt;pt=1;function m(o){if(this.size=o|0,this.size<=1||(this.size&this.size-1)!==0)throw new Error("FFT size must be a power of two and bigger than 1");this._csize=o<<1;for(var t=new Array(this.size*2),s=0;s<t.length;s+=2){const l=Math.PI*s/this.size;t[s]=Math.cos(l),t[s+1]=-Math.sin(l)}this.table=t;for(var n=0,e=1;this.size>e;e<<=1)n++;this._width=n%2===0?n-1:n,this._bitrev=new Array(1<<this._width);for(var c=0;c<this._bitrev.length;c++){this._bitrev[c]=0;for(var r=0;r<this._width;r+=2){var h=this._width-r-2;this._bitrev[c]|=(c>>>r&3)<<h}}this._out=null,this._data=null,this._inv=0}return dt=m,m.prototype.fromComplexArray=function(t,s){for(var n=s||new Array(t.length>>>1),e=0;e<t.length;e+=2)n[e>>>1]=t[e];return n},m.prototype.createComplexArray=function(){const t=new Array(this._csize);for(var s=0;s<t.length;s++)t[s]=0;return t},m.prototype.toComplexArray=function(t,s){for(var n=s||this.createComplexArray(),e=0;e<n.length;e+=2)n[e]=t[e>>>1],n[e+1]=0;return n},m.prototype.completeSpectrum=function(t){for(var s=this._csize,n=s>>>1,e=2;e<n;e+=2)t[s-e]=t[e],t[s-e+1]=-t[e+1]},m.prototype.transform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=0,this._transform4(),this._out=null,this._data=null},m.prototype.realTransform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=0,this._realTransform4(),this._out=null,this._data=null},m.prototype.inverseTransform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=1,this._transform4();for(var n=0;n<t.length;n++)t[n]/=this.size;this._out=null,this._data=null},m.prototype._transform4=function(){var t=this._out,s=this._csize,n=this._width,e=1<<n,c=s/e<<1,r,h,l=this._bitrev;if(c===4)for(r=0,h=0;r<s;r+=c,h++){const u=l[h];this._singleTransform2(r,u,e)}else for(r=0,h=0;r<s;r+=c,h++){const u=l[h];this._singleTransform4(r,u,e)}var i=this._inv?-1:1,a=this.table;for(e>>=2;e>=2;e>>=2){c=s/e<<1;var d=c>>>2;for(r=0;r<s;r+=c)for(var _=r+d,v=r,f=0;v<_;v+=2,f+=e){const u=v,p=u+d,w=p+d,g=w+d,T=t[u],A=t[u+1],b=t[p],y=t[p+1],F=t[w],S=t[w+1],C=t[g],E=t[g+1],M=T,P=A,R=a[f],U=i*a[f+1],D=b*R-y*U,z=b*U+y*R,k=a[2*f],N=i*a[2*f+1],B=F*k-S*N,j=F*N+S*k,K=a[3*f],O=i*a[3*f+1],$=C*K-E*O,G=C*O+E*K,J=M+B,x=P+j,I=M-B,Y=P-j,Q=D+$,L=z+G,W=i*(D-$),V=i*(z-G),tt=J+Q,rt=x+L,ot=J-Q,nt=x-L,at=I+V,it=Y-W,ct=I-V,lt=Y+W;t[u]=tt,t[u+1]=rt,t[p]=at,t[p+1]=it,t[w]=ot,t[w+1]=nt,t[g]=ct,t[g+1]=lt}}},m.prototype._singleTransform2=function(t,s,n){const e=this._out,c=this._data,r=c[s],h=c[s+1],l=c[s+n],i=c[s+n+1],a=r+l,d=h+i,_=r-l,v=h-i;e[t]=a,e[t+1]=d,e[t+2]=_,e[t+3]=v},m.prototype._singleTransform4=function(t,s,n){const e=this._out,c=this._data,r=this._inv?-1:1,h=n*2,l=n*3,i=c[s],a=c[s+1],d=c[s+n],_=c[s+n+1],v=c[s+h],f=c[s+h+1],u=c[s+l],p=c[s+l+1],w=i+v,g=a+f,T=i-v,A=a-f,b=d+u,y=_+p,F=r*(d-u),S=r*(_-p),C=w+b,E=g+y,M=T+S,P=A-F,R=w-b,U=g-y,D=T-S,z=A+F;e[t]=C,e[t+1]=E,e[t+2]=M,e[t+3]=P,e[t+4]=R,e[t+5]=U,e[t+6]=D,e[t+7]=z},m.prototype._realTransform4=function(){var t=this._out,s=this._csize,n=this._width,e=1<<n,c=s/e<<1,r,h,l=this._bitrev;if(c===4)for(r=0,h=0;r<s;r+=c,h++){const ht=l[h];this._singleRealTransform2(r,ht>>>1,e>>>1)}else for(r=0,h=0;r<s;r+=c,h++){const ht=l[h];this._singleRealTransform4(r,ht>>>1,e>>>1)}var i=this._inv?-1:1,a=this.table;for(e>>=2;e>=2;e>>=2){c=s/e<<1;var d=c>>>1,_=d>>>1,v=_>>>1;for(r=0;r<s;r+=c)for(var f=0,u=0;f<=v;f+=2,u+=e){var p=r+f,w=p+_,g=w+_,T=g+_,A=t[p],b=t[p+1],y=t[w],F=t[w+1],S=t[g],C=t[g+1],E=t[T],M=t[T+1],P=A,R=b,U=a[u],D=i*a[u+1],z=y*U-F*D,k=y*D+F*U,N=a[2*u],B=i*a[2*u+1],j=S*N-C*B,K=S*B+C*N,O=a[3*u],$=i*a[3*u+1],G=E*O-M*$,J=E*$+M*O,x=P+j,I=R+K,Y=P-j,Q=R-K,L=z+G,W=k+J,V=i*(z-G),tt=i*(k-J),rt=x+L,ot=I+W,nt=Y+tt,at=Q-V;if(t[p]=rt,t[p+1]=ot,t[w]=nt,t[w+1]=at,f===0){var it=x-L,ct=I-W;t[g]=it,t[g+1]=ct;continue}if(f!==v){var lt=Y,yt=-Q,Tt=x,bt=-I,At=-i*tt,Ft=-i*V,St=-i*W,Ct=-i*L,Et=lt+At,Mt=yt+Ft,Pt=Tt+Ct,Rt=bt-St,ut=r+_-f,vt=r+d-f;t[ut]=Et,t[ut+1]=Mt,t[vt]=Pt,t[vt+1]=Rt}}}},m.prototype._singleRealTransform2=function(t,s,n){const e=this._out,c=this._data,r=c[s],h=c[s+n],l=r+h,i=r-h;e[t]=l,e[t+1]=0,e[t+2]=i,e[t+3]=0},m.prototype._singleRealTransform4=function(t,s,n){const e=this._out,c=this._data,r=this._inv?-1:1,h=n*2,l=n*3,i=c[s],a=c[s+n],d=c[s+h],_=c[s+l],v=i+d,f=i-d,u=a+_,p=r*(a-_),w=v+u,g=f,T=-p,A=v-u,b=f,y=p;e[t]=w,e[t+1]=0,e[t+2]=g,e[t+3]=T,e[t+4]=A,e[t+5]=0,e[t+6]=b,e[t+7]=y},dt}var zt=Dt();const xt=Ut(zt);class ft{constructor(o=16e3,t=512,s=64){this._sampleRate=o,this._nfft=t,this._nfilt=s,this._fft=new xt(t),this._melFilters=this._createMelFilterbank()}_hzToMel(o){return 2595*Math.log10(1+o/700)}_melToHz(o){return 700*(10**(o/2595)-1)}_createMelFilterbank(){const t=this._sampleRate/2,s=this._hzToMel(0),n=this._hzToMel(t),e=new Float32Array(this._nfilt+2);for(let l=0;l<this._nfilt+2;l++)e[l]=s+l*(n-s)/(this._nfilt+1);const r=e.map(l=>this._melToHz(l)).map(l=>Math.floor((this._nfft+1)*l/this._sampleRate)),h=[];for(let l=0;l<this._nfilt;l++){const i=new Float32Array(Math.floor(this._nfft/2)+1);for(let a=r[l];a<r[l+1];a++)i[a]=(a-r[l])/(r[l+1]-r[l]);for(let a=r[l+1];a<r[l+2];a++)i[a]=(r[l+2]-a)/(r[l+2]-r[l+1]);h.push(i)}return h}logfbank(o){const t=Math.floor(.025*this._sampleRate),s=Math.floor(.01*this._sampleRate),n=1+Math.ceil((o.length-t)/s),e=new Float32Array(n*this._nfilt),c=new Float32Array(this._nfft),r=this._fft.createComplexArray();for(let h=0;h<n;h++){const l=h*s;c.fill(0);for(let d=0;d<t&&l+d<o.length;d++)c[d]=o[l+d];const i=this._fft.toComplexArray(c,null);this._fft.transform(r,i);const a=new Float32Array(Math.floor(this._nfft/2)+1);for(let d=0;d<a.length;d++){const _=r[2*d],v=r[2*d+1];a[d]=1/this._nfft*(_*_+v*v),a[d]===0&&(a[d]=1e-30)}for(let d=0;d<this._nfilt;d++){let _=0;const v=this._melFilters[d];for(let f=0;f<a.length;f++)_+=a[f]*v[f];_===0&&(_=1e-30),e[h*this._nfilt+d]=Math.log(_)}}return e}maxCosineSim(o,t){let s=0;for(const n of t){let e=0;for(let r=0;r<n.length;r++)e+=o[r]*n[r];const c=(e+1)/2;c>s&&(s=c)}return s}}async function st(m=Z,o=q){const t=await import(o);return t.env.wasm.wasmPaths=m,t.env.wasm.numThreads=1,t}let _t=null;async function mt(m=et,o=Z,t=q,s){return _t||(_t=st(o,t).then(n=>s?n.InferenceSession.create(new Uint8Array(s),{executionProviders:["wasm"],graphOptimizationLevel:"all"}):n.InferenceSession.create(m,{executionProviders:["wasm"],graphOptimizationLevel:"all"}))),_t}class H{static loadWords(o=X){try{const t=localStorage.getItem(o);return t?JSON.parse(t):[]}catch{return[]}}static saveWord(o,t=X){const s=H.loadWords(t).filter(n=>n.word_name!==o.word_name);localStorage.setItem(t,JSON.stringify([...s,o]))}static deleteWord(o,t=X){try{const s=H.loadWords(t).filter(n=>n.word_name!==o);localStorage.setItem(t,JSON.stringify(s))}catch{}}}class It{constructor(o,t){this._started=!1,this._inferring=!1,this._audioCtx=null,this._stream=null,this._refEmbeddings=new Map,this._lastMatchAt=0,this._lastInferenceAt=0,this._initPromise=null;const{refsStorageKey:s=X,thresholdStorageKey:n=gt,wasmPaths:e=Z,modelPath:c=et,audioProcessorPath:r=wt,ortCdnUrl:h=q,audioUtils:l=new ft}=t||{};this._audioUtils=l,this._commands=o,this._refsStorageKey=s,this._thresholdStorageKey=n,this._audioProcessorPath=r,this._wasmPaths=e,this._modelPath=c,this._ortCdnUrl=h;try{const i=localStorage.getItem(this._thresholdStorageKey);this._threshold=i!==null?Math.max(0,Math.min(1,Number(i))):.65}catch{this._threshold=.65}}get threshold(){return this._threshold}set threshold(o){this._threshold=Math.max(0,Math.min(1,o));try{localStorage.setItem(this._thresholdStorageKey,String(this._threshold))}catch{}}get listening(){return this._started}async _trackFetch(o,t,s){const n=await fetch(o);if(!n.ok)throw new Error(`HTTP ${n.status} fetching ${o}`);const e=Number(n.headers.get("content-length")??"0");if(e>0&&(s.total+=e),!n.body){const a=await n.arrayBuffer();return s.downloaded+=a.byteLength,e||(s.total+=a.byteLength),t==null||t(s.downloaded,s.total),a}const c=n.body.getReader(),r=[];let h=0;for(;;){const{done:a,value:d}=await c.read();if(a)break;r.push(d),h+=d.length,s.downloaded+=d.length,t==null||t(s.downloaded,s.total)}e||(s.total+=h);const l=new Uint8Array(h);let i=0;for(const a of r)l.set(a,i),i+=a.length;return l.buffer}async _init(o){const t={downloaded:0,total:0},s=new Set,n=[];for(const i of this._commands)for(const a of i.triggers)!s.has(a.name)&&a.defaultRefPath&&(s.add(a.name),n.push({name:a.name,path:a.defaultRefPath}));const e=st(this._wasmPaths,this._ortCdnUrl),[c,...r]=await Promise.all([this._trackFetch(this._modelPath,o,t),...n.map(({path:i})=>this._trackFetch(i,o,t))]);await e,await mt(this._modelPath,this._wasmPaths,this._ortCdnUrl,c);const h=H.loadWords(this._refsStorageKey),l=new Set(h.map(i=>i.word_name));for(let i=0;i<n.length;i++)try{const a=JSON.parse(new TextDecoder().decode(r[i]));this.addCustomWord(a),l.has(a.word_name)||H.saveWord(a,this._refsStorageKey)}catch{console.warn(`[Mellon] failed to parse ref file: ${n[i].path}`)}for(const i of h)this._refEmbeddings.set(i.word_name,i.embeddings);console.info("[Mellon] init complete, loaded refs:",[...this._refEmbeddings.keys()])}async init(o){this._initPromise||(this._initPromise=this._init(o)),await this._initPromise}addCustomWord(o){if(!(Array.isArray(o.embeddings)&&o.embeddings.length>0))throw new Error("invalid ref file for : "+o.word_name);this._refEmbeddings.set(o.word_name,o.embeddings)}async start(){if(this._started)return;await this.init();let o;try{o=await navigator.mediaDevices.getUserMedia({audio:{noiseSuppression:!1,echoCancellation:!1,autoGainControl:!1,channelCount:1}})}catch{o=await navigator.mediaDevices.getUserMedia({audio:!0})}this._stream=o;const t=new AudioContext({sampleRate:16e3});this._audioCtx=t,await t.audioWorklet.addModule(this._audioProcessorPath);const s=t.createMediaStreamSource(o),n=new AudioWorkletNode(t,"audio-processor");n.port.onmessage=e=>{this._handleBuffer(e.data)},s.connect(n),n.connect(t.destination),this._started=!0}async stop(){if(this._started=!1,this._audioCtx&&(await this._audioCtx.close(),this._audioCtx=null),this._stream){for(const o of this._stream.getTracks())o.stop();this._stream=null}}async _handleBuffer(o){if(this._inferring)return;const t=Date.now();if(!(t-this._lastInferenceAt<300)){this._lastInferenceAt=t,this._inferring=!0;try{const[s,n]=await Promise.all([st(this._wasmPaths,this._ortCdnUrl),mt(this._modelPath,this._wasmPaths,this._ortCdnUrl)]),e=this._audioUtils.logfbank(o),c=new s.Tensor("float32",e,[1,1,149,64]),r=await n.run({input:c}),h=r[Object.keys(r)[0]].data;let l=!1;for(const i of this._commands){if(l)break;for(const a of i.triggers){const d=this._refEmbeddings.get(a.name);if(!d)continue;const _=this._audioUtils.maxCosineSim(h,d);if(_>=this._threshold&&t-this._lastMatchAt>2e3){this._lastMatchAt=t,console.info(`[Mellon] match: "${a}" sim=${_.toFixed(3)}`),typeof i.onMatch=="function"&&i.onMatch(a.name,_),l=!0;break}}}}catch(s){console.error("[Mellon] inference error:",s)}finally{this._inferring=!1}}}}class Lt{constructor(o,t){this._config={},this._samples=[],this._wordName=o,this._config.modelPath=(t==null?void 0:t.modelPath)||et,this._config.wasmPaths=(t==null?void 0:t.wasmPaths)||Z,this._config.ortCdnUrl=(t==null?void 0:t.ortCdnUrl)||q,this._audioUtils=(t==null?void 0:t.audioUtils)??new ft}async recordSample(){const o=await navigator.mediaDevices.getUserMedia({audio:!0}),t=new AudioContext({sampleRate:16e3}),s=await new Promise((c,r)=>{const h=new MediaRecorder(o),l=[];h.ondataavailable=i=>{i.data.size>0&&l.push(i.data)},h.onstop=async()=>{var i;for(const a of o.getTracks())a.stop();try{const d=await new Blob(l,{type:((i=l[0])==null?void 0:i.type)||"audio/webm"}).arrayBuffer(),_=await t.decodeAudioData(d);await t.close(),c(_.getChannelData(0).slice())}catch(a){r(a)}},h.start(),setTimeout(()=>{try{h.stop()}catch{}},1500)}),n=24e3,e=new Float32Array(n);return e.set(s.slice(0,n)),this._samples.push(e),this._samples.length}deleteSample(o){if(o<0||o>=this._samples.length)throw new RangeError(`index ${o} out of bounds (${this._samples.length} samples)`);return this._samples.splice(o,1),this._samples.length}async generateRef(){const[o,t]=await Promise.all([st(this._config.wasmPaths,this._config.ortCdnUrl),mt(this._config.modelPath,this._config.wasmPaths,this._config.ortCdnUrl)]),s=[];for(const n of this._samples){const e=this._audioUtils.logfbank(n),c=new o.Tensor("float32",e,[1,1,149,64]),r=await t.run({input:c}),h=Array.from(r[Object.keys(r)[0]].data);s.push(h)}return{word_name:this._wordName,model_type:"resnet_50_arc",embeddings:s}}}const Z="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/",q="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/ort.wasm.min.mjs",et="https://huggingface.co/ComicScrip/mellon/resolve/main/model.onnx",wt="https://cdn.jsdelivr.net/npm/mellon@0.0.14/dist/assets/audio-processor.js",X="mellon-refs",gt="mellon-threshold";exports.AudioUtils=ft;exports.DEFAULT_AUDIO_PROCESSOR_PATH=wt;exports.DEFAULT_MODEL_PATH=et;exports.DEFAULT_ORT_CDN_URL=q;exports.DEFAULT_REFS_STORAGE_KEY=X;exports.DEFAULT_THRESHOLD_STORAGE_KEY=gt;exports.DEFAULT_WASM_PATHS=Z;exports.Detector=It;exports.EnrollmentSession=Lt;exports.Storage=H;
package/dist/mellon.mjs CHANGED
@@ -1,34 +1,34 @@
1
1
  function Rt(m) {
2
2
  return m && m.__esModule && Object.prototype.hasOwnProperty.call(m, "default") ? m.default : m;
3
3
  }
4
- var ht, vt;
5
- function Et() {
6
- if (vt) return ht;
4
+ var lt, vt;
5
+ function Ut() {
6
+ if (vt) return lt;
7
7
  vt = 1;
8
8
  function m(o) {
9
9
  if (this.size = o | 0, this.size <= 1 || (this.size & this.size - 1) !== 0)
10
10
  throw new Error("FFT size must be a power of two and bigger than 1");
11
11
  this._csize = o << 1;
12
12
  for (var t = new Array(this.size * 2), s = 0; s < t.length; s += 2) {
13
- const i = Math.PI * s / this.size;
14
- t[s] = Math.cos(i), t[s + 1] = -Math.sin(i);
13
+ const h = Math.PI * s / this.size;
14
+ t[s] = Math.cos(h), t[s + 1] = -Math.sin(h);
15
15
  }
16
16
  this.table = t;
17
- for (var n = 0, r = 1; this.size > r; r <<= 1)
17
+ for (var n = 0, e = 1; this.size > e; e <<= 1)
18
18
  n++;
19
19
  this._width = n % 2 === 0 ? n - 1 : n, this._bitrev = new Array(1 << this._width);
20
- for (var a = 0; a < this._bitrev.length; a++) {
21
- this._bitrev[a] = 0;
22
- for (var e = 0; e < this._width; e += 2) {
23
- var h = this._width - e - 2;
24
- this._bitrev[a] |= (a >>> e & 3) << h;
20
+ for (var c = 0; c < this._bitrev.length; c++) {
21
+ this._bitrev[c] = 0;
22
+ for (var r = 0; r < this._width; r += 2) {
23
+ var l = this._width - r - 2;
24
+ this._bitrev[c] |= (c >>> r & 3) << l;
25
25
  }
26
26
  }
27
27
  this._out = null, this._data = null, this._inv = 0;
28
28
  }
29
- return ht = m, m.prototype.fromComplexArray = function(t, s) {
30
- for (var n = s || new Array(t.length >>> 1), r = 0; r < t.length; r += 2)
31
- n[r >>> 1] = t[r];
29
+ return lt = m, m.prototype.fromComplexArray = function(t, s) {
30
+ for (var n = s || new Array(t.length >>> 1), e = 0; e < t.length; e += 2)
31
+ n[e >>> 1] = t[e];
32
32
  return n;
33
33
  }, m.prototype.createComplexArray = function() {
34
34
  const t = new Array(this._csize);
@@ -36,12 +36,12 @@ function Et() {
36
36
  t[s] = 0;
37
37
  return t;
38
38
  }, m.prototype.toComplexArray = function(t, s) {
39
- for (var n = s || this.createComplexArray(), r = 0; r < n.length; r += 2)
40
- n[r] = t[r >>> 1], n[r + 1] = 0;
39
+ for (var n = s || this.createComplexArray(), e = 0; e < n.length; e += 2)
40
+ n[e] = t[e >>> 1], n[e + 1] = 0;
41
41
  return n;
42
42
  }, m.prototype.completeSpectrum = function(t) {
43
- for (var s = this._csize, n = s >>> 1, r = 2; r < n; r += 2)
44
- t[s - r] = t[r], t[s - r + 1] = -t[r + 1];
43
+ for (var s = this._csize, n = s >>> 1, e = 2; e < n; e += 2)
44
+ t[s - e] = t[e], t[s - e + 1] = -t[e + 1];
45
45
  }, m.prototype.transform = function(t, s) {
46
46
  if (t === s)
47
47
  throw new Error("Input and output buffers must be different");
@@ -58,73 +58,73 @@ function Et() {
58
58
  t[n] /= this.size;
59
59
  this._out = null, this._data = null;
60
60
  }, m.prototype._transform4 = function() {
61
- var t = this._out, s = this._csize, n = this._width, r = 1 << n, a = s / r << 1, e, h, i = this._bitrev;
62
- if (a === 4)
63
- for (e = 0, h = 0; e < s; e += a, h++) {
64
- const u = i[h];
65
- this._singleTransform2(e, u, r);
61
+ var t = this._out, s = this._csize, n = this._width, e = 1 << n, c = s / e << 1, r, l, h = this._bitrev;
62
+ if (c === 4)
63
+ for (r = 0, l = 0; r < s; r += c, l++) {
64
+ const u = h[l];
65
+ this._singleTransform2(r, u, e);
66
66
  }
67
67
  else
68
- for (e = 0, h = 0; e < s; e += a, h++) {
69
- const u = i[h];
70
- this._singleTransform4(e, u, r);
68
+ for (r = 0, l = 0; r < s; r += c, l++) {
69
+ const u = h[l];
70
+ this._singleTransform4(r, u, e);
71
71
  }
72
- var l = this._inv ? -1 : 1, c = this.table;
73
- for (r >>= 2; r >= 2; r >>= 2) {
74
- a = s / r << 1;
75
- var _ = a >>> 2;
76
- for (e = 0; e < s; e += a)
77
- for (var d = e + _, v = e, f = 0; v < d; v += 2, f += r) {
78
- const u = v, p = u + _, w = p + _, g = w + _, b = t[u], T = t[u + 1], F = t[p], y = t[p + 1], A = t[w], C = t[w + 1], P = t[g], M = t[g + 1], S = b, R = T, E = c[f], U = l * c[f + 1], D = F * E - y * U, z = F * U + y * E, B = c[2 * f], N = l * c[2 * f + 1], H = A * B - C * N, K = A * N + C * B, k = c[3 * f], $ = l * c[3 * f + 1], G = P * k - M * $, J = P * $ + M * k, L = S + H, x = R + K, I = S - H, Y = R - K, Q = D + G, W = z + J, j = l * (D - G), V = l * (z - J), X = L + Q, st = x + W, rt = L - Q, et = x - W, ot = I + V, nt = Y - j, at = I - V, it = Y + j;
79
- t[u] = X, t[u + 1] = st, t[p] = ot, t[p + 1] = nt, t[w] = rt, t[w + 1] = et, t[g] = at, t[g + 1] = it;
72
+ var i = this._inv ? -1 : 1, a = this.table;
73
+ for (e >>= 2; e >= 2; e >>= 2) {
74
+ c = s / e << 1;
75
+ var d = c >>> 2;
76
+ for (r = 0; r < s; r += c)
77
+ for (var _ = r + d, v = r, f = 0; v < _; v += 2, f += e) {
78
+ const u = v, p = u + d, w = p + d, g = w + d, b = t[u], T = t[u + 1], F = t[p], y = t[p + 1], A = t[w], C = t[w + 1], M = t[g], S = t[g + 1], P = b, R = T, U = a[f], E = i * a[f + 1], D = F * U - y * E, x = F * E + y * U, B = a[2 * f], N = i * a[2 * f + 1], j = A * B - C * N, H = A * N + C * B, K = a[3 * f], $ = i * a[3 * f + 1], L = M * K - S * $, J = M * $ + S * K, G = P + j, z = R + H, I = P - j, Y = R - H, O = D + L, W = x + J, k = i * (D - L), Q = i * (x - J), X = G + O, et = z + W, rt = G - O, ot = z - W, nt = I + Q, at = Y - k, it = I - Q, ct = Y + k;
79
+ t[u] = X, t[u + 1] = et, t[p] = nt, t[p + 1] = at, t[w] = rt, t[w + 1] = ot, t[g] = it, t[g + 1] = ct;
80
80
  }
81
81
  }
82
82
  }, m.prototype._singleTransform2 = function(t, s, n) {
83
- const r = this._out, a = this._data, e = a[s], h = a[s + 1], i = a[s + n], l = a[s + n + 1], c = e + i, _ = h + l, d = e - i, v = h - l;
84
- r[t] = c, r[t + 1] = _, r[t + 2] = d, r[t + 3] = v;
83
+ const e = this._out, c = this._data, r = c[s], l = c[s + 1], h = c[s + n], i = c[s + n + 1], a = r + h, d = l + i, _ = r - h, v = l - i;
84
+ e[t] = a, e[t + 1] = d, e[t + 2] = _, e[t + 3] = v;
85
85
  }, m.prototype._singleTransform4 = function(t, s, n) {
86
- const r = this._out, a = this._data, e = this._inv ? -1 : 1, h = n * 2, i = n * 3, l = a[s], c = a[s + 1], _ = a[s + n], d = a[s + n + 1], v = a[s + h], f = a[s + h + 1], u = a[s + i], p = a[s + i + 1], w = l + v, g = c + f, b = l - v, T = c - f, F = _ + u, y = d + p, A = e * (_ - u), C = e * (d - p), P = w + F, M = g + y, S = b + C, R = T - A, E = w - F, U = g - y, D = b - C, z = T + A;
87
- r[t] = P, r[t + 1] = M, r[t + 2] = S, r[t + 3] = R, r[t + 4] = E, r[t + 5] = U, r[t + 6] = D, r[t + 7] = z;
86
+ const e = this._out, c = this._data, r = this._inv ? -1 : 1, l = n * 2, h = n * 3, i = c[s], a = c[s + 1], d = c[s + n], _ = c[s + n + 1], v = c[s + l], f = c[s + l + 1], u = c[s + h], p = c[s + h + 1], w = i + v, g = a + f, b = i - v, T = a - f, F = d + u, y = _ + p, A = r * (d - u), C = r * (_ - p), M = w + F, S = g + y, P = b + C, R = T - A, U = w - F, E = g - y, D = b - C, x = T + A;
87
+ e[t] = M, e[t + 1] = S, e[t + 2] = P, e[t + 3] = R, e[t + 4] = U, e[t + 5] = E, e[t + 6] = D, e[t + 7] = x;
88
88
  }, m.prototype._realTransform4 = function() {
89
- var t = this._out, s = this._csize, n = this._width, r = 1 << n, a = s / r << 1, e, h, i = this._bitrev;
90
- if (a === 4)
91
- for (e = 0, h = 0; e < s; e += a, h++) {
92
- const ct = i[h];
93
- this._singleRealTransform2(e, ct >>> 1, r >>> 1);
89
+ var t = this._out, s = this._csize, n = this._width, e = 1 << n, c = s / e << 1, r, l, h = this._bitrev;
90
+ if (c === 4)
91
+ for (r = 0, l = 0; r < s; r += c, l++) {
92
+ const ht = h[l];
93
+ this._singleRealTransform2(r, ht >>> 1, e >>> 1);
94
94
  }
95
95
  else
96
- for (e = 0, h = 0; e < s; e += a, h++) {
97
- const ct = i[h];
98
- this._singleRealTransform4(e, ct >>> 1, r >>> 1);
96
+ for (r = 0, l = 0; r < s; r += c, l++) {
97
+ const ht = h[l];
98
+ this._singleRealTransform4(r, ht >>> 1, e >>> 1);
99
99
  }
100
- var l = this._inv ? -1 : 1, c = this.table;
101
- for (r >>= 2; r >= 2; r >>= 2) {
102
- a = s / r << 1;
103
- var _ = a >>> 1, d = _ >>> 1, v = d >>> 1;
104
- for (e = 0; e < s; e += a)
105
- for (var f = 0, u = 0; f <= v; f += 2, u += r) {
106
- var p = e + f, w = p + d, g = w + d, b = g + d, T = t[p], F = t[p + 1], y = t[w], A = t[w + 1], C = t[g], P = t[g + 1], M = t[b], S = t[b + 1], R = T, E = F, U = c[u], D = l * c[u + 1], z = y * U - A * D, B = y * D + A * U, N = c[2 * u], H = l * c[2 * u + 1], K = C * N - P * H, k = C * H + P * N, $ = c[3 * u], G = l * c[3 * u + 1], J = M * $ - S * G, L = M * G + S * $, x = R + K, I = E + k, Y = R - K, Q = E - k, W = z + J, j = B + L, V = l * (z - J), X = l * (B - L), st = x + W, rt = I + j, et = Y + X, ot = Q - V;
107
- if (t[p] = st, t[p + 1] = rt, t[w] = et, t[w + 1] = ot, f === 0) {
108
- var nt = x - W, at = I - j;
109
- t[g] = nt, t[g + 1] = at;
100
+ var i = this._inv ? -1 : 1, a = this.table;
101
+ for (e >>= 2; e >= 2; e >>= 2) {
102
+ c = s / e << 1;
103
+ var d = c >>> 1, _ = d >>> 1, v = _ >>> 1;
104
+ for (r = 0; r < s; r += c)
105
+ for (var f = 0, u = 0; f <= v; f += 2, u += e) {
106
+ var p = r + f, w = p + _, g = w + _, b = g + _, T = t[p], F = t[p + 1], y = t[w], A = t[w + 1], C = t[g], M = t[g + 1], S = t[b], P = t[b + 1], R = T, U = F, E = a[u], D = i * a[u + 1], x = y * E - A * D, B = y * D + A * E, N = a[2 * u], j = i * a[2 * u + 1], H = C * N - M * j, K = C * j + M * N, $ = a[3 * u], L = i * a[3 * u + 1], J = S * $ - P * L, G = S * L + P * $, z = R + H, I = U + K, Y = R - H, O = U - K, W = x + J, k = B + G, Q = i * (x - J), X = i * (B - G), et = z + W, rt = I + k, ot = Y + X, nt = O - Q;
107
+ if (t[p] = et, t[p + 1] = rt, t[w] = ot, t[w + 1] = nt, f === 0) {
108
+ var at = z - W, it = I - k;
109
+ t[g] = at, t[g + 1] = it;
110
110
  continue;
111
111
  }
112
112
  if (f !== v) {
113
- var it = Y, wt = -Q, gt = x, yt = -I, bt = -l * X, Ft = -l * V, Tt = -l * j, At = -l * W, Ct = it + bt, Pt = wt + Ft, Mt = gt + At, St = yt - Tt, ft = e + d - f, ut = e + _ - f;
114
- t[ft] = Ct, t[ft + 1] = Pt, t[ut] = Mt, t[ut + 1] = St;
113
+ var ct = Y, wt = -O, gt = z, yt = -I, bt = -i * X, Ft = -i * Q, Tt = -i * k, At = -i * W, Ct = ct + bt, Mt = wt + Ft, St = gt + At, Pt = yt - Tt, ft = r + _ - f, ut = r + d - f;
114
+ t[ft] = Ct, t[ft + 1] = Mt, t[ut] = St, t[ut + 1] = Pt;
115
115
  }
116
116
  }
117
117
  }
118
118
  }, m.prototype._singleRealTransform2 = function(t, s, n) {
119
- const r = this._out, a = this._data, e = a[s], h = a[s + n], i = e + h, l = e - h;
120
- r[t] = i, r[t + 1] = 0, r[t + 2] = l, r[t + 3] = 0;
119
+ const e = this._out, c = this._data, r = c[s], l = c[s + n], h = r + l, i = r - l;
120
+ e[t] = h, e[t + 1] = 0, e[t + 2] = i, e[t + 3] = 0;
121
121
  }, m.prototype._singleRealTransform4 = function(t, s, n) {
122
- const r = this._out, a = this._data, e = this._inv ? -1 : 1, h = n * 2, i = n * 3, l = a[s], c = a[s + n], _ = a[s + h], d = a[s + i], v = l + _, f = l - _, u = c + d, p = e * (c - d), w = v + u, g = f, b = -p, T = v - u, F = f, y = p;
123
- r[t] = w, r[t + 1] = 0, r[t + 2] = g, r[t + 3] = b, r[t + 4] = T, r[t + 5] = 0, r[t + 6] = F, r[t + 7] = y;
124
- }, ht;
122
+ const e = this._out, c = this._data, r = this._inv ? -1 : 1, l = n * 2, h = n * 3, i = c[s], a = c[s + n], d = c[s + l], _ = c[s + h], v = i + d, f = i - d, u = a + _, p = r * (a - _), w = v + u, g = f, b = -p, T = v - u, F = f, y = p;
123
+ e[t] = w, e[t + 1] = 0, e[t + 2] = g, e[t + 3] = b, e[t + 4] = T, e[t + 5] = 0, e[t + 6] = F, e[t + 7] = y;
124
+ }, lt;
125
125
  }
126
- var Ut = Et();
127
- const Dt = /* @__PURE__ */ Rt(Ut);
126
+ var Et = Ut();
127
+ const Dt = /* @__PURE__ */ Rt(Et);
128
128
  class pt {
129
129
  constructor(o = 16e3, t = 512, s = 64) {
130
130
  this._sampleRate = o, this._nfft = t, this._nfilt = s, this._fft = new Dt(t), this._melFilters = this._createMelFilterbank();
@@ -136,73 +136,76 @@ class pt {
136
136
  return 700 * (10 ** (o / 2595) - 1);
137
137
  }
138
138
  _createMelFilterbank() {
139
- const t = this._sampleRate / 2, s = this._hzToMel(0), n = this._hzToMel(t), r = new Float32Array(this._nfilt + 2);
140
- for (let i = 0; i < this._nfilt + 2; i++)
141
- r[i] = s + i * (n - s) / (this._nfilt + 1);
142
- const e = r.map((i) => this._melToHz(i)).map((i) => Math.floor((this._nfft + 1) * i / this._sampleRate)), h = [];
143
- for (let i = 0; i < this._nfilt; i++) {
144
- const l = new Float32Array(Math.floor(this._nfft / 2) + 1);
145
- for (let c = e[i]; c < e[i + 1]; c++)
146
- l[c] = (c - e[i]) / (e[i + 1] - e[i]);
147
- for (let c = e[i + 1]; c < e[i + 2]; c++)
148
- l[c] = (e[i + 2] - c) / (e[i + 2] - e[i + 1]);
149
- h.push(l);
139
+ const t = this._sampleRate / 2, s = this._hzToMel(0), n = this._hzToMel(t), e = new Float32Array(this._nfilt + 2);
140
+ for (let h = 0; h < this._nfilt + 2; h++)
141
+ e[h] = s + h * (n - s) / (this._nfilt + 1);
142
+ const r = e.map((h) => this._melToHz(h)).map((h) => Math.floor((this._nfft + 1) * h / this._sampleRate)), l = [];
143
+ for (let h = 0; h < this._nfilt; h++) {
144
+ const i = new Float32Array(Math.floor(this._nfft / 2) + 1);
145
+ for (let a = r[h]; a < r[h + 1]; a++)
146
+ i[a] = (a - r[h]) / (r[h + 1] - r[h]);
147
+ for (let a = r[h + 1]; a < r[h + 2]; a++)
148
+ i[a] = (r[h + 2] - a) / (r[h + 2] - r[h + 1]);
149
+ l.push(i);
150
150
  }
151
- return h;
151
+ return l;
152
152
  }
153
153
  /** Returns a flat Float32Array of shape [numFrames × nfilt]. */
154
154
  logfbank(o) {
155
- const t = Math.floor(0.025 * this._sampleRate), s = Math.floor(0.01 * this._sampleRate), n = 1 + Math.ceil((o.length - t) / s), r = new Float32Array(n * this._nfilt), a = new Float32Array(this._nfft), e = this._fft.createComplexArray();
156
- for (let h = 0; h < n; h++) {
157
- const i = h * s;
158
- a.fill(0);
159
- for (let _ = 0; _ < t && i + _ < o.length; _++)
160
- a[_] = o[i + _];
161
- const l = this._fft.toComplexArray(a, null);
162
- this._fft.transform(e, l);
163
- const c = new Float32Array(Math.floor(this._nfft / 2) + 1);
164
- for (let _ = 0; _ < c.length; _++) {
165
- const d = e[2 * _], v = e[2 * _ + 1];
166
- c[_] = 1 / this._nfft * (d * d + v * v), c[_] === 0 && (c[_] = 1e-30);
155
+ const t = Math.floor(0.025 * this._sampleRate), s = Math.floor(0.01 * this._sampleRate), n = 1 + Math.ceil((o.length - t) / s), e = new Float32Array(n * this._nfilt), c = new Float32Array(this._nfft), r = this._fft.createComplexArray();
156
+ for (let l = 0; l < n; l++) {
157
+ const h = l * s;
158
+ c.fill(0);
159
+ for (let d = 0; d < t && h + d < o.length; d++)
160
+ c[d] = o[h + d];
161
+ const i = this._fft.toComplexArray(c, null);
162
+ this._fft.transform(r, i);
163
+ const a = new Float32Array(Math.floor(this._nfft / 2) + 1);
164
+ for (let d = 0; d < a.length; d++) {
165
+ const _ = r[2 * d], v = r[2 * d + 1];
166
+ a[d] = 1 / this._nfft * (_ * _ + v * v), a[d] === 0 && (a[d] = 1e-30);
167
167
  }
168
- for (let _ = 0; _ < this._nfilt; _++) {
169
- let d = 0;
170
- const v = this._melFilters[_];
171
- for (let f = 0; f < c.length; f++)
172
- d += c[f] * v[f];
173
- d === 0 && (d = 1e-30), r[h * this._nfilt + _] = Math.log(d);
168
+ for (let d = 0; d < this._nfilt; d++) {
169
+ let _ = 0;
170
+ const v = this._melFilters[d];
171
+ for (let f = 0; f < a.length; f++)
172
+ _ += a[f] * v[f];
173
+ _ === 0 && (_ = 1e-30), e[l * this._nfilt + d] = Math.log(_);
174
174
  }
175
175
  }
176
- return r;
176
+ return e;
177
177
  }
178
178
  maxCosineSim(o, t) {
179
179
  let s = 0;
180
180
  for (const n of t) {
181
- let r = 0;
182
- for (let e = 0; e < n.length; e++) r += o[e] * n[e];
183
- const a = (r + 1) / 2;
184
- a > s && (s = a);
181
+ let e = 0;
182
+ for (let r = 0; r < n.length; r++) e += o[r] * n[r];
183
+ const c = (e + 1) / 2;
184
+ c > s && (s = c);
185
185
  }
186
186
  return s;
187
187
  }
188
188
  }
189
- async function dt(m = q, o = tt) {
189
+ async function q(m = tt, o = st) {
190
190
  const t = await import(
191
191
  /* @vite-ignore */
192
192
  o
193
193
  );
194
194
  return t.env.wasm.wasmPaths = m, t.env.wasm.numThreads = 1, t;
195
195
  }
196
- let lt = null;
197
- async function _t(m = mt, o = q, t = tt) {
198
- return lt || (lt = dt(o, t).then(
199
- (s) => s.InferenceSession.create(m, {
196
+ let dt = null;
197
+ async function _t(m = mt, o = tt, t = st, s) {
198
+ return dt || (dt = q(o, t).then(
199
+ (n) => s ? n.InferenceSession.create(new Uint8Array(s), {
200
+ executionProviders: ["wasm"],
201
+ graphOptimizationLevel: "all"
202
+ }) : n.InferenceSession.create(m, {
200
203
  executionProviders: ["wasm"],
201
204
  graphOptimizationLevel: "all"
202
205
  })
203
- )), lt;
206
+ )), dt;
204
207
  }
205
- class O {
208
+ class V {
206
209
  static loadWords(o = Z) {
207
210
  try {
208
211
  const t = localStorage.getItem(o);
@@ -212,12 +215,12 @@ class O {
212
215
  }
213
216
  }
214
217
  static saveWord(o, t = Z) {
215
- const s = O.loadWords(t).filter((n) => n.word_name !== o.word_name);
218
+ const s = V.loadWords(t).filter((n) => n.word_name !== o.word_name);
216
219
  localStorage.setItem(t, JSON.stringify([...s, o]));
217
220
  }
218
221
  static deleteWord(o, t = Z) {
219
222
  try {
220
- const s = O.loadWords(t).filter((n) => n.word_name !== o);
223
+ const s = V.loadWords(t).filter((n) => n.word_name !== o);
221
224
  localStorage.setItem(t, JSON.stringify(s));
222
225
  } catch {
223
226
  }
@@ -225,24 +228,23 @@ class O {
225
228
  }
226
229
  class It {
227
230
  constructor(o, t) {
228
- this._started = !1, this._inferring = !1, this._audioCtx = null, this._stream = null, this._refEmbeddings = /* @__PURE__ */ new Map(), this._lastMatchAt = 0, this._lastInferenceAt = 0;
231
+ this._started = !1, this._inferring = !1, this._audioCtx = null, this._stream = null, this._refEmbeddings = /* @__PURE__ */ new Map(), this._lastMatchAt = 0, this._lastInferenceAt = 0, this._initPromise = null;
229
232
  const {
230
233
  refsStorageKey: s = Z,
231
- thresholdStorageKey: n = xt,
232
- wasmPaths: r = q,
233
- modelPath: a = mt,
234
- audioProcessorPath: e = zt,
235
- ortCdnUrl: h = tt,
236
- audioUtils: i = new pt()
234
+ thresholdStorageKey: n = zt,
235
+ wasmPaths: e = tt,
236
+ modelPath: c = mt,
237
+ audioProcessorPath: r = xt,
238
+ ortCdnUrl: l = st,
239
+ audioUtils: h = new pt()
237
240
  } = t || {};
238
- this._audioUtils = i, this._commands = o, this._refsStorageKey = s, this._thresholdStorageKey = n, this._audioProcessorPath = e, this._wasmPaths = r, this._modelPath = a, this._ortCdnUrl = h;
241
+ this._audioUtils = h, this._commands = o, this._refsStorageKey = s, this._thresholdStorageKey = n, this._audioProcessorPath = r, this._wasmPaths = e, this._modelPath = c, this._ortCdnUrl = l;
239
242
  try {
240
- const l = localStorage.getItem(this._thresholdStorageKey);
241
- this._threshold = l !== null ? Math.max(0, Math.min(1, Number(l))) : 0.65;
243
+ const i = localStorage.getItem(this._thresholdStorageKey);
244
+ this._threshold = i !== null ? Math.max(0, Math.min(1, Number(i))) : 0.65;
242
245
  } catch {
243
246
  this._threshold = 0.65;
244
247
  }
245
- this._initPromise = this._init();
246
248
  }
247
249
  get threshold() {
248
250
  return this._threshold;
@@ -257,25 +259,63 @@ class It {
257
259
  get listening() {
258
260
  return this._started;
259
261
  }
260
- async _init() {
261
- await _t(this._modelPath, this._wasmPaths, this._ortCdnUrl);
262
- const o = /* @__PURE__ */ new Set();
263
- for (const t of this._commands)
264
- for (const s of t.triggers)
265
- if (!o.has(s.name) && (o.add(s.name), s.defaultRefPath)) {
266
- const n = await fetch(s.defaultRefPath);
267
- if (n.ok) {
268
- const r = await n.json();
269
- this.addCustomWord(r);
270
- }
271
- }
272
- for (const t of O.loadWords(this._refsStorageKey))
273
- this._refEmbeddings.set(t.word_name, t.embeddings);
262
+ /**
263
+ * Streams `url`, calling `onProgress(downloaded, total)` after each chunk.
264
+ * Falls back to a single-shot fetch when the body stream is unavailable.
265
+ */
266
+ async _trackFetch(o, t, s) {
267
+ const n = await fetch(o);
268
+ if (!n.ok) throw new Error(`HTTP ${n.status} fetching ${o}`);
269
+ const e = Number(n.headers.get("content-length") ?? "0");
270
+ if (e > 0 && (s.total += e), !n.body) {
271
+ const a = await n.arrayBuffer();
272
+ return s.downloaded += a.byteLength, e || (s.total += a.byteLength), t == null || t(s.downloaded, s.total), a;
273
+ }
274
+ const c = n.body.getReader(), r = [];
275
+ let l = 0;
276
+ for (; ; ) {
277
+ const { done: a, value: d } = await c.read();
278
+ if (a) break;
279
+ r.push(d), l += d.length, s.downloaded += d.length, t == null || t(s.downloaded, s.total);
280
+ }
281
+ e || (s.total += l);
282
+ const h = new Uint8Array(l);
283
+ let i = 0;
284
+ for (const a of r)
285
+ h.set(a, i), i += a.length;
286
+ return h.buffer;
287
+ }
288
+ async _init(o) {
289
+ const t = { downloaded: 0, total: 0 }, s = /* @__PURE__ */ new Set(), n = [];
290
+ for (const i of this._commands)
291
+ for (const a of i.triggers)
292
+ !s.has(a.name) && a.defaultRefPath && (s.add(a.name), n.push({ name: a.name, path: a.defaultRefPath }));
293
+ const e = q(this._wasmPaths, this._ortCdnUrl), [c, ...r] = await Promise.all([
294
+ this._trackFetch(this._modelPath, o, t),
295
+ ...n.map(({ path: i }) => this._trackFetch(i, o, t))
296
+ ]);
297
+ await e, await _t(this._modelPath, this._wasmPaths, this._ortCdnUrl, c);
298
+ const l = V.loadWords(this._refsStorageKey), h = new Set(l.map((i) => i.word_name));
299
+ for (let i = 0; i < n.length; i++)
300
+ try {
301
+ const a = JSON.parse(new TextDecoder().decode(r[i]));
302
+ this.addCustomWord(a), h.has(a.word_name) || V.saveWord(a, this._refsStorageKey);
303
+ } catch {
304
+ console.warn(`[Mellon] failed to parse ref file: ${n[i].path}`);
305
+ }
306
+ for (const i of l)
307
+ this._refEmbeddings.set(i.word_name, i.embeddings);
274
308
  console.info("[Mellon] init complete, loaded refs:", [...this._refEmbeddings.keys()]);
275
309
  }
276
- /** Ensures the ONNX model is loaded — call before generateRef() in enrollment. */
277
- async init() {
278
- await this._initPromise;
310
+ /**
311
+ * Loads the ONNX model and all reference embeddings.
312
+ * Must be called before {@link start}.
313
+ * Safe to call multiple times — the work is only done once.
314
+ *
315
+ * @param onProgress - optional callback invoked as each asset is loaded
316
+ */
317
+ async init(o) {
318
+ this._initPromise || (this._initPromise = this._init(o)), await this._initPromise;
279
319
  }
280
320
  /** Adds (or replaces) the reference embeddings for a word without restarting. */
281
321
  addCustomWord(o) {
@@ -285,7 +325,7 @@ class It {
285
325
  }
286
326
  async start() {
287
327
  if (this._started) return;
288
- await this._initPromise;
328
+ await this.init();
289
329
  let o;
290
330
  try {
291
331
  o = await navigator.mediaDevices.getUserMedia({
@@ -303,8 +343,8 @@ class It {
303
343
  const t = new AudioContext({ sampleRate: 16e3 });
304
344
  this._audioCtx = t, await t.audioWorklet.addModule(this._audioProcessorPath);
305
345
  const s = t.createMediaStreamSource(o), n = new AudioWorkletNode(t, "audio-processor");
306
- n.port.onmessage = (r) => {
307
- this._handleBuffer(r.data);
346
+ n.port.onmessage = (e) => {
347
+ this._handleBuffer(e.data);
308
348
  }, s.connect(n), n.connect(t.destination), this._started = !0;
309
349
  }
310
350
  async stop() {
@@ -319,16 +359,16 @@ class It {
319
359
  if (!(t - this._lastInferenceAt < 300)) {
320
360
  this._lastInferenceAt = t, this._inferring = !0;
321
361
  try {
322
- const [s, n] = await Promise.all([dt(this._wasmPaths, this._ortCdnUrl), _t(this._modelPath, this._wasmPaths, this._ortCdnUrl)]), r = this._audioUtils.logfbank(o), a = new s.Tensor("float32", r, [1, 1, 149, 64]), e = await n.run({ input: a }), h = e[Object.keys(e)[0]].data;
323
- let i = !1;
324
- for (const l of this._commands) {
325
- if (i) break;
326
- for (const c of l.triggers) {
327
- const _ = this._refEmbeddings.get(c.name);
328
- if (!_) continue;
329
- const d = this._audioUtils.maxCosineSim(h, _);
330
- if (d >= this._threshold && t - this._lastMatchAt > 2e3) {
331
- this._lastMatchAt = t, console.info(`[Mellon] match: "${c}" sim=${d.toFixed(3)}`), typeof l.onMatch == "function" && l.onMatch(c.name, d), i = !0;
362
+ const [s, n] = await Promise.all([q(this._wasmPaths, this._ortCdnUrl), _t(this._modelPath, this._wasmPaths, this._ortCdnUrl)]), e = this._audioUtils.logfbank(o), c = new s.Tensor("float32", e, [1, 1, 149, 64]), r = await n.run({ input: c }), l = r[Object.keys(r)[0]].data;
363
+ let h = !1;
364
+ for (const i of this._commands) {
365
+ if (h) break;
366
+ for (const a of i.triggers) {
367
+ const d = this._refEmbeddings.get(a.name);
368
+ if (!d) continue;
369
+ const _ = this._audioUtils.maxCosineSim(l, d);
370
+ if (_ >= this._threshold && t - this._lastMatchAt > 2e3) {
371
+ this._lastMatchAt = t, console.info(`[Mellon] match: "${a}" sim=${_.toFixed(3)}`), typeof i.onMatch == "function" && i.onMatch(a.name, _), h = !0;
332
372
  break;
333
373
  }
334
374
  }
@@ -343,31 +383,31 @@ class It {
343
383
  }
344
384
  class Wt {
345
385
  constructor(o, t) {
346
- this._config = {}, this._samples = [], this._wordName = o, this._config.modelPath = (t == null ? void 0 : t.modelPath) || mt, this._config.wasmPaths = (t == null ? void 0 : t.wasmPaths) || q, this._config.ortCdnUrl = (t == null ? void 0 : t.ortCdnUrl) || tt, this._audioUtils = (t == null ? void 0 : t.audioUtils) ?? new pt();
386
+ this._config = {}, this._samples = [], this._wordName = o, this._config.modelPath = (t == null ? void 0 : t.modelPath) || mt, this._config.wasmPaths = (t == null ? void 0 : t.wasmPaths) || tt, this._config.ortCdnUrl = (t == null ? void 0 : t.ortCdnUrl) || st, this._audioUtils = (t == null ? void 0 : t.audioUtils) ?? new pt();
347
387
  }
348
388
  /** Records 1.5 s of audio, stores the decoded PCM, returns new sample count. */
349
389
  async recordSample() {
350
- const o = await navigator.mediaDevices.getUserMedia({ audio: !0 }), t = new AudioContext({ sampleRate: 16e3 }), s = await new Promise((a, e) => {
351
- const h = new MediaRecorder(o), i = [];
352
- h.ondataavailable = (l) => {
353
- l.data.size > 0 && i.push(l.data);
354
- }, h.onstop = async () => {
355
- var l;
356
- for (const c of o.getTracks()) c.stop();
390
+ const o = await navigator.mediaDevices.getUserMedia({ audio: !0 }), t = new AudioContext({ sampleRate: 16e3 }), s = await new Promise((c, r) => {
391
+ const l = new MediaRecorder(o), h = [];
392
+ l.ondataavailable = (i) => {
393
+ i.data.size > 0 && h.push(i.data);
394
+ }, l.onstop = async () => {
395
+ var i;
396
+ for (const a of o.getTracks()) a.stop();
357
397
  try {
358
- const _ = await new Blob(i, { type: ((l = i[0]) == null ? void 0 : l.type) || "audio/webm" }).arrayBuffer(), d = await t.decodeAudioData(_);
359
- await t.close(), a(d.getChannelData(0).slice());
360
- } catch (c) {
361
- e(c);
398
+ const d = await new Blob(h, { type: ((i = h[0]) == null ? void 0 : i.type) || "audio/webm" }).arrayBuffer(), _ = await t.decodeAudioData(d);
399
+ await t.close(), c(_.getChannelData(0).slice());
400
+ } catch (a) {
401
+ r(a);
362
402
  }
363
- }, h.start(), setTimeout(() => {
403
+ }, l.start(), setTimeout(() => {
364
404
  try {
365
- h.stop();
405
+ l.stop();
366
406
  } catch {
367
407
  }
368
408
  }, 1500);
369
- }), n = 24e3, r = new Float32Array(n);
370
- return r.set(s.slice(0, n)), this._samples.push(r), this._samples.length;
409
+ }), n = 24e3, e = new Float32Array(n);
410
+ return e.set(s.slice(0, n)), this._samples.push(e), this._samples.length;
371
411
  }
372
412
  /** Removes the sample at the given index. Returns the new sample count. */
373
413
  deleteSample(o) {
@@ -377,24 +417,24 @@ class Wt {
377
417
  }
378
418
  /** Runs ONNX inference on every recorded sample to produce reference embeddings. */
379
419
  async generateRef() {
380
- const [o, t] = await Promise.all([dt(this._config.wasmPaths, this._config.ortCdnUrl), _t(this._config.modelPath, this._config.wasmPaths, this._config.ortCdnUrl)]), s = [];
420
+ const [o, t] = await Promise.all([q(this._config.wasmPaths, this._config.ortCdnUrl), _t(this._config.modelPath, this._config.wasmPaths, this._config.ortCdnUrl)]), s = [];
381
421
  for (const n of this._samples) {
382
- const r = this._audioUtils.logfbank(n), a = new o.Tensor("float32", r, [1, 1, 149, 64]), e = await t.run({ input: a }), h = Array.from(e[Object.keys(e)[0]].data);
383
- s.push(h);
422
+ const e = this._audioUtils.logfbank(n), c = new o.Tensor("float32", e, [1, 1, 149, 64]), r = await t.run({ input: c }), l = Array.from(r[Object.keys(r)[0]].data);
423
+ s.push(l);
384
424
  }
385
425
  return { word_name: this._wordName, model_type: "resnet_50_arc", embeddings: s };
386
426
  }
387
427
  }
388
- const q = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/", tt = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/ort.wasm.min.mjs", mt = "https://huggingface.co/ComicScrip/mellon/resolve/main/model.onnx", zt = "https://cdn.jsdelivr.net/npm/mellon@0.0.14/dist/assets/audio-processor.js", Z = "mellon-refs", xt = "mellon-threshold";
428
+ const tt = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/", st = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/ort.wasm.min.mjs", mt = "https://huggingface.co/ComicScrip/mellon/resolve/main/model.onnx", xt = "https://cdn.jsdelivr.net/npm/mellon@0.0.14/dist/assets/audio-processor.js", Z = "mellon-refs", zt = "mellon-threshold";
389
429
  export {
390
430
  pt as AudioUtils,
391
- zt as DEFAULT_AUDIO_PROCESSOR_PATH,
431
+ xt as DEFAULT_AUDIO_PROCESSOR_PATH,
392
432
  mt as DEFAULT_MODEL_PATH,
393
- tt as DEFAULT_ORT_CDN_URL,
433
+ st as DEFAULT_ORT_CDN_URL,
394
434
  Z as DEFAULT_REFS_STORAGE_KEY,
395
- xt as DEFAULT_THRESHOLD_STORAGE_KEY,
396
- q as DEFAULT_WASM_PATHS,
435
+ zt as DEFAULT_THRESHOLD_STORAGE_KEY,
436
+ tt as DEFAULT_WASM_PATHS,
397
437
  It as Detector,
398
438
  Wt as EnrollmentSession,
399
- O as Storage
439
+ V as Storage
400
440
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mellon",
3
- "version": "0.0.19",
3
+ "version": "0.0.20",
4
4
  "description": "Offline, in-browser voice commands powered by EfficientWord-Net (ResNet-50 ArcFace).",
5
5
  "type": "module",
6
6
  "main": "./dist/mellon.cjs",