mellon 0.0.18 → 0.0.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -17,7 +17,7 @@ Offline, fully in-browser **hotword / wake-word detection** powered by [Efficien
17
17
  2. [Quick start](#quick-start)
18
18
  3. [Enrolling words](#enrolling-custom-words)
19
19
  4. [API reference](#api-reference)
20
- - [Mellon](#mellon)
20
+ - [Detector](#detector)
21
21
  - [EnrollmentSession](#enrollmentsession)
22
22
  5. [Science behind the lib](#science-behind-the-lib)
23
23
  ---
@@ -31,9 +31,9 @@ npm install mellon
31
31
  ## Quick start
32
32
 
33
33
  ```js
34
- import { Mellon } from 'mellon'
34
+ import { Detector } from 'mellon'
35
35
 
36
- const hotWordDetection = new Mellon([
36
+ const hotWordDetection = new Detector([
37
37
  {
38
38
  name: 'openDoors',
39
39
  triggers: [{ name: 'mellon', defaultRefPath: '/mellon-assets/mellon_ref.json' }],
@@ -71,9 +71,9 @@ await hotWordDetection.start() // opens the mic and listens for all registered t
71
71
  ## Enrolling custom words
72
72
 
73
73
  ```js
74
- import { Mellon, EnrollmentSession } from 'mellon'
74
+ import { Detector, EnrollmentSession, Storage } from 'mellon'
75
75
 
76
- const hotwordDetection = new Mellon([{
76
+ const hotwordDetection = new Detector([{
77
77
  name: 'startEngine',
78
78
  triggers: [{ name: 'start' }],
79
79
  onMatch: (triggerNameMatched, confidence) => { console.log('starting engine...') }
@@ -99,19 +99,19 @@ hotwordDetection.addCustomWord(ref)
99
99
  await hotwordDetection.start()
100
100
 
101
101
  // 4b. Persist for future sessions
102
- Mellon.saveWord(ref)
102
+ Storage.saveWord(ref)
103
103
  ```
104
104
 
105
105
  ---
106
106
 
107
107
  ## API reference
108
108
 
109
- ### `Mellon`
109
+ ### `Detector`
110
110
 
111
111
  The easiest way to use the library. Wraps mic access, AudioWorklet wiring, and detector management into a single class.
112
112
 
113
113
  ```ts
114
- class Mellon {
114
+ class Detector {
115
115
  constructor(commands: Command[], config?: MellonConfig)
116
116
  readonly threshold: number // read/write; persisted in localStorage
117
117
  readonly listening: boolean
@@ -121,7 +121,19 @@ class Mellon {
121
121
  stop(): Promise<void>
122
122
  addCustomWord(ref: WordRef): void
123
123
 
124
- // Storage helpers — static, work without a Mellon instance
124
+ // Storage helpers — static, work without a Detector instance
125
+ static loadWords(storageKey?: string): WordRef[]
126
+ static saveWord(ref: WordRef, storageKey?: string): void
127
+ static deleteWord(wordName: string, storageKey?: string): void
128
+ }
129
+ ```
130
+
131
+ ### `Storage`
132
+
133
+ Static helpers for persisting enrolled word references in `localStorage`.
134
+
135
+ ```ts
136
+ class Storage {
125
137
  static loadWords(storageKey?: string): WordRef[]
126
138
  static saveWord(ref: WordRef, storageKey?: string): void
127
139
  static deleteWord(wordName: string, storageKey?: string): void
package/dist/index.d.ts CHANGED
@@ -1,5 +1,7 @@
1
- import { Mellon } from './Mellon';
1
+ import { Detector } from './Mellon';
2
2
  import { EnrollmentSession } from './EnrollmentSession';
3
+ import { Storage } from './Storage';
4
+ import { AudioUtils } from './AudioUtils';
3
5
  export type TriggerName = string;
4
6
  export interface Trigger {
5
7
  name: TriggerName;
@@ -17,11 +19,13 @@ export interface MellonConfig {
17
19
  modelPath?: string;
18
20
  audioProcessorPath?: string;
19
21
  ortCdnUrl?: string;
22
+ audioUtils?: AudioUtils;
20
23
  }
21
24
  export interface EnrollmentSessionConfig {
22
25
  wasmPaths?: string;
23
26
  modelPath?: string;
24
27
  ortCdnUrl?: string;
28
+ audioUtils?: AudioUtils;
25
29
  }
26
30
  export interface WordRef {
27
31
  word_name: TriggerName;
@@ -34,4 +38,4 @@ export declare const DEFAULT_MODEL_PATH = "https://huggingface.co/ComicScrip/mel
34
38
  export declare const DEFAULT_AUDIO_PROCESSOR_PATH = "https://cdn.jsdelivr.net/npm/mellon@0.0.14/dist/assets/audio-processor.js";
35
39
  export declare const DEFAULT_REFS_STORAGE_KEY = "mellon-refs";
36
40
  export declare const DEFAULT_THRESHOLD_STORAGE_KEY = "mellon-threshold";
37
- export { Mellon, EnrollmentSession };
41
+ export { Detector, EnrollmentSession, Storage, AudioUtils };
package/dist/mellon.cjs CHANGED
@@ -1 +1 @@
1
- "use strict";Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});function Dt(m){return m&&m.__esModule&&Object.prototype.hasOwnProperty.call(m,"default")?m.default:m}var lt,ut;function Ut(){if(ut)return lt;ut=1;function m(o){if(this.size=o|0,this.size<=1||(this.size&this.size-1)!==0)throw new Error("FFT size must be a power of two and bigger than 1");this._csize=o<<1;for(var t=new Array(this.size*2),s=0;s<t.length;s+=2){const i=Math.PI*s/this.size;t[s]=Math.cos(i),t[s+1]=-Math.sin(i)}this.table=t;for(var n=0,r=1;this.size>r;r<<=1)n++;this._width=n%2===0?n-1:n,this._bitrev=new Array(1<<this._width);for(var a=0;a<this._bitrev.length;a++){this._bitrev[a]=0;for(var e=0;e<this._width;e+=2){var h=this._width-e-2;this._bitrev[a]|=(a>>>e&3)<<h}}this._out=null,this._data=null,this._inv=0}return lt=m,m.prototype.fromComplexArray=function(t,s){for(var n=s||new Array(t.length>>>1),r=0;r<t.length;r+=2)n[r>>>1]=t[r];return n},m.prototype.createComplexArray=function(){const t=new Array(this._csize);for(var s=0;s<t.length;s++)t[s]=0;return t},m.prototype.toComplexArray=function(t,s){for(var n=s||this.createComplexArray(),r=0;r<n.length;r+=2)n[r]=t[r>>>1],n[r+1]=0;return n},m.prototype.completeSpectrum=function(t){for(var s=this._csize,n=s>>>1,r=2;r<n;r+=2)t[s-r]=t[r],t[s-r+1]=-t[r+1]},m.prototype.transform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=0,this._transform4(),this._out=null,this._data=null},m.prototype.realTransform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=0,this._realTransform4(),this._out=null,this._data=null},m.prototype.inverseTransform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=1,this._transform4();for(var n=0;n<t.length;n++)t[n]/=this.size;this._out=null,this._data=null},m.prototype._transform4=function(){var t=this._out,s=this._csize,n=this._width,r=1<<n,a=s/r<<1,e,h,i=this._bitrev;if(a===4)for(e=0,h=0;e<s;e+=a,h++){const v=i[h];this._singleTransform2(e,v,r)}else for(e=0,h=0;e<s;e+=a,h++){const v=i[h];this._singleTransform4(e,v,r)}var l=this._inv?-1:1,c=this.table;for(r>>=2;r>=2;r>>=2){a=s/r<<1;var d=a>>>2;for(e=0;e<s;e+=a)for(var _=e+d,u=e,f=0;u<_;u+=2,f+=r){const v=u,p=v+d,g=p+d,w=g+d,T=t[v],F=t[v+1],A=t[p],y=t[p+1],b=t[g],S=t[g+1],P=t[w],C=t[w+1],E=T,R=F,M=c[f],D=l*c[f+1],U=A*M-y*D,z=A*D+y*M,j=c[2*f],L=l*c[2*f+1],B=b*j-S*L,K=b*L+S*j,N=c[3*f],O=l*c[3*f+1],k=P*N-C*O,G=P*O+C*N,$=E+B,x=R+K,I=E-B,Y=R-K,J=U+k,H=z+G,W=l*(U-k),Q=l*(z-G),tt=$+J,rt=x+H,et=$-J,ot=x-H,nt=I+Q,at=Y-W,it=I-Q,ct=Y+W;t[v]=tt,t[v+1]=rt,t[p]=nt,t[p+1]=at,t[g]=et,t[g+1]=ot,t[w]=it,t[w+1]=ct}}},m.prototype._singleTransform2=function(t,s,n){const r=this._out,a=this._data,e=a[s],h=a[s+1],i=a[s+n],l=a[s+n+1],c=e+i,d=h+l,_=e-i,u=h-l;r[t]=c,r[t+1]=d,r[t+2]=_,r[t+3]=u},m.prototype._singleTransform4=function(t,s,n){const r=this._out,a=this._data,e=this._inv?-1:1,h=n*2,i=n*3,l=a[s],c=a[s+1],d=a[s+n],_=a[s+n+1],u=a[s+h],f=a[s+h+1],v=a[s+i],p=a[s+i+1],g=l+u,w=c+f,T=l-u,F=c-f,A=d+v,y=_+p,b=e*(d-v),S=e*(_-p),P=g+A,C=w+y,E=T+S,R=F-b,M=g-A,D=w-y,U=T-S,z=F+b;r[t]=P,r[t+1]=C,r[t+2]=E,r[t+3]=R,r[t+4]=M,r[t+5]=D,r[t+6]=U,r[t+7]=z},m.prototype._realTransform4=function(){var t=this._out,s=this._csize,n=this._width,r=1<<n,a=s/r<<1,e,h,i=this._bitrev;if(a===4)for(e=0,h=0;e<s;e+=a,h++){const ht=i[h];this._singleRealTransform2(e,ht>>>1,r>>>1)}else for(e=0,h=0;e<s;e+=a,h++){const ht=i[h];this._singleRealTransform4(e,ht>>>1,r>>>1)}var l=this._inv?-1:1,c=this.table;for(r>>=2;r>=2;r>>=2){a=s/r<<1;var d=a>>>1,_=d>>>1,u=_>>>1;for(e=0;e<s;e+=a)for(var f=0,v=0;f<=u;f+=2,v+=r){var p=e+f,g=p+_,w=g+_,T=w+_,F=t[p],A=t[p+1],y=t[g],b=t[g+1],S=t[w],P=t[w+1],C=t[T],E=t[T+1],R=F,M=A,D=c[v],U=l*c[v+1],z=y*D-b*U,j=y*U+b*D,L=c[2*v],B=l*c[2*v+1],K=S*L-P*B,N=S*B+P*L,O=c[3*v],k=l*c[3*v+1],G=C*O-E*k,$=C*k+E*O,x=R+K,I=M+N,Y=R-K,J=M-N,H=z+G,W=j+$,Q=l*(z-G),tt=l*(j-$),rt=x+H,et=I+W,ot=Y+tt,nt=J-Q;if(t[p]=rt,t[p+1]=et,t[g]=ot,t[g+1]=nt,f===0){var at=x-H,it=I-W;t[w]=at,t[w+1]=it;continue}if(f!==u){var ct=Y,yt=-J,Tt=x,At=-I,Ft=-l*tt,bt=-l*Q,St=-l*W,Pt=-l*H,Ct=ct+Ft,Et=yt+bt,Rt=Tt+Pt,Mt=At-St,ft=e+_-f,vt=e+d-f;t[ft]=Ct,t[ft+1]=Et,t[vt]=Rt,t[vt+1]=Mt}}}},m.prototype._singleRealTransform2=function(t,s,n){const r=this._out,a=this._data,e=a[s],h=a[s+n],i=e+h,l=e-h;r[t]=i,r[t+1]=0,r[t+2]=l,r[t+3]=0},m.prototype._singleRealTransform4=function(t,s,n){const r=this._out,a=this._data,e=this._inv?-1:1,h=n*2,i=n*3,l=a[s],c=a[s+n],d=a[s+h],_=a[s+i],u=l+d,f=l-d,v=c+_,p=e*(c-_),g=u+v,w=f,T=-p,F=u-v,A=f,y=p;r[t]=g,r[t+1]=0,r[t+2]=w,r[t+3]=T,r[t+4]=F,r[t+5]=0,r[t+6]=A,r[t+7]=y},lt}var zt=Ut();const xt=Dt(zt);class pt{constructor(o=16e3,t=512,s=64){this.sampleRate=o,this.nfft=t,this.nfilt=s,this.fft=new xt(t),this.melFilters=this.createMelFilterbank()}hzToMel(o){return 2595*Math.log10(1+o/700)}melToHz(o){return 700*(10**(o/2595)-1)}createMelFilterbank(){const t=this.sampleRate/2,s=this.hzToMel(0),n=this.hzToMel(t),r=new Float32Array(this.nfilt+2);for(let i=0;i<this.nfilt+2;i++)r[i]=s+i*(n-s)/(this.nfilt+1);const e=r.map(i=>this.melToHz(i)).map(i=>Math.floor((this.nfft+1)*i/this.sampleRate)),h=[];for(let i=0;i<this.nfilt;i++){const l=new Float32Array(Math.floor(this.nfft/2)+1);for(let c=e[i];c<e[i+1];c++)l[c]=(c-e[i])/(e[i+1]-e[i]);for(let c=e[i+1];c<e[i+2];c++)l[c]=(e[i+2]-c)/(e[i+2]-e[i+1]);h.push(l)}return h}logfbank(o){const t=Math.floor(.025*this.sampleRate),s=Math.floor(.01*this.sampleRate),n=1+Math.ceil((o.length-t)/s),r=new Float32Array(n*this.nfilt),a=new Float32Array(this.nfft),e=this.fft.createComplexArray();for(let h=0;h<n;h++){const i=h*s;a.fill(0);for(let d=0;d<t&&i+d<o.length;d++)a[d]=o[i+d];const l=this.fft.toComplexArray(a,null);this.fft.transform(e,l);const c=new Float32Array(Math.floor(this.nfft/2)+1);for(let d=0;d<c.length;d++){const _=e[2*d],u=e[2*d+1];c[d]=1/this.nfft*(_*_+u*u),c[d]===0&&(c[d]=1e-30)}for(let d=0;d<this.nfilt;d++){let _=0;const u=this.melFilters[d];for(let f=0;f<c.length;f++)_+=c[f]*u[f];_===0&&(_=1e-30),r[h*this.nfilt+d]=Math.log(_)}}return r}}async function mt(m=Z,o=q){const t=await import(o);return t.env.wasm.wasmPaths=m,t.env.wasm.numThreads=1,t}let dt=null;async function _t(m=st,o=Z,t=q){return dt||(dt=mt(o,t).then(s=>s.InferenceSession.create(m,{executionProviders:["wasm"],graphOptimizationLevel:"all"}))),dt}const It=new pt;class X{constructor(o,t){this._started=!1,this._inferring=!1,this._audioCtx=null,this._stream=null,this._refEmbeddings=new Map,this._lastMatchAt=0,this._lastInferenceAt=0;const{refsStorageKey:s=V,thresholdStorageKey:n=wt,wasmPaths:r=Z,modelPath:a=st,audioProcessorPath:e=gt,ortCdnUrl:h=q}=t||{};this._commands=o,this._refsStorageKey=s,this._thresholdStorageKey=n,this._audioProcessorPath=e,this._wasmPaths=r,this._modelPath=a,this._ortCdnUrl=h;try{const i=localStorage.getItem(this._thresholdStorageKey);this._threshold=i!==null?Math.max(0,Math.min(1,Number(i))):.65}catch{this._threshold=.65}this._initPromise=this._init()}get threshold(){return this._threshold}set threshold(o){this._threshold=Math.max(0,Math.min(1,o));try{localStorage.setItem(this._thresholdStorageKey,String(this._threshold))}catch{}}get listening(){return this._started}async _init(){await _t(this._modelPath,this._wasmPaths,this._ortCdnUrl);const o=new Set;for(const t of this._commands)for(const s of t.triggers)if(!o.has(s.name)&&(o.add(s.name),s.defaultRefPath)){const n=await fetch(s.defaultRefPath);if(n.ok){const r=await n.json();this.addCustomWord(r)}}for(const t of X.loadWords(this._refsStorageKey))this._refEmbeddings.set(t.word_name,t.embeddings);console.info("[Mellon] init complete, loaded refs:",[...this._refEmbeddings.keys()])}async init(){await this._initPromise}addCustomWord(o){if(!(Array.isArray(o.embeddings)&&o.embeddings.length>0))throw new Error("invalid ref file for : "+o.word_name);this._refEmbeddings.set(o.word_name,o.embeddings)}async start(){if(this._started)return;await this._initPromise;let o;try{o=await navigator.mediaDevices.getUserMedia({audio:{noiseSuppression:!1,echoCancellation:!1,autoGainControl:!1,channelCount:1}})}catch{o=await navigator.mediaDevices.getUserMedia({audio:!0})}this._stream=o;const t=new AudioContext({sampleRate:16e3});this._audioCtx=t,await t.audioWorklet.addModule(this._audioProcessorPath);const s=t.createMediaStreamSource(o),n=new AudioWorkletNode(t,"audio-processor");n.port.onmessage=r=>{this._handleBuffer(r.data)},s.connect(n),n.connect(t.destination),this._started=!0}async stop(){if(this._started=!1,this._audioCtx&&(await this._audioCtx.close(),this._audioCtx=null),this._stream){for(const o of this._stream.getTracks())o.stop();this._stream=null}}async _handleBuffer(o){if(this._inferring)return;const t=Date.now();if(!(t-this._lastInferenceAt<300)){this._lastInferenceAt=t,this._inferring=!0;try{const[s,n]=await Promise.all([mt(this._wasmPaths,this._ortCdnUrl),_t(this._modelPath,this._wasmPaths,this._ortCdnUrl)]),r=It.logfbank(o),a=new s.Tensor("float32",r,[1,1,149,64]),e=await n.run({input:a}),h=e[Object.keys(e)[0]].data;let i=!1;for(const l of this._commands){if(i)break;for(const c of l.triggers){const d=this._refEmbeddings.get(c.name);if(!d)continue;const _=this._maxCosineSim(h,d);if(_>=this._threshold&&t-this._lastMatchAt>2e3){this._lastMatchAt=t,console.info(`[Mellon] match: "${c}" sim=${_.toFixed(3)}`),typeof l.onMatch=="function"&&l.onMatch(c.name,_),i=!0;break}}}}catch(s){console.error("[Mellon] inference error:",s)}finally{this._inferring=!1}}}_maxCosineSim(o,t){let s=0;for(const n of t){let r=0;for(let e=0;e<n.length;e++)r+=o[e]*n[e];const a=(r+1)/2;a>s&&(s=a)}return s}static loadWords(o=V){try{const t=localStorage.getItem(o);return t?JSON.parse(t):[]}catch{return[]}}static saveWord(o,t=V){const s=X.loadWords(t).filter(n=>n.word_name!==o.word_name);localStorage.setItem(t,JSON.stringify([...s,o]))}static deleteWord(o,t=V){try{const s=X.loadWords(t).filter(n=>n.word_name!==o);localStorage.setItem(t,JSON.stringify(s))}catch{}}}const Ht=new pt;class Wt{constructor(o,t){this._config={},this._samples=[],this._wordName=o,this._config.modelPath=(t==null?void 0:t.modelPath)||st,this._config.wasmPaths=(t==null?void 0:t.wasmPaths)||Z,this._config.ortCdnUrl=(t==null?void 0:t.ortCdnUrl)||q}async recordSample(){const o=await navigator.mediaDevices.getUserMedia({audio:!0}),t=new AudioContext({sampleRate:16e3}),s=await new Promise((a,e)=>{const h=new MediaRecorder(o),i=[];h.ondataavailable=l=>{l.data.size>0&&i.push(l.data)},h.onstop=async()=>{var l;for(const c of o.getTracks())c.stop();try{const d=await new Blob(i,{type:((l=i[0])==null?void 0:l.type)||"audio/webm"}).arrayBuffer(),_=await t.decodeAudioData(d);await t.close(),a(_.getChannelData(0).slice())}catch(c){e(c)}},h.start(),setTimeout(()=>{try{h.stop()}catch{}},1500)}),n=24e3,r=new Float32Array(n);return r.set(s.slice(0,n)),this._samples.push(r),this._samples.length}deleteSample(o){if(o<0||o>=this._samples.length)throw new RangeError(`index ${o} out of bounds (${this._samples.length} samples)`);return this._samples.splice(o,1),this._samples.length}async generateRef(){const[o,t]=await Promise.all([mt(this._config.wasmPaths,this._config.ortCdnUrl),_t(this._config.modelPath,this._config.wasmPaths,this._config.ortCdnUrl)]),s=[];for(const n of this._samples){const r=Ht.logfbank(n),a=new o.Tensor("float32",r,[1,1,149,64]),e=await t.run({input:a}),h=Array.from(e[Object.keys(e)[0]].data);s.push(h)}return{word_name:this._wordName,model_type:"resnet_50_arc",embeddings:s}}}const Z="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/",q="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/ort.wasm.min.mjs",st="https://huggingface.co/ComicScrip/mellon/resolve/main/model.onnx",gt="https://cdn.jsdelivr.net/npm/mellon@0.0.14/dist/assets/audio-processor.js",V="mellon-refs",wt="mellon-threshold";exports.DEFAULT_AUDIO_PROCESSOR_PATH=gt;exports.DEFAULT_MODEL_PATH=st;exports.DEFAULT_ORT_CDN_URL=q;exports.DEFAULT_REFS_STORAGE_KEY=V;exports.DEFAULT_THRESHOLD_STORAGE_KEY=wt;exports.DEFAULT_WASM_PATHS=Z;exports.EnrollmentSession=Wt;exports.Mellon=X;
1
+ "use strict";Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});function Dt(m){return m&&m.__esModule&&Object.prototype.hasOwnProperty.call(m,"default")?m.default:m}var ht,pt;function Ut(){if(pt)return ht;pt=1;function m(o){if(this.size=o|0,this.size<=1||(this.size&this.size-1)!==0)throw new Error("FFT size must be a power of two and bigger than 1");this._csize=o<<1;for(var t=new Array(this.size*2),s=0;s<t.length;s+=2){const i=Math.PI*s/this.size;t[s]=Math.cos(i),t[s+1]=-Math.sin(i)}this.table=t;for(var n=0,r=1;this.size>r;r<<=1)n++;this._width=n%2===0?n-1:n,this._bitrev=new Array(1<<this._width);for(var a=0;a<this._bitrev.length;a++){this._bitrev[a]=0;for(var e=0;e<this._width;e+=2){var l=this._width-e-2;this._bitrev[a]|=(a>>>e&3)<<l}}this._out=null,this._data=null,this._inv=0}return ht=m,m.prototype.fromComplexArray=function(t,s){for(var n=s||new Array(t.length>>>1),r=0;r<t.length;r+=2)n[r>>>1]=t[r];return n},m.prototype.createComplexArray=function(){const t=new Array(this._csize);for(var s=0;s<t.length;s++)t[s]=0;return t},m.prototype.toComplexArray=function(t,s){for(var n=s||this.createComplexArray(),r=0;r<n.length;r+=2)n[r]=t[r>>>1],n[r+1]=0;return n},m.prototype.completeSpectrum=function(t){for(var s=this._csize,n=s>>>1,r=2;r<n;r+=2)t[s-r]=t[r],t[s-r+1]=-t[r+1]},m.prototype.transform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=0,this._transform4(),this._out=null,this._data=null},m.prototype.realTransform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=0,this._realTransform4(),this._out=null,this._data=null},m.prototype.inverseTransform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=1,this._transform4();for(var n=0;n<t.length;n++)t[n]/=this.size;this._out=null,this._data=null},m.prototype._transform4=function(){var t=this._out,s=this._csize,n=this._width,r=1<<n,a=s/r<<1,e,l,i=this._bitrev;if(a===4)for(e=0,l=0;e<s;e+=a,l++){const u=i[l];this._singleTransform2(e,u,r)}else for(e=0,l=0;e<s;e+=a,l++){const u=i[l];this._singleTransform4(e,u,r)}var h=this._inv?-1:1,c=this.table;for(r>>=2;r>=2;r>>=2){a=s/r<<1;var _=a>>>2;for(e=0;e<s;e+=a)for(var d=e+_,v=e,f=0;v<d;v+=2,f+=r){const u=v,p=u+_,w=p+_,g=w+_,T=t[u],F=t[u+1],A=t[p],y=t[p+1],b=t[w],S=t[w+1],P=t[g],C=t[g+1],E=T,M=F,R=c[f],D=h*c[f+1],U=A*R-y*D,z=A*D+y*R,j=c[2*f],L=h*c[2*f+1],B=b*j-S*L,K=b*L+S*j,N=c[3*f],O=h*c[3*f+1],k=P*N-C*O,G=P*O+C*N,Y=E+B,x=M+K,I=E-B,$=M-K,J=U+k,H=z+G,W=h*(U-k),Q=h*(z-G),tt=Y+J,rt=x+H,et=Y-J,ot=x-H,nt=I+Q,at=$-W,it=I-Q,ct=$+W;t[u]=tt,t[u+1]=rt,t[p]=nt,t[p+1]=at,t[w]=et,t[w+1]=ot,t[g]=it,t[g+1]=ct}}},m.prototype._singleTransform2=function(t,s,n){const r=this._out,a=this._data,e=a[s],l=a[s+1],i=a[s+n],h=a[s+n+1],c=e+i,_=l+h,d=e-i,v=l-h;r[t]=c,r[t+1]=_,r[t+2]=d,r[t+3]=v},m.prototype._singleTransform4=function(t,s,n){const r=this._out,a=this._data,e=this._inv?-1:1,l=n*2,i=n*3,h=a[s],c=a[s+1],_=a[s+n],d=a[s+n+1],v=a[s+l],f=a[s+l+1],u=a[s+i],p=a[s+i+1],w=h+v,g=c+f,T=h-v,F=c-f,A=_+u,y=d+p,b=e*(_-u),S=e*(d-p),P=w+A,C=g+y,E=T+S,M=F-b,R=w-A,D=g-y,U=T-S,z=F+b;r[t]=P,r[t+1]=C,r[t+2]=E,r[t+3]=M,r[t+4]=R,r[t+5]=D,r[t+6]=U,r[t+7]=z},m.prototype._realTransform4=function(){var t=this._out,s=this._csize,n=this._width,r=1<<n,a=s/r<<1,e,l,i=this._bitrev;if(a===4)for(e=0,l=0;e<s;e+=a,l++){const lt=i[l];this._singleRealTransform2(e,lt>>>1,r>>>1)}else for(e=0,l=0;e<s;e+=a,l++){const lt=i[l];this._singleRealTransform4(e,lt>>>1,r>>>1)}var h=this._inv?-1:1,c=this.table;for(r>>=2;r>=2;r>>=2){a=s/r<<1;var _=a>>>1,d=_>>>1,v=d>>>1;for(e=0;e<s;e+=a)for(var f=0,u=0;f<=v;f+=2,u+=r){var p=e+f,w=p+d,g=w+d,T=g+d,F=t[p],A=t[p+1],y=t[w],b=t[w+1],S=t[g],P=t[g+1],C=t[T],E=t[T+1],M=F,R=A,D=c[u],U=h*c[u+1],z=y*D-b*U,j=y*U+b*D,L=c[2*u],B=h*c[2*u+1],K=S*L-P*B,N=S*B+P*L,O=c[3*u],k=h*c[3*u+1],G=C*O-E*k,Y=C*k+E*O,x=M+K,I=R+N,$=M-K,J=R-N,H=z+G,W=j+Y,Q=h*(z-G),tt=h*(j-Y),rt=x+H,et=I+W,ot=$+tt,nt=J-Q;if(t[p]=rt,t[p+1]=et,t[w]=ot,t[w+1]=nt,f===0){var at=x-H,it=I-W;t[g]=at,t[g+1]=it;continue}if(f!==v){var ct=$,yt=-J,Tt=x,At=-I,Ft=-h*tt,bt=-h*Q,St=-h*W,Pt=-h*H,Ct=ct+Ft,Et=yt+bt,Mt=Tt+Pt,Rt=At-St,ut=e+d-f,vt=e+_-f;t[ut]=Ct,t[ut+1]=Et,t[vt]=Mt,t[vt+1]=Rt}}}},m.prototype._singleRealTransform2=function(t,s,n){const r=this._out,a=this._data,e=a[s],l=a[s+n],i=e+l,h=e-l;r[t]=i,r[t+1]=0,r[t+2]=h,r[t+3]=0},m.prototype._singleRealTransform4=function(t,s,n){const r=this._out,a=this._data,e=this._inv?-1:1,l=n*2,i=n*3,h=a[s],c=a[s+n],_=a[s+l],d=a[s+i],v=h+_,f=h-_,u=c+d,p=e*(c-d),w=v+u,g=f,T=-p,F=v-u,A=f,y=p;r[t]=w,r[t+1]=0,r[t+2]=g,r[t+3]=T,r[t+4]=F,r[t+5]=0,r[t+6]=A,r[t+7]=y},ht}var zt=Ut();const xt=Dt(zt);class mt{constructor(o=16e3,t=512,s=64){this._sampleRate=o,this._nfft=t,this._nfilt=s,this._fft=new xt(t),this._melFilters=this._createMelFilterbank()}_hzToMel(o){return 2595*Math.log10(1+o/700)}_melToHz(o){return 700*(10**(o/2595)-1)}_createMelFilterbank(){const t=this._sampleRate/2,s=this._hzToMel(0),n=this._hzToMel(t),r=new Float32Array(this._nfilt+2);for(let i=0;i<this._nfilt+2;i++)r[i]=s+i*(n-s)/(this._nfilt+1);const e=r.map(i=>this._melToHz(i)).map(i=>Math.floor((this._nfft+1)*i/this._sampleRate)),l=[];for(let i=0;i<this._nfilt;i++){const h=new Float32Array(Math.floor(this._nfft/2)+1);for(let c=e[i];c<e[i+1];c++)h[c]=(c-e[i])/(e[i+1]-e[i]);for(let c=e[i+1];c<e[i+2];c++)h[c]=(e[i+2]-c)/(e[i+2]-e[i+1]);l.push(h)}return l}logfbank(o){const t=Math.floor(.025*this._sampleRate),s=Math.floor(.01*this._sampleRate),n=1+Math.ceil((o.length-t)/s),r=new Float32Array(n*this._nfilt),a=new Float32Array(this._nfft),e=this._fft.createComplexArray();for(let l=0;l<n;l++){const i=l*s;a.fill(0);for(let _=0;_<t&&i+_<o.length;_++)a[_]=o[i+_];const h=this._fft.toComplexArray(a,null);this._fft.transform(e,h);const c=new Float32Array(Math.floor(this._nfft/2)+1);for(let _=0;_<c.length;_++){const d=e[2*_],v=e[2*_+1];c[_]=1/this._nfft*(d*d+v*v),c[_]===0&&(c[_]=1e-30)}for(let _=0;_<this._nfilt;_++){let d=0;const v=this._melFilters[_];for(let f=0;f<c.length;f++)d+=c[f]*v[f];d===0&&(d=1e-30),r[l*this._nfilt+_]=Math.log(d)}}return r}maxCosineSim(o,t){let s=0;for(const n of t){let r=0;for(let e=0;e<n.length;e++)r+=o[e]*n[e];const a=(r+1)/2;a>s&&(s=a)}return s}}async function ft(m=Z,o=q){const t=await import(o);return t.env.wasm.wasmPaths=m,t.env.wasm.numThreads=1,t}let _t=null;async function dt(m=st,o=Z,t=q){return _t||(_t=ft(o,t).then(s=>s.InferenceSession.create(m,{executionProviders:["wasm"],graphOptimizationLevel:"all"}))),_t}class X{static loadWords(o=V){try{const t=localStorage.getItem(o);return t?JSON.parse(t):[]}catch{return[]}}static saveWord(o,t=V){const s=X.loadWords(t).filter(n=>n.word_name!==o.word_name);localStorage.setItem(t,JSON.stringify([...s,o]))}static deleteWord(o,t=V){try{const s=X.loadWords(t).filter(n=>n.word_name!==o);localStorage.setItem(t,JSON.stringify(s))}catch{}}}class It{constructor(o,t){this._started=!1,this._inferring=!1,this._audioCtx=null,this._stream=null,this._refEmbeddings=new Map,this._lastMatchAt=0,this._lastInferenceAt=0;const{refsStorageKey:s=V,thresholdStorageKey:n=gt,wasmPaths:r=Z,modelPath:a=st,audioProcessorPath:e=wt,ortCdnUrl:l=q,audioUtils:i=new mt}=t||{};this._audioUtils=i,this._commands=o,this._refsStorageKey=s,this._thresholdStorageKey=n,this._audioProcessorPath=e,this._wasmPaths=r,this._modelPath=a,this._ortCdnUrl=l;try{const h=localStorage.getItem(this._thresholdStorageKey);this._threshold=h!==null?Math.max(0,Math.min(1,Number(h))):.65}catch{this._threshold=.65}this._initPromise=this._init()}get threshold(){return this._threshold}set threshold(o){this._threshold=Math.max(0,Math.min(1,o));try{localStorage.setItem(this._thresholdStorageKey,String(this._threshold))}catch{}}get listening(){return this._started}async _init(){await dt(this._modelPath,this._wasmPaths,this._ortCdnUrl);const o=new Set;for(const t of this._commands)for(const s of t.triggers)if(!o.has(s.name)&&(o.add(s.name),s.defaultRefPath)){const n=await fetch(s.defaultRefPath);if(n.ok){const r=await n.json();this.addCustomWord(r)}}for(const t of X.loadWords(this._refsStorageKey))this._refEmbeddings.set(t.word_name,t.embeddings);console.info("[Mellon] init complete, loaded refs:",[...this._refEmbeddings.keys()])}async init(){await this._initPromise}addCustomWord(o){if(!(Array.isArray(o.embeddings)&&o.embeddings.length>0))throw new Error("invalid ref file for : "+o.word_name);this._refEmbeddings.set(o.word_name,o.embeddings)}async start(){if(this._started)return;await this._initPromise;let o;try{o=await navigator.mediaDevices.getUserMedia({audio:{noiseSuppression:!1,echoCancellation:!1,autoGainControl:!1,channelCount:1}})}catch{o=await navigator.mediaDevices.getUserMedia({audio:!0})}this._stream=o;const t=new AudioContext({sampleRate:16e3});this._audioCtx=t,await t.audioWorklet.addModule(this._audioProcessorPath);const s=t.createMediaStreamSource(o),n=new AudioWorkletNode(t,"audio-processor");n.port.onmessage=r=>{this._handleBuffer(r.data)},s.connect(n),n.connect(t.destination),this._started=!0}async stop(){if(this._started=!1,this._audioCtx&&(await this._audioCtx.close(),this._audioCtx=null),this._stream){for(const o of this._stream.getTracks())o.stop();this._stream=null}}async _handleBuffer(o){if(this._inferring)return;const t=Date.now();if(!(t-this._lastInferenceAt<300)){this._lastInferenceAt=t,this._inferring=!0;try{const[s,n]=await Promise.all([ft(this._wasmPaths,this._ortCdnUrl),dt(this._modelPath,this._wasmPaths,this._ortCdnUrl)]),r=this._audioUtils.logfbank(o),a=new s.Tensor("float32",r,[1,1,149,64]),e=await n.run({input:a}),l=e[Object.keys(e)[0]].data;let i=!1;for(const h of this._commands){if(i)break;for(const c of h.triggers){const _=this._refEmbeddings.get(c.name);if(!_)continue;const d=this._audioUtils.maxCosineSim(l,_);if(d>=this._threshold&&t-this._lastMatchAt>2e3){this._lastMatchAt=t,console.info(`[Mellon] match: "${c}" sim=${d.toFixed(3)}`),typeof h.onMatch=="function"&&h.onMatch(c.name,d),i=!0;break}}}}catch(s){console.error("[Mellon] inference error:",s)}finally{this._inferring=!1}}}}class Ht{constructor(o,t){this._config={},this._samples=[],this._wordName=o,this._config.modelPath=(t==null?void 0:t.modelPath)||st,this._config.wasmPaths=(t==null?void 0:t.wasmPaths)||Z,this._config.ortCdnUrl=(t==null?void 0:t.ortCdnUrl)||q,this._audioUtils=(t==null?void 0:t.audioUtils)??new mt}async recordSample(){const o=await navigator.mediaDevices.getUserMedia({audio:!0}),t=new AudioContext({sampleRate:16e3}),s=await new Promise((a,e)=>{const l=new MediaRecorder(o),i=[];l.ondataavailable=h=>{h.data.size>0&&i.push(h.data)},l.onstop=async()=>{var h;for(const c of o.getTracks())c.stop();try{const _=await new Blob(i,{type:((h=i[0])==null?void 0:h.type)||"audio/webm"}).arrayBuffer(),d=await t.decodeAudioData(_);await t.close(),a(d.getChannelData(0).slice())}catch(c){e(c)}},l.start(),setTimeout(()=>{try{l.stop()}catch{}},1500)}),n=24e3,r=new Float32Array(n);return r.set(s.slice(0,n)),this._samples.push(r),this._samples.length}deleteSample(o){if(o<0||o>=this._samples.length)throw new RangeError(`index ${o} out of bounds (${this._samples.length} samples)`);return this._samples.splice(o,1),this._samples.length}async generateRef(){const[o,t]=await Promise.all([ft(this._config.wasmPaths,this._config.ortCdnUrl),dt(this._config.modelPath,this._config.wasmPaths,this._config.ortCdnUrl)]),s=[];for(const n of this._samples){const r=this._audioUtils.logfbank(n),a=new o.Tensor("float32",r,[1,1,149,64]),e=await t.run({input:a}),l=Array.from(e[Object.keys(e)[0]].data);s.push(l)}return{word_name:this._wordName,model_type:"resnet_50_arc",embeddings:s}}}const Z="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/",q="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/ort.wasm.min.mjs",st="https://huggingface.co/ComicScrip/mellon/resolve/main/model.onnx",wt="https://cdn.jsdelivr.net/npm/mellon@0.0.14/dist/assets/audio-processor.js",V="mellon-refs",gt="mellon-threshold";exports.AudioUtils=mt;exports.DEFAULT_AUDIO_PROCESSOR_PATH=wt;exports.DEFAULT_MODEL_PATH=st;exports.DEFAULT_ORT_CDN_URL=q;exports.DEFAULT_REFS_STORAGE_KEY=V;exports.DEFAULT_THRESHOLD_STORAGE_KEY=gt;exports.DEFAULT_WASM_PATHS=Z;exports.Detector=It;exports.EnrollmentSession=Ht;exports.Storage=X;
package/dist/mellon.mjs CHANGED
@@ -1,11 +1,11 @@
1
- function Rt(_) {
2
- return _ && _.__esModule && Object.prototype.hasOwnProperty.call(_, "default") ? _.default : _;
1
+ function Rt(m) {
2
+ return m && m.__esModule && Object.prototype.hasOwnProperty.call(m, "default") ? m.default : m;
3
3
  }
4
- var ht, ut;
4
+ var ht, vt;
5
5
  function Et() {
6
- if (ut) return ht;
7
- ut = 1;
8
- function _(o) {
6
+ if (vt) return ht;
7
+ vt = 1;
8
+ function m(o) {
9
9
  if (this.size = o | 0, this.size <= 1 || (this.size & this.size - 1) !== 0)
10
10
  throw new Error("FFT size must be a power of two and bigger than 1");
11
11
  this._csize = o << 1;
@@ -26,66 +26,66 @@ function Et() {
26
26
  }
27
27
  this._out = null, this._data = null, this._inv = 0;
28
28
  }
29
- return ht = _, _.prototype.fromComplexArray = function(t, s) {
29
+ return ht = m, m.prototype.fromComplexArray = function(t, s) {
30
30
  for (var n = s || new Array(t.length >>> 1), r = 0; r < t.length; r += 2)
31
31
  n[r >>> 1] = t[r];
32
32
  return n;
33
- }, _.prototype.createComplexArray = function() {
33
+ }, m.prototype.createComplexArray = function() {
34
34
  const t = new Array(this._csize);
35
35
  for (var s = 0; s < t.length; s++)
36
36
  t[s] = 0;
37
37
  return t;
38
- }, _.prototype.toComplexArray = function(t, s) {
38
+ }, m.prototype.toComplexArray = function(t, s) {
39
39
  for (var n = s || this.createComplexArray(), r = 0; r < n.length; r += 2)
40
40
  n[r] = t[r >>> 1], n[r + 1] = 0;
41
41
  return n;
42
- }, _.prototype.completeSpectrum = function(t) {
42
+ }, m.prototype.completeSpectrum = function(t) {
43
43
  for (var s = this._csize, n = s >>> 1, r = 2; r < n; r += 2)
44
44
  t[s - r] = t[r], t[s - r + 1] = -t[r + 1];
45
- }, _.prototype.transform = function(t, s) {
45
+ }, m.prototype.transform = function(t, s) {
46
46
  if (t === s)
47
47
  throw new Error("Input and output buffers must be different");
48
48
  this._out = t, this._data = s, this._inv = 0, this._transform4(), this._out = null, this._data = null;
49
- }, _.prototype.realTransform = function(t, s) {
49
+ }, m.prototype.realTransform = function(t, s) {
50
50
  if (t === s)
51
51
  throw new Error("Input and output buffers must be different");
52
52
  this._out = t, this._data = s, this._inv = 0, this._realTransform4(), this._out = null, this._data = null;
53
- }, _.prototype.inverseTransform = function(t, s) {
53
+ }, m.prototype.inverseTransform = function(t, s) {
54
54
  if (t === s)
55
55
  throw new Error("Input and output buffers must be different");
56
56
  this._out = t, this._data = s, this._inv = 1, this._transform4();
57
57
  for (var n = 0; n < t.length; n++)
58
58
  t[n] /= this.size;
59
59
  this._out = null, this._data = null;
60
- }, _.prototype._transform4 = function() {
60
+ }, m.prototype._transform4 = function() {
61
61
  var t = this._out, s = this._csize, n = this._width, r = 1 << n, a = s / r << 1, e, h, i = this._bitrev;
62
62
  if (a === 4)
63
63
  for (e = 0, h = 0; e < s; e += a, h++) {
64
- const v = i[h];
65
- this._singleTransform2(e, v, r);
64
+ const u = i[h];
65
+ this._singleTransform2(e, u, r);
66
66
  }
67
67
  else
68
68
  for (e = 0, h = 0; e < s; e += a, h++) {
69
- const v = i[h];
70
- this._singleTransform4(e, v, r);
69
+ const u = i[h];
70
+ this._singleTransform4(e, u, r);
71
71
  }
72
72
  var l = this._inv ? -1 : 1, c = this.table;
73
73
  for (r >>= 2; r >= 2; r >>= 2) {
74
74
  a = s / r << 1;
75
- var d = a >>> 2;
75
+ var _ = a >>> 2;
76
76
  for (e = 0; e < s; e += a)
77
- for (var m = e + d, u = e, f = 0; u < m; u += 2, f += r) {
78
- const v = u, p = v + d, w = p + d, g = w + d, b = t[v], T = t[v + 1], F = t[p], y = t[p + 1], A = t[w], C = t[w + 1], P = t[g], M = t[g + 1], S = b, R = T, E = c[f], D = l * c[f + 1], z = F * E - y * D, x = F * D + y * E, B = c[2 * f], N = l * c[2 * f + 1], H = A * B - C * N, K = A * N + C * B, k = c[3 * f], $ = l * c[3 * f + 1], G = P * k - M * $, J = P * $ + M * k, L = S + H, U = R + K, I = S - H, Y = R - K, Q = z + G, W = x + J, j = l * (z - G), V = l * (x - J), X = L + Q, st = U + W, rt = L - Q, et = U - W, ot = I + V, nt = Y - j, at = I - V, it = Y + j;
79
- t[v] = X, t[v + 1] = st, t[p] = ot, t[p + 1] = nt, t[w] = rt, t[w + 1] = et, t[g] = at, t[g + 1] = it;
77
+ for (var d = e + _, v = e, f = 0; v < d; v += 2, f += r) {
78
+ const u = v, p = u + _, w = p + _, g = w + _, b = t[u], T = t[u + 1], F = t[p], y = t[p + 1], A = t[w], C = t[w + 1], P = t[g], M = t[g + 1], S = b, R = T, E = c[f], U = l * c[f + 1], D = F * E - y * U, z = F * U + y * E, B = c[2 * f], N = l * c[2 * f + 1], H = A * B - C * N, K = A * N + C * B, k = c[3 * f], $ = l * c[3 * f + 1], G = P * k - M * $, J = P * $ + M * k, L = S + H, x = R + K, I = S - H, Y = R - K, Q = D + G, W = z + J, j = l * (D - G), V = l * (z - J), X = L + Q, st = x + W, rt = L - Q, et = x - W, ot = I + V, nt = Y - j, at = I - V, it = Y + j;
79
+ t[u] = X, t[u + 1] = st, t[p] = ot, t[p + 1] = nt, t[w] = rt, t[w + 1] = et, t[g] = at, t[g + 1] = it;
80
80
  }
81
81
  }
82
- }, _.prototype._singleTransform2 = function(t, s, n) {
83
- const r = this._out, a = this._data, e = a[s], h = a[s + 1], i = a[s + n], l = a[s + n + 1], c = e + i, d = h + l, m = e - i, u = h - l;
84
- r[t] = c, r[t + 1] = d, r[t + 2] = m, r[t + 3] = u;
85
- }, _.prototype._singleTransform4 = function(t, s, n) {
86
- const r = this._out, a = this._data, e = this._inv ? -1 : 1, h = n * 2, i = n * 3, l = a[s], c = a[s + 1], d = a[s + n], m = a[s + n + 1], u = a[s + h], f = a[s + h + 1], v = a[s + i], p = a[s + i + 1], w = l + u, g = c + f, b = l - u, T = c - f, F = d + v, y = m + p, A = e * (d - v), C = e * (m - p), P = w + F, M = g + y, S = b + C, R = T - A, E = w - F, D = g - y, z = b - C, x = T + A;
87
- r[t] = P, r[t + 1] = M, r[t + 2] = S, r[t + 3] = R, r[t + 4] = E, r[t + 5] = D, r[t + 6] = z, r[t + 7] = x;
88
- }, _.prototype._realTransform4 = function() {
82
+ }, m.prototype._singleTransform2 = function(t, s, n) {
83
+ const r = this._out, a = this._data, e = a[s], h = a[s + 1], i = a[s + n], l = a[s + n + 1], c = e + i, _ = h + l, d = e - i, v = h - l;
84
+ r[t] = c, r[t + 1] = _, r[t + 2] = d, r[t + 3] = v;
85
+ }, m.prototype._singleTransform4 = function(t, s, n) {
86
+ const r = this._out, a = this._data, e = this._inv ? -1 : 1, h = n * 2, i = n * 3, l = a[s], c = a[s + 1], _ = a[s + n], d = a[s + n + 1], v = a[s + h], f = a[s + h + 1], u = a[s + i], p = a[s + i + 1], w = l + v, g = c + f, b = l - v, T = c - f, F = _ + u, y = d + p, A = e * (_ - u), C = e * (d - p), P = w + F, M = g + y, S = b + C, R = T - A, E = w - F, U = g - y, D = b - C, z = T + A;
87
+ r[t] = P, r[t + 1] = M, r[t + 2] = S, r[t + 3] = R, r[t + 4] = E, r[t + 5] = U, r[t + 6] = D, r[t + 7] = z;
88
+ }, m.prototype._realTransform4 = function() {
89
89
  var t = this._out, s = this._csize, n = this._width, r = 1 << n, a = s / r << 1, e, h, i = this._bitrev;
90
90
  if (a === 4)
91
91
  for (e = 0, h = 0; e < s; e += a, h++) {
@@ -100,48 +100,48 @@ function Et() {
100
100
  var l = this._inv ? -1 : 1, c = this.table;
101
101
  for (r >>= 2; r >= 2; r >>= 2) {
102
102
  a = s / r << 1;
103
- var d = a >>> 1, m = d >>> 1, u = m >>> 1;
103
+ var _ = a >>> 1, d = _ >>> 1, v = d >>> 1;
104
104
  for (e = 0; e < s; e += a)
105
- for (var f = 0, v = 0; f <= u; f += 2, v += r) {
106
- var p = e + f, w = p + m, g = w + m, b = g + m, T = t[p], F = t[p + 1], y = t[w], A = t[w + 1], C = t[g], P = t[g + 1], M = t[b], S = t[b + 1], R = T, E = F, D = c[v], z = l * c[v + 1], x = y * D - A * z, B = y * z + A * D, N = c[2 * v], H = l * c[2 * v + 1], K = C * N - P * H, k = C * H + P * N, $ = c[3 * v], G = l * c[3 * v + 1], J = M * $ - S * G, L = M * G + S * $, U = R + K, I = E + k, Y = R - K, Q = E - k, W = x + J, j = B + L, V = l * (x - J), X = l * (B - L), st = U + W, rt = I + j, et = Y + X, ot = Q - V;
105
+ for (var f = 0, u = 0; f <= v; f += 2, u += r) {
106
+ var p = e + f, w = p + d, g = w + d, b = g + d, T = t[p], F = t[p + 1], y = t[w], A = t[w + 1], C = t[g], P = t[g + 1], M = t[b], S = t[b + 1], R = T, E = F, U = c[u], D = l * c[u + 1], z = y * U - A * D, B = y * D + A * U, N = c[2 * u], H = l * c[2 * u + 1], K = C * N - P * H, k = C * H + P * N, $ = c[3 * u], G = l * c[3 * u + 1], J = M * $ - S * G, L = M * G + S * $, x = R + K, I = E + k, Y = R - K, Q = E - k, W = z + J, j = B + L, V = l * (z - J), X = l * (B - L), st = x + W, rt = I + j, et = Y + X, ot = Q - V;
107
107
  if (t[p] = st, t[p + 1] = rt, t[w] = et, t[w + 1] = ot, f === 0) {
108
- var nt = U - W, at = I - j;
108
+ var nt = x - W, at = I - j;
109
109
  t[g] = nt, t[g + 1] = at;
110
110
  continue;
111
111
  }
112
- if (f !== u) {
113
- var it = Y, wt = -Q, gt = U, yt = -I, bt = -l * X, Ft = -l * V, Tt = -l * j, At = -l * W, Ct = it + bt, Pt = wt + Ft, Mt = gt + At, St = yt - Tt, ft = e + m - f, vt = e + d - f;
114
- t[ft] = Ct, t[ft + 1] = Pt, t[vt] = Mt, t[vt + 1] = St;
112
+ if (f !== v) {
113
+ var it = Y, wt = -Q, gt = x, yt = -I, bt = -l * X, Ft = -l * V, Tt = -l * j, At = -l * W, Ct = it + bt, Pt = wt + Ft, Mt = gt + At, St = yt - Tt, ft = e + d - f, ut = e + _ - f;
114
+ t[ft] = Ct, t[ft + 1] = Pt, t[ut] = Mt, t[ut + 1] = St;
115
115
  }
116
116
  }
117
117
  }
118
- }, _.prototype._singleRealTransform2 = function(t, s, n) {
118
+ }, m.prototype._singleRealTransform2 = function(t, s, n) {
119
119
  const r = this._out, a = this._data, e = a[s], h = a[s + n], i = e + h, l = e - h;
120
120
  r[t] = i, r[t + 1] = 0, r[t + 2] = l, r[t + 3] = 0;
121
- }, _.prototype._singleRealTransform4 = function(t, s, n) {
122
- const r = this._out, a = this._data, e = this._inv ? -1 : 1, h = n * 2, i = n * 3, l = a[s], c = a[s + n], d = a[s + h], m = a[s + i], u = l + d, f = l - d, v = c + m, p = e * (c - m), w = u + v, g = f, b = -p, T = u - v, F = f, y = p;
121
+ }, m.prototype._singleRealTransform4 = function(t, s, n) {
122
+ const r = this._out, a = this._data, e = this._inv ? -1 : 1, h = n * 2, i = n * 3, l = a[s], c = a[s + n], _ = a[s + h], d = a[s + i], v = l + _, f = l - _, u = c + d, p = e * (c - d), w = v + u, g = f, b = -p, T = v - u, F = f, y = p;
123
123
  r[t] = w, r[t + 1] = 0, r[t + 2] = g, r[t + 3] = b, r[t + 4] = T, r[t + 5] = 0, r[t + 6] = F, r[t + 7] = y;
124
124
  }, ht;
125
125
  }
126
- var Dt = Et();
127
- const zt = /* @__PURE__ */ Rt(Dt);
126
+ var Ut = Et();
127
+ const Dt = /* @__PURE__ */ Rt(Ut);
128
128
  class pt {
129
129
  constructor(o = 16e3, t = 512, s = 64) {
130
- this.sampleRate = o, this.nfft = t, this.nfilt = s, this.fft = new zt(t), this.melFilters = this.createMelFilterbank();
130
+ this._sampleRate = o, this._nfft = t, this._nfilt = s, this._fft = new Dt(t), this._melFilters = this._createMelFilterbank();
131
131
  }
132
- hzToMel(o) {
132
+ _hzToMel(o) {
133
133
  return 2595 * Math.log10(1 + o / 700);
134
134
  }
135
- melToHz(o) {
135
+ _melToHz(o) {
136
136
  return 700 * (10 ** (o / 2595) - 1);
137
137
  }
138
- createMelFilterbank() {
139
- const t = this.sampleRate / 2, s = this.hzToMel(0), n = this.hzToMel(t), r = new Float32Array(this.nfilt + 2);
140
- for (let i = 0; i < this.nfilt + 2; i++)
141
- r[i] = s + i * (n - s) / (this.nfilt + 1);
142
- const e = r.map((i) => this.melToHz(i)).map((i) => Math.floor((this.nfft + 1) * i / this.sampleRate)), h = [];
143
- for (let i = 0; i < this.nfilt; i++) {
144
- const l = new Float32Array(Math.floor(this.nfft / 2) + 1);
138
+ _createMelFilterbank() {
139
+ const t = this._sampleRate / 2, s = this._hzToMel(0), n = this._hzToMel(t), r = new Float32Array(this._nfilt + 2);
140
+ for (let i = 0; i < this._nfilt + 2; i++)
141
+ r[i] = s + i * (n - s) / (this._nfilt + 1);
142
+ const e = r.map((i) => this._melToHz(i)).map((i) => Math.floor((this._nfft + 1) * i / this._sampleRate)), h = [];
143
+ for (let i = 0; i < this._nfilt; i++) {
144
+ const l = new Float32Array(Math.floor(this._nfft / 2) + 1);
145
145
  for (let c = e[i]; c < e[i + 1]; c++)
146
146
  l[c] = (c - e[i]) / (e[i + 1] - e[i]);
147
147
  for (let c = e[i + 1]; c < e[i + 2]; c++)
@@ -152,62 +152,93 @@ class pt {
152
152
  }
153
153
  /** Returns a flat Float32Array of shape [numFrames × nfilt]. */
154
154
  logfbank(o) {
155
- const t = Math.floor(0.025 * this.sampleRate), s = Math.floor(0.01 * this.sampleRate), n = 1 + Math.ceil((o.length - t) / s), r = new Float32Array(n * this.nfilt), a = new Float32Array(this.nfft), e = this.fft.createComplexArray();
155
+ const t = Math.floor(0.025 * this._sampleRate), s = Math.floor(0.01 * this._sampleRate), n = 1 + Math.ceil((o.length - t) / s), r = new Float32Array(n * this._nfilt), a = new Float32Array(this._nfft), e = this._fft.createComplexArray();
156
156
  for (let h = 0; h < n; h++) {
157
157
  const i = h * s;
158
158
  a.fill(0);
159
- for (let d = 0; d < t && i + d < o.length; d++)
160
- a[d] = o[i + d];
161
- const l = this.fft.toComplexArray(a, null);
162
- this.fft.transform(e, l);
163
- const c = new Float32Array(Math.floor(this.nfft / 2) + 1);
164
- for (let d = 0; d < c.length; d++) {
165
- const m = e[2 * d], u = e[2 * d + 1];
166
- c[d] = 1 / this.nfft * (m * m + u * u), c[d] === 0 && (c[d] = 1e-30);
159
+ for (let _ = 0; _ < t && i + _ < o.length; _++)
160
+ a[_] = o[i + _];
161
+ const l = this._fft.toComplexArray(a, null);
162
+ this._fft.transform(e, l);
163
+ const c = new Float32Array(Math.floor(this._nfft / 2) + 1);
164
+ for (let _ = 0; _ < c.length; _++) {
165
+ const d = e[2 * _], v = e[2 * _ + 1];
166
+ c[_] = 1 / this._nfft * (d * d + v * v), c[_] === 0 && (c[_] = 1e-30);
167
167
  }
168
- for (let d = 0; d < this.nfilt; d++) {
169
- let m = 0;
170
- const u = this.melFilters[d];
168
+ for (let _ = 0; _ < this._nfilt; _++) {
169
+ let d = 0;
170
+ const v = this._melFilters[_];
171
171
  for (let f = 0; f < c.length; f++)
172
- m += c[f] * u[f];
173
- m === 0 && (m = 1e-30), r[h * this.nfilt + d] = Math.log(m);
172
+ d += c[f] * v[f];
173
+ d === 0 && (d = 1e-30), r[h * this._nfilt + _] = Math.log(d);
174
174
  }
175
175
  }
176
176
  return r;
177
177
  }
178
+ maxCosineSim(o, t) {
179
+ let s = 0;
180
+ for (const n of t) {
181
+ let r = 0;
182
+ for (let e = 0; e < n.length; e++) r += o[e] * n[e];
183
+ const a = (r + 1) / 2;
184
+ a > s && (s = a);
185
+ }
186
+ return s;
187
+ }
178
188
  }
179
- async function mt(_ = q, o = tt) {
189
+ async function dt(m = q, o = tt) {
180
190
  const t = await import(
181
191
  /* @vite-ignore */
182
192
  o
183
193
  );
184
- return t.env.wasm.wasmPaths = _, t.env.wasm.numThreads = 1, t;
194
+ return t.env.wasm.wasmPaths = m, t.env.wasm.numThreads = 1, t;
185
195
  }
186
196
  let lt = null;
187
- async function dt(_ = _t, o = q, t = tt) {
188
- return lt || (lt = mt(o, t).then(
189
- (s) => s.InferenceSession.create(_, {
197
+ async function _t(m = mt, o = q, t = tt) {
198
+ return lt || (lt = dt(o, t).then(
199
+ (s) => s.InferenceSession.create(m, {
190
200
  executionProviders: ["wasm"],
191
201
  graphOptimizationLevel: "all"
192
202
  })
193
203
  )), lt;
194
204
  }
195
- const xt = new pt();
196
205
  class O {
206
+ static loadWords(o = Z) {
207
+ try {
208
+ const t = localStorage.getItem(o);
209
+ return t ? JSON.parse(t) : [];
210
+ } catch {
211
+ return [];
212
+ }
213
+ }
214
+ static saveWord(o, t = Z) {
215
+ const s = O.loadWords(t).filter((n) => n.word_name !== o.word_name);
216
+ localStorage.setItem(t, JSON.stringify([...s, o]));
217
+ }
218
+ static deleteWord(o, t = Z) {
219
+ try {
220
+ const s = O.loadWords(t).filter((n) => n.word_name !== o);
221
+ localStorage.setItem(t, JSON.stringify(s));
222
+ } catch {
223
+ }
224
+ }
225
+ }
226
+ class It {
197
227
  constructor(o, t) {
198
228
  this._started = !1, this._inferring = !1, this._audioCtx = null, this._stream = null, this._refEmbeddings = /* @__PURE__ */ new Map(), this._lastMatchAt = 0, this._lastInferenceAt = 0;
199
229
  const {
200
230
  refsStorageKey: s = Z,
201
- thresholdStorageKey: n = Wt,
231
+ thresholdStorageKey: n = xt,
202
232
  wasmPaths: r = q,
203
- modelPath: a = _t,
204
- audioProcessorPath: e = It,
205
- ortCdnUrl: h = tt
233
+ modelPath: a = mt,
234
+ audioProcessorPath: e = zt,
235
+ ortCdnUrl: h = tt,
236
+ audioUtils: i = new pt()
206
237
  } = t || {};
207
- this._commands = o, this._refsStorageKey = s, this._thresholdStorageKey = n, this._audioProcessorPath = e, this._wasmPaths = r, this._modelPath = a, this._ortCdnUrl = h;
238
+ this._audioUtils = i, this._commands = o, this._refsStorageKey = s, this._thresholdStorageKey = n, this._audioProcessorPath = e, this._wasmPaths = r, this._modelPath = a, this._ortCdnUrl = h;
208
239
  try {
209
- const i = localStorage.getItem(this._thresholdStorageKey);
210
- this._threshold = i !== null ? Math.max(0, Math.min(1, Number(i))) : 0.65;
240
+ const l = localStorage.getItem(this._thresholdStorageKey);
241
+ this._threshold = l !== null ? Math.max(0, Math.min(1, Number(l))) : 0.65;
211
242
  } catch {
212
243
  this._threshold = 0.65;
213
244
  }
@@ -227,7 +258,7 @@ class O {
227
258
  return this._started;
228
259
  }
229
260
  async _init() {
230
- await dt(this._modelPath, this._wasmPaths, this._ortCdnUrl);
261
+ await _t(this._modelPath, this._wasmPaths, this._ortCdnUrl);
231
262
  const o = /* @__PURE__ */ new Set();
232
263
  for (const t of this._commands)
233
264
  for (const s of t.triggers)
@@ -288,16 +319,16 @@ class O {
288
319
  if (!(t - this._lastInferenceAt < 300)) {
289
320
  this._lastInferenceAt = t, this._inferring = !0;
290
321
  try {
291
- const [s, n] = await Promise.all([mt(this._wasmPaths, this._ortCdnUrl), dt(this._modelPath, this._wasmPaths, this._ortCdnUrl)]), r = xt.logfbank(o), a = new s.Tensor("float32", r, [1, 1, 149, 64]), e = await n.run({ input: a }), h = e[Object.keys(e)[0]].data;
322
+ const [s, n] = await Promise.all([dt(this._wasmPaths, this._ortCdnUrl), _t(this._modelPath, this._wasmPaths, this._ortCdnUrl)]), r = this._audioUtils.logfbank(o), a = new s.Tensor("float32", r, [1, 1, 149, 64]), e = await n.run({ input: a }), h = e[Object.keys(e)[0]].data;
292
323
  let i = !1;
293
324
  for (const l of this._commands) {
294
325
  if (i) break;
295
326
  for (const c of l.triggers) {
296
- const d = this._refEmbeddings.get(c.name);
297
- if (!d) continue;
298
- const m = this._maxCosineSim(h, d);
299
- if (m >= this._threshold && t - this._lastMatchAt > 2e3) {
300
- this._lastMatchAt = t, console.info(`[Mellon] match: "${c}" sim=${m.toFixed(3)}`), typeof l.onMatch == "function" && l.onMatch(c.name, m), i = !0;
327
+ const _ = this._refEmbeddings.get(c.name);
328
+ if (!_) continue;
329
+ const d = this._audioUtils.maxCosineSim(h, _);
330
+ if (d >= this._threshold && t - this._lastMatchAt > 2e3) {
331
+ this._lastMatchAt = t, console.info(`[Mellon] match: "${c}" sim=${d.toFixed(3)}`), typeof l.onMatch == "function" && l.onMatch(c.name, d), i = !0;
301
332
  break;
302
333
  }
303
334
  }
@@ -309,40 +340,10 @@ class O {
309
340
  }
310
341
  }
311
342
  }
312
- _maxCosineSim(o, t) {
313
- let s = 0;
314
- for (const n of t) {
315
- let r = 0;
316
- for (let e = 0; e < n.length; e++) r += o[e] * n[e];
317
- const a = (r + 1) / 2;
318
- a > s && (s = a);
319
- }
320
- return s;
321
- }
322
- static loadWords(o = Z) {
323
- try {
324
- const t = localStorage.getItem(o);
325
- return t ? JSON.parse(t) : [];
326
- } catch {
327
- return [];
328
- }
329
- }
330
- static saveWord(o, t = Z) {
331
- const s = O.loadWords(t).filter((n) => n.word_name !== o.word_name);
332
- localStorage.setItem(t, JSON.stringify([...s, o]));
333
- }
334
- static deleteWord(o, t = Z) {
335
- try {
336
- const s = O.loadWords(t).filter((n) => n.word_name !== o);
337
- localStorage.setItem(t, JSON.stringify(s));
338
- } catch {
339
- }
340
- }
341
343
  }
342
- const Ut = new pt();
343
- class jt {
344
+ class Wt {
344
345
  constructor(o, t) {
345
- this._config = {}, this._samples = [], this._wordName = o, this._config.modelPath = (t == null ? void 0 : t.modelPath) || _t, this._config.wasmPaths = (t == null ? void 0 : t.wasmPaths) || q, this._config.ortCdnUrl = (t == null ? void 0 : t.ortCdnUrl) || tt;
346
+ this._config = {}, this._samples = [], this._wordName = o, this._config.modelPath = (t == null ? void 0 : t.modelPath) || mt, this._config.wasmPaths = (t == null ? void 0 : t.wasmPaths) || q, this._config.ortCdnUrl = (t == null ? void 0 : t.ortCdnUrl) || tt, this._audioUtils = (t == null ? void 0 : t.audioUtils) ?? new pt();
346
347
  }
347
348
  /** Records 1.5 s of audio, stores the decoded PCM, returns new sample count. */
348
349
  async recordSample() {
@@ -354,8 +355,8 @@ class jt {
354
355
  var l;
355
356
  for (const c of o.getTracks()) c.stop();
356
357
  try {
357
- const d = await new Blob(i, { type: ((l = i[0]) == null ? void 0 : l.type) || "audio/webm" }).arrayBuffer(), m = await t.decodeAudioData(d);
358
- await t.close(), a(m.getChannelData(0).slice());
358
+ const _ = await new Blob(i, { type: ((l = i[0]) == null ? void 0 : l.type) || "audio/webm" }).arrayBuffer(), d = await t.decodeAudioData(_);
359
+ await t.close(), a(d.getChannelData(0).slice());
359
360
  } catch (c) {
360
361
  e(c);
361
362
  }
@@ -376,22 +377,24 @@ class jt {
376
377
  }
377
378
  /** Runs ONNX inference on every recorded sample to produce reference embeddings. */
378
379
  async generateRef() {
379
- const [o, t] = await Promise.all([mt(this._config.wasmPaths, this._config.ortCdnUrl), dt(this._config.modelPath, this._config.wasmPaths, this._config.ortCdnUrl)]), s = [];
380
+ const [o, t] = await Promise.all([dt(this._config.wasmPaths, this._config.ortCdnUrl), _t(this._config.modelPath, this._config.wasmPaths, this._config.ortCdnUrl)]), s = [];
380
381
  for (const n of this._samples) {
381
- const r = Ut.logfbank(n), a = new o.Tensor("float32", r, [1, 1, 149, 64]), e = await t.run({ input: a }), h = Array.from(e[Object.keys(e)[0]].data);
382
+ const r = this._audioUtils.logfbank(n), a = new o.Tensor("float32", r, [1, 1, 149, 64]), e = await t.run({ input: a }), h = Array.from(e[Object.keys(e)[0]].data);
382
383
  s.push(h);
383
384
  }
384
385
  return { word_name: this._wordName, model_type: "resnet_50_arc", embeddings: s };
385
386
  }
386
387
  }
387
- const q = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/", tt = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/ort.wasm.min.mjs", _t = "https://huggingface.co/ComicScrip/mellon/resolve/main/model.onnx", It = "https://cdn.jsdelivr.net/npm/mellon@0.0.14/dist/assets/audio-processor.js", Z = "mellon-refs", Wt = "mellon-threshold";
388
+ const q = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/", tt = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/ort.wasm.min.mjs", mt = "https://huggingface.co/ComicScrip/mellon/resolve/main/model.onnx", zt = "https://cdn.jsdelivr.net/npm/mellon@0.0.14/dist/assets/audio-processor.js", Z = "mellon-refs", xt = "mellon-threshold";
388
389
  export {
389
- It as DEFAULT_AUDIO_PROCESSOR_PATH,
390
- _t as DEFAULT_MODEL_PATH,
390
+ pt as AudioUtils,
391
+ zt as DEFAULT_AUDIO_PROCESSOR_PATH,
392
+ mt as DEFAULT_MODEL_PATH,
391
393
  tt as DEFAULT_ORT_CDN_URL,
392
394
  Z as DEFAULT_REFS_STORAGE_KEY,
393
- Wt as DEFAULT_THRESHOLD_STORAGE_KEY,
395
+ xt as DEFAULT_THRESHOLD_STORAGE_KEY,
394
396
  q as DEFAULT_WASM_PATHS,
395
- jt as EnrollmentSession,
396
- O as Mellon
397
+ It as Detector,
398
+ Wt as EnrollmentSession,
399
+ O as Storage
397
400
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mellon",
3
- "version": "0.0.18",
3
+ "version": "0.0.19",
4
4
  "description": "Offline, in-browser voice commands powered by EfficientWord-Net (ResNet-50 ArcFace).",
5
5
  "type": "module",
6
6
  "main": "./dist/mellon.cjs",