mellon 0.0.9 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -4
- package/dist/mellon.cjs +2 -2
- package/dist/mellon.mjs +256 -263
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -66,14 +66,15 @@ const stt = new Mellon({
|
|
|
66
66
|
],
|
|
67
67
|
})
|
|
68
68
|
|
|
69
|
-
await stt.
|
|
69
|
+
await stt.init() // fetches refs and loads the model
|
|
70
|
+
await stt.start() // opens the mic and listens for all registered refs
|
|
70
71
|
|
|
71
72
|
stt.addEventListener('match', (e) => {
|
|
72
73
|
console.log(`Detected "${e.detail.name}" (${(e.detail.confidence * 100).toFixed(1)}%)`)
|
|
73
74
|
})
|
|
74
75
|
```
|
|
75
76
|
|
|
76
|
-
Refs are fetched
|
|
77
|
+
Refs are fetched and registered during `init()`. `start()` then listens for every registered word automatically. You can enroll your own words — see [Enrolling custom words](#enrolling-custom-words).
|
|
77
78
|
|
|
78
79
|
---
|
|
79
80
|
|
|
@@ -106,7 +107,7 @@ class Mellon extends EventTarget {
|
|
|
106
107
|
readonly isRunning: boolean
|
|
107
108
|
|
|
108
109
|
init(onProgress?: (pct: number) => void): Promise<void>
|
|
109
|
-
start(
|
|
110
|
+
start(): Promise<void>
|
|
110
111
|
stop(): void
|
|
111
112
|
addCustomWord(refData: RefData): void
|
|
112
113
|
enrollWord(wordName: string): EnrollmentSession
|
|
@@ -124,7 +125,6 @@ class Mellon extends EventTarget {
|
|
|
124
125
|
| Option | Type | Default | Description |
|
|
125
126
|
|---|---|---|---|
|
|
126
127
|
| `refs` | `(string \| RefData)[]` | `[]` | Refs to preload — URL strings are fetched during `init()` |
|
|
127
|
-
| `words` | `string[]` | `[]` | Subset of loaded refs to activate (defaults to all loaded refs) |
|
|
128
128
|
| `threshold` | `number` | `0.65` | Detection threshold (0–1) |
|
|
129
129
|
| `relaxationMs` | `number` | `2000` | Min ms between match events |
|
|
130
130
|
| `inferenceGapMs` | `number` | `300` | Min ms between inference runs |
|
|
@@ -259,6 +259,11 @@ Cross-Origin-Embedder-Policy: require-corp
|
|
|
259
259
|
|
|
260
260
|
---
|
|
261
261
|
|
|
262
|
+
## Science behind the software
|
|
263
|
+
|
|
264
|
+
If you're interested, check out [this paper](https://arxiv.org/pdf/2111.00379).
|
|
265
|
+
|
|
266
|
+
|
|
262
267
|
## License
|
|
263
268
|
|
|
264
269
|
MIT
|
package/dist/mellon.cjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"use strict";Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});const Kt="0.0.
|
|
1
|
+
"use strict";Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});const Kt="0.0.10",Yt=[1,1,149,64],Qt=`https://cdn.jsdelivr.net/npm/mellon@${Kt}/dist/assets`,st={assetsPath:`${Qt}`};let I=null,q=null,tt=null;function Vt({assetsPath:a}={}){a!==void 0&&(st.assetsPath=a),I=null,q=null,tt=null}async function Xt(a){return I?(a==null||a(1),I):q||(q=(async()=>{const n=st.assetsPath.endsWith("/")?st.assetsPath:st.assetsPath+"/",t=n+"ort.all.min.mjs",e=n+"model.onnx";tt=await new Function("url","return import(url)")(t),tt.env.wasm.wasmPaths=n;const s=await fetch(e);if(!s.ok)throw new Error(`Failed to fetch model: ${s.status}`);const i=parseInt(s.headers.get("content-length")||"0",10),o=s.body.getReader(),c=[];let l=0;for(;;){const{done:m,value:_}=await o.read();if(m)break;c.push(_),l+=_.byteLength,i>0&&(a==null||a(l/i))}const h=new Uint8Array(l);let d=0;for(const m of c)h.set(m,d),d+=m.byteLength;return I=await tt.InferenceSession.create(h.buffer,{executionProviders:["wasm"],graphOptimizationLevel:"all"}),a==null||a(1),I})(),q)}async function Nt(a){if(!I)throw new Error("Model not loaded — call loadModel() first");const n=new tt.Tensor("float32",a,Yt),t=await I.run({input:n}),e=Object.keys(t)[0];return t[e].data}function Zt(a){return a&&a.__esModule&&Object.prototype.hasOwnProperty.call(a,"default")?a.default:a}var ft,At;function Ot(){if(At)return ft;At=1;function a(n){if(this.size=n|0,this.size<=1||(this.size&this.size-1)!==0)throw new Error("FFT size must be a power of two and bigger than 1");this._csize=n<<1;for(var t=new Array(this.size*2),e=0;e<t.length;e+=2){const l=Math.PI*e/this.size;t[e]=Math.cos(l),t[e+1]=-Math.sin(l)}this.table=t;for(var r=0,s=1;this.size>s;s<<=1)r++;this._width=r%2===0?r-1:r,this._bitrev=new Array(1<<this._width);for(var i=0;i<this._bitrev.length;i++){this._bitrev[i]=0;for(var o=0;o<this._width;o+=2){var c=this._width-o-2;this._bitrev[i]|=(i>>>o&3)<<c}}this._out=null,this._data=null,this._inv=0}return 
ft=a,a.prototype.fromComplexArray=function(t,e){for(var r=e||new Array(t.length>>>1),s=0;s<t.length;s+=2)r[s>>>1]=t[s];return r},a.prototype.createComplexArray=function(){const t=new Array(this._csize);for(var e=0;e<t.length;e++)t[e]=0;return t},a.prototype.toComplexArray=function(t,e){for(var r=e||this.createComplexArray(),s=0;s<r.length;s+=2)r[s]=t[s>>>1],r[s+1]=0;return r},a.prototype.completeSpectrum=function(t){for(var e=this._csize,r=e>>>1,s=2;s<r;s+=2)t[e-s]=t[s],t[e-s+1]=-t[s+1]},a.prototype.transform=function(t,e){if(t===e)throw new Error("Input and output buffers must be different");this._out=t,this._data=e,this._inv=0,this._transform4(),this._out=null,this._data=null},a.prototype.realTransform=function(t,e){if(t===e)throw new Error("Input and output buffers must be different");this._out=t,this._data=e,this._inv=0,this._realTransform4(),this._out=null,this._data=null},a.prototype.inverseTransform=function(t,e){if(t===e)throw new Error("Input and output buffers must be different");this._out=t,this._data=e,this._inv=1,this._transform4();for(var r=0;r<t.length;r++)t[r]/=this.size;this._out=null,this._data=null},a.prototype._transform4=function(){var t=this._out,e=this._csize,r=this._width,s=1<<r,i=e/s<<1,o,c,l=this._bitrev;if(i===4)for(o=0,c=0;o<e;o+=i,c++){const u=l[c];this._singleTransform2(o,u,s)}else for(o=0,c=0;o<e;o+=i,c++){const u=l[c];this._singleTransform4(o,u,s)}var h=this._inv?-1:1,d=this.table;for(s>>=2;s>=2;s>>=2){i=e/s<<1;var m=i>>>2;for(o=0;o<e;o+=i)for(var _=o+m,g=o,f=0;g<_;g+=2,f+=s){const u=g,p=u+m,v=p+m,w=v+m,b=t[u],A=t[u+1],E=t[p],y=t[p+1],F=t[v],C=t[v+1],M=t[w],T=t[w+1],x=b,R=A,S=d[f],z=h*d[f+1],N=E*S-y*z,k=E*z+y*S,P=d[2*f],L=h*d[2*f+1],G=F*P-C*L,H=F*L+C*P,J=d[3*f],K=h*d[3*f+1],Y=M*J-T*K,Q=M*K+T*J,V=x+G,W=R+H,B=x-G,X=R-H,Z=N+Y,U=k+Q,$=h*(N-Y),O=h*(k-Q),et=V+Z,at=W+U,it=V-Z,ct=W-U,lt=B+O,ht=X-$,dt=B-O,ut=X+$;t[u]=et,t[u+1]=at,t[p]=lt,t[p+1]=ht,t[v]=it,t[v+1]=ct,t[w]=dt,t[w+1]=ut}}},a.prototype._singleTransform2=function(t,e,r){const 
s=this._out,i=this._data,o=i[e],c=i[e+1],l=i[e+r],h=i[e+r+1],d=o+l,m=c+h,_=o-l,g=c-h;s[t]=d,s[t+1]=m,s[t+2]=_,s[t+3]=g},a.prototype._singleTransform4=function(t,e,r){const s=this._out,i=this._data,o=this._inv?-1:1,c=r*2,l=r*3,h=i[e],d=i[e+1],m=i[e+r],_=i[e+r+1],g=i[e+c],f=i[e+c+1],u=i[e+l],p=i[e+l+1],v=h+g,w=d+f,b=h-g,A=d-f,E=m+u,y=_+p,F=o*(m-u),C=o*(_-p),M=v+E,T=w+y,x=b+C,R=A-F,S=v-E,z=w-y,N=b-C,k=A+F;s[t]=M,s[t+1]=T,s[t+2]=x,s[t+3]=R,s[t+4]=S,s[t+5]=z,s[t+6]=N,s[t+7]=k},a.prototype._realTransform4=function(){var t=this._out,e=this._csize,r=this._width,s=1<<r,i=e/s<<1,o,c,l=this._bitrev;if(i===4)for(o=0,c=0;o<e;o+=i,c++){const mt=l[c];this._singleRealTransform2(o,mt>>>1,s>>>1)}else for(o=0,c=0;o<e;o+=i,c++){const mt=l[c];this._singleRealTransform4(o,mt>>>1,s>>>1)}var h=this._inv?-1:1,d=this.table;for(s>>=2;s>=2;s>>=2){i=e/s<<1;var m=i>>>1,_=m>>>1,g=_>>>1;for(o=0;o<e;o+=i)for(var f=0,u=0;f<=g;f+=2,u+=s){var p=o+f,v=p+_,w=v+_,b=w+_,A=t[p],E=t[p+1],y=t[v],F=t[v+1],C=t[w],M=t[w+1],T=t[b],x=t[b+1],R=A,S=E,z=d[u],N=h*d[u+1],k=y*z-F*N,P=y*N+F*z,L=d[2*u],G=h*d[2*u+1],H=C*L-M*G,J=C*G+M*L,K=d[3*u],Y=h*d[3*u+1],Q=T*K-x*Y,V=T*Y+x*K,W=R+H,B=S+J,X=R-H,Z=S-J,U=k+Q,$=P+V,O=h*(k-Q),et=h*(P-V),at=W+U,it=B+$,ct=X+et,lt=Z-O;if(t[p]=at,t[p+1]=it,t[v]=ct,t[v+1]=lt,f===0){var ht=W-U,dt=B-$;t[w]=ht,t[w+1]=dt;continue}if(f!==g){var ut=X,Dt=-Z,jt=W,Wt=-B,Bt=-h*et,Ut=-h*O,$t=-h*$,Pt=-h*U,Lt=ut+Bt,Gt=Dt+Ut,Ht=jt+Pt,Jt=Wt-$t,bt=o+_-f,Et=o+m-f;t[bt]=Lt,t[bt+1]=Gt,t[Et]=Ht,t[Et+1]=Jt}}}},a.prototype._singleRealTransform2=function(t,e,r){const s=this._out,i=this._data,o=i[e],c=i[e+r],l=o+c,h=o-c;s[t]=l,s[t+1]=0,s[t+2]=h,s[t+3]=0},a.prototype._singleRealTransform4=function(t,e,r){const s=this._out,i=this._data,o=this._inv?-1:1,c=r*2,l=r*3,h=i[e],d=i[e+r],m=i[e+c],_=i[e+l],g=h+m,f=h-m,u=d+_,p=o*(d-_),v=g+u,w=f,b=-p,A=g-u,E=f,y=p;s[t]=v,s[t+1]=0,s[t+2]=w,s[t+3]=b,s[t+4]=A,s[t+5]=0,s[t+6]=E,s[t+7]=y},ft}var qt=Ot();const 
te=Zt(qt),nt=16e3,j=512,D=64,Ft=Math.floor(.025*nt),Ct=Math.floor(.01*nt);function Mt(a){return 2595*Math.log10(1+a/700)}function ee(a){return 700*(10**(a/2595)-1)}function se(){const a=Mt(0),n=Mt(nt/2),t=new Float64Array(D+2);for(let o=0;o<D+2;o++)t[o]=a+o*(n-a)/(D+1);const r=t.map(o=>ee(o)).map(o=>Math.floor((j+1)*o/nt)),s=[],i=Math.floor(j/2)+1;for(let o=0;o<D;o++){const c=new Float32Array(i);for(let l=r[o];l<r[o+1];l++)c[l]=(l-r[o])/(r[o+1]-r[o]);for(let l=r[o+1];l<r[o+2];l++)c[l]=(r[o+2]-l)/(r[o+2]-r[o+1]);s.push(c)}return s}const ne=se(),rt=new te(j),_t=new Float32Array(j),Tt=rt.createComplexArray(),pt=rt.createComplexArray(),xt=new Float32Array(Math.floor(j/2)+1);function kt(a){const n=1+Math.ceil((a.length-Ft)/Ct),t=new Float32Array(n*D),e=Math.floor(j/2)+1;for(let r=0;r<n;r++){const s=r*Ct;_t.fill(0);for(let i=0;i<Ft&&s+i<a.length;i++)_t[i]=a[s+i];rt.toComplexArray(_t,Tt),rt.transform(pt,Tt);for(let i=0;i<e;i++){const o=pt[2*i],c=pt[2*i+1],l=(o*o+c*c)/j;xt[i]=l===0?1e-30:l}for(let i=0;i<D;i++){const o=ne[i];let c=0;for(let l=0;l<e;l++)c+=xt[l]*o[l];t[r*D+i]=Math.log(c===0?1e-30:c)}}return t}function re(a,n){let t=0;for(let e=0;e<a.length;e++)t+=a[e]*n[e];return(t+1)/2}function oe(a,n){let t=0;for(const e of n){const r=re(a,e);r>t&&(t=r)}return t}class Rt extends EventTarget{constructor({name:n,refEmbeddings:t,threshold:e=.65,relaxationMs:r=2e3,inferenceGapMs:s=300}){super(),this.name=n,this.refEmbeddings=t,this.threshold=e,this.relaxationMs=r,this.inferenceGapMs=s,this._lastDetectionAt=0,this._lastInferenceAt=0,this._lastScore=0}get lastScore(){return this._lastScore}async scoreFrame(n){const t=Date.now();if(t-this._lastInferenceAt<this.inferenceGapMs)return null;this._lastInferenceAt=t;const e=kt(n),r=await Nt(e),s=oe(r,this.refEmbeddings);return this._lastScore=s,s>=this.threshold&&t-this._lastDetectionAt>=this.relaxationMs&&(this._lastDetectionAt=t,this.dispatchEvent(new CustomEvent("match",{detail:{name:this.name,confidence:s,timestamp:t}}))),s}}const 
St=16e3,ae=1500,vt=24e3;function zt(a){if(a.length===vt)return a;const n=new Float32Array(vt);return n.set(a.subarray(0,vt)),n}class It extends EventTarget{constructor(n){super(),this.wordName=n.trim().toLowerCase(),this.samples=[]}get sampleCount(){return this.samples.length}async recordSample(){const n=await navigator.mediaDevices.getUserMedia({audio:!0});return new Promise((t,e)=>{const r=new AudioContext({sampleRate:St}),s=new MediaRecorder(n),i=[];this.dispatchEvent(new CustomEvent("recording-start")),s.ondataavailable=o=>{o.data.size>0&&i.push(o.data)},s.onstop=async()=>{n.getTracks().forEach(o=>o.stop());try{const c=await new Blob(i,{type:"audio/webm"}).arrayBuffer(),l=await r.decodeAudioData(c);await r.close();const h=l.getChannelData(0),d=zt(new Float32Array(h)),m=this._push(d,`Recorded #${this.samples.length}`);t(m)}catch(o){await r.close().catch(()=>{}),e(o)}},s.start(),setTimeout(()=>s.stop(),ae)})}async addAudioFile(n){const t=await n.arrayBuffer(),e=new AudioContext({sampleRate:St}),r=await e.decodeAudioData(t);await e.close();const s=r.getChannelData(0),i=zt(new Float32Array(s));return this._push(i,n.name)}removeSample(n){this.samples.splice(n,1),this.dispatchEvent(new CustomEvent("samples-changed",{detail:{count:this.samples.length}}))}clearSamples(){this.samples=[],this.dispatchEvent(new CustomEvent("samples-changed",{detail:{count:0}}))}async generateRef(){if(this.samples.length<3)throw new Error(`Need at least 3 samples (currently have ${this.samples.length})`);this.dispatchEvent(new CustomEvent("generating",{detail:{total:this.samples.length}}));const n=[];for(let t=0;t<this.samples.length;t++){const e=kt(this.samples[t].audioBuffer),r=await Nt(e);n.push(Array.from(r)),this.dispatchEvent(new CustomEvent("progress",{detail:{done:t+1,total:this.samples.length}}))}return{word_name:this.wordName,model_type:"resnet_50_arc",embeddings:n}}_push(n,t){this.samples.push({audioBuffer:n,name:t});const e=this.samples.length;return this.dispatchEvent(new 
CustomEvent("sample-added",{detail:{count:e,name:t}})),e}}const ie=`/**
|
|
2
2
|
* public/audio-processor.js
|
|
3
3
|
* AudioWorklet that runs at 16 kHz and continuously emits the last
|
|
4
4
|
* 1.5-second window (24 000 samples) via a circular buffer.
|
|
@@ -35,4 +35,4 @@ class AudioProcessor extends AudioWorkletProcessor {
|
|
|
35
35
|
}
|
|
36
36
|
|
|
37
37
|
registerProcessor('audio-processor', AudioProcessor)
|
|
38
|
-
`;let wt=null;function ce(){if(!wt){const
|
|
38
|
+
`;let wt=null;function ce(){if(!wt){const a=new Blob([ie],{type:"application/javascript"});wt=URL.createObjectURL(a)}return wt}const gt="mellon_custom_refs";function ot(){try{const a=localStorage.getItem(gt);return a?JSON.parse(a):[]}catch{return[]}}function le(a){const n=ot().filter(t=>t.word_name!==a.word_name);n.push(a),localStorage.setItem(gt,JSON.stringify(n))}function he(a){const n=ot().filter(t=>t.word_name!==a);localStorage.setItem(gt,JSON.stringify(n))}function de(a){const n=JSON.stringify(a,null,2),t=new Blob([n],{type:"application/json"}),e=URL.createObjectURL(t),r=Object.assign(document.createElement("a"),{href:e,download:`${a.word_name}_ref.json`});document.body.appendChild(r),r.click(),document.body.removeChild(r),URL.revokeObjectURL(e)}async function ue(a){const n=await a.text();let t;try{t=JSON.parse(n)}catch{throw new Error("Invalid JSON")}if(!t.embeddings||!Array.isArray(t.embeddings)||!t.embeddings.length)throw new Error('Missing or empty "embeddings" array');if(!Array.isArray(t.embeddings[0]))throw new Error('"embeddings" must be a 2D array');return t.word_name||(t.word_name=a.name.replace(/_ref\.json$/i,"").replace(/\.json$/i,"")),t}class yt extends EventTarget{constructor(n={}){super(),this._opts={refs:n.refs??[],threshold:n.threshold??.65,relaxationMs:n.relaxationMs??2e3,inferenceGapMs:n.inferenceGapMs??300,assetsPath:n.assetsPath},this._refs=new Map,this._detectors=new Map,this._audioCtx=null,this._workletNode=null,this._stream=null,this._initialized=!1,this._running=!1}get isInitialized(){return this._initialized}get isRunning(){return this._running}async init(n){var e;if(this._initialized){n==null||n(1);return}this._opts.assetsPath&&Vt({assetsPath:this._opts.assetsPath});try{await Xt(n)}catch(r){throw this.dispatchEvent(new CustomEvent("error",{detail:{error:r}})),r}const t=await ot();for(const r of this._opts.refs){const s=(e=r.match(/\/([^/]+?)_ref\.json$/))==null?void 0:e[1],i=t.find(o=>o.word_name===s);try{let 
o;if(i){this.addCustomWord(i);break}else if(typeof r=="string"){console.log("fetching ref : ",r);const c=await fetch(r);if(!c.ok)throw new Error(`HTTP ${c.status}`);o=await c.json()}else o=r;yt.saveWord(o),this.addCustomWord(o)}catch(o){const c=typeof r=="string"?r:r.word_name;console.warn(`[Mellon] Failed to load ref "${c}": ${o.message}`)}}this._initialized=!0,this.dispatchEvent(new CustomEvent("ready"))}async start(){this._initialized||await this.init();try{this._stream=await navigator.mediaDevices.getUserMedia({audio:!0})}catch(e){const r=new Error(`Microphone access denied: ${e.message}`);throw this.dispatchEvent(new CustomEvent("error",{detail:{error:r}})),r}this._audioCtx=new AudioContext({sampleRate:16e3});const n=ce();await this._audioCtx.audioWorklet.addModule(n);const t=this._audioCtx.createMediaStreamSource(this._stream);this._workletNode=new AudioWorkletNode(this._audioCtx,"audio-processor"),t.connect(this._workletNode),this._workletNode.connect(this._audioCtx.destination);for(const[e,r]of this._refs){const s=new Rt({name:e,refEmbeddings:r.embeddings,threshold:this._opts.threshold,relaxationMs:this._opts.relaxationMs,inferenceGapMs:this._opts.inferenceGapMs});s.addEventListener("match",i=>{this.dispatchEvent(new CustomEvent("match",{detail:i.detail}))}),this._detectors.set(e,s)}this._workletNode.port.onmessage=async e=>{const r=[];for(const s of this._detectors.values())r.push(s.scoreFrame(e.data));await Promise.allSettled(r)},this._running=!0}stop(){this._workletNode&&(this._workletNode.port.onmessage=null,this._workletNode.disconnect(),this._workletNode=null),this._stream&&(this._stream.getTracks().forEach(n=>n.stop()),this._stream=null),this._audioCtx&&(this._audioCtx.close(),this._audioCtx=null),this._detectors.clear(),this._running=!1}addCustomWord(n){if(this._refs.set(n.word_name,n),this._running&&this._workletNode){const t=new 
Rt({name:n.word_name,refEmbeddings:n.embeddings,threshold:this._opts.threshold,relaxationMs:this._opts.relaxationMs,inferenceGapMs:this._opts.inferenceGapMs});t.addEventListener("match",e=>{this.dispatchEvent(new CustomEvent("match",{detail:e.detail}))}),this._detectors.set(n.word_name,t)}}enrollWord(n){return new It(n)}static loadWords(){return ot()}static saveWord(n){le(n)}static deleteWord(n){he(n)}static importWordFile(n){return ue(n)}static exportWord(n){de(n)}}exports.EnrollmentSession=It;exports.Mellon=yt;
|
package/dist/mellon.mjs
CHANGED
|
@@ -1,22 +1,22 @@
|
|
|
1
|
-
const Jt = "0.0.
|
|
1
|
+
const Jt = "0.0.10", Kt = [1, 1, 149, 64], Yt = `https://cdn.jsdelivr.net/npm/mellon@${Jt}/dist/assets`, st = {
|
|
2
2
|
assetsPath: `${Yt}`
|
|
3
3
|
};
|
|
4
4
|
let I = null, q = null, tt = null;
|
|
5
|
-
function Qt({ assetsPath:
|
|
6
|
-
|
|
5
|
+
function Qt({ assetsPath: a } = {}) {
|
|
6
|
+
a !== void 0 && (st.assetsPath = a), I = null, q = null, tt = null;
|
|
7
7
|
}
|
|
8
|
-
async function Vt(
|
|
9
|
-
return I ? (
|
|
10
|
-
const
|
|
11
|
-
tt = await new Function("url", "return import(url)")(t), tt.env.wasm.wasmPaths =
|
|
12
|
-
const
|
|
13
|
-
if (!
|
|
14
|
-
const
|
|
8
|
+
async function Vt(a) {
|
|
9
|
+
return I ? (a == null || a(1), I) : q || (q = (async () => {
|
|
10
|
+
const n = st.assetsPath.endsWith("/") ? st.assetsPath : st.assetsPath + "/", t = n + "ort.all.min.mjs", e = n + "model.onnx";
|
|
11
|
+
tt = await new Function("url", "return import(url)")(t), tt.env.wasm.wasmPaths = n;
|
|
12
|
+
const s = await fetch(e);
|
|
13
|
+
if (!s.ok) throw new Error(`Failed to fetch model: ${s.status}`);
|
|
14
|
+
const i = parseInt(s.headers.get("content-length") || "0", 10), o = s.body.getReader(), c = [];
|
|
15
15
|
let l = 0;
|
|
16
16
|
for (; ; ) {
|
|
17
|
-
const { done: m, value: _ } = await
|
|
17
|
+
const { done: m, value: _ } = await o.read();
|
|
18
18
|
if (m) break;
|
|
19
|
-
c.push(_), l += _.byteLength,
|
|
19
|
+
c.push(_), l += _.byteLength, i > 0 && (a == null || a(l / i));
|
|
20
20
|
}
|
|
21
21
|
const h = new Uint8Array(l);
|
|
22
22
|
let d = 0;
|
|
@@ -25,192 +25,192 @@ async function Vt(i) {
|
|
|
25
25
|
return I = await tt.InferenceSession.create(h.buffer, {
|
|
26
26
|
executionProviders: ["wasm"],
|
|
27
27
|
graphOptimizationLevel: "all"
|
|
28
|
-
}),
|
|
28
|
+
}), a == null || a(1), I;
|
|
29
29
|
})(), q);
|
|
30
30
|
}
|
|
31
|
-
async function St(
|
|
31
|
+
async function St(a) {
|
|
32
32
|
if (!I) throw new Error("Model not loaded — call loadModel() first");
|
|
33
|
-
const
|
|
34
|
-
return t[
|
|
33
|
+
const n = new tt.Tensor("float32", a, Kt), t = await I.run({ input: n }), e = Object.keys(t)[0];
|
|
34
|
+
return t[e].data;
|
|
35
35
|
}
|
|
36
|
-
function Xt(
|
|
37
|
-
return
|
|
36
|
+
function Xt(a) {
|
|
37
|
+
return a && a.__esModule && Object.prototype.hasOwnProperty.call(a, "default") ? a.default : a;
|
|
38
38
|
}
|
|
39
|
-
var ft,
|
|
39
|
+
var ft, Et;
|
|
40
40
|
function Zt() {
|
|
41
|
-
if (
|
|
42
|
-
|
|
43
|
-
function
|
|
44
|
-
if (this.size =
|
|
41
|
+
if (Et) return ft;
|
|
42
|
+
Et = 1;
|
|
43
|
+
function a(n) {
|
|
44
|
+
if (this.size = n | 0, this.size <= 1 || (this.size & this.size - 1) !== 0)
|
|
45
45
|
throw new Error("FFT size must be a power of two and bigger than 1");
|
|
46
|
-
this._csize =
|
|
47
|
-
for (var t = new Array(this.size * 2),
|
|
48
|
-
const l = Math.PI *
|
|
49
|
-
t[
|
|
46
|
+
this._csize = n << 1;
|
|
47
|
+
for (var t = new Array(this.size * 2), e = 0; e < t.length; e += 2) {
|
|
48
|
+
const l = Math.PI * e / this.size;
|
|
49
|
+
t[e] = Math.cos(l), t[e + 1] = -Math.sin(l);
|
|
50
50
|
}
|
|
51
51
|
this.table = t;
|
|
52
|
-
for (var
|
|
53
|
-
|
|
54
|
-
this._width =
|
|
55
|
-
for (var
|
|
56
|
-
this._bitrev[
|
|
57
|
-
for (var
|
|
58
|
-
var c = this._width -
|
|
59
|
-
this._bitrev[
|
|
52
|
+
for (var r = 0, s = 1; this.size > s; s <<= 1)
|
|
53
|
+
r++;
|
|
54
|
+
this._width = r % 2 === 0 ? r - 1 : r, this._bitrev = new Array(1 << this._width);
|
|
55
|
+
for (var i = 0; i < this._bitrev.length; i++) {
|
|
56
|
+
this._bitrev[i] = 0;
|
|
57
|
+
for (var o = 0; o < this._width; o += 2) {
|
|
58
|
+
var c = this._width - o - 2;
|
|
59
|
+
this._bitrev[i] |= (i >>> o & 3) << c;
|
|
60
60
|
}
|
|
61
61
|
}
|
|
62
62
|
this._out = null, this._data = null, this._inv = 0;
|
|
63
63
|
}
|
|
64
|
-
return ft =
|
|
65
|
-
for (var
|
|
66
|
-
|
|
67
|
-
return
|
|
68
|
-
},
|
|
64
|
+
return ft = a, a.prototype.fromComplexArray = function(t, e) {
|
|
65
|
+
for (var r = e || new Array(t.length >>> 1), s = 0; s < t.length; s += 2)
|
|
66
|
+
r[s >>> 1] = t[s];
|
|
67
|
+
return r;
|
|
68
|
+
}, a.prototype.createComplexArray = function() {
|
|
69
69
|
const t = new Array(this._csize);
|
|
70
|
-
for (var
|
|
71
|
-
t[
|
|
70
|
+
for (var e = 0; e < t.length; e++)
|
|
71
|
+
t[e] = 0;
|
|
72
72
|
return t;
|
|
73
|
-
},
|
|
74
|
-
for (var
|
|
75
|
-
|
|
76
|
-
return
|
|
77
|
-
},
|
|
78
|
-
for (var
|
|
79
|
-
t[
|
|
80
|
-
},
|
|
81
|
-
if (t ===
|
|
73
|
+
}, a.prototype.toComplexArray = function(t, e) {
|
|
74
|
+
for (var r = e || this.createComplexArray(), s = 0; s < r.length; s += 2)
|
|
75
|
+
r[s] = t[s >>> 1], r[s + 1] = 0;
|
|
76
|
+
return r;
|
|
77
|
+
}, a.prototype.completeSpectrum = function(t) {
|
|
78
|
+
for (var e = this._csize, r = e >>> 1, s = 2; s < r; s += 2)
|
|
79
|
+
t[e - s] = t[s], t[e - s + 1] = -t[s + 1];
|
|
80
|
+
}, a.prototype.transform = function(t, e) {
|
|
81
|
+
if (t === e)
|
|
82
82
|
throw new Error("Input and output buffers must be different");
|
|
83
|
-
this._out = t, this._data =
|
|
84
|
-
},
|
|
85
|
-
if (t ===
|
|
83
|
+
this._out = t, this._data = e, this._inv = 0, this._transform4(), this._out = null, this._data = null;
|
|
84
|
+
}, a.prototype.realTransform = function(t, e) {
|
|
85
|
+
if (t === e)
|
|
86
86
|
throw new Error("Input and output buffers must be different");
|
|
87
|
-
this._out = t, this._data =
|
|
88
|
-
},
|
|
89
|
-
if (t ===
|
|
87
|
+
this._out = t, this._data = e, this._inv = 0, this._realTransform4(), this._out = null, this._data = null;
|
|
88
|
+
}, a.prototype.inverseTransform = function(t, e) {
|
|
89
|
+
if (t === e)
|
|
90
90
|
throw new Error("Input and output buffers must be different");
|
|
91
|
-
this._out = t, this._data =
|
|
92
|
-
for (var
|
|
93
|
-
t[
|
|
91
|
+
this._out = t, this._data = e, this._inv = 1, this._transform4();
|
|
92
|
+
for (var r = 0; r < t.length; r++)
|
|
93
|
+
t[r] /= this.size;
|
|
94
94
|
this._out = null, this._data = null;
|
|
95
|
-
},
|
|
96
|
-
var t = this._out,
|
|
97
|
-
if (
|
|
98
|
-
for (
|
|
95
|
+
}, a.prototype._transform4 = function() {
|
|
96
|
+
var t = this._out, e = this._csize, r = this._width, s = 1 << r, i = e / s << 1, o, c, l = this._bitrev;
|
|
97
|
+
if (i === 4)
|
|
98
|
+
for (o = 0, c = 0; o < e; o += i, c++) {
|
|
99
99
|
const u = l[c];
|
|
100
|
-
this._singleTransform2(
|
|
100
|
+
this._singleTransform2(o, u, s);
|
|
101
101
|
}
|
|
102
102
|
else
|
|
103
|
-
for (
|
|
103
|
+
for (o = 0, c = 0; o < e; o += i, c++) {
|
|
104
104
|
const u = l[c];
|
|
105
|
-
this._singleTransform4(
|
|
105
|
+
this._singleTransform4(o, u, s);
|
|
106
106
|
}
|
|
107
107
|
var h = this._inv ? -1 : 1, d = this.table;
|
|
108
|
-
for (
|
|
109
|
-
|
|
110
|
-
var m =
|
|
111
|
-
for (
|
|
112
|
-
for (var _ =
|
|
113
|
-
const u = g, p = u + m, v = p + m, w = v + m, b = t[u],
|
|
108
|
+
for (s >>= 2; s >= 2; s >>= 2) {
|
|
109
|
+
i = e / s << 1;
|
|
110
|
+
var m = i >>> 2;
|
|
111
|
+
for (o = 0; o < e; o += i)
|
|
112
|
+
for (var _ = o + m, g = o, f = 0; g < _; g += 2, f += s) {
|
|
113
|
+
const u = g, p = u + m, v = p + m, w = v + m, b = t[u], A = t[u + 1], E = t[p], y = t[p + 1], F = t[v], C = t[v + 1], M = t[w], T = t[w + 1], x = b, R = A, z = d[f], S = h * d[f + 1], N = E * z - y * S, k = E * S + y * z, L = d[2 * f], P = h * d[2 * f + 1], G = F * L - C * P, H = F * P + C * L, J = d[3 * f], K = h * d[3 * f + 1], Y = M * J - T * K, Q = M * K + T * J, V = x + G, j = R + H, B = x - G, X = R - H, Z = N + Y, U = k + Q, $ = h * (N - Y), O = h * (k - Q), et = V + Z, at = j + U, it = V - Z, ct = j - U, lt = B + O, ht = X - $, dt = B - O, ut = X + $;
|
|
114
114
|
t[u] = et, t[u + 1] = at, t[p] = lt, t[p + 1] = ht, t[v] = it, t[v + 1] = ct, t[w] = dt, t[w + 1] = ut;
|
|
115
115
|
}
|
|
116
116
|
}
|
|
117
|
-
},
|
|
118
|
-
const
|
|
119
|
-
|
|
120
|
-
},
|
|
121
|
-
const
|
|
122
|
-
|
|
123
|
-
},
|
|
124
|
-
var t = this._out,
|
|
125
|
-
if (
|
|
126
|
-
for (
|
|
117
|
+
}, a.prototype._singleTransform2 = function(t, e, r) {
|
|
118
|
+
const s = this._out, i = this._data, o = i[e], c = i[e + 1], l = i[e + r], h = i[e + r + 1], d = o + l, m = c + h, _ = o - l, g = c - h;
|
|
119
|
+
s[t] = d, s[t + 1] = m, s[t + 2] = _, s[t + 3] = g;
|
|
120
|
+
}, a.prototype._singleTransform4 = function(t, e, r) {
|
|
121
|
+
const s = this._out, i = this._data, o = this._inv ? -1 : 1, c = r * 2, l = r * 3, h = i[e], d = i[e + 1], m = i[e + r], _ = i[e + r + 1], g = i[e + c], f = i[e + c + 1], u = i[e + l], p = i[e + l + 1], v = h + g, w = d + f, b = h - g, A = d - f, E = m + u, y = _ + p, F = o * (m - u), C = o * (_ - p), M = v + E, T = w + y, x = b + C, R = A - F, z = v - E, S = w - y, N = b - C, k = A + F;
|
|
122
|
+
s[t] = M, s[t + 1] = T, s[t + 2] = x, s[t + 3] = R, s[t + 4] = z, s[t + 5] = S, s[t + 6] = N, s[t + 7] = k;
|
|
123
|
+
}, a.prototype._realTransform4 = function() {
|
|
124
|
+
var t = this._out, e = this._csize, r = this._width, s = 1 << r, i = e / s << 1, o, c, l = this._bitrev;
|
|
125
|
+
if (i === 4)
|
|
126
|
+
for (o = 0, c = 0; o < e; o += i, c++) {
|
|
127
127
|
const mt = l[c];
|
|
128
|
-
this._singleRealTransform2(
|
|
128
|
+
this._singleRealTransform2(o, mt >>> 1, s >>> 1);
|
|
129
129
|
}
|
|
130
130
|
else
|
|
131
|
-
for (
|
|
131
|
+
for (o = 0, c = 0; o < e; o += i, c++) {
|
|
132
132
|
const mt = l[c];
|
|
133
|
-
this._singleRealTransform4(
|
|
133
|
+
this._singleRealTransform4(o, mt >>> 1, s >>> 1);
|
|
134
134
|
}
|
|
135
135
|
var h = this._inv ? -1 : 1, d = this.table;
|
|
136
|
-
for (
|
|
137
|
-
|
|
138
|
-
var m =
|
|
139
|
-
for (
|
|
140
|
-
for (var f = 0, u = 0; f <= g; f += 2, u +=
|
|
141
|
-
var p =
|
|
136
|
+
for (s >>= 2; s >= 2; s >>= 2) {
|
|
137
|
+
i = e / s << 1;
|
|
138
|
+
var m = i >>> 1, _ = m >>> 1, g = _ >>> 1;
|
|
139
|
+
for (o = 0; o < e; o += i)
|
|
140
|
+
for (var f = 0, u = 0; f <= g; f += 2, u += s) {
|
|
141
|
+
var p = o + f, v = p + _, w = v + _, b = w + _, A = t[p], E = t[p + 1], y = t[v], F = t[v + 1], C = t[w], M = t[w + 1], T = t[b], x = t[b + 1], R = A, z = E, S = d[u], N = h * d[u + 1], k = y * S - F * N, L = y * N + F * S, P = d[2 * u], G = h * d[2 * u + 1], H = C * P - M * G, J = C * G + M * P, K = d[3 * u], Y = h * d[3 * u + 1], Q = T * K - x * Y, V = T * Y + x * K, j = R + H, B = z + J, X = R - H, Z = z - J, U = k + Q, $ = L + V, O = h * (k - Q), et = h * (L - V), at = j + U, it = B + $, ct = X + et, lt = Z - O;
|
|
142
142
|
if (t[p] = at, t[p + 1] = it, t[v] = ct, t[v + 1] = lt, f === 0) {
|
|
143
143
|
var ht = j - U, dt = B - $;
|
|
144
144
|
t[w] = ht, t[w + 1] = dt;
|
|
145
145
|
continue;
|
|
146
146
|
}
|
|
147
147
|
if (f !== g) {
|
|
148
|
-
var ut = X, It = -Z, Dt = j, Wt = -B, jt = -h * et, Bt = -h * O, Ut = -h * $, $t = -h * U, Lt = ut + jt, Pt = It + Bt, Gt = Dt + $t, Ht = Wt - Ut, yt =
|
|
148
|
+
var ut = X, It = -Z, Dt = j, Wt = -B, jt = -h * et, Bt = -h * O, Ut = -h * $, $t = -h * U, Lt = ut + jt, Pt = It + Bt, Gt = Dt + $t, Ht = Wt - Ut, yt = o + _ - f, bt = o + m - f;
|
|
149
149
|
t[yt] = Lt, t[yt + 1] = Pt, t[bt] = Gt, t[bt + 1] = Ht;
|
|
150
150
|
}
|
|
151
151
|
}
|
|
152
152
|
}
|
|
153
|
-
},
|
|
154
|
-
const
|
|
155
|
-
|
|
156
|
-
},
|
|
157
|
-
const
|
|
158
|
-
|
|
153
|
+
}, a.prototype._singleRealTransform2 = function(t, e, r) {
|
|
154
|
+
const s = this._out, i = this._data, o = i[e], c = i[e + r], l = o + c, h = o - c;
|
|
155
|
+
s[t] = l, s[t + 1] = 0, s[t + 2] = h, s[t + 3] = 0;
|
|
156
|
+
}, a.prototype._singleRealTransform4 = function(t, e, r) {
|
|
157
|
+
const s = this._out, i = this._data, o = this._inv ? -1 : 1, c = r * 2, l = r * 3, h = i[e], d = i[e + r], m = i[e + c], _ = i[e + l], g = h + m, f = h - m, u = d + _, p = o * (d - _), v = g + u, w = f, b = -p, A = g - u, E = f, y = p;
|
|
158
|
+
s[t] = v, s[t + 1] = 0, s[t + 2] = w, s[t + 3] = b, s[t + 4] = A, s[t + 5] = 0, s[t + 6] = E, s[t + 7] = y;
|
|
159
159
|
}, ft;
|
|
160
160
|
}
|
|
161
161
|
var Ot = Zt();
|
|
162
|
-
const qt = /* @__PURE__ */ Xt(Ot), nt = 16e3, W = 512, D = 64,
|
|
163
|
-
function Ct(
|
|
164
|
-
return 2595 * Math.log10(1 +
|
|
162
|
+
const qt = /* @__PURE__ */ Xt(Ot), nt = 16e3, W = 512, D = 64, At = Math.floor(0.025 * nt), Ft = Math.floor(0.01 * nt);
|
|
163
|
+
function Ct(a) {
|
|
164
|
+
return 2595 * Math.log10(1 + a / 700);
|
|
165
165
|
}
|
|
166
|
-
function te(
|
|
167
|
-
return 700 * (10 ** (
|
|
166
|
+
function te(a) {
|
|
167
|
+
return 700 * (10 ** (a / 2595) - 1);
|
|
168
168
|
}
|
|
169
169
|
function ee() {
|
|
170
|
-
const
|
|
171
|
-
for (let
|
|
172
|
-
t[
|
|
173
|
-
const
|
|
174
|
-
for (let
|
|
175
|
-
const c = new Float32Array(
|
|
176
|
-
for (let l = o
|
|
177
|
-
for (let l = o
|
|
178
|
-
|
|
170
|
+
const a = Ct(0), n = Ct(nt / 2), t = new Float64Array(D + 2);
|
|
171
|
+
for (let o = 0; o < D + 2; o++)
|
|
172
|
+
t[o] = a + o * (n - a) / (D + 1);
|
|
173
|
+
const r = t.map((o) => te(o)).map((o) => Math.floor((W + 1) * o / nt)), s = [], i = Math.floor(W / 2) + 1;
|
|
174
|
+
for (let o = 0; o < D; o++) {
|
|
175
|
+
const c = new Float32Array(i);
|
|
176
|
+
for (let l = r[o]; l < r[o + 1]; l++) c[l] = (l - r[o]) / (r[o + 1] - r[o]);
|
|
177
|
+
for (let l = r[o + 1]; l < r[o + 2]; l++) c[l] = (r[o + 2] - l) / (r[o + 2] - r[o + 1]);
|
|
178
|
+
s.push(c);
|
|
179
179
|
}
|
|
180
|
-
return
|
|
180
|
+
return s;
|
|
181
181
|
}
|
|
182
182
|
const se = ee(), rt = new qt(W), _t = new Float32Array(W), Mt = rt.createComplexArray(), pt = rt.createComplexArray(), Tt = new Float32Array(Math.floor(W / 2) + 1);
|
|
183
|
-
function Nt(
|
|
184
|
-
const
|
|
185
|
-
for (let
|
|
186
|
-
const
|
|
183
|
+
/**
 * Compute log filterbank energies for an audio buffer.
 *
 * The signal is cut into frames of At samples taken every Ft samples (the
 * tail is zero-padded). Each frame is transformed, turned into a power
 * spectrum, weighted by every filter in `se`, and the natural log of each
 * filter's energy is stored. Zero values are replaced by 1e-30 before use.
 *
 * Uses the shared module-level scratch buffers (_t, Mt, pt, Tt), so the
 * statement order inside the frame loop matters.
 *
 * @param {Float32Array} a Audio samples.
 * @returns {Float32Array} Frame-major features: frame r, filter i is at r * D + i.
 */
function Nt(a) {
  const frameCount = 1 + Math.ceil((a.length - At) / Ft);
  const features = new Float32Array(frameCount * D);
  const binCount = Math.floor(W / 2) + 1;

  let frame = 0;
  while (frame < frameCount) {
    const offset = frame * Ft;

    // Load the frame into the zeroed input buffer.
    _t.fill(0);
    const available = Math.min(At, a.length - offset);
    for (let k = 0; k < available; k++) {
      _t[k] = a[offset + k];
    }

    rt.toComplexArray(_t, Mt);
    rt.transform(pt, Mt);

    // Power per bin from the interleaved (real, imaginary) output.
    for (let bin = 0; bin < binCount; bin++) {
      const real = pt[2 * bin];
      const imag = pt[2 * bin + 1];
      const power = (real * real + imag * imag) / W;
      Tt[bin] = power === 0 ? 1e-30 : power;
    }

    // Weighted sum per filter, then log.
    for (let band = 0; band < D; band++) {
      const weights = se[band];
      let energy = 0;
      for (let bin = 0; bin < binCount; bin++) {
        energy += Tt[bin] * weights[bin];
      }
      features[frame * D + band] = Math.log(energy === 0 ? 1e-30 : energy);
    }

    frame += 1;
  }
  return features;
}
|
|
204
|
-
function ne(
|
|
204
|
+
/**
 * Dot product of two vectors, remapped from [-1, 1] to [0, 1] via (dot + 1) / 2.
 * Iterates over the length of `a`.
 *
 * @param {ArrayLike<number>} a First vector.
 * @param {ArrayLike<number>} n Second vector.
 * @returns {number} (a · n + 1) / 2.
 */
function ne(a, n) {
  let dot = 0;
  let idx = 0;
  while (idx < a.length) {
    dot += a[idx] * n[idx];
    idx += 1;
  }
  return (dot + 1) / 2;
}
|
|
209
|
-
function re(
|
|
209
|
+
/**
 * Best ne() score of `a` against a collection of reference vectors.
 *
 * @param {ArrayLike<number>} a Vector to compare.
 * @param {Iterable<ArrayLike<number>>} n Reference vectors.
 * @returns {number} Highest score found, or 0 when nothing scores above 0.
 */
function re(a, n) {
  let best = 0;
  for (const reference of n) {
    const score = ne(a, reference);
    if (score > best) {
      best = score;
    }
  }
  return best;
}
|
|
@@ -223,8 +223,8 @@ class xt extends EventTarget {
|
|
|
223
223
|
* @param {number} [opts.relaxationMs=2000] Min ms between events
|
|
224
224
|
* @param {number} [opts.inferenceGapMs=300] Min ms between inferences
|
|
225
225
|
*/
|
|
226
|
-
constructor({ name:
|
|
227
|
-
super(), this.name =
|
|
226
|
+
constructor({ name: n, refEmbeddings: t, threshold: e = 0.65, relaxationMs: r = 2e3, inferenceGapMs: s = 300 }) {
|
|
227
|
+
super(), this.name = n, this.refEmbeddings = t, this.threshold = e, this.relaxationMs = r, this.inferenceGapMs = s, this._lastDetectionAt = 0, this._lastInferenceAt = 0, this._lastScore = 0;
|
|
228
228
|
}
|
|
229
229
|
get lastScore() {
|
|
230
230
|
return this._lastScore;
|
|
@@ -235,26 +235,26 @@ class xt extends EventTarget {
|
|
|
235
235
|
* @param {Float32Array} audioBuffer 24 000 samples at 16 kHz
|
|
236
236
|
* @returns {Promise<number|null>} Similarity score, or null if rate-limited
|
|
237
237
|
*/
|
|
238
|
-
async scoreFrame(
|
|
238
|
+
async scoreFrame(n) {
|
|
239
239
|
const t = Date.now();
|
|
240
240
|
if (t - this._lastInferenceAt < this.inferenceGapMs) return null;
|
|
241
241
|
this._lastInferenceAt = t;
|
|
242
|
-
const
|
|
243
|
-
return this._lastScore =
|
|
244
|
-
detail: { name: this.name, confidence:
|
|
245
|
-
}))),
|
|
242
|
+
const e = Nt(n), r = await St(e), s = re(r, this.refEmbeddings);
|
|
243
|
+
return this._lastScore = s, s >= this.threshold && t - this._lastDetectionAt >= this.relaxationMs && (this._lastDetectionAt = t, this.dispatchEvent(new CustomEvent("match", {
|
|
244
|
+
detail: { name: this.name, confidence: s, timestamp: t }
|
|
245
|
+
}))), s;
|
|
246
246
|
}
|
|
247
247
|
}
|
|
248
248
|
// Rt: sample rate passed to the AudioContexts used for enrollment decoding.
// oe: delay in ms before the enrollment MediaRecorder is stopped.
// vt: fixed sample count that zt() pads or truncates every buffer to.
const Rt = 16e3, oe = 1500, vt = 24e3;
|
|
249
|
-
function zt(
|
|
250
|
-
if (
|
|
251
|
-
const
|
|
252
|
-
return
|
|
249
|
+
/**
 * Force a sample buffer to exactly vt samples.
 *
 * A buffer that already has vt samples is returned as-is (same object).
 * Otherwise a new zero-filled buffer receives the first vt samples, so longer
 * input is truncated and shorter input is zero-padded at the end.
 *
 * @param {Float32Array} a Input samples.
 * @returns {Float32Array} Buffer of length vt.
 */
function zt(a) {
  if (a.length !== vt) {
    const fixed = new Float32Array(vt);
    const head = a.subarray(0, vt);
    fixed.set(head);
    return fixed;
  }
  return a;
}
|
|
254
254
|
class ae extends EventTarget {
|
|
255
255
|
/** @param {string} wordName — the wake word label */
|
|
256
|
-
constructor(
|
|
257
|
-
super(), this.wordName =
|
|
256
|
+
constructor(n) {
|
|
257
|
+
super(), this.wordName = n.trim().toLowerCase(), this.samples = [];
|
|
258
258
|
}
|
|
259
259
|
get sampleCount() {
|
|
260
260
|
return this.samples.length;
|
|
@@ -267,23 +267,23 @@ class ae extends EventTarget {
|
|
|
267
267
|
* @returns {Promise<number>} Index (1-based) of the new sample
|
|
268
268
|
*/
|
|
269
269
|
async recordSample() {
|
|
270
|
-
const
|
|
271
|
-
return new Promise((t,
|
|
272
|
-
const
|
|
273
|
-
this.dispatchEvent(new CustomEvent("recording-start")),
|
|
274
|
-
|
|
275
|
-
},
|
|
276
|
-
|
|
270
|
+
const n = await navigator.mediaDevices.getUserMedia({ audio: !0 });
|
|
271
|
+
return new Promise((t, e) => {
|
|
272
|
+
const r = new AudioContext({ sampleRate: Rt }), s = new MediaRecorder(n), i = [];
|
|
273
|
+
this.dispatchEvent(new CustomEvent("recording-start")), s.ondataavailable = (o) => {
|
|
274
|
+
o.data.size > 0 && i.push(o.data);
|
|
275
|
+
}, s.onstop = async () => {
|
|
276
|
+
n.getTracks().forEach((o) => o.stop());
|
|
277
277
|
try {
|
|
278
|
-
const c = await new Blob(
|
|
279
|
-
await
|
|
278
|
+
const c = await new Blob(i, { type: "audio/webm" }).arrayBuffer(), l = await r.decodeAudioData(c);
|
|
279
|
+
await r.close();
|
|
280
280
|
const h = l.getChannelData(0), d = zt(new Float32Array(h)), m = this._push(d, `Recorded #${this.samples.length}`);
|
|
281
281
|
t(m);
|
|
282
|
-
} catch (
|
|
283
|
-
await
|
|
284
|
-
}),
|
|
282
|
+
} catch (o) {
|
|
283
|
+
await r.close().catch(() => {
|
|
284
|
+
}), e(o);
|
|
285
285
|
}
|
|
286
|
-
},
|
|
286
|
+
}, s.start(), setTimeout(() => s.stop(), oe);
|
|
287
287
|
});
|
|
288
288
|
}
|
|
289
289
|
// ─── Upload ────────────────────────────────────────────────────────────────
|
|
@@ -293,19 +293,19 @@ class ae extends EventTarget {
|
|
|
293
293
|
* @param {File} file
|
|
294
294
|
* @returns {Promise<number>} Index (1-based) of the new sample
|
|
295
295
|
*/
|
|
296
|
-
async addAudioFile(
|
|
297
|
-
const t = await
|
|
298
|
-
await
|
|
299
|
-
const
|
|
300
|
-
return this._push(
|
|
296
|
+
async addAudioFile(n) {
|
|
297
|
+
const t = await n.arrayBuffer(), e = new AudioContext({ sampleRate: Rt }), r = await e.decodeAudioData(t);
|
|
298
|
+
await e.close();
|
|
299
|
+
const s = r.getChannelData(0), i = zt(new Float32Array(s));
|
|
300
|
+
return this._push(i, n.name);
|
|
301
301
|
}
|
|
302
302
|
// ─── Manage ────────────────────────────────────────────────────────────────
|
|
303
303
|
/**
|
|
304
304
|
* Remove a sample by 0-based index.
|
|
305
305
|
* @param {number} idx
|
|
306
306
|
*/
|
|
307
|
-
removeSample(
|
|
308
|
-
this.samples.splice(
|
|
307
|
+
removeSample(n) {
|
|
308
|
+
this.samples.splice(n, 1), this.dispatchEvent(new CustomEvent("samples-changed", { detail: { count: this.samples.length } }));
|
|
309
309
|
}
|
|
310
310
|
clearSamples() {
|
|
311
311
|
this.samples = [], this.dispatchEvent(new CustomEvent("samples-changed", { detail: { count: 0 } }));
|
|
@@ -321,24 +321,24 @@ class ae extends EventTarget {
|
|
|
321
321
|
if (this.samples.length < 3)
|
|
322
322
|
throw new Error(`Need at least 3 samples (currently have ${this.samples.length})`);
|
|
323
323
|
this.dispatchEvent(new CustomEvent("generating", { detail: { total: this.samples.length } }));
|
|
324
|
-
const
|
|
324
|
+
const n = [];
|
|
325
325
|
for (let t = 0; t < this.samples.length; t++) {
|
|
326
|
-
const
|
|
327
|
-
|
|
326
|
+
const e = Nt(this.samples[t].audioBuffer), r = await St(e);
|
|
327
|
+
n.push(Array.from(r)), this.dispatchEvent(new CustomEvent("progress", {
|
|
328
328
|
detail: { done: t + 1, total: this.samples.length }
|
|
329
329
|
}));
|
|
330
330
|
}
|
|
331
331
|
return {
|
|
332
332
|
word_name: this.wordName,
|
|
333
333
|
model_type: "resnet_50_arc",
|
|
334
|
-
embeddings:
|
|
334
|
+
embeddings: n
|
|
335
335
|
};
|
|
336
336
|
}
|
|
337
337
|
// ─── Private ───────────────────────────────────────────────────────────────
|
|
338
|
-
_push(
|
|
339
|
-
this.samples.push({ audioBuffer:
|
|
340
|
-
const
|
|
341
|
-
return this.dispatchEvent(new CustomEvent("sample-added", { detail: { count:
|
|
338
|
+
_push(n, t) {
|
|
339
|
+
this.samples.push({ audioBuffer: n, name: t });
|
|
340
|
+
const e = this.samples.length;
|
|
341
|
+
return this.dispatchEvent(new CustomEvent("sample-added", { detail: { count: e, name: t } })), e;
|
|
342
342
|
}
|
|
343
343
|
}
|
|
344
344
|
const ie = `/**
|
|
@@ -382,40 +382,40 @@ registerProcessor('audio-processor', AudioProcessor)
|
|
|
382
382
|
// Cached object URL for the worklet source; created on first use.
let wt = null;
/**
 * Return an object URL for the audio worklet source text `ie`, creating the
 * Blob and URL only once and reusing them afterwards.
 *
 * @returns {string} Object URL suitable for audioWorklet.addModule().
 */
function ce() {
  if (wt) {
    return wt;
  }
  const source = new Blob([ie], { type: "application/javascript" });
  wt = URL.createObjectURL(source);
  return wt;
}
|
|
390
390
|
const gt = "mellon_custom_refs";
|
|
391
391
|
function ot() {
|
|
392
392
|
try {
|
|
393
|
-
const
|
|
394
|
-
return
|
|
393
|
+
const a = localStorage.getItem(gt);
|
|
394
|
+
return a ? JSON.parse(a) : [];
|
|
395
395
|
} catch {
|
|
396
396
|
return [];
|
|
397
397
|
}
|
|
398
398
|
}
|
|
399
|
-
function le(
|
|
400
|
-
const
|
|
401
|
-
|
|
399
|
+
/**
 * Insert or replace a word ref in localStorage, keyed by `word_name`.
 * Any stored entry with the same name is dropped and the new one is appended.
 *
 * @param {{ word_name: string }} a Ref to persist.
 */
function le(a) {
  const others = ot().filter((entry) => entry.word_name !== a.word_name);
  const updated = [...others, a];
  const payload = JSON.stringify(updated);
  localStorage.setItem(gt, payload);
}
|
|
403
|
-
function he(
|
|
404
|
-
const
|
|
405
|
-
localStorage.setItem(gt, JSON.stringify(
|
|
403
|
+
/**
 * Remove the stored word ref whose `word_name` equals `a` and write the
 * remaining list back to localStorage.
 *
 * @param {string} a Name of the word to delete.
 */
function he(a) {
  const isOtherWord = (entry) => entry.word_name !== a;
  const remaining = ot().filter(isOtherWord);
  const payload = JSON.stringify(remaining);
  localStorage.setItem(gt, payload);
}
|
|
407
|
-
function de(
|
|
408
|
-
const
|
|
409
|
-
href:
|
|
410
|
-
download: `${
|
|
407
|
+
/**
 * Download a ref as pretty-printed JSON named `<word_name>_ref.json` by
 * clicking a temporary anchor element that points at a Blob URL.
 *
 * @param {{ word_name: string }} a Ref to export.
 */
function de(a) {
  const json = JSON.stringify(a, null, 2);
  const blob = new Blob([json], { type: "application/json" });
  const url = URL.createObjectURL(blob);

  const link = document.createElement("a");
  link.href = url;
  link.download = `${a.word_name}_ref.json`;

  // The anchor is attached only for the duration of the click.
  document.body.appendChild(link);
  link.click();
  document.body.removeChild(link);
  URL.revokeObjectURL(url);
}
|
|
414
|
-
async function ue(
|
|
415
|
-
const
|
|
414
|
+
async function ue(a) {
|
|
415
|
+
const n = await a.text();
|
|
416
416
|
let t;
|
|
417
417
|
try {
|
|
418
|
-
t = JSON.parse(
|
|
418
|
+
t = JSON.parse(n);
|
|
419
419
|
} catch {
|
|
420
420
|
throw new Error("Invalid JSON");
|
|
421
421
|
}
|
|
@@ -423,12 +423,11 @@ async function ue(i) {
|
|
|
423
423
|
throw new Error('Missing or empty "embeddings" array');
|
|
424
424
|
if (!Array.isArray(t.embeddings[0]))
|
|
425
425
|
throw new Error('"embeddings" must be a 2D array');
|
|
426
|
-
return t.word_name || (t.word_name =
|
|
426
|
+
return t.word_name || (t.word_name = a.name.replace(/_ref\.json$/i, "").replace(/\.json$/i, "")), t;
|
|
427
427
|
}
|
|
428
428
|
class kt extends EventTarget {
|
|
429
429
|
/**
|
|
430
430
|
* @param {object} [opts]
|
|
431
|
-
* @param {string[]} [opts.words] Words to detect (must have refs loaded via addCustomWord())
|
|
432
431
|
* @param {Array<string|{word_name:string,embeddings:number[][]}>} [opts.refs]
|
|
433
432
|
* Reference data to preload. Each entry is either a URL string pointing to a
|
|
434
433
|
* hosted `_ref.json` file, or an inline RefData object.
|
|
@@ -438,14 +437,13 @@ class kt extends EventTarget {
|
|
|
438
437
|
* @param {number} [opts.inferenceGapMs=300] Min ms between inference runs
|
|
439
438
|
* @param {string} [opts.assetsPath]
|
|
440
439
|
*/
|
|
441
|
-
constructor(
|
|
440
|
+
constructor(n = {}) {
|
|
442
441
|
super(), this._opts = {
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
assetsPath: r.assetsPath
|
|
442
|
+
refs: n.refs ?? [],
|
|
443
|
+
threshold: n.threshold ?? 0.65,
|
|
444
|
+
relaxationMs: n.relaxationMs ?? 2e3,
|
|
445
|
+
inferenceGapMs: n.inferenceGapMs ?? 300,
|
|
446
|
+
assetsPath: n.assetsPath
|
|
449
447
|
}, this._refs = /* @__PURE__ */ new Map(), this._detectors = /* @__PURE__ */ new Map(), this._audioCtx = null, this._workletNode = null, this._stream = null, this._initialized = !1, this._running = !1;
|
|
450
448
|
}
|
|
451
449
|
/** Whether init() has completed successfully. */
|
|
@@ -464,88 +462,83 @@ class kt extends EventTarget {
|
|
|
464
462
|
*
|
|
465
463
|
* @param {(progress: number) => void} [onProgress] 0.0 → 1.0
|
|
466
464
|
*/
|
|
467
|
-
async init(
|
|
468
|
-
var
|
|
465
|
+
async init(n) {
|
|
466
|
+
var e;
|
|
469
467
|
if (this._initialized) {
|
|
470
|
-
|
|
468
|
+
n == null || n(1);
|
|
471
469
|
return;
|
|
472
470
|
}
|
|
473
471
|
this._opts.assetsPath && Qt({ assetsPath: this._opts.assetsPath });
|
|
474
472
|
try {
|
|
475
|
-
await Vt(
|
|
476
|
-
} catch (
|
|
477
|
-
throw this.dispatchEvent(new CustomEvent("error", { detail: { error:
|
|
473
|
+
await Vt(n);
|
|
474
|
+
} catch (r) {
|
|
475
|
+
throw this.dispatchEvent(new CustomEvent("error", { detail: { error: r } })), r;
|
|
478
476
|
}
|
|
479
477
|
const t = await ot();
|
|
480
|
-
for (const
|
|
481
|
-
const
|
|
482
|
-
if (!!t.find((n) => n.word_name === e)) break;
|
|
478
|
+
for (const r of this._opts.refs) {
|
|
479
|
+
const s = (e = r.match(/\/([^/]+?)_ref\.json$/)) == null ? void 0 : e[1], i = t.find((o) => o.word_name === s);
|
|
483
480
|
try {
|
|
484
|
-
let
|
|
485
|
-
if (
|
|
486
|
-
|
|
487
|
-
|
|
481
|
+
let o;
|
|
482
|
+
if (i) {
|
|
483
|
+
this.addCustomWord(i);
|
|
484
|
+
break;
|
|
485
|
+
} else if (typeof r == "string") {
|
|
486
|
+
console.log("fetching ref : ", r);
|
|
487
|
+
const c = await fetch(r);
|
|
488
488
|
if (!c.ok) throw new Error(`HTTP ${c.status}`);
|
|
489
|
-
|
|
489
|
+
o = await c.json();
|
|
490
490
|
} else
|
|
491
|
-
|
|
492
|
-
kt.saveWord(
|
|
493
|
-
} catch (
|
|
494
|
-
const c = typeof
|
|
495
|
-
console.warn(`[Mellon] Failed to load ref "${c}": ${
|
|
491
|
+
o = r;
|
|
492
|
+
kt.saveWord(o), this.addCustomWord(o);
|
|
493
|
+
} catch (o) {
|
|
494
|
+
const c = typeof r == "string" ? r : r.word_name;
|
|
495
|
+
console.warn(`[Mellon] Failed to load ref "${c}": ${o.message}`);
|
|
496
496
|
}
|
|
497
497
|
}
|
|
498
498
|
this._initialized = !0, this.dispatchEvent(new CustomEvent("ready"));
|
|
499
499
|
}
|
|
500
500
|
/**
|
|
501
501
|
* Request microphone access and start hotword detection.
|
|
502
|
+
* Listens for all words that have registered reference embeddings.
|
|
502
503
|
* Emits 'match' CustomEvents when a word is detected.
|
|
503
|
-
*
|
|
504
|
-
* @param {string[]} [words] Subset of words to listen for; defaults to opts.words
|
|
505
504
|
*/
|
|
506
|
-
async start(
|
|
505
|
+
async start() {
|
|
507
506
|
this._initialized || await this.init();
|
|
508
|
-
const t = r ?? this._opts.words;
|
|
509
507
|
try {
|
|
510
508
|
this._stream = await navigator.mediaDevices.getUserMedia({ audio: !0 });
|
|
511
509
|
} catch (e) {
|
|
512
|
-
const
|
|
513
|
-
throw this.dispatchEvent(new CustomEvent("error", { detail: { error:
|
|
510
|
+
const r = new Error(`Microphone access denied: ${e.message}`);
|
|
511
|
+
throw this.dispatchEvent(new CustomEvent("error", { detail: { error: r } })), r;
|
|
514
512
|
}
|
|
515
513
|
this._audioCtx = new AudioContext({ sampleRate: 16e3 });
|
|
516
|
-
const
|
|
517
|
-
await this._audioCtx.audioWorklet.addModule(
|
|
518
|
-
const
|
|
519
|
-
this._workletNode = new AudioWorkletNode(this._audioCtx, "audio-processor"),
|
|
520
|
-
for (const e of
|
|
521
|
-
const
|
|
522
|
-
if (!a) {
|
|
523
|
-
console.warn(`[Mellon] No reference embeddings for "${e}" — skipping. Call addCustomWord() to register custom words before start().`);
|
|
524
|
-
continue;
|
|
525
|
-
}
|
|
526
|
-
const n = new xt({
|
|
514
|
+
const n = ce();
|
|
515
|
+
await this._audioCtx.audioWorklet.addModule(n);
|
|
516
|
+
const t = this._audioCtx.createMediaStreamSource(this._stream);
|
|
517
|
+
this._workletNode = new AudioWorkletNode(this._audioCtx, "audio-processor"), t.connect(this._workletNode), this._workletNode.connect(this._audioCtx.destination);
|
|
518
|
+
for (const [e, r] of this._refs) {
|
|
519
|
+
const s = new xt({
|
|
527
520
|
name: e,
|
|
528
|
-
refEmbeddings:
|
|
521
|
+
refEmbeddings: r.embeddings,
|
|
529
522
|
threshold: this._opts.threshold,
|
|
530
523
|
relaxationMs: this._opts.relaxationMs,
|
|
531
524
|
inferenceGapMs: this._opts.inferenceGapMs
|
|
532
525
|
});
|
|
533
|
-
|
|
534
|
-
this.dispatchEvent(new CustomEvent("match", { detail:
|
|
535
|
-
}), this._detectors.set(e,
|
|
526
|
+
s.addEventListener("match", (i) => {
|
|
527
|
+
this.dispatchEvent(new CustomEvent("match", { detail: i.detail }));
|
|
528
|
+
}), this._detectors.set(e, s);
|
|
536
529
|
}
|
|
537
530
|
this._workletNode.port.onmessage = async (e) => {
|
|
538
|
-
const
|
|
539
|
-
for (const
|
|
540
|
-
|
|
541
|
-
await Promise.allSettled(
|
|
531
|
+
const r = [];
|
|
532
|
+
for (const s of this._detectors.values())
|
|
533
|
+
r.push(s.scoreFrame(e.data));
|
|
534
|
+
await Promise.allSettled(r);
|
|
542
535
|
}, this._running = !0;
|
|
543
536
|
}
|
|
544
537
|
/**
|
|
545
538
|
* Stop detection and release the microphone and AudioContext.
|
|
546
539
|
*/
|
|
547
540
|
stop() {
|
|
548
|
-
this._workletNode && (this._workletNode.port.onmessage = null, this._workletNode.disconnect(), this._workletNode = null), this._stream && (this._stream.getTracks().forEach((
|
|
541
|
+
this._workletNode && (this._workletNode.port.onmessage = null, this._workletNode.disconnect(), this._workletNode = null), this._stream && (this._stream.getTracks().forEach((n) => n.stop()), this._stream = null), this._audioCtx && (this._audioCtx.close(), this._audioCtx = null), this._detectors.clear(), this._running = !1;
|
|
549
542
|
}
|
|
550
543
|
// ─── Custom words ────────────────────────────────────────────────────────
|
|
551
544
|
/**
|
|
@@ -555,18 +548,18 @@ class kt extends EventTarget {
|
|
|
555
548
|
*
|
|
556
549
|
* @param {{ word_name: string, model_type: string, embeddings: number[][] }} refData
|
|
557
550
|
*/
|
|
558
|
-
addCustomWord(
|
|
559
|
-
if (this._refs.set(
|
|
551
|
+
addCustomWord(n) {
|
|
552
|
+
if (this._refs.set(n.word_name, n), this._running && this._workletNode) {
|
|
560
553
|
const t = new xt({
|
|
561
|
-
name:
|
|
562
|
-
refEmbeddings:
|
|
554
|
+
name: n.word_name,
|
|
555
|
+
refEmbeddings: n.embeddings,
|
|
563
556
|
threshold: this._opts.threshold,
|
|
564
557
|
relaxationMs: this._opts.relaxationMs,
|
|
565
558
|
inferenceGapMs: this._opts.inferenceGapMs
|
|
566
559
|
});
|
|
567
|
-
t.addEventListener("match", (
|
|
568
|
-
this.dispatchEvent(new CustomEvent("match", { detail:
|
|
569
|
-
}), this._detectors.set(
|
|
560
|
+
t.addEventListener("match", (e) => {
|
|
561
|
+
this.dispatchEvent(new CustomEvent("match", { detail: e.detail }));
|
|
562
|
+
}), this._detectors.set(n.word_name, t);
|
|
570
563
|
}
|
|
571
564
|
}
|
|
572
565
|
/**
|
|
@@ -583,8 +576,8 @@ class kt extends EventTarget {
|
|
|
583
576
|
* const ref = await session.generateRef()
|
|
584
577
|
* stt.addCustomWord(ref)
|
|
585
578
|
*/
|
|
586
|
-
enrollWord(
|
|
587
|
-
return new ae(
|
|
579
|
+
enrollWord(n) {
|
|
580
|
+
return new ae(n);
|
|
588
581
|
}
|
|
589
582
|
// ─── Persistence (static) ────────────────────────────────────────────────
|
|
590
583
|
/** Return all custom word refs stored in localStorage. */
|
|
@@ -592,23 +585,23 @@ class kt extends EventTarget {
|
|
|
592
585
|
return ot();
|
|
593
586
|
}
|
|
594
587
|
/** Persist a word ref to localStorage (replaces any existing entry with the same name). */
|
|
595
|
-
static saveWord(
|
|
596
|
-
le(
|
|
588
|
+
static saveWord(n) {
|
|
589
|
+
le(n);
|
|
597
590
|
}
|
|
598
591
|
/** Delete a word ref from localStorage by name. */
|
|
599
|
-
static deleteWord(
|
|
600
|
-
he(
|
|
592
|
+
static deleteWord(n) {
|
|
593
|
+
he(n);
|
|
601
594
|
}
|
|
602
595
|
/**
|
|
603
596
|
* Parse an uploaded ref JSON file and return a RefData object.
|
|
604
597
|
* @param {File} file
|
|
605
598
|
*/
|
|
606
|
-
static importWordFile(
|
|
607
|
-
return ue(
|
|
599
|
+
static importWordFile(n) {
|
|
600
|
+
return ue(n);
|
|
608
601
|
}
|
|
609
602
|
/** Trigger a browser download of a ref as a JSON file. */
|
|
610
|
-
static exportWord(
|
|
611
|
-
de(
|
|
603
|
+
static exportWord(n) {
|
|
604
|
+
de(n);
|
|
612
605
|
}
|
|
613
606
|
}
|
|
614
607
|
export {
|
package/package.json
CHANGED