mellon 0.0.6 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -2
- package/dist/index.d.ts +3 -26
- package/dist/mellon.cjs +2 -2
- package/dist/mellon.mjs +246 -240
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -145,8 +145,7 @@ class Mellon extends EventTarget {
|
|
|
145
145
|
| `threshold` | `number` | `0.65` | Detection threshold (0–1) |
|
|
146
146
|
| `relaxationMs` | `number` | `2000` | Min ms between match events |
|
|
147
147
|
| `inferenceGapMs` | `number` | `300` | Min ms between inference runs |
|
|
148
|
-
| `
|
|
149
|
-
| `modelUrl` | `string` | — | URL to `model.onnx` |
|
|
148
|
+
| `assetsPath` | `string` | — | Base URL for mellon assets |
|
|
150
149
|
|
|
151
150
|
#### Events
|
|
152
151
|
|
package/dist/index.d.ts
CHANGED
|
@@ -25,18 +25,7 @@ export interface SampleInfo {
|
|
|
25
25
|
// ─── Engine ──────────────────────────────────────────────────────────────────
|
|
26
26
|
|
|
27
27
|
export interface EngineConfig {
|
|
28
|
-
|
|
29
|
-
* Base URL where ORT WASM files are served (trailing slash required).
|
|
30
|
-
* Defaults to the jsDelivr CDN. Override for offline / intranet use.
|
|
31
|
-
* @example '/mellon-assets/wasm/'
|
|
32
|
-
*/
|
|
33
|
-
wasmBasePath?: string
|
|
34
|
-
/**
|
|
35
|
-
* Full URL to model.onnx.
|
|
36
|
-
* Defaults to the jsDelivr CDN. Override for offline / intranet use.
|
|
37
|
-
* @example '/mellon-assets/model.onnx'
|
|
38
|
-
*/
|
|
39
|
-
modelUrl?: string
|
|
28
|
+
assetsPath?: string
|
|
40
29
|
}
|
|
41
30
|
|
|
42
31
|
/**
|
|
@@ -210,18 +199,7 @@ export interface MellonOptions {
|
|
|
210
199
|
relaxationMs?: number
|
|
211
200
|
/** Minimum milliseconds between consecutive inference runs. Default: 300 */
|
|
212
201
|
inferenceGapMs?: number
|
|
213
|
-
|
|
214
|
-
* Override the ORT WASM base URL. Defaults to the jsDelivr CDN.
|
|
215
|
-
* Only needed for offline / intranet deployments (trailing slash required).
|
|
216
|
-
* @example '/mellon-assets/wasm/'
|
|
217
|
-
*/
|
|
218
|
-
wasmBasePath?: string
|
|
219
|
-
/**
|
|
220
|
-
* Override the model.onnx URL. Defaults to the jsDelivr CDN.
|
|
221
|
-
* Only needed for offline / intranet deployments.
|
|
222
|
-
* @example '/mellon-assets/model.onnx'
|
|
223
|
-
*/
|
|
224
|
-
modelUrl?: string
|
|
202
|
+
assetsPath?: string
|
|
225
203
|
}
|
|
226
204
|
|
|
227
205
|
/**
|
|
@@ -229,8 +207,7 @@ export interface MellonOptions {
|
|
|
229
207
|
*
|
|
230
208
|
* @example
|
|
231
209
|
* const stt = new Mellon({
|
|
232
|
-
*
|
|
233
|
-
* modelUrl: '/assets/model.onnx',
|
|
210
|
+
* assetsPath: '/mellon-assets/'
|
|
234
211
|
* })
|
|
235
212
|
* await stt.init(pct => progressBar.style.width = pct * 100 + '%')
|
|
236
213
|
* await stt.start()
|
package/dist/mellon.cjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"use strict";Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});const
|
|
1
|
+
"use strict";Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});const Kt="0.0.8",Yt=[1,1,149,64],Qt=`https://cdn.jsdelivr.net/npm/mellon@${Kt}/dist/assets`,st={assetsPath:`${Qt}`};let I=null,q=null,tt=null;function Vt({assetsPath:i}={}){i!==void 0&&(st.assetsPath=i),I=null,q=null,tt=null}async function Xt(i){return I?(i==null||i(1),I):q||(q=(async()=>{const r=st.assetsPath.endsWith("/")?st.assetsPath:st.assetsPath+"/",t=r+"ort.all.min.mjs",s=r+"model.onnx";tt=await new Function("url","return import(url)")(t),tt.env.wasm.wasmPaths=r;const e=await fetch(s);if(!e.ok)throw new Error(`Failed to fetch model: ${e.status}`);const a=parseInt(e.headers.get("content-length")||"0",10),n=e.body.getReader(),c=[];let l=0;for(;;){const{done:m,value:_}=await n.read();if(m)break;c.push(_),l+=_.byteLength,a>0&&(i==null||i(l/a))}const h=new Uint8Array(l);let d=0;for(const m of c)h.set(m,d),d+=m.byteLength;return I=await tt.InferenceSession.create(h.buffer,{executionProviders:["wasm"],graphOptimizationLevel:"all"}),i==null||i(1),I})(),q)}async function Nt(i){if(!I)throw new Error("Model not loaded — call loadModel() first");const r=new tt.Tensor("float32",i,Yt),t=await I.run({input:r}),s=Object.keys(t)[0];return t[s].data}function Zt(i){return i&&i.__esModule&&Object.prototype.hasOwnProperty.call(i,"default")?i.default:i}var ft,At;function Ot(){if(At)return ft;At=1;function i(r){if(this.size=r|0,this.size<=1||(this.size&this.size-1)!==0)throw new Error("FFT size must be a power of two and bigger than 1");this._csize=r<<1;for(var t=new Array(this.size*2),s=0;s<t.length;s+=2){const l=Math.PI*s/this.size;t[s]=Math.cos(l),t[s+1]=-Math.sin(l)}this.table=t;for(var o=0,e=1;this.size>e;e<<=1)o++;this._width=o%2===0?o-1:o,this._bitrev=new Array(1<<this._width);for(var a=0;a<this._bitrev.length;a++){this._bitrev[a]=0;for(var n=0;n<this._width;n+=2){var c=this._width-n-2;this._bitrev[a]|=(a>>>n&3)<<c}}this._out=null,this._data=null,this._inv=0}return ft=i,i.prototype.fromComplexArray=function(t,s){for(var o=s||new Array(t.length>>>1),e=0;e<t.length;e+=2)o[e>>>1]=t[e];return o},i.prototype.createComplexArray=function(){const t=new Array(this._csize);for(var s=0;s<t.length;s++)t[s]=0;return t},i.prototype.toComplexArray=function(t,s){for(var o=s||this.createComplexArray(),e=0;e<o.length;e+=2)o[e]=t[e>>>1],o[e+1]=0;return o},i.prototype.completeSpectrum=function(t){for(var s=this._csize,o=s>>>1,e=2;e<o;e+=2)t[s-e]=t[e],t[s-e+1]=-t[e+1]},i.prototype.transform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=0,this._transform4(),this._out=null,this._data=null},i.prototype.realTransform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=0,this._realTransform4(),this._out=null,this._data=null},i.prototype.inverseTransform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=1,this._transform4();for(var o=0;o<t.length;o++)t[o]/=this.size;this._out=null,this._data=null},i.prototype._transform4=function(){var t=this._out,s=this._csize,o=this._width,e=1<<o,a=s/e<<1,n,c,l=this._bitrev;if(a===4)for(n=0,c=0;n<s;n+=a,c++){const u=l[c];this._singleTransform2(n,u,e)}else for(n=0,c=0;n<s;n+=a,c++){const u=l[c];this._singleTransform4(n,u,e)}var h=this._inv?-1:1,d=this.table;for(e>>=2;e>=2;e>>=2){a=s/e<<1;var m=a>>>2;for(n=0;n<s;n+=a)for(var _=n+m,g=n,f=0;g<_;g+=2,f+=e){const u=g,p=u+m,v=p+m,w=v+m,b=t[u],A=t[u+1],E=t[p],y=t[p+1],F=t[v],C=t[v+1],M=t[w],T=t[w+1],x=b,R=A,z=d[f],S=h*d[f+1],N=E*z-y*S,k=E*S+y*z,L=d[2*f],P=h*d[2*f+1],G=F*L-C*P,H=F*P+C*L,J=d[3*f],K=h*d[3*f+1],Y=M*J-T*K,Q=M*K+T*J,V=x+G,W=R+H,B=x-G,X=R-H,Z=N+Y,U=k+Q,$=h*(N-Y),O=h*(k-Q),et=V+Z,at=W+U,it=V-Z,ct=W-U,lt=B+O,ht=X-$,dt=B-O,ut=X+$;t[u]=et,t[u+1]=at,t[p]=lt,t[p+1]=ht,t[v]=it,t[v+1]=ct,t[w]=dt,t[w+1]=ut}}},i.prototype._singleTransform2=function(t,s,o){const e=this._out,a=this._data,n=a[s],c=a[s+1],l=a[s+o],h=a[s+o+1],d=n+l,m=c+h,_=n-l,g=c-h;e[t]=d,e[t+1]=m,e[t+2]=_,e[t+3]=g},i.prototype._singleTransform4=function(t,s,o){const e=this._out,a=this._data,n=this._inv?-1:1,c=o*2,l=o*3,h=a[s],d=a[s+1],m=a[s+o],_=a[s+o+1],g=a[s+c],f=a[s+c+1],u=a[s+l],p=a[s+l+1],v=h+g,w=d+f,b=h-g,A=d-f,E=m+u,y=_+p,F=n*(m-u),C=n*(_-p),M=v+E,T=w+y,x=b+C,R=A-F,z=v-E,S=w-y,N=b-C,k=A+F;e[t]=M,e[t+1]=T,e[t+2]=x,e[t+3]=R,e[t+4]=z,e[t+5]=S,e[t+6]=N,e[t+7]=k},i.prototype._realTransform4=function(){var t=this._out,s=this._csize,o=this._width,e=1<<o,a=s/e<<1,n,c,l=this._bitrev;if(a===4)for(n=0,c=0;n<s;n+=a,c++){const mt=l[c];this._singleRealTransform2(n,mt>>>1,e>>>1)}else for(n=0,c=0;n<s;n+=a,c++){const mt=l[c];this._singleRealTransform4(n,mt>>>1,e>>>1)}var h=this._inv?-1:1,d=this.table;for(e>>=2;e>=2;e>>=2){a=s/e<<1;var m=a>>>1,_=m>>>1,g=_>>>1;for(n=0;n<s;n+=a)for(var f=0,u=0;f<=g;f+=2,u+=e){var p=n+f,v=p+_,w=v+_,b=w+_,A=t[p],E=t[p+1],y=t[v],F=t[v+1],C=t[w],M=t[w+1],T=t[b],x=t[b+1],R=A,z=E,S=d[u],N=h*d[u+1],k=y*S-F*N,L=y*N+F*S,P=d[2*u],G=h*d[2*u+1],H=C*P-M*G,J=C*G+M*P,K=d[3*u],Y=h*d[3*u+1],Q=T*K-x*Y,V=T*Y+x*K,W=R+H,B=z+J,X=R-H,Z=z-J,U=k+Q,$=L+V,O=h*(k-Q),et=h*(L-V),at=W+U,it=B+$,ct=X+et,lt=Z-O;if(t[p]=at,t[p+1]=it,t[v]=ct,t[v+1]=lt,f===0){var ht=W-U,dt=B-$;t[w]=ht,t[w+1]=dt;continue}if(f!==g){var ut=X,Dt=-Z,jt=W,Wt=-B,Bt=-h*et,Ut=-h*O,$t=-h*$,Lt=-h*U,Pt=ut+Bt,Gt=Dt+Ut,Ht=jt+Lt,Jt=Wt-$t,bt=n+_-f,Et=n+m-f;t[bt]=Pt,t[bt+1]=Gt,t[Et]=Ht,t[Et+1]=Jt}}}},i.prototype._singleRealTransform2=function(t,s,o){const e=this._out,a=this._data,n=a[s],c=a[s+o],l=n+c,h=n-c;e[t]=l,e[t+1]=0,e[t+2]=h,e[t+3]=0},i.prototype._singleRealTransform4=function(t,s,o){const e=this._out,a=this._data,n=this._inv?-1:1,c=o*2,l=o*3,h=a[s],d=a[s+o],m=a[s+c],_=a[s+l],g=h+m,f=h-m,u=d+_,p=n*(d-_),v=g+u,w=f,b=-p,A=g-u,E=f,y=p;e[t]=v,e[t+1]=0,e[t+2]=w,e[t+3]=b,e[t+4]=A,e[t+5]=0,e[t+6]=E,e[t+7]=y},ft}var qt=Ot();const te=Zt(qt),nt=16e3,j=512,D=64,Ft=Math.floor(.025*nt),Ct=Math.floor(.01*nt);function Mt(i){return 2595*Math.log10(1+i/700)}function ee(i){return 700*(10**(i/2595)-1)}function se(){const i=Mt(0),r=Mt(nt/2),t=new Float64Array(D+2);for(let n=0;n<D+2;n++)t[n]=i+n*(r-i)/(D+1);const o=t.map(n=>ee(n)).map(n=>Math.floor((j+1)*n/nt)),e=[],a=Math.floor(j/2)+1;for(let n=0;n<D;n++){const c=new Float32Array(a);for(let l=o[n];l<o[n+1];l++)c[l]=(l-o[n])/(o[n+1]-o[n]);for(let l=o[n+1];l<o[n+2];l++)c[l]=(o[n+2]-l)/(o[n+2]-o[n+1]);e.push(c)}return e}const ne=se(),rt=new te(j),_t=new Float32Array(j),Tt=rt.createComplexArray(),pt=rt.createComplexArray(),xt=new Float32Array(Math.floor(j/2)+1);function kt(i){const r=1+Math.ceil((i.length-Ft)/Ct),t=new Float32Array(r*D),s=Math.floor(j/2)+1;for(let o=0;o<r;o++){const e=o*Ct;_t.fill(0);for(let a=0;a<Ft&&e+a<i.length;a++)_t[a]=i[e+a];rt.toComplexArray(_t,Tt),rt.transform(pt,Tt);for(let a=0;a<s;a++){const n=pt[2*a],c=pt[2*a+1],l=(n*n+c*c)/j;xt[a]=l===0?1e-30:l}for(let a=0;a<D;a++){const n=ne[a];let c=0;for(let l=0;l<s;l++)c+=xt[l]*n[l];t[o*D+a]=Math.log(c===0?1e-30:c)}}return t}function re(i,r){let t=0;for(let s=0;s<i.length;s++)t+=i[s]*r[s];return(t+1)/2}function oe(i,r){let t=0;for(const s of r){const o=re(i,s);o>t&&(t=o)}return t}class Rt extends EventTarget{constructor({name:r,refEmbeddings:t,threshold:s=.65,relaxationMs:o=2e3,inferenceGapMs:e=300}){super(),this.name=r,this.refEmbeddings=t,this.threshold=s,this.relaxationMs=o,this.inferenceGapMs=e,this._lastDetectionAt=0,this._lastInferenceAt=0,this._lastScore=0}get lastScore(){return this._lastScore}async scoreFrame(r){const t=Date.now();if(t-this._lastInferenceAt<this.inferenceGapMs)return null;this._lastInferenceAt=t;const s=kt(r),o=await Nt(s),e=oe(o,this.refEmbeddings);return this._lastScore=e,e>=this.threshold&&t-this._lastDetectionAt>=this.relaxationMs&&(this._lastDetectionAt=t,this.dispatchEvent(new CustomEvent("match",{detail:{name:this.name,confidence:e,timestamp:t}}))),e}}const zt=16e3,ae=1500,vt=24e3;function St(i){if(i.length===vt)return i;const r=new Float32Array(vt);return r.set(i.subarray(0,vt)),r}class It extends EventTarget{constructor(r){super(),this.wordName=r.trim().toLowerCase(),this.samples=[]}get sampleCount(){return this.samples.length}async recordSample(){const r=await navigator.mediaDevices.getUserMedia({audio:!0});return new Promise((t,s)=>{const o=new AudioContext({sampleRate:zt}),e=new MediaRecorder(r),a=[];this.dispatchEvent(new CustomEvent("recording-start")),e.ondataavailable=n=>{n.data.size>0&&a.push(n.data)},e.onstop=async()=>{r.getTracks().forEach(n=>n.stop());try{const c=await new Blob(a,{type:"audio/webm"}).arrayBuffer(),l=await o.decodeAudioData(c);await o.close();const h=l.getChannelData(0),d=St(new Float32Array(h)),m=this._push(d,`Recorded #${this.samples.length}`);t(m)}catch(n){await o.close().catch(()=>{}),s(n)}},e.start(),setTimeout(()=>e.stop(),ae)})}async addAudioFile(r){const t=await r.arrayBuffer(),s=new AudioContext({sampleRate:zt}),o=await s.decodeAudioData(t);await s.close();const e=o.getChannelData(0),a=St(new Float32Array(e));return this._push(a,r.name)}removeSample(r){this.samples.splice(r,1),this.dispatchEvent(new CustomEvent("samples-changed",{detail:{count:this.samples.length}}))}clearSamples(){this.samples=[],this.dispatchEvent(new CustomEvent("samples-changed",{detail:{count:0}}))}async generateRef(){if(this.samples.length<3)throw new Error(`Need at least 3 samples (currently have ${this.samples.length})`);this.dispatchEvent(new CustomEvent("generating",{detail:{total:this.samples.length}}));const r=[];for(let t=0;t<this.samples.length;t++){const s=kt(this.samples[t].audioBuffer),o=await Nt(s);r.push(Array.from(o)),this.dispatchEvent(new CustomEvent("progress",{detail:{done:t+1,total:this.samples.length}}))}return{word_name:this.wordName,model_type:"resnet_50_arc",embeddings:r}}_push(r,t){this.samples.push({audioBuffer:r,name:t});const s=this.samples.length;return this.dispatchEvent(new CustomEvent("sample-added",{detail:{count:s,name:t}})),s}}const ie=`/**
|
|
2
2
|
* public/audio-processor.js
|
|
3
3
|
* AudioWorklet that runs at 16 kHz and continuously emits the last
|
|
4
4
|
* 1.5-second window (24 000 samples) via a circular buffer.
|
|
@@ -35,4 +35,4 @@ class AudioProcessor extends AudioWorkletProcessor {
|
|
|
35
35
|
}
|
|
36
36
|
|
|
37
37
|
registerProcessor('audio-processor', AudioProcessor)
|
|
38
|
-
`;let
|
|
38
|
+
`;let wt=null;function ce(){if(!wt){const i=new Blob([ie],{type:"application/javascript"});wt=URL.createObjectURL(i)}return wt}const gt="mellon_custom_refs";function ot(){try{const i=localStorage.getItem(gt);return i?JSON.parse(i):[]}catch{return[]}}function le(i){const r=ot().filter(t=>t.word_name!==i.word_name);r.push(i),localStorage.setItem(gt,JSON.stringify(r))}function he(i){const r=ot().filter(t=>t.word_name!==i);localStorage.setItem(gt,JSON.stringify(r))}function de(i){const r=JSON.stringify(i,null,2),t=new Blob([r],{type:"application/json"}),s=URL.createObjectURL(t),o=Object.assign(document.createElement("a"),{href:s,download:`${i.word_name}_ref.json`});document.body.appendChild(o),o.click(),document.body.removeChild(o),URL.revokeObjectURL(s)}async function ue(i){const r=await i.text();let t;try{t=JSON.parse(r)}catch{throw new Error("Invalid JSON")}if(!t.embeddings||!Array.isArray(t.embeddings)||!t.embeddings.length)throw new Error('Missing or empty "embeddings" array');if(!Array.isArray(t.embeddings[0]))throw new Error('"embeddings" must be a 2D array');return t.word_name||(t.word_name=i.name.replace(/_ref\.json$/i,"").replace(/\.json$/i,"")),t}class yt extends EventTarget{constructor(r={}){super(),this._opts={words:r.words??[],refs:r.refs??[],threshold:r.threshold??.65,relaxationMs:r.relaxationMs??2e3,inferenceGapMs:r.inferenceGapMs??300,assetsPath:r.assetsPath},this._refs=new Map,this._detectors=new Map,this._audioCtx=null,this._workletNode=null,this._stream=null,this._initialized=!1,this._running=!1}get isInitialized(){return this._initialized}get isRunning(){return this._running}async init(r){var s;if(this._initialized){r==null||r(1);return}this._opts.assetsPath&&Vt({assetsPath:this._opts.assetsPath});try{await Xt(r)}catch(o){throw this.dispatchEvent(new CustomEvent("error",{detail:{error:o}})),o}const t=await ot();for(const o of this._opts.refs){const e=(s=o.match(/\/([^/]+?)_ref\.json$/))==null?void 0:s[1];if(!!t.find(n=>n.word_name===e))break;try{let n;if(typeof o=="string"){console.log("fetching ref : ",o);const c=await fetch(o);if(!c.ok)throw new Error(`HTTP ${c.status}`);n=await c.json()}else n=o;yt.saveWord(n),this.addCustomWord(n)}catch(n){const c=typeof o=="string"?o:o.word_name;console.warn(`[Mellon] Failed to load ref "${c}": ${n.message}`)}}this._initialized=!0,this.dispatchEvent(new CustomEvent("ready"))}async start(r){this._initialized||await this.init();const t=r??this._opts.words;try{this._stream=await navigator.mediaDevices.getUserMedia({audio:!0})}catch(e){const a=new Error(`Microphone access denied: ${e.message}`);throw this.dispatchEvent(new CustomEvent("error",{detail:{error:a}})),a}this._audioCtx=new AudioContext({sampleRate:16e3});const s=ce();await this._audioCtx.audioWorklet.addModule(s);const o=this._audioCtx.createMediaStreamSource(this._stream);this._workletNode=new AudioWorkletNode(this._audioCtx,"audio-processor"),o.connect(this._workletNode),this._workletNode.connect(this._audioCtx.destination);for(const e of t){const a=this._refs.get(e);if(!a){console.warn(`[Mellon] No reference embeddings for "${e}" — skipping. Call addCustomWord() to register custom words before start().`);continue}const n=new Rt({name:e,refEmbeddings:a.embeddings,threshold:this._opts.threshold,relaxationMs:this._opts.relaxationMs,inferenceGapMs:this._opts.inferenceGapMs});n.addEventListener("match",c=>{this.dispatchEvent(new CustomEvent("match",{detail:c.detail}))}),this._detectors.set(e,n)}this._workletNode.port.onmessage=async e=>{const a=[];for(const n of this._detectors.values())a.push(n.scoreFrame(e.data));await Promise.allSettled(a)},this._running=!0}stop(){this._workletNode&&(this._workletNode.port.onmessage=null,this._workletNode.disconnect(),this._workletNode=null),this._stream&&(this._stream.getTracks().forEach(r=>r.stop()),this._stream=null),this._audioCtx&&(this._audioCtx.close(),this._audioCtx=null),this._detectors.clear(),this._running=!1}addCustomWord(r){if(this._refs.set(r.word_name,r),this._running&&this._workletNode){const t=new Rt({name:r.word_name,refEmbeddings:r.embeddings,threshold:this._opts.threshold,relaxationMs:this._opts.relaxationMs,inferenceGapMs:this._opts.inferenceGapMs});t.addEventListener("match",s=>{this.dispatchEvent(new CustomEvent("match",{detail:s.detail}))}),this._detectors.set(r.word_name,t)}}enrollWord(r){return new It(r)}static loadWords(){return ot()}static saveWord(r){le(r)}static deleteWord(r){he(r)}static importWordFile(r){return ue(r)}static exportWord(r){de(r)}}exports.EnrollmentSession=It;exports.Mellon=yt;
|
package/dist/mellon.mjs
CHANGED
|
@@ -1,20 +1,20 @@
|
|
|
1
|
-
const
|
|
2
|
-
assetsPath: `${
|
|
1
|
+
const Jt = "0.0.8", Kt = [1, 1, 149, 64], Yt = `https://cdn.jsdelivr.net/npm/mellon@${Jt}/dist/assets`, st = {
|
|
2
|
+
assetsPath: `${Yt}`
|
|
3
3
|
};
|
|
4
4
|
let I = null, q = null, tt = null;
|
|
5
|
-
function
|
|
5
|
+
function Qt({ assetsPath: i } = {}) {
|
|
6
6
|
i !== void 0 && (st.assetsPath = i), I = null, q = null, tt = null;
|
|
7
7
|
}
|
|
8
|
-
async function
|
|
8
|
+
async function Vt(i) {
|
|
9
9
|
return I ? (i == null || i(1), I) : q || (q = (async () => {
|
|
10
|
-
const
|
|
11
|
-
tt = await new Function("url", "return import(url)")(t), tt.env.wasm.wasmPaths =
|
|
12
|
-
const
|
|
13
|
-
if (!
|
|
14
|
-
const a = parseInt(
|
|
10
|
+
const r = st.assetsPath.endsWith("/") ? st.assetsPath : st.assetsPath + "/", t = r + "ort.all.min.mjs", s = r + "model.onnx";
|
|
11
|
+
tt = await new Function("url", "return import(url)")(t), tt.env.wasm.wasmPaths = r;
|
|
12
|
+
const e = await fetch(s);
|
|
13
|
+
if (!e.ok) throw new Error(`Failed to fetch model: ${e.status}`);
|
|
14
|
+
const a = parseInt(e.headers.get("content-length") || "0", 10), n = e.body.getReader(), c = [];
|
|
15
15
|
let l = 0;
|
|
16
16
|
for (; ; ) {
|
|
17
|
-
const { done: m, value: _ } = await
|
|
17
|
+
const { done: m, value: _ } = await n.read();
|
|
18
18
|
if (m) break;
|
|
19
19
|
c.push(_), l += _.byteLength, a > 0 && (i == null || i(l / a));
|
|
20
20
|
}
|
|
@@ -30,186 +30,186 @@ async function Qt(i) {
|
|
|
30
30
|
}
|
|
31
31
|
async function St(i) {
|
|
32
32
|
if (!I) throw new Error("Model not loaded — call loadModel() first");
|
|
33
|
-
const
|
|
34
|
-
return t[
|
|
33
|
+
const r = new tt.Tensor("float32", i, Kt), t = await I.run({ input: r }), s = Object.keys(t)[0];
|
|
34
|
+
return t[s].data;
|
|
35
35
|
}
|
|
36
|
-
function
|
|
36
|
+
function Xt(i) {
|
|
37
37
|
return i && i.__esModule && Object.prototype.hasOwnProperty.call(i, "default") ? i.default : i;
|
|
38
38
|
}
|
|
39
|
-
var
|
|
40
|
-
function
|
|
41
|
-
if (
|
|
42
|
-
|
|
43
|
-
function i(
|
|
44
|
-
if (this.size =
|
|
39
|
+
var ft, At;
|
|
40
|
+
function Zt() {
|
|
41
|
+
if (At) return ft;
|
|
42
|
+
At = 1;
|
|
43
|
+
function i(r) {
|
|
44
|
+
if (this.size = r | 0, this.size <= 1 || (this.size & this.size - 1) !== 0)
|
|
45
45
|
throw new Error("FFT size must be a power of two and bigger than 1");
|
|
46
|
-
this._csize =
|
|
47
|
-
for (var t = new Array(this.size * 2),
|
|
48
|
-
const l = Math.PI *
|
|
49
|
-
t[
|
|
46
|
+
this._csize = r << 1;
|
|
47
|
+
for (var t = new Array(this.size * 2), s = 0; s < t.length; s += 2) {
|
|
48
|
+
const l = Math.PI * s / this.size;
|
|
49
|
+
t[s] = Math.cos(l), t[s + 1] = -Math.sin(l);
|
|
50
50
|
}
|
|
51
51
|
this.table = t;
|
|
52
|
-
for (var o = 0,
|
|
52
|
+
for (var o = 0, e = 1; this.size > e; e <<= 1)
|
|
53
53
|
o++;
|
|
54
54
|
this._width = o % 2 === 0 ? o - 1 : o, this._bitrev = new Array(1 << this._width);
|
|
55
55
|
for (var a = 0; a < this._bitrev.length; a++) {
|
|
56
56
|
this._bitrev[a] = 0;
|
|
57
|
-
for (var
|
|
58
|
-
var c = this._width -
|
|
59
|
-
this._bitrev[a] |= (a >>>
|
|
57
|
+
for (var n = 0; n < this._width; n += 2) {
|
|
58
|
+
var c = this._width - n - 2;
|
|
59
|
+
this._bitrev[a] |= (a >>> n & 3) << c;
|
|
60
60
|
}
|
|
61
61
|
}
|
|
62
62
|
this._out = null, this._data = null, this._inv = 0;
|
|
63
63
|
}
|
|
64
|
-
return
|
|
65
|
-
for (var o =
|
|
66
|
-
o[
|
|
64
|
+
return ft = i, i.prototype.fromComplexArray = function(t, s) {
|
|
65
|
+
for (var o = s || new Array(t.length >>> 1), e = 0; e < t.length; e += 2)
|
|
66
|
+
o[e >>> 1] = t[e];
|
|
67
67
|
return o;
|
|
68
68
|
}, i.prototype.createComplexArray = function() {
|
|
69
69
|
const t = new Array(this._csize);
|
|
70
|
-
for (var
|
|
71
|
-
t[
|
|
70
|
+
for (var s = 0; s < t.length; s++)
|
|
71
|
+
t[s] = 0;
|
|
72
72
|
return t;
|
|
73
|
-
}, i.prototype.toComplexArray = function(t,
|
|
74
|
-
for (var o =
|
|
75
|
-
o[
|
|
73
|
+
}, i.prototype.toComplexArray = function(t, s) {
|
|
74
|
+
for (var o = s || this.createComplexArray(), e = 0; e < o.length; e += 2)
|
|
75
|
+
o[e] = t[e >>> 1], o[e + 1] = 0;
|
|
76
76
|
return o;
|
|
77
77
|
}, i.prototype.completeSpectrum = function(t) {
|
|
78
|
-
for (var
|
|
79
|
-
t[
|
|
80
|
-
}, i.prototype.transform = function(t,
|
|
81
|
-
if (t ===
|
|
78
|
+
for (var s = this._csize, o = s >>> 1, e = 2; e < o; e += 2)
|
|
79
|
+
t[s - e] = t[e], t[s - e + 1] = -t[e + 1];
|
|
80
|
+
}, i.prototype.transform = function(t, s) {
|
|
81
|
+
if (t === s)
|
|
82
82
|
throw new Error("Input and output buffers must be different");
|
|
83
|
-
this._out = t, this._data =
|
|
84
|
-
}, i.prototype.realTransform = function(t,
|
|
85
|
-
if (t ===
|
|
83
|
+
this._out = t, this._data = s, this._inv = 0, this._transform4(), this._out = null, this._data = null;
|
|
84
|
+
}, i.prototype.realTransform = function(t, s) {
|
|
85
|
+
if (t === s)
|
|
86
86
|
throw new Error("Input and output buffers must be different");
|
|
87
|
-
this._out = t, this._data =
|
|
88
|
-
}, i.prototype.inverseTransform = function(t,
|
|
89
|
-
if (t ===
|
|
87
|
+
this._out = t, this._data = s, this._inv = 0, this._realTransform4(), this._out = null, this._data = null;
|
|
88
|
+
}, i.prototype.inverseTransform = function(t, s) {
|
|
89
|
+
if (t === s)
|
|
90
90
|
throw new Error("Input and output buffers must be different");
|
|
91
|
-
this._out = t, this._data =
|
|
91
|
+
this._out = t, this._data = s, this._inv = 1, this._transform4();
|
|
92
92
|
for (var o = 0; o < t.length; o++)
|
|
93
93
|
t[o] /= this.size;
|
|
94
94
|
this._out = null, this._data = null;
|
|
95
95
|
}, i.prototype._transform4 = function() {
|
|
96
|
-
var t = this._out,
|
|
96
|
+
var t = this._out, s = this._csize, o = this._width, e = 1 << o, a = s / e << 1, n, c, l = this._bitrev;
|
|
97
97
|
if (a === 4)
|
|
98
|
-
for (
|
|
98
|
+
for (n = 0, c = 0; n < s; n += a, c++) {
|
|
99
99
|
const u = l[c];
|
|
100
|
-
this._singleTransform2(
|
|
100
|
+
this._singleTransform2(n, u, e);
|
|
101
101
|
}
|
|
102
102
|
else
|
|
103
|
-
for (
|
|
103
|
+
for (n = 0, c = 0; n < s; n += a, c++) {
|
|
104
104
|
const u = l[c];
|
|
105
|
-
this._singleTransform4(
|
|
105
|
+
this._singleTransform4(n, u, e);
|
|
106
106
|
}
|
|
107
107
|
var h = this._inv ? -1 : 1, d = this.table;
|
|
108
|
-
for (
|
|
109
|
-
a =
|
|
108
|
+
for (e >>= 2; e >= 2; e >>= 2) {
|
|
109
|
+
a = s / e << 1;
|
|
110
110
|
var m = a >>> 2;
|
|
111
|
-
for (
|
|
112
|
-
for (var _ =
|
|
113
|
-
const u = g, p = u + m, v = p + m, w = v + m, b = t[u],
|
|
114
|
-
t[u] = et, t[u + 1] =
|
|
111
|
+
for (n = 0; n < s; n += a)
|
|
112
|
+
for (var _ = n + m, g = n, f = 0; g < _; g += 2, f += e) {
|
|
113
|
+
const u = g, p = u + m, v = p + m, w = v + m, b = t[u], E = t[u + 1], A = t[p], y = t[p + 1], F = t[v], C = t[v + 1], M = t[w], T = t[w + 1], x = b, R = E, z = d[f], S = h * d[f + 1], N = A * z - y * S, k = A * S + y * z, L = d[2 * f], P = h * d[2 * f + 1], G = F * L - C * P, H = F * P + C * L, J = d[3 * f], K = h * d[3 * f + 1], Y = M * J - T * K, Q = M * K + T * J, V = x + G, j = R + H, B = x - G, X = R - H, Z = N + Y, U = k + Q, $ = h * (N - Y), O = h * (k - Q), et = V + Z, at = j + U, it = V - Z, ct = j - U, lt = B + O, ht = X - $, dt = B - O, ut = X + $;
|
|
114
|
+
t[u] = et, t[u + 1] = at, t[p] = lt, t[p + 1] = ht, t[v] = it, t[v + 1] = ct, t[w] = dt, t[w + 1] = ut;
|
|
115
115
|
}
|
|
116
116
|
}
|
|
117
|
-
}, i.prototype._singleTransform2 = function(t,
|
|
118
|
-
const
|
|
119
|
-
|
|
120
|
-
}, i.prototype._singleTransform4 = function(t,
|
|
121
|
-
const
|
|
122
|
-
|
|
117
|
+
}, i.prototype._singleTransform2 = function(t, s, o) {
|
|
118
|
+
const e = this._out, a = this._data, n = a[s], c = a[s + 1], l = a[s + o], h = a[s + o + 1], d = n + l, m = c + h, _ = n - l, g = c - h;
|
|
119
|
+
e[t] = d, e[t + 1] = m, e[t + 2] = _, e[t + 3] = g;
|
|
120
|
+
}, i.prototype._singleTransform4 = function(t, s, o) {
|
|
121
|
+
const e = this._out, a = this._data, n = this._inv ? -1 : 1, c = o * 2, l = o * 3, h = a[s], d = a[s + 1], m = a[s + o], _ = a[s + o + 1], g = a[s + c], f = a[s + c + 1], u = a[s + l], p = a[s + l + 1], v = h + g, w = d + f, b = h - g, E = d - f, A = m + u, y = _ + p, F = n * (m - u), C = n * (_ - p), M = v + A, T = w + y, x = b + C, R = E - F, z = v - A, S = w - y, N = b - C, k = E + F;
|
|
122
|
+
e[t] = M, e[t + 1] = T, e[t + 2] = x, e[t + 3] = R, e[t + 4] = z, e[t + 5] = S, e[t + 6] = N, e[t + 7] = k;
|
|
123
123
|
}, i.prototype._realTransform4 = function() {
|
|
124
|
-
var t = this._out,
|
|
124
|
+
var t = this._out, s = this._csize, o = this._width, e = 1 << o, a = s / e << 1, n, c, l = this._bitrev;
|
|
125
125
|
if (a === 4)
|
|
126
|
-
for (
|
|
127
|
-
const
|
|
128
|
-
this._singleRealTransform2(
|
|
126
|
+
for (n = 0, c = 0; n < s; n += a, c++) {
|
|
127
|
+
const mt = l[c];
|
|
128
|
+
this._singleRealTransform2(n, mt >>> 1, e >>> 1);
|
|
129
129
|
}
|
|
130
130
|
else
|
|
131
|
-
for (
|
|
132
|
-
const
|
|
133
|
-
this._singleRealTransform4(
|
|
131
|
+
for (n = 0, c = 0; n < s; n += a, c++) {
|
|
132
|
+
const mt = l[c];
|
|
133
|
+
this._singleRealTransform4(n, mt >>> 1, e >>> 1);
|
|
134
134
|
}
|
|
135
135
|
var h = this._inv ? -1 : 1, d = this.table;
|
|
136
|
-
for (
|
|
137
|
-
a =
|
|
136
|
+
for (e >>= 2; e >= 2; e >>= 2) {
|
|
137
|
+
a = s / e << 1;
|
|
138
138
|
var m = a >>> 1, _ = m >>> 1, g = _ >>> 1;
|
|
139
|
-
for (
|
|
140
|
-
for (var f = 0, u = 0; f <= g; f += 2, u +=
|
|
141
|
-
var p =
|
|
142
|
-
if (t[p] =
|
|
143
|
-
var
|
|
144
|
-
t[w] =
|
|
139
|
+
for (n = 0; n < s; n += a)
|
|
140
|
+
for (var f = 0, u = 0; f <= g; f += 2, u += e) {
|
|
141
|
+
var p = n + f, v = p + _, w = v + _, b = w + _, E = t[p], A = t[p + 1], y = t[v], F = t[v + 1], C = t[w], M = t[w + 1], T = t[b], x = t[b + 1], R = E, z = A, S = d[u], N = h * d[u + 1], k = y * S - F * N, L = y * N + F * S, P = d[2 * u], G = h * d[2 * u + 1], H = C * P - M * G, J = C * G + M * P, K = d[3 * u], Y = h * d[3 * u + 1], Q = T * K - x * Y, V = T * Y + x * K, j = R + H, B = z + J, X = R - H, Z = z - J, U = k + Q, $ = L + V, O = h * (k - Q), et = h * (L - V), at = j + U, it = B + $, ct = X + et, lt = Z - O;
|
|
142
|
+
if (t[p] = at, t[p + 1] = it, t[v] = ct, t[v + 1] = lt, f === 0) {
|
|
143
|
+
var ht = j - U, dt = B - $;
|
|
144
|
+
t[w] = ht, t[w + 1] = dt;
|
|
145
145
|
continue;
|
|
146
146
|
}
|
|
147
147
|
if (f !== g) {
|
|
148
|
-
var
|
|
149
|
-
t[yt] =
|
|
148
|
+
var ut = X, It = -Z, Dt = j, Wt = -B, jt = -h * et, Bt = -h * O, Ut = -h * $, $t = -h * U, Lt = ut + jt, Pt = It + Bt, Gt = Dt + $t, Ht = Wt - Ut, yt = n + _ - f, bt = n + m - f;
|
|
149
|
+
t[yt] = Lt, t[yt + 1] = Pt, t[bt] = Gt, t[bt + 1] = Ht;
|
|
150
150
|
}
|
|
151
151
|
}
|
|
152
152
|
}
|
|
153
|
-
}, i.prototype._singleRealTransform2 = function(t,
|
|
154
|
-
const
|
|
155
|
-
|
|
156
|
-
}, i.prototype._singleRealTransform4 = function(t,
|
|
157
|
-
const
|
|
158
|
-
|
|
159
|
-
},
|
|
153
|
+
}, i.prototype._singleRealTransform2 = function(t, s, o) {
|
|
154
|
+
const e = this._out, a = this._data, n = a[s], c = a[s + o], l = n + c, h = n - c;
|
|
155
|
+
e[t] = l, e[t + 1] = 0, e[t + 2] = h, e[t + 3] = 0;
|
|
156
|
+
}, i.prototype._singleRealTransform4 = function(t, s, o) {
|
|
157
|
+
const e = this._out, a = this._data, n = this._inv ? -1 : 1, c = o * 2, l = o * 3, h = a[s], d = a[s + o], m = a[s + c], _ = a[s + l], g = h + m, f = h - m, u = d + _, p = n * (d - _), v = g + u, w = f, b = -p, E = g - u, A = f, y = p;
|
|
158
|
+
e[t] = v, e[t + 1] = 0, e[t + 2] = w, e[t + 3] = b, e[t + 4] = E, e[t + 5] = 0, e[t + 6] = A, e[t + 7] = y;
|
|
159
|
+
}, ft;
|
|
160
160
|
}
|
|
161
|
-
var
|
|
162
|
-
const
|
|
161
|
+
var Ot = Zt();
|
|
162
|
+
const qt = /* @__PURE__ */ Xt(Ot), nt = 16e3, W = 512, D = 64, Et = Math.floor(0.025 * nt), Ft = Math.floor(0.01 * nt);
|
|
163
163
|
function Ct(i) {
|
|
164
164
|
return 2595 * Math.log10(1 + i / 700);
|
|
165
165
|
}
|
|
166
|
-
function
|
|
166
|
+
function te(i) {
|
|
167
167
|
return 700 * (10 ** (i / 2595) - 1);
|
|
168
168
|
}
|
|
169
|
-
function
|
|
170
|
-
const i = Ct(0),
|
|
171
|
-
for (let
|
|
172
|
-
t[
|
|
173
|
-
const o = t.map((
|
|
174
|
-
for (let
|
|
169
|
+
function ee() {
|
|
170
|
+
const i = Ct(0), r = Ct(nt / 2), t = new Float64Array(D + 2);
|
|
171
|
+
for (let n = 0; n < D + 2; n++)
|
|
172
|
+
t[n] = i + n * (r - i) / (D + 1);
|
|
173
|
+
const o = t.map((n) => te(n)).map((n) => Math.floor((W + 1) * n / nt)), e = [], a = Math.floor(W / 2) + 1;
|
|
174
|
+
for (let n = 0; n < D; n++) {
|
|
175
175
|
const c = new Float32Array(a);
|
|
176
|
-
for (let l = o[
|
|
177
|
-
for (let l = o[
|
|
178
|
-
|
|
176
|
+
for (let l = o[n]; l < o[n + 1]; l++) c[l] = (l - o[n]) / (o[n + 1] - o[n]);
|
|
177
|
+
for (let l = o[n + 1]; l < o[n + 2]; l++) c[l] = (o[n + 2] - l) / (o[n + 2] - o[n + 1]);
|
|
178
|
+
e.push(c);
|
|
179
179
|
}
|
|
180
|
-
return
|
|
180
|
+
return e;
|
|
181
181
|
}
|
|
182
|
-
const
|
|
182
|
+
const se = ee(), rt = new qt(W), _t = new Float32Array(W), Mt = rt.createComplexArray(), pt = rt.createComplexArray(), Tt = new Float32Array(Math.floor(W / 2) + 1);
|
|
183
183
|
function Nt(i) {
|
|
184
|
-
const
|
|
185
|
-
for (let o = 0; o <
|
|
186
|
-
const
|
|
187
|
-
|
|
188
|
-
for (let a = 0; a <
|
|
189
|
-
|
|
190
|
-
rt.toComplexArray(
|
|
191
|
-
for (let a = 0; a <
|
|
192
|
-
const
|
|
184
|
+
const r = 1 + Math.ceil((i.length - Et) / Ft), t = new Float32Array(r * D), s = Math.floor(W / 2) + 1;
|
|
185
|
+
for (let o = 0; o < r; o++) {
|
|
186
|
+
const e = o * Ft;
|
|
187
|
+
_t.fill(0);
|
|
188
|
+
for (let a = 0; a < Et && e + a < i.length; a++)
|
|
189
|
+
_t[a] = i[e + a];
|
|
190
|
+
rt.toComplexArray(_t, Mt), rt.transform(pt, Mt);
|
|
191
|
+
for (let a = 0; a < s; a++) {
|
|
192
|
+
const n = pt[2 * a], c = pt[2 * a + 1], l = (n * n + c * c) / W;
|
|
193
193
|
Tt[a] = l === 0 ? 1e-30 : l;
|
|
194
194
|
}
|
|
195
195
|
for (let a = 0; a < D; a++) {
|
|
196
|
-
const
|
|
196
|
+
const n = se[a];
|
|
197
197
|
let c = 0;
|
|
198
|
-
for (let l = 0; l <
|
|
198
|
+
for (let l = 0; l < s; l++) c += Tt[l] * n[l];
|
|
199
199
|
t[o * D + a] = Math.log(c === 0 ? 1e-30 : c);
|
|
200
200
|
}
|
|
201
201
|
}
|
|
202
202
|
return t;
|
|
203
203
|
}
|
|
204
|
-
function
|
|
204
|
+
function ne(i, r) {
|
|
205
205
|
let t = 0;
|
|
206
|
-
for (let
|
|
206
|
+
for (let s = 0; s < i.length; s++) t += i[s] * r[s];
|
|
207
207
|
return (t + 1) / 2;
|
|
208
208
|
}
|
|
209
|
-
function
|
|
209
|
+
function re(i, r) {
|
|
210
210
|
let t = 0;
|
|
211
|
-
for (const
|
|
212
|
-
const o =
|
|
211
|
+
for (const s of r) {
|
|
212
|
+
const o = ne(i, s);
|
|
213
213
|
o > t && (t = o);
|
|
214
214
|
}
|
|
215
215
|
return t;
|
|
@@ -223,8 +223,8 @@ class xt extends EventTarget {
|
|
|
223
223
|
* @param {number} [opts.relaxationMs=2000] Min ms between events
|
|
224
224
|
* @param {number} [opts.inferenceGapMs=300] Min ms between inferences
|
|
225
225
|
*/
|
|
226
|
-
constructor({ name:
|
|
227
|
-
super(), this.name =
|
|
226
|
+
constructor({ name: r, refEmbeddings: t, threshold: s = 0.65, relaxationMs: o = 2e3, inferenceGapMs: e = 300 }) {
|
|
227
|
+
super(), this.name = r, this.refEmbeddings = t, this.threshold = s, this.relaxationMs = o, this.inferenceGapMs = e, this._lastDetectionAt = 0, this._lastInferenceAt = 0, this._lastScore = 0;
|
|
228
228
|
}
|
|
229
229
|
get lastScore() {
|
|
230
230
|
return this._lastScore;
|
|
@@ -235,26 +235,26 @@ class xt extends EventTarget {
|
|
|
235
235
|
* @param {Float32Array} audioBuffer 24 000 samples at 16 kHz
|
|
236
236
|
* @returns {Promise<number|null>} Similarity score, or null if rate-limited
|
|
237
237
|
*/
|
|
238
|
-
async scoreFrame(
|
|
238
|
+
async scoreFrame(r) {
|
|
239
239
|
const t = Date.now();
|
|
240
240
|
if (t - this._lastInferenceAt < this.inferenceGapMs) return null;
|
|
241
241
|
this._lastInferenceAt = t;
|
|
242
|
-
const
|
|
243
|
-
return this._lastScore =
|
|
244
|
-
detail: { name: this.name, confidence:
|
|
245
|
-
}))),
|
|
242
|
+
const s = Nt(r), o = await St(s), e = re(o, this.refEmbeddings);
|
|
243
|
+
return this._lastScore = e, e >= this.threshold && t - this._lastDetectionAt >= this.relaxationMs && (this._lastDetectionAt = t, this.dispatchEvent(new CustomEvent("match", {
|
|
244
|
+
detail: { name: this.name, confidence: e, timestamp: t }
|
|
245
|
+
}))), e;
|
|
246
246
|
}
|
|
247
247
|
}
|
|
248
|
-
const Rt = 16e3,
|
|
248
|
+
const Rt = 16e3, oe = 1500, vt = 24e3;
|
|
249
249
|
function zt(i) {
|
|
250
|
-
if (i.length ===
|
|
251
|
-
const
|
|
252
|
-
return
|
|
250
|
+
if (i.length === vt) return i;
|
|
251
|
+
const r = new Float32Array(vt);
|
|
252
|
+
return r.set(i.subarray(0, vt)), r;
|
|
253
253
|
}
|
|
254
|
-
class
|
|
254
|
+
class ae extends EventTarget {
|
|
255
255
|
/** @param {string} wordName — the wake word label */
|
|
256
|
-
constructor(
|
|
257
|
-
super(), this.wordName =
|
|
256
|
+
constructor(r) {
|
|
257
|
+
super(), this.wordName = r.trim().toLowerCase(), this.samples = [];
|
|
258
258
|
}
|
|
259
259
|
get sampleCount() {
|
|
260
260
|
return this.samples.length;
|
|
@@ -267,23 +267,23 @@ class oe extends EventTarget {
|
|
|
267
267
|
* @returns {Promise<number>} Index (1-based) of the new sample
|
|
268
268
|
*/
|
|
269
269
|
async recordSample() {
|
|
270
|
-
const
|
|
271
|
-
return new Promise((t,
|
|
272
|
-
const o = new AudioContext({ sampleRate: Rt }),
|
|
273
|
-
this.dispatchEvent(new CustomEvent("recording-start")),
|
|
274
|
-
|
|
275
|
-
},
|
|
276
|
-
|
|
270
|
+
const r = await navigator.mediaDevices.getUserMedia({ audio: !0 });
|
|
271
|
+
return new Promise((t, s) => {
|
|
272
|
+
const o = new AudioContext({ sampleRate: Rt }), e = new MediaRecorder(r), a = [];
|
|
273
|
+
this.dispatchEvent(new CustomEvent("recording-start")), e.ondataavailable = (n) => {
|
|
274
|
+
n.data.size > 0 && a.push(n.data);
|
|
275
|
+
}, e.onstop = async () => {
|
|
276
|
+
r.getTracks().forEach((n) => n.stop());
|
|
277
277
|
try {
|
|
278
278
|
const c = await new Blob(a, { type: "audio/webm" }).arrayBuffer(), l = await o.decodeAudioData(c);
|
|
279
279
|
await o.close();
|
|
280
280
|
const h = l.getChannelData(0), d = zt(new Float32Array(h)), m = this._push(d, `Recorded #${this.samples.length}`);
|
|
281
281
|
t(m);
|
|
282
|
-
} catch (
|
|
282
|
+
} catch (n) {
|
|
283
283
|
await o.close().catch(() => {
|
|
284
|
-
}),
|
|
284
|
+
}), s(n);
|
|
285
285
|
}
|
|
286
|
-
},
|
|
286
|
+
}, e.start(), setTimeout(() => e.stop(), oe);
|
|
287
287
|
});
|
|
288
288
|
}
|
|
289
289
|
// ─── Upload ────────────────────────────────────────────────────────────────
|
|
@@ -293,19 +293,19 @@ class oe extends EventTarget {
|
|
|
293
293
|
* @param {File} file
|
|
294
294
|
* @returns {Promise<number>} Index (1-based) of the new sample
|
|
295
295
|
*/
|
|
296
|
-
async addAudioFile(
|
|
297
|
-
const t = await
|
|
298
|
-
await
|
|
299
|
-
const
|
|
300
|
-
return this._push(a,
|
|
296
|
+
async addAudioFile(r) {
|
|
297
|
+
const t = await r.arrayBuffer(), s = new AudioContext({ sampleRate: Rt }), o = await s.decodeAudioData(t);
|
|
298
|
+
await s.close();
|
|
299
|
+
const e = o.getChannelData(0), a = zt(new Float32Array(e));
|
|
300
|
+
return this._push(a, r.name);
|
|
301
301
|
}
|
|
302
302
|
// ─── Manage ────────────────────────────────────────────────────────────────
|
|
303
303
|
/**
|
|
304
304
|
* Remove a sample by 0-based index.
|
|
305
305
|
* @param {number} idx
|
|
306
306
|
*/
|
|
307
|
-
removeSample(
|
|
308
|
-
this.samples.splice(
|
|
307
|
+
removeSample(r) {
|
|
308
|
+
this.samples.splice(r, 1), this.dispatchEvent(new CustomEvent("samples-changed", { detail: { count: this.samples.length } }));
|
|
309
309
|
}
|
|
310
310
|
clearSamples() {
|
|
311
311
|
this.samples = [], this.dispatchEvent(new CustomEvent("samples-changed", { detail: { count: 0 } }));
|
|
@@ -321,27 +321,27 @@ class oe extends EventTarget {
|
|
|
321
321
|
if (this.samples.length < 3)
|
|
322
322
|
throw new Error(`Need at least 3 samples (currently have ${this.samples.length})`);
|
|
323
323
|
this.dispatchEvent(new CustomEvent("generating", { detail: { total: this.samples.length } }));
|
|
324
|
-
const
|
|
324
|
+
const r = [];
|
|
325
325
|
for (let t = 0; t < this.samples.length; t++) {
|
|
326
|
-
const
|
|
327
|
-
|
|
326
|
+
const s = Nt(this.samples[t].audioBuffer), o = await St(s);
|
|
327
|
+
r.push(Array.from(o)), this.dispatchEvent(new CustomEvent("progress", {
|
|
328
328
|
detail: { done: t + 1, total: this.samples.length }
|
|
329
329
|
}));
|
|
330
330
|
}
|
|
331
331
|
return {
|
|
332
332
|
word_name: this.wordName,
|
|
333
333
|
model_type: "resnet_50_arc",
|
|
334
|
-
embeddings:
|
|
334
|
+
embeddings: r
|
|
335
335
|
};
|
|
336
336
|
}
|
|
337
337
|
// ─── Private ───────────────────────────────────────────────────────────────
|
|
338
|
-
_push(
|
|
339
|
-
this.samples.push({ audioBuffer:
|
|
340
|
-
const
|
|
341
|
-
return this.dispatchEvent(new CustomEvent("sample-added", { detail: { count:
|
|
338
|
+
_push(r, t) {
|
|
339
|
+
this.samples.push({ audioBuffer: r, name: t });
|
|
340
|
+
const s = this.samples.length;
|
|
341
|
+
return this.dispatchEvent(new CustomEvent("sample-added", { detail: { count: s, name: t } })), s;
|
|
342
342
|
}
|
|
343
343
|
}
|
|
344
|
-
const
|
|
344
|
+
const ie = `/**
|
|
345
345
|
* public/audio-processor.js
|
|
346
346
|
* AudioWorklet that runs at 16 kHz and continuously emits the last
|
|
347
347
|
* 1.5-second window (24 000 samples) via a circular buffer.
|
|
@@ -379,43 +379,43 @@ class AudioProcessor extends AudioWorkletProcessor {
|
|
|
379
379
|
|
|
380
380
|
registerProcessor('audio-processor', AudioProcessor)
|
|
381
381
|
`;
|
|
382
|
-
let
|
|
383
|
-
function
|
|
384
|
-
if (!
|
|
385
|
-
const i = new Blob([
|
|
386
|
-
|
|
382
|
+
let wt = null;
|
|
383
|
+
function ce() {
|
|
384
|
+
if (!wt) {
|
|
385
|
+
const i = new Blob([ie], { type: "application/javascript" });
|
|
386
|
+
wt = URL.createObjectURL(i);
|
|
387
387
|
}
|
|
388
|
-
return
|
|
388
|
+
return wt;
|
|
389
389
|
}
|
|
390
|
-
const
|
|
391
|
-
function
|
|
390
|
+
const gt = "mellon_custom_refs";
|
|
391
|
+
function ot() {
|
|
392
392
|
try {
|
|
393
|
-
const i = localStorage.getItem(
|
|
393
|
+
const i = localStorage.getItem(gt);
|
|
394
394
|
return i ? JSON.parse(i) : [];
|
|
395
395
|
} catch {
|
|
396
396
|
return [];
|
|
397
397
|
}
|
|
398
398
|
}
|
|
399
|
-
function ce(i) {
|
|
400
|
-
const n = gt().filter((t) => t.word_name !== i.word_name);
|
|
401
|
-
n.push(i), localStorage.setItem(wt, JSON.stringify(n));
|
|
402
|
-
}
|
|
403
399
|
function le(i) {
|
|
404
|
-
const
|
|
405
|
-
localStorage.setItem(
|
|
400
|
+
const r = ot().filter((t) => t.word_name !== i.word_name);
|
|
401
|
+
r.push(i), localStorage.setItem(gt, JSON.stringify(r));
|
|
406
402
|
}
|
|
407
403
|
function he(i) {
|
|
408
|
-
const
|
|
409
|
-
|
|
404
|
+
const r = ot().filter((t) => t.word_name !== i);
|
|
405
|
+
localStorage.setItem(gt, JSON.stringify(r));
|
|
406
|
+
}
|
|
407
|
+
function de(i) {
|
|
408
|
+
const r = JSON.stringify(i, null, 2), t = new Blob([r], { type: "application/json" }), s = URL.createObjectURL(t), o = Object.assign(document.createElement("a"), {
|
|
409
|
+
href: s,
|
|
410
410
|
download: `${i.word_name}_ref.json`
|
|
411
411
|
});
|
|
412
|
-
document.body.appendChild(o), o.click(), document.body.removeChild(o), URL.revokeObjectURL(
|
|
412
|
+
document.body.appendChild(o), o.click(), document.body.removeChild(o), URL.revokeObjectURL(s);
|
|
413
413
|
}
|
|
414
|
-
async function
|
|
415
|
-
const
|
|
414
|
+
async function ue(i) {
|
|
415
|
+
const r = await i.text();
|
|
416
416
|
let t;
|
|
417
417
|
try {
|
|
418
|
-
t = JSON.parse(
|
|
418
|
+
t = JSON.parse(r);
|
|
419
419
|
} catch {
|
|
420
420
|
throw new Error("Invalid JSON");
|
|
421
421
|
}
|
|
@@ -425,7 +425,7 @@ async function de(i) {
|
|
|
425
425
|
throw new Error('"embeddings" must be a 2D array');
|
|
426
426
|
return t.word_name || (t.word_name = i.name.replace(/_ref\.json$/i, "").replace(/\.json$/i, "")), t;
|
|
427
427
|
}
|
|
428
|
-
class
|
|
428
|
+
class kt extends EventTarget {
|
|
429
429
|
/**
|
|
430
430
|
* @param {object} [opts]
|
|
431
431
|
* @param {string[]} [opts.words] Words to detect (must have refs loaded via addCustomWord())
|
|
@@ -438,14 +438,14 @@ class ue extends EventTarget {
|
|
|
438
438
|
* @param {number} [opts.inferenceGapMs=300] Min ms between inference runs
|
|
439
439
|
* @param {string} [opts.assetsPath]
|
|
440
440
|
*/
|
|
441
|
-
constructor(
|
|
441
|
+
constructor(r = {}) {
|
|
442
442
|
super(), this._opts = {
|
|
443
|
-
words:
|
|
444
|
-
refs:
|
|
445
|
-
threshold:
|
|
446
|
-
relaxationMs:
|
|
447
|
-
inferenceGapMs:
|
|
448
|
-
assetsPath:
|
|
443
|
+
words: r.words ?? [],
|
|
444
|
+
refs: r.refs ?? [],
|
|
445
|
+
threshold: r.threshold ?? 0.65,
|
|
446
|
+
relaxationMs: r.relaxationMs ?? 2e3,
|
|
447
|
+
inferenceGapMs: r.inferenceGapMs ?? 300,
|
|
448
|
+
assetsPath: r.assetsPath
|
|
449
449
|
}, this._refs = /* @__PURE__ */ new Map(), this._detectors = /* @__PURE__ */ new Map(), this._audioCtx = null, this._workletNode = null, this._stream = null, this._initialized = !1, this._running = !1;
|
|
450
450
|
}
|
|
451
451
|
/** Whether init() has completed successfully. */
|
|
@@ -464,31 +464,37 @@ class ue extends EventTarget {
|
|
|
464
464
|
*
|
|
465
465
|
* @param {(progress: number) => void} [onProgress] 0.0 → 1.0
|
|
466
466
|
*/
|
|
467
|
-
async init(
|
|
467
|
+
async init(r) {
|
|
468
|
+
var s;
|
|
468
469
|
if (this._initialized) {
|
|
469
|
-
|
|
470
|
+
r == null || r(1);
|
|
470
471
|
return;
|
|
471
472
|
}
|
|
472
|
-
this._opts.assetsPath &&
|
|
473
|
+
this._opts.assetsPath && Qt({ assetsPath: this._opts.assetsPath });
|
|
473
474
|
try {
|
|
474
|
-
await
|
|
475
|
-
} catch (
|
|
476
|
-
throw this.dispatchEvent(new CustomEvent("error", { detail: { error:
|
|
475
|
+
await Vt(r);
|
|
476
|
+
} catch (o) {
|
|
477
|
+
throw this.dispatchEvent(new CustomEvent("error", { detail: { error: o } })), o;
|
|
477
478
|
}
|
|
478
|
-
|
|
479
|
+
const t = await ot();
|
|
480
|
+
for (const o of this._opts.refs) {
|
|
481
|
+
const e = (s = o.match(/\/([^/]+?)_ref\.json$/)) == null ? void 0 : s[1];
|
|
482
|
+
if (!!t.find((n) => n.word_name === e)) break;
|
|
479
483
|
try {
|
|
480
|
-
let
|
|
481
|
-
if (
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
484
|
+
let n;
|
|
485
|
+
if (typeof o == "string") {
|
|
486
|
+
console.log("fetching ref : ", o);
|
|
487
|
+
const c = await fetch(o);
|
|
488
|
+
if (!c.ok) throw new Error(`HTTP ${c.status}`);
|
|
489
|
+
n = await c.json();
|
|
485
490
|
} else
|
|
486
|
-
|
|
487
|
-
this.addCustomWord(
|
|
488
|
-
} catch (
|
|
489
|
-
const
|
|
490
|
-
console.warn(`[Mellon] Failed to load ref "${
|
|
491
|
+
n = o;
|
|
492
|
+
kt.saveWord(n), this.addCustomWord(n);
|
|
493
|
+
} catch (n) {
|
|
494
|
+
const c = typeof o == "string" ? o : o.word_name;
|
|
495
|
+
console.warn(`[Mellon] Failed to load ref "${c}": ${n.message}`);
|
|
491
496
|
}
|
|
497
|
+
}
|
|
492
498
|
this._initialized = !0, this.dispatchEvent(new CustomEvent("ready"));
|
|
493
499
|
}
|
|
494
500
|
/**
|
|
@@ -497,41 +503,41 @@ class ue extends EventTarget {
|
|
|
497
503
|
*
|
|
498
504
|
* @param {string[]} [words] Subset of words to listen for; defaults to opts.words
|
|
499
505
|
*/
|
|
500
|
-
async start(
|
|
506
|
+
async start(r) {
|
|
501
507
|
this._initialized || await this.init();
|
|
502
|
-
const t =
|
|
508
|
+
const t = r ?? this._opts.words;
|
|
503
509
|
try {
|
|
504
510
|
this._stream = await navigator.mediaDevices.getUserMedia({ audio: !0 });
|
|
505
|
-
} catch (
|
|
506
|
-
const a = new Error(`Microphone access denied: ${
|
|
511
|
+
} catch (e) {
|
|
512
|
+
const a = new Error(`Microphone access denied: ${e.message}`);
|
|
507
513
|
throw this.dispatchEvent(new CustomEvent("error", { detail: { error: a } })), a;
|
|
508
514
|
}
|
|
509
515
|
this._audioCtx = new AudioContext({ sampleRate: 16e3 });
|
|
510
|
-
const
|
|
511
|
-
await this._audioCtx.audioWorklet.addModule(
|
|
516
|
+
const s = ce();
|
|
517
|
+
await this._audioCtx.audioWorklet.addModule(s);
|
|
512
518
|
const o = this._audioCtx.createMediaStreamSource(this._stream);
|
|
513
519
|
this._workletNode = new AudioWorkletNode(this._audioCtx, "audio-processor"), o.connect(this._workletNode), this._workletNode.connect(this._audioCtx.destination);
|
|
514
|
-
for (const
|
|
515
|
-
const a = this._refs.get(
|
|
520
|
+
for (const e of t) {
|
|
521
|
+
const a = this._refs.get(e);
|
|
516
522
|
if (!a) {
|
|
517
|
-
console.warn(`[Mellon] No reference embeddings for "${
|
|
523
|
+
console.warn(`[Mellon] No reference embeddings for "${e}" — skipping. Call addCustomWord() to register custom words before start().`);
|
|
518
524
|
continue;
|
|
519
525
|
}
|
|
520
|
-
const
|
|
521
|
-
name:
|
|
526
|
+
const n = new xt({
|
|
527
|
+
name: e,
|
|
522
528
|
refEmbeddings: a.embeddings,
|
|
523
529
|
threshold: this._opts.threshold,
|
|
524
530
|
relaxationMs: this._opts.relaxationMs,
|
|
525
531
|
inferenceGapMs: this._opts.inferenceGapMs
|
|
526
532
|
});
|
|
527
|
-
|
|
533
|
+
n.addEventListener("match", (c) => {
|
|
528
534
|
this.dispatchEvent(new CustomEvent("match", { detail: c.detail }));
|
|
529
|
-
}), this._detectors.set(
|
|
535
|
+
}), this._detectors.set(e, n);
|
|
530
536
|
}
|
|
531
|
-
this._workletNode.port.onmessage = async (
|
|
537
|
+
this._workletNode.port.onmessage = async (e) => {
|
|
532
538
|
const a = [];
|
|
533
|
-
for (const
|
|
534
|
-
a.push(
|
|
539
|
+
for (const n of this._detectors.values())
|
|
540
|
+
a.push(n.scoreFrame(e.data));
|
|
535
541
|
await Promise.allSettled(a);
|
|
536
542
|
}, this._running = !0;
|
|
537
543
|
}
|
|
@@ -539,7 +545,7 @@ class ue extends EventTarget {
|
|
|
539
545
|
* Stop detection and release the microphone and AudioContext.
|
|
540
546
|
*/
|
|
541
547
|
stop() {
|
|
542
|
-
this._workletNode && (this._workletNode.port.onmessage = null, this._workletNode.disconnect(), this._workletNode = null), this._stream && (this._stream.getTracks().forEach((
|
|
548
|
+
this._workletNode && (this._workletNode.port.onmessage = null, this._workletNode.disconnect(), this._workletNode = null), this._stream && (this._stream.getTracks().forEach((r) => r.stop()), this._stream = null), this._audioCtx && (this._audioCtx.close(), this._audioCtx = null), this._detectors.clear(), this._running = !1;
|
|
543
549
|
}
|
|
544
550
|
// ─── Custom words ────────────────────────────────────────────────────────
|
|
545
551
|
/**
|
|
@@ -549,18 +555,18 @@ class ue extends EventTarget {
|
|
|
549
555
|
*
|
|
550
556
|
* @param {{ word_name: string, model_type: string, embeddings: number[][] }} refData
|
|
551
557
|
*/
|
|
552
|
-
addCustomWord(
|
|
553
|
-
if (this._refs.set(
|
|
558
|
+
addCustomWord(r) {
|
|
559
|
+
if (this._refs.set(r.word_name, r), this._running && this._workletNode) {
|
|
554
560
|
const t = new xt({
|
|
555
|
-
name:
|
|
556
|
-
refEmbeddings:
|
|
561
|
+
name: r.word_name,
|
|
562
|
+
refEmbeddings: r.embeddings,
|
|
557
563
|
threshold: this._opts.threshold,
|
|
558
564
|
relaxationMs: this._opts.relaxationMs,
|
|
559
565
|
inferenceGapMs: this._opts.inferenceGapMs
|
|
560
566
|
});
|
|
561
|
-
t.addEventListener("match", (
|
|
562
|
-
this.dispatchEvent(new CustomEvent("match", { detail:
|
|
563
|
-
}), this._detectors.set(
|
|
567
|
+
t.addEventListener("match", (s) => {
|
|
568
|
+
this.dispatchEvent(new CustomEvent("match", { detail: s.detail }));
|
|
569
|
+
}), this._detectors.set(r.word_name, t);
|
|
564
570
|
}
|
|
565
571
|
}
|
|
566
572
|
/**
|
|
@@ -577,35 +583,35 @@ class ue extends EventTarget {
|
|
|
577
583
|
* const ref = await session.generateRef()
|
|
578
584
|
* stt.addCustomWord(ref)
|
|
579
585
|
*/
|
|
580
|
-
enrollWord(
|
|
581
|
-
return new
|
|
586
|
+
enrollWord(r) {
|
|
587
|
+
return new ae(r);
|
|
582
588
|
}
|
|
583
589
|
// ─── Persistence (static) ────────────────────────────────────────────────
|
|
584
590
|
/** Return all custom word refs stored in localStorage. */
|
|
585
591
|
static loadWords() {
|
|
586
|
-
return
|
|
592
|
+
return ot();
|
|
587
593
|
}
|
|
588
594
|
/** Persist a word ref to localStorage (replaces any existing entry with the same name). */
|
|
589
|
-
static saveWord(
|
|
590
|
-
|
|
595
|
+
static saveWord(r) {
|
|
596
|
+
le(r);
|
|
591
597
|
}
|
|
592
598
|
/** Delete a word ref from localStorage by name. */
|
|
593
|
-
static deleteWord(
|
|
594
|
-
|
|
599
|
+
static deleteWord(r) {
|
|
600
|
+
he(r);
|
|
595
601
|
}
|
|
596
602
|
/**
|
|
597
603
|
* Parse an uploaded ref JSON file and return a RefData object.
|
|
598
604
|
* @param {File} file
|
|
599
605
|
*/
|
|
600
|
-
static importWordFile(
|
|
601
|
-
return
|
|
606
|
+
static importWordFile(r) {
|
|
607
|
+
return ue(r);
|
|
602
608
|
}
|
|
603
609
|
/** Trigger a browser download of a ref as a JSON file. */
|
|
604
|
-
static exportWord(
|
|
605
|
-
|
|
610
|
+
static exportWord(r) {
|
|
611
|
+
de(r);
|
|
606
612
|
}
|
|
607
613
|
}
|
|
608
614
|
export {
|
|
609
|
-
|
|
610
|
-
|
|
615
|
+
ae as EnrollmentSession,
|
|
616
|
+
kt as Mellon
|
|
611
617
|
};
|
package/package.json
CHANGED