mellon 0.0.18 → 0.0.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -9
- package/dist/index.d.ts +14 -3
- package/dist/mellon.cjs +1 -1
- package/dist/mellon.mjs +264 -221
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -17,7 +17,7 @@ Offline, fully in-browser **hotword / wake-word detection** powered by [Efficien
|
|
|
17
17
|
2. [Quick start](#quick-start)
|
|
18
18
|
3. [Enrolling words](#enrolling-custom-words)
|
|
19
19
|
4. [API reference](#api-reference)
|
|
20
|
-
- [
|
|
20
|
+
- [Detector](#detector)
|
|
21
21
|
- [EnrollmentSession](#enrollmentsession)
|
|
22
22
|
5. [Science behind the lib](#science-behind-the-lib)
|
|
23
23
|
---
|
|
@@ -31,9 +31,9 @@ npm install mellon
|
|
|
31
31
|
## Quick start
|
|
32
32
|
|
|
33
33
|
```js
|
|
34
|
-
import {
|
|
34
|
+
import { Detector } from 'mellon'
|
|
35
35
|
|
|
36
|
-
const hotWordDetection = new
|
|
36
|
+
const hotWordDetection = new Detector([
|
|
37
37
|
{
|
|
38
38
|
name: 'openDoors',
|
|
39
39
|
triggers: [{ name: 'mellon', defaultRefPath: '/mellon-assets/mellon_ref.json' }],
|
|
@@ -71,9 +71,9 @@ await hotWordDetection.start() // opens the mic and listens for all registered t
|
|
|
71
71
|
## Enrolling custom words
|
|
72
72
|
|
|
73
73
|
```js
|
|
74
|
-
import {
|
|
74
|
+
import { Detector, EnrollmentSession, Storage } from 'mellon'
|
|
75
75
|
|
|
76
|
-
const hotwordDetection = new
|
|
76
|
+
const hotwordDetection = new Detector([{
|
|
77
77
|
name: 'startEngine',
|
|
78
78
|
triggers: [{ name: 'start' }],
|
|
79
79
|
onMatch: (triggerNameMatched, confidence) => { console.log('starting engine...') }
|
|
@@ -99,19 +99,19 @@ hotwordDetection.addCustomWord(ref)
|
|
|
99
99
|
await hotwordDetection.start()
|
|
100
100
|
|
|
101
101
|
// 4b. Persist for future sessions
|
|
102
|
-
|
|
102
|
+
Storage.saveWord(ref)
|
|
103
103
|
```
|
|
104
104
|
|
|
105
105
|
---
|
|
106
106
|
|
|
107
107
|
## API reference
|
|
108
108
|
|
|
109
|
-
### `
|
|
109
|
+
### `Detector`
|
|
110
110
|
|
|
111
111
|
The easiest way to use the library. Wraps mic access, AudioWorklet wiring, and detector management into a single class.
|
|
112
112
|
|
|
113
113
|
```ts
|
|
114
|
-
class
|
|
114
|
+
class Detector {
|
|
115
115
|
constructor(commands: Command[], config?: MellonConfig)
|
|
116
116
|
readonly threshold: number // read/write; persisted in localStorage
|
|
117
117
|
readonly listening: boolean
|
|
@@ -121,7 +121,19 @@ class Mellon {
|
|
|
121
121
|
stop(): Promise<void>
|
|
122
122
|
addCustomWord(ref: WordRef): void
|
|
123
123
|
|
|
124
|
-
// Storage helpers — static, work without a
|
|
124
|
+
// Storage helpers — static, work without a Detector instance
|
|
125
|
+
static loadWords(storageKey?: string): WordRef[]
|
|
126
|
+
static saveWord(ref: WordRef, storageKey?: string): void
|
|
127
|
+
static deleteWord(wordName: string, storageKey?: string): void
|
|
128
|
+
}
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
### `Storage`
|
|
132
|
+
|
|
133
|
+
Static helpers for persisting enrolled word references in `localStorage`.
|
|
134
|
+
|
|
135
|
+
```ts
|
|
136
|
+
class Storage {
|
|
125
137
|
static loadWords(storageKey?: string): WordRef[]
|
|
126
138
|
static saveWord(ref: WordRef, storageKey?: string): void
|
|
127
139
|
static deleteWord(wordName: string, storageKey?: string): void
|
package/dist/index.d.ts
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { Detector } from './Detector';
|
|
2
2
|
import { EnrollmentSession } from './EnrollmentSession';
|
|
3
|
+
import { Storage } from './Storage';
|
|
4
|
+
import { AudioUtils } from './AudioUtils';
|
|
3
5
|
export type TriggerName = string;
|
|
4
6
|
export interface Trigger {
|
|
5
7
|
name: TriggerName;
|
|
@@ -10,28 +12,37 @@ export interface Command {
|
|
|
10
12
|
triggers: Trigger[];
|
|
11
13
|
onMatch?: (trigger: TriggerName, confidence: number) => any;
|
|
12
14
|
}
|
|
13
|
-
export interface
|
|
15
|
+
export interface DetectorConfig {
|
|
14
16
|
refsStorageKey?: string;
|
|
15
17
|
thresholdStorageKey?: string;
|
|
16
18
|
wasmPaths?: string;
|
|
17
19
|
modelPath?: string;
|
|
18
20
|
audioProcessorPath?: string;
|
|
19
21
|
ortCdnUrl?: string;
|
|
22
|
+
audioUtils?: AudioUtils;
|
|
20
23
|
}
|
|
21
24
|
export interface EnrollmentSessionConfig {
|
|
22
25
|
wasmPaths?: string;
|
|
23
26
|
modelPath?: string;
|
|
24
27
|
ortCdnUrl?: string;
|
|
28
|
+
audioUtils?: AudioUtils;
|
|
25
29
|
}
|
|
26
30
|
export interface WordRef {
|
|
27
31
|
word_name: TriggerName;
|
|
28
32
|
model_type?: string;
|
|
29
33
|
embeddings: number[][];
|
|
30
34
|
}
|
|
35
|
+
/**
|
|
36
|
+
* Called during {@link Detector.init} to report real download progress.
|
|
37
|
+
* @param downloaded - total bytes received so far across all assets
|
|
38
|
+
* @param total - sum of known Content-Length values for all assets;
|
|
39
|
+
* may still be 0 early on (before first header is received)
|
|
40
|
+
*/
|
|
41
|
+
export type ProgressCallback = (downloaded: number, total: number) => void;
|
|
31
42
|
export declare const DEFAULT_WASM_PATHS = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/";
|
|
32
43
|
export declare const DEFAULT_ORT_CDN_URL = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/ort.wasm.min.mjs";
|
|
33
44
|
export declare const DEFAULT_MODEL_PATH = "https://huggingface.co/ComicScrip/mellon/resolve/main/model.onnx";
|
|
34
45
|
export declare const DEFAULT_AUDIO_PROCESSOR_PATH = "https://cdn.jsdelivr.net/npm/mellon@0.0.14/dist/assets/audio-processor.js";
|
|
35
46
|
export declare const DEFAULT_REFS_STORAGE_KEY = "mellon-refs";
|
|
36
47
|
export declare const DEFAULT_THRESHOLD_STORAGE_KEY = "mellon-threshold";
|
|
37
|
-
export {
|
|
48
|
+
export { Detector, EnrollmentSession, Storage, AudioUtils };
|
package/dist/mellon.cjs
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
"use strict";Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});function Dt(m){return m&&m.__esModule&&Object.prototype.hasOwnProperty.call(m,"default")?m.default:m}var lt,ut;function Ut(){if(ut)return lt;ut=1;function m(o){if(this.size=o|0,this.size<=1||(this.size&this.size-1)!==0)throw new Error("FFT size must be a power of two and bigger than 1");this._csize=o<<1;for(var t=new Array(this.size*2),s=0;s<t.length;s+=2){const i=Math.PI*s/this.size;t[s]=Math.cos(i),t[s+1]=-Math.sin(i)}this.table=t;for(var n=0,r=1;this.size>r;r<<=1)n++;this._width=n%2===0?n-1:n,this._bitrev=new Array(1<<this._width);for(var a=0;a<this._bitrev.length;a++){this._bitrev[a]=0;for(var e=0;e<this._width;e+=2){var h=this._width-e-2;this._bitrev[a]|=(a>>>e&3)<<h}}this._out=null,this._data=null,this._inv=0}return lt=m,m.prototype.fromComplexArray=function(t,s){for(var n=s||new Array(t.length>>>1),r=0;r<t.length;r+=2)n[r>>>1]=t[r];return n},m.prototype.createComplexArray=function(){const t=new Array(this._csize);for(var s=0;s<t.length;s++)t[s]=0;return t},m.prototype.toComplexArray=function(t,s){for(var n=s||this.createComplexArray(),r=0;r<n.length;r+=2)n[r]=t[r>>>1],n[r+1]=0;return n},m.prototype.completeSpectrum=function(t){for(var s=this._csize,n=s>>>1,r=2;r<n;r+=2)t[s-r]=t[r],t[s-r+1]=-t[r+1]},m.prototype.transform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=0,this._transform4(),this._out=null,this._data=null},m.prototype.realTransform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=0,this._realTransform4(),this._out=null,this._data=null},m.prototype.inverseTransform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=1,this._transform4();for(var n=0;n<t.length;n++)t[n]/=this.size;this._out=null,this._data=null},m.prototype._transform4=function(){var 
t=this._out,s=this._csize,n=this._width,r=1<<n,a=s/r<<1,e,h,i=this._bitrev;if(a===4)for(e=0,h=0;e<s;e+=a,h++){const v=i[h];this._singleTransform2(e,v,r)}else for(e=0,h=0;e<s;e+=a,h++){const v=i[h];this._singleTransform4(e,v,r)}var l=this._inv?-1:1,c=this.table;for(r>>=2;r>=2;r>>=2){a=s/r<<1;var d=a>>>2;for(e=0;e<s;e+=a)for(var _=e+d,u=e,f=0;u<_;u+=2,f+=r){const v=u,p=v+d,g=p+d,w=g+d,T=t[v],F=t[v+1],A=t[p],y=t[p+1],b=t[g],S=t[g+1],P=t[w],C=t[w+1],E=T,R=F,M=c[f],D=l*c[f+1],U=A*M-y*D,z=A*D+y*M,j=c[2*f],L=l*c[2*f+1],B=b*j-S*L,K=b*L+S*j,N=c[3*f],O=l*c[3*f+1],k=P*N-C*O,G=P*O+C*N,$=E+B,x=R+K,I=E-B,Y=R-K,J=U+k,H=z+G,W=l*(U-k),Q=l*(z-G),tt=$+J,rt=x+H,et=$-J,ot=x-H,nt=I+Q,at=Y-W,it=I-Q,ct=Y+W;t[v]=tt,t[v+1]=rt,t[p]=nt,t[p+1]=at,t[g]=et,t[g+1]=ot,t[w]=it,t[w+1]=ct}}},m.prototype._singleTransform2=function(t,s,n){const r=this._out,a=this._data,e=a[s],h=a[s+1],i=a[s+n],l=a[s+n+1],c=e+i,d=h+l,_=e-i,u=h-l;r[t]=c,r[t+1]=d,r[t+2]=_,r[t+3]=u},m.prototype._singleTransform4=function(t,s,n){const r=this._out,a=this._data,e=this._inv?-1:1,h=n*2,i=n*3,l=a[s],c=a[s+1],d=a[s+n],_=a[s+n+1],u=a[s+h],f=a[s+h+1],v=a[s+i],p=a[s+i+1],g=l+u,w=c+f,T=l-u,F=c-f,A=d+v,y=_+p,b=e*(d-v),S=e*(_-p),P=g+A,C=w+y,E=T+S,R=F-b,M=g-A,D=w-y,U=T-S,z=F+b;r[t]=P,r[t+1]=C,r[t+2]=E,r[t+3]=R,r[t+4]=M,r[t+5]=D,r[t+6]=U,r[t+7]=z},m.prototype._realTransform4=function(){var t=this._out,s=this._csize,n=this._width,r=1<<n,a=s/r<<1,e,h,i=this._bitrev;if(a===4)for(e=0,h=0;e<s;e+=a,h++){const ht=i[h];this._singleRealTransform2(e,ht>>>1,r>>>1)}else for(e=0,h=0;e<s;e+=a,h++){const ht=i[h];this._singleRealTransform4(e,ht>>>1,r>>>1)}var l=this._inv?-1:1,c=this.table;for(r>>=2;r>=2;r>>=2){a=s/r<<1;var d=a>>>1,_=d>>>1,u=_>>>1;for(e=0;e<s;e+=a)for(var f=0,v=0;f<=u;f+=2,v+=r){var 
p=e+f,g=p+_,w=g+_,T=w+_,F=t[p],A=t[p+1],y=t[g],b=t[g+1],S=t[w],P=t[w+1],C=t[T],E=t[T+1],R=F,M=A,D=c[v],U=l*c[v+1],z=y*D-b*U,j=y*U+b*D,L=c[2*v],B=l*c[2*v+1],K=S*L-P*B,N=S*B+P*L,O=c[3*v],k=l*c[3*v+1],G=C*O-E*k,$=C*k+E*O,x=R+K,I=M+N,Y=R-K,J=M-N,H=z+G,W=j+$,Q=l*(z-G),tt=l*(j-$),rt=x+H,et=I+W,ot=Y+tt,nt=J-Q;if(t[p]=rt,t[p+1]=et,t[g]=ot,t[g+1]=nt,f===0){var at=x-H,it=I-W;t[w]=at,t[w+1]=it;continue}if(f!==u){var ct=Y,yt=-J,Tt=x,At=-I,Ft=-l*tt,bt=-l*Q,St=-l*W,Pt=-l*H,Ct=ct+Ft,Et=yt+bt,Rt=Tt+Pt,Mt=At-St,ft=e+_-f,vt=e+d-f;t[ft]=Ct,t[ft+1]=Et,t[vt]=Rt,t[vt+1]=Mt}}}},m.prototype._singleRealTransform2=function(t,s,n){const r=this._out,a=this._data,e=a[s],h=a[s+n],i=e+h,l=e-h;r[t]=i,r[t+1]=0,r[t+2]=l,r[t+3]=0},m.prototype._singleRealTransform4=function(t,s,n){const r=this._out,a=this._data,e=this._inv?-1:1,h=n*2,i=n*3,l=a[s],c=a[s+n],d=a[s+h],_=a[s+i],u=l+d,f=l-d,v=c+_,p=e*(c-_),g=u+v,w=f,T=-p,F=u-v,A=f,y=p;r[t]=g,r[t+1]=0,r[t+2]=w,r[t+3]=T,r[t+4]=F,r[t+5]=0,r[t+6]=A,r[t+7]=y},lt}var zt=Ut();const xt=Dt(zt);class pt{constructor(o=16e3,t=512,s=64){this.sampleRate=o,this.nfft=t,this.nfilt=s,this.fft=new xt(t),this.melFilters=this.createMelFilterbank()}hzToMel(o){return 2595*Math.log10(1+o/700)}melToHz(o){return 700*(10**(o/2595)-1)}createMelFilterbank(){const t=this.sampleRate/2,s=this.hzToMel(0),n=this.hzToMel(t),r=new Float32Array(this.nfilt+2);for(let i=0;i<this.nfilt+2;i++)r[i]=s+i*(n-s)/(this.nfilt+1);const e=r.map(i=>this.melToHz(i)).map(i=>Math.floor((this.nfft+1)*i/this.sampleRate)),h=[];for(let i=0;i<this.nfilt;i++){const l=new Float32Array(Math.floor(this.nfft/2)+1);for(let c=e[i];c<e[i+1];c++)l[c]=(c-e[i])/(e[i+1]-e[i]);for(let c=e[i+1];c<e[i+2];c++)l[c]=(e[i+2]-c)/(e[i+2]-e[i+1]);h.push(l)}return h}logfbank(o){const t=Math.floor(.025*this.sampleRate),s=Math.floor(.01*this.sampleRate),n=1+Math.ceil((o.length-t)/s),r=new Float32Array(n*this.nfilt),a=new Float32Array(this.nfft),e=this.fft.createComplexArray();for(let h=0;h<n;h++){const i=h*s;a.fill(0);for(let 
d=0;d<t&&i+d<o.length;d++)a[d]=o[i+d];const l=this.fft.toComplexArray(a,null);this.fft.transform(e,l);const c=new Float32Array(Math.floor(this.nfft/2)+1);for(let d=0;d<c.length;d++){const _=e[2*d],u=e[2*d+1];c[d]=1/this.nfft*(_*_+u*u),c[d]===0&&(c[d]=1e-30)}for(let d=0;d<this.nfilt;d++){let _=0;const u=this.melFilters[d];for(let f=0;f<c.length;f++)_+=c[f]*u[f];_===0&&(_=1e-30),r[h*this.nfilt+d]=Math.log(_)}}return r}}async function mt(m=Z,o=q){const t=await import(o);return t.env.wasm.wasmPaths=m,t.env.wasm.numThreads=1,t}let dt=null;async function _t(m=st,o=Z,t=q){return dt||(dt=mt(o,t).then(s=>s.InferenceSession.create(m,{executionProviders:["wasm"],graphOptimizationLevel:"all"}))),dt}const It=new pt;class X{constructor(o,t){this._started=!1,this._inferring=!1,this._audioCtx=null,this._stream=null,this._refEmbeddings=new Map,this._lastMatchAt=0,this._lastInferenceAt=0;const{refsStorageKey:s=V,thresholdStorageKey:n=wt,wasmPaths:r=Z,modelPath:a=st,audioProcessorPath:e=gt,ortCdnUrl:h=q}=t||{};this._commands=o,this._refsStorageKey=s,this._thresholdStorageKey=n,this._audioProcessorPath=e,this._wasmPaths=r,this._modelPath=a,this._ortCdnUrl=h;try{const i=localStorage.getItem(this._thresholdStorageKey);this._threshold=i!==null?Math.max(0,Math.min(1,Number(i))):.65}catch{this._threshold=.65}this._initPromise=this._init()}get threshold(){return this._threshold}set threshold(o){this._threshold=Math.max(0,Math.min(1,o));try{localStorage.setItem(this._thresholdStorageKey,String(this._threshold))}catch{}}get listening(){return this._started}async _init(){await _t(this._modelPath,this._wasmPaths,this._ortCdnUrl);const o=new Set;for(const t of this._commands)for(const s of t.triggers)if(!o.has(s.name)&&(o.add(s.name),s.defaultRefPath)){const n=await fetch(s.defaultRefPath);if(n.ok){const r=await n.json();this.addCustomWord(r)}}for(const t of X.loadWords(this._refsStorageKey))this._refEmbeddings.set(t.word_name,t.embeddings);console.info("[Mellon] init complete, loaded 
refs:",[...this._refEmbeddings.keys()])}async init(){await this._initPromise}addCustomWord(o){if(!(Array.isArray(o.embeddings)&&o.embeddings.length>0))throw new Error("invalid ref file for : "+o.word_name);this._refEmbeddings.set(o.word_name,o.embeddings)}async start(){if(this._started)return;await this._initPromise;let o;try{o=await navigator.mediaDevices.getUserMedia({audio:{noiseSuppression:!1,echoCancellation:!1,autoGainControl:!1,channelCount:1}})}catch{o=await navigator.mediaDevices.getUserMedia({audio:!0})}this._stream=o;const t=new AudioContext({sampleRate:16e3});this._audioCtx=t,await t.audioWorklet.addModule(this._audioProcessorPath);const s=t.createMediaStreamSource(o),n=new AudioWorkletNode(t,"audio-processor");n.port.onmessage=r=>{this._handleBuffer(r.data)},s.connect(n),n.connect(t.destination),this._started=!0}async stop(){if(this._started=!1,this._audioCtx&&(await this._audioCtx.close(),this._audioCtx=null),this._stream){for(const o of this._stream.getTracks())o.stop();this._stream=null}}async _handleBuffer(o){if(this._inferring)return;const t=Date.now();if(!(t-this._lastInferenceAt<300)){this._lastInferenceAt=t,this._inferring=!0;try{const[s,n]=await Promise.all([mt(this._wasmPaths,this._ortCdnUrl),_t(this._modelPath,this._wasmPaths,this._ortCdnUrl)]),r=It.logfbank(o),a=new s.Tensor("float32",r,[1,1,149,64]),e=await n.run({input:a}),h=e[Object.keys(e)[0]].data;let i=!1;for(const l of this._commands){if(i)break;for(const c of l.triggers){const d=this._refEmbeddings.get(c.name);if(!d)continue;const _=this._maxCosineSim(h,d);if(_>=this._threshold&&t-this._lastMatchAt>2e3){this._lastMatchAt=t,console.info(`[Mellon] match: "${c}" sim=${_.toFixed(3)}`),typeof l.onMatch=="function"&&l.onMatch(c.name,_),i=!0;break}}}}catch(s){console.error("[Mellon] inference error:",s)}finally{this._inferring=!1}}}_maxCosineSim(o,t){let s=0;for(const n of t){let r=0;for(let e=0;e<n.length;e++)r+=o[e]*n[e];const a=(r+1)/2;a>s&&(s=a)}return s}static loadWords(o=V){try{const 
t=localStorage.getItem(o);return t?JSON.parse(t):[]}catch{return[]}}static saveWord(o,t=V){const s=X.loadWords(t).filter(n=>n.word_name!==o.word_name);localStorage.setItem(t,JSON.stringify([...s,o]))}static deleteWord(o,t=V){try{const s=X.loadWords(t).filter(n=>n.word_name!==o);localStorage.setItem(t,JSON.stringify(s))}catch{}}}const Ht=new pt;class Wt{constructor(o,t){this._config={},this._samples=[],this._wordName=o,this._config.modelPath=(t==null?void 0:t.modelPath)||st,this._config.wasmPaths=(t==null?void 0:t.wasmPaths)||Z,this._config.ortCdnUrl=(t==null?void 0:t.ortCdnUrl)||q}async recordSample(){const o=await navigator.mediaDevices.getUserMedia({audio:!0}),t=new AudioContext({sampleRate:16e3}),s=await new Promise((a,e)=>{const h=new MediaRecorder(o),i=[];h.ondataavailable=l=>{l.data.size>0&&i.push(l.data)},h.onstop=async()=>{var l;for(const c of o.getTracks())c.stop();try{const d=await new Blob(i,{type:((l=i[0])==null?void 0:l.type)||"audio/webm"}).arrayBuffer(),_=await t.decodeAudioData(d);await t.close(),a(_.getChannelData(0).slice())}catch(c){e(c)}},h.start(),setTimeout(()=>{try{h.stop()}catch{}},1500)}),n=24e3,r=new Float32Array(n);return r.set(s.slice(0,n)),this._samples.push(r),this._samples.length}deleteSample(o){if(o<0||o>=this._samples.length)throw new RangeError(`index ${o} out of bounds (${this._samples.length} samples)`);return this._samples.splice(o,1),this._samples.length}async generateRef(){const[o,t]=await Promise.all([mt(this._config.wasmPaths,this._config.ortCdnUrl),_t(this._config.modelPath,this._config.wasmPaths,this._config.ortCdnUrl)]),s=[];for(const n of this._samples){const r=Ht.logfbank(n),a=new o.Tensor("float32",r,[1,1,149,64]),e=await t.run({input:a}),h=Array.from(e[Object.keys(e)[0]].data);s.push(h)}return{word_name:this._wordName,model_type:"resnet_50_arc",embeddings:s}}}const 
Z="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/",q="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/ort.wasm.min.mjs",st="https://huggingface.co/ComicScrip/mellon/resolve/main/model.onnx",gt="https://cdn.jsdelivr.net/npm/mellon@0.0.14/dist/assets/audio-processor.js",V="mellon-refs",wt="mellon-threshold";exports.DEFAULT_AUDIO_PROCESSOR_PATH=gt;exports.DEFAULT_MODEL_PATH=st;exports.DEFAULT_ORT_CDN_URL=q;exports.DEFAULT_REFS_STORAGE_KEY=V;exports.DEFAULT_THRESHOLD_STORAGE_KEY=wt;exports.DEFAULT_WASM_PATHS=Z;exports.EnrollmentSession=Wt;exports.Mellon=X;
|
|
1
|
+
"use strict";Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});function Ut(m){return m&&m.__esModule&&Object.prototype.hasOwnProperty.call(m,"default")?m.default:m}var dt,pt;function Dt(){if(pt)return dt;pt=1;function m(o){if(this.size=o|0,this.size<=1||(this.size&this.size-1)!==0)throw new Error("FFT size must be a power of two and bigger than 1");this._csize=o<<1;for(var t=new Array(this.size*2),s=0;s<t.length;s+=2){const l=Math.PI*s/this.size;t[s]=Math.cos(l),t[s+1]=-Math.sin(l)}this.table=t;for(var n=0,e=1;this.size>e;e<<=1)n++;this._width=n%2===0?n-1:n,this._bitrev=new Array(1<<this._width);for(var c=0;c<this._bitrev.length;c++){this._bitrev[c]=0;for(var r=0;r<this._width;r+=2){var h=this._width-r-2;this._bitrev[c]|=(c>>>r&3)<<h}}this._out=null,this._data=null,this._inv=0}return dt=m,m.prototype.fromComplexArray=function(t,s){for(var n=s||new Array(t.length>>>1),e=0;e<t.length;e+=2)n[e>>>1]=t[e];return n},m.prototype.createComplexArray=function(){const t=new Array(this._csize);for(var s=0;s<t.length;s++)t[s]=0;return t},m.prototype.toComplexArray=function(t,s){for(var n=s||this.createComplexArray(),e=0;e<n.length;e+=2)n[e]=t[e>>>1],n[e+1]=0;return n},m.prototype.completeSpectrum=function(t){for(var s=this._csize,n=s>>>1,e=2;e<n;e+=2)t[s-e]=t[e],t[s-e+1]=-t[e+1]},m.prototype.transform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=0,this._transform4(),this._out=null,this._data=null},m.prototype.realTransform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=0,this._realTransform4(),this._out=null,this._data=null},m.prototype.inverseTransform=function(t,s){if(t===s)throw new Error("Input and output buffers must be different");this._out=t,this._data=s,this._inv=1,this._transform4();for(var n=0;n<t.length;n++)t[n]/=this.size;this._out=null,this._data=null},m.prototype._transform4=function(){var 
t=this._out,s=this._csize,n=this._width,e=1<<n,c=s/e<<1,r,h,l=this._bitrev;if(c===4)for(r=0,h=0;r<s;r+=c,h++){const u=l[h];this._singleTransform2(r,u,e)}else for(r=0,h=0;r<s;r+=c,h++){const u=l[h];this._singleTransform4(r,u,e)}var i=this._inv?-1:1,a=this.table;for(e>>=2;e>=2;e>>=2){c=s/e<<1;var d=c>>>2;for(r=0;r<s;r+=c)for(var _=r+d,v=r,f=0;v<_;v+=2,f+=e){const u=v,p=u+d,w=p+d,g=w+d,T=t[u],A=t[u+1],b=t[p],y=t[p+1],F=t[w],S=t[w+1],C=t[g],E=t[g+1],M=T,P=A,R=a[f],U=i*a[f+1],D=b*R-y*U,z=b*U+y*R,k=a[2*f],N=i*a[2*f+1],B=F*k-S*N,j=F*N+S*k,K=a[3*f],O=i*a[3*f+1],$=C*K-E*O,G=C*O+E*K,J=M+B,x=P+j,I=M-B,Y=P-j,Q=D+$,L=z+G,W=i*(D-$),V=i*(z-G),tt=J+Q,rt=x+L,ot=J-Q,nt=x-L,at=I+V,it=Y-W,ct=I-V,lt=Y+W;t[u]=tt,t[u+1]=rt,t[p]=at,t[p+1]=it,t[w]=ot,t[w+1]=nt,t[g]=ct,t[g+1]=lt}}},m.prototype._singleTransform2=function(t,s,n){const e=this._out,c=this._data,r=c[s],h=c[s+1],l=c[s+n],i=c[s+n+1],a=r+l,d=h+i,_=r-l,v=h-i;e[t]=a,e[t+1]=d,e[t+2]=_,e[t+3]=v},m.prototype._singleTransform4=function(t,s,n){const e=this._out,c=this._data,r=this._inv?-1:1,h=n*2,l=n*3,i=c[s],a=c[s+1],d=c[s+n],_=c[s+n+1],v=c[s+h],f=c[s+h+1],u=c[s+l],p=c[s+l+1],w=i+v,g=a+f,T=i-v,A=a-f,b=d+u,y=_+p,F=r*(d-u),S=r*(_-p),C=w+b,E=g+y,M=T+S,P=A-F,R=w-b,U=g-y,D=T-S,z=A+F;e[t]=C,e[t+1]=E,e[t+2]=M,e[t+3]=P,e[t+4]=R,e[t+5]=U,e[t+6]=D,e[t+7]=z},m.prototype._realTransform4=function(){var t=this._out,s=this._csize,n=this._width,e=1<<n,c=s/e<<1,r,h,l=this._bitrev;if(c===4)for(r=0,h=0;r<s;r+=c,h++){const ht=l[h];this._singleRealTransform2(r,ht>>>1,e>>>1)}else for(r=0,h=0;r<s;r+=c,h++){const ht=l[h];this._singleRealTransform4(r,ht>>>1,e>>>1)}var i=this._inv?-1:1,a=this.table;for(e>>=2;e>=2;e>>=2){c=s/e<<1;var d=c>>>1,_=d>>>1,v=_>>>1;for(r=0;r<s;r+=c)for(var f=0,u=0;f<=v;f+=2,u+=e){var 
p=r+f,w=p+_,g=w+_,T=g+_,A=t[p],b=t[p+1],y=t[w],F=t[w+1],S=t[g],C=t[g+1],E=t[T],M=t[T+1],P=A,R=b,U=a[u],D=i*a[u+1],z=y*U-F*D,k=y*D+F*U,N=a[2*u],B=i*a[2*u+1],j=S*N-C*B,K=S*B+C*N,O=a[3*u],$=i*a[3*u+1],G=E*O-M*$,J=E*$+M*O,x=P+j,I=R+K,Y=P-j,Q=R-K,L=z+G,W=k+J,V=i*(z-G),tt=i*(k-J),rt=x+L,ot=I+W,nt=Y+tt,at=Q-V;if(t[p]=rt,t[p+1]=ot,t[w]=nt,t[w+1]=at,f===0){var it=x-L,ct=I-W;t[g]=it,t[g+1]=ct;continue}if(f!==v){var lt=Y,yt=-Q,Tt=x,bt=-I,At=-i*tt,Ft=-i*V,St=-i*W,Ct=-i*L,Et=lt+At,Mt=yt+Ft,Pt=Tt+Ct,Rt=bt-St,ut=r+_-f,vt=r+d-f;t[ut]=Et,t[ut+1]=Mt,t[vt]=Pt,t[vt+1]=Rt}}}},m.prototype._singleRealTransform2=function(t,s,n){const e=this._out,c=this._data,r=c[s],h=c[s+n],l=r+h,i=r-h;e[t]=l,e[t+1]=0,e[t+2]=i,e[t+3]=0},m.prototype._singleRealTransform4=function(t,s,n){const e=this._out,c=this._data,r=this._inv?-1:1,h=n*2,l=n*3,i=c[s],a=c[s+n],d=c[s+h],_=c[s+l],v=i+d,f=i-d,u=a+_,p=r*(a-_),w=v+u,g=f,T=-p,A=v-u,b=f,y=p;e[t]=w,e[t+1]=0,e[t+2]=g,e[t+3]=T,e[t+4]=A,e[t+5]=0,e[t+6]=b,e[t+7]=y},dt}var zt=Dt();const xt=Ut(zt);class ft{constructor(o=16e3,t=512,s=64){this._sampleRate=o,this._nfft=t,this._nfilt=s,this._fft=new xt(t),this._melFilters=this._createMelFilterbank()}_hzToMel(o){return 2595*Math.log10(1+o/700)}_melToHz(o){return 700*(10**(o/2595)-1)}_createMelFilterbank(){const t=this._sampleRate/2,s=this._hzToMel(0),n=this._hzToMel(t),e=new Float32Array(this._nfilt+2);for(let l=0;l<this._nfilt+2;l++)e[l]=s+l*(n-s)/(this._nfilt+1);const r=e.map(l=>this._melToHz(l)).map(l=>Math.floor((this._nfft+1)*l/this._sampleRate)),h=[];for(let l=0;l<this._nfilt;l++){const i=new Float32Array(Math.floor(this._nfft/2)+1);for(let a=r[l];a<r[l+1];a++)i[a]=(a-r[l])/(r[l+1]-r[l]);for(let a=r[l+1];a<r[l+2];a++)i[a]=(r[l+2]-a)/(r[l+2]-r[l+1]);h.push(i)}return h}logfbank(o){const t=Math.floor(.025*this._sampleRate),s=Math.floor(.01*this._sampleRate),n=1+Math.ceil((o.length-t)/s),e=new Float32Array(n*this._nfilt),c=new Float32Array(this._nfft),r=this._fft.createComplexArray();for(let h=0;h<n;h++){const 
l=h*s;c.fill(0);for(let d=0;d<t&&l+d<o.length;d++)c[d]=o[l+d];const i=this._fft.toComplexArray(c,null);this._fft.transform(r,i);const a=new Float32Array(Math.floor(this._nfft/2)+1);for(let d=0;d<a.length;d++){const _=r[2*d],v=r[2*d+1];a[d]=1/this._nfft*(_*_+v*v),a[d]===0&&(a[d]=1e-30)}for(let d=0;d<this._nfilt;d++){let _=0;const v=this._melFilters[d];for(let f=0;f<a.length;f++)_+=a[f]*v[f];_===0&&(_=1e-30),e[h*this._nfilt+d]=Math.log(_)}}return e}maxCosineSim(o,t){let s=0;for(const n of t){let e=0;for(let r=0;r<n.length;r++)e+=o[r]*n[r];const c=(e+1)/2;c>s&&(s=c)}return s}}async function st(m=Z,o=q){const t=await import(o);return t.env.wasm.wasmPaths=m,t.env.wasm.numThreads=1,t}let _t=null;async function mt(m=et,o=Z,t=q,s){return _t||(_t=st(o,t).then(n=>s?n.InferenceSession.create(new Uint8Array(s),{executionProviders:["wasm"],graphOptimizationLevel:"all"}):n.InferenceSession.create(m,{executionProviders:["wasm"],graphOptimizationLevel:"all"}))),_t}class H{static loadWords(o=X){try{const t=localStorage.getItem(o);return t?JSON.parse(t):[]}catch{return[]}}static saveWord(o,t=X){const s=H.loadWords(t).filter(n=>n.word_name!==o.word_name);localStorage.setItem(t,JSON.stringify([...s,o]))}static deleteWord(o,t=X){try{const s=H.loadWords(t).filter(n=>n.word_name!==o);localStorage.setItem(t,JSON.stringify(s))}catch{}}}class It{constructor(o,t){this._started=!1,this._inferring=!1,this._audioCtx=null,this._stream=null,this._refEmbeddings=new Map,this._lastMatchAt=0,this._lastInferenceAt=0,this._initPromise=null;const{refsStorageKey:s=X,thresholdStorageKey:n=gt,wasmPaths:e=Z,modelPath:c=et,audioProcessorPath:r=wt,ortCdnUrl:h=q,audioUtils:l=new ft}=t||{};this._audioUtils=l,this._commands=o,this._refsStorageKey=s,this._thresholdStorageKey=n,this._audioProcessorPath=r,this._wasmPaths=e,this._modelPath=c,this._ortCdnUrl=h;try{const i=localStorage.getItem(this._thresholdStorageKey);this._threshold=i!==null?Math.max(0,Math.min(1,Number(i))):.65}catch{this._threshold=.65}}get 
threshold(){return this._threshold}set threshold(o){this._threshold=Math.max(0,Math.min(1,o));try{localStorage.setItem(this._thresholdStorageKey,String(this._threshold))}catch{}}get listening(){return this._started}async _trackFetch(o,t,s){const n=await fetch(o);if(!n.ok)throw new Error(`HTTP ${n.status} fetching ${o}`);const e=Number(n.headers.get("content-length")??"0");if(e>0&&(s.total+=e),!n.body){const a=await n.arrayBuffer();return s.downloaded+=a.byteLength,e||(s.total+=a.byteLength),t==null||t(s.downloaded,s.total),a}const c=n.body.getReader(),r=[];let h=0;for(;;){const{done:a,value:d}=await c.read();if(a)break;r.push(d),h+=d.length,s.downloaded+=d.length,t==null||t(s.downloaded,s.total)}e||(s.total+=h);const l=new Uint8Array(h);let i=0;for(const a of r)l.set(a,i),i+=a.length;return l.buffer}async _init(o){const t={downloaded:0,total:0},s=new Set,n=[];for(const i of this._commands)for(const a of i.triggers)!s.has(a.name)&&a.defaultRefPath&&(s.add(a.name),n.push({name:a.name,path:a.defaultRefPath}));const e=st(this._wasmPaths,this._ortCdnUrl),[c,...r]=await Promise.all([this._trackFetch(this._modelPath,o,t),...n.map(({path:i})=>this._trackFetch(i,o,t))]);await e,await mt(this._modelPath,this._wasmPaths,this._ortCdnUrl,c);const h=H.loadWords(this._refsStorageKey),l=new Set(h.map(i=>i.word_name));for(let i=0;i<n.length;i++)try{const a=JSON.parse(new TextDecoder().decode(r[i]));this.addCustomWord(a),l.has(a.word_name)||H.saveWord(a,this._refsStorageKey)}catch{console.warn(`[Mellon] failed to parse ref file: ${n[i].path}`)}for(const i of h)this._refEmbeddings.set(i.word_name,i.embeddings);console.info("[Mellon] init complete, loaded refs:",[...this._refEmbeddings.keys()])}async init(o){this._initPromise||(this._initPromise=this._init(o)),await this._initPromise}addCustomWord(o){if(!(Array.isArray(o.embeddings)&&o.embeddings.length>0))throw new Error("invalid ref file for : "+o.word_name);this._refEmbeddings.set(o.word_name,o.embeddings)}async 
start(){if(this._started)return;await this.init();let o;try{o=await navigator.mediaDevices.getUserMedia({audio:{noiseSuppression:!1,echoCancellation:!1,autoGainControl:!1,channelCount:1}})}catch{o=await navigator.mediaDevices.getUserMedia({audio:!0})}this._stream=o;const t=new AudioContext({sampleRate:16e3});this._audioCtx=t,await t.audioWorklet.addModule(this._audioProcessorPath);const s=t.createMediaStreamSource(o),n=new AudioWorkletNode(t,"audio-processor");n.port.onmessage=e=>{this._handleBuffer(e.data)},s.connect(n),n.connect(t.destination),this._started=!0}async stop(){if(this._started=!1,this._audioCtx&&(await this._audioCtx.close(),this._audioCtx=null),this._stream){for(const o of this._stream.getTracks())o.stop();this._stream=null}}async _handleBuffer(o){if(this._inferring)return;const t=Date.now();if(!(t-this._lastInferenceAt<300)){this._lastInferenceAt=t,this._inferring=!0;try{const[s,n]=await Promise.all([st(this._wasmPaths,this._ortCdnUrl),mt(this._modelPath,this._wasmPaths,this._ortCdnUrl)]),e=this._audioUtils.logfbank(o),c=new s.Tensor("float32",e,[1,1,149,64]),r=await n.run({input:c}),h=r[Object.keys(r)[0]].data;let l=!1;for(const i of this._commands){if(l)break;for(const a of i.triggers){const d=this._refEmbeddings.get(a.name);if(!d)continue;const _=this._audioUtils.maxCosineSim(h,d);if(_>=this._threshold&&t-this._lastMatchAt>2e3){this._lastMatchAt=t,console.info(`[Mellon] match: "${a}" sim=${_.toFixed(3)}`),typeof i.onMatch=="function"&&i.onMatch(a.name,_),l=!0;break}}}}catch(s){console.error("[Mellon] inference error:",s)}finally{this._inferring=!1}}}}class Lt{constructor(o,t){this._config={},this._samples=[],this._wordName=o,this._config.modelPath=(t==null?void 0:t.modelPath)||et,this._config.wasmPaths=(t==null?void 0:t.wasmPaths)||Z,this._config.ortCdnUrl=(t==null?void 0:t.ortCdnUrl)||q,this._audioUtils=(t==null?void 0:t.audioUtils)??new ft}async recordSample(){const o=await navigator.mediaDevices.getUserMedia({audio:!0}),t=new 
AudioContext({sampleRate:16e3}),s=await new Promise((c,r)=>{const h=new MediaRecorder(o),l=[];h.ondataavailable=i=>{i.data.size>0&&l.push(i.data)},h.onstop=async()=>{var i;for(const a of o.getTracks())a.stop();try{const d=await new Blob(l,{type:((i=l[0])==null?void 0:i.type)||"audio/webm"}).arrayBuffer(),_=await t.decodeAudioData(d);await t.close(),c(_.getChannelData(0).slice())}catch(a){r(a)}},h.start(),setTimeout(()=>{try{h.stop()}catch{}},1500)}),n=24e3,e=new Float32Array(n);return e.set(s.slice(0,n)),this._samples.push(e),this._samples.length}deleteSample(o){if(o<0||o>=this._samples.length)throw new RangeError(`index ${o} out of bounds (${this._samples.length} samples)`);return this._samples.splice(o,1),this._samples.length}async generateRef(){const[o,t]=await Promise.all([st(this._config.wasmPaths,this._config.ortCdnUrl),mt(this._config.modelPath,this._config.wasmPaths,this._config.ortCdnUrl)]),s=[];for(const n of this._samples){const e=this._audioUtils.logfbank(n),c=new o.Tensor("float32",e,[1,1,149,64]),r=await t.run({input:c}),h=Array.from(r[Object.keys(r)[0]].data);s.push(h)}return{word_name:this._wordName,model_type:"resnet_50_arc",embeddings:s}}}const Z="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/",q="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/ort.wasm.min.mjs",et="https://huggingface.co/ComicScrip/mellon/resolve/main/model.onnx",wt="https://cdn.jsdelivr.net/npm/mellon@0.0.14/dist/assets/audio-processor.js",X="mellon-refs",gt="mellon-threshold";exports.AudioUtils=ft;exports.DEFAULT_AUDIO_PROCESSOR_PATH=wt;exports.DEFAULT_MODEL_PATH=et;exports.DEFAULT_ORT_CDN_URL=q;exports.DEFAULT_REFS_STORAGE_KEY=X;exports.DEFAULT_THRESHOLD_STORAGE_KEY=gt;exports.DEFAULT_WASM_PATHS=Z;exports.Detector=It;exports.EnrollmentSession=Lt;exports.Storage=H;
|
package/dist/mellon.mjs
CHANGED
|
@@ -1,217 +1,250 @@
|
|
|
1
|
-
function Rt(
|
|
2
|
-
return
|
|
1
|
+
/**
 * CommonJS / ES-module interop helper.
 *
 * When `m` is a transpiled ES-module namespace (truthy `__esModule` flag and an
 * own `default` property) the default export is returned; every other value,
 * including falsy ones, is handed back untouched.
 */
function Rt(m) {
  const isEsModuleWrapper =
    Boolean(m) &&
    Boolean(m.__esModule) &&
    Object.prototype.hasOwnProperty.call(m, "default");
  if (isEsModuleWrapper) {
    return m.default;
  }
  return m;
}
|
|
4
|
-
var
|
|
5
|
-
function
|
|
6
|
-
if (
|
|
7
|
-
|
|
8
|
-
function
|
|
4
|
+
var lt, vt;
|
|
5
|
+
function Ut() {
|
|
6
|
+
if (vt) return lt;
|
|
7
|
+
vt = 1;
|
|
8
|
+
function m(o) {
|
|
9
9
|
if (this.size = o | 0, this.size <= 1 || (this.size & this.size - 1) !== 0)
|
|
10
10
|
throw new Error("FFT size must be a power of two and bigger than 1");
|
|
11
11
|
this._csize = o << 1;
|
|
12
12
|
for (var t = new Array(this.size * 2), s = 0; s < t.length; s += 2) {
|
|
13
|
-
const
|
|
14
|
-
t[s] = Math.cos(
|
|
13
|
+
const h = Math.PI * s / this.size;
|
|
14
|
+
t[s] = Math.cos(h), t[s + 1] = -Math.sin(h);
|
|
15
15
|
}
|
|
16
16
|
this.table = t;
|
|
17
|
-
for (var n = 0,
|
|
17
|
+
for (var n = 0, e = 1; this.size > e; e <<= 1)
|
|
18
18
|
n++;
|
|
19
19
|
this._width = n % 2 === 0 ? n - 1 : n, this._bitrev = new Array(1 << this._width);
|
|
20
|
-
for (var
|
|
21
|
-
this._bitrev[
|
|
22
|
-
for (var
|
|
23
|
-
var
|
|
24
|
-
this._bitrev[
|
|
20
|
+
for (var c = 0; c < this._bitrev.length; c++) {
|
|
21
|
+
this._bitrev[c] = 0;
|
|
22
|
+
for (var r = 0; r < this._width; r += 2) {
|
|
23
|
+
var l = this._width - r - 2;
|
|
24
|
+
this._bitrev[c] |= (c >>> r & 3) << l;
|
|
25
25
|
}
|
|
26
26
|
}
|
|
27
27
|
this._out = null, this._data = null, this._inv = 0;
|
|
28
28
|
}
|
|
29
|
-
return
|
|
30
|
-
for (var n = s || new Array(t.length >>> 1),
|
|
31
|
-
n[
|
|
29
|
+
return lt = m, m.prototype.fromComplexArray = function(t, s) {
|
|
30
|
+
for (var n = s || new Array(t.length >>> 1), e = 0; e < t.length; e += 2)
|
|
31
|
+
n[e >>> 1] = t[e];
|
|
32
32
|
return n;
|
|
33
|
-
},
|
|
33
|
+
}, m.prototype.createComplexArray = function() {
|
|
34
34
|
const t = new Array(this._csize);
|
|
35
35
|
for (var s = 0; s < t.length; s++)
|
|
36
36
|
t[s] = 0;
|
|
37
37
|
return t;
|
|
38
|
-
},
|
|
39
|
-
for (var n = s || this.createComplexArray(),
|
|
40
|
-
n[
|
|
38
|
+
}, m.prototype.toComplexArray = function(t, s) {
|
|
39
|
+
for (var n = s || this.createComplexArray(), e = 0; e < n.length; e += 2)
|
|
40
|
+
n[e] = t[e >>> 1], n[e + 1] = 0;
|
|
41
41
|
return n;
|
|
42
|
-
},
|
|
43
|
-
for (var s = this._csize, n = s >>> 1,
|
|
44
|
-
t[s -
|
|
45
|
-
},
|
|
42
|
+
}, m.prototype.completeSpectrum = function(t) {
|
|
43
|
+
for (var s = this._csize, n = s >>> 1, e = 2; e < n; e += 2)
|
|
44
|
+
t[s - e] = t[e], t[s - e + 1] = -t[e + 1];
|
|
45
|
+
}, m.prototype.transform = function(t, s) {
|
|
46
46
|
if (t === s)
|
|
47
47
|
throw new Error("Input and output buffers must be different");
|
|
48
48
|
this._out = t, this._data = s, this._inv = 0, this._transform4(), this._out = null, this._data = null;
|
|
49
|
-
},
|
|
49
|
+
}, m.prototype.realTransform = function(t, s) {
|
|
50
50
|
if (t === s)
|
|
51
51
|
throw new Error("Input and output buffers must be different");
|
|
52
52
|
this._out = t, this._data = s, this._inv = 0, this._realTransform4(), this._out = null, this._data = null;
|
|
53
|
-
},
|
|
53
|
+
}, m.prototype.inverseTransform = function(t, s) {
|
|
54
54
|
if (t === s)
|
|
55
55
|
throw new Error("Input and output buffers must be different");
|
|
56
56
|
this._out = t, this._data = s, this._inv = 1, this._transform4();
|
|
57
57
|
for (var n = 0; n < t.length; n++)
|
|
58
58
|
t[n] /= this.size;
|
|
59
59
|
this._out = null, this._data = null;
|
|
60
|
-
},
|
|
61
|
-
var t = this._out, s = this._csize, n = this._width,
|
|
62
|
-
if (
|
|
63
|
-
for (
|
|
64
|
-
const
|
|
65
|
-
this._singleTransform2(
|
|
60
|
+
}, m.prototype._transform4 = function() {
|
|
61
|
+
var t = this._out, s = this._csize, n = this._width, e = 1 << n, c = s / e << 1, r, l, h = this._bitrev;
|
|
62
|
+
if (c === 4)
|
|
63
|
+
for (r = 0, l = 0; r < s; r += c, l++) {
|
|
64
|
+
const u = h[l];
|
|
65
|
+
this._singleTransform2(r, u, e);
|
|
66
66
|
}
|
|
67
67
|
else
|
|
68
|
-
for (
|
|
69
|
-
const
|
|
70
|
-
this._singleTransform4(
|
|
68
|
+
for (r = 0, l = 0; r < s; r += c, l++) {
|
|
69
|
+
const u = h[l];
|
|
70
|
+
this._singleTransform4(r, u, e);
|
|
71
71
|
}
|
|
72
|
-
var
|
|
73
|
-
for (
|
|
74
|
-
|
|
75
|
-
var d =
|
|
76
|
-
for (
|
|
77
|
-
for (var
|
|
78
|
-
const
|
|
79
|
-
t[
|
|
72
|
+
var i = this._inv ? -1 : 1, a = this.table;
|
|
73
|
+
for (e >>= 2; e >= 2; e >>= 2) {
|
|
74
|
+
c = s / e << 1;
|
|
75
|
+
var d = c >>> 2;
|
|
76
|
+
for (r = 0; r < s; r += c)
|
|
77
|
+
for (var _ = r + d, v = r, f = 0; v < _; v += 2, f += e) {
|
|
78
|
+
const u = v, p = u + d, w = p + d, g = w + d, b = t[u], T = t[u + 1], F = t[p], y = t[p + 1], A = t[w], C = t[w + 1], M = t[g], S = t[g + 1], P = b, R = T, U = a[f], E = i * a[f + 1], D = F * U - y * E, x = F * E + y * U, B = a[2 * f], N = i * a[2 * f + 1], j = A * B - C * N, H = A * N + C * B, K = a[3 * f], $ = i * a[3 * f + 1], L = M * K - S * $, J = M * $ + S * K, G = P + j, z = R + H, I = P - j, Y = R - H, O = D + L, W = x + J, k = i * (D - L), Q = i * (x - J), X = G + O, et = z + W, rt = G - O, ot = z - W, nt = I + Q, at = Y - k, it = I - Q, ct = Y + k;
|
|
79
|
+
t[u] = X, t[u + 1] = et, t[p] = nt, t[p + 1] = at, t[w] = rt, t[w + 1] = ot, t[g] = it, t[g + 1] = ct;
|
|
80
80
|
}
|
|
81
81
|
}
|
|
82
|
-
},
|
|
83
|
-
const
|
|
84
|
-
|
|
85
|
-
},
|
|
86
|
-
const
|
|
87
|
-
|
|
88
|
-
},
|
|
89
|
-
var t = this._out, s = this._csize, n = this._width,
|
|
90
|
-
if (
|
|
91
|
-
for (
|
|
92
|
-
const
|
|
93
|
-
this._singleRealTransform2(
|
|
82
|
+
}, m.prototype._singleTransform2 = function(t, s, n) {
|
|
83
|
+
const e = this._out, c = this._data, r = c[s], l = c[s + 1], h = c[s + n], i = c[s + n + 1], a = r + h, d = l + i, _ = r - h, v = l - i;
|
|
84
|
+
e[t] = a, e[t + 1] = d, e[t + 2] = _, e[t + 3] = v;
|
|
85
|
+
}, m.prototype._singleTransform4 = function(t, s, n) {
|
|
86
|
+
const e = this._out, c = this._data, r = this._inv ? -1 : 1, l = n * 2, h = n * 3, i = c[s], a = c[s + 1], d = c[s + n], _ = c[s + n + 1], v = c[s + l], f = c[s + l + 1], u = c[s + h], p = c[s + h + 1], w = i + v, g = a + f, b = i - v, T = a - f, F = d + u, y = _ + p, A = r * (d - u), C = r * (_ - p), M = w + F, S = g + y, P = b + C, R = T - A, U = w - F, E = g - y, D = b - C, x = T + A;
|
|
87
|
+
e[t] = M, e[t + 1] = S, e[t + 2] = P, e[t + 3] = R, e[t + 4] = U, e[t + 5] = E, e[t + 6] = D, e[t + 7] = x;
|
|
88
|
+
}, m.prototype._realTransform4 = function() {
|
|
89
|
+
var t = this._out, s = this._csize, n = this._width, e = 1 << n, c = s / e << 1, r, l, h = this._bitrev;
|
|
90
|
+
if (c === 4)
|
|
91
|
+
for (r = 0, l = 0; r < s; r += c, l++) {
|
|
92
|
+
const ht = h[l];
|
|
93
|
+
this._singleRealTransform2(r, ht >>> 1, e >>> 1);
|
|
94
94
|
}
|
|
95
95
|
else
|
|
96
|
-
for (
|
|
97
|
-
const
|
|
98
|
-
this._singleRealTransform4(
|
|
96
|
+
for (r = 0, l = 0; r < s; r += c, l++) {
|
|
97
|
+
const ht = h[l];
|
|
98
|
+
this._singleRealTransform4(r, ht >>> 1, e >>> 1);
|
|
99
99
|
}
|
|
100
|
-
var
|
|
101
|
-
for (
|
|
102
|
-
|
|
103
|
-
var d =
|
|
104
|
-
for (
|
|
105
|
-
for (var f = 0,
|
|
106
|
-
var p =
|
|
107
|
-
if (t[p] =
|
|
108
|
-
var
|
|
109
|
-
t[g] =
|
|
100
|
+
var i = this._inv ? -1 : 1, a = this.table;
|
|
101
|
+
for (e >>= 2; e >= 2; e >>= 2) {
|
|
102
|
+
c = s / e << 1;
|
|
103
|
+
var d = c >>> 1, _ = d >>> 1, v = _ >>> 1;
|
|
104
|
+
for (r = 0; r < s; r += c)
|
|
105
|
+
for (var f = 0, u = 0; f <= v; f += 2, u += e) {
|
|
106
|
+
var p = r + f, w = p + _, g = w + _, b = g + _, T = t[p], F = t[p + 1], y = t[w], A = t[w + 1], C = t[g], M = t[g + 1], S = t[b], P = t[b + 1], R = T, U = F, E = a[u], D = i * a[u + 1], x = y * E - A * D, B = y * D + A * E, N = a[2 * u], j = i * a[2 * u + 1], H = C * N - M * j, K = C * j + M * N, $ = a[3 * u], L = i * a[3 * u + 1], J = S * $ - P * L, G = S * L + P * $, z = R + H, I = U + K, Y = R - H, O = U - K, W = x + J, k = B + G, Q = i * (x - J), X = i * (B - G), et = z + W, rt = I + k, ot = Y + X, nt = O - Q;
|
|
107
|
+
if (t[p] = et, t[p + 1] = rt, t[w] = ot, t[w + 1] = nt, f === 0) {
|
|
108
|
+
var at = z - W, it = I - k;
|
|
109
|
+
t[g] = at, t[g + 1] = it;
|
|
110
110
|
continue;
|
|
111
111
|
}
|
|
112
|
-
if (f !==
|
|
113
|
-
var
|
|
114
|
-
t[ft] = Ct, t[ft + 1] =
|
|
112
|
+
if (f !== v) {
|
|
113
|
+
var ct = Y, wt = -O, gt = z, yt = -I, bt = -i * X, Ft = -i * Q, Tt = -i * k, At = -i * W, Ct = ct + bt, Mt = wt + Ft, St = gt + At, Pt = yt - Tt, ft = r + _ - f, ut = r + d - f;
|
|
114
|
+
t[ft] = Ct, t[ft + 1] = Mt, t[ut] = St, t[ut + 1] = Pt;
|
|
115
115
|
}
|
|
116
116
|
}
|
|
117
117
|
}
|
|
118
|
-
},
|
|
119
|
-
const
|
|
120
|
-
|
|
121
|
-
},
|
|
122
|
-
const
|
|
123
|
-
|
|
124
|
-
},
|
|
118
|
+
}, m.prototype._singleRealTransform2 = function(t, s, n) {
|
|
119
|
+
const e = this._out, c = this._data, r = c[s], l = c[s + n], h = r + l, i = r - l;
|
|
120
|
+
e[t] = h, e[t + 1] = 0, e[t + 2] = i, e[t + 3] = 0;
|
|
121
|
+
}, m.prototype._singleRealTransform4 = function(t, s, n) {
|
|
122
|
+
const e = this._out, c = this._data, r = this._inv ? -1 : 1, l = n * 2, h = n * 3, i = c[s], a = c[s + n], d = c[s + l], _ = c[s + h], v = i + d, f = i - d, u = a + _, p = r * (a - _), w = v + u, g = f, b = -p, T = v - u, F = f, y = p;
|
|
123
|
+
e[t] = w, e[t + 1] = 0, e[t + 2] = g, e[t + 3] = b, e[t + 4] = T, e[t + 5] = 0, e[t + 6] = F, e[t + 7] = y;
|
|
124
|
+
}, lt;
|
|
125
125
|
}
|
|
126
|
-
var
|
|
127
|
-
const
|
|
126
|
+
var Et = Ut();
|
|
127
|
+
const Dt = /* @__PURE__ */ Rt(Et);
|
|
128
128
|
class pt {
|
|
129
129
|
constructor(o = 16e3, t = 512, s = 64) {
|
|
130
|
-
this.
|
|
130
|
+
this._sampleRate = o, this._nfft = t, this._nfilt = s, this._fft = new Dt(t), this._melFilters = this._createMelFilterbank();
|
|
131
131
|
}
|
|
132
|
-
|
|
132
|
+
_hzToMel(o) {
|
|
133
133
|
return 2595 * Math.log10(1 + o / 700);
|
|
134
134
|
}
|
|
135
|
-
|
|
135
|
+
_melToHz(o) {
|
|
136
136
|
return 700 * (10 ** (o / 2595) - 1);
|
|
137
137
|
}
|
|
138
|
-
|
|
139
|
-
const t = this.
|
|
140
|
-
for (let
|
|
141
|
-
|
|
142
|
-
const
|
|
143
|
-
for (let
|
|
144
|
-
const
|
|
145
|
-
for (let
|
|
146
|
-
|
|
147
|
-
for (let
|
|
148
|
-
|
|
149
|
-
|
|
138
|
+
_createMelFilterbank() {
|
|
139
|
+
const t = this._sampleRate / 2, s = this._hzToMel(0), n = this._hzToMel(t), e = new Float32Array(this._nfilt + 2);
|
|
140
|
+
for (let h = 0; h < this._nfilt + 2; h++)
|
|
141
|
+
e[h] = s + h * (n - s) / (this._nfilt + 1);
|
|
142
|
+
const r = e.map((h) => this._melToHz(h)).map((h) => Math.floor((this._nfft + 1) * h / this._sampleRate)), l = [];
|
|
143
|
+
for (let h = 0; h < this._nfilt; h++) {
|
|
144
|
+
const i = new Float32Array(Math.floor(this._nfft / 2) + 1);
|
|
145
|
+
for (let a = r[h]; a < r[h + 1]; a++)
|
|
146
|
+
i[a] = (a - r[h]) / (r[h + 1] - r[h]);
|
|
147
|
+
for (let a = r[h + 1]; a < r[h + 2]; a++)
|
|
148
|
+
i[a] = (r[h + 2] - a) / (r[h + 2] - r[h + 1]);
|
|
149
|
+
l.push(i);
|
|
150
150
|
}
|
|
151
|
-
return
|
|
151
|
+
return l;
|
|
152
152
|
}
|
|
153
153
|
/** Returns a flat Float32Array of shape [numFrames × nfilt]. */
|
|
154
154
|
logfbank(o) {
|
|
155
|
-
const t = Math.floor(0.025 * this.
|
|
156
|
-
for (let
|
|
157
|
-
const
|
|
158
|
-
|
|
159
|
-
for (let d = 0; d < t &&
|
|
160
|
-
|
|
161
|
-
const
|
|
162
|
-
this.
|
|
163
|
-
const
|
|
164
|
-
for (let d = 0; d <
|
|
165
|
-
const
|
|
166
|
-
|
|
155
|
+
const t = Math.floor(0.025 * this._sampleRate), s = Math.floor(0.01 * this._sampleRate), n = 1 + Math.ceil((o.length - t) / s), e = new Float32Array(n * this._nfilt), c = new Float32Array(this._nfft), r = this._fft.createComplexArray();
|
|
156
|
+
for (let l = 0; l < n; l++) {
|
|
157
|
+
const h = l * s;
|
|
158
|
+
c.fill(0);
|
|
159
|
+
for (let d = 0; d < t && h + d < o.length; d++)
|
|
160
|
+
c[d] = o[h + d];
|
|
161
|
+
const i = this._fft.toComplexArray(c, null);
|
|
162
|
+
this._fft.transform(r, i);
|
|
163
|
+
const a = new Float32Array(Math.floor(this._nfft / 2) + 1);
|
|
164
|
+
for (let d = 0; d < a.length; d++) {
|
|
165
|
+
const _ = r[2 * d], v = r[2 * d + 1];
|
|
166
|
+
a[d] = 1 / this._nfft * (_ * _ + v * v), a[d] === 0 && (a[d] = 1e-30);
|
|
167
167
|
}
|
|
168
|
-
for (let d = 0; d < this.
|
|
169
|
-
let
|
|
170
|
-
const
|
|
171
|
-
for (let f = 0; f <
|
|
172
|
-
|
|
173
|
-
|
|
168
|
+
for (let d = 0; d < this._nfilt; d++) {
|
|
169
|
+
let _ = 0;
|
|
170
|
+
const v = this._melFilters[d];
|
|
171
|
+
for (let f = 0; f < a.length; f++)
|
|
172
|
+
_ += a[f] * v[f];
|
|
173
|
+
_ === 0 && (_ = 1e-30), e[l * this._nfilt + d] = Math.log(_);
|
|
174
174
|
}
|
|
175
175
|
}
|
|
176
|
-
return
|
|
176
|
+
return e;
|
|
177
|
+
}
|
|
178
|
+
maxCosineSim(o, t) {
|
|
179
|
+
let s = 0;
|
|
180
|
+
for (const n of t) {
|
|
181
|
+
let e = 0;
|
|
182
|
+
for (let r = 0; r < n.length; r++) e += o[r] * n[r];
|
|
183
|
+
const c = (e + 1) / 2;
|
|
184
|
+
c > s && (s = c);
|
|
185
|
+
}
|
|
186
|
+
return s;
|
|
177
187
|
}
|
|
178
188
|
}
|
|
179
|
-
async function
|
|
189
|
+
/**
 * Dynamically imports the ONNX runtime module from `o` and configures its
 * WASM backend before handing the module back.
 *
 * @param m value assigned to `env.wasm.wasmPaths` on the imported module
 * @param o module specifier / URL passed to the dynamic `import()`
 * @returns the imported runtime module, with `env.wasm.numThreads` set to 1
 */
async function q(m = tt, o = st) {
  const ort = await import(
    /* @vite-ignore */
    o
  );
  ort.env.wasm.wasmPaths = m;
  ort.env.wasm.numThreads = 1;
  return ort;
}
|
|
186
|
-
let
|
|
187
|
-
async function
|
|
188
|
-
return
|
|
189
|
-
(
|
|
196
|
+
let dt = null;
|
|
197
|
+
async function _t(m = mt, o = tt, t = st, s) {
|
|
198
|
+
return dt || (dt = q(o, t).then(
|
|
199
|
+
(n) => s ? n.InferenceSession.create(new Uint8Array(s), {
|
|
200
|
+
executionProviders: ["wasm"],
|
|
201
|
+
graphOptimizationLevel: "all"
|
|
202
|
+
}) : n.InferenceSession.create(m, {
|
|
190
203
|
executionProviders: ["wasm"],
|
|
191
204
|
graphOptimizationLevel: "all"
|
|
192
205
|
})
|
|
193
|
-
)),
|
|
206
|
+
)), dt;
|
|
194
207
|
}
|
|
195
|
-
|
|
196
|
-
|
|
208
|
+
class V {
|
|
209
|
+
static loadWords(o = Z) {
|
|
210
|
+
try {
|
|
211
|
+
const t = localStorage.getItem(o);
|
|
212
|
+
return t ? JSON.parse(t) : [];
|
|
213
|
+
} catch {
|
|
214
|
+
return [];
|
|
215
|
+
}
|
|
216
|
+
}
|
|
217
|
+
static saveWord(o, t = Z) {
|
|
218
|
+
const s = V.loadWords(t).filter((n) => n.word_name !== o.word_name);
|
|
219
|
+
localStorage.setItem(t, JSON.stringify([...s, o]));
|
|
220
|
+
}
|
|
221
|
+
static deleteWord(o, t = Z) {
|
|
222
|
+
try {
|
|
223
|
+
const s = V.loadWords(t).filter((n) => n.word_name !== o);
|
|
224
|
+
localStorage.setItem(t, JSON.stringify(s));
|
|
225
|
+
} catch {
|
|
226
|
+
}
|
|
227
|
+
}
|
|
228
|
+
}
|
|
229
|
+
class It {
|
|
197
230
|
constructor(o, t) {
|
|
198
|
-
this._started = !1, this._inferring = !1, this._audioCtx = null, this._stream = null, this._refEmbeddings = /* @__PURE__ */ new Map(), this._lastMatchAt = 0, this._lastInferenceAt = 0;
|
|
231
|
+
this._started = !1, this._inferring = !1, this._audioCtx = null, this._stream = null, this._refEmbeddings = /* @__PURE__ */ new Map(), this._lastMatchAt = 0, this._lastInferenceAt = 0, this._initPromise = null;
|
|
199
232
|
const {
|
|
200
233
|
refsStorageKey: s = Z,
|
|
201
|
-
thresholdStorageKey: n =
|
|
202
|
-
wasmPaths:
|
|
203
|
-
modelPath:
|
|
204
|
-
audioProcessorPath:
|
|
205
|
-
ortCdnUrl:
|
|
234
|
+
thresholdStorageKey: n = zt,
|
|
235
|
+
wasmPaths: e = tt,
|
|
236
|
+
modelPath: c = mt,
|
|
237
|
+
audioProcessorPath: r = xt,
|
|
238
|
+
ortCdnUrl: l = st,
|
|
239
|
+
audioUtils: h = new pt()
|
|
206
240
|
} = t || {};
|
|
207
|
-
this._commands = o, this._refsStorageKey = s, this._thresholdStorageKey = n, this._audioProcessorPath =
|
|
241
|
+
this._audioUtils = h, this._commands = o, this._refsStorageKey = s, this._thresholdStorageKey = n, this._audioProcessorPath = r, this._wasmPaths = e, this._modelPath = c, this._ortCdnUrl = l;
|
|
208
242
|
try {
|
|
209
243
|
const i = localStorage.getItem(this._thresholdStorageKey);
|
|
210
244
|
this._threshold = i !== null ? Math.max(0, Math.min(1, Number(i))) : 0.65;
|
|
211
245
|
} catch {
|
|
212
246
|
this._threshold = 0.65;
|
|
213
247
|
}
|
|
214
|
-
this._initPromise = this._init();
|
|
215
248
|
}
|
|
216
249
|
get threshold() {
|
|
217
250
|
return this._threshold;
|
|
@@ -226,25 +259,63 @@ class O {
|
|
|
226
259
|
get listening() {
|
|
227
260
|
return this._started;
|
|
228
261
|
}
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
262
|
+
/**
|
|
263
|
+
* Streams `url`, calling `onProgress(downloaded, total)` after each chunk.
|
|
264
|
+
* Falls back to a single-shot fetch when the body stream is unavailable.
|
|
265
|
+
*/
|
|
266
|
+
async _trackFetch(o, t, s) {
|
|
267
|
+
const n = await fetch(o);
|
|
268
|
+
if (!n.ok) throw new Error(`HTTP ${n.status} fetching ${o}`);
|
|
269
|
+
const e = Number(n.headers.get("content-length") ?? "0");
|
|
270
|
+
if (e > 0 && (s.total += e), !n.body) {
|
|
271
|
+
const a = await n.arrayBuffer();
|
|
272
|
+
return s.downloaded += a.byteLength, e || (s.total += a.byteLength), t == null || t(s.downloaded, s.total), a;
|
|
273
|
+
}
|
|
274
|
+
const c = n.body.getReader(), r = [];
|
|
275
|
+
let l = 0;
|
|
276
|
+
for (; ; ) {
|
|
277
|
+
const { done: a, value: d } = await c.read();
|
|
278
|
+
if (a) break;
|
|
279
|
+
r.push(d), l += d.length, s.downloaded += d.length, t == null || t(s.downloaded, s.total);
|
|
280
|
+
}
|
|
281
|
+
e || (s.total += l);
|
|
282
|
+
const h = new Uint8Array(l);
|
|
283
|
+
let i = 0;
|
|
284
|
+
for (const a of r)
|
|
285
|
+
h.set(a, i), i += a.length;
|
|
286
|
+
return h.buffer;
|
|
287
|
+
}
|
|
288
|
+
async _init(o) {
|
|
289
|
+
const t = { downloaded: 0, total: 0 }, s = /* @__PURE__ */ new Set(), n = [];
|
|
290
|
+
for (const i of this._commands)
|
|
291
|
+
for (const a of i.triggers)
|
|
292
|
+
!s.has(a.name) && a.defaultRefPath && (s.add(a.name), n.push({ name: a.name, path: a.defaultRefPath }));
|
|
293
|
+
const e = q(this._wasmPaths, this._ortCdnUrl), [c, ...r] = await Promise.all([
|
|
294
|
+
this._trackFetch(this._modelPath, o, t),
|
|
295
|
+
...n.map(({ path: i }) => this._trackFetch(i, o, t))
|
|
296
|
+
]);
|
|
297
|
+
await e, await _t(this._modelPath, this._wasmPaths, this._ortCdnUrl, c);
|
|
298
|
+
const l = V.loadWords(this._refsStorageKey), h = new Set(l.map((i) => i.word_name));
|
|
299
|
+
for (let i = 0; i < n.length; i++)
|
|
300
|
+
try {
|
|
301
|
+
const a = JSON.parse(new TextDecoder().decode(r[i]));
|
|
302
|
+
this.addCustomWord(a), h.has(a.word_name) || V.saveWord(a, this._refsStorageKey);
|
|
303
|
+
} catch {
|
|
304
|
+
console.warn(`[Mellon] failed to parse ref file: ${n[i].path}`);
|
|
305
|
+
}
|
|
306
|
+
for (const i of l)
|
|
307
|
+
this._refEmbeddings.set(i.word_name, i.embeddings);
|
|
243
308
|
console.info("[Mellon] init complete, loaded refs:", [...this._refEmbeddings.keys()]);
|
|
244
309
|
}
|
|
245
|
-
/**
|
|
246
|
-
|
|
247
|
-
|
|
310
|
+
/**
|
|
311
|
+
* Loads the ONNX model and all reference embeddings.
|
|
312
|
+
* Must be called before {@link start}.
|
|
313
|
+
* Safe to call multiple times — the work is only done once.
|
|
314
|
+
*
|
|
315
|
+
* @param onProgress - optional callback invoked as each asset is loaded
|
|
316
|
+
*/
|
|
317
|
+
async init(o) {
|
|
318
|
+
this._initPromise || (this._initPromise = this._init(o)), await this._initPromise;
|
|
248
319
|
}
|
|
249
320
|
/** Adds (or replaces) the reference embeddings for a word without restarting. */
|
|
250
321
|
addCustomWord(o) {
|
|
@@ -254,7 +325,7 @@ class O {
|
|
|
254
325
|
}
|
|
255
326
|
async start() {
|
|
256
327
|
if (this._started) return;
|
|
257
|
-
await this.
|
|
328
|
+
await this.init();
|
|
258
329
|
let o;
|
|
259
330
|
try {
|
|
260
331
|
o = await navigator.mediaDevices.getUserMedia({
|
|
@@ -272,8 +343,8 @@ class O {
|
|
|
272
343
|
const t = new AudioContext({ sampleRate: 16e3 });
|
|
273
344
|
this._audioCtx = t, await t.audioWorklet.addModule(this._audioProcessorPath);
|
|
274
345
|
const s = t.createMediaStreamSource(o), n = new AudioWorkletNode(t, "audio-processor");
|
|
275
|
-
n.port.onmessage = (
|
|
276
|
-
this._handleBuffer(
|
|
346
|
+
n.port.onmessage = (e) => {
|
|
347
|
+
this._handleBuffer(e.data);
|
|
277
348
|
}, s.connect(n), n.connect(t.destination), this._started = !0;
|
|
278
349
|
}
|
|
279
350
|
async stop() {
|
|
@@ -288,16 +359,16 @@ class O {
|
|
|
288
359
|
if (!(t - this._lastInferenceAt < 300)) {
|
|
289
360
|
this._lastInferenceAt = t, this._inferring = !0;
|
|
290
361
|
try {
|
|
291
|
-
const [s, n] = await Promise.all([
|
|
292
|
-
let
|
|
293
|
-
for (const
|
|
294
|
-
if (
|
|
295
|
-
for (const
|
|
296
|
-
const d = this._refEmbeddings.get(
|
|
362
|
+
const [s, n] = await Promise.all([q(this._wasmPaths, this._ortCdnUrl), _t(this._modelPath, this._wasmPaths, this._ortCdnUrl)]), e = this._audioUtils.logfbank(o), c = new s.Tensor("float32", e, [1, 1, 149, 64]), r = await n.run({ input: c }), l = r[Object.keys(r)[0]].data;
|
|
363
|
+
let h = !1;
|
|
364
|
+
for (const i of this._commands) {
|
|
365
|
+
if (h) break;
|
|
366
|
+
for (const a of i.triggers) {
|
|
367
|
+
const d = this._refEmbeddings.get(a.name);
|
|
297
368
|
if (!d) continue;
|
|
298
|
-
const
|
|
299
|
-
if (
|
|
300
|
-
this._lastMatchAt = t, console.info(`[Mellon] match: "${
|
|
369
|
+
const _ = this._audioUtils.maxCosineSim(l, d);
|
|
370
|
+
if (_ >= this._threshold && t - this._lastMatchAt > 2e3) {
|
|
371
|
+
this._lastMatchAt = t, console.info(`[Mellon] match: "${a}" sim=${_.toFixed(3)}`), typeof i.onMatch == "function" && i.onMatch(a.name, _), h = !0;
|
|
301
372
|
break;
|
|
302
373
|
}
|
|
303
374
|
}
|
|
@@ -309,64 +380,34 @@ class O {
|
|
|
309
380
|
}
|
|
310
381
|
}
|
|
311
382
|
}
|
|
312
|
-
_maxCosineSim(o, t) {
|
|
313
|
-
let s = 0;
|
|
314
|
-
for (const n of t) {
|
|
315
|
-
let r = 0;
|
|
316
|
-
for (let e = 0; e < n.length; e++) r += o[e] * n[e];
|
|
317
|
-
const a = (r + 1) / 2;
|
|
318
|
-
a > s && (s = a);
|
|
319
|
-
}
|
|
320
|
-
return s;
|
|
321
|
-
}
|
|
322
|
-
static loadWords(o = Z) {
|
|
323
|
-
try {
|
|
324
|
-
const t = localStorage.getItem(o);
|
|
325
|
-
return t ? JSON.parse(t) : [];
|
|
326
|
-
} catch {
|
|
327
|
-
return [];
|
|
328
|
-
}
|
|
329
|
-
}
|
|
330
|
-
static saveWord(o, t = Z) {
|
|
331
|
-
const s = O.loadWords(t).filter((n) => n.word_name !== o.word_name);
|
|
332
|
-
localStorage.setItem(t, JSON.stringify([...s, o]));
|
|
333
|
-
}
|
|
334
|
-
static deleteWord(o, t = Z) {
|
|
335
|
-
try {
|
|
336
|
-
const s = O.loadWords(t).filter((n) => n.word_name !== o);
|
|
337
|
-
localStorage.setItem(t, JSON.stringify(s));
|
|
338
|
-
} catch {
|
|
339
|
-
}
|
|
340
|
-
}
|
|
341
383
|
}
|
|
342
|
-
|
|
343
|
-
class jt {
|
|
384
|
+
class Wt {
|
|
344
385
|
constructor(o, t) {
|
|
345
|
-
this._config = {}, this._samples = [], this._wordName = o, this._config.modelPath = (t == null ? void 0 : t.modelPath) ||
|
|
386
|
+
this._config = {}, this._samples = [], this._wordName = o, this._config.modelPath = (t == null ? void 0 : t.modelPath) || mt, this._config.wasmPaths = (t == null ? void 0 : t.wasmPaths) || tt, this._config.ortCdnUrl = (t == null ? void 0 : t.ortCdnUrl) || st, this._audioUtils = (t == null ? void 0 : t.audioUtils) ?? new pt();
|
|
346
387
|
}
|
|
347
388
|
/** Records 1.5 s of audio, stores the decoded PCM, returns new sample count. */
|
|
348
389
|
async recordSample() {
|
|
349
|
-
const o = await navigator.mediaDevices.getUserMedia({ audio: !0 }), t = new AudioContext({ sampleRate: 16e3 }), s = await new Promise((
|
|
350
|
-
const
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
},
|
|
354
|
-
var
|
|
355
|
-
for (const
|
|
390
|
+
const o = await navigator.mediaDevices.getUserMedia({ audio: !0 }), t = new AudioContext({ sampleRate: 16e3 }), s = await new Promise((c, r) => {
|
|
391
|
+
const l = new MediaRecorder(o), h = [];
|
|
392
|
+
l.ondataavailable = (i) => {
|
|
393
|
+
i.data.size > 0 && h.push(i.data);
|
|
394
|
+
}, l.onstop = async () => {
|
|
395
|
+
var i;
|
|
396
|
+
for (const a of o.getTracks()) a.stop();
|
|
356
397
|
try {
|
|
357
|
-
const d = await new Blob(
|
|
358
|
-
await t.close(),
|
|
359
|
-
} catch (
|
|
360
|
-
|
|
398
|
+
const d = await new Blob(h, { type: ((i = h[0]) == null ? void 0 : i.type) || "audio/webm" }).arrayBuffer(), _ = await t.decodeAudioData(d);
|
|
399
|
+
await t.close(), c(_.getChannelData(0).slice());
|
|
400
|
+
} catch (a) {
|
|
401
|
+
r(a);
|
|
361
402
|
}
|
|
362
|
-
},
|
|
403
|
+
}, l.start(), setTimeout(() => {
|
|
363
404
|
try {
|
|
364
|
-
|
|
405
|
+
l.stop();
|
|
365
406
|
} catch {
|
|
366
407
|
}
|
|
367
408
|
}, 1500);
|
|
368
|
-
}), n = 24e3,
|
|
369
|
-
return
|
|
409
|
+
}), n = 24e3, e = new Float32Array(n);
|
|
410
|
+
return e.set(s.slice(0, n)), this._samples.push(e), this._samples.length;
|
|
370
411
|
}
|
|
371
412
|
/** Removes the sample at the given index. Returns the new sample count. */
|
|
372
413
|
deleteSample(o) {
|
|
@@ -376,22 +417,24 @@ class jt {
|
|
|
376
417
|
}
|
|
377
418
|
/** Runs ONNX inference on every recorded sample to produce reference embeddings. */
|
|
378
419
|
async generateRef() {
|
|
379
|
-
const [o, t] = await Promise.all([
|
|
420
|
+
const [o, t] = await Promise.all([q(this._config.wasmPaths, this._config.ortCdnUrl), _t(this._config.modelPath, this._config.wasmPaths, this._config.ortCdnUrl)]), s = [];
|
|
380
421
|
for (const n of this._samples) {
|
|
381
|
-
const
|
|
382
|
-
s.push(
|
|
422
|
+
const e = this._audioUtils.logfbank(n), c = new o.Tensor("float32", e, [1, 1, 149, 64]), r = await t.run({ input: c }), l = Array.from(r[Object.keys(r)[0]].data);
|
|
423
|
+
s.push(l);
|
|
383
424
|
}
|
|
384
425
|
return { word_name: this._wordName, model_type: "resnet_50_arc", embeddings: s };
|
|
385
426
|
}
|
|
386
427
|
}
|
|
387
|
-
const
|
|
428
|
+
// Built-in defaults, re-exported at the bottom of the module under DEFAULT_* names.

/** Exported as DEFAULT_WASM_PATHS. */
const tt = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/";

/** Exported as DEFAULT_ORT_CDN_URL. */
const st = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/ort.wasm.min.mjs";

/** Exported as DEFAULT_MODEL_PATH. */
const mt = "https://huggingface.co/ComicScrip/mellon/resolve/main/model.onnx";

/**
 * Exported as DEFAULT_AUDIO_PROCESSOR_PATH.
 * NOTE(review): this URL pins mellon@0.0.14 — confirm that is intentional and
 * that the pinned worklet still matches the current package version.
 */
const xt = "https://cdn.jsdelivr.net/npm/mellon@0.0.14/dist/assets/audio-processor.js";

/** Exported as DEFAULT_REFS_STORAGE_KEY. */
const Z = "mellon-refs";

/** Exported as DEFAULT_THRESHOLD_STORAGE_KEY. */
const zt = "mellon-threshold";
|
|
388
429
|
export {
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
430
|
+
pt as AudioUtils,
|
|
431
|
+
xt as DEFAULT_AUDIO_PROCESSOR_PATH,
|
|
432
|
+
mt as DEFAULT_MODEL_PATH,
|
|
433
|
+
st as DEFAULT_ORT_CDN_URL,
|
|
392
434
|
Z as DEFAULT_REFS_STORAGE_KEY,
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
435
|
+
zt as DEFAULT_THRESHOLD_STORAGE_KEY,
|
|
436
|
+
tt as DEFAULT_WASM_PATHS,
|
|
437
|
+
It as Detector,
|
|
438
|
+
Wt as EnrollmentSession,
|
|
439
|
+
V as Storage
|
|
397
440
|
};
|