mellon 0.0.2 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/dist/index.d.ts +295 -0
  2. package/dist/mellon.cjs +38 -0
  3. package/dist/mellon.mjs +616 -0
  4. package/package.json +6 -9
  5. package/dist/assets/index-B3ZBo_ZU.css +0 -1
  6. package/dist/assets/index-CiLJGV_Q.js +0 -19
  7. package/dist/index.html +0 -251
  8. /package/dist/{audio-processor.js → assets/audio-processor.js} +0 -0
  9. /package/dist/{manifest.json → assets/manifest.json} +0 -0
  10. /package/dist/{models → assets}/model.onnx +0 -0
  11. /package/dist/{wasm → assets}/ort-wasm-simd-threaded.asyncify.mjs +0 -0
  12. /package/dist/{wasm → assets}/ort-wasm-simd-threaded.asyncify.wasm +0 -0
  13. /package/dist/{wasm → assets}/ort-wasm-simd-threaded.jsep.mjs +0 -0
  14. /package/dist/{wasm → assets}/ort-wasm-simd-threaded.jsep.wasm +0 -0
  15. /package/dist/{wasm → assets}/ort-wasm-simd-threaded.jspi.mjs +0 -0
  16. /package/dist/{wasm → assets}/ort-wasm-simd-threaded.jspi.wasm +0 -0
  17. /package/dist/{wasm → assets}/ort-wasm-simd-threaded.mjs +0 -0
  18. /package/dist/{wasm → assets}/ort-wasm-simd-threaded.wasm +0 -0
  19. /package/dist/{wasm → assets}/ort.all.bundle.min.mjs +0 -0
  20. /package/dist/{wasm → assets}/ort.all.min.mjs +0 -0
  21. /package/dist/{wasm → assets}/ort.all.mjs +0 -0
  22. /package/dist/{wasm → assets}/ort.bundle.min.mjs +0 -0
  23. /package/dist/{wasm → assets}/ort.jspi.bundle.min.mjs +0 -0
  24. /package/dist/{wasm → assets}/ort.jspi.min.mjs +0 -0
  25. /package/dist/{wasm → assets}/ort.jspi.mjs +0 -0
  26. /package/dist/{wasm → assets}/ort.min.mjs +0 -0
  27. /package/dist/{wasm → assets}/ort.mjs +0 -0
  28. /package/dist/{wasm → assets}/ort.node.min.mjs +0 -0
  29. /package/dist/{wasm → assets}/ort.wasm.bundle.min.mjs +0 -0
  30. /package/dist/{wasm → assets}/ort.wasm.min.mjs +0 -0
  31. /package/dist/{wasm → assets}/ort.wasm.mjs +0 -0
  32. /package/dist/{wasm → assets}/ort.webgl.min.mjs +0 -0
  33. /package/dist/{wasm → assets}/ort.webgl.mjs +0 -0
  34. /package/dist/{wasm → assets}/ort.webgpu.bundle.min.mjs +0 -0
  35. /package/dist/{wasm → assets}/ort.webgpu.min.mjs +0 -0
  36. /package/dist/{wasm → assets}/ort.webgpu.mjs +0 -0
  37. /package/dist/{sw.js → assets/sw.js} +0 -0
@@ -0,0 +1,295 @@
1
+ // mellon type declarations
2
+
3
+ // ─── Shared data types ───────────────────────────────────────────────────────
4
+
5
+ export interface RefData {
6
+ word_name: string
7
+ model_type: 'resnet_50_arc'
8
+ embeddings: number[][]
9
+ }
10
+
11
+ export interface MatchEventDetail {
12
+ /** The detected word name. */
13
+ name: string
14
+ /** Similarity score that triggered detection (0–1). */
15
+ confidence: number
16
+ /** Unix timestamp (ms) of the detection. */
17
+ timestamp: number
18
+ }
19
+
20
+ export interface SampleInfo {
21
+ audioBuffer: Float32Array
22
+ name: string
23
+ }
24
+
25
+ // ─── Engine ──────────────────────────────────────────────────────────────────
26
+
27
+ export interface EngineConfig {
28
+ /**
29
+ * Base URL where ORT WASM files are served (trailing slash required).
30
+ * Defaults to the jsDelivr CDN. Override for offline / intranet use.
31
+ * @example '/mellon-assets/wasm/'
32
+ */
33
+ wasmBasePath?: string
34
+ /**
35
+ * Full URL to model.onnx.
36
+ * Defaults to the jsDelivr CDN. Override for offline / intranet use.
37
+ * @example '/mellon-assets/model.onnx'
38
+ */
39
+ modelUrl?: string
40
+ }
41
+
42
+ /**
43
+ * Override asset paths. Optional — by default assets load from the jsDelivr CDN.
44
+ * Call this before loadModel() when deploying offline or on a private network.
45
+ */
46
+ export function configure(config: EngineConfig): void
47
+
48
+ /**
49
+ * Load (or return the already-loaded) ONNX inference session.
50
+ * Idempotent — safe to call multiple times.
51
+ *
52
+ * @param onProgress Called with values 0.0 → 1.0 as the model downloads.
53
+ */
54
+ export function loadModel(onProgress?: (progress: number) => void): Promise<void>
55
+
56
+ /**
57
+ * Compute a 256-dim L2-normalised embedding from a log-mel spectrogram.
58
+ * Requires loadModel() to have completed first.
59
+ *
60
+ * @param spectrogram Flat Float32Array of shape [149 × 64] from logfbank().
61
+ */
62
+ export function embed(spectrogram: Float32Array): Promise<Float32Array>
63
+
64
+ // ─── Mel feature extraction ───────────────────────────────────────────────────
65
+
66
+ /**
67
+ * Compute a log-mel spectrogram from a 1.5-second 16 kHz audio buffer.
68
+ *
69
+ * @param signal 24 000 samples at 16 kHz (1.5 seconds).
70
+ * @returns Float32Array of shape [149 × 64] (frames × mel-bins).
71
+ */
72
+ export function logfbank(signal: Float32Array): Float32Array
73
+
74
+ // ─── Similarity helpers ───────────────────────────────────────────────────────
75
+
76
+ /**
77
+ * Cosine similarity normalised to [0, 1].
78
+ * Assumes both vectors are L2-normalised (as the ArcFace model guarantees).
79
+ */
80
+ export function cosineSim(a: Float32Array | number[], b: Float32Array | number[]): number
81
+
82
+ /**
83
+ * Maximum cosine similarity between `embedding` and any of `refs`.
84
+ */
85
+ export function maxSimilarity(
86
+ embedding: Float32Array,
87
+ refs: number[][] | Float32Array[],
88
+ ): number
89
+
90
+ // ─── HotwordDetector ─────────────────────────────────────────────────────────
91
+
92
+ export interface DetectorOptions {
93
+ /** Human-readable label for this word. */
94
+ name: string
95
+ /** Reference embeddings (N × 256), e.g. from a RefData.embeddings array. */
96
+ refEmbeddings: number[][] | Float32Array[]
97
+ /** Detection threshold in [0, 1]. Default: 0.65 */
98
+ threshold?: number
99
+ /** Minimum milliseconds between successive 'match' events. Default: 2000 */
100
+ relaxationMs?: number
101
+ /** Minimum milliseconds between consecutive inference runs. Default: 300 */
102
+ inferenceGapMs?: number
103
+ }
104
+
105
+ /**
106
+ * Stateful detector for a single hotword.
107
+ *
108
+ * @example
109
+ * const myRef = await importRefFile(file) // or load from your source
110
+ * const d = new HotwordDetector({ name: myRef.word_name, refEmbeddings: myRef.embeddings })
111
+ * d.addEventListener('match', e => console.log(e.detail.name, e.detail.confidence))
112
+ * // In AudioWorklet onmessage handler:
113
+ * const score = await d.scoreFrame(audioBuffer)
114
+ */
115
+ export class HotwordDetector extends EventTarget {
116
+ constructor(opts: DetectorOptions)
117
+
118
+ readonly name: string
119
+ /** Most recent similarity score (0–1). */
120
+ readonly lastScore: number
121
+ threshold: number
122
+ relaxationMs: number
123
+ inferenceGapMs: number
124
+ refEmbeddings: number[][] | Float32Array[]
125
+
126
+ /**
127
+ * Score a 1.5-second audio frame. Rate-limited to inferenceGapMs.
128
+ *
129
+ * @param audioBuffer 24 000 samples at 16 kHz.
130
+ * @returns Similarity score, or null when rate-limited.
131
+ */
132
+ scoreFrame(audioBuffer: Float32Array): Promise<number | null>
133
+
134
+ addEventListener(
135
+ type: 'match',
136
+ listener: (event: CustomEvent<MatchEventDetail>) => void,
137
+ options?: boolean | AddEventListenerOptions,
138
+ ): void
139
+ addEventListener(
140
+ type: string,
141
+ listener: EventListenerOrEventListenerObject,
142
+ options?: boolean | AddEventListenerOptions,
143
+ ): void
144
+ }
145
+
146
+ // ─── EnrollmentSession ────────────────────────────────────────────────────────
147
+
148
+ /**
149
+ * Manages recording and embedding generation for a custom wake word.
150
+ *
151
+ * @example
152
+ * const session = new EnrollmentSession('hello')
153
+ * await session.recordSample() // record 1.5 s from mic (repeat 3+ times)
154
+ * const ref = await session.generateRef()
155
+ * saveCustomRef(ref) // persist to localStorage
156
+ */
157
+ export class EnrollmentSession extends EventTarget {
158
+ constructor(wordName: string)
159
+
160
+ readonly wordName: string
161
+ readonly sampleCount: number
162
+ readonly samples: SampleInfo[]
163
+
164
+ /** Record exactly 1.5 seconds from the microphone. Returns 1-based sample index. */
165
+ recordSample(): Promise<number>
166
+
167
+ /** Decode an uploaded audio File and add it as a sample. Returns 1-based index. */
168
+ addAudioFile(file: File): Promise<number>
169
+
170
+ /** Remove a sample by 0-based index. */
171
+ removeSample(idx: number): void
172
+
173
+ /** Remove all samples. */
174
+ clearSamples(): void
175
+
176
+ /**
177
+ * Generate reference embeddings from the accumulated samples (minimum 3 required).
178
+ * Returns a RefData object ready to pass to saveCustomRef() or Mellon.addCustomWord().
179
+ */
180
+ generateRef(): Promise<RefData>
181
+
182
+ addEventListener(type: 'recording-start', listener: (event: CustomEvent) => void, options?: boolean | AddEventListenerOptions): void
183
+ addEventListener(type: 'sample-added', listener: (event: CustomEvent<{ count: number; name: string }>) => void, options?: boolean | AddEventListenerOptions): void
184
+ addEventListener(type: 'samples-changed', listener: (event: CustomEvent<{ count: number }>) => void, options?: boolean | AddEventListenerOptions): void
185
+ addEventListener(type: 'generating', listener: (event: CustomEvent<{ total: number }>) => void, options?: boolean | AddEventListenerOptions): void
186
+ addEventListener(type: 'progress', listener: (event: CustomEvent<{ done: number; total: number }>) => void, options?: boolean | AddEventListenerOptions): void
187
+ addEventListener(type: string, listener: EventListenerOrEventListenerObject, options?: boolean | AddEventListenerOptions): void
188
+ }
189
+
190
+ // ─── Mellon (high-level API) ─────────────────────────────────────────────────
191
+
192
+ export interface MellonOptions {
193
+ /** Words to detect. Refs must be registered via `addCustomWord()` or `refs` before `start()`. */
194
+ words?: string[]
195
+ /**
196
+ * Reference data to preload during `init()`. Each entry is either:
197
+ * - a URL string pointing to a hosted `_ref.json` file, or
198
+ * - an inline `RefData` object.
199
+ *
200
+ * @example
201
+ * refs: [
202
+ * 'https://example.com/hello_ref.json',
203
+ * 'https://example.com/stop_ref.json',
204
+ * ]
205
+ */
206
+ refs?: (string | RefData)[]
207
+ /** Detection threshold [0, 1]. Default: 0.65 */
208
+ threshold?: number
209
+ /** Minimum milliseconds between successive match events per word. Default: 2000 */
210
+ relaxationMs?: number
211
+ /** Minimum milliseconds between consecutive inference runs. Default: 300 */
212
+ inferenceGapMs?: number
213
+ /**
214
+ * Override the ORT WASM base URL. Defaults to the jsDelivr CDN.
215
+ * Only needed for offline / intranet deployments (trailing slash required).
216
+ * @example '/mellon-assets/wasm/'
217
+ */
218
+ wasmBasePath?: string
219
+ /**
220
+ * Override the model.onnx URL. Defaults to the jsDelivr CDN.
221
+ * Only needed for offline / intranet deployments.
222
+ * @example '/mellon-assets/model.onnx'
223
+ */
224
+ modelUrl?: string
225
+ }
226
+
227
+ /**
228
+ * High-level, all-in-one hotword detector.
229
+ *
230
+ * @example
231
+ * const stt = new Mellon({
232
+ * wasmBasePath: '/assets/wasm/',
233
+ * modelUrl: '/assets/model.onnx',
234
+ * })
235
+ * await stt.init(pct => progressBar.style.width = pct * 100 + '%')
236
+ * await stt.start()
237
+ * stt.addEventListener('match', e => console.log(e.detail.name, e.detail.confidence))
238
+ */
239
+ export class Mellon extends EventTarget {
240
+ constructor(opts?: MellonOptions)
241
+
242
+ /** True after init() has completed successfully. */
243
+ readonly isInitialized: boolean
244
+
245
+ /** True while start() is active (microphone is open). */
246
+ readonly isRunning: boolean
247
+
248
+ /**
249
+ * Load the ONNX model and cache built-in reference embeddings.
250
+ * Optional — start() auto-calls init() when needed.
251
+ *
252
+ * @param onProgress Progress callback, 0.0 → 1.0.
253
+ */
254
+ init(onProgress?: (progress: number) => void): Promise<void>
255
+
256
+ /**
257
+ * Request microphone access and start hotword detection.
258
+ * Resolves once audio pipeline is running.
259
+ *
260
+ * @param words Optional subset of words to activate (must have refs loaded).
261
+ */
262
+ start(words?: string[]): Promise<void>
263
+
264
+ /** Stop detection and release the microphone + AudioContext. */
265
+ stop(): void
266
+
267
+ /**
268
+ * Register reference embeddings for a word.
269
+ * Can be called before or after start().
270
+ */
271
+ addCustomWord(refData: RefData): void
272
+
273
+ /**
274
+ * Create an EnrollmentSession for recording a new custom word.
275
+ * Call addCustomWord() with the result of session.generateRef().
276
+ */
277
+ enrollWord(wordName: string): EnrollmentSession
278
+
279
+ /** Return all custom word refs stored in localStorage. */
280
+ static loadWords(): RefData[]
281
+ /** Persist a word ref to localStorage (replaces any existing entry with the same name). */
282
+ static saveWord(refData: RefData): void
283
+ /** Delete a word ref from localStorage by name. */
284
+ static deleteWord(wordName: string): void
285
+ /** Parse an uploaded ref JSON file. */
286
+ static importWordFile(file: File): Promise<RefData>
287
+ /** Trigger a browser download of a ref as a JSON file. */
288
+ static exportWord(refData: RefData): void
289
+
290
+ addEventListener(type: 'match', listener: (event: CustomEvent<MatchEventDetail>) => void, options?: boolean | AddEventListenerOptions): void
291
+ addEventListener(type: 'ready', listener: (event: CustomEvent) => void, options?: boolean | AddEventListenerOptions): void
292
+ addEventListener(type: 'error', listener: (event: CustomEvent<{ error: Error }>) => void, options?: boolean | AddEventListenerOptions): void
293
+ addEventListener(type: string, listener: EventListenerOrEventListenerObject, options?: boolean | AddEventListenerOptions): void
294
+ }
295
+
@@ -0,0 +1,38 @@
1
+ "use strict";Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});const Jt="0.0.4",Kt=[1,1,149,64],Yt=`https://cdn.jsdelivr.net/npm/mellon@${Jt}/dist/assets`,st={assetsPath:`${Yt}`};let I=null,q=null,tt=null;function Qt({assetsPath:i}={}){i!==void 0&&(st.assetsPath=i),I=null,q=null,tt=null}async function Vt(i){return I?(i==null||i(1),I):q||(q=(async()=>{const n=st.assetsPath.endsWith("/")?st.assetsPath:st.assetsPath+"/",t=n+"ort.all.min.mjs",e=n+"model.onnx";tt=await new Function("url","return import(url)")(t),tt.env.wasm.wasmPaths=n;const s=await fetch(e);if(!s.ok)throw new Error(`Failed to fetch model: ${s.status}`);const a=parseInt(s.headers.get("content-length")||"0",10),r=s.body.getReader(),c=[];let l=0;for(;;){const{done:m,value:f}=await r.read();if(m)break;c.push(f),l+=f.byteLength,a>0&&(i==null||i(l/a))}const h=new Uint8Array(l);let d=0;for(const m of c)h.set(m,d),d+=m.byteLength;return I=await tt.InferenceSession.create(h.buffer,{executionProviders:["wasm"],graphOptimizationLevel:"all"}),i==null||i(1),I})(),q)}async function St(i){if(!I)throw new Error("Model not loaded — call loadModel() first");const n=new tt.Tensor("float32",i,Kt),t=await I.run({input:n}),e=Object.keys(t)[0];return t[e].data}function Xt(i){return i&&i.__esModule&&Object.prototype.hasOwnProperty.call(i,"default")?i.default:i}var mt,Et;function Zt(){if(Et)return mt;Et=1;function i(n){if(this.size=n|0,this.size<=1||(this.size&this.size-1)!==0)throw new Error("FFT size must be a power of two and bigger than 1");this._csize=n<<1;for(var t=new Array(this.size*2),e=0;e<t.length;e+=2){const l=Math.PI*e/this.size;t[e]=Math.cos(l),t[e+1]=-Math.sin(l)}this.table=t;for(var o=0,s=1;this.size>s;s<<=1)o++;this._width=o%2===0?o-1:o,this._bitrev=new Array(1<<this._width);for(var a=0;a<this._bitrev.length;a++){this._bitrev[a]=0;for(var r=0;r<this._width;r+=2){var c=this._width-r-2;this._bitrev[a]|=(a>>>r&3)<<c}}this._out=null,this._data=null,this._inv=0}return 
mt=i,i.prototype.fromComplexArray=function(t,e){for(var o=e||new Array(t.length>>>1),s=0;s<t.length;s+=2)o[s>>>1]=t[s];return o},i.prototype.createComplexArray=function(){const t=new Array(this._csize);for(var e=0;e<t.length;e++)t[e]=0;return t},i.prototype.toComplexArray=function(t,e){for(var o=e||this.createComplexArray(),s=0;s<o.length;s+=2)o[s]=t[s>>>1],o[s+1]=0;return o},i.prototype.completeSpectrum=function(t){for(var e=this._csize,o=e>>>1,s=2;s<o;s+=2)t[e-s]=t[s],t[e-s+1]=-t[s+1]},i.prototype.transform=function(t,e){if(t===e)throw new Error("Input and output buffers must be different");this._out=t,this._data=e,this._inv=0,this._transform4(),this._out=null,this._data=null},i.prototype.realTransform=function(t,e){if(t===e)throw new Error("Input and output buffers must be different");this._out=t,this._data=e,this._inv=0,this._realTransform4(),this._out=null,this._data=null},i.prototype.inverseTransform=function(t,e){if(t===e)throw new Error("Input and output buffers must be different");this._out=t,this._data=e,this._inv=1,this._transform4();for(var o=0;o<t.length;o++)t[o]/=this.size;this._out=null,this._data=null},i.prototype._transform4=function(){var t=this._out,e=this._csize,o=this._width,s=1<<o,a=e/s<<1,r,c,l=this._bitrev;if(a===4)for(r=0,c=0;r<e;r+=a,c++){const u=l[c];this._singleTransform2(r,u,s)}else for(r=0,c=0;r<e;r+=a,c++){const u=l[c];this._singleTransform4(r,u,s)}var h=this._inv?-1:1,d=this.table;for(s>>=2;s>=2;s>>=2){a=e/s<<1;var m=a>>>2;for(r=0;r<e;r+=a)for(var f=r+m,g=r,_=0;g<f;g+=2,_+=s){const u=g,p=u+m,v=p+m,w=v+m,b=t[u],A=t[u+1],E=t[p],y=t[p+1],F=t[v],M=t[v+1],C=t[w],T=t[w+1],x=b,R=A,z=d[_],S=h*d[_+1],N=E*z-y*S,k=E*S+y*z,P=d[2*_],L=h*d[2*_+1],G=F*P-M*L,H=F*L+M*P,J=d[3*_],K=h*d[3*_+1],Y=C*J-T*K,Q=C*K+T*J,V=x+G,U=R+H,j=x-G,X=R-H,Z=N+Y,W=k+Q,$=h*(N-Y),O=h*(k-Q),et=V+Z,ot=U+W,at=V-Z,it=U-W,ct=j+O,lt=X-$,ht=j-O,dt=X+$;t[u]=et,t[u+1]=ot,t[p]=ct,t[p+1]=lt,t[v]=at,t[v+1]=it,t[w]=ht,t[w+1]=dt}}},i.prototype._singleTransform2=function(t,e,o){const 
s=this._out,a=this._data,r=a[e],c=a[e+1],l=a[e+o],h=a[e+o+1],d=r+l,m=c+h,f=r-l,g=c-h;s[t]=d,s[t+1]=m,s[t+2]=f,s[t+3]=g},i.prototype._singleTransform4=function(t,e,o){const s=this._out,a=this._data,r=this._inv?-1:1,c=o*2,l=o*3,h=a[e],d=a[e+1],m=a[e+o],f=a[e+o+1],g=a[e+c],_=a[e+c+1],u=a[e+l],p=a[e+l+1],v=h+g,w=d+_,b=h-g,A=d-_,E=m+u,y=f+p,F=r*(m-u),M=r*(f-p),C=v+E,T=w+y,x=b+M,R=A-F,z=v-E,S=w-y,N=b-M,k=A+F;s[t]=C,s[t+1]=T,s[t+2]=x,s[t+3]=R,s[t+4]=z,s[t+5]=S,s[t+6]=N,s[t+7]=k},i.prototype._realTransform4=function(){var t=this._out,e=this._csize,o=this._width,s=1<<o,a=e/s<<1,r,c,l=this._bitrev;if(a===4)for(r=0,c=0;r<e;r+=a,c++){const ut=l[c];this._singleRealTransform2(r,ut>>>1,s>>>1)}else for(r=0,c=0;r<e;r+=a,c++){const ut=l[c];this._singleRealTransform4(r,ut>>>1,s>>>1)}var h=this._inv?-1:1,d=this.table;for(s>>=2;s>=2;s>>=2){a=e/s<<1;var m=a>>>1,f=m>>>1,g=f>>>1;for(r=0;r<e;r+=a)for(var _=0,u=0;_<=g;_+=2,u+=s){var p=r+_,v=p+f,w=v+f,b=w+f,A=t[p],E=t[p+1],y=t[v],F=t[v+1],M=t[w],C=t[w+1],T=t[b],x=t[b+1],R=A,z=E,S=d[u],N=h*d[u+1],k=y*S-F*N,P=y*N+F*S,L=d[2*u],G=h*d[2*u+1],H=M*L-C*G,J=M*G+C*L,K=d[3*u],Y=h*d[3*u+1],Q=T*K-x*Y,V=T*Y+x*K,U=R+H,j=z+J,X=R-H,Z=z-J,W=k+Q,$=P+V,O=h*(k-Q),et=h*(P-V),ot=U+W,at=j+$,it=X+et,ct=Z-O;if(t[p]=ot,t[p+1]=at,t[v]=it,t[v+1]=ct,_===0){var lt=U-W,ht=j-$;t[w]=lt,t[w+1]=ht;continue}if(_!==g){var dt=X,It=-Z,Bt=U,Dt=-j,Ut=-h*et,jt=-h*O,Wt=-h*$,$t=-h*W,Pt=dt+Ut,Lt=It+jt,Gt=Bt+$t,Ht=Dt-Wt,yt=r+f-_,bt=r+m-_;t[yt]=Pt,t[yt+1]=Lt,t[bt]=Gt,t[bt+1]=Ht}}}},i.prototype._singleRealTransform2=function(t,e,o){const s=this._out,a=this._data,r=a[e],c=a[e+o],l=r+c,h=r-c;s[t]=l,s[t+1]=0,s[t+2]=h,s[t+3]=0},i.prototype._singleRealTransform4=function(t,e,o){const s=this._out,a=this._data,r=this._inv?-1:1,c=o*2,l=o*3,h=a[e],d=a[e+o],m=a[e+c],f=a[e+l],g=h+m,_=h-m,u=d+f,p=r*(d-f),v=g+u,w=_,b=-p,A=g-u,E=_,y=p;s[t]=v,s[t+1]=0,s[t+2]=w,s[t+3]=b,s[t+4]=A,s[t+5]=0,s[t+6]=E,s[t+7]=y},mt}var Ot=Zt();const 
qt=Xt(Ot),nt=16e3,D=512,B=64,At=Math.floor(.025*nt),Ft=Math.floor(.01*nt);function Mt(i){return 2595*Math.log10(1+i/700)}function te(i){return 700*(10**(i/2595)-1)}function ee(){const i=Mt(0),n=Mt(nt/2),t=new Float64Array(B+2);for(let r=0;r<B+2;r++)t[r]=i+r*(n-i)/(B+1);const o=t.map(r=>te(r)).map(r=>Math.floor((D+1)*r/nt)),s=[],a=Math.floor(D/2)+1;for(let r=0;r<B;r++){const c=new Float32Array(a);for(let l=o[r];l<o[r+1];l++)c[l]=(l-o[r])/(o[r+1]-o[r]);for(let l=o[r+1];l<o[r+2];l++)c[l]=(o[r+2]-l)/(o[r+2]-o[r+1]);s.push(c)}return s}const se=ee(),rt=new qt(D),_t=new Float32Array(D),Ct=rt.createComplexArray(),ft=rt.createComplexArray(),Tt=new Float32Array(Math.floor(D/2)+1);function Nt(i){const n=1+Math.ceil((i.length-At)/Ft),t=new Float32Array(n*B),e=Math.floor(D/2)+1;for(let o=0;o<n;o++){const s=o*Ft;_t.fill(0);for(let a=0;a<At&&s+a<i.length;a++)_t[a]=i[s+a];rt.toComplexArray(_t,Ct),rt.transform(ft,Ct);for(let a=0;a<e;a++){const r=ft[2*a],c=ft[2*a+1],l=(r*r+c*c)/D;Tt[a]=l===0?1e-30:l}for(let a=0;a<B;a++){const r=se[a];let c=0;for(let l=0;l<e;l++)c+=Tt[l]*r[l];t[o*B+a]=Math.log(c===0?1e-30:c)}}return t}function ne(i,n){let t=0;for(let e=0;e<i.length;e++)t+=i[e]*n[e];return(t+1)/2}function re(i,n){let t=0;for(const e of n){const o=ne(i,e);o>t&&(t=o)}return t}class xt extends EventTarget{constructor({name:n,refEmbeddings:t,threshold:e=.65,relaxationMs:o=2e3,inferenceGapMs:s=300}){super(),this.name=n,this.refEmbeddings=t,this.threshold=e,this.relaxationMs=o,this.inferenceGapMs=s,this._lastDetectionAt=0,this._lastInferenceAt=0,this._lastScore=0}get lastScore(){return this._lastScore}async scoreFrame(n){const t=Date.now();if(t-this._lastInferenceAt<this.inferenceGapMs)return null;this._lastInferenceAt=t;const e=Nt(n),o=await St(e),s=re(o,this.refEmbeddings);return this._lastScore=s,s>=this.threshold&&t-this._lastDetectionAt>=this.relaxationMs&&(this._lastDetectionAt=t,this.dispatchEvent(new CustomEvent("match",{detail:{name:this.name,confidence:s,timestamp:t}}))),s}}const 
Rt=16e3,oe=1500,pt=24e3;function zt(i){if(i.length===pt)return i;const n=new Float32Array(pt);return n.set(i.subarray(0,pt)),n}class kt extends EventTarget{constructor(n){super(),this.wordName=n.trim().toLowerCase(),this.samples=[]}get sampleCount(){return this.samples.length}async recordSample(){const n=await navigator.mediaDevices.getUserMedia({audio:!0});return new Promise((t,e)=>{const o=new AudioContext({sampleRate:Rt}),s=new MediaRecorder(n),a=[];this.dispatchEvent(new CustomEvent("recording-start")),s.ondataavailable=r=>{r.data.size>0&&a.push(r.data)},s.onstop=async()=>{n.getTracks().forEach(r=>r.stop());try{const c=await new Blob(a,{type:"audio/webm"}).arrayBuffer(),l=await o.decodeAudioData(c);await o.close();const h=l.getChannelData(0),d=zt(new Float32Array(h)),m=this._push(d,`Recorded #${this.samples.length}`);t(m)}catch(r){await o.close().catch(()=>{}),e(r)}},s.start(),setTimeout(()=>s.stop(),oe)})}async addAudioFile(n){const t=await n.arrayBuffer(),e=new AudioContext({sampleRate:Rt}),o=await e.decodeAudioData(t);await e.close();const s=o.getChannelData(0),a=zt(new Float32Array(s));return this._push(a,n.name)}removeSample(n){this.samples.splice(n,1),this.dispatchEvent(new CustomEvent("samples-changed",{detail:{count:this.samples.length}}))}clearSamples(){this.samples=[],this.dispatchEvent(new CustomEvent("samples-changed",{detail:{count:0}}))}async generateRef(){if(this.samples.length<3)throw new Error(`Need at least 3 samples (currently have ${this.samples.length})`);this.dispatchEvent(new CustomEvent("generating",{detail:{total:this.samples.length}}));const n=[];for(let t=0;t<this.samples.length;t++){const e=Nt(this.samples[t].audioBuffer),o=await St(e);n.push(Array.from(o)),this.dispatchEvent(new CustomEvent("progress",{detail:{done:t+1,total:this.samples.length}}))}return{word_name:this.wordName,model_type:"resnet_50_arc",embeddings:n}}_push(n,t){this.samples.push({audioBuffer:n,name:t});const e=this.samples.length;return this.dispatchEvent(new 
CustomEvent("sample-added",{detail:{count:e,name:t}})),e}}const ae=`/**
2
+ * public/audio-processor.js
3
+ * AudioWorklet that runs at 16 kHz and continuously emits the last
4
+ * 1.5-second window (24 000 samples) via a circular buffer.
5
+ *
6
+ * The main thread receives a fresh Float32Array on every
7
+ * AudioWorklet quantum (128 samples ≈ every 8 ms at 16 kHz).
8
+ * The inference loop in engine.js rate-limits to avoid excessive work.
9
+ */
10
+ class AudioProcessor extends AudioWorkletProcessor {
11
+ constructor() {
12
+ super()
13
+ this._size = 24000 // 1.5 s × 16 000 Hz
14
+ this._buf = new Float32Array(this._size)
15
+ this._ptr = 0
16
+ }
17
+
18
+ process(inputs) {
19
+ const ch = inputs[0]?.[0]
20
+ if (!ch) return true
21
+
22
+ for (let i = 0; i < ch.length; i++) {
23
+ this._buf[this._ptr] = ch[i]
24
+ this._ptr = (this._ptr + 1) % this._size
25
+ }
26
+
27
+ // Send an ordered copy of the ring buffer
28
+ const out = new Float32Array(this._size)
29
+ for (let i = 0; i < this._size; i++) {
30
+ out[i] = this._buf[(this._ptr + i) % this._size]
31
+ }
32
+ this.port.postMessage(out)
33
+ return true
34
+ }
35
+ }
36
+
37
+ registerProcessor('audio-processor', AudioProcessor)
38
+ `;let vt=null;function ie(){if(!vt){const i=new Blob([ae],{type:"application/javascript"});vt=URL.createObjectURL(i)}return vt}const wt="mellon_custom_refs";function gt(){try{const i=localStorage.getItem(wt);return i?JSON.parse(i):[]}catch{return[]}}function ce(i){const n=gt().filter(t=>t.word_name!==i.word_name);n.push(i),localStorage.setItem(wt,JSON.stringify(n))}function le(i){const n=gt().filter(t=>t.word_name!==i);localStorage.setItem(wt,JSON.stringify(n))}function he(i){const n=JSON.stringify(i,null,2),t=new Blob([n],{type:"application/json"}),e=URL.createObjectURL(t),o=Object.assign(document.createElement("a"),{href:e,download:`${i.word_name}_ref.json`});document.body.appendChild(o),o.click(),document.body.removeChild(o),URL.revokeObjectURL(e)}async function de(i){const n=await i.text();let t;try{t=JSON.parse(n)}catch{throw new Error("Invalid JSON")}if(!t.embeddings||!Array.isArray(t.embeddings)||!t.embeddings.length)throw new Error('Missing or empty "embeddings" array');if(!Array.isArray(t.embeddings[0]))throw new Error('"embeddings" must be a 2D array');return t.word_name||(t.word_name=i.name.replace(/_ref\.json$/i,"").replace(/\.json$/i,"")),t}class ue extends EventTarget{constructor(n={}){super(),this._opts={words:n.words??[],refs:n.refs??[],threshold:n.threshold??.65,relaxationMs:n.relaxationMs??2e3,inferenceGapMs:n.inferenceGapMs??300,wasmBasePath:n.wasmBasePath,modelUrl:n.modelUrl},this._refs=new Map,this._detectors=new Map,this._audioCtx=null,this._workletNode=null,this._stream=null,this._initialized=!1,this._running=!1}get isInitialized(){return this._initialized}get isRunning(){return this._running}async init(n){if(this._initialized){n==null||n(1);return}(this._opts.wasmBasePath||this._opts.modelUrl)&&Qt({wasmBasePath:this._opts.wasmBasePath,modelUrl:this._opts.modelUrl});try{await Vt(n)}catch(t){throw this.dispatchEvent(new CustomEvent("error",{detail:{error:t}})),t}for(const t of this._opts.refs)try{let e;if(typeof t=="string"){const o=await 
fetch(t);if(!o.ok)throw new Error(`HTTP ${o.status}`);e=await o.json()}else e=t;this.addCustomWord(e)}catch(e){const o=typeof t=="string"?t:t.word_name;console.warn(`[Mellon] Failed to load ref "${o}": ${e.message}`)}this._initialized=!0,this.dispatchEvent(new CustomEvent("ready"))}async start(n){this._initialized||await this.init();const t=n??this._opts.words;try{this._stream=await navigator.mediaDevices.getUserMedia({audio:!0})}catch(s){const a=new Error(`Microphone access denied: ${s.message}`);throw this.dispatchEvent(new CustomEvent("error",{detail:{error:a}})),a}this._audioCtx=new AudioContext({sampleRate:16e3});const e=ie();await this._audioCtx.audioWorklet.addModule(e);const o=this._audioCtx.createMediaStreamSource(this._stream);this._workletNode=new AudioWorkletNode(this._audioCtx,"audio-processor"),o.connect(this._workletNode),this._workletNode.connect(this._audioCtx.destination);for(const s of t){const a=this._refs.get(s);if(!a){console.warn(`[Mellon] No reference embeddings for "${s}" — skipping. 
Call addCustomWord() to register custom words before start().`);continue}const r=new xt({name:s,refEmbeddings:a.embeddings,threshold:this._opts.threshold,relaxationMs:this._opts.relaxationMs,inferenceGapMs:this._opts.inferenceGapMs});r.addEventListener("match",c=>{this.dispatchEvent(new CustomEvent("match",{detail:c.detail}))}),this._detectors.set(s,r)}this._workletNode.port.onmessage=async s=>{const a=[];for(const r of this._detectors.values())a.push(r.scoreFrame(s.data));await Promise.allSettled(a)},this._running=!0}stop(){this._workletNode&&(this._workletNode.port.onmessage=null,this._workletNode.disconnect(),this._workletNode=null),this._stream&&(this._stream.getTracks().forEach(n=>n.stop()),this._stream=null),this._audioCtx&&(this._audioCtx.close(),this._audioCtx=null),this._detectors.clear(),this._running=!1}addCustomWord(n){if(this._refs.set(n.word_name,n),this._running&&this._workletNode){const t=new xt({name:n.word_name,refEmbeddings:n.embeddings,threshold:this._opts.threshold,relaxationMs:this._opts.relaxationMs,inferenceGapMs:this._opts.inferenceGapMs});t.addEventListener("match",e=>{this.dispatchEvent(new CustomEvent("match",{detail:e.detail}))}),this._detectors.set(n.word_name,t)}}enrollWord(n){return new kt(n)}static loadWords(){return gt()}static saveWord(n){ce(n)}static deleteWord(n){le(n)}static importWordFile(n){return de(n)}static exportWord(n){he(n)}}exports.EnrollmentSession=kt;exports.Mellon=ue;