mellon 0.0.2 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +295 -0
- package/dist/mellon.cjs +38 -0
- package/dist/mellon.mjs +616 -0
- package/package.json +6 -9
- package/dist/assets/index-B3ZBo_ZU.css +0 -1
- package/dist/assets/index-CiLJGV_Q.js +0 -19
- package/dist/index.html +0 -251
- /package/dist/{audio-processor.js → assets/audio-processor.js} +0 -0
- /package/dist/{manifest.json → assets/manifest.json} +0 -0
- /package/dist/{models → assets}/model.onnx +0 -0
- /package/dist/{wasm → assets}/ort-wasm-simd-threaded.asyncify.mjs +0 -0
- /package/dist/{wasm → assets}/ort-wasm-simd-threaded.asyncify.wasm +0 -0
- /package/dist/{wasm → assets}/ort-wasm-simd-threaded.jsep.mjs +0 -0
- /package/dist/{wasm → assets}/ort-wasm-simd-threaded.jsep.wasm +0 -0
- /package/dist/{wasm → assets}/ort-wasm-simd-threaded.jspi.mjs +0 -0
- /package/dist/{wasm → assets}/ort-wasm-simd-threaded.jspi.wasm +0 -0
- /package/dist/{wasm → assets}/ort-wasm-simd-threaded.mjs +0 -0
- /package/dist/{wasm → assets}/ort-wasm-simd-threaded.wasm +0 -0
- /package/dist/{wasm → assets}/ort.all.bundle.min.mjs +0 -0
- /package/dist/{wasm → assets}/ort.all.min.mjs +0 -0
- /package/dist/{wasm → assets}/ort.all.mjs +0 -0
- /package/dist/{wasm → assets}/ort.bundle.min.mjs +0 -0
- /package/dist/{wasm → assets}/ort.jspi.bundle.min.mjs +0 -0
- /package/dist/{wasm → assets}/ort.jspi.min.mjs +0 -0
- /package/dist/{wasm → assets}/ort.jspi.mjs +0 -0
- /package/dist/{wasm → assets}/ort.min.mjs +0 -0
- /package/dist/{wasm → assets}/ort.mjs +0 -0
- /package/dist/{wasm → assets}/ort.node.min.mjs +0 -0
- /package/dist/{wasm → assets}/ort.wasm.bundle.min.mjs +0 -0
- /package/dist/{wasm → assets}/ort.wasm.min.mjs +0 -0
- /package/dist/{wasm → assets}/ort.wasm.mjs +0 -0
- /package/dist/{wasm → assets}/ort.webgl.min.mjs +0 -0
- /package/dist/{wasm → assets}/ort.webgl.mjs +0 -0
- /package/dist/{wasm → assets}/ort.webgpu.bundle.min.mjs +0 -0
- /package/dist/{wasm → assets}/ort.webgpu.min.mjs +0 -0
- /package/dist/{wasm → assets}/ort.webgpu.mjs +0 -0
- /package/dist/{sw.js → assets/sw.js} +0 -0
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,295 @@
|
|
|
1
|
+
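// mellon type declarations
// NOTE(review): dist/mellon.cjs exports only `EnrollmentSession` and `Mellon`; the other
// value exports declared in this file (configure, loadModel, embed, logfbank, cosineSim,
// maxSimilarity, HotwordDetector) have no CommonJS counterpart — confirm against
// dist/mellon.mjs and either export them from the bundles or drop the declarations.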
|
|
2
|
+
|
|
3
|
+
// ─── Shared data types ───────────────────────────────────────────────────────
|
|
4
|
+
|
|
5
|
+
/**
 * Reference embeddings for one enrolled word, as produced by
 * `EnrollmentSession.generateRef()` and accepted by `Mellon.addCustomWord()`.
 */
export interface RefData {
  /** Word label; `Mellon` keys its registered refs by this value and reports it as `name` in match events. */
  word_name: string
  /** Identifier of the embedding model that produced `embeddings`. */
  model_type: 'resnet_50_arc'
  /** One embedding vector per enrolled sample. */
  embeddings: number[][]
}
|
|
10
|
+
|
|
11
|
+
/** Payload (`event.detail`) carried by a 'match' event. */
export interface MatchEventDetail {
  /** The detected word name. */
  name: string
  /** Similarity score that triggered detection (0–1). */
  confidence: number
  /** Unix timestamp (ms) of the detection. */
  timestamp: number
}
|
|
19
|
+
|
|
20
|
+
/** One enrollment sample held by an `EnrollmentSession`. */
export interface SampleInfo {
  /** Audio for the sample; the session pads or truncates it to 24 000 samples before storing it. */
  audioBuffer: Float32Array
  /** Display label: the uploaded file's name, or a generated "Recorded #n" label for microphone takes. */
  name: string
}
|
|
24
|
+
|
|
25
|
+
// ─── Engine ──────────────────────────────────────────────────────────────────
|
|
26
|
+
|
|
27
|
+
/** Asset-location overrides accepted by `configure()`. All fields are optional. */
export interface EngineConfig {
  /**
   * Base URL of the directory that serves both the ORT runtime files
   * (`ort.all.min.mjs` and its WASM binaries) and `model.onnx`.
   * A trailing slash is appended when missing.
   * Defaults to the jsDelivr CDN (`https://cdn.jsdelivr.net/npm/mellon@<version>/dist/assets`).
   * @example '/mellon-assets/'
   */
  assetsPath?: string
  /**
   * Base URL where ORT WASM files are served (trailing slash required).
   * Defaults to the jsDelivr CDN. Override for offline / intranet use.
   *
   * NOTE(review): the `configure()` shipped in dist/mellon.cjs reads only
   * `assetsPath`, so this field appears to be ignored there — confirm against
   * the ESM build and prefer `assetsPath` until resolved.
   * @example '/mellon-assets/wasm/'
   */
  wasmBasePath?: string
  /**
   * Full URL to model.onnx.
   * Defaults to the jsDelivr CDN. Override for offline / intranet use.
   *
   * NOTE(review): the `configure()` shipped in dist/mellon.cjs reads only
   * `assetsPath`, so this field appears to be ignored there — confirm against
   * the ESM build and prefer `assetsPath` until resolved.
   * @example '/mellon-assets/model.onnx'
   */
  modelUrl?: string
}
|
|
41
|
+
|
|
42
|
+
/**
 * Override asset paths. Optional — by default assets load from the jsDelivr CDN.
 * Call this before loadModel() when deploying offline or on a private network.
 *
 * NOTE(review): the bundled implementation appears to also discard any cached
 * inference session each time it is called, so a subsequent loadModel()
 * downloads the model again — avoid calling it after the model is loaded.
 *
 * @param config Asset-location overrides; omitted fields keep their current value.
 */
export function configure(config: EngineConfig): void
|
|
47
|
+
|
|
48
|
+
/**
 * Load (or return the already-loaded) ONNX inference session.
 * Idempotent — safe to call multiple times.
 *
 * @param onProgress Called with values 0.0 → 1.0 as the model downloads.
 *   In the bundled build, intermediate values are only reported when the
 *   response carries a Content-Length header, and 1 is reported when the
 *   load finishes or the session was already loaded. A call made while
 *   another load is still in flight shares that load and does not receive
 *   its own progress callbacks.
 * @throws Error if the model request returns a non-OK HTTP status.
 */
export function loadModel(onProgress?: (progress: number) => void): Promise<void>
|
|
55
|
+
|
|
56
|
+
/**
 * Compute a 256-dim L2-normalised embedding from a log-mel spectrogram.
 * Requires loadModel() to have completed first.
 *
 * @param spectrogram Flat Float32Array of shape [149 × 64] from logfbank().
 * @returns The embedding as a flat Float32Array.
 * @throws Error if the model has not been loaded yet.
 */
export function embed(spectrogram: Float32Array): Promise<Float32Array>
|
|
63
|
+
|
|
64
|
+
// ─── Mel feature extraction ───────────────────────────────────────────────────
|
|
65
|
+
|
|
66
|
+
/**
 * Compute a log-mel spectrogram from a 1.5-second 16 kHz audio buffer.
 *
 * @param signal 24 000 samples at 16 kHz (1.5 seconds).
 * @returns Float32Array of shape [149 × 64] (frames × mel-bins), flattened
 *   frame by frame: the value for frame `f`, bin `b` is at index `f * 64 + b`.
 */
export function logfbank(signal: Float32Array): Float32Array
|
|
73
|
+
|
|
74
|
+
// ─── Similarity helpers ───────────────────────────────────────────────────────
|
|
75
|
+
|
|
76
|
+
/**
 * Cosine similarity normalised to [0, 1].
 * Assumes both vectors are L2-normalised (as the ArcFace model guarantees),
 * so the plain dot product is used as the cosine and mapped with `(dot + 1) / 2`.
 *
 * @param a First vector; its length determines how many components are compared.
 * @param b Second vector, expected to have the same length as `a`.
 * @returns Similarity in [0, 1] for normalised inputs.
 */
export function cosineSim(a: Float32Array | number[], b: Float32Array | number[]): number
|
|
81
|
+
|
|
82
|
+
/**
 * Maximum cosine similarity between `embedding` and any of `refs`.
 *
 * @param embedding Embedding to compare.
 * @param refs Reference embeddings to compare against.
 * @returns The highest cosineSim() score found, or 0 when `refs` is empty.
 */
export function maxSimilarity(
  embedding: Float32Array,
  refs: number[][] | Float32Array[],
): number
|
|
89
|
+
|
|
90
|
+
// ─── HotwordDetector ─────────────────────────────────────────────────────────
|
|
91
|
+
|
|
92
|
+
/** Constructor options for `HotwordDetector`. */
export interface DetectorOptions {
  /** Human-readable label for this word. */
  name: string
  /** Reference embeddings (N × 256), e.g. from a RefData.embeddings array. */
  refEmbeddings: number[][] | Float32Array[]
  /** Detection threshold in [0, 1]. Default: 0.65 */
  threshold?: number
  /** Minimum milliseconds between successive 'match' events. Default: 2000 */
  relaxationMs?: number
  /** Minimum milliseconds between consecutive inference runs. Default: 300 */
  inferenceGapMs?: number
}
|
|
104
|
+
|
|
105
|
+
/**
 * Stateful detector for a single hotword.
 *
 * Dispatches a 'match' CustomEvent (detail: MatchEventDetail) when a scored
 * frame reaches `threshold` and at least `relaxationMs` have elapsed since the
 * previous match.
 *
 * @example
 * const myRef = await Mellon.importWordFile(file)   // or load from your source
 * const d = new HotwordDetector({ name: myRef.word_name, refEmbeddings: myRef.embeddings })
 * d.addEventListener('match', e => console.log(e.detail.name, e.detail.confidence))
 * // In AudioWorklet onmessage handler:
 * const score = await d.scoreFrame(audioBuffer)
 */
export class HotwordDetector extends EventTarget {
  constructor(opts: DetectorOptions)

  /** Label given as `opts.name`; reported as `name` in match events. */
  readonly name: string
  /** Most recent similarity score (0–1). */
  readonly lastScore: number
  /** Detection threshold in [0, 1]; may be changed at any time. */
  threshold: number
  /** Minimum milliseconds between successive 'match' events. */
  relaxationMs: number
  /** Minimum milliseconds between consecutive inference runs. */
  inferenceGapMs: number
  /** Reference embeddings each scored frame is compared against. */
  refEmbeddings: number[][] | Float32Array[]

  /**
   * Score a 1.5-second audio frame. Rate-limited to inferenceGapMs.
   *
   * @param audioBuffer 24 000 samples at 16 kHz.
   * @returns Similarity score, or null when rate-limited.
   */
  scoreFrame(audioBuffer: Float32Array): Promise<number | null>

  /** Typed overload for the 'match' event. */
  addEventListener(
    type: 'match',
    listener: (event: CustomEvent<MatchEventDetail>) => void,
    options?: boolean | AddEventListenerOptions,
  ): void
  /** Fallback overload matching the standard EventTarget signature. */
  addEventListener(
    type: string,
    listener: EventListenerOrEventListenerObject,
    options?: boolean | AddEventListenerOptions,
  ): void
}
|
|
145
|
+
|
|
146
|
+
// ─── EnrollmentSession ────────────────────────────────────────────────────────
|
|
147
|
+
|
|
148
|
+
/**
 * Manages recording and embedding generation for a custom wake word.
 *
 * @example
 * const session = new EnrollmentSession('hello')
 * await session.recordSample()             // record 1.5 s from mic (repeat 3+ times)
 * const ref = await session.generateRef()
 * Mellon.saveWord(ref)                     // persist to localStorage
 */
export class EnrollmentSession extends EventTarget {
  /** @param wordName Word being enrolled; stored trimmed and lower-cased. */
  constructor(wordName: string)

  /** The word name, trimmed and lower-cased from the constructor argument. */
  readonly wordName: string
  /** Number of samples currently held. */
  readonly sampleCount: number
  /** The samples accumulated so far, in the order they were added. */
  readonly samples: SampleInfo[]

  /** Record exactly 1.5 seconds from the microphone. Returns 1-based sample index. */
  recordSample(): Promise<number>

  /** Decode an uploaded audio File and add it as a sample. Returns 1-based index. */
  addAudioFile(file: File): Promise<number>

  /** Remove a sample by 0-based index. Dispatches 'samples-changed'. */
  removeSample(idx: number): void

  /** Remove all samples. Dispatches 'samples-changed'. */
  clearSamples(): void

  /**
   * Generate reference embeddings from the accumulated samples (minimum 3 required).
   * Returns a RefData object ready to pass to Mellon.saveWord() or to
   * addCustomWord() on a Mellon instance.
   *
   * @throws Error if fewer than 3 samples have been added.
   */
  generateRef(): Promise<RefData>

  /** Fired when a microphone recording begins. */
  addEventListener(type: 'recording-start', listener: (event: CustomEvent) => void, options?: boolean | AddEventListenerOptions): void
  /** Fired after a sample is stored; `count` is the new sample total. */
  addEventListener(type: 'sample-added', listener: (event: CustomEvent<{ count: number; name: string }>) => void, options?: boolean | AddEventListenerOptions): void
  /** Fired after removeSample() or clearSamples(). */
  addEventListener(type: 'samples-changed', listener: (event: CustomEvent<{ count: number }>) => void, options?: boolean | AddEventListenerOptions): void
  /** Fired once when generateRef() starts processing. */
  addEventListener(type: 'generating', listener: (event: CustomEvent<{ total: number }>) => void, options?: boolean | AddEventListenerOptions): void
  /** Fired after each sample is embedded during generateRef(). */
  addEventListener(type: 'progress', listener: (event: CustomEvent<{ done: number; total: number }>) => void, options?: boolean | AddEventListenerOptions): void
  /** Fallback overload matching the standard EventTarget signature. */
  addEventListener(type: string, listener: EventListenerOrEventListenerObject, options?: boolean | AddEventListenerOptions): void
}
|
|
189
|
+
|
|
190
|
+
// ─── Mellon (high-level API) ─────────────────────────────────────────────────
|
|
191
|
+
|
|
192
|
+
/** Constructor options for `Mellon`. All fields are optional. */
export interface MellonOptions {
  /** Words to detect. Refs must be registered via `addCustomWord()` or `refs` before `start()`. */
  words?: string[]
  /**
   * Reference data to preload during `init()`. Each entry is either:
   *   - a URL string pointing to a hosted `_ref.json` file, or
   *   - an inline `RefData` object.
   *
   * An entry that fails to load is logged with `console.warn` and skipped.
   *
   * @example
   * refs: [
   *   'https://example.com/hello_ref.json',
   *   'https://example.com/stop_ref.json',
   * ]
   */
  refs?: (string | RefData)[]
  /** Detection threshold [0, 1]. Default: 0.65 */
  threshold?: number
  /** Minimum milliseconds between successive match events per word. Default: 2000 */
  relaxationMs?: number
  /** Minimum milliseconds between consecutive inference runs. Default: 300 */
  inferenceGapMs?: number
  /**
   * Override the ORT WASM base URL. Defaults to the jsDelivr CDN.
   * Only needed for offline / intranet deployments (trailing slash required).
   *
   * NOTE(review): in dist/mellon.cjs, init() forwards this value to a
   * configure() that reads only `assetsPath`, so the override appears to have
   * no effect there — confirm and fix in the implementation.
   * @example '/mellon-assets/wasm/'
   */
  wasmBasePath?: string
  /**
   * Override the model.onnx URL. Defaults to the jsDelivr CDN.
   * Only needed for offline / intranet deployments.
   *
   * NOTE(review): in dist/mellon.cjs, init() forwards this value to a
   * configure() that reads only `assetsPath`, so the override appears to have
   * no effect there — confirm and fix in the implementation.
   * @example '/mellon-assets/model.onnx'
   */
  modelUrl?: string
}
|
|
226
|
+
|
|
227
|
+
/**
 * High-level, all-in-one hotword detector.
 *
 * @example
 * const mellon = new Mellon({
 *   words: ['hello'],
 *   refs: ['https://example.com/hello_ref.json'],
 * })
 * mellon.addEventListener('match', e => console.log(e.detail.name, e.detail.confidence))
 * await mellon.init(pct => progressBar.style.width = pct * 100 + '%')
 * await mellon.start()
 */
export class Mellon extends EventTarget {
  constructor(opts?: MellonOptions)

  /** True after init() has completed successfully. */
  readonly isInitialized: boolean

  /** True while start() is active (microphone is open). */
  readonly isRunning: boolean

  /**
   * Load the ONNX model and preload the reference data listed in `refs`.
   * Optional — start() auto-calls init() when needed.
   * Dispatches 'ready' on success.
   *
   * @param onProgress Progress callback, 0.0 → 1.0.
   * @throws Rethrows a model-loading failure after dispatching an 'error' event.
   */
  init(onProgress?: (progress: number) => void): Promise<void>

  /**
   * Request microphone access and start hotword detection.
   * Resolves once the audio pipeline is running.
   *
   * @param words Optional subset of words to activate (must have refs loaded).
   *   Defaults to `words` from the constructor options. A word without
   *   registered refs is skipped with a console warning.
   * @throws Error if microphone access is denied (an 'error' event is dispatched first).
   */
  start(words?: string[]): Promise<void>

  /** Stop detection and release the microphone + AudioContext. */
  stop(): void

  /**
   * Register reference embeddings for a word.
   * Can be called before or after start(); while running, detection for the
   * word becomes active immediately.
   */
  addCustomWord(refData: RefData): void

  /**
   * Create an EnrollmentSession for recording a new custom word.
   * Call addCustomWord() with the result of session.generateRef().
   */
  enrollWord(wordName: string): EnrollmentSession

  /** Return all custom word refs stored in localStorage (empty array if none or unreadable). */
  static loadWords(): RefData[]
  /** Persist a word ref to localStorage (replaces any existing entry with the same name). */
  static saveWord(refData: RefData): void
  /** Delete a word ref from localStorage by name. */
  static deleteWord(wordName: string): void
  /**
   * Parse an uploaded ref JSON file.
   *
   * @throws Error if the file is not valid JSON or lacks a non-empty 2D `embeddings` array.
   */
  static importWordFile(file: File): Promise<RefData>
  /** Trigger a browser download of a ref as a JSON file named `<word_name>_ref.json`. */
  static exportWord(refData: RefData): void

  /** Fired when any active word is detected. */
  addEventListener(type: 'match', listener: (event: CustomEvent<MatchEventDetail>) => void, options?: boolean | AddEventListenerOptions): void
  /** Fired when init() completes. */
  addEventListener(type: 'ready', listener: (event: CustomEvent) => void, options?: boolean | AddEventListenerOptions): void
  /** Fired when model loading fails or microphone access is denied. */
  addEventListener(type: 'error', listener: (event: CustomEvent<{ error: Error }>) => void, options?: boolean | AddEventListenerOptions): void
  /** Fallback overload matching the standard EventTarget signature. */
  addEventListener(type: string, listener: EventListenerOrEventListenerObject, options?: boolean | AddEventListenerOptions): void
}
|
|
295
|
+
|
package/dist/mellon.cjs
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"use strict";Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});const Jt="0.0.4",Kt=[1,1,149,64],Yt=`https://cdn.jsdelivr.net/npm/mellon@${Jt}/dist/assets`,st={assetsPath:`${Yt}`};let I=null,q=null,tt=null;function Qt({assetsPath:i}={}){i!==void 0&&(st.assetsPath=i),I=null,q=null,tt=null}async function Vt(i){return I?(i==null||i(1),I):q||(q=(async()=>{const n=st.assetsPath.endsWith("/")?st.assetsPath:st.assetsPath+"/",t=n+"ort.all.min.mjs",e=n+"model.onnx";tt=await new Function("url","return import(url)")(t),tt.env.wasm.wasmPaths=n;const s=await fetch(e);if(!s.ok)throw new Error(`Failed to fetch model: ${s.status}`);const a=parseInt(s.headers.get("content-length")||"0",10),r=s.body.getReader(),c=[];let l=0;for(;;){const{done:m,value:f}=await r.read();if(m)break;c.push(f),l+=f.byteLength,a>0&&(i==null||i(l/a))}const h=new Uint8Array(l);let d=0;for(const m of c)h.set(m,d),d+=m.byteLength;return I=await tt.InferenceSession.create(h.buffer,{executionProviders:["wasm"],graphOptimizationLevel:"all"}),i==null||i(1),I})(),q)}async function St(i){if(!I)throw new Error("Model not loaded — call loadModel() first");const n=new tt.Tensor("float32",i,Kt),t=await I.run({input:n}),e=Object.keys(t)[0];return t[e].data}function Xt(i){return i&&i.__esModule&&Object.prototype.hasOwnProperty.call(i,"default")?i.default:i}var mt,Et;function Zt(){if(Et)return mt;Et=1;function i(n){if(this.size=n|0,this.size<=1||(this.size&this.size-1)!==0)throw new Error("FFT size must be a power of two and bigger than 1");this._csize=n<<1;for(var t=new Array(this.size*2),e=0;e<t.length;e+=2){const l=Math.PI*e/this.size;t[e]=Math.cos(l),t[e+1]=-Math.sin(l)}this.table=t;for(var o=0,s=1;this.size>s;s<<=1)o++;this._width=o%2===0?o-1:o,this._bitrev=new Array(1<<this._width);for(var a=0;a<this._bitrev.length;a++){this._bitrev[a]=0;for(var r=0;r<this._width;r+=2){var c=this._width-r-2;this._bitrev[a]|=(a>>>r&3)<<c}}this._out=null,this._data=null,this._inv=0}return 
mt=i,i.prototype.fromComplexArray=function(t,e){for(var o=e||new Array(t.length>>>1),s=0;s<t.length;s+=2)o[s>>>1]=t[s];return o},i.prototype.createComplexArray=function(){const t=new Array(this._csize);for(var e=0;e<t.length;e++)t[e]=0;return t},i.prototype.toComplexArray=function(t,e){for(var o=e||this.createComplexArray(),s=0;s<o.length;s+=2)o[s]=t[s>>>1],o[s+1]=0;return o},i.prototype.completeSpectrum=function(t){for(var e=this._csize,o=e>>>1,s=2;s<o;s+=2)t[e-s]=t[s],t[e-s+1]=-t[s+1]},i.prototype.transform=function(t,e){if(t===e)throw new Error("Input and output buffers must be different");this._out=t,this._data=e,this._inv=0,this._transform4(),this._out=null,this._data=null},i.prototype.realTransform=function(t,e){if(t===e)throw new Error("Input and output buffers must be different");this._out=t,this._data=e,this._inv=0,this._realTransform4(),this._out=null,this._data=null},i.prototype.inverseTransform=function(t,e){if(t===e)throw new Error("Input and output buffers must be different");this._out=t,this._data=e,this._inv=1,this._transform4();for(var o=0;o<t.length;o++)t[o]/=this.size;this._out=null,this._data=null},i.prototype._transform4=function(){var t=this._out,e=this._csize,o=this._width,s=1<<o,a=e/s<<1,r,c,l=this._bitrev;if(a===4)for(r=0,c=0;r<e;r+=a,c++){const u=l[c];this._singleTransform2(r,u,s)}else for(r=0,c=0;r<e;r+=a,c++){const u=l[c];this._singleTransform4(r,u,s)}var h=this._inv?-1:1,d=this.table;for(s>>=2;s>=2;s>>=2){a=e/s<<1;var m=a>>>2;for(r=0;r<e;r+=a)for(var f=r+m,g=r,_=0;g<f;g+=2,_+=s){const u=g,p=u+m,v=p+m,w=v+m,b=t[u],A=t[u+1],E=t[p],y=t[p+1],F=t[v],M=t[v+1],C=t[w],T=t[w+1],x=b,R=A,z=d[_],S=h*d[_+1],N=E*z-y*S,k=E*S+y*z,P=d[2*_],L=h*d[2*_+1],G=F*P-M*L,H=F*L+M*P,J=d[3*_],K=h*d[3*_+1],Y=C*J-T*K,Q=C*K+T*J,V=x+G,U=R+H,j=x-G,X=R-H,Z=N+Y,W=k+Q,$=h*(N-Y),O=h*(k-Q),et=V+Z,ot=U+W,at=V-Z,it=U-W,ct=j+O,lt=X-$,ht=j-O,dt=X+$;t[u]=et,t[u+1]=ot,t[p]=ct,t[p+1]=lt,t[v]=at,t[v+1]=it,t[w]=ht,t[w+1]=dt}}},i.prototype._singleTransform2=function(t,e,o){const 
s=this._out,a=this._data,r=a[e],c=a[e+1],l=a[e+o],h=a[e+o+1],d=r+l,m=c+h,f=r-l,g=c-h;s[t]=d,s[t+1]=m,s[t+2]=f,s[t+3]=g},i.prototype._singleTransform4=function(t,e,o){const s=this._out,a=this._data,r=this._inv?-1:1,c=o*2,l=o*3,h=a[e],d=a[e+1],m=a[e+o],f=a[e+o+1],g=a[e+c],_=a[e+c+1],u=a[e+l],p=a[e+l+1],v=h+g,w=d+_,b=h-g,A=d-_,E=m+u,y=f+p,F=r*(m-u),M=r*(f-p),C=v+E,T=w+y,x=b+M,R=A-F,z=v-E,S=w-y,N=b-M,k=A+F;s[t]=C,s[t+1]=T,s[t+2]=x,s[t+3]=R,s[t+4]=z,s[t+5]=S,s[t+6]=N,s[t+7]=k},i.prototype._realTransform4=function(){var t=this._out,e=this._csize,o=this._width,s=1<<o,a=e/s<<1,r,c,l=this._bitrev;if(a===4)for(r=0,c=0;r<e;r+=a,c++){const ut=l[c];this._singleRealTransform2(r,ut>>>1,s>>>1)}else for(r=0,c=0;r<e;r+=a,c++){const ut=l[c];this._singleRealTransform4(r,ut>>>1,s>>>1)}var h=this._inv?-1:1,d=this.table;for(s>>=2;s>=2;s>>=2){a=e/s<<1;var m=a>>>1,f=m>>>1,g=f>>>1;for(r=0;r<e;r+=a)for(var _=0,u=0;_<=g;_+=2,u+=s){var p=r+_,v=p+f,w=v+f,b=w+f,A=t[p],E=t[p+1],y=t[v],F=t[v+1],M=t[w],C=t[w+1],T=t[b],x=t[b+1],R=A,z=E,S=d[u],N=h*d[u+1],k=y*S-F*N,P=y*N+F*S,L=d[2*u],G=h*d[2*u+1],H=M*L-C*G,J=M*G+C*L,K=d[3*u],Y=h*d[3*u+1],Q=T*K-x*Y,V=T*Y+x*K,U=R+H,j=z+J,X=R-H,Z=z-J,W=k+Q,$=P+V,O=h*(k-Q),et=h*(P-V),ot=U+W,at=j+$,it=X+et,ct=Z-O;if(t[p]=ot,t[p+1]=at,t[v]=it,t[v+1]=ct,_===0){var lt=U-W,ht=j-$;t[w]=lt,t[w+1]=ht;continue}if(_!==g){var dt=X,It=-Z,Bt=U,Dt=-j,Ut=-h*et,jt=-h*O,Wt=-h*$,$t=-h*W,Pt=dt+Ut,Lt=It+jt,Gt=Bt+$t,Ht=Dt-Wt,yt=r+f-_,bt=r+m-_;t[yt]=Pt,t[yt+1]=Lt,t[bt]=Gt,t[bt+1]=Ht}}}},i.prototype._singleRealTransform2=function(t,e,o){const s=this._out,a=this._data,r=a[e],c=a[e+o],l=r+c,h=r-c;s[t]=l,s[t+1]=0,s[t+2]=h,s[t+3]=0},i.prototype._singleRealTransform4=function(t,e,o){const s=this._out,a=this._data,r=this._inv?-1:1,c=o*2,l=o*3,h=a[e],d=a[e+o],m=a[e+c],f=a[e+l],g=h+m,_=h-m,u=d+f,p=r*(d-f),v=g+u,w=_,b=-p,A=g-u,E=_,y=p;s[t]=v,s[t+1]=0,s[t+2]=w,s[t+3]=b,s[t+4]=A,s[t+5]=0,s[t+6]=E,s[t+7]=y},mt}var Ot=Zt();const 
qt=Xt(Ot),nt=16e3,D=512,B=64,At=Math.floor(.025*nt),Ft=Math.floor(.01*nt);function Mt(i){return 2595*Math.log10(1+i/700)}function te(i){return 700*(10**(i/2595)-1)}function ee(){const i=Mt(0),n=Mt(nt/2),t=new Float64Array(B+2);for(let r=0;r<B+2;r++)t[r]=i+r*(n-i)/(B+1);const o=t.map(r=>te(r)).map(r=>Math.floor((D+1)*r/nt)),s=[],a=Math.floor(D/2)+1;for(let r=0;r<B;r++){const c=new Float32Array(a);for(let l=o[r];l<o[r+1];l++)c[l]=(l-o[r])/(o[r+1]-o[r]);for(let l=o[r+1];l<o[r+2];l++)c[l]=(o[r+2]-l)/(o[r+2]-o[r+1]);s.push(c)}return s}const se=ee(),rt=new qt(D),_t=new Float32Array(D),Ct=rt.createComplexArray(),ft=rt.createComplexArray(),Tt=new Float32Array(Math.floor(D/2)+1);function Nt(i){const n=1+Math.ceil((i.length-At)/Ft),t=new Float32Array(n*B),e=Math.floor(D/2)+1;for(let o=0;o<n;o++){const s=o*Ft;_t.fill(0);for(let a=0;a<At&&s+a<i.length;a++)_t[a]=i[s+a];rt.toComplexArray(_t,Ct),rt.transform(ft,Ct);for(let a=0;a<e;a++){const r=ft[2*a],c=ft[2*a+1],l=(r*r+c*c)/D;Tt[a]=l===0?1e-30:l}for(let a=0;a<B;a++){const r=se[a];let c=0;for(let l=0;l<e;l++)c+=Tt[l]*r[l];t[o*B+a]=Math.log(c===0?1e-30:c)}}return t}function ne(i,n){let t=0;for(let e=0;e<i.length;e++)t+=i[e]*n[e];return(t+1)/2}function re(i,n){let t=0;for(const e of n){const o=ne(i,e);o>t&&(t=o)}return t}class xt extends EventTarget{constructor({name:n,refEmbeddings:t,threshold:e=.65,relaxationMs:o=2e3,inferenceGapMs:s=300}){super(),this.name=n,this.refEmbeddings=t,this.threshold=e,this.relaxationMs=o,this.inferenceGapMs=s,this._lastDetectionAt=0,this._lastInferenceAt=0,this._lastScore=0}get lastScore(){return this._lastScore}async scoreFrame(n){const t=Date.now();if(t-this._lastInferenceAt<this.inferenceGapMs)return null;this._lastInferenceAt=t;const e=Nt(n),o=await St(e),s=re(o,this.refEmbeddings);return this._lastScore=s,s>=this.threshold&&t-this._lastDetectionAt>=this.relaxationMs&&(this._lastDetectionAt=t,this.dispatchEvent(new CustomEvent("match",{detail:{name:this.name,confidence:s,timestamp:t}}))),s}}const 
Rt=16e3,oe=1500,pt=24e3;function zt(i){if(i.length===pt)return i;const n=new Float32Array(pt);return n.set(i.subarray(0,pt)),n}class kt extends EventTarget{constructor(n){super(),this.wordName=n.trim().toLowerCase(),this.samples=[]}get sampleCount(){return this.samples.length}async recordSample(){const n=await navigator.mediaDevices.getUserMedia({audio:!0});return new Promise((t,e)=>{const o=new AudioContext({sampleRate:Rt}),s=new MediaRecorder(n),a=[];this.dispatchEvent(new CustomEvent("recording-start")),s.ondataavailable=r=>{r.data.size>0&&a.push(r.data)},s.onstop=async()=>{n.getTracks().forEach(r=>r.stop());try{const c=await new Blob(a,{type:"audio/webm"}).arrayBuffer(),l=await o.decodeAudioData(c);await o.close();const h=l.getChannelData(0),d=zt(new Float32Array(h)),m=this._push(d,`Recorded #${this.samples.length}`);t(m)}catch(r){await o.close().catch(()=>{}),e(r)}},s.start(),setTimeout(()=>s.stop(),oe)})}async addAudioFile(n){const t=await n.arrayBuffer(),e=new AudioContext({sampleRate:Rt}),o=await e.decodeAudioData(t);await e.close();const s=o.getChannelData(0),a=zt(new Float32Array(s));return this._push(a,n.name)}removeSample(n){this.samples.splice(n,1),this.dispatchEvent(new CustomEvent("samples-changed",{detail:{count:this.samples.length}}))}clearSamples(){this.samples=[],this.dispatchEvent(new CustomEvent("samples-changed",{detail:{count:0}}))}async generateRef(){if(this.samples.length<3)throw new Error(`Need at least 3 samples (currently have ${this.samples.length})`);this.dispatchEvent(new CustomEvent("generating",{detail:{total:this.samples.length}}));const n=[];for(let t=0;t<this.samples.length;t++){const e=Nt(this.samples[t].audioBuffer),o=await St(e);n.push(Array.from(o)),this.dispatchEvent(new CustomEvent("progress",{detail:{done:t+1,total:this.samples.length}}))}return{word_name:this.wordName,model_type:"resnet_50_arc",embeddings:n}}_push(n,t){this.samples.push({audioBuffer:n,name:t});const e=this.samples.length;return this.dispatchEvent(new 
CustomEvent("sample-added",{detail:{count:e,name:t}})),e}}const ae=`/**
|
|
2
|
+
* public/audio-processor.js
|
|
3
|
+
* AudioWorklet that runs at 16 kHz and continuously emits the last
|
|
4
|
+
* 1.5-second window (24 000 samples) via a circular buffer.
|
|
5
|
+
*
|
|
6
|
+
* The main thread receives a fresh Float32Array on every
|
|
7
|
+
* AudioWorklet quantum (128 samples ≈ every 8 ms at 16 kHz).
|
|
8
|
+
* The inference loop in engine.js rate-limits to avoid excessive work.
|
|
9
|
+
*/
|
|
10
|
+
class AudioProcessor extends AudioWorkletProcessor {
|
|
11
|
+
constructor() {
|
|
12
|
+
super()
|
|
13
|
+
this._size = 24000 // 1.5 s × 16 000 Hz
|
|
14
|
+
this._buf = new Float32Array(this._size)
|
|
15
|
+
this._ptr = 0
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
process(inputs) {
|
|
19
|
+
const ch = inputs[0]?.[0]
|
|
20
|
+
if (!ch) return true
|
|
21
|
+
|
|
22
|
+
for (let i = 0; i < ch.length; i++) {
|
|
23
|
+
this._buf[this._ptr] = ch[i]
|
|
24
|
+
this._ptr = (this._ptr + 1) % this._size
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
// Send an ordered copy of the ring buffer
|
|
28
|
+
const out = new Float32Array(this._size)
|
|
29
|
+
for (let i = 0; i < this._size; i++) {
|
|
30
|
+
out[i] = this._buf[(this._ptr + i) % this._size]
|
|
31
|
+
}
|
|
32
|
+
this.port.postMessage(out)
|
|
33
|
+
return true
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
registerProcessor('audio-processor', AudioProcessor)
|
|
38
|
+
`;let vt=null;function ie(){if(!vt){const i=new Blob([ae],{type:"application/javascript"});vt=URL.createObjectURL(i)}return vt}const wt="mellon_custom_refs";function gt(){try{const i=localStorage.getItem(wt);return i?JSON.parse(i):[]}catch{return[]}}function ce(i){const n=gt().filter(t=>t.word_name!==i.word_name);n.push(i),localStorage.setItem(wt,JSON.stringify(n))}function le(i){const n=gt().filter(t=>t.word_name!==i);localStorage.setItem(wt,JSON.stringify(n))}function he(i){const n=JSON.stringify(i,null,2),t=new Blob([n],{type:"application/json"}),e=URL.createObjectURL(t),o=Object.assign(document.createElement("a"),{href:e,download:`${i.word_name}_ref.json`});document.body.appendChild(o),o.click(),document.body.removeChild(o),URL.revokeObjectURL(e)}async function de(i){const n=await i.text();let t;try{t=JSON.parse(n)}catch{throw new Error("Invalid JSON")}if(!t.embeddings||!Array.isArray(t.embeddings)||!t.embeddings.length)throw new Error('Missing or empty "embeddings" array');if(!Array.isArray(t.embeddings[0]))throw new Error('"embeddings" must be a 2D array');return t.word_name||(t.word_name=i.name.replace(/_ref\.json$/i,"").replace(/\.json$/i,"")),t}class ue extends EventTarget{constructor(n={}){super(),this._opts={words:n.words??[],refs:n.refs??[],threshold:n.threshold??.65,relaxationMs:n.relaxationMs??2e3,inferenceGapMs:n.inferenceGapMs??300,wasmBasePath:n.wasmBasePath,modelUrl:n.modelUrl},this._refs=new Map,this._detectors=new Map,this._audioCtx=null,this._workletNode=null,this._stream=null,this._initialized=!1,this._running=!1}get isInitialized(){return this._initialized}get isRunning(){return this._running}async init(n){if(this._initialized){n==null||n(1);return}(this._opts.wasmBasePath||this._opts.modelUrl)&&Qt({wasmBasePath:this._opts.wasmBasePath,modelUrl:this._opts.modelUrl});try{await Vt(n)}catch(t){throw this.dispatchEvent(new CustomEvent("error",{detail:{error:t}})),t}for(const t of this._opts.refs)try{let e;if(typeof t=="string"){const o=await 
fetch(t);if(!o.ok)throw new Error(`HTTP ${o.status}`);e=await o.json()}else e=t;this.addCustomWord(e)}catch(e){const o=typeof t=="string"?t:t.word_name;console.warn(`[Mellon] Failed to load ref "${o}": ${e.message}`)}this._initialized=!0,this.dispatchEvent(new CustomEvent("ready"))}async start(n){this._initialized||await this.init();const t=n??this._opts.words;try{this._stream=await navigator.mediaDevices.getUserMedia({audio:!0})}catch(s){const a=new Error(`Microphone access denied: ${s.message}`);throw this.dispatchEvent(new CustomEvent("error",{detail:{error:a}})),a}this._audioCtx=new AudioContext({sampleRate:16e3});const e=ie();await this._audioCtx.audioWorklet.addModule(e);const o=this._audioCtx.createMediaStreamSource(this._stream);this._workletNode=new AudioWorkletNode(this._audioCtx,"audio-processor"),o.connect(this._workletNode),this._workletNode.connect(this._audioCtx.destination);for(const s of t){const a=this._refs.get(s);if(!a){console.warn(`[Mellon] No reference embeddings for "${s}" — skipping. 
Call addCustomWord() to register custom words before start().`);continue}const r=new xt({name:s,refEmbeddings:a.embeddings,threshold:this._opts.threshold,relaxationMs:this._opts.relaxationMs,inferenceGapMs:this._opts.inferenceGapMs});r.addEventListener("match",c=>{this.dispatchEvent(new CustomEvent("match",{detail:c.detail}))}),this._detectors.set(s,r)}this._workletNode.port.onmessage=async s=>{const a=[];for(const r of this._detectors.values())a.push(r.scoreFrame(s.data));await Promise.allSettled(a)},this._running=!0}stop(){this._workletNode&&(this._workletNode.port.onmessage=null,this._workletNode.disconnect(),this._workletNode=null),this._stream&&(this._stream.getTracks().forEach(n=>n.stop()),this._stream=null),this._audioCtx&&(this._audioCtx.close(),this._audioCtx=null),this._detectors.clear(),this._running=!1}addCustomWord(n){if(this._refs.set(n.word_name,n),this._running&&this._workletNode){const t=new xt({name:n.word_name,refEmbeddings:n.embeddings,threshold:this._opts.threshold,relaxationMs:this._opts.relaxationMs,inferenceGapMs:this._opts.inferenceGapMs});t.addEventListener("match",e=>{this.dispatchEvent(new CustomEvent("match",{detail:e.detail}))}),this._detectors.set(n.word_name,t)}}enrollWord(n){return new kt(n)}static loadWords(){return gt()}static saveWord(n){ce(n)}static deleteWord(n){le(n)}static importWordFile(n){return de(n)}static exportWord(n){he(n)}}exports.EnrollmentSession=kt;exports.Mellon=ue;
|