mellon 0.0.25 → 0.0.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +165 -54
- package/package.json +2 -2
package/dist/index.d.ts
CHANGED
|
@@ -1,54 +1,165 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
export
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
1
|
+
/**
|
|
2
|
+
* Log Mel-filterbank feature extraction.
|
|
3
|
+
* Exact port of the AudioUtils used by EfficientWord-Net InBrowser.
|
|
4
|
+
* - 64 Mel filters
|
|
5
|
+
* - 512-point FFT
|
|
6
|
+
* - 25 ms window / 10 ms step at 16 kHz
|
|
7
|
+
* Produces a [149 × 64] spectrogram from a 1.5 s (24 000-sample) input,
|
|
8
|
+
* matching the [1, 1, 149, 64] tensor expected by the ONNX Siamese model.
|
|
9
|
+
*/
|
|
10
|
+
export declare class AudioUtils {
|
|
11
|
+
private _nfft;
|
|
12
|
+
private _nfilt;
|
|
13
|
+
private _sampleRate;
|
|
14
|
+
private _melFilters;
|
|
15
|
+
private _fft;
|
|
16
|
+
constructor(sampleRate?: number, nfft?: number, nfilt?: number);
|
|
17
|
+
private _hzToMel;
|
|
18
|
+
private _melToHz;
|
|
19
|
+
private _createMelFilterbank;
|
|
20
|
+
/** Returns a flat Float32Array of shape [numFrames × nfilt]. */
|
|
21
|
+
logfbank(signal: Float32Array): Float32Array;
|
|
22
|
+
maxCosineSim(embedding: Float32Array, refs: number[][]): number;
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
export declare interface Command {
|
|
26
|
+
name: string;
|
|
27
|
+
triggers: Trigger[];
|
|
28
|
+
onMatch?: (trigger: TriggerName, confidence: number) => any;
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
export declare const DEFAULT_AUDIO_PROCESSOR_PATH = "https://cdn.jsdelivr.net/npm/mellon@0.0.14/dist/assets/audio-processor.js";
|
|
32
|
+
|
|
33
|
+
export declare const DEFAULT_MODEL_PATH = "https://huggingface.co/ComicScrip/mellon/resolve/main/model.onnx";
|
|
34
|
+
|
|
35
|
+
export declare const DEFAULT_ORT_CDN_URL = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/ort.wasm.min.mjs";
|
|
36
|
+
|
|
37
|
+
export declare const DEFAULT_REFS_STORAGE_KEY = "mellon-refs";
|
|
38
|
+
|
|
39
|
+
export declare const DEFAULT_THRESHOLD_STORAGE_KEY = "mellon-threshold";
|
|
40
|
+
|
|
41
|
+
export declare const DEFAULT_WASM_PATHS = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/";
|
|
42
|
+
|
|
43
|
+
export declare class Detector {
|
|
44
|
+
private _refsStorageKey;
|
|
45
|
+
private _thresholdStorageKey;
|
|
46
|
+
private _audioProcessorPath;
|
|
47
|
+
private _modelPath;
|
|
48
|
+
private _wasmPaths;
|
|
49
|
+
private _ortCdnUrl;
|
|
50
|
+
private _started;
|
|
51
|
+
private _inferring;
|
|
52
|
+
private _audioCtx;
|
|
53
|
+
private _stream;
|
|
54
|
+
private _refEmbeddings;
|
|
55
|
+
private _lastMatchAt;
|
|
56
|
+
private _lastInferenceAt;
|
|
57
|
+
private _audioUtils;
|
|
58
|
+
private _commands;
|
|
59
|
+
private _threshold;
|
|
60
|
+
private _initPromise;
|
|
61
|
+
private _log;
|
|
62
|
+
get threshold(): number;
|
|
63
|
+
set threshold(value: number);
|
|
64
|
+
get listening(): boolean;
|
|
65
|
+
constructor(commands: Command[], config?: DetectorConfig);
|
|
66
|
+
/**
|
|
67
|
+
* Streams `url`, calling `onProgress(downloaded, total)` after each chunk.
|
|
68
|
+
* Falls back to a single-shot fetch when the body stream is unavailable.
|
|
69
|
+
*/
|
|
70
|
+
private _trackFetch;
|
|
71
|
+
private _init;
|
|
72
|
+
/**
|
|
73
|
+
* Loads the ONNX model and all reference embeddings.
|
|
74
|
+
* Must be called before {@link start}.
|
|
75
|
+
* Safe to call multiple times — the work is only done once.
|
|
76
|
+
*
|
|
77
|
+
* @param onProgress - optional callback invoked as each asset is loaded
|
|
78
|
+
*/
|
|
79
|
+
init(onProgress?: ProgressCallback): Promise<void>;
|
|
80
|
+
/** Adds (or replaces) the reference embeddings for a word without restarting. */
|
|
81
|
+
addCustomWord(ref: WordRef): void;
|
|
82
|
+
start(): Promise<void>;
|
|
83
|
+
stop(): Promise<void>;
|
|
84
|
+
private _handleBuffer;
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
export declare interface DetectorConfig {
|
|
88
|
+
refsStorageKey?: string;
|
|
89
|
+
thresholdStorageKey?: string;
|
|
90
|
+
wasmPaths?: string;
|
|
91
|
+
modelPath?: string;
|
|
92
|
+
audioProcessorPath?: string;
|
|
93
|
+
ortCdnUrl?: string;
|
|
94
|
+
audioUtils?: AudioUtils;
|
|
95
|
+
/** Enable console logging. Pass `true` for info+warn+error, or a custom logger. Defaults to silent. */
|
|
96
|
+
log?: boolean | {
|
|
97
|
+
info?: (...a: unknown[]) => void;
|
|
98
|
+
warn?: (...a: unknown[]) => void;
|
|
99
|
+
error?: (...a: unknown[]) => void;
|
|
100
|
+
};
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
/**
|
|
104
|
+
* Records voice samples and generates an EfficientWord-Net reference file
|
|
105
|
+
* (compatible with the JSON format used by the ONNX Siamese model).
|
|
106
|
+
*
|
|
107
|
+
* Usage:
|
|
108
|
+
* const session = new EnrollmentSession('suivant');
|
|
109
|
+
* const count = await session.recordSample(); // repeat ≥ 3 times
|
|
110
|
+
* const ref = await session.generateRef(); // needs ≥ 3 samples
|
|
111
|
+
* Storage.saveWord(ref);
|
|
112
|
+
* mellon.addCustomWord(ref);
|
|
113
|
+
*/
|
|
114
|
+
export declare class EnrollmentSession {
|
|
115
|
+
private _config;
|
|
116
|
+
private _wordName;
|
|
117
|
+
private _samples;
|
|
118
|
+
private _audioUtils;
|
|
119
|
+
constructor(wordName: string, config?: EnrollmentSessionConfig);
|
|
120
|
+
/** Records 1.5 s of audio, stores the decoded PCM, returns new sample count. */
|
|
121
|
+
recordSample(): Promise<number>;
|
|
122
|
+
/** Returns the raw PCM Float32Array for the sample at the given index (16 kHz). */
|
|
123
|
+
getSample(index: number): Float32Array;
|
|
124
|
+
/** Removes the sample at the given index. Returns the new sample count. */
|
|
125
|
+
deleteSample(index: number): number;
|
|
126
|
+
/** Runs ONNX inference on every recorded sample to produce reference embeddings. */
|
|
127
|
+
generateRef(): Promise<WordRef>;
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
export declare interface EnrollmentSessionConfig {
|
|
131
|
+
wasmPaths?: string;
|
|
132
|
+
modelPath?: string;
|
|
133
|
+
ortCdnUrl?: string;
|
|
134
|
+
audioUtils?: AudioUtils;
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
/**
|
|
138
|
+
* Called during {@link Detector.init} to report real download progress.
|
|
139
|
+
* @param downloaded - total bytes received so far across all assets
|
|
140
|
+
* @param total - sum of known Content-Length values for all assets;
|
|
141
|
+
* may still be 0 early on (before first header is received)
|
|
142
|
+
*/
|
|
143
|
+
export declare type ProgressCallback = (downloaded: number, total: number) => void;
|
|
144
|
+
|
|
145
|
+
declare class Storage_2 {
|
|
146
|
+
static loadWords(storageKey?: string): WordRef[];
|
|
147
|
+
static saveWord(ref: WordRef, storageKey?: string): void;
|
|
148
|
+
static deleteWord(wordName: string, storageKey?: string): void;
|
|
149
|
+
}
|
|
150
|
+
export { Storage_2 as Storage }
|
|
151
|
+
|
|
152
|
+
export declare interface Trigger {
|
|
153
|
+
name: TriggerName;
|
|
154
|
+
defaultRefPath?: string;
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
export declare type TriggerName = string;
|
|
158
|
+
|
|
159
|
+
export declare interface WordRef {
|
|
160
|
+
word_name: TriggerName;
|
|
161
|
+
model_type?: string;
|
|
162
|
+
embeddings: number[][];
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
export { }
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mellon",
|
|
3
|
-
"version": "0.0.25",
|
|
3
|
+
"version": "0.0.26",
|
|
4
4
|
"description": "Offline, in-browser voice commands powered by EfficientWord-Net (ResNet-50 ArcFace).",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/mellon.cjs",
|
|
@@ -31,7 +31,7 @@
|
|
|
31
31
|
"voice"
|
|
32
32
|
],
|
|
33
33
|
"scripts": {
|
|
34
|
-
"build": "vite build
|
|
34
|
+
"build": "vite build"
|
|
35
35
|
},
|
|
36
36
|
"dependencies": {
|
|
37
37
|
"fft.js": "^4.0.4"
|