openwakeword-js 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +63 -52
- package/example/index.html +517 -64
- package/package.json +6 -1
- package/scripts/download_models.js +1 -0
package/README.md
CHANGED
|
@@ -1,104 +1,115 @@
|
|
|
1
1
|
# openWakeWord-JS
|
|
2
2
|
|
|
3
|
-
**The precision JavaScript/TypeScript port of openWakeWord.**
|
|
3
|
+
**The high-performance, precision JavaScript/TypeScript port of openWakeWord.**
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
[npm package](https://www.npmjs.com/package/openwakeword-js)
|
|
6
|
+
[License](https://github.com/Firojpaudel/OpenWakeWord_npm_porting/blob/main/LICENSE)
|
|
6
7
|
|
|
7
|
-
|
|
8
|
-
This package is a JavaScript port of the work by David Scripka.
|
|
9
|
-
[Original openWakeWord Repository](https://github.com/dscripka/openWakeWord)
|
|
8
|
+
A high-accuracy, 100% logic-aligned port of [openWakeWord](https://github.com/dscripka/openWakeWord). This implementation is designed to match the original Python behavior bit-for-bit, ensuring that your custom models perform exactly as they did in training.
|
|
10
9
|
|
|
11
10
|
---
|
|
12
11
|
|
|
13
|
-
##
|
|
12
|
+
## Technical Features
|
|
14
13
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
### Required Model Assets
|
|
21
|
-
You need at least three models to detect a wake word:
|
|
22
|
-
- `melspectrogram.onnx`: Audio feature extractor.
|
|
23
|
-
- `embedding_model.onnx`: Feature embedding generator.
|
|
24
|
-
- **Your Custom Model**: (e.g., `hey_deepa.onnx`). The specific phrase model.
|
|
25
|
-
- `silero_vad.onnx` (Optional): Voice Activity Detection for improved accuracy.
|
|
26
|
-
|
|
27
|
-
> [!NOTE]
|
|
28
|
-
> You can generate your own custom wake word models using this [Kaggle Notebook Link](https://www.kaggle.com/code/firojpaudel/deepa-wise).
|
|
14
|
+
- **Signal Parity**: Matches the original Python Mel spectrogram transforms (linear `x/10 + 2` scaling) and log-mel clamping.
|
|
15
|
+
- **Sliding Window Inference**: Implements the required 76-frame mel context for embeddings and 24-frame embedding context for classifiers.
|
|
16
|
+
- **Privacy First**: 100% local execution. No audio data ever leaves the user's device.
|
|
17
|
+
- **Hardware Acceleration**: Optimized via ONNX Runtime Web using WebAssembly (WASM) with SIMD and Multi-threading.
|
|
18
|
+
- **VAD Integration**: Optional Silero VAD gating to reduce CPU usage and prevent false triggers in silence.
|
|
29
19
|
|
|
30
20
|
---
|
|
31
21
|
|
|
32
|
-
##
|
|
22
|
+
## Step-by-Step Setup Guide
|
|
23
|
+
|
|
24
|
+
To recreate the full pipeline from scratch, follow these steps in order:
|
|
33
25
|
|
|
34
|
-
### 1.
|
|
26
|
+
### 1. Installation
|
|
27
|
+
In your project directory, install the core library and the ONNX runtime:
|
|
35
28
|
```bash
|
|
36
29
|
npm install openwakeword-js onnxruntime-web
|
|
37
30
|
```
|
|
38
31
|
|
|
39
|
-
### 2.
|
|
40
|
-
|
|
32
|
+
### 2. Automatic Asset Initialization
|
|
33
|
+
Run this command from your project root to automatically download the base models and copy the required WebAssembly binaries:
|
|
41
34
|
|
|
42
|
-
**Automated Setup:**
|
|
43
|
-
Run this command to automatically download the base models (`melspectrogram`, `embedding`, and `silero_vad`) from the original repository:
|
|
44
35
|
```bash
|
|
45
|
-
|
|
36
|
+
npx openwakeword-js-setup
|
|
46
37
|
```
|
|
47
38
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
- `
|
|
51
|
-
-
|
|
52
|
-
- `silero_vad.onnx` (Optional but recommended)
|
|
53
|
-
- **Your custom wake word model** (e.g., `hey_deepa.onnx`)
|
|
54
|
-
|
|
55
|
-
**Browser Requirements:** Browsers need the `.wasm` (WebAssembly) files to run the models at high speed.
|
|
56
|
-
- The `npm run download-models` command automatically copies these for you from `node_modules`.
|
|
57
|
-
- Alternatively, you can use a CDN by setting the `wasmPaths` in the constructor.
|
|
39
|
+
### 3. Training & Models
|
|
40
|
+
You will need a specific wake word model (classifier) for your chosen phrase.
|
|
41
|
+
- **Download Official Models**: You can find many pre-trained `.onnx` models (like `alexa.onnx`) in the [original repository](https://github.com/dscripka/openWakeWord).
|
|
42
|
+
- **Train Your Own**: Use this [Kaggle Notebook](https://www.kaggle.com/code/firojpaudel/deepa-wise) to train a custom model for any word, then download the exported `.onnx` file and put it in your `./models/` folder.
|
|
58
43
|
|
|
59
44
|
---
|
|
60
45
|
|
|
61
|
-
##
|
|
46
|
+
## The Execution Pipeline
|
|
62
47
|
|
|
63
|
-
|
|
48
|
+
Understanding how the data flows helps in debugging and implementation:
|
|
64
49
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
4. Open `http://localhost:3000/example/index.html`
|
|
50
|
+
1. **Audio In**: Feed 16kHz Mono audio chunks (typically 1280 samples / 80ms).
|
|
51
|
+
2. **Mel Processing**: The library converts audio into Mel Spectrograms using `melspectrogram.onnx`.
|
|
52
|
+
3. **Embedding Generation**: The Mel window is shifted by 8 frames at a time, and each shift generates one embedding vector via `embedding_model.onnx`.
|
|
53
|
+
4. **Classification**: Your custom model looks at a window of 24 embeddings to decide if the word was spoken.
|
|
70
54
|
|
|
71
55
|
---
|
|
72
56
|
|
|
73
|
-
## Usage Example
|
|
57
|
+
## Usage Example (TypeScript / JavaScript)
|
|
74
58
|
|
|
75
59
|
```typescript
|
|
76
60
|
import { Model } from 'openwakeword-js';
|
|
77
61
|
|
|
62
|
+
// Configuration
|
|
78
63
|
const model = new Model({
|
|
64
|
+
// 1. Path to your phrase model (e.g., from Kaggle or Official repo)
|
|
79
65
|
wakewordModels: ['./models/my_custom_model.onnx'],
|
|
66
|
+
|
|
67
|
+
// 2. Paths to the feature extraction models (created by download-models)
|
|
80
68
|
melspectrogramModelPath: './models/melspectrogram.onnx',
|
|
81
69
|
embeddingModelPath: './models/embedding_model.onnx',
|
|
82
70
|
|
|
83
|
-
// Optional VAD
|
|
71
|
+
// 3. Optional VAD config
|
|
84
72
|
vadModelPath: './models/silero_vad.onnx',
|
|
85
73
|
vadThreshold: 0.5,
|
|
86
74
|
|
|
87
75
|
inferenceFramework: 'onnx',
|
|
88
76
|
|
|
89
|
-
//
|
|
77
|
+
// 4. Path to the directory containing the WASM binaries (required in the browser)
|
|
90
78
|
wasmPaths: './models/'
|
|
91
79
|
});
|
|
92
80
|
|
|
81
|
+
// Initialize (Downloads/Loads models into memory)
|
|
93
82
|
await model.init();
|
|
94
83
|
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
84
|
+
/**
|
|
85
|
+
* Feed audio chunks.
|
|
86
|
+
* inputData can be a Float32Array (normalized -1 to 1)
|
|
87
|
+
* or an Int16Array (raw PCM 16-bit).
|
|
88
|
+
*/
|
|
89
|
+
const scores = await model.predict(inputData);
|
|
90
|
+
|
|
91
|
+
// Output format: { "my_custom_model": 0.85 }
|
|
92
|
+
if (scores["my_custom_model"] > 0.5) {
|
|
93
|
+
console.log("Wake word detected locally!");
|
|
94
|
+
}
|
|
98
95
|
```
|
|
99
96
|
|
|
100
|
-
|
|
101
|
-
|
|
97
|
+
---
|
|
98
|
+
|
|
99
|
+
## Local Development & Demo
|
|
100
|
+
|
|
101
|
+
We have included a full working example in the `example/` folder.
|
|
102
|
+
|
|
103
|
+
1. Clone the repo and run `npm install`.
|
|
104
|
+
2. Run `npm run download-models`.
|
|
105
|
+
3. Serve the root directory using a static server (e.g., `npx serve .`).
|
|
106
|
+
4. Navigate to `http://localhost:3000/example/index.html`.
|
|
107
|
+
5. Allow Microphone access and watch the real-time scores.
|
|
108
|
+
|
|
109
|
+
---
|
|
110
|
+
|
|
111
|
+
## Credits
|
|
112
|
+
This package is a JavaScript port of the work by **David Scripka**. We encourage support for the original project's research and model training infrastructure.
|
|
102
113
|
|
|
103
114
|
## License
|
|
104
115
|
Apache-2.0
|
package/example/index.html
CHANGED
|
@@ -4,133 +4,586 @@
|
|
|
4
4
|
<head>
|
|
5
5
|
<meta charset="UTF-8">
|
|
6
6
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
7
|
-
<title>openWakeWord
|
|
7
|
+
<title>openWakeWord | AI Listening Interface</title>
|
|
8
8
|
<style>
|
|
9
|
+
:root {
|
|
10
|
+
--bg-color: #0c0c0e;
|
|
11
|
+
--panel-bg: rgba(28, 28, 30, 0.7);
|
|
12
|
+
--accent-color: #34d399;
|
|
13
|
+
/* Subtle neon green */
|
|
14
|
+
--text-primary: #f5f5f7;
|
|
15
|
+
--text-secondary: rgba(245, 245, 247, 0.6);
|
|
16
|
+
--glass-border: rgba(255, 255, 255, 0.1);
|
|
17
|
+
--shadow-primary: 0 20px 40px rgba(0, 0, 0, 0.4);
|
|
18
|
+
--easing: cubic-bezier(0.4, 0, 0.2, 1);
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
* {
|
|
22
|
+
box-sizing: border-box;
|
|
23
|
+
-webkit-font-smoothing: antialiased;
|
|
24
|
+
-moz-osx-font-smoothing: grayscale;
|
|
25
|
+
}
|
|
26
|
+
|
|
9
27
|
body {
|
|
10
|
-
|
|
28
|
+
margin: 0;
|
|
29
|
+
padding: 0;
|
|
30
|
+
height: 100vh;
|
|
11
31
|
display: flex;
|
|
12
|
-
flex-direction: column;
|
|
13
32
|
align-items: center;
|
|
14
|
-
|
|
15
|
-
background:
|
|
33
|
+
justify-content: center;
|
|
34
|
+
background: var(--bg-color);
|
|
35
|
+
background: radial-gradient(circle at center, #1c1c1e 0%, #0c0c0e 100%);
|
|
36
|
+
color: var(--text-primary);
|
|
37
|
+
font-family: -apple-system, BlinkMacSystemFont, "SF Pro Display", "Helvetica Neue", sans-serif;
|
|
38
|
+
overflow: hidden;
|
|
39
|
+
position: relative;
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
/* Ambient Glow Effect */
|
|
43
|
+
body::after {
|
|
44
|
+
content: '';
|
|
45
|
+
position: absolute;
|
|
46
|
+
top: 50%;
|
|
47
|
+
left: 50%;
|
|
48
|
+
width: 800px;
|
|
49
|
+
height: 800px;
|
|
50
|
+
background: radial-gradient(circle, rgba(52, 211, 153, 0.03) 0%, transparent 60%);
|
|
51
|
+
transform: translate(-50%, -50%);
|
|
52
|
+
z-index: 0;
|
|
53
|
+
pointer-events: none;
|
|
16
54
|
}
|
|
17
55
|
|
|
18
|
-
.container {
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
56
|
+
.main-container {
|
|
57
|
+
position: relative;
|
|
58
|
+
z-index: 1;
|
|
59
|
+
width: 100%;
|
|
60
|
+
max-width: 500px;
|
|
61
|
+
padding: 40px;
|
|
62
|
+
background: var(--panel-bg);
|
|
63
|
+
backdrop-filter: blur(30px);
|
|
64
|
+
-webkit-backdrop-filter: blur(30px);
|
|
65
|
+
border-radius: 32px;
|
|
66
|
+
border: 1px solid var(--glass-border);
|
|
67
|
+
box-shadow: var(--shadow-primary);
|
|
23
68
|
text-align: center;
|
|
69
|
+
transition: transform 0.4s var(--easing);
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
/* Title Section */
|
|
73
|
+
h1 {
|
|
74
|
+
font-size: 28px;
|
|
75
|
+
font-weight: 600;
|
|
76
|
+
margin: 0 0 4px 0;
|
|
77
|
+
letter-spacing: -0.5px;
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
.subtitle {
|
|
81
|
+
font-size: 14px;
|
|
82
|
+
color: var(--text-secondary);
|
|
83
|
+
margin-bottom: 40px;
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
/* Status Indicator Section */
|
|
87
|
+
.status-area {
|
|
88
|
+
display: flex;
|
|
89
|
+
flex-direction: column;
|
|
90
|
+
align-items: center;
|
|
91
|
+
margin-bottom: 40px;
|
|
24
92
|
}
|
|
25
93
|
|
|
26
|
-
.
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
94
|
+
.orb-outer {
|
|
95
|
+
width: 120px;
|
|
96
|
+
height: 120px;
|
|
97
|
+
display: flex;
|
|
98
|
+
align-items: center;
|
|
99
|
+
justify-content: center;
|
|
100
|
+
position: relative;
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
.orb {
|
|
104
|
+
width: 50px;
|
|
105
|
+
height: 50px;
|
|
106
|
+
background: #3a3a3c;
|
|
107
|
+
border-radius: 50%;
|
|
108
|
+
position: relative;
|
|
109
|
+
z-index: 2;
|
|
110
|
+
transition: all 0.5s var(--easing);
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
.orb-glow {
|
|
114
|
+
position: absolute;
|
|
115
|
+
top: 0;
|
|
116
|
+
left: 0;
|
|
117
|
+
right: 0;
|
|
118
|
+
bottom: 0;
|
|
119
|
+
border-radius: 50%;
|
|
120
|
+
background: transparent;
|
|
121
|
+
z-index: 1;
|
|
122
|
+
transition: all 0.5s var(--easing);
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
/* Orb States */
|
|
126
|
+
.state-idle .orb {
|
|
127
|
+
background: #3a3a3c;
|
|
128
|
+
animation: breathe 4s infinite var(--easing);
|
|
30
129
|
}
|
|
31
130
|
|
|
32
|
-
.
|
|
33
|
-
|
|
34
|
-
|
|
131
|
+
.state-loading .orb-glow {
|
|
132
|
+
border: 2px solid rgba(245, 245, 247, 0.1);
|
|
133
|
+
border-top-color: var(--text-primary);
|
|
134
|
+
animation: rotate 1s linear infinite;
|
|
35
135
|
}
|
|
36
136
|
|
|
37
|
-
.
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
animation: pulse
|
|
137
|
+
.state-listening .orb {
|
|
138
|
+
background: var(--accent-color);
|
|
139
|
+
box-shadow: 0 0 30px rgba(52, 211, 153, 0.4);
|
|
140
|
+
animation: pulse-listening 2s infinite var(--easing);
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
.state-error .orb {
|
|
144
|
+
background: #ff453a;
|
|
145
|
+
box-shadow: 0 0 30px rgba(255, 69, 58, 0.4);
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
@keyframes breathe {
|
|
149
|
+
|
|
150
|
+
0%,
|
|
151
|
+
100% {
|
|
152
|
+
transform: scale(1);
|
|
153
|
+
opacity: 0.8;
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
50% {
|
|
157
|
+
transform: scale(1.05);
|
|
158
|
+
opacity: 1;
|
|
159
|
+
}
|
|
41
160
|
}
|
|
42
161
|
|
|
43
|
-
@keyframes pulse {
|
|
162
|
+
@keyframes pulse-listening {
|
|
44
163
|
0% {
|
|
45
164
|
transform: scale(1);
|
|
165
|
+
box-shadow: 0 0 0px rgba(52, 211, 153, 0.4);
|
|
46
166
|
}
|
|
47
167
|
|
|
48
168
|
50% {
|
|
49
169
|
transform: scale(1.1);
|
|
170
|
+
box-shadow: 0 0 40px rgba(52, 211, 153, 0.6);
|
|
50
171
|
}
|
|
51
172
|
|
|
52
173
|
100% {
|
|
53
174
|
transform: scale(1);
|
|
175
|
+
box-shadow: 0 0 0px rgba(52, 211, 153, 0.4);
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
@keyframes rotate {
|
|
180
|
+
from {
|
|
181
|
+
transform: rotate(0deg);
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
to {
|
|
185
|
+
transform: rotate(360deg);
|
|
54
186
|
}
|
|
55
187
|
}
|
|
56
188
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
font-size:
|
|
189
|
+
.status-text {
|
|
190
|
+
margin-top: 16px;
|
|
191
|
+
font-size: 13px;
|
|
192
|
+
font-weight: 500;
|
|
193
|
+
color: var(--text-secondary);
|
|
194
|
+
text-transform: uppercase;
|
|
195
|
+
letter-spacing: 1px;
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
/* Slider Section */
|
|
199
|
+
.controls {
|
|
200
|
+
width: 100%;
|
|
201
|
+
margin-bottom: 32px;
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
.slider-container {
|
|
205
|
+
position: relative;
|
|
206
|
+
padding: 20px 0;
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
.label-group {
|
|
210
|
+
display: flex;
|
|
211
|
+
justify-content: space-between;
|
|
212
|
+
margin-bottom: 12px;
|
|
213
|
+
font-size: 13px;
|
|
214
|
+
color: var(--text-secondary);
|
|
215
|
+
font-weight: 500;
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
.slider-value {
|
|
219
|
+
color: var(--text-primary);
|
|
220
|
+
font-variant-numeric: tabular-nums;
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
input[type=range] {
|
|
224
|
+
-webkit-appearance: none;
|
|
225
|
+
width: 100%;
|
|
226
|
+
height: 4px;
|
|
227
|
+
background: rgba(255, 255, 255, 0.1);
|
|
228
|
+
border-radius: 2px;
|
|
229
|
+
outline: none;
|
|
230
|
+
margin: 0;
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
input[type=range]::-webkit-slider-thumb {
|
|
234
|
+
-webkit-appearance: none;
|
|
235
|
+
width: 20px;
|
|
236
|
+
height: 20px;
|
|
237
|
+
background: var(--text-primary);
|
|
238
|
+
border-radius: 50%;
|
|
60
239
|
cursor: pointer;
|
|
61
|
-
|
|
62
|
-
|
|
240
|
+
box-shadow: 0 2px 10px rgba(0, 0, 0, 0.5);
|
|
241
|
+
transition: transform 0.2s var(--easing), box-shadow 0.2s var(--easing);
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
input[type=range]:active::-webkit-slider-thumb {
|
|
245
|
+
transform: scale(1.2);
|
|
246
|
+
box-shadow: 0 0 15px rgba(255, 255, 255, 0.3);
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
/* Action Button */
|
|
250
|
+
.btn-primary {
|
|
251
|
+
width: 100%;
|
|
252
|
+
padding: 16px;
|
|
253
|
+
font-size: 16px;
|
|
254
|
+
font-weight: 600;
|
|
255
|
+
border-radius: 18px;
|
|
63
256
|
border: none;
|
|
64
|
-
|
|
257
|
+
background: linear-gradient(135deg, #2c2c2e 0%, #1c1c1e 100%);
|
|
258
|
+
color: var(--text-primary);
|
|
259
|
+
cursor: pointer;
|
|
260
|
+
border: 1px solid var(--glass-border);
|
|
261
|
+
transition: all 0.3s var(--easing);
|
|
262
|
+
box-shadow: 0 4px 15px rgba(0, 0, 0, 0.2);
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
.btn-primary:hover {
|
|
266
|
+
transform: translateY(-2px);
|
|
267
|
+
background: linear-gradient(135deg, #3a3a3c 0%, #2c2c2e 100%);
|
|
268
|
+
box-shadow: 0 8px 25px rgba(0, 0, 0, 0.3);
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
.btn-primary:active {
|
|
272
|
+
transform: translateY(1px);
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
.btn-active {
|
|
276
|
+
background: var(--text-primary);
|
|
277
|
+
color: var(--bg-color);
|
|
65
278
|
}
|
|
66
279
|
|
|
67
|
-
|
|
68
|
-
|
|
280
|
+
/* Detection Cards */
|
|
281
|
+
.detections-overlay {
|
|
282
|
+
position: fixed;
|
|
283
|
+
top: 40px;
|
|
284
|
+
right: 40px;
|
|
285
|
+
width: 320px;
|
|
286
|
+
z-index: 100;
|
|
287
|
+
pointer-events: none;
|
|
288
|
+
}
|
|
289
|
+
|
|
290
|
+
.card {
|
|
291
|
+
background: rgba(44, 44, 46, 0.8);
|
|
292
|
+
backdrop-filter: blur(20px);
|
|
293
|
+
margin-bottom: 16px;
|
|
294
|
+
padding: 20px;
|
|
295
|
+
border-radius: 20px;
|
|
296
|
+
border: 1px solid var(--glass-border);
|
|
297
|
+
box-shadow: 0 10px 30px rgba(0, 0, 0, 0.4);
|
|
298
|
+
animation: card-appear 0.5s var(--easing) forwards;
|
|
299
|
+
position: relative;
|
|
300
|
+
transform-origin: top right;
|
|
301
|
+
}
|
|
302
|
+
|
|
303
|
+
@keyframes card-appear {
|
|
304
|
+
from {
|
|
305
|
+
opacity: 0;
|
|
306
|
+
transform: translateY(20px) scale(0.9);
|
|
307
|
+
}
|
|
308
|
+
|
|
309
|
+
to {
|
|
310
|
+
opacity: 1;
|
|
311
|
+
transform: translateY(0) scale(1);
|
|
312
|
+
}
|
|
313
|
+
}
|
|
314
|
+
|
|
315
|
+
.card.fade-out {
|
|
316
|
+
animation: card-fade-out 0.5s var(--easing) forwards;
|
|
317
|
+
}
|
|
318
|
+
|
|
319
|
+
@keyframes card-fade-out {
|
|
320
|
+
to {
|
|
321
|
+
opacity: 0;
|
|
322
|
+
transform: translateY(-20px) scale(0.95);
|
|
323
|
+
}
|
|
324
|
+
}
|
|
325
|
+
|
|
326
|
+
.card-title {
|
|
327
|
+
font-weight: 600;
|
|
328
|
+
font-size: 16px;
|
|
329
|
+
margin-bottom: 4px;
|
|
330
|
+
display: block;
|
|
331
|
+
}
|
|
332
|
+
|
|
333
|
+
.card-meta {
|
|
334
|
+
font-size: 12px;
|
|
335
|
+
color: var(--text-secondary);
|
|
336
|
+
display: flex;
|
|
337
|
+
gap: 12px;
|
|
338
|
+
}
|
|
339
|
+
|
|
340
|
+
.card-accent {
|
|
341
|
+
color: var(--accent-color);
|
|
342
|
+
font-weight: 600;
|
|
343
|
+
}
|
|
344
|
+
|
|
345
|
+
/* Advanced Panel */
|
|
346
|
+
.advanced-toggle {
|
|
347
|
+
margin-top: 24px;
|
|
348
|
+
font-size: 12px;
|
|
349
|
+
color: var(--text-secondary);
|
|
350
|
+
cursor: pointer;
|
|
351
|
+
text-decoration: none;
|
|
352
|
+
display: block;
|
|
353
|
+
transition: color 0.2s;
|
|
354
|
+
}
|
|
355
|
+
|
|
356
|
+
.advanced-toggle:hover {
|
|
357
|
+
color: var(--text-primary);
|
|
358
|
+
}
|
|
359
|
+
|
|
360
|
+
.log-container {
|
|
361
|
+
max-height: 0;
|
|
362
|
+
overflow: hidden;
|
|
363
|
+
transition: max-height 0.4s var(--easing);
|
|
364
|
+
text-align: left;
|
|
365
|
+
background: rgba(0, 0, 0, 0.2);
|
|
366
|
+
border-radius: 12px;
|
|
367
|
+
margin-top: 20px;
|
|
368
|
+
}
|
|
369
|
+
|
|
370
|
+
.log-container.visible {
|
|
371
|
+
max-height: 150px;
|
|
372
|
+
padding: 12px;
|
|
373
|
+
}
|
|
374
|
+
|
|
375
|
+
.log-content {
|
|
376
|
+
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace;
|
|
377
|
+
font-size: 11px;
|
|
378
|
+
color: var(--text-secondary);
|
|
379
|
+
line-height: 1.5;
|
|
380
|
+
white-space: pre-wrap;
|
|
381
|
+
height: 126px;
|
|
382
|
+
overflow-y: auto;
|
|
383
|
+
}
|
|
384
|
+
|
|
385
|
+
/* Custom Scrollbar */
|
|
386
|
+
.log-content::-webkit-scrollbar {
|
|
387
|
+
width: 4px;
|
|
388
|
+
}
|
|
389
|
+
|
|
390
|
+
.log-content::-webkit-scrollbar-track {
|
|
391
|
+
background: transparent;
|
|
392
|
+
}
|
|
393
|
+
|
|
394
|
+
.log-content::-webkit-scrollbar-thumb {
|
|
395
|
+
background: rgba(255, 255, 255, 0.1);
|
|
396
|
+
border-radius: 2px;
|
|
69
397
|
}
|
|
70
398
|
</style>
|
|
71
399
|
</head>
|
|
72
400
|
|
|
73
401
|
<body>
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
<
|
|
402
|
+
|
|
403
|
+
<div class="detections-overlay" id="detectionsOverlay"></div>
|
|
404
|
+
|
|
405
|
+
<div class="main-container" id="mainCard">
|
|
406
|
+
<h1>Wake Word Detection</h1>
|
|
407
|
+
<div class="subtitle">Real-time AI Listening Interface</div>
|
|
408
|
+
|
|
409
|
+
<div class="status-area">
|
|
410
|
+
<div class="orb-outer state-idle" id="orbOuter">
|
|
411
|
+
<div class="orb-glow"></div>
|
|
412
|
+
<div class="orb"></div>
|
|
413
|
+
</div>
|
|
414
|
+
<div class="status-text" id="statusText">System Idle</div>
|
|
415
|
+
</div>
|
|
416
|
+
|
|
417
|
+
<div class="controls">
|
|
418
|
+
<div class="slider-container">
|
|
419
|
+
<div class="label-group">
|
|
420
|
+
<span>Precision Threshold</span>
|
|
421
|
+
<span class="slider-value" id="thresholdValue">0.50</span>
|
|
422
|
+
</div>
|
|
423
|
+
<input type="range" id="thresholdSlider" min="0.1" max="0.99" step="0.01" value="0.50">
|
|
424
|
+
</div>
|
|
425
|
+
</div>
|
|
426
|
+
|
|
427
|
+
<button class="btn-primary" id="toggleButton">Start Listening</button>
|
|
428
|
+
|
|
429
|
+
<a class="advanced-toggle" id="advancedToggle">Show Advanced Debug Log</a>
|
|
430
|
+
<div class="log-container" id="logContainer">
|
|
431
|
+
<div class="log-content" id="log">Initializing system components...</div>
|
|
432
|
+
</div>
|
|
79
433
|
</div>
|
|
80
434
|
|
|
81
|
-
<!--
|
|
435
|
+
<!-- Scripts -->
|
|
82
436
|
<script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.min.js"></script>
|
|
83
437
|
<script type="module">
|
|
84
438
|
import { Model } from 'https://cdn.jsdelivr.net/npm/openwakeword-js/dist/index.mjs';
|
|
85
439
|
|
|
86
|
-
const
|
|
87
|
-
|
|
88
|
-
|
|
440
|
+
const elements = {
|
|
441
|
+
orb: document.getElementById('orbOuter'),
|
|
442
|
+
status: document.getElementById('statusText'),
|
|
443
|
+
button: document.getElementById('toggleButton'),
|
|
444
|
+
slider: document.getElementById('thresholdSlider'),
|
|
445
|
+
val: document.getElementById('thresholdValue'),
|
|
446
|
+
overlay: document.getElementById('detectionsOverlay'),
|
|
447
|
+
log: document.getElementById('log'),
|
|
448
|
+
advanced: document.getElementById('advancedToggle'),
|
|
449
|
+
logContainer: document.getElementById('logContainer')
|
|
450
|
+
};
|
|
451
|
+
|
|
452
|
+
const config = {
|
|
453
|
+
threshold: 0.50,
|
|
454
|
+
isListening: false,
|
|
455
|
+
model: null,
|
|
456
|
+
audioContext: null,
|
|
457
|
+
audioProcessor: null
|
|
458
|
+
};
|
|
459
|
+
|
|
460
|
+
function updateStatus(state, text) {
|
|
461
|
+
elements.orb.className = `orb-outer state-${state}`;
|
|
462
|
+
elements.status.textContent = text;
|
|
463
|
+
log(`Status changed: ${text}`);
|
|
464
|
+
}
|
|
465
|
+
|
|
466
|
+
function log(msg) {
|
|
467
|
+
const time = new Date().toLocaleTimeString([], { hour12: false, hour: '2-digit', minute: '2-digit', second: '2-digit' });
|
|
468
|
+
elements.log.innerHTML += `\n[${time}] ${msg}`;
|
|
469
|
+
elements.log.scrollTop = elements.log.scrollHeight;
|
|
470
|
+
}
|
|
89
471
|
|
|
90
|
-
|
|
472
|
+
function showDetection(name, score) {
|
|
473
|
+
const card = document.createElement('div');
|
|
474
|
+
card.className = 'card';
|
|
475
|
+
card.innerHTML = `
|
|
476
|
+
<span class="card-title">${name} Activated</span>
|
|
477
|
+
<div class="card-meta">
|
|
478
|
+
<span class="card-accent">Score: ${score.toFixed(2)}</span>
|
|
479
|
+
<span>Language: English</span>
|
|
480
|
+
</div>
|
|
481
|
+
`;
|
|
482
|
+
elements.overlay.prepend(card);
|
|
91
483
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
484
|
+
// Visual haptic on main card
|
|
485
|
+
document.getElementById('mainCard').style.transform = 'scale(1.02)';
|
|
486
|
+
setTimeout(() => {
|
|
487
|
+
document.getElementById('mainCard').style.transform = 'scale(1)';
|
|
488
|
+
}, 100);
|
|
95
489
|
|
|
490
|
+
setTimeout(() => {
|
|
491
|
+
card.classList.add('fade-out');
|
|
492
|
+
setTimeout(() => card.remove(), 500);
|
|
493
|
+
}, 5000);
|
|
494
|
+
}
|
|
495
|
+
|
|
496
|
+
// Logic Initialization
|
|
497
|
+
async function initializeModel() {
|
|
498
|
+
if (config.model) return true;
|
|
499
|
+
|
|
500
|
+
updateStatus('loading', 'Loading AI Models');
|
|
96
501
|
try {
|
|
97
|
-
model = new Model({
|
|
98
|
-
wakewordModels: [
|
|
502
|
+
config.model = new Model({
|
|
503
|
+
wakewordModels: [
|
|
504
|
+
'./models/hello_deepa.onnx' // Replace with your model
|
|
505
|
+
],
|
|
99
506
|
melspectrogramModelPath: './models/melspectrogram.onnx',
|
|
100
507
|
embeddingModelPath: './models/embedding_model.onnx',
|
|
101
|
-
|
|
508
|
+
vadModelPath: './models/silero_vad.onnx',
|
|
509
|
+
inferenceFramework: 'onnx',
|
|
510
|
+
wasmPaths: './models/' // Use models folder for WASM
|
|
102
511
|
});
|
|
103
512
|
|
|
104
|
-
await model.init();
|
|
105
|
-
|
|
513
|
+
await config.model.init();
|
|
514
|
+
log("All models initialized successfully.");
|
|
515
|
+
return true;
|
|
516
|
+
} catch (err) {
|
|
517
|
+
log(`Init Error: ${err.message}`);
|
|
518
|
+
updateStatus('error', 'Initialization Failed');
|
|
519
|
+
return false;
|
|
520
|
+
}
|
|
521
|
+
}
|
|
522
|
+
|
|
523
|
+
async function startEngine() {
|
|
524
|
+
const ready = await initializeModel();
|
|
525
|
+
if (!ready) return;
|
|
106
526
|
|
|
527
|
+
try {
|
|
107
528
|
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
|
|
108
|
-
|
|
109
|
-
const source = audioContext.createMediaStreamSource(stream);
|
|
110
|
-
|
|
529
|
+
config.audioContext = new (window.AudioContext || window.webkitAudioContext)({ sampleRate: 16000 });
|
|
530
|
+
const source = config.audioContext.createMediaStreamSource(stream);
|
|
531
|
+
|
|
532
|
+
// Using ScriptProcessor for maximum compatibility (AudioWorklet suggested for future)
|
|
533
|
+
config.audioProcessor = config.audioContext.createScriptProcessor(1024, 1, 1);
|
|
111
534
|
|
|
112
|
-
source.connect(
|
|
113
|
-
|
|
535
|
+
source.connect(config.audioProcessor);
|
|
536
|
+
config.audioProcessor.connect(config.audioContext.destination);
|
|
114
537
|
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
const scores = await model.predict(inputData);
|
|
538
|
+
config.audioProcessor.onaudioprocess = async (e) => {
|
|
539
|
+
if (!config.isListening) return;
|
|
118
540
|
|
|
119
|
-
const
|
|
120
|
-
|
|
541
|
+
const input = e.inputBuffer.getChannelData(0);
|
|
542
|
+
const scores = await config.model.predict(input);
|
|
121
543
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
544
|
+
for (const [name, score] of Object.entries(scores)) {
|
|
545
|
+
if (score > config.threshold) {
|
|
546
|
+
showDetection(name.split('/').pop().replace('.onnx', ''), score);
|
|
547
|
+
}
|
|
125
548
|
}
|
|
126
549
|
};
|
|
550
|
+
|
|
551
|
+
updateStatus('listening', 'AI Listening');
|
|
552
|
+
elements.button.textContent = "Stop Listening";
|
|
553
|
+
elements.button.classList.add('btn-active');
|
|
554
|
+
config.isListening = true;
|
|
127
555
|
} catch (err) {
|
|
128
|
-
|
|
129
|
-
|
|
556
|
+
updateStatus('error', 'Microphone Denied');
|
|
557
|
+
log(`Mic Error: ${err.message}`);
|
|
558
|
+
}
|
|
559
|
+
}
|
|
560
|
+
|
|
561
|
+
function stopEngine() {
|
|
562
|
+
config.isListening = false;
|
|
563
|
+
if (config.audioContext) {
|
|
564
|
+
config.audioContext.close();
|
|
130
565
|
}
|
|
566
|
+
updateStatus('idle', 'System Idle');
|
|
567
|
+
elements.button.textContent = "Start Listening";
|
|
568
|
+
elements.button.classList.remove('btn-active');
|
|
131
569
|
}
|
|
132
570
|
|
|
133
|
-
|
|
571
|
+
// Event Listeners
|
|
572
|
+
elements.button.onclick = () => {
|
|
573
|
+
if (config.isListening) stopEngine();
|
|
574
|
+
else startEngine();
|
|
575
|
+
};
|
|
576
|
+
|
|
577
|
+
elements.slider.oninput = (e) => {
|
|
578
|
+
config.threshold = parseFloat(e.target.value);
|
|
579
|
+
elements.val.textContent = config.threshold.toFixed(2);
|
|
580
|
+
};
|
|
581
|
+
|
|
582
|
+
elements.advanced.onclick = () => {
|
|
583
|
+
const isVisible = elements.logContainer.classList.toggle('visible');
|
|
584
|
+
elements.advanced.textContent = isVisible ? "Hide Debug Log" : "Show Advanced Debug Log";
|
|
585
|
+
};
|
|
586
|
+
|
|
134
587
|
</script>
|
|
135
588
|
</body>
|
|
136
589
|
|
package/package.json
CHANGED
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "openwakeword-js",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.3",
|
|
4
4
|
"description": "Port of openWakeWord to JavaScript/TypeScript using ONNX Runtime",
|
|
5
|
+
"bin": {
|
|
6
|
+
"openwakeword-js-setup": "scripts/download_models.js"
|
|
7
|
+
},
|
|
5
8
|
"main": "dist/index.js",
|
|
6
9
|
"module": "dist/index.mjs",
|
|
7
10
|
"types": "dist/index.d.ts",
|
|
@@ -18,6 +21,8 @@
|
|
|
18
21
|
"src",
|
|
19
22
|
"scripts",
|
|
20
23
|
"example",
|
|
24
|
+
"models/hello_deepa.onnx",
|
|
25
|
+
"models/namaste_deepa.onnx",
|
|
21
26
|
"README.md",
|
|
22
27
|
"LICENSE"
|
|
23
28
|
],
|