@elizaos/capacitor-swabble 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ElizaosCapacitorSwabble.podspec +18 -0
- package/android/build.gradle +50 -0
- package/android/src/main/AndroidManifest.xml +4 -0
- package/android/src/main/java/ai/eliza/plugins/swabble/SwabblePlugin.kt +840 -0
- package/dist/esm/definitions.d.ts +218 -0
- package/dist/esm/definitions.d.ts.map +1 -0
- package/dist/esm/definitions.js +1 -0
- package/dist/esm/index.d.ts +4 -0
- package/dist/esm/index.d.ts.map +1 -0
- package/dist/esm/index.js +6 -0
- package/dist/esm/web.d.ts +54 -0
- package/dist/esm/web.d.ts.map +1 -0
- package/dist/esm/web.js +461 -0
- package/dist/plugin.cjs.js +477 -0
- package/dist/plugin.cjs.js.map +1 -0
- package/dist/plugin.js +480 -0
- package/dist/plugin.js.map +1 -0
- package/electrobun/src/global.d.ts +1 -0
- package/electrobun/src/index.ts +786 -0
- package/electrobun/tsconfig.json +16 -0
- package/ios/Sources/SwabblePlugin/SwabblePlugin.swift +1156 -0
- package/package.json +84 -0
|
@@ -0,0 +1,786 @@
|
|
|
1
|
+
/// <reference path="./global.d.ts" />
|
|
2
|
+
import type { PluginListenerHandle } from "@capacitor/core";
|
|
3
|
+
import {
|
|
4
|
+
getElectrobunRendererRpc,
|
|
5
|
+
invokeDesktopBridgeRequest,
|
|
6
|
+
subscribeDesktopBridgeEvent,
|
|
7
|
+
} from "@elizaos/app-core";
|
|
8
|
+
import type {
|
|
9
|
+
EventCallback,
|
|
10
|
+
ListenerEntry as BaseListenerEntry,
|
|
11
|
+
} from "../../../shared-types.js";
|
|
12
|
+
import type {
|
|
13
|
+
SwabbleAudioLevelEvent,
|
|
14
|
+
SwabbleConfig,
|
|
15
|
+
SwabbleErrorEvent,
|
|
16
|
+
SwabblePermissionStatus,
|
|
17
|
+
SwabblePlugin,
|
|
18
|
+
SwabbleSpeechSegment,
|
|
19
|
+
SwabbleStartOptions,
|
|
20
|
+
SwabbleStartResult,
|
|
21
|
+
SwabbleStateEvent,
|
|
22
|
+
SwabbleTranscriptEvent,
|
|
23
|
+
SwabbleWakeWordEvent,
|
|
24
|
+
} from "../../src/definitions";
|
|
25
|
+
|
|
26
|
+
/** Union of every event payload this module hands to registered listeners. */
type SwabbleEvent =
  SwabbleWakeWordEvent | SwabbleTranscriptEvent | SwabbleStateEvent | SwabbleAudioLevelEvent | SwabbleErrorEvent;

/** Listener registry entry: a string event name paired with a callback taking a SwabbleEvent. */
type ListenerEntry = BaseListenerEntry<string, SwabbleEvent>;
|
|
34
|
+
|
|
35
|
+
/**
 * Type guard for "any non-null object".
 *
 * Arrays and other object values pass as well; only `null` and non-object
 * values (strings, numbers, functions, undefined, ...) are rejected.
 */
const isObjectRecord = (value: unknown): value is Record<string, unknown> => {
  if (value === null) {
    return false;
  }
  return typeof value === "object";
};
|
|
37
|
+
|
|
38
|
+
/**
 * Type guard for the recognizer state names accepted in a state event:
 * "idle", "listening", "processing" or "error" (exact, case-sensitive match).
 */
const isSwabbleState = (
  value: unknown,
): value is SwabbleStateEvent["state"] => {
  switch (value) {
    case "idle":
    case "listening":
    case "processing":
    case "error":
      return true;
    default:
      return false;
  }
};
|
|
43
|
+
/**
 * WakeWordGate detects trigger phrases in transcripts.
 *
 * Matching is text-only: a trigger matches when its lower-cased, trimmed form
 * occurs anywhere in the lower-cased transcript (plain substring search).
 *
 * NOTE: When using the Web Speech API fallback (no Whisper IPC),
 * word-level timing is unavailable. In that mode, `postGap` is -1
 * and minPostTriggerGap is not enforced.
 */
class WakeWordGate {
  /** Lower-cased, trimmed, non-empty trigger phrases, in configuration order. */
  private triggers: string[];
  /** Minimum `length` of the trimmed text after the trigger for a match to count. */
  private minCommandLength: number;

  /**
   * @param config Supplies `triggers` and, optionally, `minCommandLength`
   *   (defaults to 1 when absent).
   */
  constructor(config: SwabbleConfig) {
    this.triggers = WakeWordGate.normalizeTriggers(config.triggers);
    this.minCommandLength = config.minCommandLength ?? 1;
    // Note: minPostTriggerGap cannot be enforced - Web Speech API lacks timing data
  }

  /**
   * Lower-cases and trims each trigger and drops the ones that end up empty.
   *
   * An empty trigger must never be kept: `indexOf("")` is always 0, so it
   * would turn every transcript into a wake-word hit.
   */
  private static normalizeTriggers(triggers: readonly string[]): string[] {
    return triggers
      .map((t) => t.toLowerCase().trim())
      .filter((t) => t.length > 0);
  }

  /**
   * Applies a partial configuration. Only `triggers` and `minCommandLength`
   * are read; fields that are absent leave the current value untouched.
   */
  updateConfig(config: Partial<SwabbleConfig>): void {
    if (config.triggers) {
      this.triggers = WakeWordGate.normalizeTriggers(config.triggers);
    }
    if (config.minCommandLength !== undefined) {
      this.minCommandLength = config.minCommandLength;
    }
  }

  /**
   * Match wake word in transcript using text-only detection.
   * Returns postGap=-1 to indicate timing data is unavailable on desktop/web.
   *
   * Triggers are tried in order and only the first occurrence of each is
   * considered. A trigger whose trailing command is shorter than
   * `minCommandLength` is skipped and the next trigger is tried.
   *
   * @param transcript Recognized text in its original casing.
   * @returns The normalized trigger, the trimmed text after it (original
   *   casing), and `postGap: -1`; `null` when no trigger qualifies.
   */
  match(
    transcript: string,
  ): { wakeWord: string; command: string; postGap: number } | null {
    const normalizedTranscript = transcript.toLowerCase();

    for (const trigger of this.triggers) {
      const triggerIndex = normalizedTranscript.indexOf(trigger);
      if (triggerIndex === -1) continue;

      // Extract command after the trigger phrase.
      // NOTE(review): the offset is found in the lower-cased text but applied to
      // the original; this assumes lower-casing did not change the string length.
      const commandStart = triggerIndex + trigger.length;
      const command = transcript.slice(commandStart).trim();

      if (command.length < this.minCommandLength) continue;

      // postGap=-1 indicates timing unavailable on desktop/web platform
      return { wakeWord: trigger, command, postGap: -1 };
    }

    return null;
  }
}
|
|
95
|
+
|
|
96
|
+
/**
 * Swabble Plugin for Electrobun
 *
 * Uses Whisper.cpp via the Electrobun bridge when available for full timing parity,
 * with Web Speech API fallback when Whisper bindings are unavailable.
 *
 * Native mode: `start()` asks the bridge to start recognition, subscribes to
 * the bridge's wake-word / state / transcript / error events, and (when the
 * renderer RPC exposes `swabbleAudioChunk`) streams downsampled microphone
 * audio to it.
 *
 * Fallback mode: a `SpeechRecognition` instance runs in continuous mode and is
 * restarted whenever it ends while the plugin is still active; wake words are
 * detected from the text alone, so `postGap`, `start` and `duration` are -1.
 */
export class SwabbleElectrobun implements SwabblePlugin {
  /** Web Speech recognizer created by the fallback path of `start()`; cleared by `stop()`. */
  private recognition: SpeechRecognition | null = null;
  /** Configuration passed to the last `start()`, merged with later `updateConfig()` calls. */
  private config: SwabbleConfig | null = null;
  /** Text-only wake word matcher used by the Web Speech fallback. */
  private wakeGate: WakeWordGate | null = null;
  /** Whether a recognition session is considered running. */
  private isActive = false;
  /** Word segments of the most recently published fallback transcript. */
  private segments: SwabbleSpeechSegment[] = [];
  /** Fallback-mode level meter: audio context, analyser, stream and polling timer. */
  private audioContext: AudioContext | null = null;
  private analyser: AnalyserNode | null = null;
  private mediaStream: MediaStream | null = null;
  private levelInterval: ReturnType<typeof setInterval> | null = null;
  /** Registered event listeners, in registration order. */
  private listeners: ListenerEntry[] = [];
  /** Device id applied to subsequent `getUserMedia` calls; null means "any microphone". */
  private selectedDeviceId: string | null = null;
  /** Native-mode capture graph: stream, context, processor and muted output gain. */
  private captureStream: MediaStream | null = null;
  private captureContext: AudioContext | null = null;
  private captureProcessor: ScriptProcessorNode | null = null;
  private captureGain: GainNode | null = null;
  /** Sample rate audio is downsampled to before being sent to the bridge. */
  private captureSampleRate = 16000;
  /** Unsubscribe callbacks for the active bridge event subscriptions. */
  private bridgeSubscriptions: Array<() => void> = [];

  /**
   * Sends a request over the desktop bridge.
   *
   * @returns The bridge's result, or `null` when the request throws (errors
   *   are deliberately swallowed so callers can treat "no bridge" and
   *   "bridge failed" the same way).
   */
  private async invokeBridge<T>(
    rpcMethod: string,
    ipcChannel: string,
    params?: unknown,
  ): Promise<T | null> {
    try {
      return await invokeDesktopBridgeRequest<T>({
        rpcMethod,
        ipcChannel,
        params,
      });
    } catch {
      return null;
    }
  }

  /** Asks the bridge for the desktop platform identifier; `null` when unavailable. */
  private async getDesktopPlatform(): Promise<string | null> {
    return this.invokeBridge<string>(
      "permissionsGetPlatform",
      "permissions:getPlatform",
    );
  }

  /**
   * Starts listening. Returns `{ started: true }` immediately when already active.
   *
   * Native recognition is tried first; if the bridge does not report
   * `started`, the Web Speech API is used instead.
   *
   * @returns The bridge's result in native mode, `{ started: true }` once the
   *   fallback recognizer has been asked to start, or `{ started: false, error }`
   *   when neither backend is available.
   * @throws Whatever microphone capture throws in native mode; in that case
   *   the native session is stopped and local state is rolled back first.
   */
  async start(options: SwabbleStartOptions): Promise<SwabbleStartResult> {
    if (this.isActive) {
      return { started: true };
    }

    this.config = options.config;
    this.wakeGate = new WakeWordGate(options.config);
    this.segments = [];
    this.captureSampleRate = options.config.sampleRate ?? 16000;

    // Try native Whisper via the desktop bridge first
    const nativeResult = await this.invokeBridge<SwabbleStartResult>(
      "swabbleStart",
      "swabble:start",
      options,
    );
    if (nativeResult?.started) {
      this.isActive = true;
      // Drop any recognizer left over from an earlier fallback session so
      // isListening() consults the bridge from now on.
      this.recognition = null;
      this.setupNativeListeners();
      try {
        await this.startAudioCapture();
      } catch (error) {
        // Without audio the native session is useless; undo everything so a
        // later start() retries instead of short-circuiting on isActive.
        this.isActive = false;
        this.removeNativeListeners();
        this.stopAudioCapture();
        await this.invokeBridge("swabbleStop", "swabble:stop");
        throw error;
      }
      return nativeResult;
    }

    // Every other outcome (no bridge, bridge error, bridge present but unable
    // to start whisper.cpp) falls through to the Web Speech implementation.
    const SpeechRecognitionAPI =
      (window as Window & { SpeechRecognition?: typeof SpeechRecognition })
        .SpeechRecognition ||
      (
        window as Window & {
          webkitSpeechRecognition?: typeof SpeechRecognition;
        }
      ).webkitSpeechRecognition;

    if (!SpeechRecognitionAPI) {
      return {
        started: false,
        error: "Speech recognition not supported. Whisper.cpp is unavailable.",
      };
    }

    const recognition = new SpeechRecognitionAPI();
    this.recognition = recognition;
    recognition.continuous = true;
    recognition.interimResults = true;
    recognition.lang = options.config.locale || "en-US";

    recognition.onstart = () => {
      this.isActive = true;
      this.notifyListeners("stateChange", { state: "listening" });
    };

    recognition.onend = () => {
      if (this.isActive) {
        // Restart for continuous listening
        setTimeout(() => {
          // Only restart the recognizer that actually ended; a newer session
          // may have replaced it during the delay.
          if (this.isActive && this.recognition === recognition) {
            recognition.start();
          }
        }, 100);
      } else {
        this.notifyListeners("stateChange", { state: "idle" });
      }
    };

    recognition.onerror = (event) => {
      const recoverable =
        event.error === "no-speech" || event.error === "aborted";

      this.notifyListeners("error", {
        code: event.error,
        message: `Speech recognition error: ${event.error}`,
        recoverable,
      });

      if (!recoverable) {
        this.isActive = false;
        // The session is over; release the level-meter microphone stream.
        this.stopAudioLevelMonitoring();
        this.notifyListeners("stateChange", {
          state: "error",
          reason: event.error,
        });
      }
    };

    recognition.onresult = (event) => {
      this.handleSpeechResult(event);
    };

    await this.startAudioLevelMonitoring();
    recognition.start();

    return { started: true };
  }

  /**
   * Handles a Web Speech result event.
   *
   * In continuous mode `event.results` holds every result of the session, so
   * only entries from `event.resultIndex` onward (the ones that changed) are
   * read. Newly finalized text and still-interim text are published as
   * separate transcript events; the wake word is matched on final text only.
   */
  private handleSpeechResult(event: SpeechRecognitionEvent): void {
    let finalText = "";
    let interimText = "";
    let finalConfidence: number | undefined;
    let interimConfidence: number | undefined;

    for (let i = event.resultIndex ?? 0; i < event.results.length; i++) {
      const result = event.results[i];
      const best = result[0];
      if (!best) continue;

      if (result.isFinal) {
        finalText += best.transcript;
        finalConfidence = best.confidence;
      } else {
        interimText += best.transcript;
        interimConfidence = best.confidence;
      }
    }

    if (finalText.length > 0) {
      this.publishTranscript(finalText, true, finalConfidence);
    }
    if (interimText.length > 0) {
      this.publishTranscript(interimText, false, interimConfidence);
    }
  }

  /**
   * Emits one "transcript" event and, for final text that matches the wake
   * word gate, one "wakeWord" event.
   */
  private publishTranscript(
    transcript: string,
    isFinal: boolean,
    confidence: number | undefined,
  ): void {
    // Create segments from words - timing is unavailable on the web/desktop platform
    // start=-1 and duration=-1 indicate timing data is not available
    const words = transcript.split(/\s+/).filter((w) => w.length > 0);
    this.segments = words.map((text) => ({
      text,
      start: -1, // Unavailable on desktop/web
      duration: -1, // Unavailable on desktop/web
      isFinal,
    }));

    this.notifyListeners("transcript", {
      transcript,
      segments: this.segments,
      isFinal,
      confidence,
    });

    if (isFinal && this.wakeGate) {
      const match = this.wakeGate.match(transcript);
      if (match) {
        this.notifyListeners("wakeWord", {
          wakeWord: match.wakeWord,
          command: match.command,
          transcript,
          postGap: match.postGap,
          confidence,
        });
      }
    }
  }

  /**
   * Subscribes to the bridge's wake-word, state, transcript and error events
   * and forwards each to local listeners. Existing subscriptions are removed
   * first, so calling this repeatedly never duplicates events.
   */
  private setupNativeListeners(): void {
    this.removeNativeListeners();

    const bridgeHandlers = [
      {
        eventName: "wakeWord" as const,
        rpcMessage: "swabbleWakeWord",
        ipcChannel: "swabble:wakeWord",
        normalize: (data: unknown) => this.normalizeWakeWordEvent(data),
      },
      {
        eventName: "stateChange" as const,
        rpcMessage: "swabbleStateChanged",
        ipcChannel: "swabble:stateChange",
        normalize: (data: unknown) => this.normalizeStateEvent(data),
      },
      {
        eventName: "transcript" as const,
        rpcMessage: "swabbleTranscript",
        ipcChannel: "swabble:transcript",
        // Passed through unvalidated.
        normalize: (data: unknown) => data as SwabbleTranscriptEvent,
      },
      {
        eventName: "error" as const,
        rpcMessage: "swabbleError",
        ipcChannel: "swabble:error",
        // Passed through unvalidated.
        normalize: (data: unknown) => data as SwabbleErrorEvent,
      },
    ];

    for (const entry of bridgeHandlers) {
      const unsubscribe = subscribeDesktopBridgeEvent({
        rpcMessage: entry.rpcMessage,
        ipcChannel: entry.ipcChannel,
        listener: (data) => {
          this.notifyListeners(entry.eventName, entry.normalize(data));
        },
      });
      this.bridgeSubscriptions.push(unsubscribe);
    }
  }

  /** Cancels every active bridge event subscription. */
  private removeNativeListeners(): void {
    for (const unsubscribe of this.bridgeSubscriptions) {
      unsubscribe();
    }
    this.bridgeSubscriptions = [];
  }

  /**
   * Opens the microphone and streams downsampled audio to the bridge.
   *
   * Does nothing when capture is already running or when the renderer RPC has
   * no `swabbleAudioChunk` request. Also emits "audioLevel" for every
   * processed buffer.
   *
   * @throws Whatever `getUserMedia` or the audio graph setup throws; partially
   *   created resources are released before rethrowing.
   */
  private async startAudioCapture(): Promise<void> {
    if (
      this.captureContext ||
      !getElectrobunRendererRpc()?.request?.swabbleAudioChunk
    ) {
      return;
    }

    const constraints: MediaStreamConstraints = {
      audio: this.selectedDeviceId
        ? { deviceId: { exact: this.selectedDeviceId } }
        : true,
    };

    try {
      const stream = await navigator.mediaDevices.getUserMedia(constraints);
      this.captureStream = stream;
      const context = new AudioContext();
      this.captureContext = context;

      const source = context.createMediaStreamSource(stream);
      const processor = context.createScriptProcessor(4096, 1, 1);
      const gain = context.createGain();
      // Muted: the graph ends at the destination, but nothing should be audible.
      gain.gain.value = 0;

      this.captureProcessor = processor;
      this.captureGain = gain;

      const inputSampleRate = context.sampleRate;

      processor.onaudioprocess = (event: AudioProcessingEvent) => {
        const input = event.inputBuffer.getChannelData(0);
        const downsampled = this.downsampleBuffer(
          input,
          inputSampleRate,
          this.captureSampleRate,
        );
        if (downsampled.length > 0) {
          this.sendAudioChunk(downsampled);
        }

        const level = this.computeRms(input);
        const peak = this.computePeak(input);
        this.notifyListeners("audioLevel", { level, peak });
      };

      source.connect(processor);
      processor.connect(gain);
      gain.connect(context.destination);
    } catch (error) {
      // Do not leave an open microphone or a half-built graph behind.
      this.stopAudioCapture();
      throw error;
    }
  }

  /** Tears down the native-mode capture graph and releases the microphone. Safe to call repeatedly. */
  private stopAudioCapture(): void {
    if (this.captureProcessor) {
      this.captureProcessor.onaudioprocess = null;
      this.captureProcessor.disconnect();
      this.captureProcessor = null;
    }
    if (this.captureGain) {
      this.captureGain.disconnect();
      this.captureGain = null;
    }
    if (this.captureContext) {
      // close() can reject (e.g. already closed); nothing useful can be done then.
      void this.captureContext.close().catch(() => {});
      this.captureContext = null;
    }
    if (this.captureStream) {
      this.captureStream.getTracks().forEach((track) => {
        track.stop();
      });
      this.captureStream = null;
    }
  }

  /**
   * Downsamples by averaging each run of input samples that maps onto one
   * output sample.
   *
   * @returns `buffer` itself (not a copy) when `targetSampleRate` is not lower
   *   than `inputSampleRate`; otherwise a new array of
   *   `round(buffer.length / ratio)` samples.
   */
  private downsampleBuffer(
    buffer: Float32Array,
    inputSampleRate: number,
    targetSampleRate: number,
  ): Float32Array {
    if (targetSampleRate >= inputSampleRate) {
      return buffer;
    }

    const ratio = inputSampleRate / targetSampleRate;
    const newLength = Math.round(buffer.length / ratio);
    const result = new Float32Array(newLength);
    let offsetResult = 0;
    let offsetBuffer = 0;

    while (offsetResult < result.length) {
      const nextOffsetBuffer = Math.round((offsetResult + 1) * ratio);
      let acc = 0;
      let count = 0;
      for (
        let i = offsetBuffer;
        i < nextOffsetBuffer && i < buffer.length;
        i++
      ) {
        acc += buffer[i];
        count += 1;
      }
      result[offsetResult] = count > 0 ? acc / count : 0;
      offsetResult++;
      offsetBuffer = nextOffsetBuffer;
    }

    return result;
  }

  /** Root-mean-square of the samples; 0 for an empty buffer. */
  private computeRms(samples: Float32Array): number {
    if (samples.length === 0) {
      return 0;
    }
    let sum = 0;
    for (let i = 0; i < samples.length; i++) {
      sum += samples[i] * samples[i];
    }
    return Math.sqrt(sum / samples.length);
  }

  /** Largest absolute sample value; 0 for an empty buffer. */
  private computePeak(samples: Float32Array): number {
    let peak = 0;
    for (let i = 0; i < samples.length; i++) {
      const value = Math.abs(samples[i]);
      if (value > peak) peak = value;
    }
    return peak;
  }

  /**
   * Sends one audio buffer to the bridge as base64 of the raw Float32 bytes.
   * Fire-and-forget: send failures are ignored, and nothing is sent when the
   * RPC request is unavailable.
   */
  private sendAudioChunk(downsampled: Float32Array): void {
    const rpcRequest = getElectrobunRendererRpc()?.request?.swabbleAudioChunk;
    if (!rpcRequest) {
      return;
    }

    const bytes = new Uint8Array(
      downsampled.buffer,
      downsampled.byteOffset,
      downsampled.byteLength,
    );
    let binary = "";
    for (let i = 0; i < bytes.length; i++) {
      binary += String.fromCharCode(bytes[i]);
    }
    void rpcRequest({ data: btoa(binary) }).catch(() => {});
  }

  /**
   * Coerces a bridge payload into a wake word event. Missing or mistyped
   * fields become "" (strings), -1 (`postGap`) or undefined (`confidence`);
   * `trigger` is accepted as an alternative name for `wakeWord`.
   */
  private normalizeWakeWordEvent(data: unknown): SwabbleWakeWordEvent {
    if (!isObjectRecord(data)) {
      return {
        wakeWord: "",
        command: "",
        transcript: "",
        postGap: -1,
      };
    }

    return {
      wakeWord:
        typeof data.wakeWord === "string"
          ? data.wakeWord
          : typeof data.trigger === "string"
            ? data.trigger
            : "",
      command: typeof data.command === "string" ? data.command : "",
      transcript: typeof data.transcript === "string" ? data.transcript : "",
      postGap: typeof data.postGap === "number" ? data.postGap : -1,
      confidence:
        typeof data.confidence === "number" ? data.confidence : undefined,
    };
  }

  /**
   * Coerces a bridge payload into a state event. Accepts either a valid
   * `state` string or a boolean `listening` flag; anything else is "idle".
   */
  private normalizeStateEvent(data: unknown): SwabbleStateEvent {
    if (!isObjectRecord(data)) {
      return { state: "idle" };
    }

    if (isSwabbleState(data.state)) {
      return {
        state: data.state,
        reason: typeof data.reason === "string" ? data.reason : undefined,
      };
    }

    if (typeof data.listening === "boolean") {
      return { state: data.listening ? "listening" : "idle" };
    }

    return { state: "idle" };
  }

  /**
   * Starts the fallback-mode level meter, emitting "audioLevel" every 100 ms
   * from the analyser's byte frequency data (average and maximum bin, each
   * divided by 255).
   *
   * Best effort: failures are logged with `console.warn` and never thrown.
   * Any meter that is already running is torn down first.
   */
  private async startAudioLevelMonitoring(): Promise<void> {
    // Never overwrite a live stream/context/timer; they could not be released afterwards.
    this.stopAudioLevelMonitoring();

    try {
      const constraints: MediaStreamConstraints = {
        audio: this.selectedDeviceId
          ? { deviceId: { exact: this.selectedDeviceId } }
          : true,
      };
      const stream = await navigator.mediaDevices.getUserMedia(constraints);
      this.mediaStream = stream;
      const context = new AudioContext();
      this.audioContext = context;
      const analyser = context.createAnalyser();
      this.analyser = analyser;
      analyser.fftSize = 256;

      const source = context.createMediaStreamSource(stream);
      source.connect(analyser);

      const dataArray = new Uint8Array(analyser.frequencyBinCount);

      this.levelInterval = setInterval(() => {
        if (!this.analyser) return;

        this.analyser.getByteFrequencyData(dataArray);

        let sum = 0;
        let peak = 0;
        for (const value of dataArray) {
          sum += value;
          peak = Math.max(peak, value);
        }

        const average = sum / dataArray.length;
        const level = average / 255;
        const peakLevel = peak / 255;

        this.notifyListeners("audioLevel", { level, peak: peakLevel });
      }, 100);
    } catch (error) {
      // Release whatever was acquired before the failure.
      this.stopAudioLevelMonitoring();
      console.warn("Failed to start audio level monitoring:", error);
    }
  }

  /** Stops the fallback-mode level meter and releases its microphone stream. Safe to call repeatedly. */
  private stopAudioLevelMonitoring(): void {
    if (this.levelInterval) {
      clearInterval(this.levelInterval);
      this.levelInterval = null;
    }

    if (this.audioContext) {
      // close() can reject (e.g. already closed); nothing useful can be done then.
      void this.audioContext.close().catch(() => {});
      this.audioContext = null;
    }

    if (this.mediaStream) {
      this.mediaStream.getTracks().forEach((track) => {
        track.stop();
      });
      this.mediaStream = null;
    }

    this.analyser = null;
  }

  /**
   * Stops listening in both modes: releases audio resources, asks the bridge
   * to stop, stops the fallback recognizer, and emits an "idle" state change.
   */
  async stop(): Promise<void> {
    this.isActive = false;
    this.removeNativeListeners();
    this.stopAudioCapture();
    this.stopAudioLevelMonitoring();

    await this.invokeBridge("swabbleStop", "swabble:stop");

    if (this.recognition) {
      this.recognition.stop();
      this.recognition = null;
    }

    this.notifyListeners("stateChange", { state: "idle" });
  }

  /**
   * Reports whether recognition is running.
   *
   * While a Web Speech recognizer exists the local flag is authoritative; the
   * bridge knows nothing about that session, and adopting its answer would
   * clear `isActive` and stop the recognizer from restarting. Otherwise the
   * bridge's answer, when there is one, is returned and mirrored locally.
   */
  async isListening(): Promise<{ listening: boolean }> {
    if (this.recognition) {
      return { listening: this.isActive };
    }

    const nativeState = await this.invokeBridge<{ listening: boolean }>(
      "swabbleIsListening",
      "swabble:isListening",
    );
    if (nativeState) {
      this.isActive = nativeState.listening;
      return nativeState;
    }
    return { listening: this.isActive };
  }

  /** Returns the bridge's configuration when it supplies an object, else the local one. */
  async getConfig(): Promise<{ config: SwabbleConfig | null }> {
    const nativeConfig = await this.invokeBridge<Record<string, unknown>>(
      "swabbleGetConfig",
      "swabble:getConfig",
    );
    if (nativeConfig && isObjectRecord(nativeConfig)) {
      return { config: nativeConfig as SwabbleConfig };
    }
    return { config: this.config };
  }

  /**
   * Merges a partial configuration into the local copy (only if `start()` has
   * provided one) and forwards the same partial to the bridge.
   */
  async updateConfig(options: {
    config: Partial<SwabbleConfig>;
  }): Promise<void> {
    if (this.config) {
      this.config = { ...this.config, ...options.config };
      this.wakeGate?.updateConfig(options.config);
      this.captureSampleRate = this.config.sampleRate ?? this.captureSampleRate;
    }

    await this.invokeBridge(
      "swabbleUpdateConfig",
      "swabble:updateConfig",
      options.config,
    );
  }

  /**
   * Reports permission state without prompting.
   *
   * Microphone state comes from the Permissions API and stays "prompt" when
   * that query is unsupported. Speech recognition is "granted" when either a
   * Web Speech constructor exists or the bridge reports Whisper as available,
   * and "not_supported" otherwise.
   */
  async checkPermissions(): Promise<SwabblePermissionStatus> {
    let micStatus: "granted" | "denied" | "prompt" = "prompt";

    try {
      const result = await navigator.permissions.query({
        name: "microphone" as PermissionName,
      });
      micStatus = result.state as "granted" | "denied" | "prompt";
    } catch {
      // Permissions API may not support microphone query
    }

    const SpeechRecognitionAPI =
      (window as Window & { SpeechRecognition?: typeof SpeechRecognition })
        .SpeechRecognition ||
      (
        window as Window & {
          webkitSpeechRecognition?: typeof SpeechRecognition;
        }
      ).webkitSpeechRecognition;

    let speechRecognition: SwabblePermissionStatus["speechRecognition"] =
      SpeechRecognitionAPI ? "granted" : "not_supported";

    const whisperStatus = await this.invokeBridge<{ available: boolean }>(
      "swabbleIsWhisperAvailable",
      "swabble:isWhisperAvailable",
    );
    if (whisperStatus?.available) {
      speechRecognition = "granted";
    }

    return {
      microphone: micStatus,
      speechRecognition,
    };
  }

  /**
   * Requests microphone access by briefly opening a stream, then reports the
   * resulting permissions. On "win32" no stream is opened and the current
   * state is returned as-is. If opening the stream fails, microphone is
   * reported as "denied" and speech recognition as "not_supported".
   */
  async requestPermissions(): Promise<SwabblePermissionStatus> {
    if ((await this.getDesktopPlatform()) === "win32") {
      return this.checkPermissions();
    }

    try {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      stream.getTracks().forEach((track) => {
        track.stop();
      });
      return this.checkPermissions();
    } catch {
      return {
        microphone: "denied",
        speechRecognition: "not_supported",
      };
    }
  }

  /**
   * Lists audio input devices.
   *
   * Except on "win32", a microphone stream is opened first because device
   * labels require permission; it is kept open while enumerating and always
   * released afterwards. Exactly one device is flagged `isDefault`: the one
   * whose id is "default", or the first entry when there is none.
   *
   * @throws Whatever `getUserMedia` or `enumerateDevices` throws.
   */
  async getAudioDevices(): Promise<{
    devices: Array<{ id: string; name: string; isDefault: boolean }>;
  }> {
    let probe: MediaStream | null = null;

    try {
      if ((await this.getDesktopPlatform()) !== "win32") {
        // Ensure we have permission first (required to get device labels)
        probe = await navigator.mediaDevices.getUserMedia({ audio: true });
      }
      const devices = await navigator.mediaDevices.enumerateDevices();
      const audioInputs = devices.filter((d) => d.kind === "audioinput");
      const explicitDefault = audioInputs.findIndex(
        (d) => d.deviceId === "default",
      );
      const defaultIndex = explicitDefault >= 0 ? explicitDefault : 0;

      return {
        devices: audioInputs.map((d, i) => ({
          id: d.deviceId,
          name: d.label || `Microphone ${i + 1}`,
          isDefault: i === defaultIndex,
        })),
      };
    } finally {
      // The probe stream exists only to unlock labels; never leave the mic open.
      probe?.getTracks().forEach((track) => {
        track.stop();
      });
    }
  }

  /**
   * Selects the microphone used for capture.
   *
   * The id is always remembered and applied to later `getUserMedia` calls.
   * When native capture is running it is restarted on the new device;
   * otherwise the call throws after storing the id.
   *
   * @throws Error when native audio capture is not currently running.
   */
  async setAudioDevice(_options: { deviceId: string }): Promise<void> {
    this.selectedDeviceId = _options.deviceId;

    if (getElectrobunRendererRpc() && this.captureContext) {
      this.stopAudioCapture();
      await this.startAudioCapture();
      return;
    }

    throw new Error(
      "setAudioDevice is not supported for Web Speech API. " +
        "Use Whisper.cpp mode for device selection.",
    );
  }

  /**
   * Delivers `data` to every listener registered for `eventName`, in
   * registration order.
   *
   * Iterates a snapshot so listeners may add or remove listeners (including
   * themselves) from inside the callback without others being skipped.
   */
  private notifyListeners<
    T extends
      | SwabbleWakeWordEvent
      | SwabbleTranscriptEvent
      | SwabbleStateEvent
      | SwabbleAudioLevelEvent
      | SwabbleErrorEvent,
  >(eventName: string, data: T): void {
    const targets = this.listeners.filter(
      (listener) => listener.eventName === eventName,
    );
    for (const listener of targets) {
      (listener.callback as EventCallback<T>)(data);
    }
  }

  /**
   * Registers a listener for one of the plugin's events.
   *
   * @returns A handle whose `remove()` unregisters this exact registration.
   */
  async addListener(
    eventName: "wakeWord",
    listenerFunc: (event: SwabbleWakeWordEvent) => void,
  ): Promise<PluginListenerHandle>;
  async addListener(
    eventName: "transcript",
    listenerFunc: (event: SwabbleTranscriptEvent) => void,
  ): Promise<PluginListenerHandle>;
  async addListener(
    eventName: "stateChange",
    listenerFunc: (event: SwabbleStateEvent) => void,
  ): Promise<PluginListenerHandle>;
  async addListener(
    eventName: "audioLevel",
    listenerFunc: (event: SwabbleAudioLevelEvent) => void,
  ): Promise<PluginListenerHandle>;
  async addListener(
    eventName: "error",
    listenerFunc: (event: SwabbleErrorEvent) => void,
  ): Promise<PluginListenerHandle>;
  async addListener(
    eventName: string,
    listenerFunc: EventCallback<unknown>,
  ): Promise<PluginListenerHandle> {
    const entry: ListenerEntry = { eventName, callback: listenerFunc };
    this.listeners.push(entry);

    return {
      remove: async () => {
        const idx = this.listeners.indexOf(entry);
        if (idx >= 0) {
          this.listeners.splice(idx, 1);
        }
      },
    };
  }

  /** Unregisters every listener for every event. */
  async removeAllListeners(): Promise<void> {
    this.listeners = [];
  }
}
|
|
784
|
+
|
|
785
|
+
// Export the plugin instance for Capacitor registration
|
|
786
|
+
/**
 * Shared plugin instance, created when this module is first imported.
 * The class declares no constructor, so construction only sets the field
 * initializers; the bridge and microphone are not touched until a method
 * such as `start()` is called.
 */
export const Swabble = new SwabbleElectrobun();
|