@elizaos/capacitor-swabble 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,461 @@
1
+ import { WebPlugin } from "@capacitor/core";
2
/**
 * Returns the global `window` object, or `null` when no `window` global
 * is defined in the current runtime.
 *
 * @returns {object|null} The global window, or `null`.
 */
function getDesktopBridgeWindow() {
  return typeof window === "undefined" ? null : window;
}
8
/**
 * Looks up the renderer RPC handle stored on the window under
 * `__ELIZA_ELECTROBUN_RPC__`.
 *
 * @returns {object|null} The handle, or `null` when there is no window or
 *   the property is null/undefined.
 */
function getElectrobunRendererRpc() {
  const bridgeWindow = getDesktopBridgeWindow();
  if (bridgeWindow === null || bridgeWindow === undefined) {
    return null;
  }
  const handle = bridgeWindow.__ELIZA_ELECTROBUN_RPC__;
  return handle ?? null;
}
12
/**
 * Calls a request handler on the renderer RPC handle, if one is available.
 *
 * @param {object} options
 * @param {string} options.rpcMethod - Key of the handler under `rpc.request`.
 * @param {*} [options.params] - Value passed to the handler as its only argument.
 * @returns {Promise<*>} The awaited handler result, or `null` when the RPC
 *   handle, its `request` table, or the named handler is missing.
 */
async function invokeDesktopBridgeRequest(options) {
  const handler = getElectrobunRendererRpc()?.request?.[options.rpcMethod];
  if (!handler) {
    return null;
  }
  return await handler(options.params);
}
20
/**
 * Registers a listener for a message on the renderer RPC handle.
 *
 * @param {object} options
 * @param {string} options.rpcMessage - Message name passed to `onMessage`/`offMessage`.
 * @param {Function} options.listener - Callback passed to `onMessage`/`offMessage`.
 * @returns {Function} A function that removes the listener again; a no-op
 *   when no RPC handle was available at subscription time.
 */
function subscribeDesktopBridgeEvent(options) {
  const rpc = getElectrobunRendererRpc();
  if (!rpc) {
    return () => { };
  }
  rpc.onMessage(options.rpcMessage, options.listener);
  const unsubscribe = () => {
    rpc.offMessage(options.rpcMessage, options.listener);
  };
  return unsubscribe;
}
30
/**
 * Resolves the Web Speech API recognition constructor.
 *
 * Checks the standard `SpeechRecognition` name first and then the
 * `webkit`-prefixed one.
 *
 * @returns {Function|null} The constructor, or `null` when neither name is
 *   present or when there is no `window` global at all.
 */
const getSpeechRecognition = () => {
  // Guard the global lookup so this returns null instead of throwing a
  // ReferenceError in runtimes without `window`.
  if (typeof window === "undefined") {
    return null;
  }
  return window.SpeechRecognition || window.webkitSpeechRecognition || null;
};
33
/**
 * WakeWordGate detects trigger phrases in transcripts.
 *
 * LIMITATION: Web Speech API does not provide word-level timing data.
 * Unlike native implementations, we cannot measure post-trigger gaps.
 * The `postGap` returned is always -1 (unavailable), and minPostTriggerGap is ignored.
 * Detection is purely text-based: trigger phrase + subsequent command text.
 */
class WakeWordGate {
  /**
   * @param {object} config
   * @param {string[]} [config.triggers] - Trigger phrases; matched case-insensitively.
   * @param {number} [config.minCommandLength=1] - Minimum length (in characters)
   *   of the text following the trigger for a match to count.
   */
  constructor(config) {
    this.triggers = WakeWordGate.normalizeTriggers(config?.triggers);
    this.minCommandLength = config?.minCommandLength ?? 1;
    // Note: minPostTriggerGap cannot be enforced - Web Speech API lacks timing data
  }

  /**
   * Lower-cases and trims trigger phrases, dropping anything unusable.
   *
   * Empty phrases must be removed: `indexOf("")` is always 0, so an empty
   * trigger would turn every transcript into a wake-word match.
   *
   * @param {*} triggers - Candidate trigger list.
   * @returns {string[]} Normalized, non-empty trigger phrases.
   */
  static normalizeTriggers(triggers) {
    if (!Array.isArray(triggers)) {
      return [];
    }
    return triggers
      .filter((t) => typeof t === "string")
      .map((t) => t.toLowerCase().trim())
      .filter((t) => t.length > 0);
  }

  /**
   * Applies a partial configuration; fields that are absent are left as is.
   *
   * @param {object} config
   * @param {string[]} [config.triggers]
   * @param {number} [config.minCommandLength]
   */
  updateConfig(config) {
    if (config?.triggers) {
      this.triggers = WakeWordGate.normalizeTriggers(config.triggers);
    }
    if (config?.minCommandLength !== undefined) {
      this.minCommandLength = config.minCommandLength;
    }
  }

  /**
   * Match wake word in transcript using text-only detection.
   * Returns postGap=-1 to indicate timing data is unavailable on web.
   *
   * @param {string} transcript - Recognized text.
   * @returns {{wakeWord: string, command: string, postGap: number}|null}
   *   The first configured trigger found together with the trimmed text
   *   after it, or `null` when no trigger yields a long enough command.
   */
  match(transcript) {
    if (typeof transcript !== "string") {
      return null;
    }
    const normalizedTranscript = transcript.toLowerCase();
    // Indices found in the lower-cased text only line up with the original
    // when both have the same length; lower-casing can lengthen some
    // characters. In that case slice the normalized text instead so the
    // command is never cut at the wrong offset.
    const source = normalizedTranscript.length === transcript.length
      ? transcript
      : normalizedTranscript;
    for (const trigger of this.triggers) {
      const triggerIndex = normalizedTranscript.indexOf(trigger);
      if (triggerIndex === -1) {
        continue;
      }
      // Extract command after the trigger phrase
      const command = source.slice(triggerIndex + trigger.length).trim();
      if (command.length < this.minCommandLength) {
        continue;
      }
      // postGap=-1 indicates timing unavailable on web platform
      return { wakeWord: trigger, command, postGap: -1 };
    }
    return null;
  }
}
74
/**
 * Web implementation of the Swabble speech plugin.
 *
 * `start()` first tries the desktop bridge (renderer RPC): when the
 * `swabbleStart` request reports `started`, microphone audio is captured
 * here and forwarded in chunks via `swabbleAudioChunk`, and bridge messages
 * are re-emitted as plugin events. Otherwise it falls back to the browser's
 * Web Speech API with text-only wake-word detection.
 *
 * Events emitted through `notifyListeners`: "stateChange", "transcript",
 * "wakeWord", "audioLevel" and "error".
 */
export class SwabbleWeb extends WebPlugin {
  constructor(...args) {
    super(...args);
    this.recognition = null;
    this.config = null;
    this.wakeGate = null;
    this.isActive = false;
    this.segments = [];
    this.audioContext = null;
    this.analyser = null;
    this.mediaStream = null;
    this.levelInterval = null;
    // Native IPC state (Electrobun)
    this.captureStream = null;
    this.captureContext = null;
    this.captureProcessor = null;
    this.bridgeSubscriptions = [];
    this.usingNativeIpc = false;
  }

  /** @returns {object|null} The renderer RPC handle, or `null` when absent. */
  getRendererRpc() {
    return getElectrobunRendererRpc() ?? null;
  }

  /** Subscribes to a bridge message and remembers the unsubscribe function. */
  subscribeDesktopEvent(options) {
    this.bridgeSubscriptions.push(subscribeDesktopBridgeEvent(options));
  }

  /** Sends a request over the desktop bridge; resolves to `null` when unavailable. */
  async invokeDesktopRequest(options) {
    return await invokeDesktopBridgeRequest(options);
  }

  /**
   * (Re)registers the bridge listeners that mirror native events as plugin
   * events. Existing subscriptions are removed first so repeated calls do
   * not stack listeners.
   */
  setupNativeListeners() {
    this.removeNativeListeners();
    this.subscribeDesktopEvent({
      rpcMessage: "swabbleWakeWord",
      ipcChannel: "swabble:wakeWord",
      listener: (payload) => {
        this.notifyListeners("wakeWord", payload);
      },
    });
    this.subscribeDesktopEvent({
      rpcMessage: "swabbleStateChanged",
      ipcChannel: "swabble:stateChange",
      listener: (payload) => {
        // Anything other than a literal `true` (including a missing
        // payload) counts as "not listening".
        const listening = payload?.listening === true;
        this.isActive = listening;
        this.notifyListeners("stateChange", {
          state: listening ? "listening" : "idle",
        });
      },
    });
    this.subscribeDesktopEvent({
      rpcMessage: "swabbleTranscript",
      ipcChannel: "swabble:transcript",
      listener: (payload) => {
        this.notifyListeners("transcript", payload);
      },
    });
    this.subscribeDesktopEvent({
      rpcMessage: "swabbleError",
      ipcChannel: "swabble:error",
      listener: (payload) => {
        this.notifyListeners("error", payload);
      },
    });
  }

  /** Removes every bridge listener registered through `subscribeDesktopEvent`. */
  removeNativeListeners() {
    for (const unsubscribe of this.bridgeSubscriptions) {
      unsubscribe();
    }
    this.bridgeSubscriptions = [];
  }

  /**
   * Requests an audio-only microphone stream.
   *
   * @returns {Promise<object|null>} The stream, or `null` when the request
   *   is rejected or `navigator.mediaDevices` is not available.
   */
  async openMicrophone() {
    try {
      return await navigator.mediaDevices.getUserMedia({ audio: true });
    }
    catch {
      // Best effort: callers treat a missing stream as "no audio available".
      return null;
    }
  }

  /**
   * Reduces the sample rate by averaging consecutive input samples.
   *
   * @param {Float32Array} input - Source samples.
   * @param {number} ratio - Input rate divided by output rate.
   * @returns {Float32Array} `round(input.length / ratio)` averaged samples.
   */
  downsample(input, ratio) {
    const out = new Float32Array(Math.round(input.length / ratio));
    for (let i = 0; i < out.length; i++) {
      let acc = 0;
      let cnt = 0;
      const start = Math.round(i * ratio);
      const end = Math.round((i + 1) * ratio);
      for (let j = start; j < end && j < input.length; j++) {
        acc += input[j];
        cnt++;
      }
      out[i] = cnt > 0 ? acc / cnt : 0;
    }
    return out;
  }

  /**
   * Base64-encodes the raw bytes of a Float32Array.
   *
   * @param {Float32Array} samples
   * @returns {string} Base64 text of the underlying bytes.
   */
  encodePcmChunk(samples) {
    const bytes = new Uint8Array(samples.buffer, samples.byteOffset, samples.byteLength);
    let binary = "";
    for (let i = 0; i < bytes.length; i++) {
      binary += String.fromCharCode(bytes[i]);
    }
    return btoa(binary);
  }

  /**
   * Captures microphone audio, emits "audioLevel" for each buffer and, when
   * the bridge exposes `swabbleAudioChunk`, forwards each buffer resampled
   * to `sampleRate` as base64-encoded float samples.
   *
   * Does nothing when the microphone cannot be opened.
   *
   * @param {number} [sampleRate=16000] - Target rate for forwarded audio.
   */
  async startNativeAudioCapture(sampleRate = 16000) {
    const rpcRequest = this.getRendererRpc()?.request?.swabbleAudioChunk;
    const stream = await this.openMicrophone();
    if (!stream) {
      return;
    }
    // Release a capture graph left over from an earlier call so its
    // microphone track is not orphaned when the fields are overwritten.
    this.stopNativeAudioCapture();
    this.captureStream = stream;
    // A non-positive or non-finite rate would make the ratio unusable.
    const targetRate = Number.isFinite(sampleRate) && sampleRate > 0 ? sampleRate : 16000;
    try {
      const context = new AudioContext();
      this.captureContext = context;
      const source = context.createMediaStreamSource(stream);
      const processor = context.createScriptProcessor(4096, 1, 1);
      this.captureProcessor = processor;
      const ratio = context.sampleRate / targetRate;
      processor.onaudioprocess = (e) => {
        const input = e.inputBuffer.getChannelData(0);
        this.notifyListeners("audioLevel", {
          level: this.computeRms(input),
          peak: this.computePeak(input),
        });
        if (!rpcRequest) {
          // Nothing to forward to; skip the resample/encode work.
          return;
        }
        const chunk = this.encodePcmChunk(this.downsample(input, ratio));
        // Fire and forget: a dropped chunk must not break the audio callback.
        void rpcRequest({ data: chunk }).catch(() => { });
      };
      source.connect(processor);
      // Route through a muted gain node so the processor is connected to
      // the destination without the microphone being audible.
      const sink = context.createGain();
      sink.gain.value = 0;
      processor.connect(sink);
      sink.connect(context.destination);
    }
    catch {
      // Building the audio graph failed; release the microphone again.
      this.stopNativeAudioCapture();
    }
  }

  /**
   * @param {ArrayLike<number>} samples
   * @returns {number} Root mean square of the samples; 0 for an empty buffer.
   */
  computeRms(samples) {
    if (samples.length === 0) {
      // Avoid 0/0 -> NaN.
      return 0;
    }
    let sum = 0;
    for (let i = 0; i < samples.length; i++) {
      sum += samples[i] * samples[i];
    }
    return Math.sqrt(sum / samples.length);
  }

  /**
   * @param {ArrayLike<number>} samples
   * @returns {number} Largest absolute sample value; 0 for an empty buffer.
   */
  computePeak(samples) {
    let peak = 0;
    for (let i = 0; i < samples.length; i++) {
      const value = Math.abs(samples[i]);
      if (value > peak) {
        peak = value;
      }
    }
    return peak;
  }

  /** Tears down the native capture graph and stops the microphone tracks. */
  stopNativeAudioCapture() {
    if (this.captureProcessor) {
      this.captureProcessor.onaudioprocess = null;
      this.captureProcessor.disconnect();
    }
    this.captureProcessor = null;
    // close() returns a promise; a rejection here is not actionable.
    void this.captureContext?.close().catch(() => { });
    this.captureContext = null;
    this.captureStream?.getTracks().forEach((t) => {
      t.stop();
    });
    this.captureStream = null;
  }

  /**
   * Starts listening.
   *
   * @param {object} options
   * @param {object} options.config - Recognition configuration (fields read
   *   here: `triggers`, `minCommandLength`, `locale`, `sampleRate`).
   * @returns {Promise<{started: boolean, error?: string}>} The bridge's
   *   result when the native path started, otherwise the Web Speech outcome.
   */
  async start(options) {
    if (this.isActive) {
      return { started: true };
    }
    // Delegate to the native desktop bridge when available.
    if (this.getRendererRpc()) {
      const nativeResult = await this.startNative(options);
      if (nativeResult) {
        return nativeResult;
      }
      // Fall through to Web Speech API
    }
    return this.startWebSpeech(options);
  }

  /**
   * Attempts to start through the desktop bridge.
   *
   * @returns {Promise<object|null>} The bridge result when it reported
   *   `started`, or `null` when the bridge declined or any step failed.
   */
  async startNative(options) {
    let nativeStarted = false;
    try {
      const result = await this.invokeDesktopRequest({
        rpcMethod: "swabbleStart",
        ipcChannel: "swabble:start",
        params: options,
      });
      if (!result?.started) {
        return null;
      }
      nativeStarted = true;
      this.isActive = true;
      this.usingNativeIpc = true;
      this.config = options.config;
      this.setupNativeListeners();
      await this.startNativeAudioCapture(options.config.sampleRate ?? 16000);
      return result;
    }
    catch {
      if (nativeStarted) {
        // The bridge started but local setup failed. Undo the native state
        // so the Web Speech fallback does not run on top of a half-started
        // native session.
        this.isActive = false;
        this.usingNativeIpc = false;
        this.removeNativeListeners();
        this.stopNativeAudioCapture();
        void this.invokeDesktopRequest({
          rpcMethod: "swabbleStop",
          ipcChannel: "swabble:stop",
        }).catch(() => { });
      }
      return null;
    }
  }

  /**
   * Starts recognition through the Web Speech API.
   *
   * @returns {Promise<{started: boolean, error?: string}>}
   */
  async startWebSpeech(options) {
    const SpeechRecognitionAPI = getSpeechRecognition();
    if (!SpeechRecognitionAPI) {
      return {
        started: false,
        error: "Speech recognition not supported in this browser",
      };
    }
    // A previous session may have left a recognizer or microphone behind;
    // release them before their handles are overwritten.
    this.disposeWebRecognition();
    this.config = options.config;
    this.wakeGate = new WakeWordGate(options.config);
    this.segments = [];
    const recognition = new SpeechRecognitionAPI();
    recognition.continuous = true;
    recognition.interimResults = true;
    recognition.lang = options.config.locale || "en-US";
    recognition.onstart = () => {
      this.isActive = true;
      this.notifyListeners("stateChange", { state: "listening" });
    };
    recognition.onend = () => {
      if (this.recognition !== recognition) {
        // Stopped via stop() or superseded by a newer session; whoever
        // replaced it already reported the state change.
        return;
      }
      if (this.isActive) {
        // The engine ended on its own while we still want to listen.
        try {
          recognition.start();
          return;
        }
        catch (err) {
          this.isActive = false;
          this.notifyListeners("error", {
            code: "restart-failed",
            message: `Speech recognition restart failed: ${err?.message ?? err}`,
            recoverable: false,
          });
        }
      }
      // Session is over: release the recognizer and the microphone.
      this.recognition = null;
      this.stopAudioLevelMonitoring();
      this.notifyListeners("stateChange", { state: "idle" });
    };
    recognition.onerror = (event) => {
      const recoverable = event.error === "no-speech" || event.error === "aborted";
      this.notifyListeners("error", {
        code: event.error,
        message: `Speech recognition error: ${event.error}`,
        recoverable,
      });
      // Only the current recognizer may change the plugin state.
      if (!recoverable && this.recognition === recognition) {
        this.isActive = false;
        this.notifyListeners("stateChange", {
          state: "error",
          reason: event.error,
        });
      }
    };
    recognition.onresult = (event) => this.handleSpeechResult(event);
    this.recognition = recognition;
    await this.startAudioLevelMonitoring();
    if (this.recognition !== recognition) {
      // stop() (or another start) ran while the microphone prompt was open.
      this.stopAudioLevelMonitoring();
      return {
        started: false,
        error: "Speech recognition was stopped before it could start",
      };
    }
    try {
      recognition.start();
    }
    catch (err) {
      this.disposeWebRecognition();
      return {
        started: false,
        error: `Failed to start speech recognition: ${err?.message ?? err}`,
      };
    }
    return { started: true };
  }

  /**
   * Detaches and aborts the current recognizer (if any) and stops audio
   * level monitoring. Emits no events.
   */
  disposeWebRecognition() {
    const stale = this.recognition;
    this.recognition = null;
    if (stale) {
      stale.onstart = null;
      stale.onend = null;
      stale.onerror = null;
      stale.onresult = null;
      try {
        stale.abort();
      }
      catch {
        /* recognizer was not running */
      }
    }
    this.stopAudioLevelMonitoring();
  }

  /**
   * Turns a recognition result event into "transcript" and "wakeWord" events.
   *
   * Only results from `event.resultIndex` onward are read: in continuous
   * mode the result list also holds earlier results, and rescanning them
   * would keep reporting `isFinal` and re-firing the wake word for text
   * that was already handled.
   *
   * @param {object} event - Recognition result event (`results`, `resultIndex`).
   */
  handleSpeechResult(event) {
    const results = event.results;
    const firstChanged = Number.isInteger(event.resultIndex) && event.resultIndex >= 0
      ? event.resultIndex
      : 0;
    let transcript = "";
    let finalTranscript = "";
    let changedCount = 0;
    let finalCount = 0;
    for (let i = firstChanged; i < results.length; i++) {
      const text = results[i]?.[0]?.transcript ?? "";
      transcript += text;
      changedCount++;
      if (results[i]?.isFinal) {
        finalTranscript += text;
        finalCount++;
      }
    }
    // The event is final only when every changed result is final.
    const isFinal = changedCount > 0 && finalCount === changedCount;
    // Web Speech API does not provide word-level timing.
    // Segments are provided for API compatibility; start/duration are the
    // sentinel -1, not measurements.
    const words = transcript.split(/\s+/).filter(Boolean);
    this.segments = words.map((text) => ({
      text,
      start: -1, // Unavailable on web
      duration: -1, // Unavailable on web
      isFinal,
    }));
    const lastResult = results[results.length - 1];
    const confidence = lastResult?.[0]?.confidence;
    this.notifyListeners("transcript", {
      transcript,
      segments: this.segments,
      isFinal,
      confidence,
    });
    // Wake words are checked against newly finalized text only.
    if (finalTranscript && this.wakeGate) {
      const match = this.wakeGate.match(finalTranscript);
      if (match) {
        this.notifyListeners("wakeWord", {
          ...match,
          transcript: finalTranscript,
          confidence,
        });
      }
    }
  }

  /**
   * Opens the microphone and emits an "audioLevel" event every 100 ms with
   * the mean and maximum of the analyser's byte frequency data, scaled to
   * 0..1. Does nothing when the microphone cannot be opened.
   */
  async startAudioLevelMonitoring() {
    const stream = await this.openMicrophone();
    if (!stream) {
      return;
    }
    // Never overwrite live handles: release any previous monitor first.
    this.stopAudioLevelMonitoring();
    this.mediaStream = stream;
    try {
      this.audioContext = new AudioContext();
      this.analyser = this.audioContext.createAnalyser();
      this.analyser.fftSize = 256;
      this.audioContext.createMediaStreamSource(stream).connect(this.analyser);
    }
    catch {
      // Level metering is optional; release the microphone and carry on.
      this.stopAudioLevelMonitoring();
      return;
    }
    const dataArray = new Uint8Array(this.analyser.frequencyBinCount);
    this.levelInterval = setInterval(() => {
      if (!this.analyser) {
        return;
      }
      this.analyser.getByteFrequencyData(dataArray);
      let sum = 0;
      let peak = 0;
      for (const value of dataArray) {
        sum += value;
        if (value > peak) {
          peak = value;
        }
      }
      this.notifyListeners("audioLevel", {
        level: dataArray.length > 0 ? sum / dataArray.length / 255 : 0,
        peak: peak / 255,
      });
    }, 100);
  }

  /** Stops the level timer, closes the audio context and releases the microphone. */
  stopAudioLevelMonitoring() {
    if (this.levelInterval) {
      clearInterval(this.levelInterval);
    }
    this.levelInterval = null;
    // close() returns a promise; a rejection here is not actionable.
    void this.audioContext?.close().catch(() => { });
    this.audioContext = null;
    this.mediaStream?.getTracks().forEach((t) => {
      t.stop();
    });
    this.mediaStream = null;
    this.analyser = null;
  }

  /** Stops listening on whichever back end is active and emits an "idle" state. */
  async stop() {
    this.isActive = false;
    // Clean up native IPC if in native mode
    if (this.usingNativeIpc) {
      this.usingNativeIpc = false;
      this.removeNativeListeners();
      this.stopNativeAudioCapture();
      // Best effort: local teardown is already done, so a failed stop
      // request must not surface as an unhandled rejection.
      void this.invokeDesktopRequest({
        rpcMethod: "swabbleStop",
        ipcChannel: "swabble:stop",
      }).catch(() => { });
      this.notifyListeners("stateChange", { state: "idle" });
      return;
    }
    if (this.recognition) {
      const recognition = this.recognition;
      // Clear the reference first so the recognizer's own `onend` sees it
      // has been retired and does not restart or report "idle" again.
      this.recognition = null;
      try {
        recognition.stop();
      }
      catch {
        /* recognizer was not running */
      }
    }
    this.stopAudioLevelMonitoring();
    this.notifyListeners("stateChange", { state: "idle" });
  }

  /** @returns {Promise<{listening: boolean}>} */
  async isListening() {
    return { listening: this.isActive };
  }

  /** @returns {Promise<{config: object|null}>} The configuration last stored by start/updateConfig. */
  async getConfig() {
    return { config: this.config };
  }

  /**
   * Merges a partial configuration into the current one and forwards it to
   * the native side when the native back end is in use. A call without
   * `options.config` is a no-op.
   *
   * @param {object} options
   * @param {object} options.config - Fields to change.
   */
  async updateConfig(options) {
    const patch = options?.config;
    if (!patch) {
      return;
    }
    if (this.config) {
      this.config = { ...this.config, ...patch };
      this.wakeGate?.updateConfig(patch);
      if (patch.locale && this.recognition) {
        this.recognition.lang = patch.locale;
      }
    }
    // Sync to native IPC if active
    if (this.usingNativeIpc) {
      void this.invokeDesktopRequest({
        rpcMethod: "swabbleUpdateConfig",
        ipcChannel: "swabble:updateConfig",
        params: patch,
      }).catch(() => { });
    }
  }

  /**
   * Reports microphone and speech-recognition availability.
   *
   * `microphone` stays "prompt" when the Permissions API cannot answer.
   * `speechRecognition` is "granted" when either the Web Speech API exists
   * or the bridge reports Whisper as available, otherwise "not_supported".
   *
   * @returns {Promise<{microphone: string, speechRecognition: string}>}
   */
  async checkPermissions() {
    let microphone = "prompt";
    try {
      const result = await navigator.permissions.query({
        name: "microphone",
      });
      microphone = result.state;
    }
    catch {
      /* permissions.query not supported for microphone in some browsers */
    }
    let speechRecognition = getSpeechRecognition() ? "granted" : "not_supported";
    try {
      const whisperStatus = await this.invokeDesktopRequest({
        rpcMethod: "swabbleIsWhisperAvailable",
        ipcChannel: "swabble:isWhisperAvailable",
      });
      if (whisperStatus?.available) {
        speechRecognition = "granted";
      }
    }
    catch {
      /* bridge request failed; keep the Web Speech answer */
    }
    return {
      microphone,
      speechRecognition,
    };
  }

  /**
   * Triggers the microphone prompt by briefly opening a stream, then
   * reports the resulting permissions. Reports both as "denied" when the
   * stream cannot be opened.
   *
   * @returns {Promise<{microphone: string, speechRecognition: string}>}
   */
  async requestPermissions() {
    try {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      stream.getTracks().forEach((track) => {
        track.stop();
      });
      return this.checkPermissions();
    }
    catch {
      return {
        microphone: "denied",
        speechRecognition: "denied",
      };
    }
  }

  /**
   * Lists audio input devices.
   *
   * @returns {Promise<{devices: Array<{id: string, name: string, isDefault: boolean}>}>}
   *   An empty list when enumeration fails.
   */
  async getAudioDevices() {
    try {
      const devices = await navigator.mediaDevices.enumerateDevices();
      const audioInputs = devices
        .filter((d) => d.kind === "audioinput")
        .map((d, i) => ({
          id: d.deviceId,
          name: d.label || `Microphone ${i + 1}`,
          isDefault: d.deviceId === "default",
        }));
      return { devices: audioInputs };
    }
    catch {
      return { devices: [] };
    }
  }

  /**
   * Not supported on web.
   *
   * @throws {Error} Always.
   */
  async setAudioDevice(_options) {
    // Web Speech API doesn't support device selection directly.
    // The browser uses its default audio input device.
    throw new Error("setAudioDevice is not supported on web platform - browser uses system default audio input");
  }
}