@elizaos/capacitor-swabble 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,840 @@
1
+ package ai.eliza.plugins.swabble
2
+
3
+ import android.Manifest
4
+ import android.content.Context
5
+ import android.content.Intent
6
+ import android.media.AudioDeviceInfo
7
+ import android.media.AudioFocusRequest
8
+ import android.media.AudioManager
9
+ import android.os.Build
10
+ import android.os.Bundle
11
+ import android.speech.RecognitionListener
12
+ import android.speech.RecognizerIntent
13
+ import android.speech.SpeechRecognizer
14
+ import com.getcapacitor.JSArray
15
+ import com.getcapacitor.JSObject
16
+ import com.getcapacitor.Plugin
17
+ import com.getcapacitor.PluginCall
18
+ import com.getcapacitor.PluginMethod
19
+ import com.getcapacitor.annotation.CapacitorPlugin
20
+ import com.getcapacitor.annotation.Permission
21
+ import com.getcapacitor.annotation.PermissionCallback
22
+ import kotlinx.coroutines.*
23
+ import java.util.Locale
24
+ import kotlin.math.abs
25
+ import kotlin.math.min
26
+
27
+ /**
28
+ * Swabble (Voice Wake) Plugin for Capacitor Android
29
+ *
30
+ * Provides continuous voice wake word detection and speech-to-text using
31
+ * Android SpeechRecognizer with Levenshtein fuzzy matching, state machine,
32
+ * audio focus, and device enumeration.
33
+ *
34
+ * State machine: idle → listening → triggered → capturing → listening
35
+ */
36
+ @CapacitorPlugin(
37
+ name = "Swabble",
38
+ permissions = [
39
+ Permission(alias = "microphone", strings = [Manifest.permission.RECORD_AUDIO])
40
+ ]
41
+ )
42
+ class SwabblePlugin : Plugin() {
43
+
44
// ── State ───────────────────────────────────────────────────────────

/** Recognizer currently in use; reset to null when recognition is torn down. */
private var speechRecognizer: SpeechRecognizer? = null

/** Configuration last supplied from JavaScript; null until `start` has parsed one. */
private var config: SwabbleConfig? = null

/** Current position in the idle → listening → triggered → capturing state machine. */
private var currentState = SwabbleState.IDLE

/** Top transcript of the most recently handled recognition result. */
private var lastTranscript = ""

/** Command of the last emitted wake-word event; used to suppress exact repeats. */
private var lastDispatchedCommand: String? = null

/** Estimated word segments of the latest top transcript. Mutated in place, never reassigned. */
private val segments = mutableListOf<SpeechSegment>()

/** Scope for restart / silence timers; runs on the main dispatcher. */
private val scope = CoroutineScope(Dispatchers.Main + SupervisorJob())

/** Pending delayed restart of the recognizer, if any. */
private var restartJob: Job? = null

/** Pending silence timeout started while capturing, if any. */
private var silenceJob: Job? = null

/** Wall-clock time (ms) at which the current recognition session was started. */
private var segmentStartTime = 0L

/** Slot for a plugin call awaiting permission; nothing in this class reads it. */
private var pendingCall: PluginCall? = null

/** True once a stop has been requested; recognizer callbacks and timers check it before acting. */
private var stopRequested = false

// Audio focus
private var audioManager: AudioManager? = null
private var audioFocusRequest: AudioFocusRequest? = null
private var hasAudioFocus = false

/** Device id chosen through `setAudioDevice`; null means no explicit choice. */
private var selectedDeviceId: String? = null

// Silence detection
/** Wall-clock time (ms) at which speech activity was last observed. */
private var lastSpeechTime = 0L
private val silenceThresholdMs = 1500L // ms of silence before ending capture
68
+
69
+ // ── Data classes ────────────────────────────────────────────────────
70
+
71
/**
 * States of the recognition state machine.
 *
 * @property value string placed in the `state` field of `stateChange` events.
 */
enum class SwabbleState(val value: String) {
    IDLE(value = "idle"),
    LISTENING(value = "listening"),
    TRIGGERED(value = "triggered"),
    CAPTURING(value = "capturing"),
    ERROR(value = "error"),
}
78
+
79
/**
 * Wake-word settings exchanged with the JavaScript layer.
 *
 * @property triggers wake words / phrases to look for.
 * @property minPostTriggerGap gap value (seconds) reported with matches when no estimate is available.
 * @property minCommandLength minimum number of characters a command must have to be dispatched.
 * @property locale language tag passed to the recognizer intent.
 * @property sampleRate stored and echoed back; not consumed elsewhere in this class.
 */
data class SwabbleConfig(
    var triggers: List<String>,
    var minPostTriggerGap: Double,
    var minCommandLength: Int,
    var locale: String,
    var sampleRate: Int
) {
    /** Serialises this configuration using the same keys that [fromJSObject] reads. */
    fun toJSObject(): JSObject {
        val json = JSObject()
        json.put("triggers", JSArray(triggers))
        json.put("minPostTriggerGap", minPostTriggerGap)
        json.put("minCommandLength", minCommandLength)
        json.put("locale", locale)
        json.put("sampleRate", sampleRate)
        return json
    }

    companion object {
        /**
         * Builds a configuration from [obj], substituting a default for every missing key:
         * triggers `["eliza"]`, gap `0.45`, command length `1`, the device's default locale
         * tag, and sample rate `16000`.
         */
        fun fromJSObject(obj: JSObject): SwabbleConfig {
            val rawTriggers = obj.optJSONArray("triggers")
            val parsedTriggers: List<String> = when (rawTriggers) {
                null -> listOf("eliza")
                else -> (0 until rawTriggers.length()).map { index -> rawTriggers.getString(index) }
            }

            val gap = obj.optDouble("minPostTriggerGap", 0.45)
            val commandLength = obj.optInt("minCommandLength", 1)
            val localeTag = obj.optString("locale", Locale.getDefault().toLanguageTag())
            val rate = obj.optInt("sampleRate", 16000)

            return SwabbleConfig(parsedTriggers, gap, commandLength, localeTag, rate)
        }
    }
}
115
+
116
/**
 * One word of a transcript with its estimated timing, in seconds.
 *
 * @property text the word.
 * @property start offset of the word from the beginning of the transcript.
 * @property duration length of the word.
 */
data class SpeechSegment(val text: String, val start: Double, val duration: Double) {
    /** Offset at which the word finishes: [start] plus [duration]. */
    val end: Double
        get() = start + duration
}
123
+
124
/**
 * Result of a successful wake-word detection.
 *
 * @property wakeWord the configured trigger that matched.
 * @property command the text following the trigger.
 * @property postGap gap, in seconds, reported between trigger and command.
 */
data class WakeWordMatch(val wakeWord: String, val command: String, val postGap: Double)
129
+
130
+ // ── Plugin methods ──────────────────────────────────────────────────
131
+
132
/**
 * Starts wake-word listening with the `config` object carried by [call].
 *
 * Rejects the call when `config` is missing. When the microphone permission has not been
 * granted yet, a permission request is issued and recognition is started from
 * `handlePermissionResult`; otherwise recognition starts immediately.
 */
@PluginMethod
fun start(call: PluginCall) {
    val configObj = call.getObject("config")
    if (configObj == null) {
        call.reject("Missing config parameter")
        return
    }

    config = SwabbleConfig.fromJSObject(configObj)

    if (!hasRequiredPermissions()) {
        // The call is handed back to the permission callback as its argument, so it is not
        // additionally parked in a field (which was never read or cleared and only kept a
        // stale PluginCall alive).
        requestPermissionForAlias("microphone", call, "handlePermissionResult")
        return
    }

    startRecognition(call)
}
150
+
151
/** Tears down recognition, moves the state machine to idle, then resolves [call]. */
@PluginMethod
fun stop(call: PluginCall) {
    // Teardown first so that no recognizer callback fires after the idle transition.
    stopRecognitionInternal()
    transitionState(newState = SwabbleState.IDLE)
    call.resolve()
}
157
+
158
/**
 * Resolves [call] with `{ listening: Boolean }`; true while the state machine is in the
 * listening, triggered or capturing state.
 */
@PluginMethod
fun isListening(call: PluginCall) {
    val active = when (currentState) {
        SwabbleState.LISTENING,
        SwabbleState.TRIGGERED,
        SwabbleState.CAPTURING -> true
        SwabbleState.IDLE,
        SwabbleState.ERROR -> false
    }
    val payload = JSObject()
    payload.put("listening", active)
    call.resolve(payload)
}
166
+
167
/** Resolves [call] with `{ config }`: the current configuration, or JSON null when none is set. */
@PluginMethod
fun getConfig(call: PluginCall) {
    val snapshot = config
    val payload = JSObject()
    if (snapshot != null) {
        payload.put("config", snapshot.toJSObject())
    } else {
        payload.put("config", JSObject.NULL)
    }
    call.resolve(payload)
}
175
+
176
/**
 * Merges the keys present in the `config` object of [call] into the current configuration.
 *
 * Rejects the call when `config` is missing. Keys that are absent, or whose value cannot be
 * read as the expected type, leave the corresponding setting unchanged instead of throwing
 * (the previous `getX` / `!!` reads threw on malformed input, leaving the call unsettled).
 * When no configuration exists yet there is nothing to merge into and the call simply resolves.
 */
@PluginMethod
fun updateConfig(call: PluginCall) {
    val configObj = call.getObject("config")
    if (configObj == null) {
        call.reject("Missing config parameter")
        return
    }

    config?.let { current ->
        val triggers = configObj.optJSONArray("triggers")
            ?.let { arr -> (0 until arr.length()).map { arr.optString(it) } }
            ?: current.triggers

        // Build the new value and swap the reference once, so readers never observe a
        // half-updated configuration.
        config = current.copy(
            triggers = triggers,
            minPostTriggerGap = configObj.optDouble("minPostTriggerGap", current.minPostTriggerGap),
            minCommandLength = configObj.optInt("minCommandLength", current.minCommandLength),
            locale = configObj.getString("locale") ?: current.locale,
            sampleRate = configObj.optInt("sampleRate", current.sampleRate)
        )
    }

    call.resolve()
}
205
+
206
/** Resolves [call] with the permission summary produced by `buildPermissionResult`. */
@PluginMethod
override fun checkPermissions(call: PluginCall) {
    val summary = buildPermissionResult()
    call.resolve(summary)
}
210
+
211
/**
 * Requests the permission registered under the `microphone` alias; the outcome is reported
 * through `handlePermissionCheckResult`.
 */
@PluginMethod
override fun requestPermissions(call: PluginCall) {
    val alias = "microphone"
    val callbackName = "handlePermissionCheckResult"
    requestPermissionForAlias(alias, call, callbackName)
}
215
+
216
/**
 * Resolves [call] with `{ devices: [{ id, name, isDefault }] }` describing the audio inputs.
 *
 * On API 23+ the inputs come from the platform audio manager; the entry whose id equals the
 * selected device id (or the first input when none is selected) is flagged as default. When
 * nothing is enumerated a single synthetic "default" entry is returned.
 */
@PluginMethod
fun getAudioDevices(call: PluginCall) {
    val manager = getAudioManager()
    val deviceList = JSArray()

    if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.M) {
        val inputs = manager.getDevices(AudioManager.GET_DEVICES_INPUTS)
        inputs.forEach { input ->
            val idText = input.id.toString()
            val productSuffix = if (input.productName.isNotEmpty()) " (${input.productName})" else ""
            val defaultId = selectedDeviceId ?: inputs.firstOrNull()?.id?.toString()

            val entry = JSObject()
            entry.put("id", idText)
            entry.put("name", getDeviceTypeName(input.type) + productSuffix)
            entry.put("isDefault", idText == defaultId)
            deviceList.put(entry)
        }
    }

    // Guarantee at least one entry for callers.
    if (deviceList.length() == 0) {
        val fallback = JSObject()
        fallback.put("id", "default")
        fallback.put("name", "Default Microphone")
        fallback.put("isDefault", true)
        deviceList.put(fallback)
    }

    val payload = JSObject()
    payload.put("devices", deviceList)
    call.resolve(payload)
}
246
+
247
/**
 * Records the `deviceId` carried by [call] as the preferred input device.
 *
 * Rejects the call when `deviceId` is missing. Within this class the stored id is only
 * consulted by `getAudioDevices` (for its `isDefault` flag); the recognizer itself is not
 * re-routed. The previous device lookup here had an empty body and no effect, so it has
 * been removed.
 */
@PluginMethod
fun setAudioDevice(call: PluginCall) {
    val deviceId = call.getString("deviceId")
    if (deviceId == null) {
        call.reject("Missing deviceId")
        return
    }

    selectedDeviceId = deviceId
    call.resolve()
}
272
+
273
+ // ── Permission callbacks ────────────────────────────────────────────
274
+
275
/**
 * Permission callback for `start`: begins recognition when the microphone permission is now
 * granted, otherwise resolves [call] with `started = false` and an error message.
 */
@PermissionCallback
private fun handlePermissionResult(call: PluginCall) {
    if (!hasRequiredPermissions()) {
        val denied = JSObject()
        denied.put("started", false)
        denied.put("error", "Microphone permission denied")
        call.resolve(denied)
        return
    }
    startRecognition(call)
}
286
+
287
/** Permission callback for `requestPermissions`: resolves [call] with the permission summary. */
@PermissionCallback
private fun handlePermissionCheckResult(call: PluginCall) {
    val summary = buildPermissionResult()
    call.resolve(summary)
}
291
+
292
+ // ── Recognition lifecycle ───────────────────────────────────────────
293
+
294
/**
 * Creates a fresh recognizer and starts listening, settling [call] with the outcome.
 *
 * Resolves with `started = false` plus an error message when speech recognition is not
 * available or start-up throws; rejects when no configuration has been set; resolves with
 * `started = true` otherwise. Any previous session is torn down first. If start-up fails,
 * the half-initialised recognizer is destroyed and audio focus is released so that a failed
 * start does not leak either resource.
 */
private fun startRecognition(call: PluginCall) {
    if (!SpeechRecognizer.isRecognitionAvailable(context)) {
        call.resolve(JSObject().apply {
            put("started", false)
            put("error", "Speech recognition not available on this device")
        })
        return
    }

    val cfg = config
    if (cfg == null) {
        call.reject("Configuration not set")
        return
    }

    // Stop any existing recognition; this sets stopRequested, so clear it again afterwards.
    stopRecognitionInternal()
    stopRequested = false

    requestAudioFocus()

    activity.runOnUiThread {
        try {
            speechRecognizer = SpeechRecognizer.createSpeechRecognizer(context)
            speechRecognizer?.setRecognitionListener(createRecognitionListener())

            segmentStartTime = System.currentTimeMillis()
            lastSpeechTime = segmentStartTime
            speechRecognizer?.startListening(createRecognitionIntent(cfg))

            transitionState(SwabbleState.LISTENING)

            call.resolve(JSObject().apply {
                put("started", true)
            })
        } catch (err: Throwable) {
            // Best-effort cleanup: a failure while destroying must not mask the original error.
            runCatching { speechRecognizer?.destroy() }
            speechRecognizer = null
            abandonAudioFocus()

            transitionState(SwabbleState.ERROR, "Start failed: ${err.message}")
            call.resolve(JSObject().apply {
                put("started", false)
                put("error", err.message ?: "Unknown error")
            })
        }
    }
}
339
+
340
/**
 * Tears down the active recognition session: flags the stop, cancels pending timers, destroys
 * the recognizer on the UI thread, releases audio focus and clears transcript bookkeeping.
 * Does not change the state machine; callers transition state themselves.
 */
private fun stopRecognitionInternal() {
    stopRequested = true

    // Cancel timers that could otherwise restart or re-transition after the stop.
    restartJob?.cancel()
    silenceJob?.cancel()
    restartJob = null
    silenceJob = null
    lastDispatchedCommand = null

    activity.runOnUiThread {
        speechRecognizer?.run {
            stopListening()
            cancel()
            destroy()
        }
        speechRecognizer = null
    }

    abandonAudioFocus()
    segments.clear()
    lastTranscript = ""
}
359
+
360
/**
 * Builds the recognizer intent: free-form language model, the locale from [config], partial
 * results enabled, up to three alternatives, and this app's package as the calling package.
 */
private fun createRecognitionIntent(config: SwabbleConfig): Intent {
    val intent = Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH)
    intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, RecognizerIntent.LANGUAGE_MODEL_FREE_FORM)
    intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE, config.locale)
    intent.putExtra(RecognizerIntent.EXTRA_PARTIAL_RESULTS, true)
    intent.putExtra(RecognizerIntent.EXTRA_MAX_RESULTS, 3)
    intent.putExtra(RecognizerIntent.EXTRA_CALLING_PACKAGE, context.packageName)
    return intent
}
369
+
370
/**
 * Creates the listener that drives the plugin from recognizer callbacks: it forwards audio
 * levels, transcripts and errors to JavaScript listeners and schedules recognizer restarts
 * so that listening continues after each result or error.
 */
private fun createRecognitionListener(): RecognitionListener = object : RecognitionListener {

    /** Recognizer is ready: report `listening` unless a capture is in progress. */
    override fun onReadyForSpeech(params: Bundle?) {
        if (currentState == SwabbleState.CAPTURING) return
        transitionState(SwabbleState.LISTENING)
    }

    override fun onBeginningOfSpeech() {
        lastSpeechTime = System.currentTimeMillis()
    }

    /** Emits an `audioLevel` event with the level normalised to 0..1. */
    override fun onRmsChanged(rmsdB: Float) {
        // Maps the -2..10 dB range onto 0..1, clamping anything outside it.
        val normalized = ((rmsdB + 2f) / 12f).coerceIn(0f, 1f).toDouble()
        val payload = JSObject()
        payload.put("level", normalized)
        payload.put("peak", normalized)
        notifyListeners("audioLevel", payload)

        // Positive readings count as speech activity.
        if (rmsdB > 0f) {
            lastSpeechTime = System.currentTimeMillis()
        }
    }

    override fun onBufferReceived(buffer: ByteArray?) = Unit

    /** While capturing, the end of speech arms the silence timer. */
    override fun onEndOfSpeech() {
        if (currentState != SwabbleState.CAPTURING) return
        startSilenceTimer()
    }

    /**
     * Reports the error to listeners, then either stops in the error state (missing
     * permission) or schedules a restart: quickly for recoverable errors, after a longer
     * delay (and an error-state transition) for the rest. Ignored once a stop was requested.
     */
    override fun onError(error: Int) {
        if (stopRequested) return

        val errorMessage = getErrorMessage(error)
        val recoverable = when (error) {
            SpeechRecognizer.ERROR_NO_MATCH,
            SpeechRecognizer.ERROR_SPEECH_TIMEOUT,
            SpeechRecognizer.ERROR_CLIENT -> true
            else -> false
        }

        val payload = JSObject()
        payload.put("code", error.toString())
        payload.put("message", errorMessage)
        payload.put("recoverable", recoverable)
        notifyListeners("error", payload)

        if (error == SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS) {
            transitionState(SwabbleState.ERROR, "Microphone permission required")
            return
        }

        if (!recoverable) {
            transitionState(SwabbleState.ERROR, errorMessage)
        }
        scheduleRestart(delayMs = if (recoverable) 500L else 2000L)
    }

    /** Handles the final result, then schedules a restart unless a stop was requested. */
    override fun onResults(results: Bundle?) {
        handleResults(results, isFinal = true)
        if (stopRequested) return
        scheduleRestart(delayMs = 350L)
    }

    override fun onPartialResults(partialResults: Bundle?) {
        handleResults(partialResults, isFinal = false)
    }

    override fun onEvent(eventType: Int, params: Bundle?) = Unit
}
453
+
454
+ // ── Result handling ─────────────────────────────────────────────────
455
+
456
/**
 * Processes one recognizer result bundle.
 *
 * Emits a `transcript` event for the top hypothesis, then checks every alternative for a
 * wake word. The first alternative that yields a not-yet-dispatched command produces a
 * `wakeWord` event and drives the state machine through triggered → capturing → listening.
 *
 * Each alternative is matched against segments estimated from that same alternative, and
 * the wake-word event reports the confidence score at the alternative's own index.
 * Previously the top hypothesis' segments and first score were used for every alternative,
 * which mismatched word indices and confidences.
 *
 * @param results bundle delivered by the recognizer; ignored when it has no hypotheses.
 * @param isFinal whether the bundle is a final (as opposed to partial) result.
 */
private fun handleResults(results: Bundle?, isFinal: Boolean) {
    val matches = results?.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION)
    val confidence = results?.getFloatArray(SpeechRecognizer.CONFIDENCE_SCORES)

    if (matches.isNullOrEmpty()) return

    val transcript = matches[0]
    if (transcript.isBlank()) return

    // Timing is estimated from word count; no real word timings are read here.
    segments.clear()
    segments.addAll(estimateSegments(transcript))

    val jsSegments = JSArray()
    for (segment in segments) {
        jsSegments.put(JSObject().apply {
            put("text", segment.text)
            put("start", segment.start)
            put("duration", segment.duration)
            put("isFinal", isFinal)
        })
    }

    notifyListeners("transcript", JSObject().apply {
        put("transcript", transcript)
        put("segments", jsSegments)
        put("isFinal", isFinal)
        put("confidence", confidence?.firstOrNull()?.toDouble() ?: 0.0)
    })

    // Check for wake word — use all recognition alternatives for robustness
    val cfg = config ?: return
    for ((index, alternative) in matches.withIndex()) {
        val alternativeSegments = if (index == 0) segments else estimateSegments(alternative)
        val match = matchWakeWord(alternative, alternativeSegments, cfg) ?: continue

        // Dedup: skip if we already dispatched this exact command
        if (match.command == lastDispatchedCommand) continue
        lastDispatchedCommand = match.command

        transitionState(SwabbleState.TRIGGERED)

        notifyListeners("wakeWord", JSObject().apply {
            put("wakeWord", match.wakeWord)
            put("command", match.command)
            put("transcript", alternative)
            put("postGap", match.postGap)
            // The score array is indexed like the hypothesis list.
            put("confidence", confidence?.getOrNull(index)?.toDouble() ?: 0.0)
        })

        // Move to capturing state briefly, then back to listening
        scope.launch {
            transitionState(SwabbleState.CAPTURING)
            delay(650)
            if (currentState == SwabbleState.CAPTURING && !stopRequested) {
                transitionState(SwabbleState.LISTENING)
            }
        }

        break
    }

    lastTranscript = transcript
}

/** Splits [text] on whitespace and assigns each word an estimated 0.3 s duration with 0.1 s gaps. */
private fun estimateSegments(text: String): List<SpeechSegment> {
    val avgWordDuration = 0.3
    var time = 0.0
    return text.split("\\s+".toRegex())
        .filter { it.isNotEmpty() }
        .map { word ->
            SpeechSegment(text = word, start = time, duration = avgWordDuration)
                .also { time += avgWordDuration + 0.1 }
        }
}
533
+
534
+ // ── Wake word matching (regex + Levenshtein fuzzy) ──────────────────
535
+
536
/**
 * Two-pass wake word matching:
 * 1. Exact regex match via `extractCommandExact`, tried for each trigger in order.
 * 2. Fuzzy match: every window of the transcript with as many words as a trigger is compared
 *    to that trigger by Levenshtein distance and accepted when the distance is at most 30%
 *    of the longer string.
 *
 * In both passes the command is cleaned of leading punctuation *before* it is compared with
 * `minCommandLength`, so a command consisting only of punctuation is rejected consistently.
 *
 * @param transcript recognised text to search.
 * @param segments estimated per-word timing for [transcript], used for the fuzzy gap estimate.
 * @param config triggers and thresholds to apply.
 * @return the first match found, or null when no trigger is followed by a long-enough command.
 */
private fun matchWakeWord(
    transcript: String,
    segments: List<SpeechSegment>,
    config: SwabbleConfig
): WakeWordMatch? {
    // Pass 1: exact regex match
    for (trigger in config.triggers) {
        val command = extractCommandExact(transcript, trigger)
        if (command != null && command.length >= config.minCommandLength) {
            return WakeWordMatch(
                wakeWord = trigger,
                command = command,
                postGap = config.minPostTriggerGap
            )
        }
    }

    // Pass 2: fuzzy match. Splitting is hoisted out of the nested loops.
    val whitespace = "\\s+".toRegex()
    val words = transcript.split(whitespace).filter { it.isNotEmpty() }
    val triggerPhrases = config.triggers
        .map { trigger -> trigger to trigger.split(whitespace).filter { it.isNotEmpty() } }
        .filter { (_, triggerWords) -> triggerWords.isNotEmpty() } // blank triggers can never match

    for (wordIndex in words.indices) {
        for ((trigger, triggerWords) in triggerPhrases) {
            val triggerLen = triggerWords.size

            // Check if enough words remain to form the trigger
            if (wordIndex + triggerLen > words.size) continue

            val candidate = words.subList(wordIndex, wordIndex + triggerLen).joinToString(" ").lowercase()
            // Compare against the whitespace-normalised trigger so stray spaces do not count as edits.
            val normalizedTrigger = triggerWords.joinToString(" ").lowercase()
            val distance = levenshteinDistance(candidate, normalizedTrigger)
            val maxLen = maxOf(candidate.length, normalizedTrigger.length)

            // Accept if within 30% edit distance (fuzzy threshold)
            if (maxLen == 0 || distance.toDouble() / maxLen > 0.3) continue

            val commandStart = wordIndex + triggerLen
            if (commandStart >= words.size) continue

            val command = cleanCommand(words.subList(commandStart, words.size).joinToString(" "))
            if (command.length < config.minCommandLength) continue

            // Estimate post-trigger gap from segments; fall back to the configured value
            // when the segment list does not cover the command's first word.
            val gap = if (commandStart < segments.size) {
                segments[commandStart].start - segments[commandStart - 1].end
            } else {
                config.minPostTriggerGap
            }

            return WakeWordMatch(
                wakeWord = trigger,
                command = command,
                postGap = gap
            )
        }
    }

    return null
}
600
+
601
/**
 * Extracts the command that follows [trigger] in [text] using a case-insensitive regex.
 *
 * The trigger must start the text or follow whitespace and must end on a word boundary;
 * whitespace and punctuation directly after it are skipped.
 *
 * @return the cleaned remainder of the text, or null when the text or trigger is blank, the
 *   trigger is not found, or nothing follows it.
 */
private fun extractCommandExact(text: String, trigger: String): String? {
    val haystack = text.trim()
    if (haystack.isEmpty()) return null

    val needle = trigger.trim().lowercase()
    if (needle.isEmpty()) return null

    val pattern = Regex("(?i)(?:^|\\s)(${Regex.escape(needle)})\\b[\\s\\p{Punct}]*([\\s\\S]+)$")
    val remainder = pattern.find(haystack)?.groupValues?.getOrNull(2)?.trim()

    return if (remainder.isNullOrEmpty()) null else cleanCommand(remainder)
}
620
+
621
/** Drops leading whitespace and punctuation from [text], then trims surrounding whitespace. */
private fun cleanCommand(text: String): String =
    text.dropWhile { ch -> ch.isWhitespace() || ch.isPunctuation() }.trim()
625
+
626
/**
 * True when this character belongs to one of the Unicode punctuation categories
 * (connector, dash, start, end, initial-quote, final-quote or other punctuation).
 */
private fun Char.isPunctuation(): Boolean = when (category) {
    CharCategory.CONNECTOR_PUNCTUATION,
    CharCategory.DASH_PUNCTUATION,
    CharCategory.START_PUNCTUATION,
    CharCategory.END_PUNCTUATION,
    CharCategory.INITIAL_QUOTE_PUNCTUATION,
    CharCategory.FINAL_QUOTE_PUNCTUATION,
    CharCategory.OTHER_PUNCTUATION -> true
    else -> false
}
638
+
639
/**
 * Levenshtein edit distance between [a] and [b]: the minimum number of single-character
 * insertions, deletions and substitutions that turn one into the other.
 *
 * Uses one DP row of `b.length + 1` cells that is updated in place.
 */
private fun levenshteinDistance(a: String, b: String): Int {
    if (a.isEmpty()) return b.length
    if (b.isEmpty()) return a.length

    // row[j] holds the distance between the processed prefix of `a` and the first j chars of `b`.
    val row = IntArray(b.length + 1) { it }

    for ((i, charA) in a.withIndex()) {
        // `diagonal` carries the previous row's value one column to the left.
        var diagonal = row[0]
        row[0] = i + 1

        for ((j, charB) in b.withIndex()) {
            val above = row[j + 1]
            val substitution = diagonal + if (charA == charB) 0 else 1
            val deletion = above + 1
            val insertion = row[j] + 1
            row[j + 1] = minOf(deletion, insertion, substitution)
            diagonal = above
        }
    }

    return row[b.length]
}
669
+
670
+ // ── State machine ───────────────────────────────────────────────────
671
+
672
/**
 * Moves the state machine to [newState] and emits a `stateChange` event carrying the state's
 * string value and, when given, the [reason]. Does nothing if the state is unchanged.
 */
private fun transitionState(newState: SwabbleState, reason: String? = null) {
    if (newState == currentState) return
    currentState = newState

    val event = JSObject()
    event.put("state", newState.value)
    reason?.let { event.put("reason", it) }
    notifyListeners("stateChange", event)
}
683
+
684
+ // ── Restart / silence detection ─────────────────────────────────────
685
+
686
/**
 * Schedules the recognizer to start listening again after [delayMs] milliseconds, replacing
 * any restart that is already pending. Nothing happens if a stop has been requested before
 * or during the delay, or if no configuration is set.
 *
 * A failure thrown while restarting is reported through an `error` event and an error-state
 * transition. It was previously swallowed on the assumption that `onError` would retry, but
 * a synchronous throw does not reach that listener callback.
 */
private fun scheduleRestart(delayMs: Long = 350) {
    if (stopRequested) return
    restartJob?.cancel()
    restartJob = scope.launch {
        delay(delayMs)
        if (!stopRequested) {
            activity.runOnUiThread {
                if (stopRequested) return@runOnUiThread
                val cfg = config ?: return@runOnUiThread
                try {
                    segmentStartTime = System.currentTimeMillis()
                    lastSpeechTime = segmentStartTime
                    // A new session may legitimately repeat the previous command.
                    lastDispatchedCommand = null
                    speechRecognizer?.cancel()
                    speechRecognizer?.startListening(createRecognitionIntent(cfg))
                } catch (err: Throwable) {
                    val message = "Restart failed: ${err.message}"
                    notifyListeners("error", JSObject().apply {
                        put("code", "RESTART_FAILED")
                        put("message", message)
                        put("recoverable", false)
                    })
                    transitionState(SwabbleState.ERROR, message)
                }
            }
        }
    }
}
708
+
709
/**
 * (Re)arms the silence timer: once the silence threshold has elapsed, a capture that is
 * still in progress (and has not been stopped) falls back to the listening state.
 */
private fun startSilenceTimer() {
    silenceJob?.cancel()
    silenceJob = scope.launch {
        delay(silenceThresholdMs)
        val stillCapturing = currentState == SwabbleState.CAPTURING
        if (!stillCapturing || stopRequested) return@launch
        transitionState(SwabbleState.LISTENING)
    }
}
719
+
720
+ // ── Audio focus ─────────────────────────────────────────────────────
721
+
722
/** Returns the cached [AudioManager], looking it up from the context on first use. */
private fun getAudioManager(): AudioManager =
    audioManager
        ?: (context.getSystemService(Context.AUDIO_SERVICE) as AudioManager)
            .also { audioManager = it }
728
+
729
/**
 * Focus-change listener shared by the modern and legacy request paths.
 *
 * Keeping a single instance matters on pre-O devices: the legacy abandon call identifies the
 * request by its listener, so the listener used for the request must be the one passed when
 * abandoning.
 */
private val audioFocusChangeListener = AudioManager.OnAudioFocusChangeListener { focusChange ->
    when (focusChange) {
        AudioManager.AUDIOFOCUS_LOSS -> {
            // Another app took focus permanently — stop
            hasAudioFocus = false
            if (!stopRequested) {
                stopRecognitionInternal()
                transitionState(SwabbleState.IDLE, "Audio focus lost")
            }
        }
        AudioManager.AUDIOFOCUS_LOSS_TRANSIENT -> {
            // Temporary loss (e.g. phone call) — report it, keep the session alive
            hasAudioFocus = false
            notifyListeners("error", JSObject().apply {
                put("code", "AUDIO_FOCUS_LOST")
                put("message", "Audio focus temporarily lost")
                put("recoverable", true)
            })
        }
        AudioManager.AUDIOFOCUS_GAIN -> {
            hasAudioFocus = true
        }
    }
}

/**
 * Requests transient (may-duck) audio focus and records whether it was granted.
 * Uses [AudioFocusRequest] on API 26+ and the deprecated stream-based call below that.
 */
private fun requestAudioFocus() {
    val am = getAudioManager()

    if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
        val focusRequest = AudioFocusRequest.Builder(AudioManager.AUDIOFOCUS_GAIN_TRANSIENT_MAY_DUCK)
            .setOnAudioFocusChangeListener(audioFocusChangeListener)
            .build()

        audioFocusRequest = focusRequest
        val result = am.requestAudioFocus(focusRequest)
        hasAudioFocus = result == AudioManager.AUDIOFOCUS_REQUEST_GRANTED
    } else {
        @Suppress("DEPRECATION")
        val result = am.requestAudioFocus(
            audioFocusChangeListener,
            AudioManager.STREAM_MUSIC,
            AudioManager.AUDIOFOCUS_GAIN_TRANSIENT_MAY_DUCK
        )
        hasAudioFocus = result == AudioManager.AUDIOFOCUS_REQUEST_GRANTED
    }
}

/**
 * Releases audio focus obtained by [requestAudioFocus] and clears the related bookkeeping.
 * On pre-O devices the shared listener is passed so that the platform can match the request;
 * passing `null` there left the focus request in place.
 */
private fun abandonAudioFocus() {
    val am = getAudioManager()
    if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
        audioFocusRequest?.let { am.abandonAudioFocusRequest(it) }
    } else {
        @Suppress("DEPRECATION")
        am.abandonAudioFocus(audioFocusChangeListener)
    }
    hasAudioFocus = false
    audioFocusRequest = null
}
783
+
784
+ // ── Helpers ──────────────────────────────────────────────────────────
785
+
786
/** True when the permission registered under the `microphone` alias is granted. */
override fun hasRequiredPermissions(): Boolean {
    val microphoneState = getPermissionState("microphone")
    return microphoneState == com.getcapacitor.PermissionState.GRANTED
}
789
+
790
/**
 * Builds the permission summary returned to JavaScript:
 * `microphone` is `granted`, `denied` or `prompt`; `speechRecognition` is `granted` when the
 * platform reports recognition as available and `not_supported` otherwise.
 */
private fun buildPermissionResult(): JSObject {
    val microphone = when (getPermissionState("microphone")) {
        com.getcapacitor.PermissionState.GRANTED -> "granted"
        com.getcapacitor.PermissionState.DENIED -> "denied"
        else -> "prompt"
    }
    val speechRecognition =
        if (SpeechRecognizer.isRecognitionAvailable(context)) "granted" else "not_supported"

    val summary = JSObject()
    summary.put("microphone", microphone)
    summary.put("speechRecognition", speechRecognition)
    return summary
}
803
+
804
/** Maps a [SpeechRecognizer] error code to a short English description. */
private fun getErrorMessage(error: Int): String = when (error) {
    // Audio / client side
    SpeechRecognizer.ERROR_AUDIO -> "Audio recording error"
    SpeechRecognizer.ERROR_CLIENT -> "Client error"
    SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS -> "Insufficient permissions"
    SpeechRecognizer.ERROR_RECOGNIZER_BUSY -> "Recognizer busy"
    // Network / server side
    SpeechRecognizer.ERROR_NETWORK -> "Network error"
    SpeechRecognizer.ERROR_NETWORK_TIMEOUT -> "Network timeout"
    SpeechRecognizer.ERROR_SERVER -> "Server error"
    // Nothing usable was heard
    SpeechRecognizer.ERROR_NO_MATCH -> "No speech match"
    SpeechRecognizer.ERROR_SPEECH_TIMEOUT -> "Speech timeout"
    else -> "Unknown error: $error"
}
818
+
819
/** Returns a display label for an [AudioDeviceInfo] type constant; unknown types map to "Audio Input". */
private fun getDeviceTypeName(type: Int): String = when (type) {
    AudioDeviceInfo.TYPE_BUILTIN_MIC -> "Built-in Microphone"
    AudioDeviceInfo.TYPE_WIRED_HEADSET -> "Wired Headset"
    AudioDeviceInfo.TYPE_BLUETOOTH_SCO -> "Bluetooth SCO"
    AudioDeviceInfo.TYPE_BLUETOOTH_A2DP -> "Bluetooth A2DP"
    AudioDeviceInfo.TYPE_USB_DEVICE -> "USB Device"
    AudioDeviceInfo.TYPE_USB_ACCESSORY -> "USB Accessory"
    AudioDeviceInfo.TYPE_TELEPHONY -> "Telephony"
    else -> "Audio Input"
}
832
+
833
+ // ── Lifecycle ───────────────────────────────────────────────────────
834
+
835
/** Plugin teardown: stops recognition and cancels the coroutine scope used for timers. */
override fun handleOnDestroy() {
    super.handleOnDestroy()
    // Tear down the recognizer and timers before cancelling the scope that owns them.
    stopRecognitionInternal()
    scope.cancel()
}
840
+ }