@elizaos/capacitor-swabble 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ElizaosCapacitorSwabble.podspec +18 -0
- package/android/build.gradle +50 -0
- package/android/src/main/AndroidManifest.xml +4 -0
- package/android/src/main/java/ai/eliza/plugins/swabble/SwabblePlugin.kt +840 -0
- package/dist/esm/definitions.d.ts +218 -0
- package/dist/esm/definitions.d.ts.map +1 -0
- package/dist/esm/definitions.js +1 -0
- package/dist/esm/index.d.ts +4 -0
- package/dist/esm/index.d.ts.map +1 -0
- package/dist/esm/index.js +6 -0
- package/dist/esm/web.d.ts +54 -0
- package/dist/esm/web.d.ts.map +1 -0
- package/dist/esm/web.js +461 -0
- package/dist/plugin.cjs.js +477 -0
- package/dist/plugin.cjs.js.map +1 -0
- package/dist/plugin.js +480 -0
- package/dist/plugin.js.map +1 -0
- package/electrobun/src/global.d.ts +1 -0
- package/electrobun/src/index.ts +786 -0
- package/electrobun/tsconfig.json +16 -0
- package/ios/Sources/SwabblePlugin/SwabblePlugin.swift +1156 -0
- package/package.json +84 -0
|
@@ -0,0 +1,840 @@
|
|
|
1
|
+
package ai.eliza.plugins.swabble
|
|
2
|
+
|
|
3
|
+
import android.Manifest
|
|
4
|
+
import android.content.Context
|
|
5
|
+
import android.content.Intent
|
|
6
|
+
import android.media.AudioDeviceInfo
|
|
7
|
+
import android.media.AudioFocusRequest
|
|
8
|
+
import android.media.AudioManager
|
|
9
|
+
import android.os.Build
|
|
10
|
+
import android.os.Bundle
|
|
11
|
+
import android.speech.RecognitionListener
|
|
12
|
+
import android.speech.RecognizerIntent
|
|
13
|
+
import android.speech.SpeechRecognizer
|
|
14
|
+
import com.getcapacitor.JSArray
|
|
15
|
+
import com.getcapacitor.JSObject
|
|
16
|
+
import com.getcapacitor.Plugin
|
|
17
|
+
import com.getcapacitor.PluginCall
|
|
18
|
+
import com.getcapacitor.PluginMethod
|
|
19
|
+
import com.getcapacitor.annotation.CapacitorPlugin
|
|
20
|
+
import com.getcapacitor.annotation.Permission
|
|
21
|
+
import com.getcapacitor.annotation.PermissionCallback
|
|
22
|
+
import kotlinx.coroutines.*
|
|
23
|
+
import java.util.Locale
|
|
24
|
+
import kotlin.math.abs
|
|
25
|
+
import kotlin.math.min
|
|
26
|
+
|
|
27
|
+
/**
|
|
28
|
+
* Swabble (Voice Wake) Plugin for Capacitor Android
|
|
29
|
+
*
|
|
30
|
+
* Provides continuous voice wake word detection and speech-to-text using
|
|
31
|
+
* Android SpeechRecognizer with Levenshtein fuzzy matching, state machine,
|
|
32
|
+
* audio focus, and device enumeration.
|
|
33
|
+
*
|
|
34
|
+
* State machine: idle → listening → triggered → capturing → listening
|
|
35
|
+
*/
|
|
36
|
+
@CapacitorPlugin(
|
|
37
|
+
name = "Swabble",
|
|
38
|
+
permissions = [
|
|
39
|
+
Permission(alias = "microphone", strings = [Manifest.permission.RECORD_AUDIO])
|
|
40
|
+
]
|
|
41
|
+
)
|
|
42
|
+
class SwabblePlugin : Plugin() {
|
|
43
|
+
|
|
44
|
+
// ── State ───────────────────────────────────────────────────────────
|
|
45
|
+
|
|
46
|
+
/** Recognizer for the current session; created in startRecognition and released in stopRecognitionInternal. */
private var speechRecognizer: SpeechRecognizer? = null

/** Configuration parsed by `start`; null until `start` has been called. */
private var config: SwabbleConfig? = null

/** Current position in the state machine; changed only through transitionState. */
private var currentState: SwabbleState = SwabbleState.IDLE

/** Most recent top-ranked transcript processed by handleResults. */
private var lastTranscript: String = ""

/** Command of the last emitted wake-word event, used to suppress repeats. */
private var lastDispatchedCommand: String? = null

/** Estimated per-word segments of the latest transcript. */
private var segments: MutableList<SpeechSegment> = mutableListOf()

/** Main-thread scope for restart and silence timers; cancelled in handleOnDestroy. */
private val scope: CoroutineScope = CoroutineScope(Dispatchers.Main + SupervisorJob())

/** Pending delayed restart of the recognizer, if any. */
private var restartJob: Job? = null

/** Pending silence timeout started while capturing, if any. */
private var silenceJob: Job? = null

/** Wall-clock time (ms) at which the current recognition segment was started. */
private var segmentStartTime: Long = 0L

/** Call stored by `start` while the microphone permission is being requested. */
private var pendingCall: PluginCall? = null

/** True once a stop was requested; callbacks and timers check it before restarting. */
private var stopRequested: Boolean = false

// Audio focus
private var audioManager: AudioManager? = null
private var audioFocusRequest: AudioFocusRequest? = null
private var hasAudioFocus: Boolean = false
private var selectedDeviceId: String? = null

// Silence detection
/** Wall-clock time (ms) of the last detected speech activity. */
private var lastSpeechTime: Long = 0L

/** Milliseconds of silence after which capture ends. */
private val silenceThresholdMs: Long = 1500L
|
|
68
|
+
|
|
69
|
+
// ── Data classes ────────────────────────────────────────────────────
|
|
70
|
+
|
|
71
|
+
/**
 * Phases of the wake-word state machine.
 *
 * @property value string identifier sent to JavaScript in `stateChange` events.
 */
enum class SwabbleState(val value: String) {
    IDLE(value = "idle"),
    LISTENING(value = "listening"),
    TRIGGERED(value = "triggered"),
    CAPTURING(value = "capturing"),
    ERROR(value = "error"),
}
|
|
78
|
+
|
|
79
|
+
/**
 * Wake-word configuration exchanged with the JavaScript layer.
 *
 * Properties are mutable because `updateConfig` patches individual fields in place.
 *
 * @property triggers wake phrases to listen for.
 * @property minPostTriggerGap gap value (seconds) reported with exact matches and used as the
 *   fallback gap for fuzzy matches.
 * @property minCommandLength minimum number of characters a command must have.
 * @property locale language tag passed to the recognizer.
 * @property sampleRate sample rate carried in the config; it is only stored and echoed back here.
 */
data class SwabbleConfig(
    var triggers: List<String>,
    var minPostTriggerGap: Double,
    var minCommandLength: Int,
    var locale: String,
    var sampleRate: Int
) {
    /** Serialises this configuration into the shape consumed by the JavaScript side. */
    fun toJSObject(): JSObject {
        val json = JSObject()
        json.put("triggers", JSArray(triggers))
        json.put("minPostTriggerGap", minPostTriggerGap)
        json.put("minCommandLength", minCommandLength)
        json.put("locale", locale)
        json.put("sampleRate", sampleRate)
        return json
    }

    companion object {
        /**
         * Builds a configuration from [obj], substituting defaults for absent keys:
         * triggers `["eliza"]`, gap `0.45`, command length `1`, the device default locale
         * and a sample rate of `16000`.
         */
        fun fromJSObject(obj: JSObject): SwabbleConfig {
            val rawTriggers = obj.optJSONArray("triggers")
            val parsedTriggers = rawTriggers
                ?.let { arr -> (0 until arr.length()).map { index -> arr.getString(index) } }
                ?: listOf("eliza")

            return SwabbleConfig(
                triggers = parsedTriggers,
                minPostTriggerGap = obj.optDouble("minPostTriggerGap", 0.45),
                minCommandLength = obj.optInt("minCommandLength", 1),
                locale = obj.optString("locale", Locale.getDefault().toLanguageTag()),
                sampleRate = obj.optInt("sampleRate", 16000)
            )
        }
    }
}
|
|
115
|
+
|
|
116
|
+
/**
 * One word of a transcript with its (estimated) timing.
 *
 * @property text the word.
 * @property start offset of the word from the start of the transcript, in seconds.
 * @property duration length of the word, in seconds.
 */
data class SpeechSegment(val text: String, val start: Double, val duration: Double) {
    /** Offset at which the word ends: [start] plus [duration]. */
    val end: Double
        get() = start + duration
}
|
|
123
|
+
|
|
124
|
+
/**
 * Result of a successful wake-word match.
 *
 * @property wakeWord the configured trigger that matched.
 * @property command the text following the trigger.
 * @property postGap gap between trigger and command, in seconds.
 */
data class WakeWordMatch(val wakeWord: String, val command: String, val postGap: Double)
|
|
129
|
+
|
|
130
|
+
// ── Plugin methods ──────────────────────────────────────────────────
|
|
131
|
+
|
|
132
|
+
/**
 * Starts wake-word listening with the configuration in the call's `config` object.
 *
 * Rejects when `config` is missing. If the microphone permission has not been granted
 * it is requested first and recognition starts from `handlePermissionResult`.
 */
@PluginMethod
fun start(call: PluginCall) {
    val rawConfig = call.getObject("config")
        ?: return call.reject("Missing config parameter")

    config = SwabbleConfig.fromJSObject(rawConfig)

    if (hasRequiredPermissions()) {
        startRecognition(call)
    } else {
        pendingCall = call
        requestPermissionForAlias("microphone", call, "handlePermissionResult")
    }
}
|
|
150
|
+
|
|
151
|
+
/** Stops recognition, moves the state machine to idle and resolves the call. */
@PluginMethod
fun stop(call: PluginCall) {
    stopRecognitionInternal()
    transitionState(newState = SwabbleState.IDLE)
    call.resolve()
}
|
|
157
|
+
|
|
158
|
+
/**
 * Resolves with `{ listening: Boolean }`; true while the state machine is in the
 * listening, triggered or capturing state.
 */
@PluginMethod
fun isListening(call: PluginCall) {
    val active = when (currentState) {
        SwabbleState.LISTENING,
        SwabbleState.TRIGGERED,
        SwabbleState.CAPTURING -> true
        SwabbleState.IDLE,
        SwabbleState.ERROR -> false
    }
    val response = JSObject()
    response.put("listening", active)
    call.resolve(response)
}
|
|
166
|
+
|
|
167
|
+
/** Resolves with `{ config }`: the current configuration, or JSON null when none is set. */
@PluginMethod
fun getConfig(call: PluginCall) {
    val response = JSObject()
    val current = config
    if (current != null) {
        response.put("config", current.toJSObject())
    } else {
        response.put("config", JSObject.NULL)
    }
    call.resolve(response)
}
|
|
175
|
+
|
|
176
|
+
/**
 * Patches the active configuration with the keys present in the call's `config` object.
 *
 * Rejects when `config` is missing. Keys that are absent, or whose value has the wrong
 * type, leave the corresponding setting unchanged instead of throwing. When no
 * configuration is active yet (i.e. `start` was never called) nothing is applied and
 * the call still resolves.
 */
@PluginMethod
fun updateConfig(call: PluginCall) {
    val configObj = call.getObject("config")
    if (configObj == null) {
        call.reject("Missing config parameter")
        return
    }

    config?.let { current ->
        configObj.optJSONArray("triggers")?.let { arr ->
            // optString never throws; null/blank entries are dropped because they can never match.
            current.triggers = (0 until arr.length())
                .map { index -> arr.optString(index) }
                .filter { it.isNotBlank() }
        }
        if (configObj.has("minPostTriggerGap")) {
            current.minPostTriggerGap =
                configObj.optDouble("minPostTriggerGap", current.minPostTriggerGap)
        }
        if (configObj.has("minCommandLength")) {
            current.minCommandLength =
                configObj.optInt("minCommandLength", current.minCommandLength)
        }
        if (configObj.has("locale")) {
            // JSObject.getString returns null for non-string values; keep the old locale then.
            current.locale = configObj.getString("locale") ?: current.locale
        }
        if (configObj.has("sampleRate")) {
            current.sampleRate = configObj.optInt("sampleRate", current.sampleRate)
        }
    }

    call.resolve()
}
|
|
205
|
+
|
|
206
|
+
/** Resolves with the current microphone and speech-recognition permission status. */
@PluginMethod
override fun checkPermissions(call: PluginCall) {
    val status = buildPermissionResult()
    call.resolve(status)
}
|
|
210
|
+
|
|
211
|
+
/** Requests the microphone permission; the result is reported by `handlePermissionCheckResult`. */
@PluginMethod
override fun requestPermissions(call: PluginCall) {
    val callbackName = "handlePermissionCheckResult"
    requestPermissionForAlias("microphone", call, callbackName)
}
|
|
215
|
+
|
|
216
|
+
/**
 * Resolves with `{ devices: [{ id, name, isDefault }] }` describing the audio inputs.
 *
 * Inputs are enumerated on API 23+. A device is flagged as default when it is the one
 * chosen through `setAudioDevice`, or — when none was chosen — the first input listed.
 * If no input is found a single synthetic "Default Microphone" entry is returned.
 */
@PluginMethod
fun getAudioDevices(call: PluginCall) {
    val manager = getAudioManager()
    val deviceList = JSArray()

    if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.M) {
        val inputs = manager.getDevices(AudioManager.GET_DEVICES_INPUTS)
        inputs.forEach { input ->
            val id = input.id.toString()
            val productSuffix = if (input.productName.isNotEmpty()) " (${input.productName})" else ""
            val defaultId = selectedDeviceId ?: inputs.firstOrNull()?.id?.toString()

            val entry = JSObject()
            entry.put("id", id)
            entry.put("name", getDeviceTypeName(input.type) + productSuffix)
            entry.put("isDefault", id == defaultId)
            deviceList.put(entry)
        }
    }

    // Always include a default entry if no devices found
    if (deviceList.length() == 0) {
        val fallback = JSObject()
        fallback.put("id", "default")
        fallback.put("name", "Default Microphone")
        fallback.put("isDefault", true)
        deviceList.put(fallback)
    }

    val response = JSObject()
    response.put("devices", deviceList)
    call.resolve(response)
}
|
|
246
|
+
|
|
247
|
+
/**
 * Records the preferred input device given in the call's `deviceId`.
 *
 * Rejects when `deviceId` is missing. The id is only stored: within this plugin it is
 * used by `getAudioDevices` to mark the default entry. SpeechRecognizer exposes no way
 * to select a preferred input device, so no routing is performed here.
 */
@PluginMethod
fun setAudioDevice(call: PluginCall) {
    val deviceId = call.getString("deviceId")
    if (deviceId == null) {
        call.reject("Missing deviceId")
        return
    }

    selectedDeviceId = deviceId
    call.resolve()
}
|
|
272
|
+
|
|
273
|
+
// ── Permission callbacks ────────────────────────────────────────────
|
|
274
|
+
|
|
275
|
+
/**
 * Permission callback for `start`: begins recognition once the microphone permission is
 * granted, otherwise resolves with `started = false` and an error message.
 */
@PermissionCallback
private fun handlePermissionResult(call: PluginCall) {
    if (!hasRequiredPermissions()) {
        val denied = JSObject()
        denied.put("started", false)
        denied.put("error", "Microphone permission denied")
        call.resolve(denied)
        return
    }
    startRecognition(call)
}
|
|
286
|
+
|
|
287
|
+
/** Permission callback for `requestPermissions`: resolves with the resulting status. */
@PermissionCallback
private fun handlePermissionCheckResult(call: PluginCall) {
    val status = buildPermissionResult()
    call.resolve(status)
}
|
|
291
|
+
|
|
292
|
+
// ── Recognition lifecycle ───────────────────────────────────────────
|
|
293
|
+
|
|
294
|
+
/**
 * Creates a fresh SpeechRecognizer and starts listening.
 *
 * Resolves [call] with `{ started: true }` on success, or `{ started: false, error }` when
 * recognition is unavailable or start-up throws. Rejects when no configuration is set.
 * Any previous session is torn down first. If start-up fails, the recognizer created
 * here and the audio focus requested here are released again so nothing leaks.
 */
private fun startRecognition(call: PluginCall) {
    if (!SpeechRecognizer.isRecognitionAvailable(context)) {
        call.resolve(JSObject().apply {
            put("started", false)
            put("error", "Speech recognition not available on this device")
        })
        return
    }

    val cfg = config
    if (cfg == null) {
        call.reject("Configuration not set")
        return
    }

    // Stop any existing recognition; this sets stopRequested, so clear it again afterwards.
    stopRecognitionInternal()
    stopRequested = false

    requestAudioFocus()

    // SpeechRecognizer must be created and driven from the main thread.
    activity.runOnUiThread {
        try {
            val recognizer = SpeechRecognizer.createSpeechRecognizer(context)
            speechRecognizer = recognizer
            recognizer.setRecognitionListener(createRecognitionListener())

            segmentStartTime = System.currentTimeMillis()
            lastSpeechTime = segmentStartTime
            recognizer.startListening(createRecognitionIntent(cfg))

            transitionState(SwabbleState.LISTENING)

            call.resolve(JSObject().apply {
                put("started", true)
            })
        } catch (err: Throwable) {
            // Release whatever was acquired before the failure.
            runCatching { speechRecognizer?.destroy() }
            speechRecognizer = null
            abandonAudioFocus()

            transitionState(SwabbleState.ERROR, "Start failed: ${err.message}")
            call.resolve(JSObject().apply {
                put("started", false)
                put("error", err.message ?: "Unknown error")
            })
        }
    }
}
|
|
339
|
+
|
|
340
|
+
/**
 * Tears down the current recognition session: flags the stop, cancels pending timers,
 * destroys the recognizer on the UI thread, gives up audio focus and clears the
 * transcript state. Does not change the state machine; callers do that themselves.
 */
private fun stopRecognitionInternal() {
    stopRequested = true

    // Cancel the pending restart and silence timers.
    restartJob?.cancel()
    silenceJob?.cancel()
    restartJob = null
    silenceJob = null
    lastDispatchedCommand = null

    activity.runOnUiThread {
        speechRecognizer?.run {
            stopListening()
            cancel()
            destroy()
        }
        speechRecognizer = null
    }

    abandonAudioFocus()
    segments.clear()
    lastTranscript = ""
}
|
|
359
|
+
|
|
360
|
+
/**
 * Builds the recognizer intent: free-form language model, the locale from [config],
 * partial results enabled and up to three alternatives per result.
 */
private fun createRecognitionIntent(config: SwabbleConfig): Intent {
    val intent = Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH)
    intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, RecognizerIntent.LANGUAGE_MODEL_FREE_FORM)
    intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE, config.locale)
    intent.putExtra(RecognizerIntent.EXTRA_PARTIAL_RESULTS, true)
    intent.putExtra(RecognizerIntent.EXTRA_MAX_RESULTS, 3)
    intent.putExtra(RecognizerIntent.EXTRA_CALLING_PACKAGE, context.packageName)
    return intent
}
|
|
369
|
+
|
|
370
|
+
/**
 * Creates the listener that wires recognizer callbacks into the plugin: state
 * transitions, `audioLevel` / `error` events, result handling and automatic restarts
 * that keep recognition running continuously.
 */
private fun createRecognitionListener(): RecognitionListener = object : RecognitionListener {

    override fun onReadyForSpeech(params: Bundle?) {
        // Do not leave the capturing state just because a new segment became ready.
        if (currentState == SwabbleState.CAPTURING) return
        transitionState(SwabbleState.LISTENING)
    }

    override fun onBeginningOfSpeech() {
        lastSpeechTime = System.currentTimeMillis()
    }

    override fun onRmsChanged(rmsdB: Float) {
        // RMS is typically -2 to 10 dB; normalize to 0..1
        val normalized = ((rmsdB + 2f) / 12f).coerceIn(0f, 1f).toDouble()
        val payload = JSObject()
        payload.put("level", normalized)
        payload.put("peak", normalized)
        notifyListeners("audioLevel", payload)

        // Track speech activity for silence detection
        if (rmsdB > 0f) {
            lastSpeechTime = System.currentTimeMillis()
        }
    }

    override fun onBufferReceived(buffer: ByteArray?) = Unit // Not used

    override fun onEndOfSpeech() {
        // A segment finished; while capturing, start the silence timeout.
        if (currentState == SwabbleState.CAPTURING) {
            startSilenceTimer()
        }
    }

    override fun onError(error: Int) {
        if (stopRequested) return

        val message = getErrorMessage(error)
        val canRecover = when (error) {
            SpeechRecognizer.ERROR_NO_MATCH,
            SpeechRecognizer.ERROR_SPEECH_TIMEOUT,
            SpeechRecognizer.ERROR_CLIENT -> true
            else -> false
        }

        val payload = JSObject()
        payload.put("code", error.toString())
        payload.put("message", message)
        payload.put("recoverable", canRecover)
        notifyListeners("error", payload)

        when {
            // Without the permission a restart cannot succeed, so stay in the error state.
            error == SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS ->
                transitionState(SwabbleState.ERROR, "Microphone permission required")

            canRecover -> scheduleRestart(delayMs = 500)

            else -> {
                transitionState(SwabbleState.ERROR, message)
                // Still try again, after a longer delay.
                scheduleRestart(delayMs = 2000)
            }
        }
    }

    override fun onResults(results: Bundle?) {
        handleResults(results, isFinal = true)

        // After final results, restart for continuous listening
        if (!stopRequested) {
            scheduleRestart(delayMs = 350)
        }
    }

    override fun onPartialResults(partialResults: Bundle?) {
        handleResults(partialResults, isFinal = false)
    }

    override fun onEvent(eventType: Int, params: Bundle?) = Unit // Not used
}
|
|
453
|
+
|
|
454
|
+
// ── Result handling ─────────────────────────────────────────────────
|
|
455
|
+
|
|
456
|
+
/**
 * Processes a partial or final recognition result.
 *
 * Emits a `transcript` event for the top-ranked alternative, then scans all alternatives
 * (in rank order) for a wake word. The first alternative that matches decides the
 * outcome: a new command emits a `wakeWord` event and briefly moves the state machine
 * through triggered → capturing → listening; a command equal to the one already
 * dispatched is suppressed.
 *
 * @param results bundle delivered by the recognizer; ignored when it holds no text.
 * @param isFinal true for final results, false for partial ones.
 */
private fun handleResults(results: Bundle?, isFinal: Boolean) {
    val matches = results?.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION)
    val confidence = results?.getFloatArray(SpeechRecognizer.CONFIDENCE_SCORES)

    if (matches.isNullOrEmpty()) return

    val transcript = matches[0]
    if (transcript.isBlank()) return

    // The recognizer gives no word timings, so estimate them: 0.3 s per word, 0.1 s apart.
    val words = transcript.split("\\s+".toRegex()).filter { it.isNotEmpty() }
    val avgWordDuration = 0.3
    segments.clear()
    var time = 0.0
    for (word in words) {
        segments.add(SpeechSegment(text = word, start = time, duration = avgWordDuration))
        time += avgWordDuration + 0.1
    }

    val jsSegments = JSArray()
    for (segment in segments) {
        jsSegments.put(JSObject().apply {
            put("text", segment.text)
            put("start", segment.start)
            put("duration", segment.duration)
            put("isFinal", isFinal)
        })
    }

    notifyListeners("transcript", JSObject().apply {
        put("transcript", transcript)
        put("segments", jsSegments)
        put("isFinal", isFinal)
        put("confidence", confidence?.firstOrNull()?.toDouble() ?: 0.0)
    })

    // Check for wake word — use all recognition alternatives for robustness
    val cfg = config ?: return
    for ((index, alternative) in matches.withIndex()) {
        val match = matchWakeWord(alternative, segments, cfg) ?: continue

        // Dedup: this command was already dispatched. Stop here rather than falling through
        // to lower-ranked alternatives, which would re-dispatch a variant of the same utterance.
        if (match.command == lastDispatchedCommand) break
        lastDispatchedCommand = match.command

        transitionState(SwabbleState.TRIGGERED)

        notifyListeners("wakeWord", JSObject().apply {
            put("wakeWord", match.wakeWord)
            put("command", match.command)
            put("transcript", alternative)
            put("postGap", match.postGap)
            // Report the score of the alternative that actually matched.
            put("confidence", confidence?.getOrNull(index)?.toDouble() ?: 0.0)
        })

        // Move to capturing state briefly, then back to listening
        scope.launch {
            transitionState(SwabbleState.CAPTURING)
            delay(650)
            if (currentState == SwabbleState.CAPTURING && !stopRequested) {
                transitionState(SwabbleState.LISTENING)
            }
        }

        break
    }

    lastTranscript = transcript
}
|
|
533
|
+
|
|
534
|
+
// ── Wake word matching (regex + Levenshtein fuzzy) ──────────────────
|
|
535
|
+
|
|
536
|
+
/**
 * Two-pass wake word matching:
 * 1. Exact regex match (ported from classic VoiceWakeCommandExtractor)
 * 2. Fuzzy match using Levenshtein distance for misheard trigger words
 *
 * In both passes the command is the cleaned text after the trigger; it must be
 * non-empty and at least `config.minCommandLength` characters long.
 *
 * @param transcript recognised text to search.
 * @param segments word timings used to estimate the trigger-to-command gap in the fuzzy pass.
 * @param config supplies the triggers, the minimum command length and the default gap.
 * @return the first match found, or null when no trigger is followed by a valid command.
 */
private fun matchWakeWord(
    transcript: String,
    segments: List<SpeechSegment>,
    config: SwabbleConfig
): WakeWordMatch? {
    // Pass 1: exact regex match (from classic VoiceWakeCommandExtractor)
    for (trigger in config.triggers) {
        val command = extractCommandExact(transcript, trigger) ?: continue
        if (command.isNotEmpty() && command.length >= config.minCommandLength) {
            return WakeWordMatch(
                wakeWord = trigger,
                command = command,
                postGap = config.minPostTriggerGap
            )
        }
    }

    // Pass 2: fuzzy match using Levenshtein distance
    val whitespace = "\\s+".toRegex()
    val words = transcript.split(whitespace).filter { it.isNotEmpty() }
    // Split each trigger once instead of once per transcript word.
    val triggerPhrases = config.triggers.map { trigger ->
        trigger to trigger.split(whitespace).filter { it.isNotEmpty() }
    }

    for (wordIndex in words.indices) {
        for ((trigger, triggerWords) in triggerPhrases) {
            val triggerLen = triggerWords.size
            if (triggerLen == 0) continue // blank trigger can never match

            // The trigger must fit and leave at least one word for the command.
            val commandStart = wordIndex + triggerLen
            if (commandStart >= words.size) continue

            val candidate = words.subList(wordIndex, commandStart).joinToString(" ").lowercase()
            // Compare against the whitespace-normalised trigger so stray spaces in the
            // configured phrase do not inflate the edit distance.
            val normalizedTrigger = triggerWords.joinToString(" ").lowercase()
            val distance = levenshteinDistance(candidate, normalizedTrigger)
            val maxLen = maxOf(candidate.length, normalizedTrigger.length)

            // Accept if within 30% edit distance (fuzzy threshold)
            if (maxLen == 0 || distance.toDouble() / maxLen > 0.3) continue

            // Validate the cleaned command — the same text that is returned to the caller.
            val command = cleanCommand(words.subList(commandStart, words.size).joinToString(" "))
            if (command.isEmpty() || command.length < config.minCommandLength) continue

            // Estimate post-trigger gap from segments when they cover both words.
            val gap = if (commandStart < segments.size) {
                segments[commandStart].start - segments[commandStart - 1].end
            } else {
                config.minPostTriggerGap
            }

            return WakeWordMatch(
                wakeWord = trigger,
                command = command,
                postGap = gap
            )
        }
    }

    return null
}
|
|
600
|
+
|
|
601
|
+
/**
 * Exact command extraction using regex — ported from classic
 * VoiceWakeCommandExtractor.extractCommand()
 *
 * Finds the first case-insensitive, whole-word occurrence of [trigger] in [text] and
 * returns what follows it, with leading punctuation and whitespace removed.
 *
 * @return the command, or null when [text] or [trigger] is blank, the trigger does not
 *   occur, or nothing but punctuation/whitespace follows it.
 */
private fun extractCommandExact(text: String, trigger: String): String? {
    val raw = text.trim()
    if (raw.isEmpty()) return null

    val normalizedTrigger = trigger.trim().lowercase()
    if (normalizedTrigger.isEmpty()) return null

    val escaped = Regex.escape(normalizedTrigger)
    val regex = Regex("(?i)(?:^|\\s)($escaped)\\b[\\s\\p{Punct}]*([\\s\\S]+)$")
    val match = regex.find(raw) ?: return null
    val extracted = match.groupValues.getOrNull(2)?.trim() ?: return null
    if (extracted.isEmpty()) return null

    // A remainder such as "!" cleans down to nothing; treat that as "no command".
    return cleanCommand(extracted).takeIf { it.isNotEmpty() }
}
|
|
620
|
+
|
|
621
|
+
/** Removes leading punctuation and whitespace from [text], then trims surrounding whitespace. */
private fun cleanCommand(text: String): String {
    val withoutLeadingNoise = text.dropWhile { ch -> ch.isWhitespace() || ch.isPunctuation() }
    return withoutLeadingNoise.trim()
}
|
|
625
|
+
|
|
626
|
+
/** True when this character belongs to one of the Unicode punctuation categories. */
private fun Char.isPunctuation(): Boolean = when (this.category) {
    CharCategory.CONNECTOR_PUNCTUATION,
    CharCategory.DASH_PUNCTUATION,
    CharCategory.START_PUNCTUATION,
    CharCategory.END_PUNCTUATION,
    CharCategory.INITIAL_QUOTE_PUNCTUATION,
    CharCategory.FINAL_QUOTE_PUNCTUATION,
    CharCategory.OTHER_PUNCTUATION -> true
    else -> false
}
|
|
638
|
+
|
|
639
|
+
/**
 * Levenshtein edit distance between [a] and [b]: the minimum number of single-character
 * insertions, deletions and substitutions that turn one into the other.
 * Used for fuzzy trigger word matching (handles speech recognition errors).
 */
private fun levenshteinDistance(a: String, b: String): Int {
    if (a.isEmpty()) return b.length
    if (b.isEmpty()) return a.length

    // Only two rows of the DP table are kept and swapped after every character of `a`.
    var previousRow = IntArray(b.length + 1) { it }
    var currentRow = IntArray(b.length + 1)

    for ((row, charA) in a.withIndex()) {
        currentRow[0] = row + 1
        for ((col, charB) in b.withIndex()) {
            val deletion = previousRow[col + 1] + 1
            val insertion = currentRow[col] + 1
            val substitution = previousRow[col] + (if (charA == charB) 0 else 1)
            currentRow[col + 1] = minOf(deletion, insertion, substitution)
        }
        val swap = previousRow
        previousRow = currentRow
        currentRow = swap
    }

    return previousRow[b.length]
}
|
|
669
|
+
|
|
670
|
+
// ── State machine ───────────────────────────────────────────────────
|
|
671
|
+
|
|
672
|
+
/**
 * Moves the state machine to [newState] and emits a `stateChange` event carrying the
 * state's string value and, when given, the [reason]. Does nothing if the state is unchanged.
 */
private fun transitionState(newState: SwabbleState, reason: String? = null) {
    if (newState == currentState) return
    currentState = newState

    val event = JSObject()
    event.put("state", newState.value)
    reason?.let { event.put("reason", it) }
    notifyListeners("stateChange", event)
}
|
|
683
|
+
|
|
684
|
+
// ── Restart / silence detection ─────────────────────────────────────
|
|
685
|
+
|
|
686
|
+
/**
 * Restarts listening on the existing recognizer after [delayMs] milliseconds, replacing
 * any restart that is already pending. Nothing happens if a stop was requested before
 * or during the delay, or if no configuration is set.
 *
 * A failure while restarting is surfaced as a transition to the error state; it is not
 * retried automatically, because the recognizer does not report such failures via onError.
 */
private fun scheduleRestart(delayMs: Long = 350) {
    if (stopRequested) return

    restartJob?.cancel()
    restartJob = scope.launch {
        delay(delayMs)
        if (stopRequested) return@launch

        activity.runOnUiThread {
            // Re-check: a stop may have been requested between the delay and this block.
            if (stopRequested) return@runOnUiThread
            val cfg = config ?: return@runOnUiThread

            try {
                segmentStartTime = System.currentTimeMillis()
                lastSpeechTime = segmentStartTime
                // A new session may legitimately repeat the previous command.
                lastDispatchedCommand = null
                speechRecognizer?.cancel()
                speechRecognizer?.startListening(createRecognitionIntent(cfg))
            } catch (err: Throwable) {
                transitionState(SwabbleState.ERROR, "Restart failed: ${err.message}")
            }
        }
    }
}
|
|
708
|
+
|
|
709
|
+
/**
 * (Re)starts the silence timeout: once `silenceThresholdMs` has elapsed, return to the
 * listening state if the plugin is still capturing and no stop was requested.
 */
private fun startSilenceTimer() {
    silenceJob?.cancel()
    silenceJob = scope.launch {
        delay(silenceThresholdMs)
        val stillCapturing = currentState == SwabbleState.CAPTURING
        if (stillCapturing && !stopRequested) {
            transitionState(SwabbleState.LISTENING)
        }
    }
}
|
|
719
|
+
|
|
720
|
+
// ── Audio focus ─────────────────────────────────────────────────────
|
|
721
|
+
|
|
722
|
+
/** Returns the system AudioManager, looking it up on first use and caching it afterwards. */
private fun getAudioManager(): AudioManager {
    val cached = audioManager
    if (cached != null) return cached

    val resolved = context.getSystemService(Context.AUDIO_SERVICE) as AudioManager
    audioManager = resolved
    return resolved
}
|
|
728
|
+
|
|
729
|
+
/**
 * Focus-change handler shared by the API 26+ and legacy code paths. The same instance
 * must be passed when requesting and when abandoning focus on the legacy path, because
 * the legacy API identifies the request by its listener.
 */
private val audioFocusChangeListener = AudioManager.OnAudioFocusChangeListener { focusChange ->
    when (focusChange) {
        AudioManager.AUDIOFOCUS_LOSS -> {
            // Another app took focus permanently — stop
            if (!stopRequested) {
                stopRecognitionInternal()
                transitionState(SwabbleState.IDLE, "Audio focus lost")
            }
        }
        AudioManager.AUDIOFOCUS_LOSS_TRANSIENT -> {
            // Temporary loss (e.g. phone call) — report it but keep the session
            hasAudioFocus = false
            notifyListeners("error", JSObject().apply {
                put("code", "AUDIO_FOCUS_LOST")
                put("message", "Audio focus temporarily lost")
                put("recoverable", true)
            })
        }
        AudioManager.AUDIOFOCUS_GAIN -> {
            hasAudioFocus = true
        }
    }
}

/**
 * Requests transient, duck-able audio focus and records in `hasAudioFocus` whether it
 * was granted. Uses AudioFocusRequest on API 26+ and the deprecated stream-based call below.
 */
private fun requestAudioFocus() {
    val am = getAudioManager()

    val result = if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
        val focusRequest = AudioFocusRequest.Builder(AudioManager.AUDIOFOCUS_GAIN_TRANSIENT_MAY_DUCK)
            .setOnAudioFocusChangeListener(audioFocusChangeListener)
            .build()

        audioFocusRequest = focusRequest
        am.requestAudioFocus(focusRequest)
    } else {
        @Suppress("DEPRECATION")
        am.requestAudioFocus(
            audioFocusChangeListener,
            AudioManager.STREAM_MUSIC,
            AudioManager.AUDIOFOCUS_GAIN_TRANSIENT_MAY_DUCK
        )
    }

    hasAudioFocus = result == AudioManager.AUDIOFOCUS_REQUEST_GRANTED
}

/**
 * Releases audio focus previously obtained by `requestAudioFocus` and resets the
 * related state. Safe to call when no focus is held.
 */
private fun abandonAudioFocus() {
    val am = getAudioManager()
    if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
        audioFocusRequest?.let { am.abandonAudioFocusRequest(it) }
    } else {
        // Must be the listener used for the request; passing null would release nothing.
        @Suppress("DEPRECATION")
        am.abandonAudioFocus(audioFocusChangeListener)
    }
    hasAudioFocus = false
    audioFocusRequest = null
}
|
|
783
|
+
|
|
784
|
+
// ── Helpers ──────────────────────────────────────────────────────────
|
|
785
|
+
|
|
786
|
+
/** True when the permission registered under the "microphone" alias is granted. */
override fun hasRequiredPermissions(): Boolean =
    getPermissionState("microphone") == com.getcapacitor.PermissionState.GRANTED
|
|
789
|
+
|
|
790
|
+
/**
 * Builds the permission status object sent to JavaScript:
 * `microphone` is "granted", "denied" or "prompt"; `speechRecognition` is "granted"
 * when a recognition service is available and "not_supported" otherwise.
 */
private fun buildPermissionResult(): JSObject {
    val microphone = when (getPermissionState("microphone")) {
        com.getcapacitor.PermissionState.GRANTED -> "granted"
        com.getcapacitor.PermissionState.DENIED -> "denied"
        else -> "prompt"
    }
    val speechRecognition =
        if (SpeechRecognizer.isRecognitionAvailable(context)) "granted" else "not_supported"

    val status = JSObject()
    status.put("microphone", microphone)
    status.put("speechRecognition", speechRecognition)
    return status
}
|
|
803
|
+
|
|
804
|
+
/** Maps a SpeechRecognizer error code to a short description; unlisted codes include the number. */
private fun getErrorMessage(error: Int): String = when (error) {
    SpeechRecognizer.ERROR_AUDIO -> "Audio recording error"
    SpeechRecognizer.ERROR_CLIENT -> "Client error"
    SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS -> "Insufficient permissions"
    SpeechRecognizer.ERROR_NETWORK -> "Network error"
    SpeechRecognizer.ERROR_NETWORK_TIMEOUT -> "Network timeout"
    SpeechRecognizer.ERROR_NO_MATCH -> "No speech match"
    SpeechRecognizer.ERROR_RECOGNIZER_BUSY -> "Recognizer busy"
    SpeechRecognizer.ERROR_SERVER -> "Server error"
    SpeechRecognizer.ERROR_SPEECH_TIMEOUT -> "Speech timeout"
    else -> "Unknown error: $error"
}
|
|
818
|
+
|
|
819
|
+
/** Display name for an AudioDeviceInfo type constant; unlisted types map to "Audio Input". */
private fun getDeviceTypeName(type: Int): String = when (type) {
    AudioDeviceInfo.TYPE_BUILTIN_MIC -> "Built-in Microphone"
    AudioDeviceInfo.TYPE_WIRED_HEADSET -> "Wired Headset"
    AudioDeviceInfo.TYPE_BLUETOOTH_SCO -> "Bluetooth SCO"
    AudioDeviceInfo.TYPE_BLUETOOTH_A2DP -> "Bluetooth A2DP"
    AudioDeviceInfo.TYPE_USB_DEVICE -> "USB Device"
    AudioDeviceInfo.TYPE_USB_ACCESSORY -> "USB Accessory"
    AudioDeviceInfo.TYPE_TELEPHONY -> "Telephony"
    else -> "Audio Input"
}
|
|
832
|
+
|
|
833
|
+
// ── Lifecycle ───────────────────────────────────────────────────────
|
|
834
|
+
|
|
835
|
+
/** Plugin teardown: stops any running recognition and cancels the coroutine scope. */
override fun handleOnDestroy() {
    super.handleOnDestroy()
    // Stop first so the timers are cancelled individually before the scope goes away.
    stopRecognitionInternal()
    scope.cancel()
}
|
|
840
|
+
}
|