capacitor-native-speech-recognition 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,526 @@
1
+ package app.capgo.speechrecognition;
2
+
3
+ import android.Manifest;
4
+ import android.app.Activity;
5
+ import android.content.Intent;
6
+ import android.os.Build;
7
+ import android.os.Bundle;
8
+ import android.speech.RecognitionListener;
9
+ import android.speech.RecognizerIntent;
10
+ import android.speech.SpeechRecognizer;
11
+ import androidx.activity.result.ActivityResult;
12
+ import com.getcapacitor.JSArray;
13
+ import com.getcapacitor.JSObject;
14
+ import com.getcapacitor.Logger;
15
+ import com.getcapacitor.PermissionState;
16
+ import com.getcapacitor.Plugin;
17
+ import com.getcapacitor.PluginCall;
18
+ import com.getcapacitor.PluginMethod;
19
+ import com.getcapacitor.annotation.ActivityCallback;
20
+ import com.getcapacitor.annotation.CapacitorPlugin;
21
+ import com.getcapacitor.annotation.Permission;
22
+ import com.getcapacitor.annotation.PermissionCallback;
23
+ import java.util.ArrayList;
24
+ import java.util.List;
25
+ import java.util.Locale;
26
+ import java.util.concurrent.locks.ReentrantLock;
27
+ import org.json.JSONArray;
28
+
29
+ @CapacitorPlugin(
30
+ name = "SpeechRecognition",
31
+ permissions = { @Permission(strings = { Manifest.permission.RECORD_AUDIO }, alias = SpeechRecognitionPlugin.SPEECH_RECOGNITION) }
32
+ )
33
+ public class SpeechRecognitionPlugin extends Plugin implements Constants {
34
+
35
+ public static final String SPEECH_RECOGNITION = "speechRecognition";
36
+ private static final String TAG = "SpeechRecognition";
37
+ private static final String PLUGIN_VERSION = "7.0.0";
38
+
39
+ private Receiver languageReceiver;
40
+ private SpeechRecognizer speechRecognizer;
41
+ private final ReentrantLock lock = new ReentrantLock();
42
+ private boolean listening = false;
43
+ private JSONArray previousPartialResults = new JSONArray();
44
+
45
+ @Override
46
+ public void load() {
47
+ super.load();
48
+ bridge
49
+ .getWebView()
50
+ .post(() -> {
51
+ speechRecognizer = SpeechRecognizer.createSpeechRecognizer(bridge.getActivity());
52
+ SpeechRecognitionListener listener = new SpeechRecognitionListener();
53
+ speechRecognizer.setRecognitionListener(listener);
54
+ Logger.info(getLogTag(), "Instantiated SpeechRecognizer in load()");
55
+ });
56
+ }
57
+
58
+ @PluginMethod
59
+ public void available(PluginCall call) {
60
+ boolean val = SpeechRecognizer.isRecognitionAvailable(bridge.getContext());
61
+ call.resolve(new JSObject().put("available", val));
62
+ }
63
+
64
+ @PluginMethod
65
+ public void start(PluginCall call) {
66
+ if (!SpeechRecognizer.isRecognitionAvailable(bridge.getContext())) {
67
+ Logger.warn(TAG, "start() called but speech recognizer unavailable");
68
+ call.unavailable(NOT_AVAILABLE);
69
+ return;
70
+ }
71
+
72
+ if (getPermissionState(SPEECH_RECOGNITION) != PermissionState.GRANTED) {
73
+ Logger.warn(TAG, "start() missing RECORD_AUDIO permission");
74
+ call.reject(MISSING_PERMISSION);
75
+ return;
76
+ }
77
+
78
+ String language = call.getString("language", Locale.getDefault().toString());
79
+ int maxResults = call.getInt("maxResults", MAX_RESULTS);
80
+ String prompt = call.getString("prompt", null);
81
+ boolean partialResults = call.getBoolean("partialResults", false);
82
+ boolean popup = call.getBoolean("popup", false);
83
+ int allowForSilence = call.getInt("allowForSilence", 0);
84
+ Logger.info(
85
+ TAG,
86
+ String.format(
87
+ "Starting recognition | lang=%s maxResults=%d partial=%s popup=%s allowForSilence=%d",
88
+ language,
89
+ maxResults,
90
+ partialResults,
91
+ popup,
92
+ allowForSilence
93
+ )
94
+ );
95
+ beginListening(language, maxResults, prompt, partialResults, popup, call, allowForSilence);
96
+ }
97
+
98
+ @PluginMethod
99
+ public void stop(final PluginCall call) {
100
+ Logger.info(TAG, "stop() requested");
101
+ try {
102
+ stopListening();
103
+ } catch (Exception ex) {
104
+ call.reject(ex.getLocalizedMessage());
105
+ return;
106
+ }
107
+ call.resolve();
108
+ }
109
+
110
+ @PluginMethod
111
+ public void getSupportedLanguages(PluginCall call) {
112
+ if (languageReceiver == null) {
113
+ languageReceiver = new Receiver(call);
114
+ }
115
+
116
+ List<String> supportedLanguages = languageReceiver.getSupportedLanguages();
117
+ if (supportedLanguages != null) {
118
+ JSONArray languages = new JSONArray(supportedLanguages);
119
+ call.resolve(new JSObject().put("languages", languages));
120
+ return;
121
+ }
122
+
123
+ Intent detailsIntent = new Intent(RecognizerIntent.ACTION_GET_LANGUAGE_DETAILS);
124
+ if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
125
+ detailsIntent.setPackage("com.google.android.googlequicksearchbox");
126
+ }
127
+ bridge.getActivity().sendOrderedBroadcast(detailsIntent, null, languageReceiver, null, Activity.RESULT_OK, null, null);
128
+ }
129
+
130
+ @PluginMethod
131
+ public void isListening(PluginCall call) {
132
+ call.resolve(new JSObject().put("listening", listening));
133
+ }
134
+
135
+ @PluginMethod
136
+ @Override
137
+ public void checkPermissions(PluginCall call) {
138
+ String state = permissionStateValue(getPermissionState(SPEECH_RECOGNITION));
139
+ call.resolve(new JSObject().put("speechRecognition", state));
140
+ }
141
+
142
+ @PluginMethod
143
+ @Override
144
+ public void requestPermissions(PluginCall call) {
145
+ requestPermissionForAlias(SPEECH_RECOGNITION, call, "permissionsCallback");
146
+ }
147
+
148
+ @PluginMethod
149
+ public void getPluginVersion(PluginCall call) {
150
+ JSObject ret = new JSObject();
151
+ ret.put("version", PLUGIN_VERSION);
152
+ call.resolve(ret);
153
+ }
154
+
155
+ @PermissionCallback
156
+ private void permissionsCallback(PluginCall call) {
157
+ String state = permissionStateValue(getPermissionState(SPEECH_RECOGNITION));
158
+ call.resolve(new JSObject().put("speechRecognition", state));
159
+ }
160
+
161
+ @ActivityCallback
162
+ private void listeningResult(PluginCall call, ActivityResult result) {
163
+ if (call == null) {
164
+ return;
165
+ }
166
+
167
+ int resultCode = result.getResultCode();
168
+ if (resultCode == Activity.RESULT_OK) {
169
+ try {
170
+ ArrayList<String> matchesList = result.getData().getStringArrayListExtra(RecognizerIntent.EXTRA_RESULTS);
171
+ JSObject resultObj = new JSObject();
172
+ resultObj.put("matches", new JSArray(matchesList));
173
+ call.resolve(resultObj);
174
+ } catch (Exception ex) {
175
+ call.reject(ex.getMessage());
176
+ }
177
+ } else {
178
+ call.reject(Integer.toString(resultCode));
179
+ }
180
+
181
+ lock.lock();
182
+ listening(false);
183
+ lock.unlock();
184
+ }
185
+
186
+ private void beginListening(
187
+ String language,
188
+ int maxResults,
189
+ String prompt,
190
+ final boolean partialResults,
191
+ boolean showPopup,
192
+ PluginCall call,
193
+ int allowForSilence
194
+ ) {
195
+ Intent intent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
196
+ intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, RecognizerIntent.LANGUAGE_MODEL_FREE_FORM);
197
+ intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE, language);
198
+ intent.putExtra(RecognizerIntent.EXTRA_MAX_RESULTS, maxResults);
199
+ intent.putExtra(RecognizerIntent.EXTRA_CALLING_PACKAGE, bridge.getActivity().getPackageName());
200
+ intent.putExtra(RecognizerIntent.EXTRA_PARTIAL_RESULTS, partialResults);
201
+ intent.putExtra("android.speech.extra.DICTATION_MODE", partialResults);
202
+
203
+ if (allowForSilence > 0) {
204
+ intent.putExtra(RecognizerIntent.EXTRA_SEGMENTED_SESSION, true);
205
+ intent.putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS, allowForSilence);
206
+ intent.putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS, allowForSilence);
207
+ }
208
+
209
+ if (prompt != null) {
210
+ intent.putExtra(RecognizerIntent.EXTRA_PROMPT, prompt);
211
+ }
212
+
213
+ try {
214
+ lock.lock();
215
+ resetPartialResultsCache();
216
+ } finally {
217
+ lock.unlock();
218
+ }
219
+
220
+ if (showPopup) {
221
+ bridge
222
+ .getActivity()
223
+ .runOnUiThread(() -> {
224
+ try {
225
+ SpeechRecognitionPlugin.this.listening(true);
226
+ SpeechRecognitionPlugin.this.startActivityForResult(call, intent, "listeningResult");
227
+ } catch (Exception ex) {
228
+ SpeechRecognitionPlugin.this.listening(false);
229
+ call.reject(ex.getMessage());
230
+ }
231
+ });
232
+ return;
233
+ }
234
+
235
+ bridge
236
+ .getWebView()
237
+ .post(() -> {
238
+ try {
239
+ SpeechRecognitionPlugin.this.lock.lock();
240
+ Logger.info(getLogTag(), "Rebuilding and starting recognizer");
241
+ rebuildRecognizerLocked(call, partialResults);
242
+ speechRecognizer.startListening(intent);
243
+ SpeechRecognitionPlugin.this.listening(true);
244
+ if (partialResults) {
245
+ call.resolve();
246
+ }
247
+ } catch (Exception ex) {
248
+ Logger.error(getLogTag(), "Error starting listening: " + ex.getMessage(), ex);
249
+ call.reject(ex.getMessage());
250
+ } finally {
251
+ SpeechRecognitionPlugin.this.lock.unlock();
252
+ }
253
+ });
254
+ }
255
+
256
+ private void stopListening() {
257
+ bridge
258
+ .getWebView()
259
+ .post(() -> {
260
+ try {
261
+ SpeechRecognitionPlugin.this.lock.lock();
262
+ Logger.info(getLogTag(), "Stopping listening");
263
+ if (speechRecognizer != null) {
264
+ try {
265
+ speechRecognizer.stopListening();
266
+ } catch (Exception ignored) {}
267
+ try {
268
+ speechRecognizer.cancel();
269
+ } catch (Exception ignored) {}
270
+ // Don't destroy here - let rebuildRecognizerLocked handle cleanup
271
+ }
272
+ resetPartialResultsCache();
273
+ SpeechRecognitionPlugin.this.listening(false);
274
+ } finally {
275
+ SpeechRecognitionPlugin.this.lock.unlock();
276
+ }
277
+ });
278
+ }
279
+
280
+ private void destroyRecognizer() {
281
+ bridge.getWebView().post(() -> {
282
+ try {
283
+ SpeechRecognitionPlugin.this.lock.lock();
284
+ if (speechRecognizer != null) {
285
+ speechRecognizer.destroy();
286
+ speechRecognizer = null;
287
+ }
288
+ } finally {
289
+ SpeechRecognitionPlugin.this.lock.unlock();
290
+ }
291
+ });
292
+ }
293
+
294
+ @Override
295
+ protected void handleOnDestroy() {
296
+ super.handleOnDestroy();
297
+ destroyRecognizer();
298
+ }
299
+
300
+ private void listening(boolean value) {
301
+ this.listening = value;
302
+ }
303
+
304
+ private void resetPartialResultsCache() {
305
+ previousPartialResults = new JSONArray();
306
+ }
307
+
308
+ private void rebuildRecognizerLocked(PluginCall call, boolean partialResults) {
309
+ // Reuse the existing recognizer if available - destroying/recreating causes ERROR_SERVER_DISCONNECTED (11)
310
+ // Only create new if null (first time or after an error destroyed it)
311
+ if (speechRecognizer == null) {
312
+ speechRecognizer = SpeechRecognizer.createSpeechRecognizer(bridge.getActivity());
313
+ Logger.info(getLogTag(), "Created new SpeechRecognizer instance");
314
+ } else {
315
+ // Cancel any pending recognition before starting a new one
316
+ try {
317
+ speechRecognizer.cancel();
318
+ } catch (Exception ignored) {}
319
+ Logger.info(getLogTag(), "Reusing existing SpeechRecognizer instance");
320
+ }
321
+
322
+ SpeechRecognitionListener listener = new SpeechRecognitionListener();
323
+ listener.setCall(call);
324
+ listener.setPartialResults(partialResults);
325
+ speechRecognizer.setRecognitionListener(listener);
326
+ }
327
+
328
+ private String permissionStateValue(PermissionState state) {
329
+ switch (state) {
330
+ case GRANTED:
331
+ return "granted";
332
+ case DENIED:
333
+ return "denied";
334
+ case PROMPT:
335
+ case PROMPT_WITH_RATIONALE:
336
+ default:
337
+ return "prompt";
338
+ }
339
+ }
340
+
341
+ private class SpeechRecognitionListener implements RecognitionListener {
342
+
343
+ private PluginCall call;
344
+ private boolean partialResults;
345
+
346
+ public void setCall(PluginCall call) {
347
+ this.call = call;
348
+ }
349
+
350
+ public void setPartialResults(boolean partialResults) {
351
+ this.partialResults = partialResults;
352
+ }
353
+
354
+ @Override
355
+ public void onReadyForSpeech(Bundle params) {}
356
+
357
+ @Override
358
+ public void onBeginningOfSpeech() {
359
+ try {
360
+ lock.lock();
361
+ JSObject ret = new JSObject();
362
+ ret.put("status", "started");
363
+ notifyListeners(LISTENING_EVENT, ret);
364
+ Logger.debug(TAG, "Listening started");
365
+ } finally {
366
+ lock.unlock();
367
+ }
368
+ }
369
+
370
+ @Override
371
+ public void onRmsChanged(float rmsdB) {}
372
+
373
+ @Override
374
+ public void onBufferReceived(byte[] buffer) {}
375
+
376
+ @Override
377
+ public void onEndOfSpeech() {
378
+ bridge
379
+ .getWebView()
380
+ .post(() -> {
381
+ try {
382
+ lock.lock();
383
+ listening(false);
384
+
385
+ JSObject ret = new JSObject();
386
+ ret.put("status", "stopped");
387
+ notifyListeners(LISTENING_EVENT, ret);
388
+ } finally {
389
+ lock.unlock();
390
+ }
391
+ });
392
+ }
393
+
394
+ @Override
395
+ public void onError(int error) {
396
+ String errorMssg = getErrorText(error);
397
+
398
+ // Reset state synchronously on the same thread
399
+ try {
400
+ lock.lock();
401
+ resetPartialResultsCache();
402
+ } finally {
403
+ lock.unlock();
404
+ }
405
+ SpeechRecognitionPlugin.this.listening(false);
406
+
407
+ // Destroy the recognizer synchronously to ensure clean state for next attempt
408
+ if (speechRecognizer != null) {
409
+ try {
410
+ speechRecognizer.cancel();
411
+ } catch (Exception ignored) {}
412
+ try {
413
+ speechRecognizer.destroy();
414
+ } catch (Exception ignored) {}
415
+ speechRecognizer = null;
416
+ }
417
+
418
+ Logger.error(TAG, "Recognizer error: " + errorMssg, null);
419
+
420
+ if (call != null) {
421
+ call.reject(errorMssg);
422
+ }
423
+ }
424
+
425
+ @Override
426
+ public void onResults(Bundle results) {
427
+ ArrayList<String> matches = results.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION);
428
+
429
+ try {
430
+ JSArray jsArray = new JSArray(matches);
431
+ Logger.debug(TAG, "Received final results count=" + (matches == null ? 0 : matches.size()));
432
+
433
+ if (call != null) {
434
+ if (!partialResults) {
435
+ call.resolve(new JSObject().put("status", "success").put("matches", jsArray));
436
+ } else {
437
+ JSObject ret = new JSObject();
438
+ ret.put("matches", jsArray);
439
+ notifyListeners(PARTIAL_RESULTS_EVENT, ret);
440
+ }
441
+ }
442
+ } catch (Exception ex) {
443
+ if (call != null) {
444
+ call.resolve(new JSObject().put("status", "error").put("message", ex.getMessage()));
445
+ }
446
+ } finally {
447
+ try {
448
+ lock.lock();
449
+ resetPartialResultsCache();
450
+ } finally {
451
+ lock.unlock();
452
+ }
453
+ }
454
+ }
455
+
456
+ @Override
457
+ public void onPartialResults(Bundle partialResultsBundle) {
458
+ ArrayList<String> matches = partialResultsBundle.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION);
459
+ JSArray matchesJSON = new JSArray(matches);
460
+
461
+ try {
462
+ lock.lock();
463
+ if (matches != null && matches.size() > 0 && !previousPartialResults.equals(matchesJSON)) {
464
+ previousPartialResults = matchesJSON;
465
+ JSObject ret = new JSObject();
466
+ ret.put("matches", previousPartialResults);
467
+ notifyListeners(PARTIAL_RESULTS_EVENT, ret);
468
+ Logger.debug(TAG, "Partial results updated");
469
+ }
470
+ } catch (Exception ex) {
471
+ } finally {
472
+ lock.unlock();
473
+ }
474
+ }
475
+
476
+ @Override
477
+ public void onSegmentResults(Bundle results) {
478
+ ArrayList<String> matches = results.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION);
479
+ if (matches == null) {
480
+ return;
481
+ }
482
+ try {
483
+ JSObject ret = new JSObject();
484
+ ret.put("matches", new JSArray(matches));
485
+ notifyListeners(SEGMENT_RESULTS_EVENT, ret);
486
+ Logger.debug(TAG, "Segment results emitted");
487
+ } catch (Exception ignored) {}
488
+ }
489
+
490
+ @Override
491
+ public void onEndOfSegmentedSession() {
492
+ notifyListeners(END_OF_SEGMENT_EVENT, new JSObject());
493
+ Logger.debug(TAG, "Segmented session ended");
494
+ }
495
+
496
+ @Override
497
+ public void onEvent(int eventType, Bundle params) {}
498
+ }
499
+
500
+ private String getErrorText(int errorCode) {
501
+ switch (errorCode) {
502
+ case SpeechRecognizer.ERROR_AUDIO:
503
+ return "Audio recording error";
504
+ case SpeechRecognizer.ERROR_CLIENT:
505
+ return "Client side error";
506
+ case SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS:
507
+ return "Insufficient permissions";
508
+ case SpeechRecognizer.ERROR_NETWORK:
509
+ return "Network error";
510
+ case SpeechRecognizer.ERROR_NETWORK_TIMEOUT:
511
+ return "Network timeout";
512
+ case SpeechRecognizer.ERROR_NO_MATCH:
513
+ return "No match";
514
+ case SpeechRecognizer.ERROR_RECOGNIZER_BUSY:
515
+ return "RecognitionService busy";
516
+ case SpeechRecognizer.ERROR_SERVER:
517
+ return "Error from server";
518
+ case SpeechRecognizer.ERROR_SPEECH_TIMEOUT:
519
+ return "No speech input";
520
+ case SpeechRecognizer.ERROR_SERVER_DISCONNECTED:
521
+ return "Server disconnected";
522
+ default:
523
+ return "Didn't understand, please try again. Error code: " + errorCode;
524
+ }
525
+ }
526
+ }
File without changes