capacitor-native-speech-recognition 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,380 @@
1
+ # capacitor-native-speech-recognition
2
+
3
+ Natural, low-latency speech recognition for Capacitor apps with parity across iOS and Android, streaming partial results, and permission helpers baked in.
4
+
5
+ ## Why this plugin?
6
+
7
+ This package starts from the excellent [`capacitor-community/speech-recognition`](https://github.com/capacitor-community/speech-recognition) plugin, but folds in the most requested pull requests from that repo (punctuation support, segmented sessions, crash fixes) and keeps them maintained under the Capgo umbrella. You get the familiar API plus:
8
+
9
+ - ✅ **Merged community PRs** – punctuation toggles on iOS (PR #74), segmented results & silence handling on Android (PR #104), and the `recognitionRequest` safety fix (PR #105) ship out-of-the-box.
10
+ - 🚀 **New Capgo features** – configurable silence windows, streaming segment listeners, consistent permission helpers, and a refreshed example app.
11
+ - 🛠️ **Active maintenance** – same conventions as all Capgo plugins (SPM, Podspec, workflows, example app) so it tracks Capacitor major versions without bit-rot.
12
+ - 📦 **Drop-in migration** – TypeScript definitions remain compatible with the community plugin while exposing the extra options (`addPunctuation`, `allowForSilence`, `segmentResults`, etc.).
13
+
14
+ ## Documentation
15
+
16
+ This repository is prepared to be published as a new npm package. Update the package name placeholders
17
+ (`capacitor-native-speech-recognition`, `rahmanimorteza/capgo-speech-recognition`) before publishing.
18
+
19
+ ## Install
20
+
21
+ ```bash
22
+ npm install capacitor-native-speech-recognition
23
+ npx cap sync
24
+ ```
25
+
26
+ ## Usage
27
+
28
+ ```ts
29
+ import { SpeechRecognition } from 'capacitor-native-speech-recognition';
30
+
31
+ await SpeechRecognition.requestPermissions();
32
+
33
+ const { available } = await SpeechRecognition.available();
34
+ if (!available) {
35
+ console.warn('Speech recognition is not supported on this device.');
36
+ }
37
+
38
+ const partialListener = await SpeechRecognition.addListener('partialResults', (event) => {
39
+ console.log('Partial:', event.matches?.[0]);
40
+ });
41
+
42
+ await SpeechRecognition.start({
43
+ language: 'en-US',
44
+ maxResults: 3,
45
+ partialResults: true,
46
+ });
47
+
48
+ // Later, when you want to stop listening
49
+ await SpeechRecognition.stop();
50
+ await partialListener.remove();
51
+ ```
52
+
53
+ ### iOS usage descriptions
54
+
55
+ Add the following keys to your app's `Info.plist`:
56
+
57
+ - `NSSpeechRecognitionUsageDescription`
58
+ - `NSMicrophoneUsageDescription`
59
+
60
+ ## API
61
+
62
+ <docgen-index>
63
+
64
+ * [`available()`](#available)
65
+ * [`start(...)`](#start)
66
+ * [`stop()`](#stop)
67
+ * [`getSupportedLanguages()`](#getsupportedlanguages)
68
+ * [`isListening()`](#islistening)
69
+ * [`checkPermissions()`](#checkpermissions)
70
+ * [`requestPermissions()`](#requestpermissions)
71
+ * [`getPluginVersion()`](#getpluginversion)
72
+ * [`addListener('endOfSegmentedSession', ...)`](#addlistenerendofsegmentedsession-)
73
+ * [`addListener('segmentResults', ...)`](#addlistenersegmentresults-)
74
+ * [`addListener('partialResults', ...)`](#addlistenerpartialresults-)
75
+ * [`addListener('listeningState', ...)`](#addlistenerlisteningstate-)
76
+ * [`removeAllListeners()`](#removealllisteners)
77
+ * [Interfaces](#interfaces)
78
+ * [Type Aliases](#type-aliases)
79
+
80
+ </docgen-index>
81
+
82
+ <docgen-api>
83
+ <!--Update the source file JSDoc comments and rerun docgen to update the docs below-->
84
+
85
+ ### available()
86
+
87
+ ```typescript
88
+ available() => Promise<SpeechRecognitionAvailability>
89
+ ```
90
+
91
+ Checks whether the native speech recognition service is usable on the current device.
92
+
93
+ **Returns:** <code>Promise&lt;<a href="#speechrecognitionavailability">SpeechRecognitionAvailability</a>&gt;</code>
94
+
95
+ --------------------
96
+
97
+
98
+ ### start(...)
99
+
100
+ ```typescript
101
+ start(options?: SpeechRecognitionStartOptions | undefined) => Promise<SpeechRecognitionMatches>
102
+ ```
103
+
104
+ Begins capturing audio and transcribing speech.
105
+
106
+ When `partialResults` is `true`, the returned promise resolves immediately and updates are
107
+ streamed through the `partialResults` listener until {@link stop} is called.
108
+
109
+ | Param | Type |
110
+ | ------------- | --------------------------------------------------------------------------------------- |
111
+ | **`options`** | <code><a href="#speechrecognitionstartoptions">SpeechRecognitionStartOptions</a></code> |
112
+
113
+ **Returns:** <code>Promise&lt;<a href="#speechrecognitionmatches">SpeechRecognitionMatches</a>&gt;</code>
114
+
115
+ --------------------
116
+
117
+
118
+ ### stop()
119
+
120
+ ```typescript
121
+ stop() => Promise<void>
122
+ ```
123
+
124
+ Stops listening and tears down native resources.
125
+
126
+ --------------------
127
+
128
+
129
+ ### getSupportedLanguages()
130
+
131
+ ```typescript
132
+ getSupportedLanguages() => Promise<SpeechRecognitionLanguages>
133
+ ```
134
+
135
+ Gets the locales supported by the underlying recognizer.
136
+
137
+ Android 13+ devices no longer expose this list; in that case `languages` is empty.
138
+
139
+ **Returns:** <code>Promise&lt;<a href="#speechrecognitionlanguages">SpeechRecognitionLanguages</a>&gt;</code>
140
+
141
+ --------------------
142
+
143
+
144
+ ### isListening()
145
+
146
+ ```typescript
147
+ isListening() => Promise<SpeechRecognitionListening>
148
+ ```
149
+
150
+ Returns whether the plugin is actively listening for speech.
151
+
152
+ **Returns:** <code>Promise&lt;<a href="#speechrecognitionlistening">SpeechRecognitionListening</a>&gt;</code>
153
+
154
+ --------------------
155
+
156
+
157
+ ### checkPermissions()
158
+
159
+ ```typescript
160
+ checkPermissions() => Promise<SpeechRecognitionPermissionStatus>
161
+ ```
162
+
163
+ Gets the current permission state.
164
+
165
+ **Returns:** <code>Promise&lt;<a href="#speechrecognitionpermissionstatus">SpeechRecognitionPermissionStatus</a>&gt;</code>
166
+
167
+ --------------------
168
+
169
+
170
+ ### requestPermissions()
171
+
172
+ ```typescript
173
+ requestPermissions() => Promise<SpeechRecognitionPermissionStatus>
174
+ ```
175
+
176
+ Requests the microphone + speech recognition permissions.
177
+
178
+ **Returns:** <code>Promise&lt;<a href="#speechrecognitionpermissionstatus">SpeechRecognitionPermissionStatus</a>&gt;</code>
179
+
180
+ --------------------
181
+
182
+
183
+ ### getPluginVersion()
184
+
185
+ ```typescript
186
+ getPluginVersion() => Promise<{ version: string; }>
187
+ ```
188
+
189
+ Returns the native plugin version bundled with this package.
190
+
191
+ Useful when reporting issues to confirm that native and JS versions match.
192
+
193
+ **Returns:** <code>Promise&lt;{ version: string; }&gt;</code>
194
+
195
+ --------------------
196
+
197
+
198
+ ### addListener('endOfSegmentedSession', ...)
199
+
200
+ ```typescript
201
+ addListener(eventName: 'endOfSegmentedSession', listenerFunc: () => void) => Promise<PluginListenerHandle>
202
+ ```
203
+
204
+ Listen for segmented session completion events (Android only).
205
+
206
+ | Param | Type |
207
+ | ------------------ | ------------------------------------ |
208
+ | **`eventName`** | <code>'endOfSegmentedSession'</code> |
209
+ | **`listenerFunc`** | <code>() =&gt; void</code> |
210
+
211
+ **Returns:** <code>Promise&lt;<a href="#pluginlistenerhandle">PluginListenerHandle</a>&gt;</code>
212
+
213
+ --------------------
214
+
215
+
216
+ ### addListener('segmentResults', ...)
217
+
218
+ ```typescript
219
+ addListener(eventName: 'segmentResults', listenerFunc: (event: SpeechRecognitionSegmentResultEvent) => void) => Promise<PluginListenerHandle>
220
+ ```
221
+
222
+ Listen for segmented recognition results (Android only).
223
+
224
+ | Param | Type |
225
+ | ------------------ | ----------------------------------------------------------------------------------------------------------------------- |
226
+ | **`eventName`** | <code>'segmentResults'</code> |
227
+ | **`listenerFunc`** | <code>(event: <a href="#speechrecognitionsegmentresultevent">SpeechRecognitionSegmentResultEvent</a>) =&gt; void</code> |
228
+
229
+ **Returns:** <code>Promise&lt;<a href="#pluginlistenerhandle">PluginListenerHandle</a>&gt;</code>
230
+
231
+ --------------------
232
+
233
+
234
+ ### addListener('partialResults', ...)
235
+
236
+ ```typescript
237
+ addListener(eventName: 'partialResults', listenerFunc: (event: SpeechRecognitionPartialResultEvent) => void) => Promise<PluginListenerHandle>
238
+ ```
239
+
240
+ Listen for partial transcription updates emitted while `partialResults` is enabled.
241
+
242
+ | Param | Type |
243
+ | ------------------ | ----------------------------------------------------------------------------------------------------------------------- |
244
+ | **`eventName`** | <code>'partialResults'</code> |
245
+ | **`listenerFunc`** | <code>(event: <a href="#speechrecognitionpartialresultevent">SpeechRecognitionPartialResultEvent</a>) =&gt; void</code> |
246
+
247
+ **Returns:** <code>Promise&lt;<a href="#pluginlistenerhandle">PluginListenerHandle</a>&gt;</code>
248
+
249
+ --------------------
250
+
251
+
252
+ ### addListener('listeningState', ...)
253
+
254
+ ```typescript
255
+ addListener(eventName: 'listeningState', listenerFunc: (event: SpeechRecognitionListeningEvent) => void) => Promise<PluginListenerHandle>
256
+ ```
257
+
258
+ Listen for changes to the native listening state.
259
+
260
+ | Param | Type |
261
+ | ------------------ | --------------------------------------------------------------------------------------------------------------- |
262
+ | **`eventName`** | <code>'listeningState'</code> |
263
+ | **`listenerFunc`** | <code>(event: <a href="#speechrecognitionlisteningevent">SpeechRecognitionListeningEvent</a>) =&gt; void</code> |
264
+
265
+ **Returns:** <code>Promise&lt;<a href="#pluginlistenerhandle">PluginListenerHandle</a>&gt;</code>
266
+
267
+ --------------------
268
+
269
+
270
+ ### removeAllListeners()
271
+
272
+ ```typescript
273
+ removeAllListeners() => Promise<void>
274
+ ```
275
+
276
+ Removes every registered listener.
277
+
278
+ --------------------
279
+
280
+
281
+ ### Interfaces
282
+
283
+
284
+ #### SpeechRecognitionAvailability
285
+
286
+ | Prop | Type |
287
+ | --------------- | -------------------- |
288
+ | **`available`** | <code>boolean</code> |
289
+
290
+
291
+ #### SpeechRecognitionMatches
292
+
293
+ | Prop | Type |
294
+ | ------------- | --------------------- |
295
+ | **`matches`** | <code>string[]</code> |
296
+
297
+
298
+ #### SpeechRecognitionStartOptions
299
+
300
+ Configure how the recognizer behaves when calling {@link SpeechRecognitionPlugin.start}.
301
+
302
+ | Prop | Type | Description |
303
+ | --------------------- | -------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
304
+ | **`language`** | <code>string</code> | Locale identifier such as `en-US`. When omitted the device language is used. |
305
+ | **`maxResults`** | <code>number</code> | Maximum number of final matches returned by native APIs. Defaults to `5`. |
306
+ | **`prompt`** | <code>string</code> | Prompt message shown inside the Android system dialog (ignored on iOS). |
307
+ | **`popup`** | <code>boolean</code> | When `true`, Android shows the OS speech dialog instead of running inline recognition. Defaults to `false`. |
308
+ | **`partialResults`** | <code>boolean</code> | Emits partial transcription updates through the `partialResults` listener while audio is captured. |
309
+ | **`addPunctuation`** | <code>boolean</code> | Enables native punctuation handling where supported (iOS 16+). |
310
+ | **`allowForSilence`** | <code>number</code> | Allow a number of milliseconds of silence before splitting the recognition session into segments. Required to be greater than zero and currently supported on Android only. |
311
+
312
+
313
+ #### SpeechRecognitionLanguages
314
+
315
+ | Prop | Type |
316
+ | --------------- | --------------------- |
317
+ | **`languages`** | <code>string[]</code> |
318
+
319
+
320
+ #### SpeechRecognitionListening
321
+
322
+ | Prop | Type |
323
+ | --------------- | -------------------- |
324
+ | **`listening`** | <code>boolean</code> |
325
+
326
+
327
+ #### SpeechRecognitionPermissionStatus
328
+
329
+ Permission map returned by `checkPermissions` and `requestPermissions`.
330
+
331
+ On Android the state maps to the `RECORD_AUDIO` permission.
332
+ On iOS it combines speech recognition plus microphone permission.
333
+
334
+ | Prop | Type |
335
+ | ----------------------- | ----------------------------------------------------------- |
336
+ | **`speechRecognition`** | <code><a href="#permissionstate">PermissionState</a></code> |
337
+
338
+
339
+ #### PluginListenerHandle
340
+
341
+ | Prop | Type |
342
+ | ------------ | ----------------------------------------- |
343
+ | **`remove`** | <code>() =&gt; Promise&lt;void&gt;</code> |
344
+
345
+
346
+ #### SpeechRecognitionSegmentResultEvent
347
+
348
+ Raised whenever a segmented result is produced (Android only).
349
+
350
+ | Prop | Type |
351
+ | ------------- | --------------------- |
352
+ | **`matches`** | <code>string[]</code> |
353
+
354
+
355
+ #### SpeechRecognitionPartialResultEvent
356
+
357
+ Raised whenever a partial transcription is produced.
358
+
359
+ | Prop | Type |
360
+ | ------------- | --------------------- |
361
+ | **`matches`** | <code>string[]</code> |
362
+
363
+
364
+ #### SpeechRecognitionListeningEvent
365
+
366
+ Raised when the listening state changes.
367
+
368
+ | Prop | Type |
369
+ | ------------ | ----------------------------------- |
370
+ | **`status`** | <code>'started' \| 'stopped'</code> |
371
+
372
+
373
+ ### Type Aliases
374
+
375
+
376
+ #### PermissionState
377
+
378
+ <code>'prompt' | 'prompt-with-rationale' | 'granted' | 'denied'</code>
379
+
380
+ </docgen-api>
@@ -0,0 +1,57 @@
1
// Dependency versions; the consuming app can override any of these via rootProject.ext.
ext {
    junitVersion = project.hasProperty('junitVersion') ? rootProject.ext.junitVersion : '4.13.2'
    androidxAppCompatVersion = project.hasProperty('androidxAppCompatVersion') ? rootProject.ext.androidxAppCompatVersion : '1.7.1'
    androidxJunitVersion = project.hasProperty('androidxJunitVersion') ? rootProject.ext.androidxJunitVersion : '1.3.0'
    androidxEspressoCoreVersion = project.hasProperty('androidxEspressoCoreVersion') ? rootProject.ext.androidxEspressoCoreVersion : '3.7.0'
}

buildscript {
    repositories {
        google()
        mavenCentral()
    }
    dependencies {
        classpath 'com.android.tools.build:gradle:8.13.0'
    }
}

apply plugin: 'com.android.library'

android {
    namespace = "app.capgo.speechrecognition"
    // SDK levels fall back to sensible defaults when the host app does not define them.
    compileSdk = project.hasProperty('compileSdkVersion') ? rootProject.ext.compileSdkVersion : 36
    defaultConfig {
        minSdkVersion project.hasProperty('minSdkVersion') ? rootProject.ext.minSdkVersion : 24
        targetSdkVersion project.hasProperty('targetSdkVersion') ? rootProject.ext.targetSdkVersion : 36
        versionCode 1
        versionName "1.0"
        testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner"
    }
    buildTypes {
        release {
            minifyEnabled false
            proguardFiles getDefaultProguardFile('proguard-android.txt'), 'proguard-rules.pro'
        }
    }
    // `lintOptions` has been deprecated since AGP 7; the `lint` block is the
    // supported replacement and takes the same properties.
    lint {
        abortOnError = false
    }
    compileOptions {
        sourceCompatibility JavaVersion.VERSION_21
        targetCompatibility JavaVersion.VERSION_21
    }
}

repositories {
    google()
    mavenCentral()
}

dependencies {
    implementation fileTree(dir: 'libs', include: ['*.jar'])
    implementation project(':capacitor-android')
    implementation "androidx.appcompat:appcompat:$androidxAppCompatVersion"
    testImplementation "junit:junit:$junitVersion"
    androidTestImplementation "androidx.test.ext:junit:$androidxJunitVersion"
    androidTestImplementation "androidx.test.espresso:espresso-core:$androidxEspressoCoreVersion"
}
@@ -0,0 +1,3 @@
1
+ <manifest xmlns:android="http://schemas.android.com/apk/res/android">
2
+ <uses-permission android:name="android.permission.RECORD_AUDIO" />
3
+ </manifest>
@@ -0,0 +1,17 @@
1
+ package app.capgo.speechrecognition;
2
+
3
+ import android.Manifest;
4
+
5
+ public interface Constants {
6
+ int REQUEST_CODE_PERMISSION = 2001;
7
+ int REQUEST_CODE_SPEECH = 2002;
8
+ int MAX_RESULTS = 5;
9
+ String NOT_AVAILABLE = "Speech recognition service is not available.";
10
+ String MISSING_PERMISSION = "Missing permission";
11
+ String SEGMENT_RESULTS_EVENT = "segmentResults";
12
+ String END_OF_SEGMENT_EVENT = "endOfSegmentedSession";
13
+ String LISTENING_EVENT = "listeningState";
14
+ String PARTIAL_RESULTS_EVENT = "partialResults";
15
+ String RECORD_AUDIO_PERMISSION = Manifest.permission.RECORD_AUDIO;
16
+ String LANGUAGE_ERROR = "Could not get list of languages";
17
+ }
@@ -0,0 +1,50 @@
1
+ package app.capgo.speechrecognition;
2
+
3
+ import android.content.BroadcastReceiver;
4
+ import android.content.Context;
5
+ import android.content.Intent;
6
+ import android.os.Bundle;
7
+ import android.speech.RecognizerIntent;
8
+ import com.getcapacitor.JSArray;
9
+ import com.getcapacitor.JSObject;
10
+ import com.getcapacitor.PluginCall;
11
+ import java.util.List;
12
+
13
+ public class Receiver extends BroadcastReceiver implements Constants {
14
+
15
+ private List<String> supportedLanguagesList;
16
+ private String languagePref;
17
+ private final PluginCall call;
18
+
19
+ public Receiver(PluginCall call) {
20
+ super();
21
+ this.call = call;
22
+ }
23
+
24
+ @Override
25
+ public void onReceive(Context context, Intent intent) {
26
+ Bundle extras = getResultExtras(true);
27
+
28
+ if (extras.containsKey(RecognizerIntent.EXTRA_LANGUAGE_PREFERENCE)) {
29
+ languagePref = extras.getString(RecognizerIntent.EXTRA_LANGUAGE_PREFERENCE);
30
+ }
31
+
32
+ if (extras.containsKey(RecognizerIntent.EXTRA_SUPPORTED_LANGUAGES)) {
33
+ supportedLanguagesList = extras.getStringArrayList(RecognizerIntent.EXTRA_SUPPORTED_LANGUAGES);
34
+
35
+ JSArray languagesList = new JSArray(supportedLanguagesList);
36
+ call.resolve(new JSObject().put("languages", languagesList));
37
+ return;
38
+ }
39
+
40
+ call.reject(LANGUAGE_ERROR);
41
+ }
42
+
43
+ public List<String> getSupportedLanguages() {
44
+ return supportedLanguagesList;
45
+ }
46
+
47
+ public String getLanguagePreference() {
48
+ return languagePref;
49
+ }
50
+ }