rn-speech-to-text 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +2 -0
  3. package/android/build.gradle +35 -0
  4. package/android/src/main/assets/models/vosk-model-small-en-us-0.15/README +9 -0
  5. package/android/src/main/assets/models/vosk-model-small-en-us-0.15/am/final.mdl +0 -0
  6. package/android/src/main/assets/models/vosk-model-small-en-us-0.15/conf/mfcc.conf +7 -0
  7. package/android/src/main/assets/models/vosk-model-small-en-us-0.15/conf/model.conf +10 -0
  8. package/android/src/main/assets/models/vosk-model-small-en-us-0.15/graph/Gr.fst +0 -0
  9. package/android/src/main/assets/models/vosk-model-small-en-us-0.15/graph/HCLr.fst +0 -0
  10. package/android/src/main/assets/models/vosk-model-small-en-us-0.15/graph/disambig_tid.int +17 -0
  11. package/android/src/main/assets/models/vosk-model-small-en-us-0.15/graph/phones/word_boundary.int +166 -0
  12. package/android/src/main/assets/models/vosk-model-small-en-us-0.15/ivector/final.dubm +0 -0
  13. package/android/src/main/assets/models/vosk-model-small-en-us-0.15/ivector/final.ie +0 -0
  14. package/android/src/main/assets/models/vosk-model-small-en-us-0.15/ivector/final.mat +0 -0
  15. package/android/src/main/assets/models/vosk-model-small-en-us-0.15/ivector/global_cmvn.stats +3 -0
  16. package/android/src/main/assets/models/vosk-model-small-en-us-0.15/ivector/online_cmvn.conf +1 -0
  17. package/android/src/main/assets/models/vosk-model-small-en-us-0.15/ivector/splice.conf +2 -0
  18. package/android/src/main/assets/models/vosk-model-small-en-us-0.15/uuid +1 -0
  19. package/android/src/main/java/com/vinfbsomni/SpeechToTextModule.java +310 -0
  20. package/android/src/main/java/com/vinfbsomni/SpeechToTextPackage.java +25 -0
  21. package/ios/SpeechToText.m +22 -0
  22. package/ios/SpeechToText.swift +138 -0
  23. package/package.json +37 -0
  24. package/react-native.config.js +1 -0
  25. package/rn-speech-to-text.podspec +23 -0
  26. package/src/SpeechToText.js +134 -0
  27. package/src/index.js +1 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 tarun-vin
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,2 @@
1
+ # rn-speech-to-text
2
+ Offline speech-to-text native module for React Native (iOS Speech framework; Vosk engine on Android).
@@ -0,0 +1,35 @@
// Gradle build for the rn-speech-to-text Android library module.
buildscript {
    repositories {
        mavenCentral()
        google()
    }
}

apply plugin: 'com.android.library'

android {
    namespace "com.vinfbsomni"
    compileSdkVersion 34

    defaultConfig {
        minSdkVersion 21
        targetSdkVersion 34
    }

    sourceSets {
        main {
            java.srcDirs = ['src/main/java']
        }
    }
}

repositories {
    mavenCentral()
    google()
}

dependencies {
    // React Native bridge classes; the "+" version is resolved by the host app.
    implementation 'com.facebook.react:react-native:+'

    // Offline speech-recognition engine; the bundled model lives under
    // src/main/assets/models/vosk-model-small-en-us-0.15.
    implementation 'com.alphacephei:vosk-android:0.3.47'
}
@@ -0,0 +1,9 @@
1
+ US English model for mobile Vosk applications
2
+
3
+ Copyright 2020 Alpha Cephei Inc
4
+
5
+ Accuracy: 10.38 (tedlium test) 9.85 (librispeech test-clean)
6
+ Speed: 0.11xRT (desktop)
7
+ Latency: 0.15s (right context)
8
+
9
+
@@ -0,0 +1,7 @@
1
+ --sample-frequency=16000
2
+ --use-energy=false
3
+ --num-mel-bins=40
4
+ --num-ceps=40
5
+ --low-freq=20
6
+ --high-freq=7600
7
+ --allow-downsample=true
@@ -0,0 +1,10 @@
1
+ --min-active=200
2
+ --max-active=3000
3
+ --beam=10.0
4
+ --lattice-beam=2.0
5
+ --acoustic-scale=1.0
6
+ --frame-subsampling-factor=3
7
+ --endpoint.silence-phones=1:2:3:4:5:6:7:8:9:10
8
+ --endpoint.rule2.min-trailing-silence=0.5
9
+ --endpoint.rule3.min-trailing-silence=0.75
10
+ --endpoint.rule4.min-trailing-silence=1.0
@@ -0,0 +1,17 @@
1
+ 10015
2
+ 10016
3
+ 10017
4
+ 10018
5
+ 10019
6
+ 10020
7
+ 10021
8
+ 10022
9
+ 10023
10
+ 10024
11
+ 10025
12
+ 10026
13
+ 10027
14
+ 10028
15
+ 10029
16
+ 10030
17
+ 10031
@@ -0,0 +1,166 @@
1
+ 1 nonword
2
+ 2 begin
3
+ 3 end
4
+ 4 internal
5
+ 5 singleton
6
+ 6 nonword
7
+ 7 begin
8
+ 8 end
9
+ 9 internal
10
+ 10 singleton
11
+ 11 begin
12
+ 12 end
13
+ 13 internal
14
+ 14 singleton
15
+ 15 begin
16
+ 16 end
17
+ 17 internal
18
+ 18 singleton
19
+ 19 begin
20
+ 20 end
21
+ 21 internal
22
+ 22 singleton
23
+ 23 begin
24
+ 24 end
25
+ 25 internal
26
+ 26 singleton
27
+ 27 begin
28
+ 28 end
29
+ 29 internal
30
+ 30 singleton
31
+ 31 begin
32
+ 32 end
33
+ 33 internal
34
+ 34 singleton
35
+ 35 begin
36
+ 36 end
37
+ 37 internal
38
+ 38 singleton
39
+ 39 begin
40
+ 40 end
41
+ 41 internal
42
+ 42 singleton
43
+ 43 begin
44
+ 44 end
45
+ 45 internal
46
+ 46 singleton
47
+ 47 begin
48
+ 48 end
49
+ 49 internal
50
+ 50 singleton
51
+ 51 begin
52
+ 52 end
53
+ 53 internal
54
+ 54 singleton
55
+ 55 begin
56
+ 56 end
57
+ 57 internal
58
+ 58 singleton
59
+ 59 begin
60
+ 60 end
61
+ 61 internal
62
+ 62 singleton
63
+ 63 begin
64
+ 64 end
65
+ 65 internal
66
+ 66 singleton
67
+ 67 begin
68
+ 68 end
69
+ 69 internal
70
+ 70 singleton
71
+ 71 begin
72
+ 72 end
73
+ 73 internal
74
+ 74 singleton
75
+ 75 begin
76
+ 76 end
77
+ 77 internal
78
+ 78 singleton
79
+ 79 begin
80
+ 80 end
81
+ 81 internal
82
+ 82 singleton
83
+ 83 begin
84
+ 84 end
85
+ 85 internal
86
+ 86 singleton
87
+ 87 begin
88
+ 88 end
89
+ 89 internal
90
+ 90 singleton
91
+ 91 begin
92
+ 92 end
93
+ 93 internal
94
+ 94 singleton
95
+ 95 begin
96
+ 96 end
97
+ 97 internal
98
+ 98 singleton
99
+ 99 begin
100
+ 100 end
101
+ 101 internal
102
+ 102 singleton
103
+ 103 begin
104
+ 104 end
105
+ 105 internal
106
+ 106 singleton
107
+ 107 begin
108
+ 108 end
109
+ 109 internal
110
+ 110 singleton
111
+ 111 begin
112
+ 112 end
113
+ 113 internal
114
+ 114 singleton
115
+ 115 begin
116
+ 116 end
117
+ 117 internal
118
+ 118 singleton
119
+ 119 begin
120
+ 120 end
121
+ 121 internal
122
+ 122 singleton
123
+ 123 begin
124
+ 124 end
125
+ 125 internal
126
+ 126 singleton
127
+ 127 begin
128
+ 128 end
129
+ 129 internal
130
+ 130 singleton
131
+ 131 begin
132
+ 132 end
133
+ 133 internal
134
+ 134 singleton
135
+ 135 begin
136
+ 136 end
137
+ 137 internal
138
+ 138 singleton
139
+ 139 begin
140
+ 140 end
141
+ 141 internal
142
+ 142 singleton
143
+ 143 begin
144
+ 144 end
145
+ 145 internal
146
+ 146 singleton
147
+ 147 begin
148
+ 148 end
149
+ 149 internal
150
+ 150 singleton
151
+ 151 begin
152
+ 152 end
153
+ 153 internal
154
+ 154 singleton
155
+ 155 begin
156
+ 156 end
157
+ 157 internal
158
+ 158 singleton
159
+ 159 begin
160
+ 160 end
161
+ 161 internal
162
+ 162 singleton
163
+ 163 begin
164
+ 164 end
165
+ 165 internal
166
+ 166 singleton
@@ -0,0 +1,3 @@
1
+ [
2
+ 1.682383e+11 -1.1595e+10 -1.521733e+10 4.32034e+09 -2.257938e+10 -1.969666e+10 -2.559265e+10 -1.535687e+10 -1.276854e+10 -4.494483e+09 -1.209085e+10 -5.64008e+09 -1.134847e+10 -3.419512e+09 -1.079542e+10 -4.145463e+09 -6.637486e+09 -1.11318e+09 -3.479773e+09 -1.245932e+08 -1.386961e+09 6.560655e+07 -2.436518e+08 -4.032432e+07 4.620046e+08 -7.714964e+07 9.551484e+08 -4.119761e+08 8.208582e+08 -7.117156e+08 7.457703e+08 -4.3106e+08 1.202726e+09 2.904036e+08 1.231931e+09 3.629848e+08 6.366939e+08 -4.586172e+08 -5.267629e+08 -3.507819e+08 1.679838e+09
3
+ 1.741141e+13 8.92488e+11 8.743834e+11 8.848896e+11 1.190313e+12 1.160279e+12 1.300066e+12 1.005678e+12 9.39335e+11 8.089614e+11 7.927041e+11 6.882427e+11 6.444235e+11 5.151451e+11 4.825723e+11 3.210106e+11 2.720254e+11 1.772539e+11 1.248102e+11 6.691599e+10 3.599804e+10 1.207574e+10 1.679301e+09 4.594778e+08 5.821614e+09 1.451758e+10 2.55803e+10 3.43277e+10 4.245286e+10 4.784859e+10 4.988591e+10 4.925451e+10 5.074584e+10 4.9557e+10 4.407876e+10 3.421443e+10 3.138606e+10 2.539716e+10 1.948134e+10 1.381167e+10 0 ]
@@ -0,0 +1 @@
1
+ # configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh
@@ -0,0 +1,2 @@
1
+ --left-context=3
2
+ --right-context=3
@@ -0,0 +1 @@
1
+ custom-vosk-model-uuid-1
@@ -0,0 +1,310 @@
1
+ package com.vinfbsomni;
2
+
3
+ import android.Manifest;
4
+ import android.content.pm.PackageManager;
5
+ import android.media.AudioFormat;
6
+ import android.media.AudioRecord;
7
+ import android.media.MediaRecorder;
8
+ import android.util.Log;
9
+
10
+ import androidx.annotation.NonNull;
11
+ import androidx.annotation.Nullable;
12
+ import androidx.core.content.ContextCompat;
13
+
14
+ import com.facebook.react.bridge.Arguments;
15
+ import com.facebook.react.bridge.LifecycleEventListener;
16
+ import com.facebook.react.bridge.Promise;
17
+ import com.facebook.react.bridge.ReactApplicationContext;
18
+ import com.facebook.react.bridge.ReactContextBaseJavaModule;
19
+ import com.facebook.react.bridge.ReactMethod;
20
+ import com.facebook.react.bridge.WritableMap;
21
+ import com.facebook.react.modules.core.DeviceEventManagerModule;
22
+
23
+ import org.json.JSONObject;
24
+ import org.vosk.Model;
25
+ import org.vosk.Recognizer;
26
+ import org.vosk.android.StorageService;
27
+
28
+ import java.io.IOException;
29
+
30
+ public class SpeechToTextModule extends ReactContextBaseJavaModule implements LifecycleEventListener {
31
+
32
+ private static final String TAG = "SpeechToTextVosk";
33
+
34
+ // AudioRecord config (16k mono PCM16 is common for ASR)
35
+ private static final int SAMPLE_RATE = 16000;
36
+ private static final int CHANNEL_CONFIG = AudioFormat.CHANNEL_IN_MONO;
37
+ private static final int AUDIO_FORMAT = AudioFormat.ENCODING_PCM_16BIT;
38
+
39
+ // This is the folder under android/app/src/main/assets/
40
+ // Your model is at: android/app/src/main/assets/models/vosk-model-small-en-us-0.15/...
41
+ private static final String ASSET_MODEL_PATH = "models/vosk-model-small-en-us-0.15";
42
+ private static final String UNPACKED_MODEL_DIR = "model"; // directory name in app internal storage
43
+
44
+ private final ReactApplicationContext reactContext;
45
+
46
+ private AudioRecord audioRecord;
47
+ private boolean isRecording = false;
48
+ private Thread recordingThread;
49
+
50
+ private Model voskModel;
51
+ private Recognizer recognizer;
52
+
53
+ public SpeechToTextModule(ReactApplicationContext reactContext) {
54
+ super(reactContext);
55
+ this.reactContext = reactContext;
56
+ reactContext.addLifecycleEventListener(this);
57
+ }
58
+
59
+ @NonNull
60
+ @Override
61
+ public String getName() {
62
+ return "SpeechToText";
63
+ }
64
+
65
+ // ---- Helpers ----
66
+
67
+ private void sendEvent(String eventName, @Nullable WritableMap params) {
68
+ reactContext
69
+ .getJSModule(DeviceEventManagerModule.RCTDeviceEventEmitter.class)
70
+ .emit(eventName, params);
71
+ }
72
+
73
+ private void sendErrorEvent(String message) {
74
+ WritableMap map = Arguments.createMap();
75
+ map.putString("error", message);
76
+ sendEvent("onSpeechError", map);
77
+ }
78
+
79
+ // ---- Model initialization (must be called from JS before start()) ----
80
+
81
+ @ReactMethod
82
+ public void initModel(Promise promise) {
83
+ if (voskModel != null) {
84
+ // Already initialized
85
+ promise.resolve(null);
86
+ return;
87
+ }
88
+
89
+ try {
90
+ Log.d(TAG, "Unpacking Vosk model from assets: " + ASSET_MODEL_PATH);
91
+
92
+ StorageService.unpack(
93
+ reactContext,
94
+ ASSET_MODEL_PATH,
95
+ UNPACKED_MODEL_DIR,
96
+ (Model model) -> {
97
+ voskModel = model;
98
+ Log.d(TAG, "Vosk model loaded successfully");
99
+ promise.resolve(null);
100
+ },
101
+ (IOException e) -> {
102
+ Log.e(TAG, "Failed to unpack/load Vosk model", e);
103
+ promise.reject("model_init_failed", e);
104
+ }
105
+ );
106
+ } catch (Exception e) {
107
+ Log.e(TAG, "Exception during model init", e);
108
+ promise.reject("model_init_failed", e);
109
+ }
110
+ }
111
+
112
+ // ---- Authorization (status only; runtime request done in JS) ----
113
+
114
+ @ReactMethod
115
+ public void requestAuthorization(Promise promise) {
116
+ int status = ContextCompat.checkSelfPermission(
117
+ reactContext,
118
+ Manifest.permission.RECORD_AUDIO
119
+ );
120
+
121
+ if (status == PackageManager.PERMISSION_GRANTED) {
122
+ promise.resolve("authorized");
123
+ } else {
124
+ promise.resolve("denied");
125
+ }
126
+ }
127
+
128
+ // ---- Start raw audio capture with AudioRecord + Vosk ----
129
+
130
+ @ReactMethod
131
+ public void start(Promise promise) {
132
+ if (isRecording) {
133
+ // Already recording; just resolve
134
+ promise.resolve(null);
135
+ return;
136
+ }
137
+
138
+ int status = ContextCompat.checkSelfPermission(
139
+ reactContext,
140
+ Manifest.permission.RECORD_AUDIO
141
+ );
142
+ if (status != PackageManager.PERMISSION_GRANTED) {
143
+ promise.reject("mic_permission_denied", "RECORD_AUDIO permission not granted");
144
+ return;
145
+ }
146
+
147
+ if (voskModel == null) {
148
+ // JS must call initModel() first and wait for it to complete
149
+ promise.reject("model_not_initialized", "Call initModel() and wait for it to complete before calling start()");
150
+ return;
151
+ }
152
+
153
+ try {
154
+ // Create recognizer for this session
155
+ releaseRecognizer();
156
+ recognizer = new Recognizer(voskModel, SAMPLE_RATE);
157
+
158
+ int minBufferSize = AudioRecord.getMinBufferSize(
159
+ SAMPLE_RATE,
160
+ CHANNEL_CONFIG,
161
+ AUDIO_FORMAT
162
+ );
163
+ if (minBufferSize == AudioRecord.ERROR || minBufferSize == AudioRecord.ERROR_BAD_VALUE) {
164
+ promise.reject("audio_init_error", "Invalid buffer size for AudioRecord");
165
+ return;
166
+ }
167
+
168
+ audioRecord = new AudioRecord(
169
+ MediaRecorder.AudioSource.MIC,
170
+ SAMPLE_RATE,
171
+ CHANNEL_CONFIG,
172
+ AUDIO_FORMAT,
173
+ minBufferSize
174
+ );
175
+
176
+ if (audioRecord.getState() != AudioRecord.STATE_INITIALIZED) {
177
+ audioRecord.release();
178
+ audioRecord = null;
179
+ promise.reject("audio_init_error", "AudioRecord failed to initialize");
180
+ return;
181
+ }
182
+
183
+ isRecording = true;
184
+ audioRecord.startRecording();
185
+
186
+ // Background thread that continually reads audio & sends to Vosk
187
+ recordingThread = new Thread(() -> {
188
+ byte[] buffer = new byte[minBufferSize];
189
+
190
+ try {
191
+ while (isRecording && audioRecord != null && recognizer != null) {
192
+ int read = audioRecord.read(buffer, 0, buffer.length);
193
+ if (read > 0) {
194
+ boolean isFinal = recognizer.acceptWaveForm(buffer, read);
195
+
196
+ String resultJson;
197
+ if (isFinal) {
198
+ resultJson = recognizer.getResult(); // final segment
199
+ } else {
200
+ resultJson = recognizer.getPartialResult(); // partial segment
201
+ }
202
+
203
+ try {
204
+ JSONObject json = new JSONObject(resultJson);
205
+ if (isFinal) {
206
+ String text = json.optString("text", "");
207
+ if (!text.isEmpty()) {
208
+ WritableMap map = Arguments.createMap();
209
+ map.putString("value", text);
210
+ sendEvent("onSpeechResults", map);
211
+ }
212
+ } else {
213
+ String partial = json.optString("partial", "");
214
+ if (!partial.isEmpty()) {
215
+ WritableMap map = Arguments.createMap();
216
+ map.putString("value", partial);
217
+ sendEvent("onSpeechPartialResults", map);
218
+ }
219
+ }
220
+ } catch (Exception e) {
221
+ Log.e(TAG, "Vosk JSON parse error", e);
222
+ }
223
+ }
224
+ }
225
+ } catch (Exception e) {
226
+ Log.e(TAG, "AudioRecord / Vosk error", e);
227
+ sendErrorEvent("AudioRecord / Vosk error: " + e.getMessage());
228
+ }
229
+ }, "AudioRecordVoskThread");
230
+
231
+ recordingThread.start();
232
+ promise.resolve(null);
233
+ } catch (Exception e) {
234
+ Log.e(TAG, "start AudioRecord+Vosk error", e);
235
+ stopInternal();
236
+ promise.reject("start_error", e);
237
+ }
238
+ }
239
+
240
+ // ---- Stop raw audio capture ----
241
+
242
+ @ReactMethod
243
+ public void stop(Promise promise) {
244
+ stopInternal();
245
+ promise.resolve(null);
246
+ }
247
+
248
+ private void stopInternal() {
249
+ isRecording = false;
250
+
251
+ if (recordingThread != null) {
252
+ try {
253
+ recordingThread.join();
254
+ } catch (InterruptedException e) {
255
+ // ignore
256
+ }
257
+ recordingThread = null;
258
+ }
259
+
260
+ if (audioRecord != null) {
261
+ try {
262
+ if (audioRecord.getState() == AudioRecord.STATE_INITIALIZED) {
263
+ audioRecord.stop();
264
+ }
265
+ } catch (Exception e) {
266
+ Log.e(TAG, "AudioRecord stop error", e);
267
+ }
268
+ audioRecord.release();
269
+ audioRecord = null;
270
+ }
271
+
272
+ releaseRecognizer();
273
+ // Usually you keep the Model alive for the app lifetime to avoid reload cost.
274
+ // If you really want to free memory completely, uncomment below:
275
+ /*
276
+ if (voskModel != null) {
277
+ try {
278
+ voskModel.close();
279
+ } catch (Exception e) {
280
+ Log.e(TAG, "Error closing Vosk model", e);
281
+ }
282
+ voskModel = null;
283
+ }
284
+ */
285
+ }
286
+
287
+ private void releaseRecognizer() {
288
+ if (recognizer != null) {
289
+ try {
290
+ recognizer.close();
291
+ } catch (Exception e) {
292
+ Log.e(TAG, "Error closing recognizer", e);
293
+ }
294
+ recognizer = null;
295
+ }
296
+ }
297
+
298
+ // ---- Lifecycle cleanup ----
299
+
300
+ @Override
301
+ public void onHostResume() { }
302
+
303
+ @Override
304
+ public void onHostPause() { }
305
+
306
+ @Override
307
+ public void onHostDestroy() {
308
+ stopInternal();
309
+ }
310
+ }
@@ -0,0 +1,25 @@
1
+ package com.vinfbsomni;
2
+
3
+ import com.facebook.react.ReactPackage;
4
+ import com.facebook.react.bridge.NativeModule;
5
+ import com.facebook.react.bridge.ReactApplicationContext;
6
+ import com.facebook.react.uimanager.ViewManager;
7
+
8
+ import java.util.ArrayList;
9
+ import java.util.Collections;
10
+ import java.util.List;
11
+
12
+ public class SpeechToTextPackage implements ReactPackage {
13
+
14
+ @Override
15
+ public List<NativeModule> createNativeModules(ReactApplicationContext reactContext) {
16
+ List<NativeModule> modules = new ArrayList<>();
17
+ modules.add(new SpeechToTextModule(reactContext));
18
+ return modules;
19
+ }
20
+
21
+ @Override
22
+ public List<ViewManager> createViewManagers(ReactApplicationContext reactContext) {
23
+ return Collections.emptyList();
24
+ }
25
+ }
@@ -0,0 +1,22 @@
//
//  SpeechToText.m
//  Sango
//
//  Created by TarunKumar on 26/02/26.
//
//  Objective-C bridge exposing the Swift SpeechToText RCTEventEmitter
//  (see SpeechToText.swift) to the React Native JS runtime.

#import <React/RCTBridgeModule.h>
#import <React/RCTEventEmitter.h>

@interface RCT_EXTERN_MODULE(SpeechToText, RCTEventEmitter)

// Resolves with the Speech framework authorization status string
// ("authorized" / "denied" / "restricted" / "notDetermined" / "unknown").
RCT_EXTERN_METHOD(requestAuthorization:(RCTPromiseResolveBlock)resolve
                  rejecter:(RCTPromiseRejectBlock)reject)

// Starts live recognition; transcripts are delivered via the
// onSpeechResults / onSpeechPartialResults events, errors via onSpeechError.
RCT_EXTERN_METHOD(start:(RCTPromiseResolveBlock)resolve
                  rejecter:(RCTPromiseRejectBlock)reject)

// Stops recognition and tears down the audio session.
RCT_EXTERN_METHOD(stop:(RCTPromiseResolveBlock)resolve
                  rejecter:(RCTPromiseRejectBlock)reject)

@end
@@ -0,0 +1,138 @@
//
//  SpeechToText.swift
//  Sango
//
//  Created by TarunKumar on 26/02/26.
//
//  RCTEventEmitter streaming live SFSpeechRecognizer transcripts to JS via
//  the onSpeechResults / onSpeechPartialResults / onSpeechError events.

import Foundation
import Speech
import AVFoundation
import React

@objc(SpeechToText)
class SpeechToText: RCTEventEmitter {

  private let audioEngine = AVAudioEngine()
  private var speechRecognizer = SFSpeechRecognizer(locale: Locale(identifier: "en-US"))
  private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
  private var recognitionTask: SFSpeechRecognitionTask?

  // MARK: - React Native setup

  override static func requiresMainQueueSetup() -> Bool {
    return true
  }

  override func supportedEvents() -> [String]! {
    return ["onSpeechResults", "onSpeechPartialResults", "onSpeechError"]
  }

  // MARK: - Public methods exposed to JS

  /// Resolves with the Speech framework authorization status as a string.
  @objc
  func requestAuthorization(_ resolve: @escaping RCTPromiseResolveBlock,
                            rejecter reject: @escaping RCTPromiseRejectBlock) {
    SFSpeechRecognizer.requestAuthorization { status in
      DispatchQueue.main.async {
        switch status {
        case .authorized:
          resolve("authorized")
        case .denied:
          resolve("denied")
        case .restricted:
          resolve("restricted")
        case .notDetermined:
          resolve("notDetermined")
        @unknown default:
          resolve("unknown")
        }
      }
    }
  }

  /// Starts a recognition session; rejects with "start_error" (and emits
  /// onSpeechError) when the audio session or recognizer cannot be set up.
  @objc
  func start(_ resolve: @escaping RCTPromiseResolveBlock,
             rejecter reject: @escaping RCTPromiseRejectBlock) {
    do {
      try startRecording()
      resolve(nil)
    } catch {
      stopRecording()
      reject("start_error", error.localizedDescription, error)
      sendEvent(withName: "onSpeechError", body: ["error": error.localizedDescription])
    }
  }

  /// Stops the current session (no-op if none is active).
  @objc
  func stop(_ resolve: @escaping RCTPromiseResolveBlock,
            rejecter reject: @escaping RCTPromiseRejectBlock) {
    stopRecording()
    resolve(nil)
  }

  // MARK: - Recording logic

  private func startRecording() throws {
    // reset any previous session
    stopRecording()

    // FIX: fail fast when the recognizer is missing or currently unavailable.
    // Previously the audio engine (and microphone) started even when
    // speechRecognizer was nil, leaving a hot mic with no recognition task
    // and no error surfaced to JS.
    guard let recognizer = speechRecognizer, recognizer.isAvailable else {
      throw NSError(
        domain: "SpeechToText",
        code: -2,
        userInfo: [NSLocalizedDescriptionKey: "Speech recognizer is unavailable"]
      )
    }

    // Configure audio session
    let audioSession = AVAudioSession.sharedInstance()
    try audioSession.setCategory(.record, mode: .measurement, options: .duckOthers)
    try audioSession.setActive(true, options: .notifyOthersOnDeactivation)

    recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
    guard let recognitionRequest = recognitionRequest else {
      throw NSError(
        domain: "SpeechToText",
        code: -1,
        userInfo: [NSLocalizedDescriptionKey: "Unable to create recognition request"]
      )
    }
    recognitionRequest.shouldReportPartialResults = true

    let inputNode = audioEngine.inputNode

    recognitionTask = recognizer.recognitionTask(with: recognitionRequest) {
      [weak self] result, error in
      guard let self = self else { return }

      if let result = result {
        let transcript = result.bestTranscription.formattedString
        let eventName = result.isFinal ? "onSpeechResults" : "onSpeechPartialResults"
        self.sendEvent(withName: eventName, body: ["value": transcript])
        // NOTE: do NOT auto-stop here; JS controls stop()
      }

      if let error = error {
        self.sendEvent(withName: "onSpeechError", body: ["error": error.localizedDescription])
        self.stopRecording()
      }
    }

    // Tap the microphone input and forward buffers to the recognizer.
    let recordingFormat = inputNode.outputFormat(forBus: 0)
    inputNode.removeTap(onBus: 0)
    inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) {
      [weak self] buffer, _ in
      self?.recognitionRequest?.append(buffer)
    }

    audioEngine.prepare()
    try audioEngine.start()
  }

  /// Tears down the engine, recognition task and audio session. Safe to call
  /// repeatedly and when nothing is running.
  private func stopRecording() {
    if audioEngine.isRunning {
      audioEngine.stop()
      audioEngine.inputNode.removeTap(onBus: 0)
    }

    recognitionRequest?.endAudio()
    recognitionTask?.cancel()
    recognitionRequest = nil
    recognitionTask = nil

    // Best-effort deactivation; failure here is non-fatal.
    try? AVAudioSession.sharedInstance().setActive(false)
  }
}
package/package.json ADDED
@@ -0,0 +1,37 @@
1
+ {
2
+ "name": "rn-speech-to-text",
3
+ "version": "0.1.0",
4
+ "description": "Speech-to-text native module for React Native (iOS & Android).",
5
+ "main": "src/index.js",
6
+ "react-native": "src/index.js",
7
+ "keywords": [
8
+ "react-native",
9
+ "speech-to-text",
10
+ "speech",
11
+ "voice",
12
+ "stt"
13
+ ],
14
+ "author": "Tarun Kumar <tarun.kumar@vinculumgroup.com> (https://github.com/tarun-vin)",
15
+ "license": "MIT",
16
+ "repository": {
17
+ "type": "git",
18
+ "url": "https://github.com/tarun-vin/rn-speech-to-text.git"
19
+ },
20
+ "bugs": {
21
+ "url": "https://github.com/tarun-vin/rn-speech-to-text/issues"
22
+ },
23
+ "homepage": "https://github.com/tarun-vin/rn-speech-to-text#readme",
24
+ "peerDependencies": {
25
+ "react": ">=18.0.0",
26
+ "react-native": ">=0.71.0"
27
+ },
28
+ "files": [
29
+ "src",
30
+ "android",
31
+ "ios",
32
+ "react-native.config.js",
33
+ "rn-speech-to-text.podspec",
34
+ "README.md",
35
+ "LICENSE"
36
+ ]
37
+ }
@@ -0,0 +1 @@
// react-native.config.js — intentionally empty: React Native autolinking
// discovers the android/ and ios/ native code by convention.
module.exports = {};
@@ -0,0 +1,23 @@
require 'json'

# Read package metadata from package.json so the podspec stays in sync with
# the npm release.
package = JSON.parse(File.read(File.join(__dir__, 'package.json')))

Pod::Spec.new do |s|
  s.name         = "rn-speech-to-text"
  s.version      = package['version']
  s.summary      = "Speech-to-text native module for React Native (iOS & Android)."
  # FIX: the Android implementation is Vosk, not SpeechRecognizer.
  s.description  = <<-DESC
                   Speech-to-text native module for React Native using the iOS Speech framework and the offline Vosk engine on Android.
                   DESC
  s.homepage     = package['homepage']
  s.license      = { :type => package['license'] || "MIT" }
  s.author       = package['author'] || { "Tarun Kumar" => "tarun.kumar@vinculumgroup.com" }

  s.platform     = :ios, "11.0"
  # FIX: derive the tag from package.json instead of the hard-coded "0.1.0",
  # which would silently go stale on every future release.
  s.source       = { :git => "https://github.com/tarun-vin/rn-speech-to-text.git", :tag => package['version'] }

  s.source_files = "ios/**/*.{h,m,mm,swift}"
  s.requires_arc = true

  s.dependency "React-Core"
end
@@ -0,0 +1,134 @@
// SpeechToText.js
//
// Cross-platform JS facade over the native SpeechToText modules:
//   - iOS: Speech framework (SFSpeechRecognizer) via SpeechToText.swift
//   - Android: offline Vosk recognizer via SpeechToTextModule.java
import {
  NativeModules,
  NativeEventEmitter,
  Platform,
  PermissionsAndroid,
} from 'react-native';

const { SpeechToText } = NativeModules;

if (!SpeechToText) {
  throw new Error('SpeechToText native module not linked');
}

const emitter = new NativeEventEmitter(SpeechToText);

// Android-only transcript buffers (used to merge final + partial)
let androidFinalTranscript = '';
let androidLivePartial = '';

/** Returns `value` trimmed when it is a string, otherwise ''. */
function normalizeText(value) {
  return typeof value === 'string' ? value.trim() : '';
}

/** Joins the committed transcript with the in-progress partial segment. */
function buildAndroidMergedText() {
  return [androidFinalTranscript, androidLivePartial].filter(Boolean).join(' ').trim();
}

function resetAndroidBuffers() {
  androidFinalTranscript = '';
  androidLivePartial = '';
}

/**
 * Requests speech/microphone permission.
 * @returns {Promise<string>} 'authorized' | 'denied' (iOS may also resolve
 *   'restricted' | 'notDetermined' | 'unknown').
 */
export async function requestAuthorization() {
  if (Platform.OS === 'ios') {
    // iOS: use native Speech framework auth
    return SpeechToText.requestAuthorization();
  }

  // Android: request RECORD_AUDIO runtime permission
  const result = await PermissionsAndroid.request(
    PermissionsAndroid.PERMISSIONS.RECORD_AUDIO,
    {
      title: 'Microphone Permission',
      message: 'We need access to your microphone for voice search.',
      buttonPositive: 'OK',
    },
  );

  if (result === PermissionsAndroid.RESULTS.GRANTED) {
    return 'authorized';
  }
  return 'denied';
}

/**
 * Loads the Vosk model on Android; no-op on iOS.
 * FIX: always return a Promise — previously this returned `undefined` on
 * iOS, breaking `initModel().then(...)` chains in cross-platform callers.
 * @returns {Promise<void>}
 */
export function initModel() {
  if (Platform.OS === 'ios') return Promise.resolve();
  return SpeechToText.initModel();
}

/**
 * Starts a recognition session. On Android the merge buffers are reset so a
 * new session begins with an empty transcript.
 * @returns {Promise<void>}
 */
export function start() {
  if (Platform.OS === 'android') {
    resetAndroidBuffers();
  }
  return SpeechToText.start();
}

/**
 * Stops the current recognition session.
 * @returns {Promise<void>}
 */
export async function stop() {
  try {
    return await SpeechToText.stop();
  } finally {
    // Prepare for next session
    if (Platform.OS === 'android') {
      resetAndroidBuffers();
    }
  }
}

/**
 * Subscribes to final results (raw native "onSpeechResults" events).
 * @returns {{remove: function}} subscription
 */
export function addResultListener(listener) {
  // Keep raw final events behavior unchanged
  return emitter.addListener('onSpeechResults', listener);
}

/**
 * Subscribes to partial results. On iOS this is a plain event subscription;
 * on Android partial and final segments are merged into one growing string
 * so existing UI code sees a continuous transcript.
 * @returns {{remove: function}} subscription
 */
export function addPartialResultListener(listener) {
  if (Platform.OS !== 'android') {
    return emitter.addListener('onSpeechPartialResults', listener);
  }

  // Android:
  // - partial events update current segment
  // - final events commit segment
  // - both emit merged text via onSpeechPartialResults listener so existing UI code works unchanged
  const emitMerged = (event = {}) => {
    listener({
      ...event,
      value: buildAndroidMergedText(),
    });
  };

  const partialSub = emitter.addListener('onSpeechPartialResults', (event) => {
    androidLivePartial = normalizeText(event?.value);
    emitMerged(event);
  });

  const finalSub = emitter.addListener('onSpeechResults', (event) => {
    const finalSegment = normalizeText(event?.value);

    if (finalSegment) {
      if (!androidFinalTranscript) {
        androidFinalTranscript = finalSegment;
      } else if (
        androidFinalTranscript !== finalSegment &&
        !androidFinalTranscript.endsWith(` ${finalSegment}`)
      ) {
        // Append only genuinely-new text (guards against duplicate finals).
        androidFinalTranscript = `${androidFinalTranscript} ${finalSegment}`;
      }
    }

    // Clear current in-progress partial once finalized
    androidLivePartial = '';
    emitMerged(event);
  });

  return {
    remove() {
      partialSub.remove();
      finalSub.remove();
    },
  };
}

/**
 * Subscribes to "onSpeechError" events ({error: string} payloads).
 * @returns {{remove: function}} subscription
 */
export function addErrorListener(listener) {
  return emitter.addListener('onSpeechError', listener);
}
package/src/index.js ADDED
@@ -0,0 +1 @@
// Public package entry point: re-export the whole SpeechToText API surface.
export * from './SpeechToText';