react-native-sherpa-onnx 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175) hide show
  1. package/README.md +232 -236
  2. package/SherpaOnnx.podspec +68 -64
  3. package/android/build.gradle +182 -192
  4. package/android/codegen.gradle +57 -0
  5. package/android/prebuilt-download.gradle +428 -0
  6. package/android/prebuilt-versions.gradle +43 -0
  7. package/android/proguard-rules.pro +10 -0
  8. package/android/src/main/assets/testModels/add_mul_add.onnx +28 -0
  9. package/android/src/main/assets/testModels/nnapi_internal_uint8_support.onnx +0 -0
  10. package/android/src/main/assets/testModels/qnn_multi_ctx_embed.onnx +0 -0
  11. package/android/src/main/cpp/CMakeLists.txt +166 -129
  12. package/android/src/main/cpp/CMakePresets.json +54 -0
  13. package/android/src/main/cpp/crypto/sha256.cpp +174 -0
  14. package/android/src/main/cpp/crypto/sha256.h +16 -0
  15. package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.cpp +404 -0
  16. package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.h +56 -0
  17. package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-jni.cpp +181 -0
  18. package/android/src/main/cpp/jni/audio/sherpa-onnx-audio-convert-jni.cpp +888 -0
  19. package/{ios → android/src/main/cpp/jni/model_detect}/sherpa-onnx-common.h +18 -18
  20. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.cpp +86 -0
  21. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.h +20 -0
  22. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.cpp +423 -0
  23. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.h +55 -0
  24. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-stt.cpp +399 -0
  25. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-tts.cpp +238 -0
  26. package/{ios → android/src/main/cpp/jni/model_detect}/sherpa-onnx-model-detect.h +122 -89
  27. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.cpp +99 -0
  28. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.h +16 -0
  29. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-tts-wrapper.cpp +78 -0
  30. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-tts-wrapper.h +16 -0
  31. package/android/src/main/cpp/jni/module/sherpa-onnx-module-jni.cpp +190 -0
  32. package/android/src/main/cpp/jni/tts/sherpa-onnx-tts-zipvoice-jni.cpp +301 -0
  33. package/android/src/main/java/com/sherpaonnx/SherpaOnnxArchiveHelper.kt +94 -0
  34. package/android/src/main/java/com/sherpaonnx/{SherpaOnnxCoreHelper.kt → SherpaOnnxAssetHelper.kt} +350 -236
  35. package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +791 -483
  36. package/android/src/main/java/com/sherpaonnx/SherpaOnnxSttHelper.kt +699 -109
  37. package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt +1123 -668
  38. package/android/src/main/java/com/sherpaonnx/ZipvoiceTtsWrapper.kt +187 -0
  39. package/ios/SherpaOnnx+Assets.h +11 -0
  40. package/ios/SherpaOnnx+Assets.mm +325 -0
  41. package/ios/SherpaOnnx+STT.mm +455 -118
  42. package/ios/SherpaOnnx+TTS.mm +1101 -712
  43. package/ios/SherpaOnnx.h +17 -6
  44. package/ios/SherpaOnnx.mm +206 -311
  45. package/ios/SherpaOnnx.xcconfig +19 -19
  46. package/ios/SherpaOnnxCoreMLHelper.swift +24 -0
  47. package/ios/archive/sherpa-onnx-archive-helper.h +21 -0
  48. package/ios/archive/sherpa-onnx-archive-helper.mm +296 -0
  49. package/ios/libarchive_darwin_config.h +153 -0
  50. package/{android/src/main/cpp/jni → ios/model_detect}/sherpa-onnx-common.h +18 -18
  51. package/ios/model_detect/sherpa-onnx-model-detect-helper.h +49 -0
  52. package/ios/model_detect/sherpa-onnx-model-detect-helper.mm +210 -0
  53. package/ios/model_detect/sherpa-onnx-model-detect-stt.mm +344 -0
  54. package/ios/model_detect/sherpa-onnx-model-detect-tts.mm +201 -0
  55. package/{android/src/main/cpp/jni → ios/model_detect}/sherpa-onnx-model-detect.h +117 -89
  56. package/ios/scripts/patch-libarchive-includes.sh +61 -0
  57. package/ios/scripts/setup-ios-libarchive.sh +98 -0
  58. package/ios/stt/sherpa-onnx-stt-wrapper.h +129 -0
  59. package/ios/stt/sherpa-onnx-stt-wrapper.mm +523 -0
  60. package/ios/{sherpa-onnx-tts-wrapper.h → tts/sherpa-onnx-tts-wrapper.h} +90 -85
  61. package/ios/{sherpa-onnx-tts-wrapper.mm → tts/sherpa-onnx-tts-wrapper.mm} +376 -345
  62. package/lib/module/NativeSherpaOnnx.js +3 -0
  63. package/lib/module/NativeSherpaOnnx.js.map +1 -1
  64. package/lib/module/audio/index.js +22 -0
  65. package/lib/module/audio/index.js.map +1 -0
  66. package/lib/module/diarization/index.js +1 -1
  67. package/lib/module/diarization/index.js.map +1 -1
  68. package/lib/module/download/ModelDownloadManager.js +918 -0
  69. package/lib/module/download/ModelDownloadManager.js.map +1 -0
  70. package/lib/module/download/extractTarBz2.js +53 -0
  71. package/lib/module/download/extractTarBz2.js.map +1 -0
  72. package/lib/module/download/index.js +6 -0
  73. package/lib/module/download/index.js.map +1 -0
  74. package/lib/module/download/validation.js +178 -0
  75. package/lib/module/download/validation.js.map +1 -0
  76. package/lib/module/enhancement/index.js +1 -1
  77. package/lib/module/enhancement/index.js.map +1 -1
  78. package/lib/module/index.js +41 -3
  79. package/lib/module/index.js.map +1 -1
  80. package/lib/module/separation/index.js +1 -1
  81. package/lib/module/separation/index.js.map +1 -1
  82. package/lib/module/stt/index.js +127 -60
  83. package/lib/module/stt/index.js.map +1 -1
  84. package/lib/module/stt/sttModelLanguages.js +512 -0
  85. package/lib/module/stt/sttModelLanguages.js.map +1 -0
  86. package/lib/module/stt/types.js +53 -1
  87. package/lib/module/stt/types.js.map +1 -1
  88. package/lib/module/tts/index.js +216 -289
  89. package/lib/module/tts/index.js.map +1 -1
  90. package/lib/module/tts/types.js +86 -1
  91. package/lib/module/tts/types.js.map +1 -1
  92. package/lib/module/types.js.map +1 -1
  93. package/lib/module/utils.js +86 -73
  94. package/lib/module/utils.js.map +1 -1
  95. package/lib/module/vad/index.js +1 -1
  96. package/lib/module/vad/index.js.map +1 -1
  97. package/lib/typescript/src/NativeSherpaOnnx.d.ts +192 -38
  98. package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
  99. package/lib/typescript/src/audio/index.d.ts +13 -0
  100. package/lib/typescript/src/audio/index.d.ts.map +1 -0
  101. package/lib/typescript/src/diarization/index.d.ts +3 -2
  102. package/lib/typescript/src/diarization/index.d.ts.map +1 -1
  103. package/lib/typescript/src/download/ModelDownloadManager.d.ts +108 -0
  104. package/lib/typescript/src/download/ModelDownloadManager.d.ts.map +1 -0
  105. package/lib/typescript/src/download/extractTarBz2.d.ts +14 -0
  106. package/lib/typescript/src/download/extractTarBz2.d.ts.map +1 -0
  107. package/lib/typescript/src/download/index.d.ts +7 -0
  108. package/lib/typescript/src/download/index.d.ts.map +1 -0
  109. package/lib/typescript/src/download/validation.d.ts +57 -0
  110. package/lib/typescript/src/download/validation.d.ts.map +1 -0
  111. package/lib/typescript/src/enhancement/index.d.ts +3 -2
  112. package/lib/typescript/src/enhancement/index.d.ts.map +1 -1
  113. package/lib/typescript/src/index.d.ts +26 -2
  114. package/lib/typescript/src/index.d.ts.map +1 -1
  115. package/lib/typescript/src/separation/index.d.ts +3 -2
  116. package/lib/typescript/src/separation/index.d.ts.map +1 -1
  117. package/lib/typescript/src/stt/index.d.ts +31 -43
  118. package/lib/typescript/src/stt/index.d.ts.map +1 -1
  119. package/lib/typescript/src/stt/sttModelLanguages.d.ts +52 -0
  120. package/lib/typescript/src/stt/sttModelLanguages.d.ts.map +1 -0
  121. package/lib/typescript/src/stt/types.d.ts +196 -9
  122. package/lib/typescript/src/stt/types.d.ts.map +1 -1
  123. package/lib/typescript/src/tts/index.d.ts +25 -211
  124. package/lib/typescript/src/tts/index.d.ts.map +1 -1
  125. package/lib/typescript/src/tts/types.d.ts +148 -25
  126. package/lib/typescript/src/tts/types.d.ts.map +1 -1
  127. package/lib/typescript/src/types.d.ts +0 -32
  128. package/lib/typescript/src/types.d.ts.map +1 -1
  129. package/lib/typescript/src/utils.d.ts +28 -13
  130. package/lib/typescript/src/utils.d.ts.map +1 -1
  131. package/lib/typescript/src/vad/index.d.ts +3 -2
  132. package/lib/typescript/src/vad/index.d.ts.map +1 -1
  133. package/package.json +250 -222
  134. package/scripts/check-qnn-support.sh +78 -0
  135. package/scripts/setup-ios-framework.sh +379 -282
  136. package/src/NativeSherpaOnnx.ts +474 -251
  137. package/src/audio/index.ts +32 -0
  138. package/src/diarization/index.ts +4 -2
  139. package/src/download/ModelDownloadManager.ts +1325 -0
  140. package/src/download/extractTarBz2.ts +78 -0
  141. package/src/download/index.ts +43 -0
  142. package/src/download/validation.ts +279 -0
  143. package/src/enhancement/index.ts +4 -2
  144. package/src/index.tsx +78 -27
  145. package/src/separation/index.ts +4 -2
  146. package/src/stt/index.ts +249 -89
  147. package/src/stt/sttModelLanguages.ts +237 -0
  148. package/src/stt/types.ts +263 -9
  149. package/src/tts/index.ts +470 -458
  150. package/src/tts/types.ts +373 -218
  151. package/src/types.ts +0 -44
  152. package/src/utils.ts +145 -131
  153. package/src/vad/index.ts +4 -2
  154. package/third_party/ffmpeg_prebuilt/ANDROID_RELEASE_TAG +1 -0
  155. package/third_party/libarchive_prebuilt/ANDROID_RELEASE_TAG +1 -0
  156. package/third_party/libarchive_prebuilt/IOS_RELEASE_TAG +1 -0
  157. package/third_party/sherpa-onnx-prebuilt/ANDROID_RELEASE_TAG +1 -0
  158. package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -0
  159. package/android/src/main/cpp/include/sherpa-onnx/c-api/c-api.h +0 -1918
  160. package/android/src/main/cpp/include/sherpa-onnx/c-api/cxx-api.h +0 -841
  161. package/android/src/main/cpp/jni/sherpa-onnx-model-detect.cpp +0 -541
  162. package/android/src/main/cpp/jni/sherpa-onnx-stt-jni.cpp +0 -336
  163. package/android/src/main/cpp/jni/sherpa-onnx-stt-wrapper.cpp +0 -222
  164. package/android/src/main/cpp/jni/sherpa-onnx-stt-wrapper.h +0 -68
  165. package/android/src/main/cpp/jni/sherpa-onnx-tts-jni.cpp +0 -823
  166. package/android/src/main/cpp/jni/sherpa-onnx-tts-wrapper.cpp +0 -387
  167. package/android/src/main/cpp/jni/sherpa-onnx-tts-wrapper.h +0 -147
  168. package/ios/Frameworks/sherpa_onnx.xcframework.zip +0 -0
  169. package/ios/include/sherpa-onnx/c-api/c-api.h +0 -1918
  170. package/ios/include/sherpa-onnx/c-api/cxx-api.h +0 -841
  171. package/ios/sherpa-onnx-model-detect.mm +0 -441
  172. package/ios/sherpa-onnx-stt-wrapper.h +0 -48
  173. package/ios/sherpa-onnx-stt-wrapper.mm +0 -201
  174. package/scripts/copy-headers.js +0 -184
  175. package/scripts/setup-assets.js +0 -323
package/README.md CHANGED
@@ -1,236 +1,232 @@
1
- # react-native-sherpa-onnx
2
-
3
- React Native SDK for sherpa-onnx - providing offline speech processing capabilities
4
-
5
- [![npm version](https://img.shields.io/npm/v/react-native-sherpa-onnx.svg)](https://www.npmjs.com/package/react-native-sherpa-onnx)
6
- [![npm downloads](https://img.shields.io/npm/dm/react-native-sherpa-onnx.svg)](https://www.npmjs.com/package/react-native-sherpa-onnx)
7
- [![npm license](https://img.shields.io/npm/l/react-native-sherpa-onnx.svg)](https://www.npmjs.com/package/react-native-sherpa-onnx)
8
- [![Android](https://img.shields.io/badge/Android-Supported-green)](https://www.android.com/)
9
- [![iOS](https://img.shields.io/badge/iOS-Supported-blue)](https://www.apple.com/ios/)
10
-
11
- A React Native TurboModule that provides offline speech processing capabilities using [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx). The SDK aims to support all functionalities that sherpa-onnx offers, including offline speech-to-text, text-to-speech, speaker diarization, speech enhancement, source separation, and VAD (Voice Activity Detection).
12
-
13
- ## Feature Support
14
-
15
- | Feature | Status |
16
- |---------|--------|
17
- | Offline Speech-to-Text | ✅ **Supported** |
18
- | Text-to-Speech | ✅ **Supported** |
19
- | Speaker Diarization | Not yet supported |
20
- | Speech Enhancement | Not yet supported |
21
- | Source Separation | ❌ Not yet supported |
22
- | VAD (Voice Activity Detection) | Not yet supported |
23
-
24
- ## Platform Support Status
25
-
26
- | Platform | Status | Notes |
27
- |----------|--------|-------|
28
- | **Android** | ✅ **Production Ready** | Fully tested, CI/CD automated, multiple models supported |
29
- | **iOS** | 🟡 **Beta / Experimental** | XCFramework + Podspec ready<br/>✅ GitHub Actions builds pass<br/>❌ **No local Xcode testing** *(Windows-only dev)* |
30
-
31
- ### 🔧 **iOS Contributors WANTED!**
32
-
33
- **Full iOS support is a priority!** Help bring sherpa-onnx to iOS devices.
34
-
35
- **What's ready:**
36
- - XCFramework integration
37
- - Podspec configuration
38
- - GitHub Actions CI (macOS runner)
39
- - TypeScript bindings
40
-
41
- **What's needed:**
42
- - **Local Xcode testing** (Simulator + Device)
43
- - **iOS example app** (beyond CI)
44
- - **TurboModule iOS testing**
45
- - **Edge case testing**
46
-
47
- ## Supported Model Types
48
-
49
- ### Speech-to-Text (STT) Models
50
-
51
- | Model Type | `modelType` Value | Description | Download Links |
52
- | ------------------------ | ----------------- | ---------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------ |
53
- | **Zipformer/Transducer** | `'transducer'` | Requires `encoder.onnx`, `decoder.onnx`, `joiner.onnx`, and `tokens.txt` | [Download](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/index.html) |
54
- | **Paraformer** | `'paraformer'` | Requires `model.onnx` (or `model.int8.onnx`) and `tokens.txt` | [Download](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/index.html) |
55
- | **NeMo CTC** | `'nemo_ctc'` | Requires `model.onnx` (or `model.int8.onnx`) and `tokens.txt` | [Download](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/nemo/index.html) |
56
- | **Whisper** | `'whisper'` | Requires `encoder.onnx`, `decoder.onnx`, and `tokens.txt` | [Download](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/index.html) |
57
- | **WeNet CTC** | `'wenet_ctc'` | Requires `model.onnx` (or `model.int8.onnx`) and `tokens.txt` | [Download](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/wenet/index.html) |
58
- | **SenseVoice** | `'sense_voice'` | Requires `model.onnx` (or `model.int8.onnx`) and `tokens.txt` | [Download](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/sense-voice/index.html) |
59
- | **FunASR Nano** | `'funasr_nano'` | Requires `encoder_adaptor.onnx`, `llm.onnx`, `embedding.onnx`, and `tokenizer` directory | [Download](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/funasr-nano/index.html) |
60
-
61
- ### Text-to-Speech (TTS) Models
62
-
63
- | Model Type | `modelType` Value | Description | Download Links |
64
- | ---------------- | ----------------- | ---------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------- |
65
- | **VITS** | `'vits'` | Fast, high-quality TTS. Includes Piper, Coqui, MeloTTS, MMS variants. Requires `model.onnx`, `tokens.txt` | [Download](https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models) |
66
- | **Matcha** | `'matcha'` | High-quality acoustic model + vocoder. Requires `acoustic_model.onnx`, `vocoder.onnx`, `tokens.txt` | [Download](https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html) |
67
- | **Kokoro** | `'kokoro'` | Multi-speaker, multi-language. Requires `model.onnx`, `voices.bin`, `tokens.txt`, `espeak-ng-data/` | [Download](https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models) |
68
- | **KittenTTS** | `'kitten'` | Lightweight, multi-speaker. Requires `model.onnx`, `voices.bin`, `tokens.txt`, `espeak-ng-data/` | [Download](https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models) |
69
- | **Zipvoice** | `'zipvoice'` | Voice cloning capable. Requires `encoder.onnx`, `decoder.onnx`, `vocoder.onnx`, `tokens.txt` | [Download](https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/zipvoice.html) |
70
-
71
- ## Features
72
-
73
- - ✅ **Offline Speech-to-Text** - No internet connection required for speech recognition
74
- - ✅ **Multiple Model Types** - Supports Zipformer/Transducer, Paraformer, NeMo CTC, Whisper, WeNet CTC, SenseVoice, and FunASR Nano models
75
- - **Model Quantization** - Automatic detection and preference for quantized (int8) models
76
- - **Flexible Model Loading** - Asset models, file system models, or auto-detection
77
- - **Android Support** - Fully supported on Android
78
- - **iOS Support** - Fully supported on iOS (requires sherpa-onnx XCFramework)
79
- - **TypeScript Support** - Full TypeScript definitions included
80
- - 🚧 **Additional Features Coming Soon** - Speaker Diarization, Speech Enhancement, Source Separation, and VAD support are planned for future releases
81
-
82
- ## Installation
83
-
84
- ```sh
85
- npm install react-native-sherpa-onnx
86
- ```
87
-
88
- If your project uses Yarn (v3+) or Plug'n'Play, configure Yarn to use the Node Modules linker to avoid postinstall issues:
89
-
90
- ```yaml
91
- # .yarnrc.yml
92
- nodeLinker: node-modules
93
- ```
94
-
95
- Alternatively, set the environment variable during install:
96
-
97
- ```sh
98
- YARN_NODE_LINKER=node-modules yarn install
99
- ```
100
-
101
- ### Android
102
-
103
- No additional setup required. The library automatically handles native dependencies via Gradle.
104
-
105
- ### iOS
106
-
107
- The sherpa-onnx XCFramework is **not included in the repository or npm package** due to its size (~80MB), but **no manual action is required**! The framework is automatically downloaded during `pod install`.
108
-
109
- #### Quick Setup
110
-
111
- ```sh
112
- cd example
113
- bundle install
114
- bundle exec pod install --project-directory=ios
115
- ```
116
-
117
- That's it! The `Podfile` automatically:
118
- 1. Copies required header files from the git submodule
119
- 2. Downloads the latest XCFramework from [GitHub Releases](https://github.com/XDcobra/react-native-sherpa-onnx/releases?q=framework)
120
- 3. Verifies everything is in place before building
121
-
122
- #### For Advanced Users: Building the Framework Locally
123
-
124
- If you want to build the XCFramework yourself instead of using the prebuilt release:
125
-
126
- ```sh
127
- # Clone sherpa-onnx repository
128
- git clone https://github.com/k2-fsa/sherpa-onnx.git
129
- cd sherpa-onnx
130
- git checkout v1.12.23
131
-
132
- # Build the iOS XCFramework (requires macOS, Xcode, CMake, and ONNX Runtime)
133
- ./build-ios.sh
134
-
135
- # Copy to your project
136
- cp -r build-ios/sherpa_onnx.xcframework /path/to/react-native-sherpa-onnx/ios/Frameworks/
137
- ```
138
-
139
- Then run `pod install` as usual.
140
-
141
- **Note:** The iOS implementation uses the same C++ wrapper as Android, ensuring consistent behavior across platforms.
142
-
143
- ## Documentation
144
-
145
- - [Speech-to-Text (STT)](./docs/stt.md)
146
- - [Text-to-Speech (TTS)](./docs/tts.md)
147
- - [Voice Activity Detection (VAD)](./docs/vad.md)
148
- - [Speaker Diarization](./docs/diarization.md)
149
- - [Speech Enhancement](./docs/enhancement.md)
150
- - [Source Separation](./docs/separation.md)
151
- - [General STT Model Setup](./docs/STT_MODEL_SETUP.md)
152
- - [General TTS Model Setup](./docs/TTS_MODEL_SETUP.md)
153
-
154
- ### Example Model READMEs
155
-
156
- - [kokoro (US) README](./example/android/app/src/main/assets/models/kokoro-us/README.md)
157
- - [kokoro (ZH) README](./example/android/app/src/main/assets/models/kokoro-zh/README.md)
158
- - [funasr-nano README](./example/android/app/src/main/assets/models/sherpa-onnx-funasr-nano-int8/README.md)
159
- - [kitten-nano README](./example/android/app/src/main/assets/models/sherpa-onnx-kitten-nano-en-v0_1-fp16/README.md)
160
- - [matcha README](./example/android/app/src/main/assets/models/sherpa-onnx-matcha-icefall-en_US-ljspeech/README.md)
161
- - [nemo-ctc README](./example/android/app/src/main/assets/models/sherpa-onnx-nemo-parakeet-tdt-ctc-en/README.md)
162
- - [paraformer README](./example/android/app/src/main/assets/models/sherpa-onnx-paraformer-zh-small/README.md)
163
- - [sense-voice README](./example/android/app/src/main/assets/models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8/README.md)
164
- - [vits README](./example/android/app/src/main/assets/models/sherpa-onnx-vits-piper-en_US-libritts_r-medium/README.md)
165
- - [wenet-ctc README](./example/android/app/src/main/assets/models/sherpa-onnx-wenetspeech-ctc-zh-en-cantonese/README.md)
166
- - [whisper-tiny README](./example/android/app/src/main/assets/models/sherpa-onnx-whisper-tiny-en/README.md)
167
- - [zipformer README](./example/android/app/src/main/assets/models/sherpa-onnx-zipformer-small-en/README.md)
168
-
169
- ## Requirements
170
-
171
- - React Native >= 0.70
172
- - Android API 24+ (Android 7.0+)
173
- - iOS 13.0+ (requires sherpa-onnx XCFramework - see iOS Setup below)
174
-
175
- ## Example Apps
176
-
177
- We provide example applications to help you get started with `react-native-sherpa-onnx`:
178
-
179
- ### Example App (Audio to Text)
180
-
181
- The example app included in this repository demonstrates basic audio-to-text transcription capabilities. It includes:
182
-
183
- - Multiple model type support (Zipformer, Paraformer, NeMo CTC, Whisper, WeNet CTC, SenseVoice, FunASR Nano)
184
- - Model selection and configuration
185
- - Audio file transcription
186
- - Test audio files for different languages
187
-
188
- **Getting started:**
189
-
190
- ```sh
191
- cd example
192
- yarn install
193
- yarn android # or yarn ios
194
- ```
195
-
196
- <div align="center">
197
- <img src="./docs/images/example_home_screen.png" alt="Model selection home screen" width="30%" />
198
- <img src="./docs/images/example_english.png" alt="Transcribe english audio" width="30%" />
199
- <img src="./docs/images/example_multilanguage.png" alt="Transcribe english and chinese audio" width="30%" />
200
- </div>
201
-
202
- ### Video to Text Comparison App
203
-
204
- A comprehensive comparison app that demonstrates video-to-text transcription using `react-native-sherpa-onnx` alongside other speech-to-text solutions:
205
-
206
- **Repository:** [mobile-videototext-comparison](https://github.com/XDcobra/mobile-videototext-comparison)
207
-
208
- **Features:**
209
-
210
- - Video to audio conversion (using native APIs)
211
- - Audio to text transcription
212
- - Video to text (video --> WAV --> text)
213
- - Comparison between different STT providers
214
- - Performance benchmarking
215
-
216
- This app showcases how to integrate `react-native-sherpa-onnx` into a real-world application that processes video files and converts them to text.
217
-
218
- <div align="center">
219
- <img src="./docs/images/vtt_model_overview.png" alt="Video-to-Text Model Overview" width="30%" />
220
- <img src="./docs/images/vtt_result_file_picker.png" alt="Video-to-Text file picker" width="30%" />
221
- <img src="./docs/images/vtt_result_test_audio.png" alt="Video-to-Text test audio" width="30%" />
222
- </div>
223
-
224
- ## Contributing
225
-
226
- - [Development workflow](CONTRIBUTING.md#development-workflow)
227
- - [Sending a pull request](CONTRIBUTING.md#sending-a-pull-request)
228
- - [Code of conduct](CODE_OF_CONDUCT.md)
229
-
230
- ## License
231
-
232
- MIT
233
-
234
- ---
235
-
236
- Made with [create-react-native-library](https://github.com/callstack/react-native-builder-bob)
1
+ # react-native-sherpa-onnx
2
+
3
+ React Native SDK for sherpa-onnx - providing offline speech processing capabilities
4
+
5
+ <div align="center">
6
+ <img src="./docs/images/banner.png" alt="Banner" width="560" />
7
+ </div>
8
+
9
+ <div align="center">
10
+
11
+ [![npm version](https://img.shields.io/npm/v/react-native-sherpa-onnx.svg)](https://www.npmjs.com/package/react-native-sherpa-onnx)
12
+ [![npm downloads](https://img.shields.io/npm/dm/react-native-sherpa-onnx.svg)](https://www.npmjs.com/package/react-native-sherpa-onnx)
13
+ [![npm license](https://img.shields.io/npm/l/react-native-sherpa-onnx.svg)](https://www.npmjs.com/package/react-native-sherpa-onnx)
14
+ [![Android](https://img.shields.io/badge/Android-Supported-green)](https://www.android.com/)
15
+ [![iOS](https://img.shields.io/badge/iOS-Supported-blue)](https://www.apple.com/ios/)
16
+
17
+ </div>
18
+
19
+ > **⚠️ SDK 0.3.0 Breaking changes from 0.2.0**
20
+ > Since the last release, I have restructured and improved the SDK significantly: full iOS support, smoother behaviour, fewer failure points, and a much smaller footprint (~95% size reduction). As a result, **logic and the public API have changed**. If you are upgrading from 0.2.x, please follow the [Breaking changes (upgrading to 0.3.0)](docs/migration.md#breaking-changes-upgrading-to-030) section and the updated API documentation.
21
+
22
+ A React Native TurboModule that provides offline speech processing capabilities using [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx). The SDK aims to support all functionalities that sherpa-onnx offers, including offline speech-to-text, text-to-speech, speaker diarization, speech enhancement, source separation, and VAD (Voice Activity Detection).
23
+
24
+ ## Table of contents
25
+
26
+ - [Feature Support](#feature-support)
27
+ - [Platform Support Status](#platform-support-status)
28
+ - [Supported Model Types](#supported-model-types)
29
+ - [Speech-to-Text (STT) Models](#speech-to-text-stt-models)
30
+ - [Text-to-Speech (TTS) Models](#text-to-speech-tts-models)
31
+ - [Installation](#installation)
32
+ - [Android](#android)
33
+ - [iOS](#ios)
34
+ - [Documentation](#documentation)
35
+ - [Requirements](#requirements)
36
+ - [Breaking changes (upgrading to 0.3.0)](#breaking-changes-upgrading-to-030)
37
+ - [Instance-based API (TTS + STT)](#instance-based-api-tts--stt)
38
+ - [Speech-to-Text (STT)](#speech-to-text-stt)
39
+ - [Text-to-Speech (TTS)](#text-to-speech-tts)
40
+ - [Example Apps](#example-apps)
41
+ - [Example App (Audio to Text)](#example-app-audio-to-text)
42
+ - [Video to Text Comparison App](#video-to-text-comparison-app)
43
+ - [Contributing](#contributing)
44
+ - [License](#license)
45
+
46
+ ## Feature Support
47
+
48
+ | Feature | Status | Notes |
49
+ |---------|--------|-------|
50
+ | Offline Speech-to-Text | ✅ **Supported** | No internet required; multiple model types (Zipformer, Paraformer, Whisper, etc.). See [Supported Model Types](#supported-model-types). |
51
+ | Text-to-Speech | ✅ **Supported** | Multiple model types (VITS, Matcha, Kokoro, etc.). See [Supported Model Types](#supported-model-types). |
52
+ | Execution providers (CPU, NNAPI, XNNPACK, Core ML, QNN) | ✅ **Supported** | See [Execution provider support](./docs/execution-providers.md). |
53
+ | Play Asset Delivery (PAD) | ✅ **Supported** | Android only. See [Model Setup](./docs/MODEL_SETUP.md). |
54
+ | Automatic model type detection | ✅ **Supported** | `detectSttModel()` and `detectTtsModel()` identify the model type at a given path without initializing the model. See [Model Setup: Model type detection](./docs/MODEL_SETUP.md#model-type-detection-without-initialization). |
55
+ | Model quantization | ✅ **Supported** | Automatic detection and preference for quantized (int8) models. |
56
+ | Flexible model loading | ✅ **Supported** | Asset models, file system models, or auto-detection. |
57
+ | TypeScript | ✅ **Supported** | Full type definitions included. |
58
+ | Speaker Diarization | ❌ Not yet supported | Scheduled for release 0.4.0 |
59
+ | Speech Enhancement | ❌ Not yet supported | Scheduled for release 0.5.0 |
60
+ | Source Separation | ❌ Not yet supported | Scheduled for release 0.6.0 |
61
+ | VAD (Voice Activity Detection) | ❌ Not yet supported | Scheduled for release 0.7.0 |
62
+
63
+ ## Platform Support Status
64
+
65
+ | Platform | Status | Notes |
66
+ |----------|--------|-------|
67
+ | **Android** | ✅ **Production Ready** | CI/CD automated, multiple models supported |
68
+ | **iOS** | ✅ **Production Ready** | CI/CD automated, multiple models supported |
69
+
70
+ ## Supported Model Types
71
+
72
+ ### Speech-to-Text (STT) Models
73
+
74
+ | Model Type | `modelType` Value | Description | Download Links |
75
+ | ------------------------ | ----------------- | ---------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------ |
76
+ | **Zipformer/Transducer** | `'transducer'` | Requires `encoder.onnx`, `decoder.onnx`, `joiner.onnx`, and `tokens.txt` | [Download](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/index.html) |
77
+ | **Paraformer** | `'paraformer'` | Requires `model.onnx` (or `model.int8.onnx`) and `tokens.txt` | [Download](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/index.html) |
78
+ | **NeMo CTC** | `'nemo_ctc'` | Requires `model.onnx` (or `model.int8.onnx`) and `tokens.txt` | [Download](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/nemo/index.html) |
79
+ | **Whisper** | `'whisper'` | Requires `encoder.onnx`, `decoder.onnx`, and `tokens.txt` | [Download](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/index.html) |
80
+ | **WeNet CTC** | `'wenet_ctc'` | Requires `model.onnx` (or `model.int8.onnx`) and `tokens.txt` | [Download](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/wenet/index.html) |
81
+ | **SenseVoice** | `'sense_voice'` | Requires `model.onnx` (or `model.int8.onnx`) and `tokens.txt` | [Download](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/sense-voice/index.html) |
82
+ | **FunASR Nano** | `'funasr_nano'` | Requires `encoder_adaptor.onnx`, `llm.onnx`, `embedding.onnx`, and `tokenizer` directory | [Download](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/funasr-nano/index.html) |
83
+
84
+ ### Text-to-Speech (TTS) Models
85
+
86
+ | Model Type | `modelType` Value | Description | Download Links |
87
+ | ---------------- | ----------------- | ---------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------- |
88
+ | **VITS** | `'vits'` | Fast, high-quality TTS. Includes Piper, Coqui, MeloTTS, MMS variants. Requires `model.onnx`, `tokens.txt` | [Download](https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models) |
89
+ | **Matcha** | `'matcha'` | High-quality acoustic model + vocoder. Requires `acoustic_model.onnx`, `vocoder.onnx`, `tokens.txt` | [Download](https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html) |
90
+ | **Kokoro** | `'kokoro'` | Multi-speaker, multi-language. Requires `model.onnx`, `voices.bin`, `tokens.txt`, `espeak-ng-data/` | [Download](https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models) |
91
+ | **KittenTTS** | `'kitten'` | Lightweight, multi-speaker. Requires `model.onnx`, `voices.bin`, `tokens.txt`, `espeak-ng-data/` | [Download](https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models) |
92
+ | **Zipvoice** | `'zipvoice'` | Voice cloning capable. Requires `encoder.onnx`, `decoder.onnx`, `vocoder.onnx`, `tokens.txt` | [Download](https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/zipvoice.html) |
93
+ | **Pocket** | `'pocket'` | Flow-matching TTS. Requires `lm_flow.onnx`, `lm_main.onnx`, `encoder.onnx`, `decoder.onnx`, `text_conditioner.onnx`, `vocab.json`, `token_scores.json` | [Download](https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models) |
94
+
95
+ ## Installation
96
+
97
+ ```sh
98
+ npm install react-native-sherpa-onnx
99
+ ```
100
+
101
+ If your project uses Yarn (v3+) or Plug'n'Play, configure Yarn to use the Node Modules linker to avoid postinstall issues:
102
+
103
+ ```yaml
104
+ # .yarnrc.yml
105
+ nodeLinker: node-modules
106
+ ```
107
+
108
+ Alternatively, set the environment variable during install:
109
+
110
+ ```sh
111
+ YARN_NODE_LINKER=node-modules yarn install
112
+ ```
113
+
114
+ ### Android
115
+
116
+ No additional setup required. The library automatically handles native dependencies via Gradle. For execution provider support (CPU, NNAPI, XNNPACK, QNN) and optional QNN setup, see [Execution provider support](./docs/execution-providers.md). For building Android native libs yourself, see [sherpa-onnx-prebuilt](third_party/sherpa-onnx-prebuilt/README.md).
117
+
118
+
119
+ ### iOS
120
+
121
+ The sherpa-onnx **XCFramework is not shipped in the repo or npm** (size ~80MB). It is **downloaded automatically** when you run `pod install`; no manual steps are required. The version used is pinned in `third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG` and the archive is fetched from [GitHub Releases](https://github.com/XDcobra/react-native-sherpa-onnx/releases?q=framework).
122
+
123
+ #### Setup
124
+
125
+ ```sh
126
+ cd your-app/ios
127
+ bundle install
128
+ bundle exec pod install
129
+ ```
130
+
131
+ The podspec runs `scripts/setup-ios-framework.sh`, which downloads the XCFramework (and, if needed, libarchive sources) so the Pod builds correctly. Libarchive is compiled from source as part of the Pod; its version is pinned in `third_party/libarchive_prebuilt/IOS_RELEASE_TAG`.
132
+
133
+ #### Advanced: Building the iOS framework yourself
135
+
136
+ If you need a custom sherpa-onnx build (e.g. different version or patches), you can build the XCFramework and place it in `ios/Frameworks/` before running `pod install`. The repo does not include an iOS build script; use one of:
137
+
138
+ - **This repo's CI:** The [build-sherpa-onnx-ios-framework](.github/workflows/build-sherpa-onnx-ios-framework.yml) workflow produces the XCFramework and publishes it as a GitHub Release. You can run equivalent steps locally or inspect the workflow for the exact build and merge steps (including `libsherpa-onnx-cxx-api.a` and libarchive).
139
+ - **Version and layout:** Pinned version and release layout are documented in [third_party/sherpa-onnx-prebuilt](third_party/sherpa-onnx-prebuilt/README.md) (Android focus; for iOS, see `IOS_RELEASE_TAG` and the [iOS framework workflow](.github/workflows/build-sherpa-onnx-ios-framework.yml)).
140
+
141
+ The XCFramework must include the C++ API (`libsherpa-onnx-cxx-api.a` merged or linked) so that the iOS Obj-C++ code can use `sherpa_onnx::cxx::*`. The workflow's build script ensures this; if you use upstream `build-ios.sh` from sherpa-onnx, you may need to merge the C++ API into the static library yourself.
142
+
143
+ ## Documentation
144
+
145
+ - [Text-to-Speech (TTS)](./docs/tts.md)
146
+ - [Execution provider support (QNN, NNAPI, XNNPACK, Core ML)](./docs/execution-providers.md) – Checking and using acceleration backends
147
+ - [Voice Activity Detection (VAD)](./docs/vad.md)
148
+ - [Speaker Diarization](./docs/diarization.md)
149
+ - [Speech Enhancement](./docs/enhancement.md)
150
+ - [Source Separation](./docs/separation.md)
151
+ - [Model Setup](./docs/MODEL_SETUP.md) – Bundled assets, Play Asset Delivery (PAD), model discovery APIs, and troubleshooting
152
+ - [Model Download Manager](./docs/download-manager.md)
153
+
154
+ Note: For guidance on when to use `listAssetModels()` vs. `listModelsAtPath()`, and on how to combine bundled and PAD/file-based models, see [Model Setup](./docs/MODEL_SETUP.md).
155
+
156
+ ## Requirements
157
+
158
+ - React Native >= 0.70
159
+ - Android API 24+ (Android 7.0+)
160
+ - iOS 13.0+
161
+
162
+ ## Example Apps
163
+
164
+ We provide example applications to help you get started with `react-native-sherpa-onnx`:
165
+
166
+ ### Example App (Audio to Text)
167
+
168
+ The example app included in this repository demonstrates basic audio-to-text transcription capabilities. It includes:
169
+
170
+ - Multiple model type support (Zipformer, Paraformer, NeMo CTC, Whisper, WeNet CTC, SenseVoice, FunASR Nano)
171
+ - Model selection and configuration
172
+ - Audio file transcription
173
+ - Test audio files for different languages
174
+
175
+ **Getting started:**
176
+
177
+ ```sh
178
+ cd example
179
+ yarn install
180
+ yarn android # or yarn ios
181
+ ```
182
+
183
+ <div align="center">
184
+ <table>
185
+ <tr>
186
+ <td><img src="./docs/images/example_home_screen.png" alt="Model selection home screen" width="240" /></td>
187
+ <td><img src="./docs/images/example_stt_1.png" alt="Transcribe English audio" width="240" /></td>
188
+ <td><img src="./docs/images/example_stt_2.png" alt="Transcribe Cantonese audio" width="240" /></td>
189
+ </tr>
190
+ <tr>
191
+ <td><img src="./docs/images/example_tts.png" alt="Text to speech generation" width="240" /></td>
192
+ <td><img src="./docs/images/example_provider.png" alt="Execution provider support" width="240" /></td>
193
+ </tr>
194
+ </table>
195
+ </div>
196
+
197
+ ### Video to Text Comparison App
198
+
199
+ A comprehensive comparison app that demonstrates video-to-text transcription using `react-native-sherpa-onnx` alongside other speech-to-text solutions:
200
+
201
+ **Repository:** [mobile-videototext-comparison](https://github.com/XDcobra/mobile-videototext-comparison)
202
+
203
+ **Features:**
204
+
205
+ - Video to audio conversion (using native APIs)
206
+ - Audio to text transcription
207
+ - Video to text (video --> WAV --> text)
208
+ - Comparison between different STT providers
209
+ - Performance benchmarking
210
+
211
+ This app showcases how to integrate `react-native-sherpa-onnx` into a real-world application that processes video files and converts them to text.
212
+
213
+ <div align="center">
214
+ <img src="./docs/images/vtt_model_overview.png" alt="Video-to-Text Model Overview" width="30%" />
215
+ <img src="./docs/images/vtt_result_file_picker.png" alt="Video-to-Text file picker" width="30%" />
216
+ <img src="./docs/images/vtt_result_test_audio.png" alt="Video-to-Text test audio" width="30%" />
217
+ </div>
218
+
219
+ ## Contributing
220
+
221
+ - [Development workflow](CONTRIBUTING.md#development-workflow)
222
+ - [Sending a pull request](CONTRIBUTING.md#sending-a-pull-request)
223
+ - [Code of conduct](CODE_OF_CONDUCT.md)
224
+
225
+ ## License
226
+
227
+ MIT
228
+
229
+ ---
230
+
231
+ Made with [create-react-native-library](https://github.com/callstack/react-native-builder-bob)
232
+