react-native-sherpa-onnx 0.3.5 → 0.3.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (232)
  1. package/LICENSE +1 -0
  2. package/README.md +90 -21
  3. package/SherpaOnnx.podspec +3 -0
  4. package/THIRD_PARTY_LICENSES/README.md +62 -0
  5. package/THIRD_PARTY_LICENSES/ffmpeg.txt +502 -0
  6. package/THIRD_PARTY_LICENSES/libarchive.txt +65 -0
  7. package/THIRD_PARTY_LICENSES/nvidia_omla.txt +181 -0
  8. package/THIRD_PARTY_LICENSES/onnxruntime.txt +21 -0
  9. package/THIRD_PARTY_LICENSES/opus.txt +44 -0
  10. package/THIRD_PARTY_LICENSES/sherpa-onnx.txt +201 -0
  11. package/THIRD_PARTY_LICENSES/shine.txt +482 -0
  12. package/THIRD_PARTY_LICENSES/zstd.txt +30 -0
  13. package/android/build.gradle +7 -3
  14. package/android/prebuilt-download.gradle +345 -153
  15. package/android/prebuilt-versions.gradle +2 -2
  16. package/android/src/main/assets/model_licenses/asr-models-license-status.csv +409 -0
  17. package/android/src/main/assets/model_licenses/qnn-asr-models-license-status.csv +695 -0
  18. package/android/src/main/assets/model_licenses/tts-models-license-status.csv +596 -0
  19. package/android/src/main/cpp/CMakeLists.txt +28 -10
  20. package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.cpp +306 -6
  21. package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.h +33 -4
  22. package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-jni.cpp +266 -7
  23. package/android/src/main/cpp/jni/audio/sherpa-onnx-audio-convert-jni.cpp +268 -2
  24. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-tts.cpp +6 -2
  25. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-tts.cpp +4 -2
  26. package/android/src/main/java/com/sherpaonnx/SherpaOnnxArchiveHelper.kt +137 -7
  27. package/android/src/main/java/com/sherpaonnx/SherpaOnnxAssetHelper.kt +51 -6
  28. package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +159 -0
  29. package/android/src/main/java/com/sherpaonnx/SherpaOnnxOnlineSttHelper.kt +4 -1
  30. package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt +112 -97
  31. package/ios/Resources/model_licenses/asr-models-license-status.csv +409 -0
  32. package/ios/Resources/model_licenses/qnn-asr-models-license-status.csv +695 -0
  33. package/ios/Resources/model_licenses/tts-models-license-status.csv +596 -0
  34. package/ios/SherpaOnnx+OnlineSTT.mm +2 -0
  35. package/ios/SherpaOnnx+PcmLiveStream.mm +2 -29
  36. package/ios/SherpaOnnx+TTS.mm +178 -20
  37. package/ios/SherpaOnnx.mm +108 -1
  38. package/ios/SherpaOnnxAudioConvert.h +10 -0
  39. package/ios/SherpaOnnxAudioConvert.mm +257 -1
  40. package/ios/archive/sherpa-onnx-archive-helper.h +10 -0
  41. package/ios/archive/sherpa-onnx-archive-helper.mm +56 -5
  42. package/ios/model_detect/sherpa-onnx-model-detect-tts.mm +13 -2
  43. package/ios/model_detect/sherpa-onnx-validate-tts.mm +4 -2
  44. package/ios/online_stt/sherpa-onnx-online-stt-wrapper.h +1 -0
  45. package/ios/online_stt/sherpa-onnx-online-stt-wrapper.mm +4 -0
  46. package/ios/tts/sherpa-onnx-tts-wrapper.h +37 -0
  47. package/ios/tts/sherpa-onnx-tts-wrapper.mm +149 -3
  48. package/lib/module/NativeSherpaOnnx.js.map +1 -1
  49. package/lib/module/audio/index.js +8 -0
  50. package/lib/module/audio/index.js.map +1 -1
  51. package/lib/module/download/ModelDownloadManager.js +10 -929
  52. package/lib/module/download/ModelDownloadManager.js.map +1 -1
  53. package/lib/module/download/activeModelOperations.js +26 -0
  54. package/lib/module/download/activeModelOperations.js.map +1 -0
  55. package/lib/module/download/background-downloader.d.js +2 -0
  56. package/lib/module/download/background-downloader.d.js.map +1 -0
  57. package/lib/module/download/bulkPurge.js +72 -0
  58. package/lib/module/download/bulkPurge.js.map +1 -0
  59. package/lib/module/download/checksumPrompt.js +19 -0
  60. package/lib/module/download/checksumPrompt.js.map +1 -0
  61. package/lib/module/download/constants.js +7 -0
  62. package/lib/module/download/constants.js.map +1 -0
  63. package/lib/module/download/downloadEvents.js +35 -0
  64. package/lib/module/download/downloadEvents.js.map +1 -0
  65. package/lib/module/download/downloadTask.js +385 -0
  66. package/lib/module/download/downloadTask.js.map +1 -0
  67. package/lib/module/download/ensureModel.js +89 -0
  68. package/lib/module/download/ensureModel.js.map +1 -0
  69. package/lib/module/download/index.js +4 -3
  70. package/lib/module/download/index.js.map +1 -1
  71. package/lib/module/download/localModels.js +151 -0
  72. package/lib/module/download/localModels.js.map +1 -0
  73. package/lib/module/download/modelExtraction.js +174 -0
  74. package/lib/module/download/modelExtraction.js.map +1 -0
  75. package/lib/module/download/paths.js +98 -0
  76. package/lib/module/download/paths.js.map +1 -0
  77. package/lib/module/download/postDownloadProcessing.js +206 -0
  78. package/lib/module/download/postDownloadProcessing.js.map +1 -0
  79. package/lib/module/download/protectedModelKeys.js +31 -0
  80. package/lib/module/download/protectedModelKeys.js.map +1 -0
  81. package/lib/module/download/registry.js +267 -0
  82. package/lib/module/download/registry.js.map +1 -0
  83. package/lib/module/download/retry.js +59 -0
  84. package/lib/module/download/retry.js.map +1 -0
  85. package/lib/module/download/types.js +17 -0
  86. package/lib/module/download/types.js.map +1 -0
  87. package/lib/module/download/validation.js +101 -5
  88. package/lib/module/download/validation.js.map +1 -1
  89. package/lib/module/{download → extraction}/extractTarBz2.js +3 -1
  90. package/lib/module/extraction/extractTarBz2.js.map +1 -0
  91. package/lib/module/extraction/extractTarZst.js +54 -0
  92. package/lib/module/extraction/extractTarZst.js.map +1 -0
  93. package/lib/module/extraction/index.js +190 -0
  94. package/lib/module/extraction/index.js.map +1 -0
  95. package/lib/module/extraction/types.js +2 -0
  96. package/lib/module/extraction/types.js.map +1 -0
  97. package/lib/module/index.js +2 -1
  98. package/lib/module/index.js.map +1 -1
  99. package/lib/module/licenses.js +63 -0
  100. package/lib/module/licenses.js.map +1 -0
  101. package/lib/module/stt/index.js +16 -2
  102. package/lib/module/stt/index.js.map +1 -1
  103. package/lib/module/stt/streaming.js +2 -0
  104. package/lib/module/stt/streaming.js.map +1 -1
  105. package/lib/module/stt/streamingTypes.js.map +1 -1
  106. package/lib/module/stt/types.js.map +1 -1
  107. package/lib/module/tts/index.js +20 -2
  108. package/lib/module/tts/index.js.map +1 -1
  109. package/lib/module/tts/streaming.js +4 -0
  110. package/lib/module/tts/streaming.js.map +1 -1
  111. package/lib/module/tts/types.js.map +1 -1
  112. package/lib/module/utils.js +16 -1
  113. package/lib/module/utils.js.map +1 -1
  114. package/lib/typescript/src/NativeSherpaOnnx.d.ts +72 -5
  115. package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
  116. package/lib/typescript/src/audio/index.d.ts +10 -0
  117. package/lib/typescript/src/audio/index.d.ts.map +1 -1
  118. package/lib/typescript/src/download/ModelDownloadManager.d.ts +10 -108
  119. package/lib/typescript/src/download/ModelDownloadManager.d.ts.map +1 -1
  120. package/lib/typescript/src/download/activeModelOperations.d.ts +6 -0
  121. package/lib/typescript/src/download/activeModelOperations.d.ts.map +1 -0
  122. package/lib/typescript/src/download/bulkPurge.d.ts +14 -0
  123. package/lib/typescript/src/download/bulkPurge.d.ts.map +1 -0
  124. package/lib/typescript/src/download/checksumPrompt.d.ts +3 -0
  125. package/lib/typescript/src/download/checksumPrompt.d.ts.map +1 -0
  126. package/lib/typescript/src/download/constants.d.ts +5 -0
  127. package/lib/typescript/src/download/constants.d.ts.map +1 -0
  128. package/lib/typescript/src/download/downloadEvents.d.ts +6 -0
  129. package/lib/typescript/src/download/downloadEvents.d.ts.map +1 -0
  130. package/lib/typescript/src/download/downloadTask.d.ts +20 -0
  131. package/lib/typescript/src/download/downloadTask.d.ts.map +1 -0
  132. package/lib/typescript/src/download/ensureModel.d.ts +26 -0
  133. package/lib/typescript/src/download/ensureModel.d.ts.map +1 -0
  134. package/lib/typescript/src/download/index.d.ts +7 -5
  135. package/lib/typescript/src/download/index.d.ts.map +1 -1
  136. package/lib/typescript/src/download/localModels.d.ts +15 -0
  137. package/lib/typescript/src/download/localModels.d.ts.map +1 -0
  138. package/lib/typescript/src/download/modelExtraction.d.ts +36 -0
  139. package/lib/typescript/src/download/modelExtraction.d.ts.map +1 -0
  140. package/lib/typescript/src/download/paths.d.ts +28 -0
  141. package/lib/typescript/src/download/paths.d.ts.map +1 -0
  142. package/lib/typescript/src/download/postDownloadProcessing.d.ts +19 -0
  143. package/lib/typescript/src/download/postDownloadProcessing.d.ts.map +1 -0
  144. package/lib/typescript/src/download/protectedModelKeys.d.ts +6 -0
  145. package/lib/typescript/src/download/protectedModelKeys.d.ts.map +1 -0
  146. package/lib/typescript/src/download/registry.d.ts +14 -0
  147. package/lib/typescript/src/download/registry.d.ts.map +1 -0
  148. package/lib/typescript/src/download/retry.d.ts +15 -0
  149. package/lib/typescript/src/download/retry.d.ts.map +1 -0
  150. package/lib/typescript/src/download/types.d.ts +96 -0
  151. package/lib/typescript/src/download/types.d.ts.map +1 -0
  152. package/lib/typescript/src/download/validation.d.ts +19 -0
  153. package/lib/typescript/src/download/validation.d.ts.map +1 -1
  154. package/lib/typescript/src/extraction/extractTarBz2.d.ts.map +1 -0
  155. package/lib/typescript/src/extraction/extractTarZst.d.ts +14 -0
  156. package/lib/typescript/src/extraction/extractTarZst.d.ts.map +1 -0
  157. package/lib/typescript/src/extraction/index.d.ts +50 -0
  158. package/lib/typescript/src/extraction/index.d.ts.map +1 -0
  159. package/lib/typescript/src/extraction/types.d.ts +60 -0
  160. package/lib/typescript/src/extraction/types.d.ts.map +1 -0
  161. package/lib/typescript/src/index.d.ts +1 -0
  162. package/lib/typescript/src/index.d.ts.map +1 -1
  163. package/lib/typescript/src/licenses.d.ts +10 -0
  164. package/lib/typescript/src/licenses.d.ts.map +1 -0
  165. package/lib/typescript/src/stt/index.d.ts +4 -1
  166. package/lib/typescript/src/stt/index.d.ts.map +1 -1
  167. package/lib/typescript/src/stt/streaming.d.ts.map +1 -1
  168. package/lib/typescript/src/stt/streamingTypes.d.ts +5 -0
  169. package/lib/typescript/src/stt/streamingTypes.d.ts.map +1 -1
  170. package/lib/typescript/src/stt/types.d.ts +3 -1
  171. package/lib/typescript/src/stt/types.d.ts.map +1 -1
  172. package/lib/typescript/src/tts/index.d.ts +3 -1
  173. package/lib/typescript/src/tts/index.d.ts.map +1 -1
  174. package/lib/typescript/src/tts/streaming.d.ts.map +1 -1
  175. package/lib/typescript/src/tts/types.d.ts +6 -5
  176. package/lib/typescript/src/tts/types.d.ts.map +1 -1
  177. package/lib/typescript/src/utils.d.ts +5 -0
  178. package/lib/typescript/src/utils.d.ts.map +1 -1
  179. package/package.json +11 -1
  180. package/scripts/{check-model-csvs.sh → ci/check-model-csvs.sh} +9 -2
  181. package/scripts/ci/collect_all_sherpa_model_streams.sh +101 -0
  182. package/scripts/ci/collect_one_sherpa_release_stream.sh +189 -0
  183. package/scripts/ci/sherpa_asr_model_release_streams.json +21 -0
  184. package/scripts/ci/sherpa_tts_model_release_streams.json +13 -0
  185. package/scripts/ci/update_model_license_csv.sh +765 -0
  186. package/scripts/setup-ios-framework.sh +14 -11
  187. package/scripts/update_commercial_use.js +73 -0
  188. package/src/NativeSherpaOnnx.ts +92 -5
  189. package/src/audio/index.ts +20 -0
  190. package/src/download/ModelDownloadManager.ts +55 -1343
  191. package/src/download/activeModelOperations.ts +38 -0
  192. package/src/download/background-downloader.d.ts +43 -0
  193. package/src/download/bulkPurge.ts +102 -0
  194. package/src/download/checksumPrompt.ts +25 -0
  195. package/src/download/constants.ts +5 -0
  196. package/src/download/downloadEvents.ts +55 -0
  197. package/src/download/downloadTask.ts +497 -0
  198. package/src/download/ensureModel.ts +124 -0
  199. package/src/download/index.ts +19 -2
  200. package/src/download/localModels.ts +234 -0
  201. package/src/download/modelExtraction.ts +244 -0
  202. package/src/download/paths.ts +134 -0
  203. package/src/download/postDownloadProcessing.ts +292 -0
  204. package/src/download/protectedModelKeys.ts +30 -0
  205. package/src/download/registry.ts +404 -0
  206. package/src/download/retry.ts +76 -0
  207. package/src/download/types.ts +120 -0
  208. package/src/download/validation.ts +114 -8
  209. package/src/{download → extraction}/extractTarBz2.ts +3 -1
  210. package/src/extraction/extractTarZst.ts +79 -0
  211. package/src/extraction/index.ts +269 -0
  212. package/src/extraction/types.ts +63 -0
  213. package/src/index.tsx +2 -0
  214. package/src/licenses.ts +100 -0
  215. package/src/stt/index.ts +20 -2
  216. package/src/stt/streaming.ts +3 -0
  217. package/src/stt/streamingTypes.ts +5 -0
  218. package/src/stt/types.ts +3 -1
  219. package/src/tts/index.ts +30 -2
  220. package/src/tts/streaming.ts +10 -0
  221. package/src/tts/types.ts +6 -5
  222. package/src/utils.ts +22 -1
  223. package/third_party/libarchive_prebuilt/ANDROID_RELEASE_TAG +1 -1
  224. package/third_party/libarchive_prebuilt/IOS_RELEASE_TAG +1 -1
  225. package/third_party/sherpa-onnx-prebuilt/ANDROID_RELEASE_TAG +1 -1
  226. package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -1
  227. package/android/src/main/cpp/jni/tts/sherpa-onnx-tts-zipvoice-jni.cpp +0 -301
  228. package/android/src/main/java/com/sherpaonnx/ZipvoiceTtsWrapper.kt +0 -187
  229. package/lib/module/download/extractTarBz2.js.map +0 -1
  230. package/lib/typescript/src/download/extractTarBz2.d.ts.map +0 -1
  231. package/scripts/check-qnn-support.sh +0 -78
  232. /package/lib/typescript/src/{download → extraction}/extractTarBz2.d.ts +0 -0
package/src/stt/index.ts CHANGED
@@ -40,7 +40,7 @@ function normalizeSttResult(raw: {
40
40
  *
41
41
  * @param modelPath - Model path configuration (asset, file, or auto)
42
42
  * @param options - Optional preferInt8 and modelType (default: auto)
43
- * @returns Object with success, detectedModels (array of { type, modelDir }), and modelType (primary detected type)
43
+ * @returns Object with success, detectedModels (array of { type, modelDir }), modelType (primary detected type), optional error when success is false, and optionally isHardwareSpecificUnsupported
44
44
  * @example
45
45
  * ```typescript
46
46
  * const path = { type: 'asset' as const, path: 'models/sherpa-onnx-whisper-tiny-en' };
@@ -55,15 +55,33 @@ export async function detectSttModel(
55
55
  options?: { preferInt8?: boolean; modelType?: STTModelType }
56
56
  ): Promise<{
57
57
  success: boolean;
58
+ /** Native validation/detect failure. */
59
+ error?: string;
58
60
  detectedModels: Array<{ type: string; modelDir: string }>;
59
61
  modelType?: string;
62
+ isHardwareSpecificUnsupported?: boolean;
60
63
  }> {
61
64
  const resolvedPath = await resolveModelPath(modelPath);
62
- return SherpaOnnx.detectSttModel(
65
+ const raw = await SherpaOnnx.detectSttModel(
63
66
  resolvedPath,
64
67
  options?.preferInt8,
65
68
  options?.modelType
66
69
  );
70
+ const err =
71
+ typeof (raw as { error?: unknown }).error === 'string'
72
+ ? String((raw as { error: string }).error).trim()
73
+ : '';
74
+ return {
75
+ success: raw.success,
76
+ ...(err.length > 0 ? { error: err } : {}),
77
+ ...(raw.isHardwareSpecificUnsupported === true
78
+ ? { isHardwareSpecificUnsupported: true }
79
+ : {}),
80
+ detectedModels: raw.detectedModels ?? [],
81
+ ...(raw.modelType != null && raw.modelType !== ''
82
+ ? { modelType: raw.modelType }
83
+ : {}),
84
+ };
67
85
  }
68
86
 
69
87
  /**
package/src/stt/streaming.ts CHANGED
@@ -82,6 +82,7 @@ function flattenInitOptionsForNative(options: StreamingSttInitOptions): {
82
82
  provider?: string;
83
83
  ruleFsts?: string;
84
84
  ruleFars?: string;
85
+ dither?: number;
85
86
  blankPenalty?: number;
86
87
  debug?: boolean;
87
88
  rule1MustContainNonSilence?: boolean;
@@ -107,6 +108,7 @@ function flattenInitOptionsForNative(options: StreamingSttInitOptions): {
107
108
  provider: options.provider,
108
109
  ruleFsts: options.ruleFsts,
109
110
  ruleFars: options.ruleFars,
111
+ dither: options.dither,
110
112
  blankPenalty: options.blankPenalty,
111
113
  debug: options.debug,
112
114
  rule1MustContainNonSilence: ep?.rule1?.mustContainNonSilence,
@@ -200,6 +202,7 @@ export async function createStreamingSTT(
200
202
  if (flat.provider !== undefined) nativeOptions.provider = flat.provider;
201
203
  if (flat.ruleFsts !== undefined) nativeOptions.ruleFsts = flat.ruleFsts;
202
204
  if (flat.ruleFars !== undefined) nativeOptions.ruleFars = flat.ruleFars;
205
+ if (flat.dither !== undefined) nativeOptions.dither = flat.dither;
203
206
  if (flat.blankPenalty !== undefined)
204
207
  nativeOptions.blankPenalty = flat.blankPenalty;
205
208
  if (flat.debug !== undefined) nativeOptions.debug = flat.debug;
package/src/stt/streamingTypes.ts CHANGED
@@ -75,6 +75,11 @@ export interface StreamingSttInitOptions {
75
75
  ruleFsts?: string;
76
76
  /** Path(s) to rule FARs for ITN. */
77
77
  ruleFars?: string;
78
+ /**
79
+ * Feature extraction dither. **Android:** applied natively. **iOS:** ignored (C/CXX API has no
80
+ * `dither` on `FeatureConfig`); library default applies.
81
+ */
82
+ dither?: number;
78
83
  /** Blank penalty. */
79
84
  blankPenalty?: number;
80
85
  /** Enable debug logging. Default: false. */
package/src/stt/types.ts CHANGED
@@ -228,7 +228,9 @@ export interface STTInitializeOptions {
228
228
  ruleFars?: string;
229
229
 
230
230
  /**
231
- * Dither for feature extraction (Kotlin FeatureConfig.dither). Default 0.
231
+ * Dither for feature extraction (Kotlin `FeatureConfig.dither`). Default: no dither.
232
+ * **Android:** applied natively. **iOS:** ignored — the bundled sherpa-onnx C/CXX API does not
233
+ * expose this field; the native default is used.
232
234
  */
233
235
  dither?: number;
234
236
 
package/src/tts/index.ts CHANGED
@@ -86,7 +86,7 @@ function flattenTtsModelOptionsForNative(
86
86
  *
87
87
  * @param modelPath - Model path configuration (asset, file, or auto)
88
88
  * @param options - Optional modelType (default: 'auto')
89
- * @returns Object with success, detectedModels (array of { type, modelDir }), modelType (primary detected type), and optionally lexiconLanguageCandidates (language ids for multi-lang Kokoro/Kitten)
89
+ * @returns Object with success, detectedModels (array of { type, modelDir }), modelType (primary detected type), optional error when success is false, and optionally lexiconLanguageCandidates (language ids for multi-lang Kokoro/Kitten)
90
90
  * @example
91
91
  * ```typescript
92
92
  * const result = await detectTtsModel({ type: 'asset', path: 'models/vits-piper-en' });
@@ -101,13 +101,31 @@ export async function detectTtsModel(
101
101
  options?: { modelType?: TTSModelType }
102
102
  ): Promise<{
103
103
  success: boolean;
104
+ /** Native validation/detect failure (e.g. missing lexicon for Zipvoice). */
105
+ error?: string;
104
106
  detectedModels: Array<{ type: string; modelDir: string }>;
105
107
  modelType?: string;
106
108
  /** Language ids from detected lexicon files ("default" for lexicon.txt, or e.g. "us-en", "zh" from lexicon-us-en.txt, lexicon-zh.txt). Present for Kokoro/Kitten; use for language selection UI. */
107
109
  lexiconLanguageCandidates?: string[];
108
110
  }> {
109
111
  const resolvedPath = await resolveModelPath(modelPath);
110
- return SherpaOnnx.detectTtsModel(resolvedPath, options?.modelType);
112
+ const raw = await SherpaOnnx.detectTtsModel(resolvedPath, options?.modelType);
113
+ const err =
114
+ typeof (raw as { error?: unknown }).error === 'string'
115
+ ? String((raw as { error: string }).error).trim()
116
+ : '';
117
+ return {
118
+ success: raw.success,
119
+ ...(err.length > 0 ? { error: err } : {}),
120
+ detectedModels: raw.detectedModels ?? [],
121
+ ...(raw.modelType != null && raw.modelType !== ''
122
+ ? { modelType: raw.modelType }
123
+ : {}),
124
+ ...(raw.lexiconLanguageCandidates != null &&
125
+ raw.lexiconLanguageCandidates.length > 0
126
+ ? { lexiconLanguageCandidates: raw.lexiconLanguageCandidates }
127
+ : {}),
128
+ };
111
129
  }
112
130
 
113
131
  /**
@@ -124,6 +142,16 @@ function toNativeTtsOptions(
124
142
  if (options.silenceScale !== undefined)
125
143
  out.silenceScale = options.silenceScale;
126
144
  if (options.referenceAudio != null) {
145
+ const sr = options.referenceAudio.sampleRate;
146
+ if (
147
+ typeof __DEV__ !== 'undefined' &&
148
+ __DEV__ &&
149
+ (!Number.isFinite(sr) || sr <= 0)
150
+ ) {
151
+ console.warn(
152
+ '[react-native-sherpa-onnx] TTS referenceAudio.sampleRate must be > 0 for voice cloning (Zipvoice/Pocket).'
153
+ );
154
+ }
127
155
  out.referenceAudio = options.referenceAudio.samples;
128
156
  out.referenceSampleRate = options.referenceAudio.sampleRate;
129
157
  }
package/src/tts/streaming.ts CHANGED
@@ -90,6 +90,16 @@ function toNativeTtsOptions(
90
90
  if (options.silenceScale !== undefined)
91
91
  out.silenceScale = options.silenceScale;
92
92
  if (options.referenceAudio != null) {
93
+ const sr = options.referenceAudio.sampleRate;
94
+ if (
95
+ typeof __DEV__ !== 'undefined' &&
96
+ __DEV__ &&
97
+ (!Number.isFinite(sr) || sr <= 0)
98
+ ) {
99
+ console.warn(
100
+ '[react-native-sherpa-onnx] TTS referenceAudio.sampleRate must be > 0 for voice cloning (Zipvoice/Pocket).'
101
+ );
102
+ }
93
103
  out.referenceAudio = options.referenceAudio.samples;
94
104
  out.referenceSampleRate = options.referenceAudio.sampleRate;
95
105
  }
package/src/tts/types.ts CHANGED
@@ -202,15 +202,16 @@ export interface TtsGenerationOptions {
202
202
  silenceScale?: number;
203
203
 
204
204
  /**
205
- * Reference audio for voice cloning (Kotlin GenerationConfig).
206
- * In the Kotlin/RN stack, only Pocket TTS uses this; other model types (vits, matcha, kokoro, kitten) ignore it.
207
- * Mono float samples in [-1, 1] and sample rate in Hz.
205
+ * Reference audio for voice cloning (native GenerationConfig / Zipvoice prompt).
206
+ * **Native (iOS & Android):** Requires non-empty samples and `sampleRate > 0`. Used for **Zipvoice** (cloning) and **Pocket** (Mimi encoder).
207
+ * Other model types (vits, matcha, kokoro, kitten) are **rejected** if reference audio is passed.
208
+ * Mono float samples in [-1, 1].
208
209
  */
209
210
  referenceAudio?: { samples: number[]; sampleRate: number };
210
211
 
211
212
  /**
212
- * Transcript text of the reference audio (Kotlin GenerationConfig.referenceText).
213
- * Required for Pocket TTS when referenceAudio is provided; ignored by other model types.
213
+ * Transcript of the reference utterance for **Zipvoice** voice cloning (prompt text); **required** when cloning with Zipvoice (non-empty after trim).
214
+ * **Pocket:** not read by sherpa-onnx native code; optional, e.g. for app metadata only.
214
215
  */
215
216
  referenceText?: string;
216
217
 
package/src/utils.ts CHANGED
@@ -1,6 +1,7 @@
1
1
  import { Platform } from 'react-native';
2
2
  import type { ModelPathConfig } from './types';
3
3
  import SherpaOnnx from './NativeSherpaOnnx';
4
+ import { resolveActualModelDir } from './download';
4
5
 
5
6
  /**
6
7
  * Utility functions for model path handling
@@ -73,13 +74,33 @@ export function autoModelPath(path: string): ModelPathConfig {
73
74
  * This handles different path types (asset, file, auto) and returns
74
75
  * a platform-specific absolute path that can be used by native code.
75
76
  *
77
+ * For type 'file', the path is normalized so that when the given path is an
78
+ * install directory (e.g. with .ready and manifest.json and one model subdir),
79
+ * the returned path is the subdirectory that actually contains the .onnx files.
80
+ * This allows apps that build paths as baseDir/modelId to work without change.
81
+ *
76
82
  * @param config - Model path configuration
77
83
  * @returns Promise resolving to absolute path usable by native code
78
84
  */
79
85
  export async function resolveModelPath(
80
86
  config: ModelPathConfig
81
87
  ): Promise<string> {
82
- return SherpaOnnx.resolveModelPath(config);
88
+ const path = await SherpaOnnx.resolveModelPath(config);
89
+ if (config.type === 'file') {
90
+ const resolved = await resolveActualModelDir(path);
91
+ // Diagnostic: log so we can tell if /usr/share/espeak-ng-data is due to our path or sherpa-onnx fallback.
92
+ if (__DEV__) {
93
+ console.log(
94
+ '[SherpaOnnx] resolveModelPath(file): native path=',
95
+ path,
96
+ resolved !== path
97
+ ? `resolvedActualModelDir=> ${resolved}`
98
+ : '(unchanged)'
99
+ );
100
+ }
101
+ return resolved;
102
+ }
103
+ return path;
83
104
  }
84
105
 
85
106
  /**
package/third_party/libarchive_prebuilt/ANDROID_RELEASE_TAG CHANGED
@@ -1 +1 @@
1
- libarchive-android-v3.8.5-1
1
+ libarchive-android-v3.8.5-2
package/third_party/libarchive_prebuilt/IOS_RELEASE_TAG CHANGED
@@ -1 +1 @@
1
- libarchive-ios-v3.8.5-1
1
+ libarchive-ios-v3.8.5-2
package/third_party/sherpa-onnx-prebuilt/ANDROID_RELEASE_TAG CHANGED
@@ -1 +1 @@
1
- sherpa-onnx-android-v1.12.28
1
+ sherpa-onnx-android-v1.12.31-1
package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG CHANGED
@@ -1 +1 @@
1
- framework-v1.12.28
1
+ sherpa-onnx-ios-v1.12.31-1
package/android/src/main/cpp/jni/tts/sherpa-onnx-tts-zipvoice-jni.cpp DELETED
@@ -1,301 +0,0 @@
1
- /**
2
- * sherpa-onnx-tts-zipvoice-jni.cpp
3
- *
4
- * Purpose: JNI for Zipvoice TTS using the sherpa-onnx C-API (OfflineTtsZipvoiceModelConfig). The
5
- * Kotlin TTS API does not expose Zipvoice config, so this native layer is used for Zipvoice-only flows.
6
- */
7
- #include <jni.h>
8
- #include <cstring>
9
- #include <android/log.h>
10
-
11
- #include "sherpa-onnx/c-api/c-api.h"
12
-
13
- #define LOG_TAG "ZipvoiceTtsJni"
14
- #define LOGI(...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, __VA_ARGS__)
15
- #define LOGE(...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, __VA_ARGS__)
16
-
17
- namespace {
18
-
19
- // Helper: get a non-null C string from a jstring (returns "" for null).
20
- struct JStringGuard {
21
- JNIEnv* env;
22
- jstring jstr;
23
- const char* cstr;
24
-
25
- JStringGuard(JNIEnv* e, jstring s) : env(e), jstr(s), cstr(nullptr) {
26
- if (s) cstr = env->GetStringUTFChars(s, nullptr);
27
- }
28
- ~JStringGuard() {
29
- if (cstr) env->ReleaseStringUTFChars(jstr, cstr);
30
- }
31
- const char* get() const { return cstr ? cstr : ""; }
32
- };
33
-
34
- // Build a Java float[] + int pair as Object[] { float[], Integer } for returning generated audio.
35
- jobjectArray buildAudioResult(JNIEnv* env, const float* samples, int32_t n, int32_t sampleRate) {
36
- jclass objClass = env->FindClass("java/lang/Object");
37
- if (!objClass) return nullptr;
38
-
39
- jobjectArray result = env->NewObjectArray(2, objClass, nullptr);
40
- if (!result) {
41
- env->DeleteLocalRef(objClass);
42
- return nullptr;
43
- }
44
-
45
- // Element 0: float[] samples
46
- jfloatArray jsamples = env->NewFloatArray(n);
47
- if (jsamples && n > 0) {
48
- env->SetFloatArrayRegion(jsamples, 0, n, samples);
49
- }
50
- env->SetObjectArrayElement(result, 0, jsamples);
51
- if (jsamples) env->DeleteLocalRef(jsamples);
52
-
53
- // Element 1: Integer sampleRate
54
- jclass intClass = env->FindClass("java/lang/Integer");
55
- jmethodID intValueOf = env->GetStaticMethodID(intClass, "valueOf", "(I)Ljava/lang/Integer;");
56
- jobject jrate = env->CallStaticObjectMethod(intClass, intValueOf, sampleRate);
57
- env->SetObjectArrayElement(result, 1, jrate);
58
- env->DeleteLocalRef(intClass);
59
- if (jrate) env->DeleteLocalRef(jrate);
60
-
61
- env->DeleteLocalRef(objClass);
62
- return result;
63
- }
64
-
65
- } // namespace
66
-
67
- extern "C" {
68
-
69
- // Create a Zipvoice TTS instance via C-API. Returns the pointer as a jlong (0 on failure).
70
- JNIEXPORT jlong JNICALL
71
- Java_com_sherpaonnx_ZipvoiceTtsWrapper_nativeCreate(
72
- JNIEnv* env, jobject /* this */,
73
- jstring j_tokens, jstring j_encoder, jstring j_decoder, jstring j_vocoder,
74
- jstring j_data_dir, jstring j_lexicon,
75
- jfloat feat_scale, jfloat t_shift, jfloat target_rms, jfloat guidance_scale,
76
- jint num_threads, jboolean debug,
77
- jstring j_rule_fsts, jstring j_rule_fars, jint max_num_sentences, jfloat silence_scale,
78
- jstring j_provider) {
79
- JStringGuard tokens(env, j_tokens);
80
- JStringGuard encoder(env, j_encoder);
81
- JStringGuard decoder(env, j_decoder);
82
- JStringGuard vocoder(env, j_vocoder);
83
- JStringGuard dataDir(env, j_data_dir);
84
- JStringGuard lexicon(env, j_lexicon);
85
- JStringGuard ruleFsts(env, j_rule_fsts);
86
- JStringGuard ruleFars(env, j_rule_fars);
87
- JStringGuard provider(env, j_provider);
88
-
89
- LOGI("nativeCreate: tokens=%s, encoder=%s, decoder=%s, vocoder=%s, dataDir=%s, lexicon=%s",
90
- tokens.get(), encoder.get(), decoder.get(), vocoder.get(), dataDir.get(), lexicon.get());
91
- LOGI("nativeCreate: featScale=%.3f, tShift=%.3f, targetRms=%.3f, guidanceScale=%.3f, threads=%d, debug=%d",
92
- feat_scale, t_shift, target_rms, guidance_scale, num_threads, debug);
93
- LOGI("nativeCreate: ruleFsts=%s, ruleFars=%s, maxNumSentences=%d, silenceScale=%.3f, provider=%s",
94
- ruleFsts.get(), ruleFars.get(), max_num_sentences, silence_scale, provider.get());
95
-
96
- SherpaOnnxOfflineTtsConfig config;
97
- memset(&config, 0, sizeof(config));
98
-
99
- config.model.zipvoice.tokens = tokens.get();
100
- config.model.zipvoice.encoder = encoder.get();
101
- config.model.zipvoice.decoder = decoder.get();
102
- config.model.zipvoice.vocoder = vocoder.get();
103
- config.model.zipvoice.data_dir = dataDir.get();
104
- config.model.zipvoice.lexicon = lexicon.get();
105
- config.model.zipvoice.feat_scale = feat_scale;
106
- config.model.zipvoice.t_shift = t_shift;
107
- config.model.zipvoice.target_rms = target_rms;
108
- config.model.zipvoice.guidance_scale = guidance_scale;
109
-
110
- config.model.num_threads = num_threads;
111
- config.model.debug = debug ? 1 : 0;
112
- config.model.provider = (provider.get() && *provider.get()) ? provider.get() : "cpu";
113
-
114
- config.rule_fsts = ruleFsts.get();
115
- config.rule_fars = ruleFars.get();
116
- config.max_num_sentences = max_num_sentences;
117
- config.silence_scale = silence_scale;
118
-
119
- const SherpaOnnxOfflineTts* tts = SherpaOnnxCreateOfflineTts(&config);
120
- if (!tts) {
121
- LOGE("nativeCreate: SherpaOnnxCreateOfflineTts returned null");
122
- return 0;
123
- }
124
-
125
- LOGI("nativeCreate: success, sampleRate=%d, numSpeakers=%d",
126
- SherpaOnnxOfflineTtsSampleRate(tts), SherpaOnnxOfflineTtsNumSpeakers(tts));
127
-
128
- return reinterpret_cast<jlong>(tts);
129
- }
130
-
131
- // Destroy a Zipvoice TTS instance.
132
- JNIEXPORT void JNICALL
133
- Java_com_sherpaonnx_ZipvoiceTtsWrapper_nativeDestroy(
134
- JNIEnv* /* env */, jobject /* this */, jlong ptr) {
135
- auto* tts = reinterpret_cast<const SherpaOnnxOfflineTts*>(ptr);
136
- if (tts) {
137
- SherpaOnnxDestroyOfflineTts(tts);
138
- LOGI("nativeDestroy: released");
139
- }
140
- }
141
-
142
- // Get the sample rate of the Zipvoice TTS model.
143
- JNIEXPORT jint JNICALL
144
- Java_com_sherpaonnx_ZipvoiceTtsWrapper_nativeGetSampleRate(
145
- JNIEnv* /* env */, jobject /* this */, jlong ptr) {
146
- auto* tts = reinterpret_cast<const SherpaOnnxOfflineTts*>(ptr);
147
- return tts ? SherpaOnnxOfflineTtsSampleRate(tts) : 0;
148
- }
149
-
150
- // Get the number of speakers of the Zipvoice TTS model.
151
- JNIEXPORT jint JNICALL
152
- Java_com_sherpaonnx_ZipvoiceTtsWrapper_nativeGetNumSpeakers(
153
- JNIEnv* /* env */, jobject /* this */, jlong ptr) {
154
- auto* tts = reinterpret_cast<const SherpaOnnxOfflineTts*>(ptr);
155
- return tts ? SherpaOnnxOfflineTtsNumSpeakers(tts) : 0;
156
- }
157
-
158
- // Generate audio (non-zero-shot). Returns Object[] { float[], Integer }.
159
- JNIEXPORT jobjectArray JNICALL
160
- Java_com_sherpaonnx_ZipvoiceTtsWrapper_nativeGenerate(
161
- JNIEnv* env, jobject /* this */,
162
- jlong ptr, jstring j_text, jint sid, jfloat speed) {
163
- auto* tts = reinterpret_cast<const SherpaOnnxOfflineTts*>(ptr);
164
- if (!tts) {
165
- LOGE("nativeGenerate: tts pointer is null");
166
- return nullptr;
167
- }
168
-
169
- JStringGuard text(env, j_text);
170
- LOGI("nativeGenerate: text=%s, sid=%d, speed=%.2f", text.get(), sid, speed);
171
-
172
- const SherpaOnnxGeneratedAudio* audio =
173
- SherpaOnnxOfflineTtsGenerate(tts, text.get(), sid, speed);
174
- if (!audio) {
175
- LOGE("nativeGenerate: SherpaOnnxOfflineTtsGenerate returned null");
176
- return nullptr;
177
- }
178
-
179
- LOGI("nativeGenerate: got %d samples at %d Hz", audio->n, audio->sample_rate);
180
- jobjectArray result = buildAudioResult(env, audio->samples, audio->n, audio->sample_rate);
181
-
182
- SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
183
- return result;
184
- }
185
-
186
- // Generate audio with callback for streaming. Returns Object[] { float[], Integer } for the
187
- // final concatenated audio. The callback is invoked per chunk.
188
- JNIEXPORT jobjectArray JNICALL
189
- Java_com_sherpaonnx_ZipvoiceTtsWrapper_nativeGenerateWithCallback(
190
- JNIEnv* env, jobject thiz,
191
- jlong ptr, jstring j_text, jint sid, jfloat speed) {
192
- auto* tts = reinterpret_cast<const SherpaOnnxOfflineTts*>(ptr);
193
- if (!tts) {
194
- LOGE("nativeGenerateWithCallback: tts pointer is null");
195
- return nullptr;
196
- }
197
-
198
- JStringGuard text(env, j_text);
199
-
200
- // We use the progress callback variant to get chunks.
201
- // The JNI environment and `thiz` are stored in a struct passed through void* arg.
202
- struct CallbackCtx {
203
- JNIEnv* env;
204
- jobject thiz;
205
- jmethodID onChunkId;
206
- bool cancelled;
207
- };
208
-
209
- jclass cls = env->GetObjectClass(thiz);
210
- jmethodID onChunkId = env->GetMethodID(cls, "onNativeChunk", "([FI)Z");
211
- env->DeleteLocalRef(cls);
212
- if (!onChunkId) {
213
- LOGE("nativeGenerateWithCallback: onNativeChunk method not found");
214
- return nullptr;
215
- }
216
-
217
- CallbackCtx ctx{env, thiz, onChunkId, false};
218
-
219
- auto callback = [](const float* samples, int32_t n, float /* progress */, void* arg) -> int32_t {
220
- auto* c = static_cast<CallbackCtx*>(arg);
221
- if (c->cancelled) return 0;
222
-
223
- jfloatArray chunk = c->env->NewFloatArray(n);
224
- if (chunk && n > 0) {
225
- c->env->SetFloatArrayRegion(chunk, 0, n, samples);
226
- }
227
-
228
- // Call Java: boolean onNativeChunk(float[] samples, int n)
229
- jboolean cont = c->env->CallBooleanMethod(c->thiz, c->onChunkId, chunk, n);
230
- if (chunk) c->env->DeleteLocalRef(chunk);
231
-
232
- if (!cont) {
233
- c->cancelled = true;
234
- return 0;
235
- }
236
- return 1;
237
- };
238
-
239
- const SherpaOnnxGeneratedAudio* audio =
240
- SherpaOnnxOfflineTtsGenerateWithProgressCallbackWithArg(
241
- tts, text.get(), sid, speed, callback, &ctx);
242
-
243
- if (!audio) {
244
- LOGE("nativeGenerateWithCallback: generate returned null");
245
- return nullptr;
246
- }
247
-
248
- jobjectArray result = buildAudioResult(env, audio->samples, audio->n, audio->sample_rate);
249
- SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
250
- return result;
251
- }
252
-
253
- // Zero-shot voice cloning with Zipvoice. Returns Object[] { float[], Integer }.
254
- JNIEXPORT jobjectArray JNICALL
255
- Java_com_sherpaonnx_ZipvoiceTtsWrapper_nativeGenerateWithZipvoice(
256
- JNIEnv* env, jobject /* this */,
257
- jlong ptr, jstring j_text, jstring j_prompt_text,
258
- jfloatArray j_prompt_samples, jint prompt_sr,
259
- jfloat speed, jint num_steps) {
260
- auto* tts = reinterpret_cast<const SherpaOnnxOfflineTts*>(ptr);
261
- if (!tts) {
262
- LOGE("nativeGenerateWithZipvoice: tts pointer is null");
263
- return nullptr;
264
- }
265
-
266
- JStringGuard text(env, j_text);
267
- JStringGuard promptText(env, j_prompt_text);
268
-
269
- jfloat* promptSamples = nullptr;
270
- jint nPrompt = 0;
271
- if (j_prompt_samples) {
272
- nPrompt = env->GetArrayLength(j_prompt_samples);
273
- promptSamples = env->GetFloatArrayElements(j_prompt_samples, nullptr);
274
- }
275
-
276
- LOGI("nativeGenerateWithZipvoice: text=%s, promptLen=%d, promptSr=%d, speed=%.2f, steps=%d",
277
- text.get(), nPrompt, prompt_sr, speed, num_steps);
278
-
279
- const SherpaOnnxGeneratedAudio* audio =
280
- SherpaOnnxOfflineTtsGenerateWithZipvoice(
281
- tts, text.get(), promptText.get(),
282
- promptSamples, nPrompt, prompt_sr,
283
- speed, num_steps);
284
-
285
- if (promptSamples) {
286
- env->ReleaseFloatArrayElements(j_prompt_samples, promptSamples, JNI_ABORT);
287
- }
288
-
289
- if (!audio) {
290
- LOGE("nativeGenerateWithZipvoice: returned null");
291
- return nullptr;
292
- }
293
-
294
- LOGI("nativeGenerateWithZipvoice: got %d samples at %d Hz", audio->n, audio->sample_rate);
295
- jobjectArray result = buildAudioResult(env, audio->samples, audio->n, audio->sample_rate);
296
-
297
- SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
298
- return result;
299
- }
300
-
301
- } // extern "C"