react-native-sherpa-onnx 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175) hide show
  1. package/README.md +232 -236
  2. package/SherpaOnnx.podspec +68 -64
  3. package/android/build.gradle +182 -192
  4. package/android/codegen.gradle +57 -0
  5. package/android/prebuilt-download.gradle +428 -0
  6. package/android/prebuilt-versions.gradle +43 -0
  7. package/android/proguard-rules.pro +10 -0
  8. package/android/src/main/assets/testModels/add_mul_add.onnx +28 -0
  9. package/android/src/main/assets/testModels/nnapi_internal_uint8_support.onnx +0 -0
  10. package/android/src/main/assets/testModels/qnn_multi_ctx_embed.onnx +0 -0
  11. package/android/src/main/cpp/CMakeLists.txt +166 -129
  12. package/android/src/main/cpp/CMakePresets.json +54 -0
  13. package/android/src/main/cpp/crypto/sha256.cpp +174 -0
  14. package/android/src/main/cpp/crypto/sha256.h +16 -0
  15. package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.cpp +404 -0
  16. package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.h +56 -0
  17. package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-jni.cpp +181 -0
  18. package/android/src/main/cpp/jni/audio/sherpa-onnx-audio-convert-jni.cpp +888 -0
  19. package/{ios → android/src/main/cpp/jni/model_detect}/sherpa-onnx-common.h +18 -18
  20. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.cpp +86 -0
  21. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.h +20 -0
  22. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.cpp +423 -0
  23. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.h +55 -0
  24. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-stt.cpp +399 -0
  25. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-tts.cpp +238 -0
  26. package/{ios → android/src/main/cpp/jni/model_detect}/sherpa-onnx-model-detect.h +122 -89
  27. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.cpp +99 -0
  28. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.h +16 -0
  29. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-tts-wrapper.cpp +78 -0
  30. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-tts-wrapper.h +16 -0
  31. package/android/src/main/cpp/jni/module/sherpa-onnx-module-jni.cpp +190 -0
  32. package/android/src/main/cpp/jni/tts/sherpa-onnx-tts-zipvoice-jni.cpp +301 -0
  33. package/android/src/main/java/com/sherpaonnx/SherpaOnnxArchiveHelper.kt +94 -0
  34. package/android/src/main/java/com/sherpaonnx/{SherpaOnnxCoreHelper.kt → SherpaOnnxAssetHelper.kt} +350 -236
  35. package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +791 -483
  36. package/android/src/main/java/com/sherpaonnx/SherpaOnnxSttHelper.kt +699 -109
  37. package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt +1123 -668
  38. package/android/src/main/java/com/sherpaonnx/ZipvoiceTtsWrapper.kt +187 -0
  39. package/ios/SherpaOnnx+Assets.h +11 -0
  40. package/ios/SherpaOnnx+Assets.mm +325 -0
  41. package/ios/SherpaOnnx+STT.mm +455 -118
  42. package/ios/SherpaOnnx+TTS.mm +1101 -712
  43. package/ios/SherpaOnnx.h +17 -6
  44. package/ios/SherpaOnnx.mm +206 -311
  45. package/ios/SherpaOnnx.xcconfig +19 -19
  46. package/ios/SherpaOnnxCoreMLHelper.swift +24 -0
  47. package/ios/archive/sherpa-onnx-archive-helper.h +21 -0
  48. package/ios/archive/sherpa-onnx-archive-helper.mm +296 -0
  49. package/ios/libarchive_darwin_config.h +153 -0
  50. package/{android/src/main/cpp/jni → ios/model_detect}/sherpa-onnx-common.h +18 -18
  51. package/ios/model_detect/sherpa-onnx-model-detect-helper.h +49 -0
  52. package/ios/model_detect/sherpa-onnx-model-detect-helper.mm +210 -0
  53. package/ios/model_detect/sherpa-onnx-model-detect-stt.mm +344 -0
  54. package/ios/model_detect/sherpa-onnx-model-detect-tts.mm +201 -0
  55. package/{android/src/main/cpp/jni → ios/model_detect}/sherpa-onnx-model-detect.h +117 -89
  56. package/ios/scripts/patch-libarchive-includes.sh +61 -0
  57. package/ios/scripts/setup-ios-libarchive.sh +98 -0
  58. package/ios/stt/sherpa-onnx-stt-wrapper.h +129 -0
  59. package/ios/stt/sherpa-onnx-stt-wrapper.mm +523 -0
  60. package/ios/{sherpa-onnx-tts-wrapper.h → tts/sherpa-onnx-tts-wrapper.h} +90 -85
  61. package/ios/{sherpa-onnx-tts-wrapper.mm → tts/sherpa-onnx-tts-wrapper.mm} +376 -345
  62. package/lib/module/NativeSherpaOnnx.js +3 -0
  63. package/lib/module/NativeSherpaOnnx.js.map +1 -1
  64. package/lib/module/audio/index.js +22 -0
  65. package/lib/module/audio/index.js.map +1 -0
  66. package/lib/module/diarization/index.js +1 -1
  67. package/lib/module/diarization/index.js.map +1 -1
  68. package/lib/module/download/ModelDownloadManager.js +918 -0
  69. package/lib/module/download/ModelDownloadManager.js.map +1 -0
  70. package/lib/module/download/extractTarBz2.js +53 -0
  71. package/lib/module/download/extractTarBz2.js.map +1 -0
  72. package/lib/module/download/index.js +6 -0
  73. package/lib/module/download/index.js.map +1 -0
  74. package/lib/module/download/validation.js +178 -0
  75. package/lib/module/download/validation.js.map +1 -0
  76. package/lib/module/enhancement/index.js +1 -1
  77. package/lib/module/enhancement/index.js.map +1 -1
  78. package/lib/module/index.js +41 -3
  79. package/lib/module/index.js.map +1 -1
  80. package/lib/module/separation/index.js +1 -1
  81. package/lib/module/separation/index.js.map +1 -1
  82. package/lib/module/stt/index.js +127 -60
  83. package/lib/module/stt/index.js.map +1 -1
  84. package/lib/module/stt/sttModelLanguages.js +512 -0
  85. package/lib/module/stt/sttModelLanguages.js.map +1 -0
  86. package/lib/module/stt/types.js +53 -1
  87. package/lib/module/stt/types.js.map +1 -1
  88. package/lib/module/tts/index.js +216 -289
  89. package/lib/module/tts/index.js.map +1 -1
  90. package/lib/module/tts/types.js +86 -1
  91. package/lib/module/tts/types.js.map +1 -1
  92. package/lib/module/types.js.map +1 -1
  93. package/lib/module/utils.js +86 -73
  94. package/lib/module/utils.js.map +1 -1
  95. package/lib/module/vad/index.js +1 -1
  96. package/lib/module/vad/index.js.map +1 -1
  97. package/lib/typescript/src/NativeSherpaOnnx.d.ts +192 -38
  98. package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
  99. package/lib/typescript/src/audio/index.d.ts +13 -0
  100. package/lib/typescript/src/audio/index.d.ts.map +1 -0
  101. package/lib/typescript/src/diarization/index.d.ts +3 -2
  102. package/lib/typescript/src/diarization/index.d.ts.map +1 -1
  103. package/lib/typescript/src/download/ModelDownloadManager.d.ts +108 -0
  104. package/lib/typescript/src/download/ModelDownloadManager.d.ts.map +1 -0
  105. package/lib/typescript/src/download/extractTarBz2.d.ts +14 -0
  106. package/lib/typescript/src/download/extractTarBz2.d.ts.map +1 -0
  107. package/lib/typescript/src/download/index.d.ts +7 -0
  108. package/lib/typescript/src/download/index.d.ts.map +1 -0
  109. package/lib/typescript/src/download/validation.d.ts +57 -0
  110. package/lib/typescript/src/download/validation.d.ts.map +1 -0
  111. package/lib/typescript/src/enhancement/index.d.ts +3 -2
  112. package/lib/typescript/src/enhancement/index.d.ts.map +1 -1
  113. package/lib/typescript/src/index.d.ts +26 -2
  114. package/lib/typescript/src/index.d.ts.map +1 -1
  115. package/lib/typescript/src/separation/index.d.ts +3 -2
  116. package/lib/typescript/src/separation/index.d.ts.map +1 -1
  117. package/lib/typescript/src/stt/index.d.ts +31 -43
  118. package/lib/typescript/src/stt/index.d.ts.map +1 -1
  119. package/lib/typescript/src/stt/sttModelLanguages.d.ts +52 -0
  120. package/lib/typescript/src/stt/sttModelLanguages.d.ts.map +1 -0
  121. package/lib/typescript/src/stt/types.d.ts +196 -9
  122. package/lib/typescript/src/stt/types.d.ts.map +1 -1
  123. package/lib/typescript/src/tts/index.d.ts +25 -211
  124. package/lib/typescript/src/tts/index.d.ts.map +1 -1
  125. package/lib/typescript/src/tts/types.d.ts +148 -25
  126. package/lib/typescript/src/tts/types.d.ts.map +1 -1
  127. package/lib/typescript/src/types.d.ts +0 -32
  128. package/lib/typescript/src/types.d.ts.map +1 -1
  129. package/lib/typescript/src/utils.d.ts +28 -13
  130. package/lib/typescript/src/utils.d.ts.map +1 -1
  131. package/lib/typescript/src/vad/index.d.ts +3 -2
  132. package/lib/typescript/src/vad/index.d.ts.map +1 -1
  133. package/package.json +250 -222
  134. package/scripts/check-qnn-support.sh +78 -0
  135. package/scripts/setup-ios-framework.sh +379 -282
  136. package/src/NativeSherpaOnnx.ts +474 -251
  137. package/src/audio/index.ts +32 -0
  138. package/src/diarization/index.ts +4 -2
  139. package/src/download/ModelDownloadManager.ts +1325 -0
  140. package/src/download/extractTarBz2.ts +78 -0
  141. package/src/download/index.ts +43 -0
  142. package/src/download/validation.ts +279 -0
  143. package/src/enhancement/index.ts +4 -2
  144. package/src/index.tsx +78 -27
  145. package/src/separation/index.ts +4 -2
  146. package/src/stt/index.ts +249 -89
  147. package/src/stt/sttModelLanguages.ts +237 -0
  148. package/src/stt/types.ts +263 -9
  149. package/src/tts/index.ts +470 -458
  150. package/src/tts/types.ts +373 -218
  151. package/src/types.ts +0 -44
  152. package/src/utils.ts +145 -131
  153. package/src/vad/index.ts +4 -2
  154. package/third_party/ffmpeg_prebuilt/ANDROID_RELEASE_TAG +1 -0
  155. package/third_party/libarchive_prebuilt/ANDROID_RELEASE_TAG +1 -0
  156. package/third_party/libarchive_prebuilt/IOS_RELEASE_TAG +1 -0
  157. package/third_party/sherpa-onnx-prebuilt/ANDROID_RELEASE_TAG +1 -0
  158. package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -0
  159. package/android/src/main/cpp/include/sherpa-onnx/c-api/c-api.h +0 -1918
  160. package/android/src/main/cpp/include/sherpa-onnx/c-api/cxx-api.h +0 -841
  161. package/android/src/main/cpp/jni/sherpa-onnx-model-detect.cpp +0 -541
  162. package/android/src/main/cpp/jni/sherpa-onnx-stt-jni.cpp +0 -336
  163. package/android/src/main/cpp/jni/sherpa-onnx-stt-wrapper.cpp +0 -222
  164. package/android/src/main/cpp/jni/sherpa-onnx-stt-wrapper.h +0 -68
  165. package/android/src/main/cpp/jni/sherpa-onnx-tts-jni.cpp +0 -823
  166. package/android/src/main/cpp/jni/sherpa-onnx-tts-wrapper.cpp +0 -387
  167. package/android/src/main/cpp/jni/sherpa-onnx-tts-wrapper.h +0 -147
  168. package/ios/Frameworks/sherpa_onnx.xcframework.zip +0 -0
  169. package/ios/include/sherpa-onnx/c-api/c-api.h +0 -1918
  170. package/ios/include/sherpa-onnx/c-api/cxx-api.h +0 -841
  171. package/ios/sherpa-onnx-model-detect.mm +0 -441
  172. package/ios/sherpa-onnx-stt-wrapper.h +0 -48
  173. package/ios/sherpa-onnx-stt-wrapper.mm +0 -201
  174. package/scripts/copy-headers.js +0 -184
  175. package/scripts/setup-assets.js +0 -323
@@ -1,109 +1,699 @@
1
- package com.sherpaonnx
2
-
3
- import android.util.Log
4
- import com.facebook.react.bridge.Arguments
5
- import com.facebook.react.bridge.Promise
6
- import java.io.File
7
-
8
/**
 * Promise-based STT helper that forwards initialize / transcribe / release calls to a
 * [NativeSttBridge] and settles the supplied React Native [Promise] with the outcome.
 *
 * @property native Bridge whose `nativeStt*` functions perform the actual work.
 * @property logTag Tag used for every log entry written by this helper.
 */
internal class SherpaOnnxSttHelper(
    private val native: NativeSttBridge,
    private val logTag: String
) {
    /** Native entry points this helper delegates to. */
    interface NativeSttBridge {
        fun nativeSttInitialize(
            modelDir: String,
            preferInt8: Boolean,
            hasPreferInt8: Boolean,
            modelType: String
        ): HashMap<String, Any>?

        fun nativeSttTranscribe(filePath: String): String

        fun nativeSttRelease()
    }

    /**
     * Logs [logMessage] and rejects [promise] with code `INIT_ERROR` and [userMessage].
     * When no separate log text is given, the user-facing message is logged as well.
     */
    private fun rejectInit(promise: Promise, userMessage: String, logMessage: String = userMessage) {
        Log.e(logTag, logMessage)
        promise.reject("INIT_ERROR", userMessage)
    }

    /**
     * Checks that [modelDir] is an existing directory, runs the native initialization and
     * resolves [promise] with `{ success, detectedModels }`, or rejects it with `INIT_ERROR`.
     *
     * @param modelDir Directory that holds the model files.
     * @param preferInt8 Optional preference; `null` is forwarded as `false` with `hasPreferInt8 = false`.
     * @param modelType Explicit model type; `null` is forwarded as `"auto"`.
     * @param promise Promise settled with the result map or an error.
     */
    fun initializeSherpaOnnx(
        modelDir: String,
        preferInt8: Boolean?,
        modelType: String?,
        promise: Promise
    ) {
        try {
            val dir = File(modelDir)
            val pathProblem = when {
                !dir.exists() -> "Model directory does not exist: $modelDir"
                !dir.isDirectory -> "Model path is not a directory: $modelDir"
                else -> null
            }
            if (pathProblem != null) {
                rejectInit(promise, pathProblem)
                return
            }

            val nativeResult = native.nativeSttInitialize(
                modelDir,
                preferInt8 ?: false,
                preferInt8 != null,
                modelType ?: "auto"
            )
            if (nativeResult == null) {
                rejectInit(
                    promise,
                    "Failed to initialize sherpa-onnx. Check native logs for details.",
                    "Native initialization returned null for modelDir: $modelDir"
                )
                return
            }

            // A missing or non-Boolean "success" entry counts as failure.
            if ((nativeResult["success"] as? Boolean) != true) {
                rejectInit(
                    promise,
                    "Failed to initialize sherpa-onnx. Check native logs for details.",
                    "Native initialization returned false for modelDir: $modelDir"
                )
                return
            }

            val detected = nativeResult["detectedModels"] as? ArrayList<*>
                ?: arrayListOf<HashMap<String, String>>()
            val detectedModelsArray = Arguments.createArray()
            // Entries that are not hash maps are skipped.
            detected.filterIsInstance<HashMap<*, *>>().forEach { entry ->
                val modelResultMap = Arguments.createMap()
                modelResultMap.putString("type", entry["type"] as? String ?: "")
                modelResultMap.putString("modelDir", entry["modelDir"] as? String ?: "")
                detectedModelsArray.pushMap(modelResultMap)
            }

            val resultMap = Arguments.createMap()
            resultMap.putBoolean("success", true)
            resultMap.putArray("detectedModels", detectedModelsArray)
            promise.resolve(resultMap)
        } catch (e: Exception) {
            val errorMsg = "Exception during initialization: ${e.message ?: e.javaClass.simpleName}"
            Log.e(logTag, errorMsg, e)
            promise.reject("INIT_ERROR", errorMsg, e)
        }
    }

    /** Transcribes [filePath] through the native bridge and resolves [promise] with the returned string. */
    fun transcribeFile(filePath: String, promise: Promise) {
        try {
            promise.resolve(native.nativeSttTranscribe(filePath))
        } catch (e: Exception) {
            promise.reject("TRANSCRIBE_ERROR", "Failed to transcribe file", e)
        }
    }

    /** Calls the native release function and resolves [promise] with `null`. */
    fun unloadSherpaOnnx(promise: Promise) {
        try {
            native.nativeSttRelease()
            promise.resolve(null)
        } catch (e: Exception) {
            promise.reject("RELEASE_ERROR", "Failed to release resources", e)
        }
    }
}
1
+ package com.sherpaonnx
2
+
3
+ import android.content.Context
4
+ import android.net.Uri
5
+ import android.util.Log
6
+ import com.facebook.react.bridge.Arguments
7
+ import com.facebook.react.bridge.Promise
8
+ import com.facebook.react.bridge.ReadableMap
9
+ import com.facebook.react.bridge.WritableMap
10
+ import com.k2fsa.sherpa.onnx.FeatureConfig
11
+ import com.k2fsa.sherpa.onnx.OfflineRecognizerResult
12
+ import com.k2fsa.sherpa.onnx.OfflineModelConfig
13
+ import com.k2fsa.sherpa.onnx.OfflineRecognizer
14
+ import com.k2fsa.sherpa.onnx.OfflineRecognizerConfig
15
+ import com.k2fsa.sherpa.onnx.OfflineStream
16
+ import com.k2fsa.sherpa.onnx.OfflineTransducerModelConfig
17
+ import com.k2fsa.sherpa.onnx.OfflineParaformerModelConfig
18
+ import com.k2fsa.sherpa.onnx.OfflineNemoEncDecCtcModelConfig
19
+ import com.k2fsa.sherpa.onnx.OfflineWhisperModelConfig
20
+ import com.k2fsa.sherpa.onnx.OfflineSenseVoiceModelConfig
21
+ import com.k2fsa.sherpa.onnx.OfflineZipformerCtcModelConfig
22
+ import com.k2fsa.sherpa.onnx.OfflineWenetCtcModelConfig
23
+ import com.k2fsa.sherpa.onnx.OfflineFunAsrNanoModelConfig
24
+ import com.k2fsa.sherpa.onnx.OfflineMoonshineModelConfig
25
+ import com.k2fsa.sherpa.onnx.OfflineDolphinModelConfig
26
+ import com.k2fsa.sherpa.onnx.OfflineFireRedAsrModelConfig
27
+ import com.k2fsa.sherpa.onnx.OfflineCanaryModelConfig
28
+ import com.k2fsa.sherpa.onnx.OfflineOmnilingualAsrCtcModelConfig
29
+ import com.k2fsa.sherpa.onnx.OfflineMedAsrCtcModelConfig
30
+ import com.k2fsa.sherpa.onnx.WaveReader
31
+ import java.io.File
32
+ import java.util.concurrent.ConcurrentHashMap
33
+
34
+ internal class SherpaOnnxSttHelper(
35
+ private val context: Context,
36
+ private val detectSttModel: (
37
+ modelDir: String,
38
+ preferInt8: Boolean,
39
+ hasPreferInt8: Boolean,
40
+ modelType: String,
41
+ debug: Boolean
42
+ ) -> HashMap<String, Any>?,
43
+ private val logTag: String
44
+ ) {
45
+
46
/**
 * Mutable state kept for one STT instance id.
 *
 * @property recognizer Recognizer for this instance, or `null` when none is set.
 * @property lastRecognizerConfig Config most recently stored for [recognizer].
 * @property currentSttModelType Model type string stored alongside the config.
 */
private data class SttEngineInstance(
    @Volatile var recognizer: OfflineRecognizer? = null,
    @Volatile var lastRecognizerConfig: OfflineRecognizerConfig? = null,
    @Volatile var currentSttModelType: String? = null
)

/** STT instances keyed by instance id. */
private val instances = ConcurrentHashMap<String, SttEngineInstance>()

/** Returns the instance stored under [instanceId], or `null` when there is none. */
private fun getInstance(instanceId: String): SttEngineInstance? {
    return instances[instanceId]
}
55
+
56
/**
 * Tells whether hotwords may be used with [modelType].
 * Hotwords are supported for transducer and NeMo transducer models
 * (sherpa-onnx; NeMo: https://github.com/k2-fsa/sherpa-onnx/pull/3077).
 */
private fun supportsHotwords(modelType: String): Boolean = when (modelType) {
    "transducer", "nemo_transducer" -> true
    else -> false
}
59
+
60
/**
 * Resolves a single path to a file path. For content URIs (content://...) the content is copied
 * into the app cache directory so it can be read through a plain file path; any other path is
 * returned unchanged.
 * Use for a hotwords file or any single file path that may come from a document picker.
 *
 * If the copy fails, the (possibly partially written) cache file is deleted before the error is
 * rethrown, so failed attempts do not leave files behind in the cache directory.
 *
 * @param path File path or content URI
 * @param cacheFilePrefix Prefix for the cache file name (e.g. "stt_hotwords", "stt_rule_fst")
 * @return Resolved file path
 * @throws IllegalStateException if the content URI cannot be opened or copied
 */
private fun resolveContentUriToFile(path: String, cacheFilePrefix: String): String {
    if (!path.startsWith("content://")) return path
    val uri = Uri.parse(path)
    // nanoTime suffix gives each copy its own file name.
    val cacheFile = File(context.cacheDir, "${cacheFilePrefix}_${System.nanoTime()}")
    try {
        val input = context.contentResolver.openInputStream(uri)
            ?: throw IllegalStateException("File is not readable (content URI could not be opened): $path")
        input.use { source ->
            cacheFile.outputStream().use { sink -> source.copyTo(sink) }
        }
    } catch (e: Exception) {
        // Remove whatever was written so far; delete() simply returns false if nothing exists.
        cacheFile.delete()
        throw when (e) {
            is IllegalStateException -> e
            is SecurityException ->
                IllegalStateException("File is not readable (no permission to read content URI): $path", e)
            else ->
                IllegalStateException("File is not readable (content URI could not be opened): ${e.message ?: path}", e)
        }
    }
    return cacheFile.absolutePath
}
87
+
88
/**
 * Resolves a string holding one or more comma-separated paths. Each entry is trimmed, empty
 * entries are dropped, and every remaining entry is passed through [resolveContentUriToFile]
 * (so content URIs become cache file paths). Use for ruleFsts / ruleFars.
 *
 * @param pathsString Single path or comma-separated paths (e.g. "path1,path2")
 * @param cacheFilePrefix Prefix for cache file names (e.g. "stt_rule_fst", "stt_rule_far");
 *   the entry's position is appended to it
 * @return Resolved paths joined by comma, or [pathsString] itself when it is blank
 */
private fun resolveFilePaths(pathsString: String, cacheFilePrefix: String): String {
    if (pathsString.isBlank()) return pathsString
    val entries = pathsString.split(',').mapNotNull { raw ->
        raw.trim().takeIf { it.isNotEmpty() }
    }
    val resolved = ArrayList<String>(entries.size)
    for ((position, entry) in entries.withIndex()) {
        resolved.add(resolveContentUriToFile(entry, "${cacheFilePrefix}_$position"))
    }
    return resolved.joinToString(",")
}
101
+
102
/** Resolves the hotwords path (a single file) via [resolveContentUriToFile] with the "stt_hotwords" prefix. */
private fun resolveHotwordsPath(path: String): String {
    return resolveContentUriToFile(path, "stt_hotwords")
}
105
+
106
/**
 * Validates the hotwords file format: one hotword or phrase per line, with an optional
 * " :score" suffix. Blank lines are ignored. Call after resolveHotwordsPath so that
 * [filePath] is always a file path (not a content URI).
 *
 * @param filePath Path of the hotwords file to check.
 * @return `null` if the file is valid, otherwise an error message describing the first problem.
 */
private fun validateHotwordsFile(filePath: String): String? {
    val candidate = File(filePath)
    when {
        !candidate.exists() -> return "Hotwords file does not exist: $filePath"
        !candidate.isFile -> return "Hotwords path is not a file: $filePath"
        !candidate.canRead() -> return "Hotwords file is not readable: $filePath"
    }
    val text = try {
        candidate.readText(Charsets.UTF_8)
    } catch (e: Exception) {
        return "Failed to read hotwords file: ${e.message}"
    }
    if ('\u0000' in text) return "Hotwords file contains null bytes (not a valid text file)."

    var accepted = 0
    for (segment in text.split('\n', '\r')) {
        val entry = segment.trim()
        if (entry.isEmpty()) continue
        val preview = "${entry.take(60)}…"
        // The last " :" on the line separates the phrase from its score.
        val scoreMarker = entry.lastIndexOf(" :")
        val phrase = when {
            scoreMarker >= 0 -> {
                val scoreText = entry.substring(scoreMarker + 2).trim()
                if (scoreText.isEmpty()) return "Invalid hotword line (missing score after ' :'): $preview"
                if (scoreText.toFloatOrNull() == null) {
                    return "Invalid hotword line (score must be a number after ' :'): $preview"
                }
                entry.substring(0, scoreMarker).trim()
            }
            '\t' in entry -> {
                // Likely sentencepiece .vocab format (token<TAB>score); hotwords use " :score" instead.
                if (entry.substringAfter('\t').trim().toFloatOrNull() != null) {
                    return "This file looks like a sentencepiece .vocab file (token<TAB>score). Use a hotwords file instead: one word or phrase per line, optional ' :score' at end."
                }
                entry
            }
            else -> entry
        }
        if (phrase.isEmpty()) return "Invalid hotword line (empty hotword): $preview"
        if (phrase.none { it.isLetter() }) {
            return "Invalid hotword line (must contain at least one letter): $preview"
        }
        accepted++
    }
    return if (accepted == 0) {
        "Hotwords file has no valid lines (one hotword or phrase per line, UTF-8 text)."
    } else {
        null
    }
}
149
+
150
/**
 * Detects the STT model in [modelDir], builds a recognizer config from the detection result and
 * the given options, and creates the recognizer stored under [instanceId] (releasing any
 * recognizer previously held by that instance). Settles [promise] with a result map containing
 * `success`, `modelType`, `decodingMethod` and `detectedModels`, or rejects it.
 *
 * Rejection codes: `INIT_ERROR` (bad directory, detection failure, rule FST/FAR resolution,
 * any exception), `HOTWORDS_NOT_SUPPORTED` (hotwords requested for a model type that does not
 * support them) and `INVALID_HOTWORDS_FILE` (hotwords file cannot be resolved or is malformed).
 *
 * @param instanceId Key of the STT instance to create or re-initialize.
 * @param modelDir Directory containing the model files.
 * @param preferInt8 Optional preference; `null` is forwarded as `false` plus `hasPreferInt8 = false`.
 * @param modelType Explicit model type; `null` is forwarded as `"auto"`.
 * @param debug Detection debug flag; `null` is forwarded as `false`.
 * @param hotwordsFile Hotwords file path or content URI; blank/`null` disables hotwords.
 * @param hotwordsScore Hotwords score; defaults to 1.5 when `null`.
 * @param numThreads Thread count forwarded to the config builder (may be `null`).
 * @param provider Provider string forwarded to the config builder (may be `null`).
 * @param ruleFsts Comma-separated rule FST paths or content URIs.
 * @param ruleFars Comma-separated rule FAR paths or content URIs.
 * @param dither Dither value; defaults to 0 when `null`.
 * @param modelOptions Model-specific options forwarded to the config builder.
 * @param modelingUnit Trimmed and forwarded to the config builder (empty when `null`).
 * @param bpeVocab Trimmed and forwarded to the config builder (empty when `null`).
 * @param promise Promise settled with the result map or an error.
 */
fun initializeStt(
    instanceId: String,
    modelDir: String,
    preferInt8: Boolean?,
    modelType: String?,
    debug: Boolean?,
    hotwordsFile: String?,
    hotwordsScore: Double?,
    numThreads: Double?,
    provider: String?,
    ruleFsts: String?,
    ruleFars: String?,
    dither: Double?,
    modelOptions: ReadableMap?,
    modelingUnit: String?,
    bpeVocab: String?,
    promise: Promise
) {
    try {
        // --- 1. The model directory must exist and be a directory. ---
        val dir = File(modelDir)
        val dirProblem = when {
            !dir.exists() -> "Model directory does not exist: $modelDir"
            !dir.isDirectory -> "Model path is not a directory: $modelDir"
            else -> null
        }
        if (dirProblem != null) {
            Log.e(logTag, dirProblem)
            promise.reject("INIT_ERROR", dirProblem)
            return
        }

        // --- 2. Model detection. ---
        val detection = detectSttModel(
            modelDir,
            preferInt8 ?: false,
            preferInt8 != null,
            modelType ?: "auto",
            debug ?: false
        )
        if (detection == null) {
            Log.e(logTag, "Detection returned null for modelDir: $modelDir")
            promise.reject("INIT_ERROR", "Failed to detect STT model. Check native logs for details.")
            return
        }
        // A missing or non-Boolean "success" entry counts as failure.
        if ((detection["success"] as? Boolean) != true) {
            val reason = detection["error"] as? String
            val errorMsg = if (reason.isNullOrBlank()) {
                "Failed to initialize sherpa-onnx. Check native logs for details."
            } else {
                "Failed to initialize sherpa-onnx: $reason"
            }
            Log.e(logTag, "Detection failed for modelDir: $modelDir")
            promise.reject("INIT_ERROR", errorMsg)
            return
        }

        val rawPaths = detection["paths"] as? Map<*, *> ?: emptyMap<String, String>()
        // Keys are stringified; non-string values become "".
        val pathStrings = rawPaths.entries.associate { (key, value) ->
            key.toString() to (value as? String).orEmpty()
        }
        val modelTypeStr = detection["modelType"] as? String ?: "unknown"

        // --- 3. Hotwords: supported model type, resolvable path, valid content. ---
        val requestedHotwords = hotwordsFile?.trim().orEmpty()
        val hotwordsRequested = requestedHotwords.isNotEmpty()
        if (hotwordsRequested && !supportsHotwords(modelTypeStr)) {
            val errorMsg = "Hotwords are only supported for transducer models (transducer, nemo_transducer). Current model type: $modelTypeStr"
            Log.e(logTag, errorMsg)
            promise.reject("HOTWORDS_NOT_SUPPORTED", errorMsg)
            return
        }
        val resolvedHotwordsPath = if (!hotwordsRequested) {
            ""
        } else {
            try {
                resolveHotwordsPath(requestedHotwords)
            } catch (e: Exception) {
                val errorMsg = e.message ?: "Hotwords file could not be resolved"
                Log.e(logTag, errorMsg, e)
                promise.reject("INVALID_HOTWORDS_FILE", errorMsg, e)
                return
            }
        }
        val hotwordsProblem =
            if (resolvedHotwordsPath.isEmpty()) null else validateHotwordsFile(resolvedHotwordsPath)
        if (hotwordsProblem != null) {
            Log.e(logTag, hotwordsProblem)
            promise.reject("INVALID_HOTWORDS_FILE", hotwordsProblem)
            return
        }

        // --- 4. Rule FST / FAR paths (FSTs are resolved first, then FARs). ---
        val resolvedRuleFsts = try {
            resolveFilePaths(ruleFsts.orEmpty().trim(), "stt_rule_fst")
        } catch (e: Exception) {
            val errorMsg = e.message ?: "Rule FST path(s) could not be resolved"
            Log.e(logTag, errorMsg, e)
            promise.reject("INIT_ERROR", errorMsg, e)
            return
        }
        val resolvedRuleFars = try {
            resolveFilePaths(ruleFars.orEmpty().trim(), "stt_rule_far")
        } catch (e: Exception) {
            val errorMsg = e.message ?: "Rule FAR path(s) could not be resolved"
            Log.e(logTag, errorMsg, e)
            promise.reject("INIT_ERROR", errorMsg, e)
            return
        }

        // --- 5. Replace the instance's recognizer. ---
        val engine = instances.getOrPut(instanceId) { SttEngineInstance() }
        engine.recognizer?.release()
        engine.recognizer = null
        val config = buildRecognizerConfig(
            pathStrings,
            modelTypeStr,
            hotwordsFile = resolvedHotwordsPath,
            hotwordsScore = hotwordsScore?.toFloat() ?: 1.5f,
            numThreads = numThreads?.toInt(),
            provider = provider,
            ruleFsts = resolvedRuleFsts,
            ruleFars = resolvedRuleFars,
            dither = dither?.toFloat() ?: 0f,
            modelOptions = modelOptions,
            modelingUnit = modelingUnit?.trim().orEmpty(),
            bpeVocab = bpeVocab?.trim().orEmpty()
        )
        engine.lastRecognizerConfig = config
        engine.currentSttModelType = modelTypeStr
        engine.recognizer = OfflineRecognizer(config = config)

        // --- 6. Report the outcome. ---
        val detected = detection["detectedModels"] as? ArrayList<*>
            ?: arrayListOf<HashMap<String, String>>()
        val detectedModelsArray = Arguments.createArray()
        // Entries that are not hash maps are skipped.
        detected.filterIsInstance<HashMap<*, *>>().forEach { entry ->
            val modelResultMap = Arguments.createMap()
            modelResultMap.putString("type", entry["type"] as? String ?: "")
            modelResultMap.putString("modelDir", entry["modelDir"] as? String ?: "")
            detectedModelsArray.pushMap(modelResultMap)
        }
        val resultMap = Arguments.createMap()
        resultMap.putBoolean("success", true)
        resultMap.putString("modelType", modelTypeStr)
        resultMap.putString("decodingMethod", config.decodingMethod)
        resultMap.putArray("detectedModels", detectedModelsArray)
        promise.resolve(resultMap)
    } catch (e: Exception) {
        val errorMsg = "Exception during initialization: ${e.message ?: e.javaClass.simpleName}"
        Log.e(logTag, errorMsg, e)
        promise.reject("INIT_ERROR", errorMsg, e)
    }
}
306
+
307
/**
 * Reads the wave file at [filePath], decodes it with the recognizer of [instanceId] and resolves
 * [promise] with the recognition result map. Rejects with `TRANSCRIBE_ERROR` when the instance
 * is unknown, has no recognizer, or any step throws.
 *
 * The decoding stream is released in a `finally` block, matching [transcribeSamples], so it is
 * not leaked whether decoding succeeds or fails.
 *
 * @param instanceId Key of the STT instance to use.
 * @param filePath Path of the wave file passed to `WaveReader.readWave`.
 * @param promise Promise settled with the result map or an error.
 */
fun transcribeFile(instanceId: String, filePath: String, promise: Promise) {
    try {
        val inst = getInstance(instanceId) ?: run {
            promise.reject("TRANSCRIBE_ERROR", "STT instance not found: $instanceId")
            return
        }
        val rec = inst.recognizer
        if (rec == null) {
            promise.reject("TRANSCRIBE_ERROR", "STT not initialized. Call initializeStt first.")
            return
        }
        val wave = WaveReader.readWave(filePath)
        val stream: OfflineStream = rec.createStream()
        try {
            stream.acceptWaveform(wave.samples, wave.sampleRate)
            rec.decode(stream)
            val result = rec.getResult(stream)
            promise.resolve(resultToWritableMap(result))
        } finally {
            // Always free the stream, even when decoding throws.
            stream.release()
        }
    } catch (e: Exception) {
        val message = e.message?.takeIf { it.isNotBlank() } ?: "Failed to transcribe file"
        Log.e(logTag, "transcribeFile error: $message", e)
        promise.reject("TRANSCRIBE_ERROR", message, e)
    }
}
330
+
331
/**
 * Decodes in-memory audio [samples] with the recognizer of [instanceId] and resolves [promise]
 * with the recognition result map. Rejects with `TRANSCRIBE_ERROR` when the instance is unknown,
 * has no recognizer, or any step throws. The decoding stream is released in all cases.
 *
 * @param instanceId Key of the STT instance to use.
 * @param samples Audio samples; each element is read as a double and narrowed to float.
 * @param sampleRate Sample rate passed to the stream together with the samples.
 * @param promise Promise settled with the result map or an error.
 */
fun transcribeSamples(instanceId: String, samples: com.facebook.react.bridge.ReadableArray, sampleRate: Int, promise: Promise) {
    try {
        val engine = getInstance(instanceId)
        if (engine == null) {
            promise.reject("TRANSCRIBE_ERROR", "STT instance not found: $instanceId")
            return
        }
        val recognizer = engine.recognizer ?: run {
            promise.reject("TRANSCRIBE_ERROR", "STT not initialized. Call initializeStt first.")
            return
        }

        // Copy the bridge array into a primitive float buffer.
        val sampleCount = samples.size()
        val floatBuffer = FloatArray(sampleCount)
        for (index in 0 until sampleCount) {
            floatBuffer[index] = samples.getDouble(index).toFloat()
        }

        val stream: OfflineStream = recognizer.createStream()
        try {
            stream.acceptWaveform(floatBuffer, sampleRate)
            recognizer.decode(stream)
            promise.resolve(resultToWritableMap(recognizer.getResult(stream)))
        } finally {
            stream.release()
        }
    } catch (e: Exception) {
        val message = e.message?.takeIf { it.isNotBlank() } ?: "Failed to transcribe samples"
        Log.e(logTag, "transcribeSamples error: $message", e)
        promise.reject("TRANSCRIBE_ERROR", message, e)
    }
}
358
+
359
/**
 * Merges [options] into the config last stored for [instanceId], resolves any rule FST/FAR and
 * hotwords paths, applies the result to the recognizer and resolves [promise] with `null`.
 *
 * Recognized option keys: `decodingMethod`, `maxActivePaths`, `hotwordsFile`, `hotwordsScore`,
 * `blankPenalty`, `ruleFsts`, `ruleFars`. Keys that are absent keep their current value.
 * When the resulting hotwords file is non-empty, the decoding method is forced to
 * `modified_beam_search` and `maxActivePaths` is raised to at least 4.
 *
 * The stored config is updated only after `setConfig` returns, so a failed apply does not leave
 * the cached config out of step with the recognizer.
 *
 * Rejection codes: `CONFIG_ERROR`, `HOTWORDS_NOT_SUPPORTED`, `INVALID_HOTWORDS_FILE`.
 *
 * @param instanceId Key of the STT instance to reconfigure.
 * @param options Map of option overrides.
 * @param promise Promise resolved with `null` on success or rejected with an error.
 */
fun setSttConfig(instanceId: String, options: ReadableMap, promise: Promise) {
    try {
        val inst = getInstance(instanceId) ?: run {
            promise.reject("CONFIG_ERROR", "STT instance not found: $instanceId")
            return
        }
        val rec = inst.recognizer
        val current = inst.lastRecognizerConfig
        if (rec == null || current == null) {
            promise.reject("CONFIG_ERROR", "STT not initialized. Call initializeStt first.")
            return
        }

        // Option readers: use the override when the key is present, else keep the fallback.
        fun textOption(key: String, fallback: String): String =
            if (options.hasKey(key)) options.getString(key) ?: fallback else fallback

        fun floatOption(key: String, fallback: Float): Float =
            if (options.hasKey(key)) options.getDouble(key).toFloat() else fallback

        val merged = current.copy(
            decodingMethod = textOption("decodingMethod", current.decodingMethod),
            maxActivePaths = if (options.hasKey("maxActivePaths")) {
                options.getDouble("maxActivePaths").toInt()
            } else {
                current.maxActivePaths
            },
            hotwordsFile = textOption("hotwordsFile", current.hotwordsFile),
            hotwordsScore = floatOption("hotwordsScore", current.hotwordsScore),
            blankPenalty = floatOption("blankPenalty", current.blankPenalty),
            ruleFsts = textOption("ruleFsts", current.ruleFsts),
            ruleFars = textOption("ruleFars", current.ruleFars)
        )

        val resolvedRuleFsts = try {
            resolveFilePaths(merged.ruleFsts.trim(), "stt_rule_fst")
        } catch (e: Exception) {
            val errorMsg = e.message ?: "Rule FST path(s) could not be resolved"
            Log.e(logTag, errorMsg, e)
            promise.reject("CONFIG_ERROR", errorMsg, e)
            return
        }
        val resolvedRuleFars = try {
            resolveFilePaths(merged.ruleFars.trim(), "stt_rule_far")
        } catch (e: Exception) {
            val errorMsg = e.message ?: "Rule FAR path(s) could not be resolved"
            Log.e(logTag, errorMsg, e)
            promise.reject("CONFIG_ERROR", errorMsg, e)
            return
        }

        val newHotwordsFile = merged.hotwordsFile.trim()
        val resolvedHotwordsPath = if (newHotwordsFile.isNotEmpty()) {
            val modelType = inst.currentSttModelType
            if (modelType == null || !supportsHotwords(modelType)) {
                val errorMsg = "Hotwords are only supported for transducer models (transducer, nemo_transducer). Current model type: ${modelType ?: "unknown"}"
                Log.e(logTag, errorMsg)
                promise.reject("HOTWORDS_NOT_SUPPORTED", errorMsg)
                return
            }
            val resolved = try {
                resolveHotwordsPath(newHotwordsFile)
            } catch (e: Exception) {
                val errorMsg = e.message ?: "Hotwords file could not be resolved"
                Log.e(logTag, errorMsg, e)
                promise.reject("INVALID_HOTWORDS_FILE", errorMsg, e)
                return
            }
            val formatProblem = validateHotwordsFile(resolved)
            if (formatProblem != null) {
                Log.e(logTag, formatProblem)
                promise.reject("INVALID_HOTWORDS_FILE", formatProblem)
                return
            }
            resolved
        } else {
            ""
        }

        val configWithPaths = merged.copy(
            hotwordsFile = resolvedHotwordsPath,
            ruleFsts = resolvedRuleFsts,
            ruleFars = resolvedRuleFars
        )
        // With a hotwords file, force modified_beam_search and at least 4 active paths.
        val configToApply = if (configWithPaths.hotwordsFile.isNotEmpty()) {
            configWithPaths.copy(
                decodingMethod = "modified_beam_search",
                maxActivePaths = maxOf(4, configWithPaths.maxActivePaths)
            )
        } else {
            configWithPaths
        }

        // Apply first; remember the config only once the recognizer has accepted it.
        rec.setConfig(configToApply)
        inst.lastRecognizerConfig = configToApply
        promise.resolve(null)
    } catch (e: Exception) {
        val message = e.message?.takeIf { it.isNotBlank() } ?: "Failed to set STT config"
        Log.e(logTag, "setSttConfig error: $message", e)
        promise.reject("CONFIG_ERROR", message, e)
    }
}
441
+
442
+ private fun resultToWritableMap(result: OfflineRecognizerResult): WritableMap {
443
+ val map = Arguments.createMap()
444
+ map.putString("text", result.text)
445
+ val tokensArray = Arguments.createArray()
446
+ for (t in result.tokens) tokensArray.pushString(t)
447
+ map.putArray("tokens", tokensArray)
448
+ val timestampsArray = Arguments.createArray()
449
+ for (t in result.timestamps) timestampsArray.pushDouble(t.toDouble())
450
+ map.putArray("timestamps", timestampsArray)
451
+ map.putString("lang", result.lang)
452
+ map.putString("emotion", result.emotion)
453
+ map.putString("event", result.event)
454
+ val durationsArray = Arguments.createArray()
455
+ for (d in result.durations) durationsArray.pushDouble(d.toDouble())
456
+ map.putArray("durations", durationsArray)
457
+ return map
458
+ }
459
+
460
+ fun unloadStt(instanceId: String, promise: Promise) {
461
+ try {
462
+ val inst = instances.remove(instanceId)
463
+ if (inst != null) {
464
+ inst.recognizer?.release()
465
+ inst.recognizer = null
466
+ inst.lastRecognizerConfig = null
467
+ inst.currentSttModelType = null
468
+ }
469
+ promise.resolve(null)
470
+ } catch (e: Exception) {
471
+ promise.reject("RELEASE_ERROR", "Failed to release resources", e)
472
+ }
473
+ }
474
+
475
+ private fun path(paths: Map<String, String>, key: String): String =
476
+ paths[key].orEmpty()
477
+
478
+ /** Builds a short summary of modelOptions for Crashlytics (max ~200 chars). */
479
+ private fun modelOptionsSummary(modelOptions: ReadableMap?): String {
480
+ if (modelOptions == null) return ""
481
+ val parts = mutableListOf<String>()
482
+ modelOptions.getMap("whisper")?.let { w ->
483
+ val lang = w.getString("language") ?: ""
484
+ val task = w.getString("task") ?: ""
485
+ parts.add("whisper:lang=$lang,task=$task")
486
+ }
487
+ modelOptions.getMap("senseVoice")?.let { sv ->
488
+ val lang = sv.getString("language") ?: ""
489
+ val itn = if (sv.hasKey("useItn")) sv.getBoolean("useItn") else null
490
+ parts.add("senseVoice:lang=$lang" + (itn?.let { ",itn=$it" } ?: ""))
491
+ }
492
+ modelOptions.getMap("canary")?.let { c ->
493
+ val src = c.getString("srcLang") ?: ""
494
+ val tgt = c.getString("tgtLang") ?: ""
495
+ parts.add("canary:src=$src,tgt=$tgt")
496
+ }
497
+ modelOptions.getMap("funasrNano")?.let { fn ->
498
+ val lang = fn.getString("language") ?: ""
499
+ val hasHotwords = fn.hasKey("hotwords") && fn.getString("hotwords")?.isNotBlank() == true
500
+ parts.add("funasrNano:lang=$lang,hotwords=$hasHotwords")
501
+ }
502
+ return parts.joinToString(";").take(200)
503
+ }
504
+
505
+ private fun buildRecognizerConfig(
506
+ paths: Map<String, String>,
507
+ modelType: String,
508
+ hotwordsFile: String = "",
509
+ hotwordsScore: Float = 1.5f,
510
+ numThreads: Int? = null,
511
+ provider: String? = null,
512
+ ruleFsts: String = "",
513
+ ruleFars: String = "",
514
+ dither: Float = 0f,
515
+ modelOptions: ReadableMap? = null,
516
+ modelingUnit: String = "",
517
+ bpeVocab: String = ""
518
+ ): OfflineRecognizerConfig {
519
+ val featConfig = FeatureConfig(sampleRate = 16000, featureDim = 80, dither = dither)
520
+ val modelConfig = when (modelType) {
521
+ "transducer", "nemo_transducer" -> OfflineModelConfig(
522
+ transducer = OfflineTransducerModelConfig(
523
+ encoder = path(paths, "encoder"),
524
+ decoder = path(paths, "decoder"),
525
+ joiner = path(paths, "joiner")
526
+ ),
527
+ tokens = path(paths, "tokens"),
528
+ modelType = modelType
529
+ )
530
+ "paraformer" -> OfflineModelConfig(
531
+ paraformer = OfflineParaformerModelConfig(model = path(paths, "paraformerModel")),
532
+ tokens = path(paths, "tokens"),
533
+ modelType = "paraformer"
534
+ )
535
+ "nemo_ctc" -> OfflineModelConfig(
536
+ nemo = OfflineNemoEncDecCtcModelConfig(model = path(paths, "ctcModel")),
537
+ tokens = path(paths, "tokens"),
538
+ modelType = "nemo_ctc"
539
+ )
540
+ "wenet_ctc" -> OfflineModelConfig(
541
+ wenetCtc = com.k2fsa.sherpa.onnx.OfflineWenetCtcModelConfig(model = path(paths, "ctcModel")),
542
+ tokens = path(paths, "tokens"),
543
+ modelType = "wenet_ctc"
544
+ )
545
+ "sense_voice" -> {
546
+ val sv = modelOptions?.getMap("senseVoice")
547
+ OfflineModelConfig(
548
+ senseVoice = OfflineSenseVoiceModelConfig(
549
+ model = path(paths, "ctcModel"),
550
+ language = sv?.getString("language") ?: "",
551
+ useInverseTextNormalization = if (sv?.hasKey("useItn") == true) sv.getBoolean("useItn") else true
552
+ ),
553
+ tokens = path(paths, "tokens"),
554
+ modelType = "sense_voice"
555
+ )
556
+ }
557
+ "zipformer_ctc", "ctc" -> OfflineModelConfig(
558
+ zipformerCtc = OfflineZipformerCtcModelConfig(model = path(paths, "ctcModel")),
559
+ tokens = path(paths, "tokens"),
560
+ modelType = if (modelType == "ctc") "zipformer_ctc" else modelType
561
+ )
562
+ "whisper" -> {
563
+ val w = modelOptions?.getMap("whisper")
564
+ OfflineModelConfig(
565
+ whisper = OfflineWhisperModelConfig(
566
+ encoder = path(paths, "whisperEncoder"),
567
+ decoder = path(paths, "whisperDecoder"),
568
+ language = w?.getString("language") ?: "en",
569
+ task = w?.getString("task") ?: "transcribe",
570
+ tailPaddings = if (w?.hasKey("tailPaddings") == true) w.getInt("tailPaddings") else 1000,
571
+ enableTokenTimestamps = w?.hasKey("enableTokenTimestamps") == true && w.getBoolean("enableTokenTimestamps"),
572
+ enableSegmentTimestamps = w?.hasKey("enableSegmentTimestamps") == true && w.getBoolean("enableSegmentTimestamps")
573
+ ),
574
+ tokens = path(paths, "tokens"),
575
+ modelType = "whisper"
576
+ )
577
+ }
578
+ "fire_red_asr" -> OfflineModelConfig(
579
+ fireRedAsr = OfflineFireRedAsrModelConfig(
580
+ encoder = path(paths, "fireRedEncoder"),
581
+ decoder = path(paths, "fireRedDecoder")
582
+ ),
583
+ tokens = path(paths, "tokens"),
584
+ modelType = "fire_red_asr"
585
+ )
586
+ "moonshine" -> OfflineModelConfig(
587
+ moonshine = OfflineMoonshineModelConfig(
588
+ preprocessor = path(paths, "moonshinePreprocessor"),
589
+ encoder = path(paths, "moonshineEncoder"),
590
+ uncachedDecoder = path(paths, "moonshineUncachedDecoder"),
591
+ cachedDecoder = path(paths, "moonshineCachedDecoder")
592
+ ),
593
+ tokens = path(paths, "tokens"),
594
+ modelType = "moonshine"
595
+ )
596
+ "dolphin" -> OfflineModelConfig(
597
+ dolphin = OfflineDolphinModelConfig(model = path(paths, "dolphinModel")),
598
+ tokens = path(paths, "tokens"),
599
+ modelType = "dolphin"
600
+ )
601
+ "canary" -> {
602
+ val c = modelOptions?.getMap("canary")
603
+ OfflineModelConfig(
604
+ canary = OfflineCanaryModelConfig(
605
+ encoder = path(paths, "canaryEncoder"),
606
+ decoder = path(paths, "canaryDecoder"),
607
+ srcLang = c?.getString("srcLang") ?: "en",
608
+ tgtLang = c?.getString("tgtLang") ?: "en",
609
+ usePnc = if (c?.hasKey("usePnc") == true) c.getBoolean("usePnc") else true
610
+ ),
611
+ tokens = path(paths, "tokens"),
612
+ modelType = "canary"
613
+ )
614
+ }
615
+ "omnilingual" -> OfflineModelConfig(
616
+ omnilingual = OfflineOmnilingualAsrCtcModelConfig(model = path(paths, "omnilingualModel")),
617
+ tokens = path(paths, "tokens"),
618
+ modelType = "omnilingual"
619
+ )
620
+ "medasr" -> OfflineModelConfig(
621
+ medasr = OfflineMedAsrCtcModelConfig(model = path(paths, "medasrModel")),
622
+ tokens = path(paths, "tokens"),
623
+ modelType = "medasr"
624
+ )
625
+ "telespeech_ctc" -> OfflineModelConfig(
626
+ teleSpeech = path(paths, "telespeechCtcModel"),
627
+ tokens = path(paths, "tokens"),
628
+ modelType = "telespeech_ctc"
629
+ )
630
+ "funasr_nano" -> {
631
+ val fn = modelOptions?.getMap("funasrNano")
632
+ OfflineModelConfig(
633
+ funasrNano = OfflineFunAsrNanoModelConfig(
634
+ encoderAdaptor = path(paths, "funasrEncoderAdaptor"),
635
+ llm = path(paths, "funasrLLM"),
636
+ embedding = path(paths, "funasrEmbedding"),
637
+ tokenizer = path(paths, "funasrTokenizer"),
638
+ systemPrompt = fn?.getString("systemPrompt") ?: "You are a helpful assistant.",
639
+ userPrompt = fn?.getString("userPrompt") ?: "语音转写:",
640
+ maxNewTokens = if (fn?.hasKey("maxNewTokens") == true) fn.getInt("maxNewTokens") else 512,
641
+ temperature = if (fn?.hasKey("temperature") == true) fn.getDouble("temperature").toFloat() else 1e-6f,
642
+ topP = if (fn?.hasKey("topP") == true) fn.getDouble("topP").toFloat() else 0.8f,
643
+ seed = if (fn?.hasKey("seed") == true) fn.getInt("seed") else 42,
644
+ language = fn?.getString("language") ?: "",
645
+ itn = if (fn?.hasKey("itn") == true) fn.getBoolean("itn") else true,
646
+ hotwords = fn?.getString("hotwords") ?: ""
647
+ ),
648
+ tokens = ""
649
+ )
650
+ }
651
+ else -> {
652
+ val tokens = path(paths, "tokens")
653
+ when {
654
+ path(paths, "encoder").isNotEmpty() -> OfflineModelConfig(
655
+ transducer = OfflineTransducerModelConfig(
656
+ encoder = path(paths, "encoder"),
657
+ decoder = path(paths, "decoder"),
658
+ joiner = path(paths, "joiner")
659
+ ),
660
+ tokens = tokens,
661
+ modelType = "transducer"
662
+ )
663
+ path(paths, "paraformerModel").isNotEmpty() -> OfflineModelConfig(
664
+ paraformer = OfflineParaformerModelConfig(model = path(paths, "paraformerModel")),
665
+ tokens = tokens,
666
+ modelType = "paraformer"
667
+ )
668
+ path(paths, "ctcModel").isNotEmpty() -> OfflineModelConfig(
669
+ zipformerCtc = OfflineZipformerCtcModelConfig(model = path(paths, "ctcModel")),
670
+ tokens = tokens,
671
+ modelType = modelType
672
+ )
673
+ else -> OfflineModelConfig(tokens = tokens, modelType = modelType)
674
+ }
675
+ }
676
+ }
677
+ val effectiveBpeVocab = bpeVocab.ifEmpty { path(paths, "bpeVocab") }
678
+ val finalModelConfig = modelConfig.copy(
679
+ numThreads = numThreads ?: 1,
680
+ provider = provider ?: "cpu",
681
+ modelingUnit = modelingUnit,
682
+ bpeVocab = effectiveBpeVocab
683
+ )
684
+ val baseConfig = OfflineRecognizerConfig(
685
+ featConfig = featConfig,
686
+ modelConfig = finalModelConfig,
687
+ hotwordsFile = hotwordsFile,
688
+ hotwordsScore = hotwordsScore,
689
+ ruleFsts = ruleFsts,
690
+ ruleFars = ruleFars
691
+ )
692
+ return if (hotwordsFile.isNotEmpty() && (modelType == "transducer" || modelType == "nemo_transducer")) {
693
+ baseConfig.copy(
694
+ decodingMethod = "modified_beam_search",
695
+ maxActivePaths = maxOf(4, baseConfig.maxActivePaths)
696
+ )
697
+ } else baseConfig
698
+ }
699
+ }