react-native-sherpa-onnx 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +232 -236
- package/SherpaOnnx.podspec +68 -64
- package/android/build.gradle +182 -192
- package/android/codegen.gradle +57 -0
- package/android/prebuilt-download.gradle +428 -0
- package/android/prebuilt-versions.gradle +43 -0
- package/android/proguard-rules.pro +10 -0
- package/android/src/main/assets/testModels/add_mul_add.onnx +28 -0
- package/android/src/main/assets/testModels/nnapi_internal_uint8_support.onnx +0 -0
- package/android/src/main/assets/testModels/qnn_multi_ctx_embed.onnx +0 -0
- package/android/src/main/cpp/CMakeLists.txt +166 -129
- package/android/src/main/cpp/CMakePresets.json +54 -0
- package/android/src/main/cpp/crypto/sha256.cpp +174 -0
- package/android/src/main/cpp/crypto/sha256.h +16 -0
- package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.cpp +404 -0
- package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.h +56 -0
- package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-jni.cpp +181 -0
- package/android/src/main/cpp/jni/audio/sherpa-onnx-audio-convert-jni.cpp +888 -0
- package/{ios → android/src/main/cpp/jni/model_detect}/sherpa-onnx-common.h +18 -18
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.cpp +86 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.h +20 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.cpp +423 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.h +55 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-stt.cpp +399 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-tts.cpp +238 -0
- package/{ios → android/src/main/cpp/jni/model_detect}/sherpa-onnx-model-detect.h +122 -89
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.cpp +99 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.h +16 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-tts-wrapper.cpp +78 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-tts-wrapper.h +16 -0
- package/android/src/main/cpp/jni/module/sherpa-onnx-module-jni.cpp +190 -0
- package/android/src/main/cpp/jni/tts/sherpa-onnx-tts-zipvoice-jni.cpp +301 -0
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxArchiveHelper.kt +94 -0
- package/android/src/main/java/com/sherpaonnx/{SherpaOnnxCoreHelper.kt → SherpaOnnxAssetHelper.kt} +350 -236
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +791 -483
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxSttHelper.kt +699 -109
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt +1123 -668
- package/android/src/main/java/com/sherpaonnx/ZipvoiceTtsWrapper.kt +187 -0
- package/ios/SherpaOnnx+Assets.h +11 -0
- package/ios/SherpaOnnx+Assets.mm +325 -0
- package/ios/SherpaOnnx+STT.mm +455 -118
- package/ios/SherpaOnnx+TTS.mm +1101 -712
- package/ios/SherpaOnnx.h +17 -6
- package/ios/SherpaOnnx.mm +206 -311
- package/ios/SherpaOnnx.xcconfig +19 -19
- package/ios/SherpaOnnxCoreMLHelper.swift +24 -0
- package/ios/archive/sherpa-onnx-archive-helper.h +21 -0
- package/ios/archive/sherpa-onnx-archive-helper.mm +296 -0
- package/ios/libarchive_darwin_config.h +153 -0
- package/{android/src/main/cpp/jni → ios/model_detect}/sherpa-onnx-common.h +18 -18
- package/ios/model_detect/sherpa-onnx-model-detect-helper.h +49 -0
- package/ios/model_detect/sherpa-onnx-model-detect-helper.mm +210 -0
- package/ios/model_detect/sherpa-onnx-model-detect-stt.mm +344 -0
- package/ios/model_detect/sherpa-onnx-model-detect-tts.mm +201 -0
- package/{android/src/main/cpp/jni → ios/model_detect}/sherpa-onnx-model-detect.h +117 -89
- package/ios/scripts/patch-libarchive-includes.sh +61 -0
- package/ios/scripts/setup-ios-libarchive.sh +98 -0
- package/ios/stt/sherpa-onnx-stt-wrapper.h +129 -0
- package/ios/stt/sherpa-onnx-stt-wrapper.mm +523 -0
- package/ios/{sherpa-onnx-tts-wrapper.h → tts/sherpa-onnx-tts-wrapper.h} +90 -85
- package/ios/{sherpa-onnx-tts-wrapper.mm → tts/sherpa-onnx-tts-wrapper.mm} +376 -345
- package/lib/module/NativeSherpaOnnx.js +3 -0
- package/lib/module/NativeSherpaOnnx.js.map +1 -1
- package/lib/module/audio/index.js +22 -0
- package/lib/module/audio/index.js.map +1 -0
- package/lib/module/diarization/index.js +1 -1
- package/lib/module/diarization/index.js.map +1 -1
- package/lib/module/download/ModelDownloadManager.js +918 -0
- package/lib/module/download/ModelDownloadManager.js.map +1 -0
- package/lib/module/download/extractTarBz2.js +53 -0
- package/lib/module/download/extractTarBz2.js.map +1 -0
- package/lib/module/download/index.js +6 -0
- package/lib/module/download/index.js.map +1 -0
- package/lib/module/download/validation.js +178 -0
- package/lib/module/download/validation.js.map +1 -0
- package/lib/module/enhancement/index.js +1 -1
- package/lib/module/enhancement/index.js.map +1 -1
- package/lib/module/index.js +41 -3
- package/lib/module/index.js.map +1 -1
- package/lib/module/separation/index.js +1 -1
- package/lib/module/separation/index.js.map +1 -1
- package/lib/module/stt/index.js +127 -60
- package/lib/module/stt/index.js.map +1 -1
- package/lib/module/stt/sttModelLanguages.js +512 -0
- package/lib/module/stt/sttModelLanguages.js.map +1 -0
- package/lib/module/stt/types.js +53 -1
- package/lib/module/stt/types.js.map +1 -1
- package/lib/module/tts/index.js +216 -289
- package/lib/module/tts/index.js.map +1 -1
- package/lib/module/tts/types.js +86 -1
- package/lib/module/tts/types.js.map +1 -1
- package/lib/module/types.js.map +1 -1
- package/lib/module/utils.js +86 -73
- package/lib/module/utils.js.map +1 -1
- package/lib/module/vad/index.js +1 -1
- package/lib/module/vad/index.js.map +1 -1
- package/lib/typescript/src/NativeSherpaOnnx.d.ts +192 -38
- package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
- package/lib/typescript/src/audio/index.d.ts +13 -0
- package/lib/typescript/src/audio/index.d.ts.map +1 -0
- package/lib/typescript/src/diarization/index.d.ts +3 -2
- package/lib/typescript/src/diarization/index.d.ts.map +1 -1
- package/lib/typescript/src/download/ModelDownloadManager.d.ts +108 -0
- package/lib/typescript/src/download/ModelDownloadManager.d.ts.map +1 -0
- package/lib/typescript/src/download/extractTarBz2.d.ts +14 -0
- package/lib/typescript/src/download/extractTarBz2.d.ts.map +1 -0
- package/lib/typescript/src/download/index.d.ts +7 -0
- package/lib/typescript/src/download/index.d.ts.map +1 -0
- package/lib/typescript/src/download/validation.d.ts +57 -0
- package/lib/typescript/src/download/validation.d.ts.map +1 -0
- package/lib/typescript/src/enhancement/index.d.ts +3 -2
- package/lib/typescript/src/enhancement/index.d.ts.map +1 -1
- package/lib/typescript/src/index.d.ts +26 -2
- package/lib/typescript/src/index.d.ts.map +1 -1
- package/lib/typescript/src/separation/index.d.ts +3 -2
- package/lib/typescript/src/separation/index.d.ts.map +1 -1
- package/lib/typescript/src/stt/index.d.ts +31 -43
- package/lib/typescript/src/stt/index.d.ts.map +1 -1
- package/lib/typescript/src/stt/sttModelLanguages.d.ts +52 -0
- package/lib/typescript/src/stt/sttModelLanguages.d.ts.map +1 -0
- package/lib/typescript/src/stt/types.d.ts +196 -9
- package/lib/typescript/src/stt/types.d.ts.map +1 -1
- package/lib/typescript/src/tts/index.d.ts +25 -211
- package/lib/typescript/src/tts/index.d.ts.map +1 -1
- package/lib/typescript/src/tts/types.d.ts +148 -25
- package/lib/typescript/src/tts/types.d.ts.map +1 -1
- package/lib/typescript/src/types.d.ts +0 -32
- package/lib/typescript/src/types.d.ts.map +1 -1
- package/lib/typescript/src/utils.d.ts +28 -13
- package/lib/typescript/src/utils.d.ts.map +1 -1
- package/lib/typescript/src/vad/index.d.ts +3 -2
- package/lib/typescript/src/vad/index.d.ts.map +1 -1
- package/package.json +250 -222
- package/scripts/check-qnn-support.sh +78 -0
- package/scripts/setup-ios-framework.sh +379 -282
- package/src/NativeSherpaOnnx.ts +474 -251
- package/src/audio/index.ts +32 -0
- package/src/diarization/index.ts +4 -2
- package/src/download/ModelDownloadManager.ts +1325 -0
- package/src/download/extractTarBz2.ts +78 -0
- package/src/download/index.ts +43 -0
- package/src/download/validation.ts +279 -0
- package/src/enhancement/index.ts +4 -2
- package/src/index.tsx +78 -27
- package/src/separation/index.ts +4 -2
- package/src/stt/index.ts +249 -89
- package/src/stt/sttModelLanguages.ts +237 -0
- package/src/stt/types.ts +263 -9
- package/src/tts/index.ts +470 -458
- package/src/tts/types.ts +373 -218
- package/src/types.ts +0 -44
- package/src/utils.ts +145 -131
- package/src/vad/index.ts +4 -2
- package/third_party/ffmpeg_prebuilt/ANDROID_RELEASE_TAG +1 -0
- package/third_party/libarchive_prebuilt/ANDROID_RELEASE_TAG +1 -0
- package/third_party/libarchive_prebuilt/IOS_RELEASE_TAG +1 -0
- package/third_party/sherpa-onnx-prebuilt/ANDROID_RELEASE_TAG +1 -0
- package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -0
- package/android/src/main/cpp/include/sherpa-onnx/c-api/c-api.h +0 -1918
- package/android/src/main/cpp/include/sherpa-onnx/c-api/cxx-api.h +0 -841
- package/android/src/main/cpp/jni/sherpa-onnx-model-detect.cpp +0 -541
- package/android/src/main/cpp/jni/sherpa-onnx-stt-jni.cpp +0 -336
- package/android/src/main/cpp/jni/sherpa-onnx-stt-wrapper.cpp +0 -222
- package/android/src/main/cpp/jni/sherpa-onnx-stt-wrapper.h +0 -68
- package/android/src/main/cpp/jni/sherpa-onnx-tts-jni.cpp +0 -823
- package/android/src/main/cpp/jni/sherpa-onnx-tts-wrapper.cpp +0 -387
- package/android/src/main/cpp/jni/sherpa-onnx-tts-wrapper.h +0 -147
- package/ios/Frameworks/sherpa_onnx.xcframework.zip +0 -0
- package/ios/include/sherpa-onnx/c-api/c-api.h +0 -1918
- package/ios/include/sherpa-onnx/c-api/cxx-api.h +0 -841
- package/ios/sherpa-onnx-model-detect.mm +0 -441
- package/ios/sherpa-onnx-stt-wrapper.h +0 -48
- package/ios/sherpa-onnx-stt-wrapper.mm +0 -201
- package/scripts/copy-headers.js +0 -184
- package/scripts/setup-assets.js +0 -323
|
@@ -1,109 +1,699 @@
|
|
|
1
|
-
package com.sherpaonnx
|
|
2
|
-
|
|
3
|
-
import android.
|
|
4
|
-
import
|
|
5
|
-
import
|
|
6
|
-
import
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
}
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
1
|
+
package com.sherpaonnx
|
|
2
|
+
|
|
3
|
+
import android.content.Context
|
|
4
|
+
import android.net.Uri
|
|
5
|
+
import android.util.Log
|
|
6
|
+
import com.facebook.react.bridge.Arguments
|
|
7
|
+
import com.facebook.react.bridge.Promise
|
|
8
|
+
import com.facebook.react.bridge.ReadableMap
|
|
9
|
+
import com.facebook.react.bridge.WritableMap
|
|
10
|
+
import com.k2fsa.sherpa.onnx.FeatureConfig
|
|
11
|
+
import com.k2fsa.sherpa.onnx.OfflineRecognizerResult
|
|
12
|
+
import com.k2fsa.sherpa.onnx.OfflineModelConfig
|
|
13
|
+
import com.k2fsa.sherpa.onnx.OfflineRecognizer
|
|
14
|
+
import com.k2fsa.sherpa.onnx.OfflineRecognizerConfig
|
|
15
|
+
import com.k2fsa.sherpa.onnx.OfflineStream
|
|
16
|
+
import com.k2fsa.sherpa.onnx.OfflineTransducerModelConfig
|
|
17
|
+
import com.k2fsa.sherpa.onnx.OfflineParaformerModelConfig
|
|
18
|
+
import com.k2fsa.sherpa.onnx.OfflineNemoEncDecCtcModelConfig
|
|
19
|
+
import com.k2fsa.sherpa.onnx.OfflineWhisperModelConfig
|
|
20
|
+
import com.k2fsa.sherpa.onnx.OfflineSenseVoiceModelConfig
|
|
21
|
+
import com.k2fsa.sherpa.onnx.OfflineZipformerCtcModelConfig
|
|
22
|
+
import com.k2fsa.sherpa.onnx.OfflineWenetCtcModelConfig
|
|
23
|
+
import com.k2fsa.sherpa.onnx.OfflineFunAsrNanoModelConfig
|
|
24
|
+
import com.k2fsa.sherpa.onnx.OfflineMoonshineModelConfig
|
|
25
|
+
import com.k2fsa.sherpa.onnx.OfflineDolphinModelConfig
|
|
26
|
+
import com.k2fsa.sherpa.onnx.OfflineFireRedAsrModelConfig
|
|
27
|
+
import com.k2fsa.sherpa.onnx.OfflineCanaryModelConfig
|
|
28
|
+
import com.k2fsa.sherpa.onnx.OfflineOmnilingualAsrCtcModelConfig
|
|
29
|
+
import com.k2fsa.sherpa.onnx.OfflineMedAsrCtcModelConfig
|
|
30
|
+
import com.k2fsa.sherpa.onnx.WaveReader
|
|
31
|
+
import java.io.File
|
|
32
|
+
import java.util.concurrent.ConcurrentHashMap
|
|
33
|
+
|
|
34
|
+
internal class SherpaOnnxSttHelper(
|
|
35
|
+
private val context: Context,
|
|
36
|
+
private val detectSttModel: (
|
|
37
|
+
modelDir: String,
|
|
38
|
+
preferInt8: Boolean,
|
|
39
|
+
hasPreferInt8: Boolean,
|
|
40
|
+
modelType: String,
|
|
41
|
+
debug: Boolean
|
|
42
|
+
) -> HashMap<String, Any>?,
|
|
43
|
+
private val logTag: String
|
|
44
|
+
) {
|
|
45
|
+
|
|
46
|
+
/**
 * Mutable state kept for one STT engine instance.
 *
 * All three fields are `@Volatile`, so a write made by one thread is visible to reads on another.
 */
private data class SttEngineInstance(
    /** Recognizer currently loaded for this instance, or `null` when none is loaded. */
    @Volatile var recognizer: OfflineRecognizer? = null,
    /** Config most recently stored for [recognizer], or `null` when none is stored. */
    @Volatile var lastRecognizerConfig: OfflineRecognizerConfig? = null,
    /** Model type string recorded for the loaded model, or `null` when none is recorded. */
    @Volatile var currentSttModelType: String? = null,
)
|
|
51
|
+
|
|
52
|
+
/** Engine state for every known STT instance, keyed by instance id. */
private val instances = ConcurrentHashMap<String, SttEngineInstance>()

/** Returns the state stored under [instanceId], or `null` when no such instance is registered. */
private fun getInstance(instanceId: String): SttEngineInstance? {
    return instances[instanceId]
}
|
|
55
|
+
|
|
56
|
+
/**
 * Tells whether hotwords may be used with [modelType].
 *
 * Only transducer and NeMo transducer models qualify
 * (sherpa-onnx; NeMo: https://github.com/k2-fsa/sherpa-onnx/pull/3077).
 */
private fun supportsHotwords(modelType: String): Boolean = when (modelType) {
    "transducer", "nemo_transducer" -> true
    else -> false
}
|
|
59
|
+
|
|
60
|
+
/**
 * Resolves a single path to a file path the native layer can read.
 *
 * Anything that does not start with `content://` is returned unchanged. A content URI is copied
 * into a uniquely named file in the app cache directory and that file's absolute path is returned.
 * If the copy fails, the (possibly partial) cache file is deleted so failed attempts do not pile up
 * in the cache directory.
 *
 * Use for the hotwords file or any single file path that may come from a document picker.
 *
 * @param path File path or content URI
 * @param cacheFilePrefix Prefix for the cache file name (e.g. "stt_hotwords", "stt_rule_fst")
 * @return Resolved file path
 * @throws IllegalStateException if the content URI cannot be opened or copied
 */
private fun resolveContentUriToFile(path: String, cacheFilePrefix: String): String {
    if (!path.startsWith("content://")) return path
    val uri = Uri.parse(path)
    // nanoTime suffix keeps concurrent/repeated resolutions from clobbering each other's files.
    val cacheFile = File(context.cacheDir, "${cacheFilePrefix}_${System.nanoTime()}")
    try {
        context.contentResolver.openInputStream(uri)?.use { input ->
            cacheFile.outputStream().use { output ->
                input.copyTo(output)
            }
        } ?: throw IllegalStateException("File is not readable (content URI could not be opened): $path")
    } catch (e: Exception) {
        // Best-effort cleanup: the file may not exist yet or may hold only part of the data.
        cacheFile.delete()
        throw when (e) {
            is SecurityException ->
                IllegalStateException("File is not readable (no permission to read content URI): $path", e)
            // Already in the shape callers expect (includes the "could not be opened" case above).
            is IllegalStateException -> e
            else ->
                IllegalStateException("File is not readable (content URI could not be opened): ${e.message ?: path}", e)
        }
    }
    return cacheFile.absolutePath
}
|
|
87
|
+
|
|
88
|
+
/**
 * Resolves a comma-separated list of paths, any of which may be a content URI.
 *
 * Entries are trimmed and empty entries are dropped. Each remaining entry goes through
 * [resolveContentUriToFile] with the prefix `"<cacheFilePrefix>_<position>"`, where position is the
 * entry's index among the kept entries. Use for ruleFsts / ruleFars.
 *
 * @param pathsString Single path or comma-separated paths (e.g. "path1,path2")
 * @param cacheFilePrefix Prefix for cache file names (e.g. "stt_rule_fst", "stt_rule_far")
 * @return Resolved paths joined by comma; a blank [pathsString] is returned unchanged
 */
private fun resolveFilePaths(pathsString: String, cacheFilePrefix: String): String {
    if (pathsString.isBlank()) return pathsString
    val resolved = mutableListOf<String>()
    for (segment in pathsString.split(',')) {
        val candidate = segment.trim()
        if (candidate.isEmpty()) continue
        // resolved.size is the index of this entry among the non-empty ones.
        resolved += resolveContentUriToFile(candidate, "${cacheFilePrefix}_${resolved.size}")
    }
    return resolved.joinToString(",")
}
|
|
101
|
+
|
|
102
|
+
/**
 * Resolves the hotwords path (a single file) by handing it to [resolveContentUriToFile]
 * with the cache prefix "stt_hotwords".
 */
private fun resolveHotwordsPath(path: String): String {
    return resolveContentUriToFile(path, "stt_hotwords")
}
|
|
105
|
+
|
|
106
|
+
/**
 * Checks that a file looks like a hotwords file: one hotword or phrase per line, each optionally
 * ending in " :score".
 *
 * Blank lines are ignored. A line is rejected when its score is missing or not a number, when the
 * hotword part is empty or has no letter, or when the line looks like a sentencepiece
 * `token<TAB>score` entry. Call after resolveHotwordsPath so the argument is a file path rather
 * than a content URI.
 *
 * @param filePath Path of the file to check
 * @return null if valid, or an error message if invalid.
 */
private fun validateHotwordsFile(filePath: String): String? {
    val source = File(filePath)
    when {
        !source.exists() -> return "Hotwords file does not exist: $filePath"
        !source.isFile -> return "Hotwords path is not a file: $filePath"
        !source.canRead() -> return "Hotwords file is not readable: $filePath"
    }
    val text = try {
        source.readText(Charsets.UTF_8)
    } catch (e: Exception) {
        return "Failed to read hotwords file: ${e.message}"
    }
    if ('\u0000' in text) return "Hotwords file contains null bytes (not a valid text file)."

    var accepted = 0
    for (rawLine in text.split('\n', '\r')) {
        val entry = rawLine.trim()
        if (entry.isEmpty()) continue
        val preview = entry.take(60)
        // The score separator is the LAST " :" on the line; -1 means the line carries no score.
        val scoreMarker = entry.lastIndexOf(" :")
        val phrase: String
        if (scoreMarker >= 0) {
            val scoreText = entry.substring(scoreMarker + 2).trim()
            if (scoreText.isEmpty()) return "Invalid hotword line (missing score after ' :'): ${preview}…"
            if (scoreText.toFloatOrNull() == null) {
                return "Invalid hotword line (score must be a number after ' :'): ${preview}…"
            }
            phrase = entry.substring(0, scoreMarker).trim()
        } else {
            // Likely sentencepiece .vocab format (token<TAB>score); hotwords use " :score" and one word/phrase per line.
            if ('\t' in entry && entry.substringAfter('\t').trim().toFloatOrNull() != null) {
                return "This file looks like a sentencepiece .vocab file (token<TAB>score). Use a hotwords file instead: one word or phrase per line, optional ' :score' at end."
            }
            phrase = entry
        }
        if (phrase.isEmpty()) return "Invalid hotword line (empty hotword): ${preview}…"
        if (phrase.none { it.isLetter() }) return "Invalid hotword line (must contain at least one letter): ${preview}…"
        accepted++
    }
    return if (accepted == 0) {
        "Hotwords file has no valid lines (one hotword or phrase per line, UTF-8 text)."
    } else {
        null
    }
}
|
|
149
|
+
|
|
150
|
+
/**
 * Detects the STT model in [modelDir] and (re)creates the recognizer stored under [instanceId].
 *
 * Steps: validate the model directory, run model detection, resolve and validate the optional
 * hotwords file and rule FST/FAR paths (content URIs are copied to the cache), build the recognizer
 * config, release any recognizer already held by the instance, then create the new one.
 *
 * The config is built before the existing recognizer is touched, so a failure while building it
 * neither destroys a working recognizer nor registers an empty instance.
 *
 * On success [promise] resolves with a map containing `success`, `modelType`, `decodingMethod` and
 * `detectedModels` (entries with `type` and `modelDir`). On failure it rejects with `INIT_ERROR`,
 * `HOTWORDS_NOT_SUPPORTED` or `INVALID_HOTWORDS_FILE`.
 *
 * @param instanceId Key under which the engine state is stored
 * @param modelDir Directory containing the model files; must exist and be a directory
 * @param preferInt8 Passed to detection; `null` is sent as `false` with the "has value" flag unset
 * @param modelType Model type hint for detection; `null` is sent as "auto"
 * @param debug Debug flag for detection; `null` is sent as `false`
 * @param hotwordsFile Optional hotwords file path or content URI; blank means no hotwords
 * @param hotwordsScore Hotwords score; 1.5 is used when `null`
 * @param numThreads Thread count, forwarded to the config builder (may be `null`)
 * @param provider Execution provider, forwarded to the config builder (may be `null`)
 * @param ruleFsts Single or comma-separated rule FST path(s); may contain content URIs
 * @param ruleFars Single or comma-separated rule FAR path(s); may contain content URIs
 * @param dither Dither value; 0 is used when `null`
 * @param modelOptions Model-specific options, forwarded to the config builder
 * @param modelingUnit Modeling unit, trimmed; empty when `null`
 * @param bpeVocab BPE vocab path, trimmed; empty when `null`
 * @param promise Promise that receives the result or the error
 */
fun initializeStt(
    instanceId: String,
    modelDir: String,
    preferInt8: Boolean?,
    modelType: String?,
    debug: Boolean?,
    hotwordsFile: String?,
    hotwordsScore: Double?,
    numThreads: Double?,
    provider: String?,
    ruleFsts: String?,
    ruleFars: String?,
    dither: Double?,
    modelOptions: ReadableMap?,
    modelingUnit: String?,
    bpeVocab: String?,
    promise: Promise
) {
    try {
        val modelDirFile = File(modelDir)
        if (!modelDirFile.exists()) {
            val errorMsg = "Model directory does not exist: $modelDir"
            Log.e(logTag, errorMsg)
            promise.reject("INIT_ERROR", errorMsg)
            return
        }

        if (!modelDirFile.isDirectory) {
            val errorMsg = "Model path is not a directory: $modelDir"
            Log.e(logTag, errorMsg)
            promise.reject("INIT_ERROR", errorMsg)
            return
        }

        val result = detectSttModel(
            modelDir,
            preferInt8 ?: false,
            preferInt8 != null,
            modelType ?: "auto",
            debug ?: false
        )

        if (result == null) {
            val errorMsg = "Failed to detect STT model. Check native logs for details."
            Log.e(logTag, "Detection returned null for modelDir: $modelDir")
            promise.reject("INIT_ERROR", errorMsg)
            return
        }

        val success = result["success"] as? Boolean ?: false
        val detectedModels = result["detectedModels"] as? ArrayList<*>
            ?: arrayListOf<HashMap<String, String>>()

        if (!success) {
            val reason = result["error"] as? String
            val errorMsg = if (!reason.isNullOrBlank()) {
                "Failed to initialize sherpa-onnx: $reason"
            } else {
                "Failed to initialize sherpa-onnx. Check native logs for details."
            }
            // Keep the reported reason in the log so the failure is diagnosable from logcat alone.
            Log.e(logTag, "Detection failed for modelDir: $modelDir (${reason ?: "no reason reported"})")
            promise.reject("INIT_ERROR", errorMsg)
            return
        }

        val paths = result["paths"] as? Map<*, *> ?: emptyMap<String, String>()
        val pathStrings = paths.mapValues { (_, v) -> (v as? String).orEmpty() }.mapKeys { it.key.toString() }
        val modelTypeStr = result["modelType"] as? String ?: "unknown"

        val hotwordsFileTrimmed = hotwordsFile?.trim().orEmpty()
        if (hotwordsFileTrimmed.isNotEmpty() && !supportsHotwords(modelTypeStr)) {
            val errorMsg = "Hotwords are only supported for transducer models (transducer, nemo_transducer). Current model type: $modelTypeStr"
            Log.e(logTag, errorMsg)
            promise.reject("HOTWORDS_NOT_SUPPORTED", errorMsg)
            return
        }
        val resolvedHotwordsPath = if (hotwordsFileTrimmed.isNotEmpty()) {
            try {
                resolveHotwordsPath(hotwordsFileTrimmed)
            } catch (e: Exception) {
                val errorMsg = e.message ?: "Hotwords file could not be resolved"
                Log.e(logTag, errorMsg, e)
                promise.reject("INVALID_HOTWORDS_FILE", errorMsg, e)
                return
            }
        } else ""
        if (resolvedHotwordsPath.isNotEmpty()) {
            validateHotwordsFile(resolvedHotwordsPath)?.let { errorMsg ->
                Log.e(logTag, errorMsg)
                promise.reject("INVALID_HOTWORDS_FILE", errorMsg)
                return
            }
        }

        val resolvedRuleFsts = try {
            resolveFilePaths(ruleFsts.orEmpty().trim(), "stt_rule_fst")
        } catch (e: Exception) {
            val errorMsg = e.message ?: "Rule FST path(s) could not be resolved"
            Log.e(logTag, errorMsg, e)
            promise.reject("INIT_ERROR", errorMsg, e)
            return
        }
        val resolvedRuleFars = try {
            resolveFilePaths(ruleFars.orEmpty().trim(), "stt_rule_far")
        } catch (e: Exception) {
            val errorMsg = e.message ?: "Rule FAR path(s) could not be resolved"
            Log.e(logTag, errorMsg, e)
            promise.reject("INIT_ERROR", errorMsg, e)
            return
        }

        // Build the config BEFORE releasing the current recognizer or registering the instance:
        // if this throws, an already-working recognizer stays usable and no empty entry is left behind.
        val config = buildRecognizerConfig(
            pathStrings,
            modelTypeStr,
            hotwordsFile = resolvedHotwordsPath,
            hotwordsScore = hotwordsScore?.toFloat() ?: 1.5f,
            numThreads = numThreads?.toInt(),
            provider = provider,
            ruleFsts = resolvedRuleFsts,
            ruleFars = resolvedRuleFars,
            dither = dither?.toFloat() ?: 0f,
            modelOptions = modelOptions,
            modelingUnit = modelingUnit?.trim().orEmpty(),
            bpeVocab = bpeVocab?.trim().orEmpty()
        )

        val inst = instances.getOrPut(instanceId) { SttEngineInstance() }
        // The old recognizer is released before the new one is constructed.
        inst.recognizer?.release()
        inst.recognizer = null
        inst.lastRecognizerConfig = config
        inst.currentSttModelType = modelTypeStr
        inst.recognizer = OfflineRecognizer(config = config)

        val resultMap = Arguments.createMap()
        resultMap.putBoolean("success", true)
        resultMap.putString("modelType", modelTypeStr)
        resultMap.putString("decodingMethod", config.decodingMethod)
        val detectedModelsArray = Arguments.createArray()
        for (model in detectedModels) {
            val modelMap = model as? HashMap<*, *>
            if (modelMap != null) {
                val modelResultMap = Arguments.createMap()
                modelResultMap.putString("type", modelMap["type"] as? String ?: "")
                modelResultMap.putString("modelDir", modelMap["modelDir"] as? String ?: "")
                detectedModelsArray.pushMap(modelResultMap)
            }
        }
        resultMap.putArray("detectedModels", detectedModelsArray)
        promise.resolve(resultMap)
    } catch (e: Exception) {
        val errorMsg = "Exception during initialization: ${e.message ?: e.javaClass.simpleName}"
        Log.e(logTag, errorMsg, e)
        promise.reject("INIT_ERROR", errorMsg, e)
    }
}
|
|
306
|
+
|
|
307
|
+
/**
 * Transcribes a wave file with the recognizer stored under [instanceId].
 *
 * The file is read with [WaveReader.readWave], fed to a fresh [OfflineStream] and decoded. The
 * stream is released in a `finally` block, matching `transcribeSamples`, so it is freed whether
 * decoding succeeds or throws.
 *
 * @param instanceId Key of the STT instance to use
 * @param filePath Path of the wave file to transcribe
 * @param promise Resolves with the recognition result map; rejects with `TRANSCRIBE_ERROR` when the
 *   instance is unknown, no recognizer is loaded, or reading/decoding throws
 */
fun transcribeFile(instanceId: String, filePath: String, promise: Promise) {
    try {
        val inst = getInstance(instanceId) ?: run {
            promise.reject("TRANSCRIBE_ERROR", "STT instance not found: $instanceId")
            return
        }
        val rec = inst.recognizer
        if (rec == null) {
            promise.reject("TRANSCRIBE_ERROR", "STT not initialized. Call initializeStt first.")
            return
        }
        // Read the file before creating the stream so a bad file never allocates one.
        val wave = WaveReader.readWave(filePath)
        val stream: OfflineStream = rec.createStream()
        try {
            stream.acceptWaveform(wave.samples, wave.sampleRate)
            rec.decode(stream)
            val result = rec.getResult(stream)
            promise.resolve(resultToWritableMap(result))
        } finally {
            stream.release()
        }
    } catch (e: Exception) {
        val message = e.message?.takeIf { it.isNotBlank() } ?: "Failed to transcribe file"
        Log.e(logTag, "transcribeFile error: $message", e)
        promise.reject("TRANSCRIBE_ERROR", message, e)
    }
}
|
|
330
|
+
|
|
331
|
+
/**
 * Transcribes in-memory audio samples with the recognizer stored under [instanceId].
 *
 * Every element of [samples] is read as a double and narrowed to float, then the buffer is fed to a
 * fresh [OfflineStream] and decoded. The stream is released whether or not decoding succeeds.
 *
 * @param instanceId Key of the STT instance to use
 * @param samples Audio samples as numbers
 * @param sampleRate Sample rate passed along with the samples
 * @param promise Resolves with the recognition result map; rejects with `TRANSCRIBE_ERROR` when the
 *   instance is unknown, no recognizer is loaded, or conversion/decoding throws
 */
fun transcribeSamples(instanceId: String, samples: com.facebook.react.bridge.ReadableArray, sampleRate: Int, promise: Promise) {
    try {
        val engine = getInstance(instanceId)
        if (engine == null) {
            promise.reject("TRANSCRIBE_ERROR", "STT instance not found: $instanceId")
            return
        }
        val recognizer = engine.recognizer ?: run {
            promise.reject("TRANSCRIBE_ERROR", "STT not initialized. Call initializeStt first.")
            return
        }
        val pcm = FloatArray(samples.size())
        for (index in pcm.indices) {
            pcm[index] = samples.getDouble(index).toFloat()
        }
        val stream: OfflineStream = recognizer.createStream()
        try {
            stream.acceptWaveform(pcm, sampleRate)
            recognizer.decode(stream)
            promise.resolve(resultToWritableMap(recognizer.getResult(stream)))
        } finally {
            stream.release()
        }
    } catch (e: Exception) {
        val message = e.message?.takeIf { it.isNotBlank() } ?: "Failed to transcribe samples"
        Log.e(logTag, "transcribeSamples error: $message", e)
        promise.reject("TRANSCRIBE_ERROR", message, e)
    }
}
|
|
358
|
+
|
|
359
|
+
/**
 * Applies runtime config changes to the recognizer stored under [instanceId].
 *
 * Recognised keys in [options]: `decodingMethod`, `maxActivePaths`, `hotwordsFile`,
 * `hotwordsScore`, `blankPenalty`, `ruleFsts`, `ruleFars`. A key that is absent, or present with a
 * null value, keeps the current value. Rule FST/FAR paths and the hotwords file are resolved
 * (content URIs are copied to the cache) and the hotwords file is validated. When a hotwords file
 * is in effect, the decoding method is forced to "modified_beam_search" and `maxActivePaths` is
 * raised to at least 4.
 *
 * The cached config is updated only after the recognizer has accepted the new config, so a failing
 * `setConfig` leaves the cached config unchanged.
 *
 * @param instanceId Key of the STT instance to update
 * @param options Map holding the values to change
 * @param promise Resolves with `null` on success; rejects with `CONFIG_ERROR`,
 *   `HOTWORDS_NOT_SUPPORTED` or `INVALID_HOTWORDS_FILE`
 */
fun setSttConfig(instanceId: String, options: ReadableMap, promise: Promise) {
    try {
        val inst = getInstance(instanceId) ?: run {
            promise.reject("CONFIG_ERROR", "STT instance not found: $instanceId")
            return
        }
        val rec = inst.recognizer
        val current = inst.lastRecognizerConfig
        if (rec == null || current == null) {
            promise.reject("CONFIG_ERROR", "STT not initialized. Call initializeStt first.")
            return
        }

        // Treat "present but null" like "absent" for every key. String keys already fell back to
        // the current value on null; numeric keys would otherwise fail inside getDouble.
        fun provided(key: String): Boolean = options.hasKey(key) && !options.isNull(key)

        val merged = current.copy(
            decodingMethod = if (provided("decodingMethod")) options.getString("decodingMethod") ?: current.decodingMethod else current.decodingMethod,
            maxActivePaths = if (provided("maxActivePaths")) options.getDouble("maxActivePaths").toInt() else current.maxActivePaths,
            hotwordsFile = if (provided("hotwordsFile")) options.getString("hotwordsFile") ?: current.hotwordsFile else current.hotwordsFile,
            hotwordsScore = if (provided("hotwordsScore")) options.getDouble("hotwordsScore").toFloat() else current.hotwordsScore,
            blankPenalty = if (provided("blankPenalty")) options.getDouble("blankPenalty").toFloat() else current.blankPenalty,
            ruleFsts = if (provided("ruleFsts")) options.getString("ruleFsts") ?: current.ruleFsts else current.ruleFsts,
            ruleFars = if (provided("ruleFars")) options.getString("ruleFars") ?: current.ruleFars else current.ruleFars
        )
        val resolvedRuleFsts = try {
            resolveFilePaths(merged.ruleFsts.trim(), "stt_rule_fst")
        } catch (e: Exception) {
            val errorMsg = e.message ?: "Rule FST path(s) could not be resolved"
            Log.e(logTag, errorMsg, e)
            promise.reject("CONFIG_ERROR", errorMsg, e)
            return
        }
        val resolvedRuleFars = try {
            resolveFilePaths(merged.ruleFars.trim(), "stt_rule_far")
        } catch (e: Exception) {
            val errorMsg = e.message ?: "Rule FAR path(s) could not be resolved"
            Log.e(logTag, errorMsg, e)
            promise.reject("CONFIG_ERROR", errorMsg, e)
            return
        }

        val newHotwordsFile = merged.hotwordsFile.trim()
        val resolvedHotwordsPath = if (newHotwordsFile.isEmpty()) {
            ""
        } else {
            val modelType = inst.currentSttModelType
            if (modelType == null || !supportsHotwords(modelType)) {
                val errorMsg = "Hotwords are only supported for transducer models (transducer, nemo_transducer). Current model type: ${modelType ?: "unknown"}"
                Log.e(logTag, errorMsg)
                promise.reject("HOTWORDS_NOT_SUPPORTED", errorMsg)
                return
            }
            val resolved = try {
                resolveHotwordsPath(newHotwordsFile)
            } catch (e: Exception) {
                val errorMsg = e.message ?: "Hotwords file could not be resolved"
                Log.e(logTag, errorMsg, e)
                promise.reject("INVALID_HOTWORDS_FILE", errorMsg, e)
                return
            }
            validateHotwordsFile(resolved)?.let { errorMsg ->
                Log.e(logTag, errorMsg)
                promise.reject("INVALID_HOTWORDS_FILE", errorMsg)
                return
            }
            resolved
        }
        val configWithPaths = merged.copy(
            hotwordsFile = resolvedHotwordsPath,
            ruleFsts = resolvedRuleFsts,
            ruleFars = resolvedRuleFars
        )
        // With a hotwords file in effect, force modified_beam_search and at least 4 active paths.
        val configToApply = if (configWithPaths.hotwordsFile.isNotEmpty()) {
            configWithPaths.copy(
                decodingMethod = "modified_beam_search",
                maxActivePaths = maxOf(4, configWithPaths.maxActivePaths)
            )
        } else configWithPaths
        // Apply first, cache second: if setConfig throws, the cached config is left unchanged.
        rec.setConfig(configToApply)
        inst.lastRecognizerConfig = configToApply
        promise.resolve(null)
    } catch (e: Exception) {
        val message = e.message?.takeIf { it.isNotBlank() } ?: "Failed to set STT config"
        Log.e(logTag, "setSttConfig error: $message", e)
        promise.reject("CONFIG_ERROR", message, e)
    }
}
|
|
441
|
+
|
|
442
|
+
/**
 * Copies an [OfflineRecognizerResult] into a React Native [WritableMap].
 *
 * Keys written: `text`, `tokens` (string array), `timestamps` (double array),
 * `lang`, `emotion`, `event`, and `durations` (double array). Timestamp and
 * duration values are widened to `Double` before being pushed.
 */
private fun resultToWritableMap(result: OfflineRecognizerResult): WritableMap =
    Arguments.createMap().apply {
        putString("text", result.text)
        putArray(
            "tokens",
            Arguments.createArray().also { tokenArr ->
                result.tokens.forEach { token -> tokenArr.pushString(token) }
            }
        )
        putArray(
            "timestamps",
            Arguments.createArray().also { timeArr ->
                result.timestamps.forEach { stamp -> timeArr.pushDouble(stamp.toDouble()) }
            }
        )
        putString("lang", result.lang)
        putString("emotion", result.emotion)
        putString("event", result.event)
        putArray(
            "durations",
            Arguments.createArray().also { durationArr ->
                result.durations.forEach { span -> durationArr.pushDouble(span.toDouble()) }
            }
        )
    }
|
|
459
|
+
|
|
460
|
+
/**
 * Removes the STT instance registered under [instanceId] and releases its recognizer.
 *
 * The promise resolves with `null` whether or not an instance was registered
 * under that id. Any exception raised while releasing is reported as a
 * `RELEASE_ERROR` rejection.
 */
fun unloadStt(instanceId: String, promise: Promise) {
    try {
        instances.remove(instanceId)?.let { removed ->
            // Release the recognizer first, then clear every cached field on the instance.
            removed.recognizer?.release()
            removed.recognizer = null
            removed.lastRecognizerConfig = null
            removed.currentSttModelType = null
        }
        promise.resolve(null)
    } catch (e: Exception) {
        promise.reject("RELEASE_ERROR", "Failed to release resources", e)
    }
}
|
|
474
|
+
|
|
475
|
+
/** Looks up [key] in [paths], returning an empty string when the key has no entry. */
private fun path(paths: Map<String, String>, key: String): String {
    return paths[key] ?: ""
}
|
|
477
|
+
|
|
478
|
+
/**
 * Builds a short summary of modelOptions for Crashlytics.
 *
 * One section is produced for each of the `whisper`, `senseVoice`, `canary` and
 * `funasrNano` sub-maps that is present, in that order. Sections are joined with
 * `;` and the result is cut off at 200 characters. A `null` map yields an empty
 * string.
 */
private fun modelOptionsSummary(modelOptions: ReadableMap?): String {
    val options = modelOptions ?: return ""

    val sections = listOfNotNull(
        options.getMap("whisper")?.let { whisper ->
            val language = whisper.getString("language").orEmpty()
            val task = whisper.getString("task").orEmpty()
            "whisper:lang=$language,task=$task"
        },
        options.getMap("senseVoice")?.let { senseVoice ->
            val language = senseVoice.getString("language").orEmpty()
            // The ITN flag is only reported when the caller supplied it.
            val itnSuffix =
                if (senseVoice.hasKey("useItn")) ",itn=${senseVoice.getBoolean("useItn")}" else ""
            "senseVoice:lang=$language$itnSuffix"
        },
        options.getMap("canary")?.let { canary ->
            val source = canary.getString("srcLang").orEmpty()
            val target = canary.getString("tgtLang").orEmpty()
            "canary:src=$source,tgt=$target"
        },
        options.getMap("funasrNano")?.let { funasr ->
            val language = funasr.getString("language").orEmpty()
            // Only report whether hotwords were given, not their content.
            val hotwordsGiven =
                funasr.hasKey("hotwords") && !funasr.getString("hotwords").isNullOrBlank()
            "funasrNano:lang=$language,hotwords=$hotwordsGiven"
        }
    )

    return sections.joinToString(separator = ";").take(200)
}
|
|
504
|
+
|
|
505
|
+
/**
 * Assembles the [OfflineRecognizerConfig] for [modelType] from resolved model file
 * paths and optional per-model settings.
 *
 * @param paths map from logical file key (e.g. `encoder`, `tokens`, `ctcModel`) to file path;
 *   missing keys are treated as empty strings.
 * @param modelType model family identifier. Unrecognized values fall back to picking a
 *   transducer, paraformer or zipformer-CTC config based on which paths are non-empty.
 * @param hotwordsFile path of the hotwords file, or empty for none.
 * @param hotwordsScore score passed through to the recognizer config.
 * @param numThreads thread count; `1` is used when `null`.
 * @param provider execution provider; `"cpu"` is used when `null`.
 * @param ruleFsts value passed through to the recognizer config.
 * @param ruleFars value passed through to the recognizer config.
 * @param dither dither value for the feature config.
 * @param modelOptions optional sub-maps (`senseVoice`, `whisper`, `canary`, `funasrNano`)
 *   with model-specific settings; absent keys use the defaults written below.
 * @param modelingUnit value passed through to the model config.
 * @param bpeVocab BPE vocabulary path; when empty, the `bpeVocab` entry of [paths] is used.
 * @return the recognizer config. When a hotwords file is given and [modelType] is
 *   `transducer` or `nemo_transducer`, decoding is switched to `modified_beam_search`
 *   with at least 4 active paths.
 */
private fun buildRecognizerConfig(
    paths: Map<String, String>,
    modelType: String,
    hotwordsFile: String = "",
    hotwordsScore: Float = 1.5f,
    numThreads: Int? = null,
    provider: String? = null,
    ruleFsts: String = "",
    ruleFars: String = "",
    dither: Float = 0f,
    modelOptions: ReadableMap? = null,
    modelingUnit: String = "",
    bpeVocab: String = ""
): OfflineRecognizerConfig {
    // Shorthand for looking up a model file path by key.
    fun file(key: String): String = path(paths, key)

    // Readers for optional settings: fall back when the sub-map or the key is absent.
    fun ReadableMap?.stringOr(key: String, fallback: String): String =
        this?.getString(key) ?: fallback

    fun ReadableMap?.boolOr(key: String, fallback: Boolean): Boolean =
        if (this != null && this.hasKey(key)) this.getBoolean(key) else fallback

    fun ReadableMap?.intOr(key: String, fallback: Int): Int =
        if (this != null && this.hasKey(key)) this.getInt(key) else fallback

    fun ReadableMap?.floatOr(key: String, fallback: Float): Float =
        if (this != null && this.hasKey(key)) this.getDouble(key).toFloat() else fallback

    val features = FeatureConfig(sampleRate = 16000, featureDim = 80, dither = dither)
    val tokensFile = file("tokens")

    val familyConfig: OfflineModelConfig = when (modelType) {
        "transducer", "nemo_transducer" -> OfflineModelConfig(
            transducer = OfflineTransducerModelConfig(
                encoder = file("encoder"),
                decoder = file("decoder"),
                joiner = file("joiner")
            ),
            tokens = tokensFile,
            modelType = modelType
        )

        "paraformer" -> OfflineModelConfig(
            paraformer = OfflineParaformerModelConfig(model = file("paraformerModel")),
            tokens = tokensFile,
            modelType = "paraformer"
        )

        "nemo_ctc" -> OfflineModelConfig(
            nemo = OfflineNemoEncDecCtcModelConfig(model = file("ctcModel")),
            tokens = tokensFile,
            modelType = "nemo_ctc"
        )

        "wenet_ctc" -> OfflineModelConfig(
            wenetCtc = com.k2fsa.sherpa.onnx.OfflineWenetCtcModelConfig(model = file("ctcModel")),
            tokens = tokensFile,
            modelType = "wenet_ctc"
        )

        "sense_voice" -> {
            val senseVoiceOptions = modelOptions?.getMap("senseVoice")
            OfflineModelConfig(
                senseVoice = OfflineSenseVoiceModelConfig(
                    model = file("ctcModel"),
                    language = senseVoiceOptions.stringOr("language", ""),
                    useInverseTextNormalization = senseVoiceOptions.boolOr("useItn", true)
                ),
                tokens = tokensFile,
                modelType = "sense_voice"
            )
        }

        // Both spellings are reported to the native layer as "zipformer_ctc".
        "zipformer_ctc", "ctc" -> OfflineModelConfig(
            zipformerCtc = OfflineZipformerCtcModelConfig(model = file("ctcModel")),
            tokens = tokensFile,
            modelType = "zipformer_ctc"
        )

        "whisper" -> {
            val whisperOptions = modelOptions?.getMap("whisper")
            OfflineModelConfig(
                whisper = OfflineWhisperModelConfig(
                    encoder = file("whisperEncoder"),
                    decoder = file("whisperDecoder"),
                    language = whisperOptions.stringOr("language", "en"),
                    task = whisperOptions.stringOr("task", "transcribe"),
                    tailPaddings = whisperOptions.intOr("tailPaddings", 1000),
                    enableTokenTimestamps = whisperOptions.boolOr("enableTokenTimestamps", false),
                    enableSegmentTimestamps = whisperOptions.boolOr("enableSegmentTimestamps", false)
                ),
                tokens = tokensFile,
                modelType = "whisper"
            )
        }

        "fire_red_asr" -> OfflineModelConfig(
            fireRedAsr = OfflineFireRedAsrModelConfig(
                encoder = file("fireRedEncoder"),
                decoder = file("fireRedDecoder")
            ),
            tokens = tokensFile,
            modelType = "fire_red_asr"
        )

        "moonshine" -> OfflineModelConfig(
            moonshine = OfflineMoonshineModelConfig(
                preprocessor = file("moonshinePreprocessor"),
                encoder = file("moonshineEncoder"),
                uncachedDecoder = file("moonshineUncachedDecoder"),
                cachedDecoder = file("moonshineCachedDecoder")
            ),
            tokens = tokensFile,
            modelType = "moonshine"
        )

        "dolphin" -> OfflineModelConfig(
            dolphin = OfflineDolphinModelConfig(model = file("dolphinModel")),
            tokens = tokensFile,
            modelType = "dolphin"
        )

        "canary" -> {
            val canaryOptions = modelOptions?.getMap("canary")
            OfflineModelConfig(
                canary = OfflineCanaryModelConfig(
                    encoder = file("canaryEncoder"),
                    decoder = file("canaryDecoder"),
                    srcLang = canaryOptions.stringOr("srcLang", "en"),
                    tgtLang = canaryOptions.stringOr("tgtLang", "en"),
                    usePnc = canaryOptions.boolOr("usePnc", true)
                ),
                tokens = tokensFile,
                modelType = "canary"
            )
        }

        "omnilingual" -> OfflineModelConfig(
            omnilingual = OfflineOmnilingualAsrCtcModelConfig(model = file("omnilingualModel")),
            tokens = tokensFile,
            modelType = "omnilingual"
        )

        "medasr" -> OfflineModelConfig(
            medasr = OfflineMedAsrCtcModelConfig(model = file("medasrModel")),
            tokens = tokensFile,
            modelType = "medasr"
        )

        "telespeech_ctc" -> OfflineModelConfig(
            teleSpeech = file("telespeechCtcModel"),
            tokens = tokensFile,
            modelType = "telespeech_ctc"
        )

        "funasr_nano" -> {
            val funasrOptions = modelOptions?.getMap("funasrNano")
            // This branch passes an empty tokens path and sets no modelType.
            OfflineModelConfig(
                funasrNano = OfflineFunAsrNanoModelConfig(
                    encoderAdaptor = file("funasrEncoderAdaptor"),
                    llm = file("funasrLLM"),
                    embedding = file("funasrEmbedding"),
                    tokenizer = file("funasrTokenizer"),
                    systemPrompt = funasrOptions.stringOr("systemPrompt", "You are a helpful assistant."),
                    userPrompt = funasrOptions.stringOr("userPrompt", "语音转写:"),
                    maxNewTokens = funasrOptions.intOr("maxNewTokens", 512),
                    temperature = funasrOptions.floatOr("temperature", 1e-6f),
                    topP = funasrOptions.floatOr("topP", 0.8f),
                    seed = funasrOptions.intOr("seed", 42),
                    language = funasrOptions.stringOr("language", ""),
                    itn = funasrOptions.boolOr("itn", true),
                    hotwords = funasrOptions.stringOr("hotwords", "")
                ),
                tokens = ""
            )
        }

        else -> {
            // Unknown type: pick a config from whichever model file is present,
            // checked in the order transducer, paraformer, CTC.
            val encoderFile = file("encoder")
            val paraformerFile = file("paraformerModel")
            val ctcFile = file("ctcModel")
            when {
                encoderFile.isNotEmpty() -> OfflineModelConfig(
                    transducer = OfflineTransducerModelConfig(
                        encoder = encoderFile,
                        decoder = file("decoder"),
                        joiner = file("joiner")
                    ),
                    tokens = tokensFile,
                    modelType = "transducer"
                )

                paraformerFile.isNotEmpty() -> OfflineModelConfig(
                    paraformer = OfflineParaformerModelConfig(model = paraformerFile),
                    tokens = tokensFile,
                    modelType = "paraformer"
                )

                ctcFile.isNotEmpty() -> OfflineModelConfig(
                    zipformerCtc = OfflineZipformerCtcModelConfig(model = ctcFile),
                    tokens = tokensFile,
                    modelType = modelType
                )

                else -> OfflineModelConfig(tokens = tokensFile, modelType = modelType)
            }
        }
    }

    // An explicit bpeVocab argument wins over the entry in the paths map.
    val resolvedBpeVocab = if (bpeVocab.isEmpty()) file("bpeVocab") else bpeVocab

    val recognizerConfig = OfflineRecognizerConfig(
        featConfig = features,
        modelConfig = familyConfig.copy(
            numThreads = numThreads ?: 1,
            provider = provider ?: "cpu",
            modelingUnit = modelingUnit,
            bpeVocab = resolvedBpeVocab
        ),
        hotwordsFile = hotwordsFile,
        hotwordsScore = hotwordsScore,
        ruleFsts = ruleFsts,
        ruleFars = ruleFars
    )

    val isTransducerFamily = modelType == "transducer" || modelType == "nemo_transducer"
    if (hotwordsFile.isEmpty() || !isTransducerFamily) return recognizerConfig

    // With hotwords on a transducer model, switch to beam search with at least 4 active paths.
    return recognizerConfig.copy(
        decodingMethod = "modified_beam_search",
        maxActivePaths = recognizerConfig.maxActivePaths.coerceAtLeast(4)
    )
}
|
|
699
|
+
}
|