react-native-sherpa-onnx 0.3.2 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +28 -15
- package/SherpaOnnx.podspec +13 -5
- package/android/prebuilt-download.gradle +18 -5
- package/android/prebuilt-versions.gradle +8 -4
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.cpp +43 -142
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.h +12 -4
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-stt.cpp +694 -307
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-tts.cpp +194 -99
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect.h +90 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.cpp +3 -0
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +70 -0
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxPcmCapture.kt +150 -0
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxSttHelper.kt +39 -19
- package/ios/SherpaOnnx+PcmLiveStream.mm +288 -0
- package/ios/SherpaOnnx+STT.mm +2 -0
- package/ios/SherpaOnnx.mm +1 -1
- package/ios/model_detect/sherpa-onnx-model-detect-helper.h +9 -3
- package/ios/model_detect/sherpa-onnx-model-detect-helper.mm +38 -54
- package/ios/model_detect/sherpa-onnx-model-detect-stt.mm +620 -267
- package/ios/model_detect/sherpa-onnx-model-detect-tts.mm +131 -28
- package/ios/model_detect/sherpa-onnx-model-detect.h +70 -0
- package/ios/stt/sherpa-onnx-stt-wrapper.mm +4 -0
- package/lib/module/NativeSherpaOnnx.js.map +1 -1
- package/lib/module/audio/index.js +52 -0
- package/lib/module/audio/index.js.map +1 -1
- package/lib/module/stt/streaming.js +6 -3
- package/lib/module/stt/streaming.js.map +1 -1
- package/lib/typescript/src/NativeSherpaOnnx.d.ts +16 -2
- package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
- package/lib/typescript/src/audio/index.d.ts +17 -0
- package/lib/typescript/src/audio/index.d.ts.map +1 -1
- package/lib/typescript/src/stt/streaming.d.ts.map +1 -1
- package/lib/typescript/src/stt/streamingTypes.d.ts +1 -1
- package/lib/typescript/src/stt/streamingTypes.d.ts.map +1 -1
- package/package.json +6 -1
- package/scripts/check-model-csvs.sh +72 -0
- package/scripts/setup-ios-framework.sh +48 -48
- package/src/NativeSherpaOnnx.ts +18 -2
- package/src/audio/index.ts +81 -0
- package/src/stt/streaming.ts +10 -5
- package/src/stt/streamingTypes.ts +1 -1
- package/third_party/sherpa-onnx-prebuilt/ANDROID_RELEASE_TAG +1 -1
- package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -1
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "react-native-sherpa-onnx",
|
|
3
|
-
"version": "0.3.2",
|
|
3
|
+
"version": "0.3.3",
|
|
4
4
|
"description": "Offline speech-to-text, text-to-speech, speaker diarization, speech enhancement, source separation, and VAD with sherpa-onnx for React Native",
|
|
5
5
|
"main": "./lib/module/index.js",
|
|
6
6
|
"types": "./lib/typescript/src/index.d.ts",
|
|
@@ -78,6 +78,8 @@
|
|
|
78
78
|
"!**/__tests__",
|
|
79
79
|
"!**/__fixtures__",
|
|
80
80
|
"!**/__mocks__",
|
|
81
|
+
"!test",
|
|
82
|
+
"!test/**",
|
|
81
83
|
"!**/.*",
|
|
82
84
|
"!./third_party/sherpa-onnx",
|
|
83
85
|
"!./third_party/sherpa-onnx/**",
|
|
@@ -154,6 +156,9 @@
|
|
|
154
156
|
"turbo": "^2.5.6",
|
|
155
157
|
"typescript": "^5.9.2"
|
|
156
158
|
},
|
|
159
|
+
"dependencies": {
|
|
160
|
+
"buffer": "^6.0.3"
|
|
161
|
+
},
|
|
157
162
|
"peerDependencies": {
|
|
158
163
|
"@dr.pogodin/react-native-fs": "*",
|
|
159
164
|
"react": "*",
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Compare GitHub release assets (asr-models, tts-models) with local CSV fixtures.
|
|
3
|
+
# If any asset exists on GitHub but is not listed in the corresponding CSV,
|
|
4
|
+
# print a warning (non-fatal) with the list and a hint to run the collect workflows.
|
|
5
|
+
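# Every handled path (missing CSV, failed fetch, missing assets) exits 0 so this can be used as an informational step; because of `set -e`, an unexpected command failure (e.g. a curl network error) may still abort with a non-zero status.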
|
|
6
|
+
|
|
7
|
+
set -e
|
|
8
|
+
|
|
9
|
+
REPO="${SHERPA_ONNX_REPO:-k2-fsa/sherpa-onnx}"
|
|
10
|
+
ASR_CSV="${ASR_CSV:-test/fixtures/asr-models-expected.csv}"
|
|
11
|
+
TTS_CSV="${TTS_CSV:-test/fixtures/tts-models-expected.csv}"
|
|
12
|
+
|
|
13
|
+
if [ ! -f "$ASR_CSV" ]; then
|
|
14
|
+
echo "::warning::Missing $ASR_CSV (run from repo root or set ASR_CSV)"
|
|
15
|
+
exit 0
|
|
16
|
+
fi
|
|
17
|
+
if [ ! -f "$TTS_CSV" ]; then
|
|
18
|
+
echo "::warning::Missing $TTS_CSV (run from repo root or set TTS_CSV)"
|
|
19
|
+
exit 0
|
|
20
|
+
fi
|
|
21
|
+
|
|
22
|
+
# Fetch ASR release assets (.tar.bz2, .onnx)
|
|
23
|
+
ASR_ASSETS=""
|
|
24
|
+
ASR_RESP="${ASR_RESP:-$(curl -sL "https://api.github.com/repos/${REPO}/releases/tags/asr-models")}"
|
|
25
|
+
if echo "$ASR_RESP" | jq -e '.assets' >/dev/null 2>&1; then
|
|
26
|
+
ASR_ASSETS=$(echo "$ASR_RESP" | jq -r '.assets[] | select(.name | endswith(".tar.bz2") or endswith(".onnx")) | .name')
|
|
27
|
+
else
|
|
28
|
+
echo "::warning::Could not fetch asr-models release or it has no assets"
|
|
29
|
+
fi
|
|
30
|
+
|
|
31
|
+
# Fetch TTS release assets
|
|
32
|
+
TTS_ASSETS=""
|
|
33
|
+
TTS_RESP="${TTS_RESP:-$(curl -sL "https://api.github.com/repos/${REPO}/releases/tags/tts-models")}"
|
|
34
|
+
if echo "$TTS_RESP" | jq -e '.assets' >/dev/null 2>&1; then
|
|
35
|
+
TTS_ASSETS=$(echo "$TTS_RESP" | jq -r '.assets[] | select(.name | endswith(".tar.bz2") or endswith(".onnx")) | .name')
|
|
36
|
+
else
|
|
37
|
+
echo "::warning::Could not fetch tts-models release or it has no assets"
|
|
38
|
+
fi
|
|
39
|
+
|
|
40
|
+
# First column of CSV (asset_name); strip optional quotes and whitespace; skip header
|
|
41
|
+
csv_asset_names() { awk -F',' '{ gsub(/^ *"|" *$/, "", $1); gsub(/^ | $/, "", $1); if (NR>1 && $1 != "") print $1 }' "$1"; }
|
|
42
|
+
|
|
43
|
+
ASR_CSV_NAMES=$(csv_asset_names "$ASR_CSV")
|
|
44
|
+
TTS_CSV_NAMES=$(csv_asset_names "$TTS_CSV")
|
|
45
|
+
|
|
46
|
+
ASR_MISSING=""
|
|
47
|
+
while IFS= read -r asset; do
|
|
48
|
+
[ -z "$asset" ] && continue
|
|
49
|
+
if ! echo "$ASR_CSV_NAMES" | grep -qFx "$asset"; then
|
|
50
|
+
ASR_MISSING="${ASR_MISSING} - ${asset}\n"
|
|
51
|
+
fi
|
|
52
|
+
done <<< "$ASR_ASSETS"
|
|
53
|
+
|
|
54
|
+
TTS_MISSING=""
|
|
55
|
+
while IFS= read -r asset; do
|
|
56
|
+
[ -z "$asset" ] && continue
|
|
57
|
+
if ! echo "$TTS_CSV_NAMES" | grep -qFx "$asset"; then
|
|
58
|
+
TTS_MISSING="${TTS_MISSING} - ${asset}\n"
|
|
59
|
+
fi
|
|
60
|
+
done <<< "$TTS_ASSETS"
|
|
61
|
+
|
|
62
|
+
if [ -n "$ASR_MISSING" ] || [ -n "$TTS_MISSING" ]; then
|
|
63
|
+
echo "::warning::New assets are available on GitHub but not yet listed in the expected CSV files."
|
|
64
|
+
[ -n "$ASR_MISSING" ] && echo -e "ASR (asr-models) assets missing from $ASR_CSV:\n$ASR_MISSING"
|
|
65
|
+
[ -n "$TTS_MISSING" ] && echo -e "TTS (tts-models) assets missing from $TTS_CSV:\n$TTS_MISSING"
|
|
66
|
+
echo "Please run the collect workflows to update fixtures:"
|
|
67
|
+
echo " - Testdata - Collect ASR model structures (workflow_dispatch)"
|
|
68
|
+
echo " - Testdata - Collect TTS model structures (workflow_dispatch)"
|
|
69
|
+
exit 0
|
|
70
|
+
fi
|
|
71
|
+
|
|
72
|
+
echo "All GitHub release assets are listed in the expected CSV files."
|
|
@@ -73,6 +73,17 @@ fi
|
|
|
73
73
|
# Create frameworks directory if it doesn't exist
|
|
74
74
|
mkdir -p "$FRAMEWORKS_DIR"
|
|
75
75
|
|
|
76
|
+
# Read desired version early so we can skip download only when installed version matches (avoids stale framework after IOS_RELEASE_TAG update).
|
|
77
|
+
if [ -z "$DESIRED_VERSION" ] && [ "$FORCE_DOWNLOAD" != true ]; then
|
|
78
|
+
IOS_TAG_FILE="$PROJECT_ROOT/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG"
|
|
79
|
+
if [ -f "$IOS_TAG_FILE" ]; then
|
|
80
|
+
TAG=$(grep -v '^#' "$IOS_TAG_FILE" | grep -v '^[[:space:]]*$' | head -1 | tr -d '\r\n')
|
|
81
|
+
if [ -n "$TAG" ] && [ "${TAG#framework-v}" != "$TAG" ]; then
|
|
82
|
+
DESIRED_VERSION="${TAG#framework-v}"
|
|
83
|
+
fi
|
|
84
|
+
fi
|
|
85
|
+
fi
|
|
86
|
+
|
|
76
87
|
# Helper: check if a framework path is valid for building (has library + required headers for compiler)
|
|
77
88
|
framework_valid() {
|
|
78
89
|
local fw_root="$1"
|
|
@@ -81,17 +92,47 @@ framework_valid() {
|
|
|
81
92
|
return 0
|
|
82
93
|
}
|
|
83
94
|
|
|
84
|
-
#
|
|
85
|
-
|
|
86
|
-
|
|
95
|
+
# Helper: get installed framework version (from .framework-version or xcframework VERSION.txt)
|
|
96
|
+
get_installed_version() {
|
|
97
|
+
if [ -f "$VERSION_FILE" ]; then
|
|
98
|
+
cat "$VERSION_FILE" 2>/dev/null | tr -d '\r\n'
|
|
99
|
+
return 0
|
|
100
|
+
fi
|
|
101
|
+
for f in "sherpa_onnx.xcframework" "sherpa-onnx.xcframework"; do
|
|
102
|
+
if [ -f "$FRAMEWORKS_DIR/$f/VERSION.txt" ]; then
|
|
103
|
+
grep -Eo '([0-9]+\.)+[0-9]+' "$FRAMEWORKS_DIR/$f/VERSION.txt" | head -n1 | tr -d '\r\n'
|
|
104
|
+
return 0
|
|
105
|
+
fi
|
|
106
|
+
done
|
|
107
|
+
echo ""
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
# When run as Xcode build phase (prepare_command): if framework is already present, valid, AND matches IOS_RELEASE_TAG, exit successfully.
|
|
111
|
+
# Otherwise we re-download so that updating IOS_RELEASE_TAG (e.g. to 1.12.28) triggers an update from an older installed version (e.g. 1.12.24).
|
|
87
112
|
if [ "$FORCE_DOWNLOAD" != true ]; then
|
|
88
113
|
if [ -d "$FRAMEWORKS_DIR/sherpa_onnx.xcframework" ] && framework_valid "$FRAMEWORKS_DIR/sherpa_onnx.xcframework"; then
|
|
89
|
-
|
|
90
|
-
|
|
114
|
+
if [ ! -f "$VERSION_FILE" ] && [ -f "$FRAMEWORKS_DIR/sherpa_onnx.xcframework/VERSION.txt" ]; then
|
|
115
|
+
ver=$(grep -Eo '([0-9]+\.)+[0-9]+' "$FRAMEWORKS_DIR/sherpa_onnx.xcframework/VERSION.txt" | head -n1 || true)
|
|
116
|
+
[ -n "$ver" ] && echo "$ver" > "$VERSION_FILE" 2>/dev/null || true
|
|
117
|
+
fi
|
|
118
|
+
installed=$(get_installed_version)
|
|
119
|
+
if [ -n "$DESIRED_VERSION" ] && [ -n "$installed" ] && [ "$installed" = "$DESIRED_VERSION" ]; then
|
|
120
|
+
echo "[SherpaOnnx] Framework already present at $FRAMEWORKS_DIR/sherpa_onnx.xcframework (v$installed), skipping download." >&2
|
|
121
|
+
exit 0
|
|
122
|
+
fi
|
|
123
|
+
[ "$INTERACTIVE" = true ] && echo -e "${YELLOW}Installed framework v${installed} does not match IOS_RELEASE_TAG ($DESIRED_VERSION), will re-download.${NC}" >&2
|
|
91
124
|
fi
|
|
92
125
|
if [ -d "$FRAMEWORKS_DIR/sherpa-onnx.xcframework" ] && framework_valid "$FRAMEWORKS_DIR/sherpa-onnx.xcframework"; then
|
|
93
|
-
|
|
94
|
-
|
|
126
|
+
if [ ! -f "$VERSION_FILE" ] && [ -f "$FRAMEWORKS_DIR/sherpa-onnx.xcframework/VERSION.txt" ]; then
|
|
127
|
+
ver=$(grep -Eo '([0-9]+\.)+[0-9]+' "$FRAMEWORKS_DIR/sherpa-onnx.xcframework/VERSION.txt" | head -n1 || true)
|
|
128
|
+
[ -n "$ver" ] && echo "$ver" > "$VERSION_FILE" 2>/dev/null || true
|
|
129
|
+
fi
|
|
130
|
+
installed=$(get_installed_version)
|
|
131
|
+
if [ -n "$DESIRED_VERSION" ] && [ -n "$installed" ] && [ "$installed" = "$DESIRED_VERSION" ]; then
|
|
132
|
+
echo "[SherpaOnnx] Framework already present at $FRAMEWORKS_DIR/sherpa-onnx.xcframework (v$installed), skipping download." >&2
|
|
133
|
+
exit 0
|
|
134
|
+
fi
|
|
135
|
+
[ "$INTERACTIVE" = true ] && echo -e "${YELLOW}Installed framework v${installed} does not match IOS_RELEASE_TAG ($DESIRED_VERSION), will re-download.${NC}" >&2
|
|
95
136
|
fi
|
|
96
137
|
fi
|
|
97
138
|
|
|
@@ -154,47 +195,6 @@ compare_versions() {
|
|
|
154
195
|
echo "0" # v1 == v2
|
|
155
196
|
}
|
|
156
197
|
|
|
157
|
-
# Function to get latest framework version from GitHub
|
|
158
|
-
get_latest_framework_version() {
|
|
159
|
-
echo -e "${YELLOW}Fetching latest framework release from GitHub...${NC}" >&2
|
|
160
|
-
|
|
161
|
-
local releases_json=$(curl -s "${AUTH_ARGS[@]}" -H "Accept: application/vnd.github+json" "https://api.github.com/repos/XDcobra/react-native-sherpa-onnx/releases" 2>/dev/null || echo "")
|
|
162
|
-
|
|
163
|
-
if [ -z "$releases_json" ]; then
|
|
164
|
-
echo -e "${RED}Error: Could not fetch releases from GitHub API${NC}" >&2
|
|
165
|
-
return 1
|
|
166
|
-
fi
|
|
167
|
-
|
|
168
|
-
# Avoid jq errors on rate-limit HTML or plain-text responses
|
|
169
|
-
if ! echo "$releases_json" | grep -q '"tag_name"'; then
|
|
170
|
-
echo -e "${RED}Error: GitHub API response did not contain release data (possible rate limit).${NC}" >&2
|
|
171
|
-
echo "Response (truncated):" >&2
|
|
172
|
-
echo "$releases_json" | head -5 >&2
|
|
173
|
-
return 1
|
|
174
|
-
fi
|
|
175
|
-
|
|
176
|
-
local version=""
|
|
177
|
-
|
|
178
|
-
if command -v jq &> /dev/null; then
|
|
179
|
-
if echo "$releases_json" | jq -e . > /dev/null 2>&1; then
|
|
180
|
-
version=$(echo "$releases_json" | jq -r '.[] | select(.tag_name | startswith("framework-v")) | .tag_name' | head -1 | sed 's/framework-v//')
|
|
181
|
-
else
|
|
182
|
-
echo -e "${RED}Error: GitHub releases response is not valid JSON${NC}" >&2
|
|
183
|
-
echo "$releases_json" | head -5 >&2
|
|
184
|
-
return 1
|
|
185
|
-
fi
|
|
186
|
-
else
|
|
187
|
-
version=$(echo "$releases_json" | grep -o '"tag_name": "framework-v[0-9.]*' | head -1 | sed 's/.*framework-v//')
|
|
188
|
-
fi
|
|
189
|
-
|
|
190
|
-
if [ -z "$version" ]; then
|
|
191
|
-
echo -e "${RED}Error: No framework releases found with tag format 'framework-vX.Y.Z'${NC}" >&2
|
|
192
|
-
return 1
|
|
193
|
-
fi
|
|
194
|
-
|
|
195
|
-
echo "$version"
|
|
196
|
-
}
|
|
197
|
-
|
|
198
198
|
# Function to get local framework version
|
|
199
199
|
get_local_framework_version() {
|
|
200
200
|
# Prefer explicit version file written by this script
|
package/src/NativeSherpaOnnx.ts
CHANGED
|
@@ -21,7 +21,7 @@ export interface Spec extends TurboModule {
|
|
|
21
21
|
* @param instanceId - Unique ID for this engine instance (from createSTT)
|
|
22
22
|
* @param modelDir - Absolute path to model directory
|
|
23
23
|
* @param preferInt8 - Optional: true = prefer int8 models, false = prefer regular models, undefined = try int8 first (default)
|
|
24
|
-
* @param modelType - Optional: explicit model type ('transducer', 'nemo_transducer', 'paraformer', 'nemo_ctc', 'wenet_ctc', 'sense_voice', 'zipformer_ctc', 'whisper', 'funasr_nano', 'fire_red_asr', 'moonshine', 'dolphin', 'canary', 'omnilingual', 'medasr', 'telespeech_ctc', 'auto'), undefined = auto (default)
|
|
24
|
+
* @param modelType - Optional: explicit model type ('transducer', 'nemo_transducer', 'paraformer', 'nemo_ctc', 'wenet_ctc', 'sense_voice', 'zipformer_ctc', 'whisper', 'funasr_nano', 'fire_red_asr', 'moonshine', 'moonshine_v2', 'dolphin', 'canary', 'omnilingual', 'medasr', 'telespeech_ctc', 'auto'), undefined = auto (default)
|
|
25
25
|
* @param debug - Optional: enable debug logging in native layer and sherpa-onnx (default: false)
|
|
26
26
|
* @param hotwordsFile - Optional: path to hotwords file (OfflineRecognizerConfig)
|
|
27
27
|
* @param hotwordsScore - Optional: hotwords score (default in Kotlin 1.5)
|
|
@@ -65,7 +65,7 @@ export interface Spec extends TurboModule {
|
|
|
65
65
|
* @param modelDir - Absolute path to model directory (use resolveModelPath first for asset/file paths)
|
|
66
66
|
* @param preferInt8 - Optional: true = prefer int8, false = prefer regular, undefined = try int8 first
|
|
67
67
|
* @param modelType - Optional: explicit type or 'auto' (default)
|
|
68
|
-
* @returns Object with success, detectedModels (array of { type, modelDir }),
|
|
68
|
+
* @returns Object with success, detectedModels (array of { type, modelDir }), modelType (primary detected type), and optionally isHardwareSpecificUnsupported (true when the model is for unsupported hardware e.g. RK35xx, Ascend)
|
|
69
69
|
*/
|
|
70
70
|
detectSttModel(
|
|
71
71
|
modelDir: string,
|
|
@@ -73,6 +73,8 @@ export interface Spec extends TurboModule {
|
|
|
73
73
|
modelType?: string
|
|
74
74
|
): Promise<{
|
|
75
75
|
success: boolean;
|
|
76
|
+
/** True when detection failed because the model targets unsupported hardware (RK35xx, Ascend, CANN). Use to show a specific message or block init. */
|
|
77
|
+
isHardwareSpecificUnsupported?: boolean;
|
|
76
78
|
detectedModels: Array<{ type: string; modelDir: string }>;
|
|
77
79
|
modelType?: string;
|
|
78
80
|
}>;
|
|
@@ -211,6 +213,20 @@ export interface Spec extends TurboModule {
|
|
|
211
213
|
isEndpoint: boolean;
|
|
212
214
|
}>;
|
|
213
215
|
|
|
216
|
+
/**
|
|
217
|
+
* Start native PCM live capture. Microphone audio is captured and resampled to the requested
|
|
218
|
+
* sampleRate; chunks are emitted via the "pcmLiveStreamData" event (base64 Int16 PCM).
|
|
219
|
+
* App must have RECORD_AUDIO (Android) and NSMicrophoneUsageDescription (iOS) and grant permission before calling.
|
|
220
|
+
*/
|
|
221
|
+
startPcmLiveStream(options: {
|
|
222
|
+
sampleRate: number;
|
|
223
|
+
channelCount?: number;
|
|
224
|
+
bufferSizeFrames?: number;
|
|
225
|
+
}): Promise<void>;
|
|
226
|
+
|
|
227
|
+
/** Stop native PCM live capture. */
|
|
228
|
+
stopPcmLiveStream(): Promise<void>;
|
|
229
|
+
|
|
214
230
|
// ==================== TTS Methods ====================
|
|
215
231
|
|
|
216
232
|
/**
|
package/src/audio/index.ts
CHANGED
|
@@ -1,5 +1,86 @@
|
|
|
1
|
+
import { Buffer } from 'buffer';
|
|
2
|
+
import { DeviceEventEmitter } from 'react-native';
|
|
1
3
|
import SherpaOnnx from '../NativeSherpaOnnx';
|
|
2
4
|
|
|
5
|
+
/**
|
|
6
|
+
* Decode base64-encoded Int16 PCM to float array in [-1, 1].
|
|
7
|
+
* Allocates one Float32Array per call, sized to the decoded sample count; samples are read as little-endian Int16 and scaled by 1/32768.
|
|
8
|
+
*/
|
|
9
|
+
function base64PcmToFloatArray(base64: string): Float32Array {
|
|
10
|
+
const bytes = Buffer.from(base64, 'base64');
|
|
11
|
+
const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
|
|
12
|
+
const len = bytes.byteLength / 2;
|
|
13
|
+
const out = new Float32Array(len);
|
|
14
|
+
for (let i = 0; i < len; i++) {
|
|
15
|
+
out[i] = view.getInt16(i * 2, true) / 32768;
|
|
16
|
+
}
|
|
17
|
+
return out;
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
export type PcmLiveStreamOptions = {
|
|
21
|
+
sampleRate?: number;
|
|
22
|
+
channelCount?: number;
|
|
23
|
+
bufferSizeFrames?: number;
|
|
24
|
+
};
|
|
25
|
+
|
|
26
|
+
export type PcmLiveStreamHandle = {
|
|
27
|
+
start: () => Promise<void>;
|
|
28
|
+
stop: () => Promise<void>;
|
|
29
|
+
onData: (
|
|
30
|
+
callback: (samples: Float32Array, sampleRate: number) => void
|
|
31
|
+
) => () => void;
|
|
32
|
+
onError: (callback: (message: string) => void) => () => void;
|
|
33
|
+
};
|
|
34
|
+
|
|
35
|
+
/**
|
|
36
|
+
* Create a PCM live stream from the device microphone. Native capture and resampling ensure
|
|
37
|
+
* PCM is always delivered at the requested sampleRate (e.g. 16000 for STT). The app must have
|
|
38
|
+
* RECORD_AUDIO (Android) and NSMicrophoneUsageDescription (iOS) and grant permission before start().
|
|
39
|
+
*/
|
|
40
|
+
export function createPcmLiveStream(
|
|
41
|
+
options?: PcmLiveStreamOptions
|
|
42
|
+
): PcmLiveStreamHandle {
|
|
43
|
+
const sampleRate = options?.sampleRate ?? 16000;
|
|
44
|
+
const channelCount = options?.channelCount ?? 1;
|
|
45
|
+
const bufferSizeFrames = options?.bufferSizeFrames ?? 0;
|
|
46
|
+
|
|
47
|
+
return {
|
|
48
|
+
start: () =>
|
|
49
|
+
SherpaOnnx.startPcmLiveStream({
|
|
50
|
+
sampleRate,
|
|
51
|
+
channelCount,
|
|
52
|
+
bufferSizeFrames,
|
|
53
|
+
}),
|
|
54
|
+
|
|
55
|
+
stop: () => SherpaOnnx.stopPcmLiveStream(),
|
|
56
|
+
|
|
57
|
+
onData: (callback: (samples: Float32Array, sampleRate: number) => void) => {
|
|
58
|
+
const sub = DeviceEventEmitter.addListener(
|
|
59
|
+
'pcmLiveStreamData',
|
|
60
|
+
(event: { base64Pcm?: string; sampleRate?: number }) => {
|
|
61
|
+
const base64 = event?.base64Pcm ?? '';
|
|
62
|
+
const sr = event?.sampleRate ?? sampleRate;
|
|
63
|
+
if (base64) {
|
|
64
|
+
const samples = base64PcmToFloatArray(base64);
|
|
65
|
+
callback(samples, sr);
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
);
|
|
69
|
+
return () => sub.remove();
|
|
70
|
+
},
|
|
71
|
+
|
|
72
|
+
onError: (callback: (message: string) => void) => {
|
|
73
|
+
const sub = DeviceEventEmitter.addListener(
|
|
74
|
+
'pcmLiveStreamError',
|
|
75
|
+
(event: { message?: string }) => {
|
|
76
|
+
callback(event?.message ?? 'Unknown error');
|
|
77
|
+
}
|
|
78
|
+
);
|
|
79
|
+
return () => sub.remove();
|
|
80
|
+
},
|
|
81
|
+
};
|
|
82
|
+
}
|
|
83
|
+
|
|
3
84
|
/**
|
|
4
85
|
* Convert any supported audio file to a requested format (e.g. "mp3", "flac", "wav").
|
|
5
86
|
* On Android this requires FFmpeg prebuilts. WAV output is always 16 kHz mono (sherpa-onnx).
|
package/src/stt/streaming.ts
CHANGED
|
@@ -317,11 +317,11 @@ export async function createStreamingSTT(
|
|
|
317
317
|
},
|
|
318
318
|
|
|
319
319
|
async processAudioChunk(
|
|
320
|
-
samples: number[],
|
|
320
|
+
samples: number[] | Float32Array,
|
|
321
321
|
sampleRate: number
|
|
322
322
|
): Promise<{ result: StreamingSttResult; isEndpoint: boolean }> {
|
|
323
323
|
streamGuard();
|
|
324
|
-
let toSend: number[] = samples;
|
|
324
|
+
let toSend: number[] | Float32Array = samples;
|
|
325
325
|
if (enableInputNormalization && samples.length > 0) {
|
|
326
326
|
let maxAbs = 1e-10;
|
|
327
327
|
for (let i = 0; i < samples.length; i++) {
|
|
@@ -329,15 +329,20 @@ export async function createStreamingSTT(
|
|
|
329
329
|
if (abs > maxAbs) maxAbs = abs;
|
|
330
330
|
}
|
|
331
331
|
const scale = maxAbs < 0.01 ? 80 : Math.min(80, 0.8 / maxAbs);
|
|
332
|
-
|
|
332
|
+
const normalized = new Float32Array(samples.length);
|
|
333
333
|
for (let i = 0; i < samples.length; i++) {
|
|
334
334
|
const v = samples[i]! * scale;
|
|
335
|
-
|
|
335
|
+
normalized[i] = v < -1 ? -1 : v > 1 ? 1 : v;
|
|
336
336
|
}
|
|
337
|
+
toSend = normalized;
|
|
337
338
|
}
|
|
339
|
+
// Bridge expects a plain array; Float32Array may not serialize as ReadableArray on all platforms.
|
|
340
|
+
const samplesArray = Array.isArray(toSend)
|
|
341
|
+
? toSend
|
|
342
|
+
: Array.from(toSend);
|
|
338
343
|
const raw = await SherpaOnnx.processSttAudioChunk(
|
|
339
344
|
streamId,
|
|
340
|
-
|
|
345
|
+
samplesArray,
|
|
341
346
|
sampleRate
|
|
342
347
|
);
|
|
343
348
|
return {
|
|
@@ -132,7 +132,7 @@ export interface SttStream {
|
|
|
132
132
|
* Reduces bridge round-trips from 5 to 1 per chunk.
|
|
133
133
|
*/
|
|
134
134
|
processAudioChunk(
|
|
135
|
-
samples: number[],
|
|
135
|
+
samples: number[] | Float32Array,
|
|
136
136
|
sampleRate: number
|
|
137
137
|
): Promise<{ result: StreamingSttResult; isEndpoint: boolean }>;
|
|
138
138
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
sherpa-onnx-android-v1.12.
|
|
1
|
+
sherpa-onnx-android-v1.12.28
|
|
@@ -1 +1 @@
|
|
|
1
|
-
framework-v1.12.
|
|
1
|
+
framework-v1.12.28
|