whisper.rn 0.4.0-rc.1 → 0.4.0-rc.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -6
- package/android/build.gradle +4 -0
- package/android/src/main/CMakeLists.txt +14 -0
- package/android/src/main/java/com/rnwhisper/AudioUtils.java +27 -92
- package/android/src/main/java/com/rnwhisper/RNWhisper.java +86 -40
- package/android/src/main/java/com/rnwhisper/WhisperContext.java +85 -131
- package/android/src/main/jni-utils.h +76 -0
- package/android/src/main/jni.cpp +226 -109
- package/android/src/newarch/java/com/rnwhisper/RNWhisperModule.java +10 -0
- package/android/src/oldarch/java/com/rnwhisper/RNWhisperModule.java +10 -0
- package/cpp/README.md +1 -1
- package/cpp/coreml/whisper-encoder-impl.h +1 -1
- package/cpp/coreml/whisper-encoder.h +4 -0
- package/cpp/coreml/whisper-encoder.mm +5 -3
- package/cpp/ggml-aarch64.c +129 -0
- package/cpp/ggml-aarch64.h +19 -0
- package/cpp/ggml-alloc.c +805 -400
- package/cpp/ggml-alloc.h +60 -10
- package/cpp/ggml-backend-impl.h +216 -0
- package/cpp/ggml-backend-reg.cpp +204 -0
- package/cpp/ggml-backend.cpp +1996 -0
- package/cpp/ggml-backend.cpp.rej +12 -0
- package/cpp/ggml-backend.h +336 -0
- package/cpp/ggml-common.h +1853 -0
- package/cpp/ggml-cpp.h +38 -0
- package/cpp/ggml-cpu-aarch64.c +3560 -0
- package/cpp/ggml-cpu-aarch64.h +30 -0
- package/cpp/ggml-cpu-impl.h +371 -0
- package/cpp/ggml-cpu-quants.c +10822 -0
- package/cpp/ggml-cpu-quants.h +63 -0
- package/cpp/ggml-cpu.c +13970 -0
- package/cpp/ggml-cpu.cpp +663 -0
- package/cpp/ggml-cpu.h +177 -0
- package/cpp/ggml-impl.h +551 -0
- package/cpp/ggml-metal-impl.h +249 -0
- package/cpp/ggml-metal.h +24 -43
- package/cpp/ggml-metal.m +4190 -1075
- package/cpp/ggml-quants.c +5247 -0
- package/cpp/ggml-quants.h +100 -0
- package/cpp/ggml-threading.cpp +12 -0
- package/cpp/ggml-threading.h +12 -0
- package/cpp/ggml-whisper.metallib +0 -0
- package/cpp/ggml.c +5474 -18763
- package/cpp/ggml.h +833 -628
- package/cpp/rn-audioutils.cpp +68 -0
- package/cpp/rn-audioutils.h +14 -0
- package/cpp/rn-whisper-log.h +11 -0
- package/cpp/rn-whisper.cpp +221 -52
- package/cpp/rn-whisper.h +50 -15
- package/cpp/whisper.cpp +2872 -1371
- package/cpp/whisper.h +170 -41
- package/ios/RNWhisper.mm +139 -46
- package/ios/RNWhisperAudioUtils.h +1 -2
- package/ios/RNWhisperAudioUtils.m +18 -67
- package/ios/RNWhisperContext.h +11 -8
- package/ios/RNWhisperContext.mm +195 -150
- package/jest/mock.js +15 -2
- package/lib/commonjs/NativeRNWhisper.js.map +1 -1
- package/lib/commonjs/index.js +76 -28
- package/lib/commonjs/index.js.map +1 -1
- package/lib/commonjs/version.json +1 -1
- package/lib/module/NativeRNWhisper.js.map +1 -1
- package/lib/module/index.js +76 -28
- package/lib/module/index.js.map +1 -1
- package/lib/module/version.json +1 -1
- package/lib/typescript/NativeRNWhisper.d.ts +13 -4
- package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +37 -5
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +9 -7
- package/src/NativeRNWhisper.ts +20 -4
- package/src/index.ts +98 -42
- package/src/version.json +1 -1
- package/whisper-rn.podspec +11 -18
- package/cpp/ggml-metal.metal +0 -2353
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAOA,OAAkB,EAAE,oBAAoB,EAAE,MAAM,mBAAmB,CAAA;AACnE,OAAO,KAAK,EACV,iBAAiB,EACjB,gBAAgB,EAEjB,MAAM,mBAAmB,CAAA;AAC1B,OAAO,eAAe,MAAM,mBAAmB,CAAA;AAC/C,OAAO,KAAK,EACV,uBAAuB,EACvB,6BAA6B,EAC7B,mBAAmB,EACpB,MAAM,mBAAmB,CAAA;AAY1B,YAAY,EACV,iBAAiB,EACjB,gBAAgB,EAChB,uBAAuB,EACvB,6BAA6B,EAC7B,mBAAmB,GACpB,CAAA;AAQD,MAAM,MAAM,2BAA2B,GAAG;IACxC,IAAI,EAAE,MAAM,CAAA;IACZ,SAAS,EAAE,MAAM,CAAA;IACjB,MAAM,EAAE,MAAM,CAAA;IACd,QAAQ,EAAE,gBAAgB,CAAC,UAAU,CAAC,CAAA;CACvC,CAAA;AAED,MAAM,MAAM,gCAAgC,GAAG;IAC7C,SAAS,EAAE,MAAM,CAAA;IACjB,KAAK,EAAE,MAAM,CAAA;IACb,MAAM,EAAE,2BAA2B,CAAA;CACpC,CAAA;AAGD,MAAM,MAAM,qBAAqB,GAAG,iBAAiB,GAAG;IACtD;;OAEG;IACH,UAAU,CAAC,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAI,CAAA;IACvC;;OAEG;IACH,aAAa,CAAC,EAAE,CAAC,MAAM,EAAE,2BAA2B,KAAK,IAAI,CAAA;CAC9D,CAAA;AAED,MAAM,MAAM,6BAA6B,GAAG;IAC1C,SAAS,EAAE,MAAM,CAAA;IACjB,KAAK,EAAE,MAAM,CAAA;IACb,QAAQ,EAAE,MAAM,CAAA;CACjB,CAAA;AAED,MAAM,MAAM,sBAAsB,GAAG;IACnC,QAAQ,EAAE,uBAAuB,CAAA;IACjC,OAAO,CAAC,EAAE,6BAA6B,EAAE,CAAA;IACzC,IAAI,CAAC,EAAE,mBAAmB,CAAA;IAC1B,MAAM,CAAC,EAAE,OAAO,CAAA;CACjB,CAAA;AAGD,MAAM,MAAM,yBAAyB,GAAG,iBAAiB,GAAG;IAC1D;;;;OAIG;IACH,gBAAgB,CAAC,EAAE,MAAM,CAAA;IACzB;;;;OAIG;IACH,qBAAqB,CAAC,EAAE,MAAM,CAAA;IAC9B;;;OAGG;IACH,mBAAmB,CAAC,EAAE,MAAM,CAAA;IAC5B;;;OAGG;IACH,eAAe,CAAC,EAAE,MAAM,CAAA;IACxB;;;;OAIG;IACH,MAAM,CAAC,EAAE,OAAO,CAAA;IAChB;;OAEG;IACH,KAAK,CAAC,EAAE,MAAM,CAAA;IACd;;OAEG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB;;OAEG;IACH,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB;;;OAGG;IACH,sBAAsB,CAAC,EAAE,sBAAsB,CAAA;IAC/C;;;;OAIG;IACH,qBAAqB,CAAC,EAAE,MAAM,GAAG,sBAAsB,CAAA;CACxD,CAAA;AAED,MAAM,MAAM,uBAAuB,GAAG;IACpC,SAAS,EAAE,MAAM,CAAA;IACjB,KAAK,EAAE,MAAM,CAAA;IACb,oEAAoE;IACpE,WAAW,EAAE,OAAO,CAAA;IACpB,iBAAiB,CAAC,EAAE,OAAO,CAAA;IAC3B,IAAI,EAAE,MAAM,CAAA;IACZ,IAAI,CAAC,EAAE,gBAAgB,CAAA;IACvB,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,WAAW,EAAE,MAAM,CAAA;IACnB,aAAa,EAAE,MAAM,CAAA;IACrB,MAAM,CAAC,EAAE,KAAK,CAAC;QACb,IAAI,EAAE,MAAM,CAAA;QACZ,KAAK,CAAC,EAAE,MAAM,CAAA;QACd,IAAI,CAAC,EAAE,gBAAgB,CAAA;QACvB,WAAW,EAAE,MAAM,CAAA;QACnB,aAAa,EAAE,MAAM,CAAA;KACtB,CAAC,CAAA;CACH,CAAA;AAED,MAAM,MAAM,+BAA+B,GAAG;IAC5C,oEAAoE;IACpE,WAAW,EAAE,OAAO,CAAA;IACpB,iBAAiB,CAAC,EAAE,OAAO,CAAA;IAC3B,IAAI,EAAE,MAAM,CAAA;IACZ,WAAW,EAAE,MAAM,CAAA;IACnB,aAAa,EAAE,MAAM,CAAA;IACrB,WAAW,EAAE,OAAO,CAAA;IACpB,UAAU,EAAE,MAAM,CAAA;IAClB,IAAI,CAAC,EAAE,gBAAgB,CAAA;IACvB,KAAK,CAAC,EAAE,MAAM,CAAA;CACf,CAAA;AAED,MAAM,MAAM,6BAA6B,GAAG;IAC1C,SAAS,EAAE,MAAM,CAAA;IACjB,KAAK,EAAE,MAAM,CAAA;IACb,OAAO,EAAE,+BAA+B,CAAA;CACzC,CAAA;AAED,MAAM,MAAM,WAAW,GAAG;IACxB,MAAM,EAAE,MAAM,CAAA;IACd,QAAQ,EAAE,MAAM,CAAA;IAChB,QAAQ,EAAE,MAAM,CAAA;IAChB,QAAQ,EAAE,MAAM,CAAA;IAChB,OAAO,EAAE,MAAM,CAAA;IACf,QAAQ,EAAE,MAAM,CAAA;CACjB,CAAA;AAaD,qBAAa,cAAc;IACzB,EAAE,EAAE,MAAM,CAAA;IAEV,GAAG,EAAE,OAAO,CAAQ;IAEpB,WAAW,EAAE,MAAM,CAAK;gBAEZ,EACV,SAAS,EACT,GAAG,EACH,WAAW,GACZ,EAAE,oBAAoB;IAMvB,OAAO,CAAC,0BAA0B;IA0ElC;;;OAGG;IACH,UAAU,CACR,gBAAgB,EAAE,MAAM,GAAG,MAAM,EACjC,OAAO,GAAE,qBAA0B,GAClC;QACD,0BAA0B;QAC1B,IAAI,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,CAAA;QACzB,gCAAgC;QAChC,OAAO,EAAE,OAAO,CAAC,gBAAgB,CAAC,CAAA;KACnC;IAoBD;;OAEG;IACH,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,GAAE,qBAA0B,GAAG;QACjE,IAAI,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,CAAA;QACzB,OAAO,EAAE,OAAO,CAAC,gBAAgB,CAAC,CAAA;KACnC;IAID,yFAAyF;IACnF,kBAAkB,CAAC,OAAO,GAAE,yBAA8B,GAAG,OAAO,CAAC;QACzE,mCAAmC;QACnC,IAAI,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,CAAA;QACzB,8CAA8C;QAC9C,SAAS,EAAE,CAAC,QAAQ,EAAE,CAAC,KAAK,EAAE,uBAAuB,KAAK,IAAI,KAAK,IAAI,CAAA;KACxE,CAAC;IAiII,KAAK,CAAC,UAAU,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC;IAM/C,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAG/B;AAED,MAAM,MAAM,cAAc,GAAG;IAC3B,QAAQ,EAAE,MAAM,GAAG,MAAM,CAAA;IACzB;;;;OAIG;IACH,gBAAgB,CAAC,EAAE;QACjB,QAAQ,EAAE,MAAM,CAAA;QAChB,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE,CAAA;KAC5B,CAAA;IACD,+DAA+D;IAC/D,aAAa,CAAC,EAAE,OAAO,CAAA;IACvB,qHAAqH;IACrH,YAAY,CAAC,EAAE,OAAO,CAAA;IACtB,iGAAiG;IACjG,MAAM,CAAC,EAAE,OAAO,CAAA;IAChB,6DAA6D;IAC7D,YAAY,CAAC,EAAE,OAAO,CAAC;CACxB,CAAA;AASD,wBAAsB,WAAW,CAAC,EAChC,QAAQ,EACR,gBAAgB,EAChB,aAAa,EACb,MAAa,EACb,YAAmB,EACnB,YAAoB,GACrB,EAAE,cAAc,GAAG,OAAO,CAAC,cAAc,CAAC,CAwD1C;AAED,wBAAsB,iBAAiB,IAAI,OAAO,CAAC,IAAI,CAAC,CAEvD;AAED,qCAAqC;AACrC,eAAO,MAAM,UAAU,EAAE,MAAgB,CAAA;AAIzC,kCAAkC;AAClC,eAAO,MAAM,WAAW,EAAE,OAAqB,CAAA;AAE/C,2DAA2D;AAC3D,eAAO,MAAM,qBAAqB,EAAE,OAA+B,CAAA;AAEnE,OAAO,EAAE,eAAe,EAAE,CAAA"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "whisper.rn",
|
|
3
|
-
"version": "0.4.0-rc.
|
|
3
|
+
"version": "0.4.0-rc.10",
|
|
4
4
|
"description": "React Native binding of whisper.cpp",
|
|
5
5
|
"main": "lib/commonjs/index",
|
|
6
6
|
"module": "lib/module/index",
|
|
@@ -30,7 +30,7 @@
|
|
|
30
30
|
],
|
|
31
31
|
"scripts": {
|
|
32
32
|
"bootstrap": "./scripts/bootstrap.sh",
|
|
33
|
-
"docgen": "typedoc src/index.ts --plugin typedoc-plugin-markdown --readme none --out docs/API",
|
|
33
|
+
"docgen": "typedoc src/index.ts --plugin typedoc-plugin-markdown --excludePrivate --readme none --out docs/API",
|
|
34
34
|
"lint": "eslint \"**/*.{js,ts,tsx}\"",
|
|
35
35
|
"typecheck": "tsc --noEmit",
|
|
36
36
|
"prepack": "yarn docgen && bob build",
|
|
@@ -59,9 +59,11 @@
|
|
|
59
59
|
"@commitlint/config-conventional": "^17.0.2",
|
|
60
60
|
"@evilmartians/lefthook": "^1.2.2",
|
|
61
61
|
"@fugood/eslint-config-react": "^0.5.0",
|
|
62
|
+
"@react-native/babel-preset": "0.74.88",
|
|
63
|
+
"@react-native/metro-config": "^0.73.2",
|
|
62
64
|
"@release-it/conventional-changelog": "^5.0.0",
|
|
63
65
|
"@types/jest": "^29.4.4",
|
|
64
|
-
"@types/react": "
|
|
66
|
+
"@types/react": "^18.2.6",
|
|
65
67
|
"@types/react-native": "0.70.0",
|
|
66
68
|
"@typescript-eslint/eslint-plugin": "^5.55.0",
|
|
67
69
|
"@typescript-eslint/parser": "^5.55.0",
|
|
@@ -69,10 +71,9 @@
|
|
|
69
71
|
"del-cli": "^5.0.0",
|
|
70
72
|
"eslint": "^8.36.0",
|
|
71
73
|
"jest": "^29.5.0",
|
|
72
|
-
"metro-react-native-babel-preset": "0.73.8",
|
|
73
74
|
"pod-install": "^0.1.38",
|
|
74
75
|
"react": "18.2.0",
|
|
75
|
-
"react-native": "0.
|
|
76
|
+
"react-native": "0.74.6",
|
|
76
77
|
"react-native-builder-bob": "^0.20.4",
|
|
77
78
|
"release-it": "^15.8.0",
|
|
78
79
|
"typedoc": "^0.24.7",
|
|
@@ -87,7 +88,7 @@
|
|
|
87
88
|
"react-native": "*"
|
|
88
89
|
},
|
|
89
90
|
"engines": {
|
|
90
|
-
"node": ">=
|
|
91
|
+
"node": ">=18"
|
|
91
92
|
},
|
|
92
93
|
"jest": {
|
|
93
94
|
"preset": "react-native",
|
|
@@ -141,5 +142,6 @@
|
|
|
141
142
|
"name": "RNWhisperSpec",
|
|
142
143
|
"type": "all",
|
|
143
144
|
"jsSrcsDir": "./src/"
|
|
144
|
-
}
|
|
145
|
+
},
|
|
146
|
+
"packageManager": "yarn@1.22.22"
|
|
145
147
|
}
|
package/src/NativeRNWhisper.ts
CHANGED
|
@@ -15,6 +15,8 @@ export type TranscribeOptions = {
|
|
|
15
15
|
maxLen?: number,
|
|
16
16
|
/** Enable token-level timestamps */
|
|
17
17
|
tokenTimestamps?: boolean,
|
|
18
|
+
/** Enable tinydiarize (requires a tdrz model) */
|
|
19
|
+
tdrzEnable?: boolean,
|
|
18
20
|
/** Word timestamp probability threshold */
|
|
19
21
|
wordThold?: number,
|
|
20
22
|
/** Time offset in milliseconds */
|
|
@@ -28,8 +30,6 @@ export type TranscribeOptions = {
|
|
|
28
30
|
beamSize?: number,
|
|
29
31
|
/** Number of best candidates to keep */
|
|
30
32
|
bestOf?: number,
|
|
31
|
-
/** Speed up audio by x2 (reduced accuracy) */
|
|
32
|
-
speedUp?: boolean,
|
|
33
33
|
/** Initial Prompt */
|
|
34
34
|
prompt?: string,
|
|
35
35
|
}
|
|
@@ -52,23 +52,37 @@ export type CoreMLAsset = {
|
|
|
52
52
|
type NativeContextOptions = {
|
|
53
53
|
filePath: string,
|
|
54
54
|
isBundleAsset: boolean,
|
|
55
|
+
useFlashAttn?: boolean,
|
|
56
|
+
useGpu?: boolean,
|
|
55
57
|
useCoreMLIos?: boolean,
|
|
56
58
|
downloadCoreMLAssets?: boolean,
|
|
57
59
|
coreMLAssets?: CoreMLAsset[],
|
|
58
60
|
}
|
|
59
61
|
|
|
62
|
+
export type NativeWhisperContext = {
|
|
63
|
+
contextId: number
|
|
64
|
+
gpu: boolean
|
|
65
|
+
reasonNoGPU: string
|
|
66
|
+
}
|
|
67
|
+
|
|
60
68
|
export interface Spec extends TurboModule {
|
|
61
69
|
getConstants(): {
|
|
62
70
|
useCoreML: boolean
|
|
63
71
|
coreMLAllowFallback: boolean
|
|
64
72
|
};
|
|
65
|
-
initContext(options: NativeContextOptions): Promise<
|
|
73
|
+
initContext(options: NativeContextOptions): Promise<NativeWhisperContext>;
|
|
66
74
|
releaseContext(contextId: number): Promise<void>;
|
|
67
75
|
releaseAllContexts(): Promise<void>;
|
|
68
76
|
transcribeFile(
|
|
69
77
|
contextId: number,
|
|
70
78
|
jobId: number,
|
|
71
|
-
|
|
79
|
+
pathOrBase64: string,
|
|
80
|
+
options: {}, // TranscribeOptions & { onProgress?: boolean, onNewSegments?: boolean }
|
|
81
|
+
): Promise<TranscribeResult>;
|
|
82
|
+
transcribeData(
|
|
83
|
+
contextId: number,
|
|
84
|
+
jobId: number,
|
|
85
|
+
dataBase64: string,
|
|
72
86
|
options: {}, // TranscribeOptions & { onProgress?: boolean, onNewSegments?: boolean }
|
|
73
87
|
): Promise<TranscribeResult>;
|
|
74
88
|
startRealtimeTranscribe(
|
|
@@ -78,6 +92,8 @@ export interface Spec extends TurboModule {
|
|
|
78
92
|
): Promise<void>;
|
|
79
93
|
abortTranscribe(contextId: number, jobId: number): Promise<void>;
|
|
80
94
|
|
|
95
|
+
bench(contextId: number, maxThreads: number): Promise<string>;
|
|
96
|
+
|
|
81
97
|
// iOS specific
|
|
82
98
|
getAudioSessionCurrentCategory: () => Promise<{
|
|
83
99
|
category: string,
|
package/src/index.ts
CHANGED
|
@@ -5,7 +5,7 @@ import {
|
|
|
5
5
|
DeviceEventEmitterStatic,
|
|
6
6
|
Image,
|
|
7
7
|
} from 'react-native'
|
|
8
|
-
import RNWhisper from './NativeRNWhisper'
|
|
8
|
+
import RNWhisper, { NativeWhisperContext } from './NativeRNWhisper'
|
|
9
9
|
import type {
|
|
10
10
|
TranscribeOptions,
|
|
11
11
|
TranscribeResult,
|
|
@@ -94,6 +94,11 @@ export type TranscribeRealtimeOptions = TranscribeOptions & {
|
|
|
94
94
|
* (Default: Equal to `realtimeMaxAudioSec`)
|
|
95
95
|
*/
|
|
96
96
|
realtimeAudioSliceSec?: number
|
|
97
|
+
/**
|
|
98
|
+
* Min duration of audio to start transcribe in seconds for each slice.
|
|
99
|
+
* The minimum value is 0.5 ms and maximum value is realtimeAudioSliceSec (Default: 1)
|
|
100
|
+
*/
|
|
101
|
+
realtimeAudioMinSec?: number
|
|
97
102
|
/**
|
|
98
103
|
* Output path for audio file. If not set, the audio file will not be saved
|
|
99
104
|
* (Default: Undefined)
|
|
@@ -106,7 +111,7 @@ export type TranscribeRealtimeOptions = TranscribeOptions & {
|
|
|
106
111
|
*/
|
|
107
112
|
useVad?: boolean
|
|
108
113
|
/**
|
|
109
|
-
* The length of the collected audio is used for VAD. (ms) (Default: 2000)
|
|
114
|
+
* The length of the collected audio is used for VAD, cannot be less than 2000ms. (ms) (Default: 2000)
|
|
110
115
|
*/
|
|
111
116
|
vadMs?: number
|
|
112
117
|
/**
|
|
@@ -169,6 +174,15 @@ export type TranscribeRealtimeNativeEvent = {
|
|
|
169
174
|
payload: TranscribeRealtimeNativePayload
|
|
170
175
|
}
|
|
171
176
|
|
|
177
|
+
export type BenchResult = {
|
|
178
|
+
config: string
|
|
179
|
+
nThreads: number
|
|
180
|
+
encodeMs: number
|
|
181
|
+
decodeMs: number
|
|
182
|
+
batchMs: number
|
|
183
|
+
promptMs: number
|
|
184
|
+
}
|
|
185
|
+
|
|
172
186
|
const updateAudioSession = async (setting: AudioSessionSettingIos) => {
|
|
173
187
|
await AudioSessionIos.setCategory(
|
|
174
188
|
setting.category,
|
|
@@ -183,36 +197,24 @@ const updateAudioSession = async (setting: AudioSessionSettingIos) => {
|
|
|
183
197
|
export class WhisperContext {
|
|
184
198
|
id: number
|
|
185
199
|
|
|
186
|
-
|
|
187
|
-
|
|
200
|
+
gpu: boolean = false
|
|
201
|
+
|
|
202
|
+
reasonNoGPU: string = ''
|
|
203
|
+
|
|
204
|
+
constructor({
|
|
205
|
+
contextId,
|
|
206
|
+
gpu,
|
|
207
|
+
reasonNoGPU,
|
|
208
|
+
}: NativeWhisperContext) {
|
|
209
|
+
this.id = contextId
|
|
210
|
+
this.gpu = gpu
|
|
211
|
+
this.reasonNoGPU = reasonNoGPU
|
|
188
212
|
}
|
|
189
213
|
|
|
190
|
-
|
|
191
|
-
transcribe(
|
|
192
|
-
filePath: string | number,
|
|
193
|
-
options: TranscribeFileOptions = {},
|
|
194
|
-
): {
|
|
195
|
-
/** Stop the transcribe */
|
|
214
|
+
private transcribeWithNativeMethod(method: 'transcribeFile' | 'transcribeData', data: string, options: TranscribeFileOptions = {}): {
|
|
196
215
|
stop: () => Promise<void>
|
|
197
|
-
/** Transcribe result promise */
|
|
198
216
|
promise: Promise<TranscribeResult>
|
|
199
217
|
} {
|
|
200
|
-
let path = ''
|
|
201
|
-
if (typeof filePath === 'number') {
|
|
202
|
-
try {
|
|
203
|
-
const source = Image.resolveAssetSource(filePath)
|
|
204
|
-
if (source) path = source.uri
|
|
205
|
-
} catch (e) {
|
|
206
|
-
throw new Error(`Invalid asset: ${filePath}`)
|
|
207
|
-
}
|
|
208
|
-
} else {
|
|
209
|
-
if (filePath.startsWith('http'))
|
|
210
|
-
throw new Error(
|
|
211
|
-
'Transcribe remote file is not supported, please download it first',
|
|
212
|
-
)
|
|
213
|
-
path = filePath
|
|
214
|
-
}
|
|
215
|
-
if (path.startsWith('file://')) path = path.slice(7)
|
|
216
218
|
const jobId: number = Math.floor(Math.random() * 10000)
|
|
217
219
|
|
|
218
220
|
const { onProgress, onNewSegments, ...rest } = options
|
|
@@ -261,7 +263,7 @@ export class WhisperContext {
|
|
|
261
263
|
removeProgressListener()
|
|
262
264
|
removeNewSegmenetsListener()
|
|
263
265
|
},
|
|
264
|
-
promise: RNWhisper
|
|
266
|
+
promise: RNWhisper[method](this.id, jobId, data, {
|
|
265
267
|
...rest,
|
|
266
268
|
onProgress: !!onProgress,
|
|
267
269
|
onNewSegments: !!onNewSegments,
|
|
@@ -283,6 +285,48 @@ export class WhisperContext {
|
|
|
283
285
|
}
|
|
284
286
|
}
|
|
285
287
|
|
|
288
|
+
/**
|
|
289
|
+
* Transcribe audio file (path or base64 encoded wav file)
|
|
290
|
+
* base64: need add `data:audio/wav;base64,` prefix
|
|
291
|
+
*/
|
|
292
|
+
transcribe(
|
|
293
|
+
filePathOrBase64: string | number,
|
|
294
|
+
options: TranscribeFileOptions = {},
|
|
295
|
+
): {
|
|
296
|
+
/** Stop the transcribe */
|
|
297
|
+
stop: () => Promise<void>
|
|
298
|
+
/** Transcribe result promise */
|
|
299
|
+
promise: Promise<TranscribeResult>
|
|
300
|
+
} {
|
|
301
|
+
let path = ''
|
|
302
|
+
if (typeof filePathOrBase64 === 'number') {
|
|
303
|
+
try {
|
|
304
|
+
const source = Image.resolveAssetSource(filePathOrBase64)
|
|
305
|
+
if (source) path = source.uri
|
|
306
|
+
} catch (e) {
|
|
307
|
+
throw new Error(`Invalid asset: ${filePathOrBase64}`)
|
|
308
|
+
}
|
|
309
|
+
} else {
|
|
310
|
+
if (filePathOrBase64.startsWith('http'))
|
|
311
|
+
throw new Error(
|
|
312
|
+
'Transcribe remote file is not supported, please download it first',
|
|
313
|
+
)
|
|
314
|
+
path = filePathOrBase64
|
|
315
|
+
}
|
|
316
|
+
if (path.startsWith('file://')) path = path.slice(7)
|
|
317
|
+
return this.transcribeWithNativeMethod('transcribeFile', path, options)
|
|
318
|
+
}
|
|
319
|
+
|
|
320
|
+
/**
|
|
321
|
+
* Transcribe audio data (base64 encoded float32 PCM data)
|
|
322
|
+
*/
|
|
323
|
+
transcribeData(data: string, options: TranscribeFileOptions = {}): {
|
|
324
|
+
stop: () => Promise<void>
|
|
325
|
+
promise: Promise<TranscribeResult>
|
|
326
|
+
} {
|
|
327
|
+
return this.transcribeWithNativeMethod('transcribeData', data, options)
|
|
328
|
+
}
|
|
329
|
+
|
|
286
330
|
/** Transcribe the microphone audio stream, the microphone user permission is required */
|
|
287
331
|
async transcribeRealtime(options: TranscribeRealtimeOptions = {}): Promise<{
|
|
288
332
|
/** Stop the realtime transcribe */
|
|
@@ -297,7 +341,7 @@ export class WhisperContext {
|
|
|
297
341
|
let tOffset: number = 0
|
|
298
342
|
|
|
299
343
|
const putSlice = (payload: TranscribeRealtimeNativePayload) => {
|
|
300
|
-
if (!payload.isUseSlices) return
|
|
344
|
+
if (!payload.isUseSlices || !payload.data) return
|
|
301
345
|
if (sliceIndex !== payload.sliceIndex) {
|
|
302
346
|
const { segments = [] } = slices[sliceIndex]?.data || {}
|
|
303
347
|
tOffset = segments[segments.length - 1]?.t1 || 0
|
|
@@ -305,17 +349,15 @@ export class WhisperContext {
|
|
|
305
349
|
;({ sliceIndex } = payload)
|
|
306
350
|
slices[sliceIndex] = {
|
|
307
351
|
...payload,
|
|
308
|
-
data:
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
}
|
|
318
|
-
: undefined,
|
|
352
|
+
data: {
|
|
353
|
+
...payload.data,
|
|
354
|
+
segments:
|
|
355
|
+
payload.data.segments.map((segment) => ({
|
|
356
|
+
...segment,
|
|
357
|
+
t0: segment.t0 + tOffset,
|
|
358
|
+
t1: segment.t1 + tOffset,
|
|
359
|
+
})) || [],
|
|
360
|
+
}
|
|
319
361
|
}
|
|
320
362
|
}
|
|
321
363
|
|
|
@@ -420,6 +462,12 @@ export class WhisperContext {
|
|
|
420
462
|
}
|
|
421
463
|
}
|
|
422
464
|
|
|
465
|
+
async bench(maxThreads: number): Promise<BenchResult> {
|
|
466
|
+
const result = await RNWhisper.bench(this.id, maxThreads)
|
|
467
|
+
const [config, nThreads, encodeMs, decodeMs, batchMs, promptMs] = JSON.parse(result)
|
|
468
|
+
return { config, nThreads, encodeMs, decodeMs, batchMs, promptMs } as BenchResult
|
|
469
|
+
}
|
|
470
|
+
|
|
423
471
|
async release(): Promise<void> {
|
|
424
472
|
return RNWhisper.releaseContext(this.id)
|
|
425
473
|
}
|
|
@@ -440,6 +488,10 @@ export type ContextOptions = {
|
|
|
440
488
|
isBundleAsset?: boolean
|
|
441
489
|
/** Prefer to use Core ML model if exists. If set to false, even if the Core ML model exists, it will not be used. */
|
|
442
490
|
useCoreMLIos?: boolean
|
|
491
|
+
/** Use GPU if available. Currently iOS only, if it's enabled, Core ML option will be ignored. */
|
|
492
|
+
useGpu?: boolean
|
|
493
|
+
/** Use Flash Attention, only recommended if GPU available */
|
|
494
|
+
useFlashAttn?: boolean,
|
|
443
495
|
}
|
|
444
496
|
|
|
445
497
|
const coreMLModelAssetPaths = [
|
|
@@ -453,7 +505,9 @@ export async function initWhisper({
|
|
|
453
505
|
filePath,
|
|
454
506
|
coreMLModelAsset,
|
|
455
507
|
isBundleAsset,
|
|
508
|
+
useGpu = true,
|
|
456
509
|
useCoreMLIos = true,
|
|
510
|
+
useFlashAttn = false,
|
|
457
511
|
}: ContextOptions): Promise<WhisperContext> {
|
|
458
512
|
let path = ''
|
|
459
513
|
let coreMLAssets: CoreMLAsset[] | undefined
|
|
@@ -499,15 +553,17 @@ export async function initWhisper({
|
|
|
499
553
|
path = filePath
|
|
500
554
|
}
|
|
501
555
|
if (path.startsWith('file://')) path = path.slice(7)
|
|
502
|
-
const
|
|
556
|
+
const { contextId, gpu, reasonNoGPU } = await RNWhisper.initContext({
|
|
503
557
|
filePath: path,
|
|
504
558
|
isBundleAsset: !!isBundleAsset,
|
|
559
|
+
useFlashAttn,
|
|
560
|
+
useGpu,
|
|
505
561
|
useCoreMLIos,
|
|
506
562
|
// Only development mode need download Core ML model assets (from packager server)
|
|
507
563
|
downloadCoreMLAssets: __DEV__ && !!coreMLAssets,
|
|
508
564
|
coreMLAssets,
|
|
509
565
|
})
|
|
510
|
-
return new WhisperContext(
|
|
566
|
+
return new WhisperContext({ contextId, gpu, reasonNoGPU })
|
|
511
567
|
}
|
|
512
568
|
|
|
513
569
|
export async function releaseAllWhisper(): Promise<void> {
|
package/src/version.json
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":"1.
|
|
1
|
+
{"version":"1.7.2"}
|
package/whisper-rn.podspec
CHANGED
|
@@ -16,8 +16,7 @@ if ENV['RNWHISPER_DISABLE_COREML'] != '1' then
|
|
|
16
16
|
base_compiler_flags += " -DWHISPER_USE_COREML -DWHISPER_COREML_ALLOW_FALLBACK"
|
|
17
17
|
end
|
|
18
18
|
|
|
19
|
-
|
|
20
|
-
if ENV["RNWHISPER_ENABLE_METAL"] == "1" then
|
|
19
|
+
if ENV["RNWHISPER_DISABLE_METAL"] != "1" then
|
|
21
20
|
base_compiler_flags += " -DWSP_GGML_USE_METAL" # -DWSP_GGML_METAL_NDEBUG
|
|
22
21
|
end
|
|
23
22
|
|
|
@@ -32,8 +31,10 @@ Pod::Spec.new do |s|
|
|
|
32
31
|
s.platforms = { :ios => "11.0", :tvos => "11.0" }
|
|
33
32
|
s.source = { :git => "https://github.com/mybigday/whisper.rn.git", :tag => "#{s.version}" }
|
|
34
33
|
|
|
35
|
-
s.source_files = "ios/**/*.{h,m,mm}", "cpp
|
|
36
|
-
s.resources = "cpp
|
|
34
|
+
s.source_files = "ios/**/*.{h,m,mm}", "cpp/*.{h,cpp,c}", "cpp/coreml/*.{h,m,mm}"
|
|
35
|
+
s.resources = "cpp/*.{metallib}"
|
|
36
|
+
|
|
37
|
+
s.requires_arc = true
|
|
37
38
|
|
|
38
39
|
s.dependency "React-Core"
|
|
39
40
|
|
|
@@ -46,19 +47,11 @@ Pod::Spec.new do |s|
|
|
|
46
47
|
|
|
47
48
|
# Don't install the dependencies when we run `pod install` in the old architecture.
|
|
48
49
|
if ENV['RCT_NEW_ARCH_ENABLED'] == '1' then
|
|
49
|
-
s
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
"OTHER_CFLAGS" => base_optimizer_flags,
|
|
56
|
-
"OTHER_CPLUSPLUSFLAGS" => new_arch_cpp_flags + " " + base_optimizer_flags
|
|
57
|
-
}
|
|
58
|
-
s.dependency "React-Codegen"
|
|
59
|
-
s.dependency "RCT-Folly"
|
|
60
|
-
s.dependency "RCTRequired"
|
|
61
|
-
s.dependency "RCTTypeSafety"
|
|
62
|
-
s.dependency "ReactCommon/turbomodule/core"
|
|
50
|
+
install_modules_dependencies(s)
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
s.subspec "no-require-arc" do |ss|
|
|
54
|
+
ss.requires_arc = false
|
|
55
|
+
ss.source_files = "cpp/*.m"
|
|
63
56
|
end
|
|
64
57
|
end
|