whisper.rn 0.3.6 → 0.3.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +28 -0
- package/android/src/main/java/com/rnwhisper/AudioUtils.java +119 -0
- package/android/src/main/java/com/rnwhisper/WhisperContext.java +74 -39
- package/android/src/main/jni.cpp +45 -12
- package/android/src/newarch/java/com/rnwhisper/RNWhisperModule.java +26 -0
- package/cpp/rn-whisper.cpp +51 -0
- package/cpp/rn-whisper.h +2 -1
- package/ios/RNWhisper.mm +81 -22
- package/ios/RNWhisper.xcodeproj/project.pbxproj +27 -3
- package/ios/RNWhisper.xcodeproj/project.xcworkspace/xcuserdata/jhen.xcuserdatad/UserInterfaceState.xcuserstate +0 -0
- package/ios/RNWhisper.xcodeproj/xcuserdata/jhen.xcuserdatad/xcschemes/xcschememanagement.plist +5 -0
- package/ios/RNWhisperAudioSessionUtils.h +13 -0
- package/ios/RNWhisperAudioSessionUtils.m +85 -0
- package/ios/RNWhisperAudioUtils.h +9 -0
- package/ios/RNWhisperAudioUtils.m +83 -0
- package/ios/RNWhisperContext.h +1 -0
- package/ios/RNWhisperContext.mm +101 -28
- package/lib/commonjs/AudioSessionIos.js +91 -0
- package/lib/commonjs/AudioSessionIos.js.map +1 -0
- package/lib/commonjs/NativeRNWhisper.js.map +1 -1
- package/lib/commonjs/index.js +82 -14
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/AudioSessionIos.js +83 -0
- package/lib/module/AudioSessionIos.js.map +1 -0
- package/lib/module/NativeRNWhisper.js.map +1 -1
- package/lib/module/index.js +77 -14
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/AudioSessionIos.d.ts +54 -0
- package/lib/typescript/AudioSessionIos.d.ts.map +1 -0
- package/lib/typescript/NativeRNWhisper.d.ts +8 -0
- package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +62 -4
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/AudioSessionIos.ts +90 -0
- package/src/NativeRNWhisper.ts +11 -1
- package/src/index.ts +178 -28
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @see https://developer.apple.com/documentation/avfaudio/avaudiosessioncategory?language=objc
|
|
3
|
+
*/
|
|
4
|
+
export declare enum AudioSessionCategoryIos {
|
|
5
|
+
Ambient = "Ambient",
|
|
6
|
+
SoloAmbient = "SoloAmbient",
|
|
7
|
+
Playback = "Playback",
|
|
8
|
+
Record = "Record",
|
|
9
|
+
PlayAndRecord = "PlayAndRecord",
|
|
10
|
+
MultiRoute = "MultiRoute"
|
|
11
|
+
}
|
|
12
|
+
/**
|
|
13
|
+
* @see https://developer.apple.com/documentation/avfaudio/avaudiosessioncategoryoptions?language=objc
|
|
14
|
+
*/
|
|
15
|
+
export declare enum AudioSessionCategoryOptionIos {
|
|
16
|
+
MixWithOthers = "MixWithOthers",
|
|
17
|
+
DuckOthers = "DuckOthers",
|
|
18
|
+
InterruptSpokenAudioAndMixWithOthers = "InterruptSpokenAudioAndMixWithOthers",
|
|
19
|
+
AllowBluetooth = "AllowBluetooth",
|
|
20
|
+
AllowBluetoothA2DP = "AllowBluetoothA2DP",
|
|
21
|
+
AllowAirPlay = "AllowAirPlay",
|
|
22
|
+
DefaultToSpeaker = "DefaultToSpeaker"
|
|
23
|
+
}
|
|
24
|
+
/**
|
|
25
|
+
* @see https://developer.apple.com/documentation/avfaudio/avaudiosessionmode?language=objc
|
|
26
|
+
*/
|
|
27
|
+
export declare enum AudioSessionModeIos {
|
|
28
|
+
Default = "Default",
|
|
29
|
+
VoiceChat = "VoiceChat",
|
|
30
|
+
VideoChat = "VideoChat",
|
|
31
|
+
GameChat = "GameChat",
|
|
32
|
+
VideoRecording = "VideoRecording",
|
|
33
|
+
Measurement = "Measurement",
|
|
34
|
+
MoviePlayback = "MoviePlayback",
|
|
35
|
+
SpokenAudio = "SpokenAudio"
|
|
36
|
+
}
|
|
37
|
+
/**
|
|
38
|
+
* AudioSession Utility, iOS only.
|
|
39
|
+
*/
|
|
40
|
+
declare const _default: {
|
|
41
|
+
Category: typeof AudioSessionCategoryIos;
|
|
42
|
+
CategoryOption: typeof AudioSessionCategoryOptionIos;
|
|
43
|
+
Mode: typeof AudioSessionModeIos;
|
|
44
|
+
getCurrentCategory: () => Promise<{
|
|
45
|
+
category: AudioSessionCategoryIos;
|
|
46
|
+
options: AudioSessionCategoryOptionIos[];
|
|
47
|
+
}>;
|
|
48
|
+
getCurrentMode: () => Promise<AudioSessionModeIos>;
|
|
49
|
+
setCategory: (category: AudioSessionCategoryIos, options: AudioSessionCategoryOptionIos[]) => Promise<void>;
|
|
50
|
+
setMode: (mode: AudioSessionModeIos) => Promise<void>;
|
|
51
|
+
setActive: (active: boolean) => Promise<void>;
|
|
52
|
+
};
|
|
53
|
+
export default _default;
|
|
54
|
+
//# sourceMappingURL=AudioSessionIos.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"AudioSessionIos.d.ts","sourceRoot":"","sources":["../../src/AudioSessionIos.ts"],"names":[],"mappings":"AAGA;;GAEG;AACH,oBAAY,uBAAuB;IACjC,OAAO,YAAY;IACnB,WAAW,gBAAgB;IAC3B,QAAQ,aAAa;IACrB,MAAM,WAAW;IACjB,aAAa,kBAAkB;IAC/B,UAAU,eAAe;CAC1B;AAED;;GAEG;AACH,oBAAY,6BAA6B;IACvC,aAAa,kBAAkB;IAC/B,UAAU,eAAe;IACzB,oCAAoC,yCAAyC;IAC7E,cAAc,mBAAmB;IACjC,kBAAkB,uBAAuB;IACzC,YAAY,iBAAiB;IAC7B,gBAAgB,qBAAqB;CACtC;AAED;;GAEG;AACH,oBAAY,mBAAmB;IAC7B,OAAO,YAAY;IACnB,SAAS,cAAc;IACvB,SAAS,cAAc;IACvB,QAAQ,aAAa;IACrB,cAAc,mBAAmB;IACjC,WAAW,gBAAgB;IAC3B,aAAa,kBAAkB;IAC/B,WAAW,gBAAgB;CAC5B;AAMD;;GAEG;;;;;8BAM6B,QAAQ;QACpC,QAAQ,EAAE,uBAAuB,CAAC;QAClC,OAAO,EAAE,6BAA6B,EAAE,CAAC;KAC1C,CAAC;0BASwB,QAAQ,mBAAmB,CAAC;4BAO1C,uBAAuB,WACxB,6BAA6B,EAAE,KACvC,QAAQ,IAAI,CAAC;oBAKM,mBAAmB,KAAG,QAAQ,IAAI,CAAC;wBAK/B,OAAO,KAAG,QAAQ,IAAI,CAAC;;AApCnD,wBAwCC"}
|
|
@@ -60,6 +60,14 @@ export interface Spec extends TurboModule {
|
|
|
60
60
|
transcribeFile(contextId: number, jobId: number, path: string, options: {}): Promise<TranscribeResult>;
|
|
61
61
|
startRealtimeTranscribe(contextId: number, jobId: number, options: TranscribeOptions): Promise<void>;
|
|
62
62
|
abortTranscribe(contextId: number, jobId: number): Promise<void>;
|
|
63
|
+
getAudioSessionCurrentCategory: () => Promise<{
|
|
64
|
+
category: string;
|
|
65
|
+
options: Array<string>;
|
|
66
|
+
}>;
|
|
67
|
+
getAudioSessionCurrentMode: () => Promise<string>;
|
|
68
|
+
setAudioSessionCategory: (category: string, options: Array<string>) => Promise<void>;
|
|
69
|
+
setAudioSessionMode: (mode: string) => Promise<void>;
|
|
70
|
+
setAudioSessionActive: (active: boolean) => Promise<void>;
|
|
63
71
|
}
|
|
64
72
|
declare const _default: Spec;
|
|
65
73
|
export default _default;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"NativeRNWhisper.d.ts","sourceRoot":"","sources":["../../src/NativeRNWhisper.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,8CAA8C,CAAA;AAI/E,MAAM,MAAM,iBAAiB,GAAG;IAC9B,wDAAwD;IACxD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,iEAAiE;IACjE,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,oGAAoG;IACpG,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,qDAAqD;IACrD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,2CAA2C;IAC3C,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,oCAAoC;IACpC,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,2CAA2C;IAC3C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,kCAAkC;IAClC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,mDAAmD;IACnD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,mCAAmC;IACnC,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,gCAAgC;IAChC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,wCAAwC;IACxC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,8CAA8C;IAC9C,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,qBAAqB;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB,CAAA;AAED,MAAM,MAAM,gBAAgB,GAAG;IAC7B,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,KAAK,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,EAAE,EAAE,MAAM,CAAC;QACX,EAAE,EAAE,MAAM,CAAC;KACZ,CAAC,CAAC;IACH,SAAS,EAAE,OAAO,CAAC;CACpB,CAAA;AAED,MAAM,MAAM,WAAW,GAAG;IACxB,GAAG,EAAE,MAAM,CAAC;IACZ,QAAQ,EAAE,MAAM,CAAC;CAClB,CAAA;AAED,KAAK,oBAAoB,GAAG;IAC1B,QAAQ,EAAE,MAAM,CAAC;IACjB,aAAa,EAAE,OAAO,CAAC;IACvB,oBAAoB,CAAC,EAAE,OAAO,CAAC;IAC/B,YAAY,CAAC,EAAE,WAAW,EAAE,CAAC;CAC9B,CAAA;AAED,MAAM,WAAW,IAAK,SAAQ,WAAW;IACvC,YAAY,IAAI;QACd,SAAS,EAAE,OAAO,CAAA;QAClB,mBAAmB,EAAE,OAAO,CAAA;KAC7B,CAAC;IACF,WAAW,CAAC,OAAO,EAAE,oBAAoB,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;IAC5D,cAAc,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACjD,kBAAkB,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IACpC,cAAc,CACZ,SAAS,EAAE,MAAM,EACjB,KAAK,EAAE,MAAM,EACb,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,EAAE,GACV,OAAO,CAAC,gBAAgB,CAAC,CAAC;IAC7B,uBAAuB,CACrB,SAAS,EAAE,MAAM,EACjB,KAAK,EAAE,MAAM,EACb,OAAO,EAAE,iBAAiB,GACzB,OAAO,CAAC,IAAI,CAAC,CAAC;IACjB,eAAe,CAAC,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;
|
|
1
|
+
{"version":3,"file":"NativeRNWhisper.d.ts","sourceRoot":"","sources":["../../src/NativeRNWhisper.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,8CAA8C,CAAA;AAI/E,MAAM,MAAM,iBAAiB,GAAG;IAC9B,wDAAwD;IACxD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,iEAAiE;IACjE,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,oGAAoG;IACpG,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,qDAAqD;IACrD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,2CAA2C;IAC3C,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,oCAAoC;IACpC,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,2CAA2C;IAC3C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,kCAAkC;IAClC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,mDAAmD;IACnD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,mCAAmC;IACnC,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,gCAAgC;IAChC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,wCAAwC;IACxC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,8CAA8C;IAC9C,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,qBAAqB;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB,CAAA;AAED,MAAM,MAAM,gBAAgB,GAAG;IAC7B,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,KAAK,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,EAAE,EAAE,MAAM,CAAC;QACX,EAAE,EAAE,MAAM,CAAC;KACZ,CAAC,CAAC;IACH,SAAS,EAAE,OAAO,CAAC;CACpB,CAAA;AAED,MAAM,MAAM,WAAW,GAAG;IACxB,GAAG,EAAE,MAAM,CAAC;IACZ,QAAQ,EAAE,MAAM,CAAC;CAClB,CAAA;AAED,KAAK,oBAAoB,GAAG;IAC1B,QAAQ,EAAE,MAAM,CAAC;IACjB,aAAa,EAAE,OAAO,CAAC;IACvB,oBAAoB,CAAC,EAAE,OAAO,CAAC;IAC/B,YAAY,CAAC,EAAE,WAAW,EAAE,CAAC;CAC9B,CAAA;AAED,MAAM,WAAW,IAAK,SAAQ,WAAW;IACvC,YAAY,IAAI;QACd,SAAS,EAAE,OAAO,CAAA;QAClB,mBAAmB,EAAE,OAAO,CAAA;KAC7B,CAAC;IACF,WAAW,CAAC,OAAO,EAAE,oBAAoB,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;IAC5D,cAAc,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACjD,kBAAkB,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IACpC,cAAc,CACZ,SAAS,EAAE,MAAM,EACjB,KAAK,EAAE,MAAM,EACb,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,EAAE,GACV,OAAO,CAAC,gBAAgB,CAAC,CAAC;IAC7B,uBAAuB,CACrB,SAAS,EAAE,MAAM,EACjB,KAAK,EAAE,MAAM,EACb,OAAO,EAAE,iBAAiB,GACzB,OAAO,CAAC,IAAI,CAAC,CAAC;IACjB,eAAe,CAAC,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAGjE,8BAA8B,EAAE,MAAM,OAAO,CAAC;QAC5C,QAAQ,EAAE,MAAM,CAAC;QACjB,OAAO,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;KACxB,CA
AC,CAAC;IACH,0BAA0B,EAAE,MAAM,OAAO,CAAC,MAAM,CAAC,CAAC;IAClD,uBAAuB,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,KAAK,CAAC,MAAM,CAAC,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;IACrF,mBAAmB,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;IACrD,qBAAqB,EAAE,CAAC,MAAM,EAAE,OAAO,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;CAC3D;;AAED,wBAAiE"}
|
|
@@ -1,16 +1,39 @@
|
|
|
1
1
|
import type { TranscribeOptions, TranscribeResult } from './NativeRNWhisper';
|
|
2
|
-
|
|
2
|
+
import AudioSessionIos from './AudioSessionIos';
|
|
3
|
+
import type { AudioSessionCategoryIos, AudioSessionCategoryOptionIos, AudioSessionModeIos } from './AudioSessionIos';
|
|
4
|
+
export type { TranscribeOptions, TranscribeResult, AudioSessionCategoryIos, AudioSessionCategoryOptionIos, AudioSessionModeIos, };
|
|
5
|
+
export type TranscribeNewSegmentsResult = {
|
|
6
|
+
nNew: number;
|
|
7
|
+
totalNNew: number;
|
|
8
|
+
result: string;
|
|
9
|
+
segments: TranscribeResult['segments'];
|
|
10
|
+
};
|
|
11
|
+
export type TranscribeNewSegmentsNativeEvent = {
|
|
12
|
+
contextId: number;
|
|
13
|
+
jobId: number;
|
|
14
|
+
result: TranscribeNewSegmentsResult;
|
|
15
|
+
};
|
|
3
16
|
export type TranscribeFileOptions = TranscribeOptions & {
|
|
4
17
|
/**
|
|
5
18
|
* Progress callback, the progress is between 0 and 100
|
|
6
19
|
*/
|
|
7
20
|
onProgress?: (progress: number) => void;
|
|
21
|
+
/**
|
|
22
|
+
* Callback when new segments are transcribed
|
|
23
|
+
*/
|
|
24
|
+
onNewSegments?: (result: TranscribeNewSegmentsResult) => void;
|
|
8
25
|
};
|
|
9
26
|
export type TranscribeProgressNativeEvent = {
|
|
10
27
|
contextId: number;
|
|
11
28
|
jobId: number;
|
|
12
29
|
progress: number;
|
|
13
30
|
};
|
|
31
|
+
export type AudioSessionSettingIos = {
|
|
32
|
+
category: AudioSessionCategoryIos;
|
|
33
|
+
options?: AudioSessionCategoryOptionIos[];
|
|
34
|
+
mode?: AudioSessionModeIos;
|
|
35
|
+
active?: boolean;
|
|
36
|
+
};
|
|
14
37
|
export type TranscribeRealtimeOptions = TranscribeOptions & {
|
|
15
38
|
/**
|
|
16
39
|
* Realtime record max duration in seconds.
|
|
@@ -24,6 +47,40 @@ export type TranscribeRealtimeOptions = TranscribeOptions & {
|
|
|
24
47
|
* (Default: Equal to `realtimeMaxAudioSec`)
|
|
25
48
|
*/
|
|
26
49
|
realtimeAudioSliceSec?: number;
|
|
50
|
+
/**
|
|
51
|
+
* Output path for audio file. If not set, the audio file will not be saved
|
|
52
|
+
* (Default: Undefined)
|
|
53
|
+
*/
|
|
54
|
+
audioOutputPath?: string;
|
|
55
|
+
/**
|
|
56
|
+
* Start transcribe on recording when the audio volume is greater than the threshold by using VAD (Voice Activity Detection).
|
|
57
|
+
* The first VAD will be triggered after 2 seconds of recording.
|
|
58
|
+
* (Default: false)
|
|
59
|
+
*/
|
|
60
|
+
useVad?: boolean;
|
|
61
|
+
/**
|
|
62
|
+
* The length of collected audio used for VAD, in milliseconds. (Default: 2000)
|
|
63
|
+
*/
|
|
64
|
+
vadMs?: number;
|
|
65
|
+
/**
|
|
66
|
+
* VAD threshold. (Default: 0.6)
|
|
67
|
+
*/
|
|
68
|
+
vadThold?: number;
|
|
69
|
+
/**
|
|
70
|
+
* Frequency to apply High-pass filter in VAD. (Default: 100.0)
|
|
71
|
+
*/
|
|
72
|
+
vadFreqThold?: number;
|
|
73
|
+
/**
|
|
74
|
+
* iOS: Audio session settings to apply when transcription starts
|
|
75
|
+
* Keep empty to use current audio session state
|
|
76
|
+
*/
|
|
77
|
+
audioSessionOnStartIos?: AudioSessionSettingIos;
|
|
78
|
+
/**
|
|
79
|
+
* iOS: Audio session settings to apply when transcription stops
|
|
80
|
+
* - Keep empty to use last audio session state
|
|
81
|
+
* - Use `restore` to restore audio session state before start transcribe
|
|
82
|
+
*/
|
|
83
|
+
audioSessionOnStopIos?: string | AudioSessionSettingIos;
|
|
27
84
|
};
|
|
28
85
|
export type TranscribeRealtimeEvent = {
|
|
29
86
|
contextId: number;
|
|
@@ -67,14 +124,14 @@ export declare class WhisperContext {
|
|
|
67
124
|
/** Transcribe audio file */
|
|
68
125
|
transcribe(filePath: string | number, options?: TranscribeFileOptions): {
|
|
69
126
|
/** Stop the transcribe */
|
|
70
|
-
stop: () => void
|
|
127
|
+
stop: () => Promise<void>;
|
|
71
128
|
/** Transcribe result promise */
|
|
72
129
|
promise: Promise<TranscribeResult>;
|
|
73
130
|
};
|
|
74
131
|
/** Transcribe the microphone audio stream, the microphone user permission is required */
|
|
75
132
|
transcribeRealtime(options?: TranscribeRealtimeOptions): Promise<{
|
|
76
133
|
/** Stop the realtime transcribe */
|
|
77
|
-
stop: () => void
|
|
134
|
+
stop: () => Promise<void>;
|
|
78
135
|
/** Subscribe to realtime transcribe events */
|
|
79
136
|
subscribe: (callback: (event: TranscribeRealtimeEvent) => void) => void;
|
|
80
137
|
}>;
|
|
@@ -89,7 +146,7 @@ export type ContextOptions = {
|
|
|
89
146
|
*/
|
|
90
147
|
coreMLModelAsset?: {
|
|
91
148
|
filename: string;
|
|
92
|
-
assets: number[];
|
|
149
|
+
assets: string[] | number[];
|
|
93
150
|
};
|
|
94
151
|
/** Is the file path a bundle asset for pure string filePath */
|
|
95
152
|
isBundleAsset?: boolean;
|
|
@@ -102,4 +159,5 @@ export declare const libVersion: string;
|
|
|
102
159
|
export declare const isUseCoreML: boolean;
|
|
103
160
|
/** Whether falling back to CPU is allowed if loading the CoreML model fails */
|
|
104
161
|
export declare const isCoreMLAllowFallback: boolean;
|
|
162
|
+
export { AudioSessionIos };
|
|
105
163
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAQA,OAAO,KAAK,EACV,iBAAiB,EACjB,gBAAgB,EAEjB,MAAM,mBAAmB,CAAA;AAY1B,YAAY,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAQA,OAAO,KAAK,EACV,iBAAiB,EACjB,gBAAgB,EAEjB,MAAM,mBAAmB,CAAA;AAC1B,OAAO,eAAe,MAAM,mBAAmB,CAAA;AAC/C,OAAO,KAAK,EACV,uBAAuB,EACvB,6BAA6B,EAC7B,mBAAmB,EACpB,MAAM,mBAAmB,CAAA;AAY1B,YAAY,EACV,iBAAiB,EACjB,gBAAgB,EAChB,uBAAuB,EACvB,6BAA6B,EAC7B,mBAAmB,GACpB,CAAA;AAQD,MAAM,MAAM,2BAA2B,GAAG;IACxC,IAAI,EAAE,MAAM,CAAA;IACZ,SAAS,EAAE,MAAM,CAAA;IACjB,MAAM,EAAE,MAAM,CAAA;IACd,QAAQ,EAAE,gBAAgB,CAAC,UAAU,CAAC,CAAA;CACvC,CAAA;AAED,MAAM,MAAM,gCAAgC,GAAG;IAC7C,SAAS,EAAE,MAAM,CAAA;IACjB,KAAK,EAAE,MAAM,CAAA;IACb,MAAM,EAAE,2BAA2B,CAAA;CACpC,CAAA;AAGD,MAAM,MAAM,qBAAqB,GAAG,iBAAiB,GAAG;IACtD;;OAEG;IACH,UAAU,CAAC,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAI,CAAA;IACvC;;OAEG;IACH,aAAa,CAAC,EAAE,CAAC,MAAM,EAAE,2BAA2B,KAAK,IAAI,CAAA;CAC9D,CAAA;AAED,MAAM,MAAM,6BAA6B,GAAG;IAC1C,SAAS,EAAE,MAAM,CAAA;IACjB,KAAK,EAAE,MAAM,CAAA;IACb,QAAQ,EAAE,MAAM,CAAA;CACjB,CAAA;AAED,MAAM,MAAM,sBAAsB,GAAG;IACnC,QAAQ,EAAE,uBAAuB,CAAA;IACjC,OAAO,CAAC,EAAE,6BAA6B,EAAE,CAAA;IACzC,IAAI,CAAC,EAAE,mBAAmB,CAAA;IAC1B,MAAM,CAAC,EAAE,OAAO,CAAA;CACjB,CAAA;AAGD,MAAM,MAAM,yBAAyB,GAAG,iBAAiB,GAAG;IAC1D;;;;OAIG;IACH,gBAAgB,CAAC,EAAE,MAAM,CAAA;IACzB;;;;OAIG;IACH,qBAAqB,CAAC,EAAE,MAAM,CAAA;IAC9B;;;OAGG;IACH,eAAe,CAAC,EAAE,MAAM,CAAA;IACxB;;;;OAIG;IACH,MAAM,CAAC,EAAE,OAAO,CAAA;IAChB;;OAEG;IACH,KAAK,CAAC,EAAE,MAAM,CAAA;IACd;;OAEG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB;;OAEG;IACH,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB;;;OAGG;IACH,sBAAsB,CAAC,EAAE,sBAAsB,CAAA;IAC/C;;;;OAIG;IACH,qBAAqB,CAAC,EAAE,MAAM,GAAG,sBAAsB,CAAA;CACxD,CAAA;AAED,MAAM,MAAM,uBAAuB,GAAG;IACpC,SAAS,EAAE,MAAM,CAAA;IACjB,KAAK,EAAE,MAAM,CAAA;IACb,oEAAoE;IACpE,WAAW,EAAE,OAAO,CAAA;IACpB,iBAAiB,CAAC,EAAE,OAAO,CAAA;IAC3B,IAAI,EAAE,MAAM,CAAA;IACZ,IAAI,CAAC,EAAE,gBAAgB,CAAA;IACvB,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,WAAW,EAAE,MAAM,CAAA;IACnB,aAAa,EAAE,MAAM,CAAA;IACrB,MAAM,CAAC,EAAE,KAAK,CAAC;QACb,IAAI,EAAE,MAAM,CAAA;QACZ,KAAK,CAAC,EAAE,MAAM,CAAA;QACd,IAAI,CAAC,EAAE,gBAAgB,CAAA;QACvB,WAAW,EAAE,MAAM,CAAA;
QACnB,aAAa,EAAE,MAAM,CAAA;KACtB,CAAC,CAAA;CACH,CAAA;AAED,MAAM,MAAM,+BAA+B,GAAG;IAC5C,oEAAoE;IACpE,WAAW,EAAE,OAAO,CAAA;IACpB,iBAAiB,CAAC,EAAE,OAAO,CAAA;IAC3B,IAAI,EAAE,MAAM,CAAA;IACZ,WAAW,EAAE,MAAM,CAAA;IACnB,aAAa,EAAE,MAAM,CAAA;IACrB,WAAW,EAAE,OAAO,CAAA;IACpB,UAAU,EAAE,MAAM,CAAA;IAClB,IAAI,CAAC,EAAE,gBAAgB,CAAA;IACvB,KAAK,CAAC,EAAE,MAAM,CAAA;CACf,CAAA;AAED,MAAM,MAAM,6BAA6B,GAAG;IAC1C,SAAS,EAAE,MAAM,CAAA;IACjB,KAAK,EAAE,MAAM,CAAA;IACb,OAAO,EAAE,+BAA+B,CAAA;CACzC,CAAA;AAaD,qBAAa,cAAc;IACzB,EAAE,EAAE,MAAM,CAAA;gBAEE,EAAE,EAAE,MAAM;IAItB,4BAA4B;IAC5B,UAAU,CACR,QAAQ,EAAE,MAAM,GAAG,MAAM,EACzB,OAAO,GAAE,qBAA0B,GAClC;QACD,0BAA0B;QAC1B,IAAI,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,CAAA;QACzB,gCAAgC;QAChC,OAAO,EAAE,OAAO,CAAC,gBAAgB,CAAC,CAAA;KACnC;IAuFD,yFAAyF;IACnF,kBAAkB,CAAC,OAAO,GAAE,yBAA8B,GAAG,OAAO,CAAC;QACzE,mCAAmC;QACnC,IAAI,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,CAAA;QACzB,8CAA8C;QAC9C,SAAS,EAAE,CAAC,QAAQ,EAAE,CAAC,KAAK,EAAE,uBAAuB,KAAK,IAAI,KAAK,IAAI,CAAA;KACxE,CAAC;IAmII,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAG/B;AAED,MAAM,MAAM,cAAc,GAAG;IAC3B,QAAQ,EAAE,MAAM,GAAG,MAAM,CAAA;IACzB;;;;OAIG;IACH,gBAAgB,CAAC,EAAE;QACjB,QAAQ,EAAE,MAAM,CAAA;QAChB,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE,CAAA;KAC5B,CAAA;IACD,+DAA+D;IAC/D,aAAa,CAAC,EAAE,OAAO,CAAA;CACxB,CAAA;AASD,wBAAsB,WAAW,CAAC,EAChC,QAAQ,EACR,gBAAgB,EAChB,aAAa,GACd,EAAE,cAAc,GAAG,OAAO,CAAC,cAAc,CAAC,CAqD1C;AAED,wBAAsB,iBAAiB,IAAI,OAAO,CAAC,IAAI,CAAC,CAEvD;AAED,qCAAqC;AACrC,eAAO,MAAM,UAAU,EAAE,MAAgB,CAAA;AAIzC,kCAAkC;AAClC,eAAO,MAAM,WAAW,EAAE,OAAqB,CAAA;AAE/C,2DAA2D;AAC3D,eAAO,MAAM,qBAAqB,EAAE,OAA+B,CAAA;AAEnE,OAAO,EAAE,eAAe,EAAE,CAAA"}
|
package/package.json
CHANGED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
import { Platform } from 'react-native'
|
|
2
|
+
import RNWhisper from './NativeRNWhisper'
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* @see https://developer.apple.com/documentation/avfaudio/avaudiosessioncategory?language=objc
|
|
6
|
+
*/
|
|
7
|
+
export enum AudioSessionCategoryIos {
|
|
8
|
+
Ambient = 'Ambient',
|
|
9
|
+
SoloAmbient = 'SoloAmbient',
|
|
10
|
+
Playback = 'Playback',
|
|
11
|
+
Record = 'Record',
|
|
12
|
+
PlayAndRecord = 'PlayAndRecord',
|
|
13
|
+
MultiRoute = 'MultiRoute',
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
/**
|
|
17
|
+
* @see https://developer.apple.com/documentation/avfaudio/avaudiosessioncategoryoptions?language=objc
|
|
18
|
+
*/
|
|
19
|
+
export enum AudioSessionCategoryOptionIos {
|
|
20
|
+
MixWithOthers = 'MixWithOthers',
|
|
21
|
+
DuckOthers = 'DuckOthers',
|
|
22
|
+
InterruptSpokenAudioAndMixWithOthers = 'InterruptSpokenAudioAndMixWithOthers',
|
|
23
|
+
AllowBluetooth = 'AllowBluetooth',
|
|
24
|
+
AllowBluetoothA2DP = 'AllowBluetoothA2DP',
|
|
25
|
+
AllowAirPlay = 'AllowAirPlay',
|
|
26
|
+
DefaultToSpeaker = 'DefaultToSpeaker',
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
/**
|
|
30
|
+
* @see https://developer.apple.com/documentation/avfaudio/avaudiosessionmode?language=objc
|
|
31
|
+
*/
|
|
32
|
+
export enum AudioSessionModeIos {
|
|
33
|
+
Default = 'Default',
|
|
34
|
+
VoiceChat = 'VoiceChat',
|
|
35
|
+
VideoChat = 'VideoChat',
|
|
36
|
+
GameChat = 'GameChat',
|
|
37
|
+
VideoRecording = 'VideoRecording',
|
|
38
|
+
Measurement = 'Measurement',
|
|
39
|
+
MoviePlayback = 'MoviePlayback',
|
|
40
|
+
SpokenAudio = 'SpokenAudio',
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
const checkPlatform = () => {
|
|
44
|
+
if (Platform.OS !== 'ios') throw new Error('Only supported on iOS')
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
/**
|
|
48
|
+
* AudioSession Utility, iOS only.
|
|
49
|
+
*/
|
|
50
|
+
export default {
|
|
51
|
+
Category: AudioSessionCategoryIos,
|
|
52
|
+
CategoryOption: AudioSessionCategoryOptionIos,
|
|
53
|
+
Mode: AudioSessionModeIos,
|
|
54
|
+
|
|
55
|
+
getCurrentCategory: async (): Promise<{
|
|
56
|
+
category: AudioSessionCategoryIos,
|
|
57
|
+
options: AudioSessionCategoryOptionIos[],
|
|
58
|
+
}> => {
|
|
59
|
+
checkPlatform()
|
|
60
|
+
const result = await RNWhisper.getAudioSessionCurrentCategory()
|
|
61
|
+
return {
|
|
62
|
+
category: (result.category.replace('AVAudioSessionCategory', '') as AudioSessionCategoryIos),
|
|
63
|
+
options: result.options?.map((option: string) => (option.replace('AVAudioSessionCategoryOption', '') as AudioSessionCategoryOptionIos)),
|
|
64
|
+
}
|
|
65
|
+
},
|
|
66
|
+
|
|
67
|
+
getCurrentMode: async (): Promise<AudioSessionModeIos> => {
|
|
68
|
+
checkPlatform()
|
|
69
|
+
const mode = await RNWhisper.getAudioSessionCurrentMode()
|
|
70
|
+
return (mode.replace('AVAudioSessionMode', '') as AudioSessionModeIos)
|
|
71
|
+
},
|
|
72
|
+
|
|
73
|
+
setCategory: async (
|
|
74
|
+
category: AudioSessionCategoryIos,
|
|
75
|
+
options: AudioSessionCategoryOptionIos[],
|
|
76
|
+
): Promise<void> => {
|
|
77
|
+
checkPlatform()
|
|
78
|
+
await RNWhisper.setAudioSessionCategory(category, options)
|
|
79
|
+
},
|
|
80
|
+
|
|
81
|
+
setMode: async (mode: AudioSessionModeIos): Promise<void> => {
|
|
82
|
+
checkPlatform()
|
|
83
|
+
await RNWhisper.setAudioSessionMode(mode)
|
|
84
|
+
},
|
|
85
|
+
|
|
86
|
+
setActive: async (active: boolean): Promise<void> => {
|
|
87
|
+
checkPlatform()
|
|
88
|
+
await RNWhisper.setAudioSessionActive(active)
|
|
89
|
+
},
|
|
90
|
+
}
|
package/src/NativeRNWhisper.ts
CHANGED
|
@@ -68,7 +68,7 @@ export interface Spec extends TurboModule {
|
|
|
68
68
|
contextId: number,
|
|
69
69
|
jobId: number,
|
|
70
70
|
path: string,
|
|
71
|
-
options: {}, // TranscribeOptions & { onProgress?: boolean }
|
|
71
|
+
options: {}, // TranscribeOptions & { onProgress?: boolean, onNewSegments?: boolean }
|
|
72
72
|
): Promise<TranscribeResult>;
|
|
73
73
|
startRealtimeTranscribe(
|
|
74
74
|
contextId: number,
|
|
@@ -76,6 +76,16 @@ export interface Spec extends TurboModule {
|
|
|
76
76
|
options: TranscribeOptions,
|
|
77
77
|
): Promise<void>;
|
|
78
78
|
abortTranscribe(contextId: number, jobId: number): Promise<void>;
|
|
79
|
+
|
|
80
|
+
// iOS specific
|
|
81
|
+
getAudioSessionCurrentCategory: () => Promise<{
|
|
82
|
+
category: string,
|
|
83
|
+
options: Array<string>,
|
|
84
|
+
}>;
|
|
85
|
+
getAudioSessionCurrentMode: () => Promise<string>;
|
|
86
|
+
setAudioSessionCategory: (category: string, options: Array<string>) => Promise<void>;
|
|
87
|
+
setAudioSessionMode: (mode: string) => Promise<void>;
|
|
88
|
+
setAudioSessionActive: (active: boolean) => Promise<void>;
|
|
79
89
|
}
|
|
80
90
|
|
|
81
91
|
export default TurboModuleRegistry.get<Spec>('RNWhisper') as Spec
|
package/src/index.ts
CHANGED
|
@@ -11,6 +11,12 @@ import type {
|
|
|
11
11
|
TranscribeResult,
|
|
12
12
|
CoreMLAsset,
|
|
13
13
|
} from './NativeRNWhisper'
|
|
14
|
+
import AudioSessionIos from './AudioSessionIos'
|
|
15
|
+
import type {
|
|
16
|
+
AudioSessionCategoryIos,
|
|
17
|
+
AudioSessionCategoryOptionIos,
|
|
18
|
+
AudioSessionModeIos,
|
|
19
|
+
} from './AudioSessionIos'
|
|
14
20
|
import { version } from './version.json'
|
|
15
21
|
|
|
16
22
|
let EventEmitter: NativeEventEmitter | DeviceEventEmitterStatic
|
|
@@ -22,20 +28,43 @@ if (Platform.OS === 'android') {
|
|
|
22
28
|
EventEmitter = DeviceEventEmitter
|
|
23
29
|
}
|
|
24
30
|
|
|
25
|
-
export type {
|
|
26
|
-
|
|
31
|
+
export type {
|
|
32
|
+
TranscribeOptions,
|
|
33
|
+
TranscribeResult,
|
|
34
|
+
AudioSessionCategoryIos,
|
|
35
|
+
AudioSessionCategoryOptionIos,
|
|
36
|
+
AudioSessionModeIos,
|
|
37
|
+
}
|
|
27
38
|
|
|
28
39
|
const EVENT_ON_TRANSCRIBE_PROGRESS = '@RNWhisper_onTranscribeProgress'
|
|
40
|
+
const EVENT_ON_TRANSCRIBE_NEW_SEGMENTS = '@RNWhisper_onTranscribeNewSegments'
|
|
29
41
|
|
|
30
42
|
const EVENT_ON_REALTIME_TRANSCRIBE = '@RNWhisper_onRealtimeTranscribe'
|
|
31
43
|
const EVENT_ON_REALTIME_TRANSCRIBE_END = '@RNWhisper_onRealtimeTranscribeEnd'
|
|
32
44
|
|
|
45
|
+
export type TranscribeNewSegmentsResult = {
|
|
46
|
+
nNew: number
|
|
47
|
+
totalNNew: number
|
|
48
|
+
result: string
|
|
49
|
+
segments: TranscribeResult['segments']
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
export type TranscribeNewSegmentsNativeEvent = {
|
|
53
|
+
contextId: number
|
|
54
|
+
jobId: number
|
|
55
|
+
result: TranscribeNewSegmentsResult
|
|
56
|
+
}
|
|
57
|
+
|
|
33
58
|
// Fn -> Boolean in TranscribeFileNativeOptions
|
|
34
59
|
export type TranscribeFileOptions = TranscribeOptions & {
|
|
35
60
|
/**
|
|
36
61
|
* Progress callback, the progress is between 0 and 100
|
|
37
62
|
*/
|
|
38
63
|
onProgress?: (progress: number) => void
|
|
64
|
+
/**
|
|
65
|
+
* Callback when new segments are transcribed
|
|
66
|
+
*/
|
|
67
|
+
onNewSegments?: (result: TranscribeNewSegmentsResult) => void
|
|
39
68
|
}
|
|
40
69
|
|
|
41
70
|
export type TranscribeProgressNativeEvent = {
|
|
@@ -44,6 +73,13 @@ export type TranscribeProgressNativeEvent = {
|
|
|
44
73
|
progress: number
|
|
45
74
|
}
|
|
46
75
|
|
|
76
|
+
export type AudioSessionSettingIos = {
|
|
77
|
+
category: AudioSessionCategoryIos
|
|
78
|
+
options?: AudioSessionCategoryOptionIos[]
|
|
79
|
+
mode?: AudioSessionModeIos
|
|
80
|
+
active?: boolean
|
|
81
|
+
}
|
|
82
|
+
|
|
47
83
|
// Codegen lacks TSIntersectionType support, so we don't put it into the native spec
|
|
48
84
|
export type TranscribeRealtimeOptions = TranscribeOptions & {
|
|
49
85
|
/**
|
|
@@ -58,6 +94,40 @@ export type TranscribeRealtimeOptions = TranscribeOptions & {
|
|
|
58
94
|
* (Default: Equal to `realtimeMaxAudioSec`)
|
|
59
95
|
*/
|
|
60
96
|
realtimeAudioSliceSec?: number
|
|
97
|
+
/**
|
|
98
|
+
* Output path for audio file. If not set, the audio file will not be saved
|
|
99
|
+
* (Default: Undefined)
|
|
100
|
+
*/
|
|
101
|
+
audioOutputPath?: string
|
|
102
|
+
/**
|
|
103
|
+
* Start transcribe on recording when the audio volume is greater than the threshold by using VAD (Voice Activity Detection).
|
|
104
|
+
* The first VAD will be triggered after 2 seconds of recording.
|
|
105
|
+
* (Default: false)
|
|
106
|
+
*/
|
|
107
|
+
useVad?: boolean
|
|
108
|
+
/**
|
|
109
|
+
* The length of collected audio used for VAD, in milliseconds. (Default: 2000)
|
|
110
|
+
*/
|
|
111
|
+
vadMs?: number
|
|
112
|
+
/**
|
|
113
|
+
* VAD threshold. (Default: 0.6)
|
|
114
|
+
*/
|
|
115
|
+
vadThold?: number
|
|
116
|
+
/**
|
|
117
|
+
* Frequency to apply High-pass filter in VAD. (Default: 100.0)
|
|
118
|
+
*/
|
|
119
|
+
vadFreqThold?: number
|
|
120
|
+
/**
|
|
121
|
+
* iOS: Audio session settings to apply when transcription starts
|
|
122
|
+
* Keep empty to use current audio session state
|
|
123
|
+
*/
|
|
124
|
+
audioSessionOnStartIos?: AudioSessionSettingIos
|
|
125
|
+
/**
|
|
126
|
+
* iOS: Audio session settings to apply when transcription stops
|
|
127
|
+
* - Keep empty to use last audio session state
|
|
128
|
+
* - Use `restore` to restore audio session state before start transcribe
|
|
129
|
+
*/
|
|
130
|
+
audioSessionOnStopIos?: string | AudioSessionSettingIos
|
|
61
131
|
}
|
|
62
132
|
|
|
63
133
|
export type TranscribeRealtimeEvent = {
|
|
@@ -99,6 +169,17 @@ export type TranscribeRealtimeNativeEvent = {
|
|
|
99
169
|
payload: TranscribeRealtimeNativePayload
|
|
100
170
|
}
|
|
101
171
|
|
|
172
|
+
const updateAudioSession = async (setting: AudioSessionSettingIos) => {
|
|
173
|
+
await AudioSessionIos.setCategory(
|
|
174
|
+
setting.category,
|
|
175
|
+
setting.options || [],
|
|
176
|
+
)
|
|
177
|
+
if (setting.mode) {
|
|
178
|
+
await AudioSessionIos.setMode(setting.mode)
|
|
179
|
+
}
|
|
180
|
+
await AudioSessionIos.setActive(setting.active ?? true)
|
|
181
|
+
}
|
|
182
|
+
|
|
102
183
|
export class WhisperContext {
|
|
103
184
|
id: number
|
|
104
185
|
|
|
@@ -112,7 +193,7 @@ export class WhisperContext {
|
|
|
112
193
|
options: TranscribeFileOptions = {},
|
|
113
194
|
): {
|
|
114
195
|
/** Stop the transcribe */
|
|
115
|
-
stop: () => void
|
|
196
|
+
stop: () => Promise<void>
|
|
116
197
|
/** Transcribe result promise */
|
|
117
198
|
promise: Promise<TranscribeResult>
|
|
118
199
|
} {
|
|
@@ -126,13 +207,16 @@ export class WhisperContext {
|
|
|
126
207
|
}
|
|
127
208
|
} else {
|
|
128
209
|
if (filePath.startsWith('http'))
|
|
129
|
-
throw new Error(
|
|
210
|
+
throw new Error(
|
|
211
|
+
'Transcribe remote file is not supported, please download it first',
|
|
212
|
+
)
|
|
130
213
|
path = filePath
|
|
131
214
|
}
|
|
132
215
|
if (path.startsWith('file://')) path = path.slice(7)
|
|
133
216
|
const jobId: number = Math.floor(Math.random() * 10000)
|
|
134
217
|
|
|
135
|
-
const { onProgress, ...rest } = options
|
|
218
|
+
const { onProgress, onNewSegments, ...rest } = options
|
|
219
|
+
|
|
136
220
|
let progressListener: any
|
|
137
221
|
let lastProgress: number = 0
|
|
138
222
|
if (onProgress) {
|
|
@@ -152,37 +236,60 @@ export class WhisperContext {
|
|
|
152
236
|
progressListener = null
|
|
153
237
|
}
|
|
154
238
|
}
|
|
239
|
+
|
|
240
|
+
let newSegmentsListener: any
|
|
241
|
+
if (onNewSegments) {
|
|
242
|
+
newSegmentsListener = EventEmitter.addListener(
|
|
243
|
+
EVENT_ON_TRANSCRIBE_NEW_SEGMENTS,
|
|
244
|
+
(evt: TranscribeNewSegmentsNativeEvent) => {
|
|
245
|
+
const { contextId, result } = evt
|
|
246
|
+
if (contextId !== this.id || evt.jobId !== jobId) return
|
|
247
|
+
onNewSegments(result)
|
|
248
|
+
},
|
|
249
|
+
)
|
|
250
|
+
}
|
|
251
|
+
const removeNewSegmenetsListener = () => {
|
|
252
|
+
if (newSegmentsListener) {
|
|
253
|
+
newSegmentsListener.remove()
|
|
254
|
+
newSegmentsListener = null
|
|
255
|
+
}
|
|
256
|
+
}
|
|
257
|
+
|
|
155
258
|
return {
|
|
156
259
|
stop: async () => {
|
|
157
260
|
await RNWhisper.abortTranscribe(this.id, jobId)
|
|
158
261
|
removeProgressListener()
|
|
262
|
+
removeNewSegmenetsListener()
|
|
159
263
|
},
|
|
160
264
|
promise: RNWhisper.transcribeFile(this.id, jobId, path, {
|
|
161
265
|
...rest,
|
|
162
|
-
onProgress: !!onProgress
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
266
|
+
onProgress: !!onProgress,
|
|
267
|
+
onNewSegments: !!onNewSegments,
|
|
268
|
+
})
|
|
269
|
+
.then((result) => {
|
|
270
|
+
removeProgressListener()
|
|
271
|
+
removeNewSegmenetsListener()
|
|
272
|
+
if (!result.isAborted && lastProgress !== 100) {
|
|
273
|
+
// Handle the case that the last progress event is not triggered
|
|
274
|
+
onProgress?.(100)
|
|
275
|
+
}
|
|
276
|
+
return result
|
|
277
|
+
})
|
|
278
|
+
.catch((e) => {
|
|
279
|
+
removeProgressListener()
|
|
280
|
+
removeNewSegmenetsListener()
|
|
281
|
+
throw e
|
|
282
|
+
}),
|
|
174
283
|
}
|
|
175
284
|
}
|
|
176
285
|
|
|
177
286
|
/** Transcribe the microphone audio stream, the microphone user permission is required */
|
|
178
287
|
async transcribeRealtime(options: TranscribeRealtimeOptions = {}): Promise<{
|
|
179
288
|
/** Stop the realtime transcribe */
|
|
180
|
-
stop: () => void
|
|
289
|
+
stop: () => Promise<void>
|
|
181
290
|
/** Subscribe to realtime transcribe events */
|
|
182
291
|
subscribe: (callback: (event: TranscribeRealtimeEvent) => void) => void
|
|
183
292
|
}> {
|
|
184
|
-
const jobId: number = Math.floor(Math.random() * 10000)
|
|
185
|
-
await RNWhisper.startRealtimeTranscribe(this.id, jobId, options)
|
|
186
293
|
let lastTranscribePayload: TranscribeRealtimeNativePayload
|
|
187
294
|
|
|
188
295
|
const slices: TranscribeRealtimeNativePayload[] = []
|
|
@@ -234,8 +341,40 @@ export class WhisperContext {
|
|
|
234
341
|
return { ...payload, ...mergedPayload, slices }
|
|
235
342
|
}
|
|
236
343
|
|
|
344
|
+
let prevAudioSession: AudioSessionSettingIos | undefined
|
|
345
|
+
if (Platform.OS === 'ios' && options?.audioSessionOnStartIos) {
|
|
346
|
+
// iOS: Remember current audio session state
|
|
347
|
+
if (options?.audioSessionOnStopIos === 'restore') {
|
|
348
|
+
const categoryResult = await AudioSessionIos.getCurrentCategory()
|
|
349
|
+
const mode = await AudioSessionIos.getCurrentMode()
|
|
350
|
+
|
|
351
|
+
prevAudioSession = {
|
|
352
|
+
...categoryResult,
|
|
353
|
+
mode,
|
|
354
|
+
active: false, // TODO: Need to check isOtherAudioPlaying to set active
|
|
355
|
+
}
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
// iOS: Update audio session state
|
|
359
|
+
await updateAudioSession(options?.audioSessionOnStartIos)
|
|
360
|
+
}
|
|
361
|
+
if (Platform.OS === 'ios' && typeof options?.audioSessionOnStopIos === 'object') {
|
|
362
|
+
prevAudioSession = options?.audioSessionOnStopIos
|
|
363
|
+
}
|
|
364
|
+
|
|
365
|
+
const jobId: number = Math.floor(Math.random() * 10000)
|
|
366
|
+
try {
|
|
367
|
+
await RNWhisper.startRealtimeTranscribe(this.id, jobId, options)
|
|
368
|
+
} catch (e) {
|
|
369
|
+
if (prevAudioSession) await updateAudioSession(prevAudioSession)
|
|
370
|
+
throw e
|
|
371
|
+
}
|
|
372
|
+
|
|
237
373
|
return {
|
|
238
|
-
stop: () =>
|
|
374
|
+
stop: async () => {
|
|
375
|
+
await RNWhisper.abortTranscribe(this.id, jobId)
|
|
376
|
+
if (prevAudioSession) await updateAudioSession(prevAudioSession)
|
|
377
|
+
},
|
|
239
378
|
subscribe: (callback: (event: TranscribeRealtimeEvent) => void) => {
|
|
240
379
|
let transcribeListener: any = EventEmitter.addListener(
|
|
241
380
|
EVENT_ON_REALTIME_TRANSCRIBE,
|
|
@@ -295,7 +434,7 @@ export type ContextOptions = {
|
|
|
295
434
|
*/
|
|
296
435
|
coreMLModelAsset?: {
|
|
297
436
|
filename: string
|
|
298
|
-
assets: number[]
|
|
437
|
+
assets: string[] | number[]
|
|
299
438
|
}
|
|
300
439
|
/** Is the file path a bundle asset for pure string filePath */
|
|
301
440
|
isBundleAsset?: boolean
|
|
@@ -320,12 +459,19 @@ export async function initWhisper({
|
|
|
320
459
|
if (filename && assets) {
|
|
321
460
|
coreMLAssets = assets
|
|
322
461
|
?.map((asset) => {
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
462
|
+
if (typeof asset === 'number') {
|
|
463
|
+
const { uri } = Image.resolveAssetSource(asset)
|
|
464
|
+
const filepath = coreMLModelAssetPaths.find((p) => uri.includes(p))
|
|
465
|
+
if (filepath) {
|
|
466
|
+
return {
|
|
467
|
+
uri,
|
|
468
|
+
filepath: `${filename}/${filepath}`,
|
|
469
|
+
}
|
|
470
|
+
}
|
|
471
|
+
} else if (typeof asset === 'string') {
|
|
326
472
|
return {
|
|
327
|
-
uri,
|
|
328
|
-
filepath: `${filename}/${
|
|
473
|
+
uri: asset,
|
|
474
|
+
filepath: `${filename}/${asset}`,
|
|
329
475
|
}
|
|
330
476
|
}
|
|
331
477
|
return undefined
|
|
@@ -344,7 +490,9 @@ export async function initWhisper({
|
|
|
344
490
|
}
|
|
345
491
|
} else {
|
|
346
492
|
if (!isBundleAsset && filePath.startsWith('http'))
|
|
347
|
-
throw new Error(
|
|
493
|
+
throw new Error(
|
|
494
|
+
'Transcribe remote file is not supported, please download it first',
|
|
495
|
+
)
|
|
348
496
|
path = filePath
|
|
349
497
|
}
|
|
350
498
|
if (path.startsWith('file://')) path = path.slice(7)
|
|
@@ -372,3 +520,5 @@ export const isUseCoreML: boolean = !!useCoreML
|
|
|
372
520
|
|
|
373
521
|
/** Is allow fallback to CPU if load CoreML model failed */
|
|
374
522
|
export const isCoreMLAllowFallback: boolean = !!coreMLAllowFallback
|
|
523
|
+
|
|
524
|
+
export { AudioSessionIos }
|