whisper.rn 0.3.6 → 0.3.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +28 -0
- package/android/src/main/java/com/rnwhisper/AudioUtils.java +119 -0
- package/android/src/main/java/com/rnwhisper/WhisperContext.java +74 -39
- package/android/src/main/jni.cpp +45 -12
- package/android/src/newarch/java/com/rnwhisper/RNWhisperModule.java +26 -0
- package/cpp/rn-whisper.cpp +51 -0
- package/cpp/rn-whisper.h +2 -1
- package/ios/RNWhisper.mm +81 -22
- package/ios/RNWhisper.xcodeproj/project.pbxproj +27 -3
- package/ios/RNWhisper.xcodeproj/project.xcworkspace/xcuserdata/jhen.xcuserdatad/UserInterfaceState.xcuserstate +0 -0
- package/ios/RNWhisper.xcodeproj/xcuserdata/jhen.xcuserdatad/xcschemes/xcschememanagement.plist +5 -0
- package/ios/RNWhisperAudioSessionUtils.h +13 -0
- package/ios/RNWhisperAudioSessionUtils.m +85 -0
- package/ios/RNWhisperAudioUtils.h +9 -0
- package/ios/RNWhisperAudioUtils.m +83 -0
- package/ios/RNWhisperContext.h +1 -0
- package/ios/RNWhisperContext.mm +101 -28
- package/lib/commonjs/AudioSessionIos.js +91 -0
- package/lib/commonjs/AudioSessionIos.js.map +1 -0
- package/lib/commonjs/NativeRNWhisper.js.map +1 -1
- package/lib/commonjs/index.js +82 -14
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/AudioSessionIos.js +83 -0
- package/lib/module/AudioSessionIos.js.map +1 -0
- package/lib/module/NativeRNWhisper.js.map +1 -1
- package/lib/module/index.js +77 -14
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/AudioSessionIos.d.ts +54 -0
- package/lib/typescript/AudioSessionIos.d.ts.map +1 -0
- package/lib/typescript/NativeRNWhisper.d.ts +8 -0
- package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +62 -4
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/AudioSessionIos.ts +90 -0
- package/src/NativeRNWhisper.ts +11 -1
- package/src/index.ts +178 -28
package/ios/RNWhisper.mm
CHANGED
```diff
@@ -1,6 +1,8 @@
 #import "RNWhisper.h"
 #import "RNWhisperContext.h"
 #import "RNWhisperDownloader.h"
+#import "RNWhisperAudioUtils.h"
+#import "RNWhisperAudioSessionUtils.h"
 #include <stdlib.h>
 #include <string>
 
@@ -87,6 +89,7 @@ RCT_REMAP_METHOD(initContext,
 - (NSArray *)supportedEvents {
   return @[
     @"@RNWhisper_onTranscribeProgress",
+    @"@RNWhisper_onTranscribeNewSegments",
     @"@RNWhisper_onRealtimeTranscribe",
     @"@RNWhisper_onRealtimeTranscribeEnd",
   ];
@@ -121,7 +124,7 @@ RCT_REMAP_METHOD(transcribeFile,
     }
 
     int count = 0;
-    float *waveFile = [self decodeWaveFile:path count:&count];
+    float *waveFile = [RNWhisperAudioUtils decodeWaveFile:path count:&count];
     if (waveFile == nil) {
         reject(@"whisper_error", @"Invalid file", nil);
         return;
@@ -144,6 +147,20 @@ RCT_REMAP_METHOD(transcribeFile,
             ];
         });
     }
+    onNewSegments: ^(NSDictionary *result) {
+        if (rn_whisper_transcribe_is_aborted(jobId)) {
+            return;
+        }
+        dispatch_async(dispatch_get_main_queue(), ^{
+            [self sendEventWithName:@"@RNWhisper_onTranscribeNewSegments"
+                body:@{
+                    @"contextId": [NSNumber numberWithInt:contextId],
+                    @"jobId": [NSNumber numberWithInt:jobId],
+                    @"result": result
+                }
+            ];
+        });
+    }
     onEnd: ^(int code) {
         if (code != 0) {
             free(waveFile);
@@ -242,27 +259,6 @@ RCT_REMAP_METHOD(releaseAllContexts,
     resolve(nil);
 }
 
-- (float *)decodeWaveFile:(NSString*)filePath count:(int *)count {
-    NSURL *url = [NSURL fileURLWithPath:filePath];
-    NSData *fileData = [NSData dataWithContentsOfURL:url];
-    if (fileData == nil) {
-        return nil;
-    }
-    NSMutableData *waveData = [[NSMutableData alloc] init];
-    [waveData appendData:[fileData subdataWithRange:NSMakeRange(44, [fileData length]-44)]];
-    const short *shortArray = (const short *)[waveData bytes];
-    int shortCount = (int) ([waveData length] / sizeof(short));
-    float *floatArray = (float *) malloc(shortCount * sizeof(float));
-    for (NSInteger i = 0; i < shortCount; i++) {
-        float floatValue = ((float)shortArray[i]) / 32767.0;
-        floatValue = MAX(floatValue, -1.0);
-        floatValue = MIN(floatValue, 1.0);
-        floatArray[i] = floatValue;
-    }
-    *count = shortCount;
-    return floatArray;
-}
-
 - (void)invalidate {
     [super invalidate];
 
@@ -283,6 +279,69 @@ RCT_REMAP_METHOD(releaseAllContexts,
     [RNWhisperDownloader clearCache];
 }
 
+// MARK: - AudioSessionUtils
+
+RCT_EXPORT_METHOD(getAudioSessionCurrentCategory:(RCTPromiseResolveBlock)resolve
+    withRejecter:(RCTPromiseRejectBlock)reject)
+{
+    NSString *category = [RNWhisperAudioSessionUtils getCurrentCategory];
+    NSArray *options = [RNWhisperAudioSessionUtils getCurrentOptions];
+    resolve(@{
+        @"category": category,
+        @"options": options
+    });
+}
+
+RCT_EXPORT_METHOD(getAudioSessionCurrentMode:(RCTPromiseResolveBlock)resolve
+    withRejecter:(RCTPromiseRejectBlock)reject)
+{
+    NSString *mode = [RNWhisperAudioSessionUtils getCurrentMode];
+    resolve(mode);
+}
+
+RCT_REMAP_METHOD(setAudioSessionCategory,
+    withCategory:(NSString *)category
+    withOptions:(NSArray *)options
+    withResolver:(RCTPromiseResolveBlock)resolve
+    withRejecter:(RCTPromiseRejectBlock)reject)
+{
+    NSError *error = nil;
+    [RNWhisperAudioSessionUtils setCategory:category options:options error:&error];
+    if (error != nil) {
+        reject(@"whisper_error", [NSString stringWithFormat:@"Failed to set category. Error: %@", error], nil);
+        return;
+    }
+    resolve(nil);
+}
+
+RCT_REMAP_METHOD(setAudioSessionMode,
+    withMode:(NSString *)mode
+    withResolver:(RCTPromiseResolveBlock)resolve
+    withRejecter:(RCTPromiseRejectBlock)reject)
+{
+    NSError *error = nil;
+    [RNWhisperAudioSessionUtils setMode:mode error:&error];
+    if (error != nil) {
+        reject(@"whisper_error", [NSString stringWithFormat:@"Failed to set mode. Error: %@", error], nil);
+        return;
+    }
+    resolve(nil);
+}
+
+RCT_REMAP_METHOD(setAudioSessionActive,
+    withActive:(BOOL)active
+    withResolver:(RCTPromiseResolveBlock)resolve
+    withRejecter:(RCTPromiseRejectBlock)reject)
+{
+    NSError *error = nil;
+    [RNWhisperAudioSessionUtils setActive:active error:&error];
+    if (error != nil) {
+        reject(@"whisper_error", [NSString stringWithFormat:@"Failed to set active. Error: %@", error], nil);
+        return;
+    }
+    resolve(nil);
+}
+
 #ifdef RCT_NEW_ARCH_ENABLED
 - (std::shared_ptr<facebook::react::TurboModule>)getTurboModule:
     (const facebook::react::ObjCTurboModule::InitParams &)params
```
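The new `@RNWhisper_onTranscribeNewSegments` event carries `contextId`, `jobId`, and the `result` payload assembled by the native new-segment callback (see the `RNWhisperContext.mm` section below for its `nNew`/`totalNNew`/`result`/`segments` fields). A minimal TypeScript sketch of listening to the raw event; in practice the library's `transcribe(..., { onNewSegments })` option wraps this, so the direct emitter usage here is illustrative only:

```ts
import { NativeEventEmitter, NativeModules } from 'react-native';

// Raw event subscription sketch — normally you'd use the library's
// transcribe(..., { onNewSegments }) option instead of the emitter.
const emitter = new NativeEventEmitter(NativeModules.RNWhisper);

const subscription = emitter.addListener(
  '@RNWhisper_onTranscribeNewSegments',
  (ev: {
    contextId: number;
    jobId: number;
    result: {
      nNew: number;
      totalNNew: number;
      result: string; // text of the new segments, concatenated
      segments: { text: string; t0: number; t1: number }[];
    };
  }) => {
    console.log(`job ${ev.jobId}: +${ev.result.nNew} segments`, ev.result.result);
  },
);

// Later, when done: subscription.remove();
```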
package/ios/RNWhisper.xcodeproj/project.pbxproj
CHANGED
```diff
@@ -8,6 +8,10 @@
 
 /* Begin PBXBuildFile section */
 		5E555C0D2413F4C50049A1A2 /* RNWhisper.mm in Sources */ = {isa = PBXBuildFile; fileRef = B3E7B5891CC2AC0600A0062D /* RNWhisper.mm */; };
+		7F458E922AC7DC74007045F6 /* RNWhisperAudioSessionUtils.m in Sources */ = {isa = PBXBuildFile; fileRef = 7F458E912AC7DC74007045F6 /* RNWhisperAudioSessionUtils.m */; };
+		7FE0BBA12ABE6C7B0049B4E4 /* RNWhisperDownloader.m in Sources */ = {isa = PBXBuildFile; fileRef = 7FE0BB9B2ABE6C7B0049B4E4 /* RNWhisperDownloader.m */; };
+		7FE0BBA22ABE6C7B0049B4E4 /* RNWhisperAudioUtils.m in Sources */ = {isa = PBXBuildFile; fileRef = 7FE0BB9C2ABE6C7B0049B4E4 /* RNWhisperAudioUtils.m */; };
+		7FE0BBA32ABE6C7B0049B4E4 /* RNWhisperContext.mm in Sources */ = {isa = PBXBuildFile; fileRef = 7FE0BBA02ABE6C7B0049B4E4 /* RNWhisperContext.mm */; };
 /* End PBXBuildFile section */
 
 /* Begin PBXCopyFilesBuildPhase section */
@@ -24,6 +28,15 @@
 
 /* Begin PBXFileReference section */
 		134814201AA4EA6300B7C361 /* libRNWhisper.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libRNWhisper.a; sourceTree = BUILT_PRODUCTS_DIR; };
+		7F458E902AC7DC74007045F6 /* RNWhisperAudioSessionUtils.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RNWhisperAudioSessionUtils.h; sourceTree = "<group>"; };
+		7F458E912AC7DC74007045F6 /* RNWhisperAudioSessionUtils.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = RNWhisperAudioSessionUtils.m; sourceTree = "<group>"; };
+		7FE0BB9A2ABE6C7B0049B4E4 /* RNWhisper.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RNWhisper.h; sourceTree = "<group>"; };
+		7FE0BB9B2ABE6C7B0049B4E4 /* RNWhisperDownloader.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = RNWhisperDownloader.m; sourceTree = "<group>"; };
+		7FE0BB9C2ABE6C7B0049B4E4 /* RNWhisperAudioUtils.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = RNWhisperAudioUtils.m; sourceTree = "<group>"; };
+		7FE0BB9D2ABE6C7B0049B4E4 /* RNWhisperContext.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RNWhisperContext.h; sourceTree = "<group>"; };
+		7FE0BB9E2ABE6C7B0049B4E4 /* RNWhisperDownloader.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RNWhisperDownloader.h; sourceTree = "<group>"; };
+		7FE0BB9F2ABE6C7B0049B4E4 /* RNWhisperAudioUtils.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RNWhisperAudioUtils.h; sourceTree = "<group>"; };
+		7FE0BBA02ABE6C7B0049B4E4 /* RNWhisperContext.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = RNWhisperContext.mm; sourceTree = "<group>"; };
 		B3E7B5891CC2AC0600A0062D /* RNWhisper.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = RNWhisper.mm; sourceTree = "<group>"; };
 /* End PBXFileReference section */
 
@@ -49,6 +62,15 @@
 		58B511D21A9E6C8500147676 = {
 			isa = PBXGroup;
 			children = (
+				7F458E902AC7DC74007045F6 /* RNWhisperAudioSessionUtils.h */,
+				7F458E912AC7DC74007045F6 /* RNWhisperAudioSessionUtils.m */,
+				7FE0BB9F2ABE6C7B0049B4E4 /* RNWhisperAudioUtils.h */,
+				7FE0BB9C2ABE6C7B0049B4E4 /* RNWhisperAudioUtils.m */,
+				7FE0BB9A2ABE6C7B0049B4E4 /* RNWhisper.h */,
+				7FE0BB9D2ABE6C7B0049B4E4 /* RNWhisperContext.h */,
+				7FE0BBA02ABE6C7B0049B4E4 /* RNWhisperContext.mm */,
+				7FE0BB9E2ABE6C7B0049B4E4 /* RNWhisperDownloader.h */,
+				7FE0BB9B2ABE6C7B0049B4E4 /* RNWhisperDownloader.m */,
 				B3E7B5891CC2AC0600A0062D /* RNWhisper.mm */,
 				134814211AA4EA7D00B7C361 /* Products */,
 			);
@@ -112,6 +134,10 @@
 			buildActionMask = 2147483647;
 			files = (
 				5E555C0D2413F4C50049A1A2 /* RNWhisper.mm in Sources */,
+				7FE0BBA22ABE6C7B0049B4E4 /* RNWhisperAudioUtils.m in Sources */,
+				7FE0BBA32ABE6C7B0049B4E4 /* RNWhisperContext.mm in Sources */,
+				7FE0BBA12ABE6C7B0049B4E4 /* RNWhisperDownloader.m in Sources */,
+				7F458E922AC7DC74007045F6 /* RNWhisperAudioSessionUtils.m in Sources */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
@@ -223,9 +249,7 @@
 				"$(SRCROOT)/../../react-native/React/**",
 			);
 			LIBRARY_SEARCH_PATHS = "$(inherited)";
-			OTHER_LDFLAGS = (
-				"-ObjC",
-			);
+			OTHER_LDFLAGS = "-ObjC";
 			PRODUCT_NAME = RNWhisper;
 			SKIP_INSTALL = YES;
 		};
```
package/ios/RNWhisper.xcodeproj/project.xcworkspace/xcuserdata/jhen.xcuserdatad/UserInterfaceState.xcuserstate
CHANGED
Binary file
package/ios/RNWhisperAudioSessionUtils.h
ADDED
```diff
@@ -0,0 +1,13 @@
+#import <Foundation/Foundation.h>
+#import <AVFoundation/AVFoundation.h>
+
+@interface RNWhisperAudioSessionUtils : NSObject
+
++(NSString *)getCurrentCategory;
++(NSArray *)getCurrentOptions;
++(NSString *)getCurrentMode;
++(void)setCategory:(NSString *)category options:(NSArray *)options error:(NSError **)error;
++(void)setMode:(NSString *)mode error:(NSError **)error;
++(void)setActive:(BOOL)active error:(NSError **)error;
+
+@end
```
package/ios/RNWhisperAudioSessionUtils.m
ADDED
```diff
@@ -0,0 +1,85 @@
+#import "RNWhisperAudioSessionUtils.h"
+
+@implementation RNWhisperAudioSessionUtils
+
+static NSDictionary *_categories;
+static NSDictionary *_options;
+static NSDictionary *_modes;
+
++ (void)initialize {
+    _categories = @{
+        @"Ambient": AVAudioSessionCategoryAmbient,
+        @"SoloAmbient": AVAudioSessionCategorySoloAmbient,
+        @"Playback": AVAudioSessionCategoryPlayback,
+        @"Record": AVAudioSessionCategoryRecord,
+        @"PlayAndRecord": AVAudioSessionCategoryPlayAndRecord,
+        @"MultiRoute": AVAudioSessionCategoryMultiRoute
+    };
+    _options = @{
+        @"MixWithOthers": @(AVAudioSessionCategoryOptionMixWithOthers),
+        @"DuckOthers": @(AVAudioSessionCategoryOptionDuckOthers),
+        @"InterruptSpokenAudioAndMixWithOthers": @(AVAudioSessionCategoryOptionInterruptSpokenAudioAndMixWithOthers),
+        @"AllowBluetooth": @(AVAudioSessionCategoryOptionAllowBluetooth),
+        @"AllowBluetoothA2DP": @(AVAudioSessionCategoryOptionAllowBluetoothA2DP),
+        @"AllowAirPlay": @(AVAudioSessionCategoryOptionAllowAirPlay),
+        @"DefaultToSpeaker": @(AVAudioSessionCategoryOptionDefaultToSpeaker)
+    };
+    _modes = @{
+        @"Default": AVAudioSessionModeDefault,
+        @"VoiceChat": AVAudioSessionModeVoiceChat,
+        @"VideoChat": AVAudioSessionModeVideoChat,
+        @"GameChat": AVAudioSessionModeGameChat,
+        @"VideoRecording": AVAudioSessionModeVideoRecording,
+        @"Measurement": AVAudioSessionModeMeasurement,
+        @"MoviePlayback": AVAudioSessionModeMoviePlayback,
+        @"SpokenAudio": AVAudioSessionModeSpokenAudio
+    };
+}
+
++(NSString *)getCurrentCategory {
+    AVAudioSession *session = [AVAudioSession sharedInstance];
+    return session.category;
+}
+
++(NSArray *)getCurrentOptions {
+    AVAudioSession *session = [AVAudioSession sharedInstance];
+    AVAudioSessionCategoryOptions options = session.categoryOptions;
+    NSMutableArray *result = [NSMutableArray array];
+    for (NSString *key in _options) {
+        if ((options & [[_options objectForKey:key] unsignedIntegerValue]) != 0) {
+            [result addObject:key];
+        }
+    }
+    return result;
+}
+
++(NSString *)getCurrentMode {
+    AVAudioSession *session = [AVAudioSession sharedInstance];
+    return session.mode;
+}
+
++(AVAudioSessionCategoryOptions)getOptions:(NSArray *)options {
+    AVAudioSessionCategoryOptions result = 0;
+    for (NSString *option in options) {
+        result |= [[_options objectForKey:option] unsignedIntegerValue];
+    }
+    return result;
+}
+
++(void)setCategory:(NSString *)category options:(NSArray *)options error:(NSError **)error {
+    AVAudioSession *session = [AVAudioSession sharedInstance];
+    [session setCategory:[_categories objectForKey:category] withOptions:[self getOptions:options] error:error];
+}
+
++(void)setMode:(NSString *)mode error:(NSError **)error {
+    AVAudioSession *session = [AVAudioSession sharedInstance];
+    [session setMode:[_modes objectForKey:mode] error:error];
+}
+
++(void)setActive:(BOOL)active error:(NSError **)error {
+    AVAudioSession *session = [AVAudioSession sharedInstance];
+    [session setActive:active error:error];
+}
+
+
+@end
```
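The string keys registered in `+initialize` above (`PlayAndRecord`, `MixWithOthers`, `Measurement`, and so on) are exactly what crosses the bridge. A hedged sketch of driving the exported methods from TypeScript via the raw native module; the package also ships a typed wrapper in `src/AudioSessionIos.ts`, whose exact surface isn't shown in this section:

```ts
import { NativeModules } from 'react-native';

const { RNWhisper } = NativeModules;

// Hypothetical helper — method and string names match the native code above.
async function prepareAudioSessionForRecording(): Promise<void> {
  await RNWhisper.setAudioSessionCategory('PlayAndRecord', [
    'MixWithOthers',
    'AllowBluetooth',
  ]);
  await RNWhisper.setAudioSessionMode('Measurement');
  await RNWhisper.setAudioSessionActive(true);

  // Read back the effective state (category plus decoded option names)
  const current = await RNWhisper.getAudioSessionCurrentCategory();
  console.log('session:', current.category, current.options);
}
```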
package/ios/RNWhisperAudioUtils.h
ADDED
```diff
@@ -0,0 +1,9 @@
+#import <Foundation/Foundation.h>
+
+@interface RNWhisperAudioUtils : NSObject
+
++ (NSData *)concatShortBuffers:(NSMutableArray<NSValue *> *)buffers sliceNSamples:(NSMutableArray<NSNumber *> *)sliceNSamples;
++ (void)saveWavFile:(NSData *)rawData audioOutputFile:(NSString *)audioOutputFile;
++ (float *)decodeWaveFile:(NSString*)filePath count:(int *)count;
+
+@end
```
package/ios/RNWhisperAudioUtils.m
ADDED
```diff
@@ -0,0 +1,83 @@
+#import "RNWhisperAudioUtils.h"
+#import "whisper.h"
+
+@implementation RNWhisperAudioUtils
+
++ (NSData *)concatShortBuffers:(NSMutableArray<NSValue *> *)buffers sliceNSamples:(NSMutableArray<NSNumber *> *)sliceNSamples {
+    NSMutableData *outputData = [NSMutableData data];
+    for (int i = 0; i < buffers.count; i++) {
+        int size = [sliceNSamples objectAtIndex:i].intValue;
+        NSValue *buffer = [buffers objectAtIndex:i];
+        short *bufferPtr = buffer.pointerValue;
+        [outputData appendBytes:bufferPtr length:size * sizeof(short)];
+    }
+    return outputData;
+}
+
++ (void)saveWavFile:(NSData *)rawData audioOutputFile:(NSString *)audioOutputFile {
+    NSMutableData *outputData = [NSMutableData data];
+
+    // WAVE header
+    [outputData appendData:[@"RIFF" dataUsingEncoding:NSUTF8StringEncoding]]; // chunk id
+    int chunkSize = CFSwapInt32HostToLittle(36 + rawData.length);
+    [outputData appendBytes:&chunkSize length:sizeof(chunkSize)];
+    [outputData appendData:[@"WAVE" dataUsingEncoding:NSUTF8StringEncoding]]; // format
+    [outputData appendData:[@"fmt " dataUsingEncoding:NSUTF8StringEncoding]]; // subchunk 1 id
+
+    int subchunk1Size = CFSwapInt32HostToLittle(16);
+    [outputData appendBytes:&subchunk1Size length:sizeof(subchunk1Size)];
+
+    short audioFormat = CFSwapInt16HostToLittle(1); // PCM
+    [outputData appendBytes:&audioFormat length:sizeof(audioFormat)];
+
+    short numChannels = CFSwapInt16HostToLittle(1); // mono
+    [outputData appendBytes:&numChannels length:sizeof(numChannels)];
+
+    int sampleRate = CFSwapInt32HostToLittle(WHISPER_SAMPLE_RATE);
+    [outputData appendBytes:&sampleRate length:sizeof(sampleRate)];
+
+    // (bitDepth * sampleRate * channels) >> 3
+    int byteRate = CFSwapInt32HostToLittle(WHISPER_SAMPLE_RATE * 1 * 16 / 8);
+    [outputData appendBytes:&byteRate length:sizeof(byteRate)];
+
+    // (bitDepth * channels) >> 3
+    short blockAlign = CFSwapInt16HostToLittle(16 / 8);
+    [outputData appendBytes:&blockAlign length:sizeof(blockAlign)];
+
+    // bitDepth
+    short bitsPerSample = CFSwapInt16HostToLittle(16);
+    [outputData appendBytes:&bitsPerSample length:sizeof(bitsPerSample)];
+
+    [outputData appendData:[@"data" dataUsingEncoding:NSUTF8StringEncoding]]; // subchunk 2 id
+    int subchunk2Size = CFSwapInt32HostToLittle((int)rawData.length);
+    [outputData appendBytes:&subchunk2Size length:sizeof(subchunk2Size)];
+
+    // Audio data
+    [outputData appendData:rawData];
+
+    // Save to file
+    [outputData writeToFile:audioOutputFile atomically:YES];
+}
+
++ (float *)decodeWaveFile:(NSString*)filePath count:(int *)count {
+    NSURL *url = [NSURL fileURLWithPath:filePath];
+    NSData *fileData = [NSData dataWithContentsOfURL:url];
+    if (fileData == nil) {
+        return nil;
+    }
+    NSMutableData *waveData = [[NSMutableData alloc] init];
+    [waveData appendData:[fileData subdataWithRange:NSMakeRange(44, [fileData length]-44)]];
+    const short *shortArray = (const short *)[waveData bytes];
+    int shortCount = (int) ([waveData length] / sizeof(short));
+    float *floatArray = (float *) malloc(shortCount * sizeof(float));
+    for (NSInteger i = 0; i < shortCount; i++) {
+        float floatValue = ((float)shortArray[i]) / 32767.0;
+        floatValue = MAX(floatValue, -1.0);
+        floatValue = MIN(floatValue, 1.0);
+        floatArray[i] = floatValue;
+    }
+    *count = shortCount;
+    return floatArray;
+}
+
+@end
```
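Note that `decodeWaveFile:` does not parse RIFF chunks: it skips a fixed 44-byte canonical header and treats everything after it as 16-bit little-endian mono PCM, scaling each sample by 1/32767 and clamping to [-1, 1]. The same logic as a TypeScript reference sketch (a hypothetical helper, not part of the package):

```ts
// Mirrors the native decodeWaveFile: assumes a canonical 44-byte WAV
// header followed by 16-bit little-endian mono PCM samples.
function decodeWaveFile(bytes: Uint8Array): Float32Array {
  const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
  const sampleCount = (bytes.byteLength - 44) >> 1;
  const samples = new Float32Array(sampleCount);
  for (let i = 0; i < sampleCount; i++) {
    const s = view.getInt16(44 + i * 2, true); // true = little-endian
    // Scale to [-1, 1], clamping like the MAX/MIN calls above
    samples[i] = Math.max(-1, Math.min(1, s / 32767));
  }
  return samples;
}
```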
package/ios/RNWhisperContext.h
CHANGED
```diff
@@ -53,6 +53,7 @@ typedef struct {
     audioDataCount:(int)audioDataCount
     options:(NSDictionary *)options
     onProgress:(void (^)(int))onProgress
+    onNewSegments:(void (^)(NSDictionary *))onNewSegments
     onEnd:(void (^)(int))onEnd;
 - (void)stopTranscribe:(int)jobId;
 - (void)stopCurrentTranscribe;
```
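On the JS side this new parameter surfaces as an `onNewSegments` option on `transcribe` (wired through `src/index.ts` in this release). A sketch following the documented whisper.rn usage, where `transcribe` returns `{ stop, promise }`; the model and audio paths are placeholders:

```ts
import { initWhisper } from 'whisper.rn';

async function transcribeWithSegments() {
  const whisperContext = await initWhisper({
    filePath: 'path/to/ggml-tiny.en.bin', // placeholder model path
  });

  const { promise } = whisperContext.transcribe('path/to/audio.wav', {
    language: 'en',
    onProgress: (progress) => console.log(`progress: ${progress}%`),
    // New in this release: fired as whisper.cpp finalizes segments
    onNewSegments: ({ nNew, totalNNew, segments }) => {
      console.log(`+${nNew} segments (total ${totalNNew})`);
      segments.forEach((s) => console.log(`[${s.t0} - ${s.t1}] ${s.text}`));
    },
  });

  const { result } = await promise; // full transcript when done
  console.log(result);
}
```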
package/ios/RNWhisperContext.mm
CHANGED
```diff
@@ -1,4 +1,6 @@
 #import "RNWhisperContext.h"
+#import "RNWhisperAudioUtils.h"
+#include <vector>
 
 #define NUM_BYTES_PER_BUFFER 16 * 1024
 
@@ -77,6 +79,29 @@
     }
 }
 
+bool vad(RNWhisperContextRecordState *state, int16_t* audioBufferI16, int nSamples, int n)
+{
+    bool isSpeech = true;
+    if (!state->isTranscribing && state->options[@"useVad"]) {
+        int vadSec = state->options[@"vadMs"] != nil ? [state->options[@"vadMs"] intValue] / 1000 : 2;
+        int sampleSize = vadSec * WHISPER_SAMPLE_RATE;
+        if (nSamples + n > sampleSize) {
+            int start = nSamples + n - sampleSize;
+            std::vector<float> audioBufferF32Vec(sampleSize);
+            for (int i = 0; i < sampleSize; i++) {
+                audioBufferF32Vec[i] = (float)audioBufferI16[i + start] / 32768.0f;
+            }
+            float vadThold = state->options[@"vadThold"] != nil ? [state->options[@"vadThold"] floatValue] : 0.6f;
+            float vadFreqThold = state->options[@"vadFreqThold"] != nil ? [state->options[@"vadFreqThold"] floatValue] : 100.0f;
+            isSpeech = rn_whisper_vad_simple(audioBufferF32Vec, WHISPER_SAMPLE_RATE, 1000, vadThold, vadFreqThold, false);
+            NSLog(@"[RNWhisper] VAD result: %d", isSpeech);
+        } else {
+            isSpeech = false;
+        }
+    }
+    return isSpeech;
+}
+
 void AudioInputCallback(void * inUserData,
     AudioQueueRef inAQ,
     AudioQueueBufferRef inBuffer,
@@ -117,6 +142,11 @@ void AudioInputCallback(void * inUserData,
         !state->isTranscribing &&
         nSamples != state->nSamplesTranscribing
     ) {
+        int16_t* audioBufferI16 = (int16_t*) [state->shortBufferSlices[state->sliceIndex] pointerValue];
+        if (!vad(state, audioBufferI16, nSamples, 0)) {
+            state->transcribeHandler(state->jobId, @"end", @{});
+            return;
+        }
         state->isTranscribing = true;
         dispatch_async([state->mSelf getDispatchQueue], ^{
             [state->mSelf fullTranscribeSamples:state];
@@ -142,11 +172,15 @@ void AudioInputCallback(void * inUserData,
     for (int i = 0; i < n; i++) {
         audioBufferI16[nSamples + i] = ((short*)inBuffer->mAudioData)[i];
     }
+
+    bool isSpeech = vad(state, audioBufferI16, nSamples, n);
     nSamples += n;
     state->sliceNSamples[state->sliceIndex] = [NSNumber numberWithInt:nSamples];
 
     AudioQueueEnqueueBuffer(state->queue, inBuffer, 0, NULL);
 
+    if (!isSpeech) return;
+
     if (!state->isTranscribing) {
         state->isTranscribing = true;
         dispatch_async([state->mSelf getDispatchQueue], ^{
@@ -167,7 +201,8 @@ void AudioInputCallback(void * inUserData,
         audioBufferF32[i] = (float)audioBufferI16[i] / 32768.0f;
     }
     CFTimeInterval timeStart = CACurrentMediaTime();
-    int code = [state->mSelf fullTranscribe:state->jobId audioData:audioBufferF32 audioDataCount:state->nSamplesTranscribing options:state->options];
+    struct whisper_full_params params = [state->mSelf getParams:state->options jobId:state->jobId];
+    int code = [state->mSelf fullTranscribe:state->jobId params:params audioData:audioBufferF32 audioDataCount:state->nSamplesTranscribing];
     free(audioBufferF32);
     CFTimeInterval timeEnd = CACurrentMediaTime();
     const float timeRecording = (float) state->nSamplesTranscribing / (float) state->dataFormat.mSampleRate;
@@ -212,6 +247,17 @@ void AudioInputCallback(void * inUserData,
             NSLog(@"[RNWhisper] Transcribe end");
             result[@"isStoppedByAction"] = @(state->isStoppedByAction);
             result[@"isCapturing"] = @(false);
+
+            // Save wav if needed
+            if (state->options[@"audioOutputPath"] != nil) {
+                // TODO: Append in real time so we don't need to keep all slices & also reduce memory usage
+                [RNWhisperAudioUtils
+                    saveWavFile:[RNWhisperAudioUtils concatShortBuffers:state->shortBufferSlices
+                        sliceNSamples:state->sliceNSamples]
+                    audioOutputFile:state->options[@"audioOutputPath"]
+                ];
+            }
+
             state->transcribeHandler(state->jobId, @"end", result);
         } else if (code == 0) {
             result[@"isCapturing"] = @(true);
@@ -272,18 +318,70 @@ void AudioInputCallback(void * inUserData,
     return status;
 }
 
+struct rnwhisper_segments_callback_data {
+    void (^onNewSegments)(NSDictionary *);
+    int total_n_new;
+};
+
 - (void)transcribeFile:(int)jobId
     audioData:(float *)audioData
     audioDataCount:(int)audioDataCount
     options:(NSDictionary *)options
     onProgress:(void (^)(int))onProgress
+    onNewSegments:(void (^)(NSDictionary *))onNewSegments
     onEnd:(void (^)(int))onEnd
 {
     dispatch_async(dQueue, ^{
         self->recordState.isStoppedByAction = false;
         self->recordState.isTranscribing = true;
         self->recordState.jobId = jobId;
-        int code = [self fullTranscribeWithProgress:onProgress jobId:jobId audioData:audioData audioDataCount:audioDataCount options:options];
+
+        whisper_full_params params = [self getParams:options jobId:jobId];
+        if (options[@"onProgress"] && [options[@"onProgress"] boolValue]) {
+            params.progress_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, int progress, void * user_data) {
+                void (^onProgress)(int) = (__bridge void (^)(int))user_data;
+                onProgress(progress);
+            };
+            params.progress_callback_user_data = (__bridge void *)(onProgress);
+        }
+
+        if (options[@"onNewSegments"] && [options[@"onNewSegments"] boolValue]) {
+            params.new_segment_callback = [](struct whisper_context * ctx, struct whisper_state * /*state*/, int n_new, void * user_data) {
+                struct rnwhisper_segments_callback_data *data = (struct rnwhisper_segments_callback_data *)user_data;
+                data->total_n_new += n_new;
+
+                NSString *text = @"";
+                NSMutableArray *segments = [[NSMutableArray alloc] init];
+                for (int i = data->total_n_new - n_new; i < data->total_n_new; i++) {
+                    const char * text_cur = whisper_full_get_segment_text(ctx, i);
+                    text = [text stringByAppendingString:[NSString stringWithUTF8String:text_cur]];
+
+                    const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
+                    const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
+                    NSDictionary *segment = @{
+                        @"text": [NSString stringWithUTF8String:text_cur],
+                        @"t0": [NSNumber numberWithLongLong:t0],
+                        @"t1": [NSNumber numberWithLongLong:t1]
+                    };
+                    [segments addObject:segment];
+                }
+
+                NSDictionary *result = @{
+                    @"nNew": [NSNumber numberWithInt:n_new],
+                    @"totalNNew": [NSNumber numberWithInt:data->total_n_new],
+                    @"result": text,
+                    @"segments": segments
+                };
+                void (^onNewSegments)(NSDictionary *) = (void (^)(NSDictionary *))data->onNewSegments;
+                onNewSegments(result);
+            };
+            struct rnwhisper_segments_callback_data user_data = {
+                .onNewSegments = onNewSegments,
+                .total_n_new = 0
+            };
+            params.new_segment_callback_user_data = &user_data;
+        }
+        int code = [self fullTranscribe:jobId params:params audioData:audioData audioDataCount:audioDataCount];
         self->recordState.jobId = -1;
         self->recordState.isTranscribing = false;
         onEnd(code);
@@ -383,36 +481,11 @@ void AudioInputCallback(void * inUserData,
     return params;
 }
 
-- (int)fullTranscribeWithProgress:(void (^)(int))onProgress
-    jobId:(int)jobId
-    audioData:(float *)audioData
-    audioDataCount:(int)audioDataCount
-    options:(NSDictionary *)options
-{
-    struct whisper_full_params params = [self getParams:options jobId:jobId];
-    if (options[@"onProgress"] && [options[@"onProgress"] boolValue]) {
-        params.progress_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, int progress, void * user_data) {
-            void (^onProgress)(int) = (__bridge void (^)(int))user_data;
-            onProgress(progress);
-        };
-        params.progress_callback_user_data = (__bridge void *)(onProgress);
-    }
-    whisper_reset_timings(self->ctx);
-
-    int code = whisper_full(self->ctx, params, audioData, audioDataCount);
-    rn_whisper_remove_abort_map(jobId);
-    // if (code == 0) {
-    //     whisper_print_timings(self->ctx);
-    // }
-    return code;
-}
-
 - (int)fullTranscribe:(int)jobId
+    params:(struct whisper_full_params)params
     audioData:(float *)audioData
     audioDataCount:(int)audioDataCount
-    options:(NSDictionary *)options
 {
-    struct whisper_full_params params = [self getParams:options jobId:jobId];
     whisper_reset_timings(self->ctx);
 
     int code = whisper_full(self->ctx, params, audioData, audioDataCount);
```