whisper.rn 0.4.0-rc.1 → 0.4.0-rc.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -6
- package/android/build.gradle +4 -0
- package/android/src/main/CMakeLists.txt +14 -0
- package/android/src/main/java/com/rnwhisper/AudioUtils.java +27 -92
- package/android/src/main/java/com/rnwhisper/RNWhisper.java +86 -40
- package/android/src/main/java/com/rnwhisper/WhisperContext.java +85 -131
- package/android/src/main/jni-utils.h +76 -0
- package/android/src/main/jni.cpp +226 -109
- package/android/src/newarch/java/com/rnwhisper/RNWhisperModule.java +10 -0
- package/android/src/oldarch/java/com/rnwhisper/RNWhisperModule.java +10 -0
- package/cpp/README.md +1 -1
- package/cpp/coreml/whisper-encoder-impl.h +1 -1
- package/cpp/coreml/whisper-encoder.h +4 -0
- package/cpp/coreml/whisper-encoder.mm +5 -3
- package/cpp/ggml-aarch64.c +129 -0
- package/cpp/ggml-aarch64.h +19 -0
- package/cpp/ggml-alloc.c +805 -400
- package/cpp/ggml-alloc.h +60 -10
- package/cpp/ggml-backend-impl.h +216 -0
- package/cpp/ggml-backend-reg.cpp +204 -0
- package/cpp/ggml-backend.cpp +1996 -0
- package/cpp/ggml-backend.cpp.rej +12 -0
- package/cpp/ggml-backend.h +336 -0
- package/cpp/ggml-common.h +1853 -0
- package/cpp/ggml-cpp.h +38 -0
- package/cpp/ggml-cpu-aarch64.c +3560 -0
- package/cpp/ggml-cpu-aarch64.h +30 -0
- package/cpp/ggml-cpu-impl.h +371 -0
- package/cpp/ggml-cpu-quants.c +10822 -0
- package/cpp/ggml-cpu-quants.h +63 -0
- package/cpp/ggml-cpu.c +13970 -0
- package/cpp/ggml-cpu.cpp +663 -0
- package/cpp/ggml-cpu.h +177 -0
- package/cpp/ggml-impl.h +551 -0
- package/cpp/ggml-metal-impl.h +249 -0
- package/cpp/ggml-metal.h +24 -43
- package/cpp/ggml-metal.m +4190 -1075
- package/cpp/ggml-quants.c +5247 -0
- package/cpp/ggml-quants.h +100 -0
- package/cpp/ggml-threading.cpp +12 -0
- package/cpp/ggml-threading.h +12 -0
- package/cpp/ggml-whisper.metallib +0 -0
- package/cpp/ggml.c +5474 -18763
- package/cpp/ggml.h +833 -628
- package/cpp/rn-audioutils.cpp +68 -0
- package/cpp/rn-audioutils.h +14 -0
- package/cpp/rn-whisper-log.h +11 -0
- package/cpp/rn-whisper.cpp +221 -52
- package/cpp/rn-whisper.h +50 -15
- package/cpp/whisper.cpp +2872 -1371
- package/cpp/whisper.h +170 -41
- package/ios/RNWhisper.mm +139 -46
- package/ios/RNWhisperAudioUtils.h +1 -2
- package/ios/RNWhisperAudioUtils.m +18 -67
- package/ios/RNWhisperContext.h +11 -8
- package/ios/RNWhisperContext.mm +195 -150
- package/jest/mock.js +15 -2
- package/lib/commonjs/NativeRNWhisper.js.map +1 -1
- package/lib/commonjs/index.js +76 -28
- package/lib/commonjs/index.js.map +1 -1
- package/lib/commonjs/version.json +1 -1
- package/lib/module/NativeRNWhisper.js.map +1 -1
- package/lib/module/index.js +76 -28
- package/lib/module/index.js.map +1 -1
- package/lib/module/version.json +1 -1
- package/lib/typescript/NativeRNWhisper.d.ts +13 -4
- package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +37 -5
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +9 -7
- package/src/NativeRNWhisper.ts +20 -4
- package/src/index.ts +98 -42
- package/src/version.json +1 -1
- package/whisper-rn.podspec +11 -18
- package/cpp/ggml-metal.metal +0 -2353
|
@@ -3,60 +3,23 @@
|
|
|
3
3
|
|
|
4
4
|
@implementation RNWhisperAudioUtils
|
|
5
5
|
|
|
6
|
-
+ (
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
[outputData appendBytes:&chunkSize length:sizeof(chunkSize)];
|
|
24
|
-
[outputData appendData:[@"WAVE" dataUsingEncoding:NSUTF8StringEncoding]]; // format
|
|
25
|
-
[outputData appendData:[@"fmt " dataUsingEncoding:NSUTF8StringEncoding]]; // subchunk 1 id
|
|
26
|
-
|
|
27
|
-
int subchunk1Size = CFSwapInt32HostToLittle(16);
|
|
28
|
-
[outputData appendBytes:&subchunk1Size length:sizeof(subchunk1Size)];
|
|
29
|
-
|
|
30
|
-
short audioFormat = CFSwapInt16HostToLittle(1); // PCM
|
|
31
|
-
[outputData appendBytes:&audioFormat length:sizeof(audioFormat)];
|
|
32
|
-
|
|
33
|
-
short numChannels = CFSwapInt16HostToLittle(1); // mono
|
|
34
|
-
[outputData appendBytes:&numChannels length:sizeof(numChannels)];
|
|
35
|
-
|
|
36
|
-
int sampleRate = CFSwapInt32HostToLittle(WHISPER_SAMPLE_RATE);
|
|
37
|
-
[outputData appendBytes:&sampleRate length:sizeof(sampleRate)];
|
|
38
|
-
|
|
39
|
-
// (bitDepth * sampleRate * channels) >> 3
|
|
40
|
-
int byteRate = CFSwapInt32HostToLittle(WHISPER_SAMPLE_RATE * 1 * 16 / 8);
|
|
41
|
-
[outputData appendBytes:&byteRate length:sizeof(byteRate)];
|
|
42
|
-
|
|
43
|
-
// (bitDepth * channels) >> 3
|
|
44
|
-
short blockAlign = CFSwapInt16HostToLittle(16 / 8);
|
|
45
|
-
[outputData appendBytes:&blockAlign length:sizeof(blockAlign)];
|
|
46
|
-
|
|
47
|
-
// bitDepth
|
|
48
|
-
short bitsPerSample = CFSwapInt16HostToLittle(16);
|
|
49
|
-
[outputData appendBytes:&bitsPerSample length:sizeof(bitsPerSample)];
|
|
50
|
-
|
|
51
|
-
[outputData appendData:[@"data" dataUsingEncoding:NSUTF8StringEncoding]]; // subchunk 2 id
|
|
52
|
-
int subchunk2Size = CFSwapInt32HostToLittle((int)rawData.length);
|
|
53
|
-
[outputData appendBytes:&subchunk2Size length:sizeof(subchunk2Size)];
|
|
54
|
-
|
|
55
|
-
// Audio data
|
|
56
|
-
[outputData appendData:rawData];
|
|
57
|
-
|
|
58
|
-
// Save to file
|
|
59
|
-
[outputData writeToFile:audioOutputFile atomically:YES];
|
|
6
|
+
/// Converts 16-bit PCM samples held in `data` into normalized floats.
///
/// @param data      Raw sample bytes, optionally preceded by a WAV header.
/// @param count     Out-parameter that receives the number of samples written
///                  to the returned buffer. May be NULL if the caller does not
///                  need it. Set to 0 whenever NULL is returned.
/// @param cutHeader When YES, the first 44 bytes of `data` are skipped before
///                  decoding (the header is not parsed, only dropped).
/// @return A malloc'ed buffer of `*count` floats clamped to [-1.0, 1.0]; the
///         caller owns it and must free() it. Returns NULL when `cutHeader` is
///         YES but `data` is shorter than the header, or when allocation fails.
+ (float *)decodeWaveData:(NSData*)data count:(int *)count cutHeader:(BOOL)cutHeader {
    // Number of leading bytes dropped when cutHeader is requested.
    static const NSUInteger kWaveHeaderLength = 44;

    if (count != NULL) {
        *count = 0;
    }

    NSData *waveData = data;
    if (cutHeader) {
        // [data length] is unsigned: subtracting the header length from a
        // shorter buffer would wrap around and make subdataWithRange: raise
        // NSRangeException, so reject truncated input up front.
        if (data != nil && [data length] < kWaveHeaderLength) {
            return NULL;
        }
        // just cut the header bytes from the beginning
        waveData = [data subdataWithRange:NSMakeRange(kWaveHeaderLength, [data length] - kWaveHeaderLength)];
    }

    const short *shortArray = (const short *)[waveData bytes];
    // A trailing odd byte (incomplete sample) is ignored by the integer division.
    int shortCount = (int) ([waveData length] / sizeof(short));

    float *floatArray = (float *) malloc(shortCount * sizeof(float));
    if (floatArray == NULL) {
        // Allocation failed (or malloc(0) returned NULL): nothing to decode into.
        return NULL;
    }

    for (NSInteger i = 0; i < shortCount; i++) {
        // Dividing by 32767 maps -32768 slightly below -1.0, hence the clamp.
        float floatValue = ((float)shortArray[i]) / 32767.0;
        floatValue = MAX(floatValue, -1.0f);
        floatValue = MIN(floatValue, 1.0f);
        floatArray[i] = floatValue;
    }

    if (count != NULL) {
        *count = shortCount;
    }
    return floatArray;
}
|
|
61
24
|
|
|
62
25
|
+ (float *)decodeWaveFile:(NSString*)filePath count:(int *)count {
|
|
@@ -65,19 +28,7 @@
|
|
|
65
28
|
if (fileData == nil) {
|
|
66
29
|
return nil;
|
|
67
30
|
}
|
|
68
|
-
|
|
69
|
-
[waveData appendData:[fileData subdataWithRange:NSMakeRange(44, [fileData length]-44)]];
|
|
70
|
-
const short *shortArray = (const short *)[waveData bytes];
|
|
71
|
-
int shortCount = (int) ([waveData length] / sizeof(short));
|
|
72
|
-
float *floatArray = (float *) malloc(shortCount * sizeof(float));
|
|
73
|
-
for (NSInteger i = 0; i < shortCount; i++) {
|
|
74
|
-
float floatValue = ((float)shortArray[i]) / 32767.0;
|
|
75
|
-
floatValue = MAX(floatValue, -1.0);
|
|
76
|
-
floatValue = MIN(floatValue, 1.0);
|
|
77
|
-
floatArray[i] = floatValue;
|
|
78
|
-
}
|
|
79
|
-
*count = shortCount;
|
|
80
|
-
return floatArray;
|
|
31
|
+
return [RNWhisperAudioUtils decodeWaveData:fileData count:count cutHeader:YES];
|
|
81
32
|
}
|
|
82
33
|
|
|
83
34
|
@end
|
package/ios/RNWhisperContext.h
CHANGED
|
@@ -11,22 +11,20 @@
|
|
|
11
11
|
|
|
12
12
|
typedef struct {
|
|
13
13
|
__unsafe_unretained id mSelf;
|
|
14
|
-
|
|
15
|
-
int jobId;
|
|
16
14
|
NSDictionary* options;
|
|
17
15
|
|
|
16
|
+
struct rnwhisper::job * job;
|
|
17
|
+
|
|
18
18
|
bool isTranscribing;
|
|
19
19
|
bool isRealtime;
|
|
20
20
|
bool isCapturing;
|
|
21
21
|
bool isStoppedByAction;
|
|
22
|
-
int maxAudioSec;
|
|
23
22
|
int nSamplesTranscribing;
|
|
24
|
-
|
|
25
|
-
NSMutableArray<NSNumber *> *sliceNSamples;
|
|
23
|
+
std::vector<int> sliceNSamples;
|
|
26
24
|
bool isUseSlices;
|
|
27
25
|
int sliceIndex;
|
|
28
26
|
int transcribeSliceIndex;
|
|
29
|
-
|
|
27
|
+
NSString* audioOutputPath;
|
|
30
28
|
|
|
31
29
|
AudioQueueRef queue;
|
|
32
30
|
AudioStreamBasicDescription dataFormat;
|
|
@@ -40,15 +38,19 @@ typedef struct {
|
|
|
40
38
|
dispatch_queue_t dQueue;
|
|
41
39
|
struct whisper_context * ctx;
|
|
42
40
|
RNWhisperContextRecordState recordState;
|
|
41
|
+
NSString * reasonNoMetal;
|
|
42
|
+
bool isMetalEnabled;
|
|
43
43
|
}
|
|
44
44
|
|
|
45
|
-
+ (instancetype)initWithModelPath:(NSString *)modelPath contextId:(int)contextId noCoreML:(BOOL)noCoreML;
|
|
45
|
+
+ (instancetype)initWithModelPath:(NSString *)modelPath contextId:(int)contextId noCoreML:(BOOL)noCoreML noMetal:(BOOL)noMetal useFlashAttn:(BOOL)useFlashAttn;
|
|
46
|
+
- (bool)isMetalEnabled;
|
|
47
|
+
- (NSString *)reasonNoMetal;
|
|
46
48
|
- (struct whisper_context *)getContext;
|
|
47
49
|
- (dispatch_queue_t)getDispatchQueue;
|
|
48
50
|
- (OSStatus)transcribeRealtime:(int)jobId
|
|
49
51
|
options:(NSDictionary *)options
|
|
50
52
|
onTranscribe:(void (^)(int, NSString *, NSDictionary *))onTranscribe;
|
|
51
|
-
- (void)
|
|
53
|
+
- (void)transcribeData:(int)jobId
|
|
52
54
|
audioData:(float *)audioData
|
|
53
55
|
audioDataCount:(int)audioDataCount
|
|
54
56
|
options:(NSDictionary *)options
|
|
@@ -61,6 +63,7 @@ typedef struct {
|
|
|
61
63
|
- (bool)isTranscribing;
|
|
62
64
|
- (bool)isStoppedByAction;
|
|
63
65
|
- (NSMutableDictionary *)getTextSegments;
|
|
66
|
+
- (NSString *)bench:(int)maxThreads;
|
|
64
67
|
- (void)invalidate;
|
|
65
68
|
|
|
66
69
|
@end
|