whisper.rn 0.4.0-rc.1 → 0.4.0-rc.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. package/README.md +6 -6
  2. package/android/build.gradle +4 -0
  3. package/android/src/main/CMakeLists.txt +21 -1
  4. package/android/src/main/java/com/rnwhisper/AudioUtils.java +27 -92
  5. package/android/src/main/java/com/rnwhisper/RNWhisper.java +86 -40
  6. package/android/src/main/java/com/rnwhisper/WhisperContext.java +85 -131
  7. package/android/src/main/jni-utils.h +76 -0
  8. package/android/src/main/jni.cpp +226 -109
  9. package/android/src/newarch/java/com/rnwhisper/RNWhisperModule.java +10 -0
  10. package/android/src/oldarch/java/com/rnwhisper/RNWhisperModule.java +10 -0
  11. package/cpp/coreml/whisper-encoder-impl.h +1 -1
  12. package/cpp/coreml/whisper-encoder.h +4 -0
  13. package/cpp/coreml/whisper-encoder.mm +5 -3
  14. package/cpp/ggml-alloc.c +797 -400
  15. package/cpp/ggml-alloc.h +60 -10
  16. package/cpp/ggml-backend-impl.h +255 -0
  17. package/cpp/ggml-backend-reg.cpp +582 -0
  18. package/cpp/ggml-backend.cpp +2002 -0
  19. package/cpp/ggml-backend.h +354 -0
  20. package/cpp/ggml-common.h +1851 -0
  21. package/cpp/ggml-cpp.h +39 -0
  22. package/cpp/ggml-cpu-aarch64.cpp +4247 -0
  23. package/cpp/ggml-cpu-aarch64.h +8 -0
  24. package/cpp/ggml-cpu-impl.h +531 -0
  25. package/cpp/ggml-cpu-quants.c +12245 -0
  26. package/cpp/ggml-cpu-quants.h +63 -0
  27. package/cpp/ggml-cpu-traits.cpp +36 -0
  28. package/cpp/ggml-cpu-traits.h +38 -0
  29. package/cpp/ggml-cpu.c +14792 -0
  30. package/cpp/ggml-cpu.cpp +653 -0
  31. package/cpp/ggml-cpu.h +137 -0
  32. package/cpp/ggml-impl.h +567 -0
  33. package/cpp/ggml-metal-impl.h +288 -0
  34. package/cpp/ggml-metal.h +24 -43
  35. package/cpp/ggml-metal.m +4867 -1080
  36. package/cpp/ggml-opt.cpp +854 -0
  37. package/cpp/ggml-opt.h +216 -0
  38. package/cpp/ggml-quants.c +5238 -0
  39. package/cpp/ggml-quants.h +100 -0
  40. package/cpp/ggml-threading.cpp +12 -0
  41. package/cpp/ggml-threading.h +14 -0
  42. package/cpp/ggml-whisper.metallib +0 -0
  43. package/cpp/ggml.c +5106 -19431
  44. package/cpp/ggml.h +847 -669
  45. package/cpp/gguf.cpp +1329 -0
  46. package/cpp/gguf.h +202 -0
  47. package/cpp/rn-audioutils.cpp +68 -0
  48. package/cpp/rn-audioutils.h +14 -0
  49. package/cpp/rn-whisper-log.h +11 -0
  50. package/cpp/rn-whisper.cpp +221 -52
  51. package/cpp/rn-whisper.h +50 -15
  52. package/cpp/whisper.cpp +3174 -1533
  53. package/cpp/whisper.h +176 -44
  54. package/ios/RNWhisper.mm +139 -46
  55. package/ios/RNWhisperAudioUtils.h +1 -2
  56. package/ios/RNWhisperAudioUtils.m +18 -67
  57. package/ios/RNWhisperContext.h +11 -8
  58. package/ios/RNWhisperContext.mm +195 -150
  59. package/jest/mock.js +15 -2
  60. package/lib/commonjs/NativeRNWhisper.js.map +1 -1
  61. package/lib/commonjs/index.js +76 -28
  62. package/lib/commonjs/index.js.map +1 -1
  63. package/lib/commonjs/version.json +1 -1
  64. package/lib/module/NativeRNWhisper.js.map +1 -1
  65. package/lib/module/index.js +76 -28
  66. package/lib/module/index.js.map +1 -1
  67. package/lib/module/version.json +1 -1
  68. package/lib/typescript/NativeRNWhisper.d.ts +13 -4
  69. package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
  70. package/lib/typescript/index.d.ts +37 -5
  71. package/lib/typescript/index.d.ts.map +1 -1
  72. package/package.json +9 -7
  73. package/src/NativeRNWhisper.ts +20 -4
  74. package/src/index.ts +98 -42
  75. package/src/version.json +1 -1
  76. package/whisper-rn.podspec +13 -20
  77. package/cpp/README.md +0 -4
  78. package/cpp/ggml-metal.metal +0 -2353
@@ -3,60 +3,23 @@
3
3
 
4
4
  @implementation RNWhisperAudioUtils
5
5
 
6
- + (NSData *)concatShortBuffers:(NSMutableArray<NSValue *> *)buffers sliceNSamples:(NSMutableArray<NSNumber *> *)sliceNSamples {
7
- NSMutableData *outputData = [NSMutableData data];
8
- for (int i = 0; i < buffers.count; i++) {
9
- int size = [sliceNSamples objectAtIndex:i].intValue;
10
- NSValue *buffer = [buffers objectAtIndex:i];
11
- short *bufferPtr = buffer.pointerValue;
12
- [outputData appendBytes:bufferPtr length:size * sizeof(short)];
13
- }
14
- return outputData;
15
- }
16
-
17
- + (void)saveWavFile:(NSData *)rawData audioOutputFile:(NSString *)audioOutputFile {
18
- NSMutableData *outputData = [NSMutableData data];
19
-
20
- // WAVE header
21
- [outputData appendData:[@"RIFF" dataUsingEncoding:NSUTF8StringEncoding]]; // chunk id
22
- int chunkSize = CFSwapInt32HostToLittle(36 + rawData.length);
23
- [outputData appendBytes:&chunkSize length:sizeof(chunkSize)];
24
- [outputData appendData:[@"WAVE" dataUsingEncoding:NSUTF8StringEncoding]]; // format
25
- [outputData appendData:[@"fmt " dataUsingEncoding:NSUTF8StringEncoding]]; // subchunk 1 id
26
-
27
- int subchunk1Size = CFSwapInt32HostToLittle(16);
28
- [outputData appendBytes:&subchunk1Size length:sizeof(subchunk1Size)];
29
-
30
- short audioFormat = CFSwapInt16HostToLittle(1); // PCM
31
- [outputData appendBytes:&audioFormat length:sizeof(audioFormat)];
32
-
33
- short numChannels = CFSwapInt16HostToLittle(1); // mono
34
- [outputData appendBytes:&numChannels length:sizeof(numChannels)];
35
-
36
- int sampleRate = CFSwapInt32HostToLittle(WHISPER_SAMPLE_RATE);
37
- [outputData appendBytes:&sampleRate length:sizeof(sampleRate)];
38
-
39
- // (bitDepth * sampleRate * channels) >> 3
40
- int byteRate = CFSwapInt32HostToLittle(WHISPER_SAMPLE_RATE * 1 * 16 / 8);
41
- [outputData appendBytes:&byteRate length:sizeof(byteRate)];
42
-
43
- // (bitDepth * channels) >> 3
44
- short blockAlign = CFSwapInt16HostToLittle(16 / 8);
45
- [outputData appendBytes:&blockAlign length:sizeof(blockAlign)];
46
-
47
- // bitDepth
48
- short bitsPerSample = CFSwapInt16HostToLittle(16);
49
- [outputData appendBytes:&bitsPerSample length:sizeof(bitsPerSample)];
50
-
51
- [outputData appendData:[@"data" dataUsingEncoding:NSUTF8StringEncoding]]; // subchunk 2 id
52
- int subchunk2Size = CFSwapInt32HostToLittle((int)rawData.length);
53
- [outputData appendBytes:&subchunk2Size length:sizeof(subchunk2Size)];
54
-
55
- // Audio data
56
- [outputData appendData:rawData];
57
-
58
- // Save to file
59
- [outputData writeToFile:audioOutputFile atomically:YES];
6
+ + (float *)decodeWaveData:(NSData*)data count:(int *)count cutHeader:(BOOL)cutHeader {
7
+ NSData *waveData = data;
8
+ if (cutHeader) {
9
+ // just cut 44 bytes from the beginning
10
+ waveData = [data subdataWithRange:NSMakeRange(44, [data length]-44)];
11
+ }
12
+ const short *shortArray = (const short *)[waveData bytes];
13
+ int shortCount = (int) ([waveData length] / sizeof(short));
14
+ float *floatArray = (float *) malloc(shortCount * sizeof(float));
15
+ for (NSInteger i = 0; i < shortCount; i++) {
16
+ float floatValue = ((float)shortArray[i]) / 32767.0;
17
+ floatValue = MAX(floatValue, -1.0);
18
+ floatValue = MIN(floatValue, 1.0);
19
+ floatArray[i] = floatValue;
20
+ }
21
+ *count = shortCount;
22
+ return floatArray;
60
23
  }
61
24
 
62
25
  + (float *)decodeWaveFile:(NSString*)filePath count:(int *)count {
@@ -65,19 +28,7 @@
65
28
  if (fileData == nil) {
66
29
  return nil;
67
30
  }
68
- NSMutableData *waveData = [[NSMutableData alloc] init];
69
- [waveData appendData:[fileData subdataWithRange:NSMakeRange(44, [fileData length]-44)]];
70
- const short *shortArray = (const short *)[waveData bytes];
71
- int shortCount = (int) ([waveData length] / sizeof(short));
72
- float *floatArray = (float *) malloc(shortCount * sizeof(float));
73
- for (NSInteger i = 0; i < shortCount; i++) {
74
- float floatValue = ((float)shortArray[i]) / 32767.0;
75
- floatValue = MAX(floatValue, -1.0);
76
- floatValue = MIN(floatValue, 1.0);
77
- floatArray[i] = floatValue;
78
- }
79
- *count = shortCount;
80
- return floatArray;
31
+ return [RNWhisperAudioUtils decodeWaveData:fileData count:count cutHeader:YES];
81
32
  }
82
33
 
83
34
  @end
@@ -11,22 +11,20 @@
11
11
 
12
12
  typedef struct {
13
13
  __unsafe_unretained id mSelf;
14
-
15
- int jobId;
16
14
  NSDictionary* options;
17
15
 
16
+ struct rnwhisper::job * job;
17
+
18
18
  bool isTranscribing;
19
19
  bool isRealtime;
20
20
  bool isCapturing;
21
21
  bool isStoppedByAction;
22
- int maxAudioSec;
23
22
  int nSamplesTranscribing;
24
- NSMutableArray<NSValue *> *shortBufferSlices;
25
- NSMutableArray<NSNumber *> *sliceNSamples;
23
+ std::vector<int> sliceNSamples;
26
24
  bool isUseSlices;
27
25
  int sliceIndex;
28
26
  int transcribeSliceIndex;
29
- int audioSliceSec;
27
+ NSString* audioOutputPath;
30
28
 
31
29
  AudioQueueRef queue;
32
30
  AudioStreamBasicDescription dataFormat;
@@ -40,15 +38,19 @@ typedef struct {
40
38
  dispatch_queue_t dQueue;
41
39
  struct whisper_context * ctx;
42
40
  RNWhisperContextRecordState recordState;
41
+ NSString * reasonNoMetal;
42
+ bool isMetalEnabled;
43
43
  }
44
44
 
45
- + (instancetype)initWithModelPath:(NSString *)modelPath contextId:(int)contextId noCoreML:(BOOL)noCoreML;
45
+ + (instancetype)initWithModelPath:(NSString *)modelPath contextId:(int)contextId noCoreML:(BOOL)noCoreML noMetal:(BOOL)noMetal useFlashAttn:(BOOL)useFlashAttn;
46
+ - (bool)isMetalEnabled;
47
+ - (NSString *)reasonNoMetal;
46
48
  - (struct whisper_context *)getContext;
47
49
  - (dispatch_queue_t)getDispatchQueue;
48
50
  - (OSStatus)transcribeRealtime:(int)jobId
49
51
  options:(NSDictionary *)options
50
52
  onTranscribe:(void (^)(int, NSString *, NSDictionary *))onTranscribe;
51
- - (void)transcribeFile:(int)jobId
53
+ - (void)transcribeData:(int)jobId
52
54
  audioData:(float *)audioData
53
55
  audioDataCount:(int)audioDataCount
54
56
  options:(NSDictionary *)options
@@ -61,6 +63,7 @@ typedef struct {
61
63
  - (bool)isTranscribing;
62
64
  - (bool)isStoppedByAction;
63
65
  - (NSMutableDictionary *)getTextSegments;
66
+ - (NSString *)bench:(int)maxThreads;
64
67
  - (void)invalidate;
65
68
 
66
69
  @end