whisper.rn 0.4.0-rc.3 → 0.4.0-rc.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -6
- package/android/build.gradle +4 -0
- package/android/src/main/CMakeLists.txt +7 -0
- package/android/src/main/java/com/rnwhisper/AudioUtils.java +0 -80
- package/android/src/main/java/com/rnwhisper/RNWhisper.java +6 -1
- package/android/src/main/java/com/rnwhisper/WhisperContext.java +53 -135
- package/android/src/main/jni-utils.h +76 -0
- package/android/src/main/jni.cpp +188 -109
- package/cpp/README.md +1 -1
- package/cpp/coreml/whisper-encoder-impl.h +1 -1
- package/cpp/coreml/whisper-encoder.h +4 -0
- package/cpp/coreml/whisper-encoder.mm +4 -2
- package/cpp/ggml-alloc.c +451 -282
- package/cpp/ggml-alloc.h +74 -8
- package/cpp/ggml-backend-impl.h +112 -0
- package/cpp/ggml-backend.c +1357 -0
- package/cpp/ggml-backend.h +181 -0
- package/cpp/ggml-impl.h +243 -0
- package/cpp/{ggml-metal.metal → ggml-metal-whisper.metal} +1556 -329
- package/cpp/ggml-metal.h +28 -1
- package/cpp/ggml-metal.m +1128 -308
- package/cpp/ggml-quants.c +7382 -0
- package/cpp/ggml-quants.h +224 -0
- package/cpp/ggml.c +3848 -5245
- package/cpp/ggml.h +353 -155
- package/cpp/rn-audioutils.cpp +68 -0
- package/cpp/rn-audioutils.h +14 -0
- package/cpp/rn-whisper-log.h +11 -0
- package/cpp/rn-whisper.cpp +141 -59
- package/cpp/rn-whisper.h +47 -15
- package/cpp/whisper.cpp +1750 -964
- package/cpp/whisper.h +97 -15
- package/ios/RNWhisper.mm +15 -9
- package/ios/RNWhisper.xcodeproj/project.xcworkspace/contents.xcworkspacedata +4 -0
- package/ios/RNWhisper.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist +8 -0
- package/ios/RNWhisper.xcodeproj/project.xcworkspace/xcuserdata/jhen.xcuserdatad/UserInterfaceState.xcuserstate +0 -0
- package/ios/RNWhisper.xcodeproj/xcuserdata/jhen.xcuserdatad/xcschemes/xcschememanagement.plist +19 -0
- package/ios/RNWhisperAudioUtils.h +0 -2
- package/ios/RNWhisperAudioUtils.m +0 -56
- package/ios/RNWhisperContext.h +8 -12
- package/ios/RNWhisperContext.mm +132 -138
- package/jest/mock.js +1 -1
- package/lib/commonjs/NativeRNWhisper.js.map +1 -1
- package/lib/commonjs/index.js +28 -9
- package/lib/commonjs/index.js.map +1 -1
- package/lib/commonjs/version.json +1 -1
- package/lib/module/NativeRNWhisper.js.map +1 -1
- package/lib/module/index.js +28 -9
- package/lib/module/index.js.map +1 -1
- package/lib/module/version.json +1 -1
- package/lib/typescript/NativeRNWhisper.d.ts +7 -1
- package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +7 -2
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +6 -5
- package/src/NativeRNWhisper.ts +8 -1
- package/src/index.ts +29 -17
- package/src/version.json +1 -1
- package/whisper-rn.podspec +1 -2
package/cpp/whisper.h
CHANGED
|
@@ -1,10 +1,20 @@
|
|
|
1
1
|
#ifndef WHISPER_H
|
|
2
2
|
#define WHISPER_H
|
|
3
3
|
|
|
4
|
+
#include "ggml.h"
|
|
5
|
+
|
|
4
6
|
#include <stddef.h>
|
|
5
7
|
#include <stdint.h>
|
|
6
8
|
#include <stdbool.h>
|
|
7
9
|
|
|
10
|
+
#ifdef __GNUC__
|
|
11
|
+
# define WHISPER_DEPRECATED(func, hint) func __attribute__((deprecated(hint)))
|
|
12
|
+
#elif defined(_MSC_VER)
|
|
13
|
+
# define WHISPER_DEPRECATED(func, hint) __declspec(deprecated(hint)) func
|
|
14
|
+
#else
|
|
15
|
+
# define WHISPER_DEPRECATED(func, hint) func
|
|
16
|
+
#endif
|
|
17
|
+
|
|
8
18
|
#ifdef WHISPER_SHARED
|
|
9
19
|
# ifdef _WIN32
|
|
10
20
|
# ifdef WHISPER_BUILD
|
|
@@ -21,7 +31,6 @@
|
|
|
21
31
|
|
|
22
32
|
#define WHISPER_SAMPLE_RATE 16000
|
|
23
33
|
#define WHISPER_N_FFT 400
|
|
24
|
-
#define WHISPER_N_MEL 80
|
|
25
34
|
#define WHISPER_HOP_LENGTH 160
|
|
26
35
|
#define WHISPER_CHUNK_SIZE 30
|
|
27
36
|
|
|
@@ -41,7 +50,9 @@ extern "C" {
|
|
|
41
50
|
//
|
|
42
51
|
// ...
|
|
43
52
|
//
|
|
44
|
-
//
|
|
53
|
+
// whisper_context_params cparams = whisper_context_default_params();
|
|
54
|
+
//
|
|
55
|
+
// struct whisper_context * ctx = whisper_init_from_file_with_params("/path/to/ggml-base.en.bin", cparams);
|
|
45
56
|
//
|
|
46
57
|
// if (whisper_full(ctx, wparams, pcmf32.data(), pcmf32.size()) != 0) {
|
|
47
58
|
// fprintf(stderr, "failed to process audio\n");
|
|
@@ -69,7 +80,14 @@ extern "C" {
|
|
|
69
80
|
struct whisper_state;
|
|
70
81
|
struct whisper_full_params;
|
|
71
82
|
|
|
72
|
-
typedef
|
|
83
|
+
typedef int32_t whisper_pos;
|
|
84
|
+
typedef int32_t whisper_token;
|
|
85
|
+
typedef int32_t whisper_seq_id;
|
|
86
|
+
|
|
87
|
+
struct whisper_context_params {
|
|
88
|
+
bool use_gpu;
|
|
89
|
+
bool use_coreml;
|
|
90
|
+
};
|
|
73
91
|
|
|
74
92
|
typedef struct whisper_token_data {
|
|
75
93
|
whisper_token id; // token id
|
|
@@ -96,21 +114,74 @@ extern "C" {
|
|
|
96
114
|
void (*close)(void * ctx);
|
|
97
115
|
} whisper_model_loader;
|
|
98
116
|
|
|
117
|
+
// grammar element type
|
|
118
|
+
enum whisper_gretype {
|
|
119
|
+
// end of rule definition
|
|
120
|
+
WHISPER_GRETYPE_END = 0,
|
|
121
|
+
|
|
122
|
+
// start of alternate definition for rule
|
|
123
|
+
WHISPER_GRETYPE_ALT = 1,
|
|
124
|
+
|
|
125
|
+
// non-terminal element: reference to rule
|
|
126
|
+
WHISPER_GRETYPE_RULE_REF = 2,
|
|
127
|
+
|
|
128
|
+
// terminal element: character (code point)
|
|
129
|
+
WHISPER_GRETYPE_CHAR = 3,
|
|
130
|
+
|
|
131
|
+
// inverse char(s) ([^a], [^a-b], [^abc])
|
|
132
|
+
WHISPER_GRETYPE_CHAR_NOT = 4,
|
|
133
|
+
|
|
134
|
+
// modifies a preceding WHISPER_GRETYPE_CHAR or WHISPER_GRETYPE_CHAR_ALT to
|
|
135
|
+
// be an inclusive range ([a-z])
|
|
136
|
+
WHISPER_GRETYPE_CHAR_RNG_UPPER = 5,
|
|
137
|
+
|
|
138
|
+
// modifies a preceding WHISPER_GRETYPE_CHAR or
|
|
139
|
+
// WHISPER_GRETYPE_CHAR_RNG_UPPER to add an alternate char to match ([ab], [a-zA])
|
|
140
|
+
WHISPER_GRETYPE_CHAR_ALT = 6,
|
|
141
|
+
};
|
|
142
|
+
|
|
143
|
+
typedef struct whisper_grammar_element {
|
|
144
|
+
enum whisper_gretype type;
|
|
145
|
+
uint32_t value; // Unicode code point or rule ID
|
|
146
|
+
} whisper_grammar_element;
|
|
147
|
+
|
|
99
148
|
// Various functions for loading a ggml whisper model.
|
|
100
149
|
// Allocate (almost) all memory needed for the model.
|
|
101
150
|
// Return NULL on failure
|
|
102
|
-
|
|
103
|
-
WHISPER_API struct whisper_context *
|
|
104
|
-
|
|
105
|
-
WHISPER_API struct whisper_context * whisper_init_from_file(const char * path_model);
|
|
106
|
-
WHISPER_API struct whisper_context * whisper_init_from_buffer(void * buffer, size_t buffer_size);
|
|
107
|
-
WHISPER_API struct whisper_context * whisper_init(struct whisper_model_loader * loader);
|
|
151
|
+
WHISPER_API struct whisper_context * whisper_init_from_file_with_params (const char * path_model, struct whisper_context_params params);
|
|
152
|
+
WHISPER_API struct whisper_context * whisper_init_from_buffer_with_params(void * buffer, size_t buffer_size, struct whisper_context_params params);
|
|
153
|
+
WHISPER_API struct whisper_context * whisper_init_with_params (struct whisper_model_loader * loader, struct whisper_context_params params);
|
|
108
154
|
|
|
109
155
|
// These are the same as the above, but the internal state of the context is not allocated automatically
|
|
110
156
|
// It is the responsibility of the caller to allocate the state using whisper_init_state() (#523)
|
|
111
|
-
WHISPER_API struct whisper_context *
|
|
112
|
-
WHISPER_API struct whisper_context *
|
|
113
|
-
WHISPER_API struct whisper_context *
|
|
157
|
+
WHISPER_API struct whisper_context * whisper_init_from_file_with_params_no_state (const char * path_model, struct whisper_context_params params);
|
|
158
|
+
WHISPER_API struct whisper_context * whisper_init_from_buffer_with_params_no_state(void * buffer, size_t buffer_size, struct whisper_context_params params);
|
|
159
|
+
WHISPER_API struct whisper_context * whisper_init_with_params_no_state (struct whisper_model_loader * loader, struct whisper_context_params params);
|
|
160
|
+
|
|
161
|
+
WHISPER_DEPRECATED(
|
|
162
|
+
WHISPER_API struct whisper_context * whisper_init_from_file(const char * path_model),
|
|
163
|
+
"use whisper_init_from_file_with_params instead"
|
|
164
|
+
);
|
|
165
|
+
WHISPER_DEPRECATED(
|
|
166
|
+
WHISPER_API struct whisper_context * whisper_init_from_buffer(void * buffer, size_t buffer_size),
|
|
167
|
+
"use whisper_init_from_buffer_with_params instead"
|
|
168
|
+
);
|
|
169
|
+
WHISPER_DEPRECATED(
|
|
170
|
+
WHISPER_API struct whisper_context * whisper_init(struct whisper_model_loader * loader),
|
|
171
|
+
"use whisper_init_with_params instead"
|
|
172
|
+
);
|
|
173
|
+
WHISPER_DEPRECATED(
|
|
174
|
+
WHISPER_API struct whisper_context * whisper_init_from_file_no_state(const char * path_model),
|
|
175
|
+
"use whisper_init_from_file_with_params_no_state instead"
|
|
176
|
+
);
|
|
177
|
+
WHISPER_DEPRECATED(
|
|
178
|
+
WHISPER_API struct whisper_context * whisper_init_from_buffer_no_state(void * buffer, size_t buffer_size),
|
|
179
|
+
"use whisper_init_from_buffer_with_params_no_state instead"
|
|
180
|
+
);
|
|
181
|
+
WHISPER_DEPRECATED(
|
|
182
|
+
WHISPER_API struct whisper_context * whisper_init_no_state(struct whisper_model_loader * loader),
|
|
183
|
+
"use whisper_init_with_params_no_state instead"
|
|
184
|
+
);
|
|
114
185
|
|
|
115
186
|
WHISPER_API struct whisper_state * whisper_init_state(struct whisper_context * ctx);
|
|
116
187
|
|
|
@@ -135,6 +206,7 @@ extern "C" {
|
|
|
135
206
|
WHISPER_API void whisper_free (struct whisper_context * ctx);
|
|
136
207
|
WHISPER_API void whisper_free_state(struct whisper_state * state);
|
|
137
208
|
WHISPER_API void whisper_free_params(struct whisper_full_params * params);
|
|
209
|
+
WHISPER_API void whisper_free_context_params(struct whisper_context_params * params);
|
|
138
210
|
|
|
139
211
|
// Convert RAW PCM audio to log mel spectrogram.
|
|
140
212
|
// The resulting spectrogram is stored inside the default state of the provided whisper context.
|
|
@@ -244,6 +316,9 @@ extern "C" {
|
|
|
244
316
|
// Return the short string of the specified language id (e.g. 2 -> "de"), returns nullptr if not found
|
|
245
317
|
WHISPER_API const char * whisper_lang_str(int id);
|
|
246
318
|
|
|
319
|
+
// Return the full name of the specified language id (e.g. 2 -> "german"), returns nullptr if not found
|
|
320
|
+
WHISPER_API const char * whisper_lang_str_full(int id);
|
|
321
|
+
|
|
247
322
|
// Use mel data at offset_ms to try and auto-detect the spoken language
|
|
248
323
|
// Make sure to call whisper_pcm_to_mel() or whisper_set_mel() first
|
|
249
324
|
// Returns the top language id or negative on failure
|
|
@@ -366,6 +441,7 @@ extern "C" {
|
|
|
366
441
|
|
|
367
442
|
bool translate;
|
|
368
443
|
bool no_context; // do not use past transcription (if any) as initial prompt for the decoder
|
|
444
|
+
bool no_timestamps; // do not generate timestamps
|
|
369
445
|
bool single_segment; // force single segment output (useful for streaming)
|
|
370
446
|
bool print_special; // print special tokens (e.g. <SOT>, <EOT>, <BEG>, etc.)
|
|
371
447
|
bool print_progress; // print progress information
|
|
@@ -443,9 +519,16 @@ extern "C" {
|
|
|
443
519
|
// called by each decoder to filter obtained logits
|
|
444
520
|
whisper_logits_filter_callback logits_filter_callback;
|
|
445
521
|
void * logits_filter_callback_user_data;
|
|
522
|
+
|
|
523
|
+
const whisper_grammar_element ** grammar_rules;
|
|
524
|
+
size_t n_grammar_rules;
|
|
525
|
+
size_t i_start_rule;
|
|
526
|
+
float grammar_penalty;
|
|
446
527
|
};
|
|
447
528
|
|
|
448
|
-
// NOTE: this function allocates memory, and it is the responsibility of the caller to free the pointer - see whisper_free_params()
|
|
529
|
+
// NOTE: the *_by_ref functions allocate memory, and it is the responsibility of the caller to free the returned pointer - see whisper_free_context_params() & whisper_free_params()
|
|
530
|
+
WHISPER_API struct whisper_context_params * whisper_context_default_params_by_ref();
|
|
531
|
+
WHISPER_API struct whisper_context_params whisper_context_default_params(void);
|
|
449
532
|
WHISPER_API struct whisper_full_params * whisper_full_default_params_by_ref(enum whisper_sampling_strategy strategy);
|
|
450
533
|
WHISPER_API struct whisper_full_params whisper_full_default_params(enum whisper_sampling_strategy strategy);
|
|
451
534
|
|
|
@@ -534,8 +617,7 @@ extern "C" {
|
|
|
534
617
|
|
|
535
618
|
// Control logging output; default behavior is to print to stderr
|
|
536
619
|
|
|
537
|
-
|
|
538
|
-
WHISPER_API void whisper_set_log_callback(whisper_log_callback callback);
|
|
620
|
+
WHISPER_API void whisper_log_set(wsp_ggml_log_callback log_callback, void * user_data);
|
|
539
621
|
|
|
540
622
|
#ifdef __cplusplus
|
|
541
623
|
}
|
package/ios/RNWhisper.mm
CHANGED
|
@@ -48,6 +48,7 @@ RCT_REMAP_METHOD(initContext,
|
|
|
48
48
|
|
|
49
49
|
NSString *modelPath = [modelOptions objectForKey:@"filePath"];
|
|
50
50
|
BOOL isBundleAsset = [[modelOptions objectForKey:@"isBundleAsset"] boolValue];
|
|
51
|
+
BOOL useGpu = [[modelOptions objectForKey:@"useGpu"] boolValue];
|
|
51
52
|
BOOL useCoreMLIos = [[modelOptions objectForKey:@"useCoreMLIos"] boolValue];
|
|
52
53
|
|
|
53
54
|
// To support debug assets in development mode
|
|
@@ -77,6 +78,7 @@ RCT_REMAP_METHOD(initContext,
|
|
|
77
78
|
initWithModelPath:path
|
|
78
79
|
contextId:contextId
|
|
79
80
|
noCoreML:!useCoreMLIos
|
|
81
|
+
noMetal:!useGpu
|
|
80
82
|
];
|
|
81
83
|
if ([context getContext] == NULL) {
|
|
82
84
|
reject(@"whisper_cpp_error", @"Failed to load the model", nil);
|
|
@@ -85,7 +87,11 @@ RCT_REMAP_METHOD(initContext,
|
|
|
85
87
|
|
|
86
88
|
[contexts setObject:context forKey:[NSNumber numberWithInt:contextId]];
|
|
87
89
|
|
|
88
|
-
resolve(
|
|
90
|
+
resolve(@{
|
|
91
|
+
@"contextId": @(contextId),
|
|
92
|
+
@"gpu": @([context isMetalEnabled]),
|
|
93
|
+
@"reasonNoGPU": [context reasonNoMetal],
|
|
94
|
+
});
|
|
89
95
|
}
|
|
90
96
|
|
|
91
97
|
- (NSArray *)supportedEvents {
|
|
@@ -136,9 +142,9 @@ RCT_REMAP_METHOD(transcribeFile,
|
|
|
136
142
|
audioDataCount:count
|
|
137
143
|
options:options
|
|
138
144
|
onProgress: ^(int progress) {
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
145
|
+
rnwhisper::job* job = rnwhisper::job_get(jobId);
|
|
146
|
+
if (job && job->is_aborted()) return;
|
|
147
|
+
|
|
142
148
|
dispatch_async(dispatch_get_main_queue(), ^{
|
|
143
149
|
[self sendEventWithName:@"@RNWhisper_onTranscribeProgress"
|
|
144
150
|
body:@{
|
|
@@ -150,9 +156,9 @@ RCT_REMAP_METHOD(transcribeFile,
|
|
|
150
156
|
});
|
|
151
157
|
}
|
|
152
158
|
onNewSegments: ^(NSDictionary *result) {
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
159
|
+
rnwhisper::job* job = rnwhisper::job_get(jobId);
|
|
160
|
+
if (job && job->is_aborted()) return;
|
|
161
|
+
|
|
156
162
|
dispatch_async(dispatch_get_main_queue(), ^{
|
|
157
163
|
[self sendEventWithName:@"@RNWhisper_onTranscribeNewSegments"
|
|
158
164
|
body:@{
|
|
@@ -164,7 +170,7 @@ RCT_REMAP_METHOD(transcribeFile,
|
|
|
164
170
|
});
|
|
165
171
|
}
|
|
166
172
|
onEnd: ^(int code) {
|
|
167
|
-
if (code != 0) {
|
|
173
|
+
if (code != 0 && code != 999) {
|
|
168
174
|
free(waveFile);
|
|
169
175
|
reject(@"whisper_cpp_error", [NSString stringWithFormat:@"Failed to transcribe the file. Code: %d", code], nil);
|
|
170
176
|
return;
|
|
@@ -273,7 +279,7 @@ RCT_REMAP_METHOD(releaseAllContexts,
|
|
|
273
279
|
[context invalidate];
|
|
274
280
|
}
|
|
275
281
|
|
|
276
|
-
|
|
282
|
+
rnwhisper::job_abort_all(); // graceful abort
|
|
277
283
|
|
|
278
284
|
[contexts removeAllObjects];
|
|
279
285
|
contexts = nil;
|
|
Binary file
|
package/ios/RNWhisper.xcodeproj/xcuserdata/jhen.xcuserdatad/xcschemes/xcschememanagement.plist
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
2
|
+
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
|
3
|
+
<plist version="1.0">
|
|
4
|
+
<dict>
|
|
5
|
+
<key>SchemeUserState</key>
|
|
6
|
+
<dict>
|
|
7
|
+
<key>RNWhisper.xcscheme_^#shared#^_</key>
|
|
8
|
+
<dict>
|
|
9
|
+
<key>orderHint</key>
|
|
10
|
+
<integer>0</integer>
|
|
11
|
+
</dict>
|
|
12
|
+
<key>WhisperCpp.xcscheme_^#shared#^_</key>
|
|
13
|
+
<dict>
|
|
14
|
+
<key>orderHint</key>
|
|
15
|
+
<integer>0</integer>
|
|
16
|
+
</dict>
|
|
17
|
+
</dict>
|
|
18
|
+
</dict>
|
|
19
|
+
</plist>
|
|
@@ -2,8 +2,6 @@
|
|
|
2
2
|
|
|
3
3
|
@interface RNWhisperAudioUtils : NSObject
|
|
4
4
|
|
|
5
|
-
+ (NSData *)concatShortBuffers:(NSMutableArray<NSValue *> *)buffers sliceNSamples:(NSMutableArray<NSNumber *> *)sliceNSamples;
|
|
6
|
-
+ (void)saveWavFile:(NSData *)rawData audioOutputFile:(NSString *)audioOutputFile;
|
|
7
5
|
+ (float *)decodeWaveFile:(NSString*)filePath count:(int *)count;
|
|
8
6
|
|
|
9
7
|
@end
|
|
@@ -3,62 +3,6 @@
|
|
|
3
3
|
|
|
4
4
|
@implementation RNWhisperAudioUtils
|
|
5
5
|
|
|
6
|
-
+ (NSData *)concatShortBuffers:(NSMutableArray<NSValue *> *)buffers sliceNSamples:(NSMutableArray<NSNumber *> *)sliceNSamples {
|
|
7
|
-
NSMutableData *outputData = [NSMutableData data];
|
|
8
|
-
for (int i = 0; i < buffers.count; i++) {
|
|
9
|
-
int size = [sliceNSamples objectAtIndex:i].intValue;
|
|
10
|
-
NSValue *buffer = [buffers objectAtIndex:i];
|
|
11
|
-
short *bufferPtr = buffer.pointerValue;
|
|
12
|
-
[outputData appendBytes:bufferPtr length:size * sizeof(short)];
|
|
13
|
-
}
|
|
14
|
-
return outputData;
|
|
15
|
-
}
|
|
16
|
-
|
|
17
|
-
+ (void)saveWavFile:(NSData *)rawData audioOutputFile:(NSString *)audioOutputFile {
|
|
18
|
-
NSMutableData *outputData = [NSMutableData data];
|
|
19
|
-
|
|
20
|
-
// WAVE header
|
|
21
|
-
[outputData appendData:[@"RIFF" dataUsingEncoding:NSUTF8StringEncoding]]; // chunk id
|
|
22
|
-
int chunkSize = CFSwapInt32HostToLittle(36 + rawData.length);
|
|
23
|
-
[outputData appendBytes:&chunkSize length:sizeof(chunkSize)];
|
|
24
|
-
[outputData appendData:[@"WAVE" dataUsingEncoding:NSUTF8StringEncoding]]; // format
|
|
25
|
-
[outputData appendData:[@"fmt " dataUsingEncoding:NSUTF8StringEncoding]]; // subchunk 1 id
|
|
26
|
-
|
|
27
|
-
int subchunk1Size = CFSwapInt32HostToLittle(16);
|
|
28
|
-
[outputData appendBytes:&subchunk1Size length:sizeof(subchunk1Size)];
|
|
29
|
-
|
|
30
|
-
short audioFormat = CFSwapInt16HostToLittle(1); // PCM
|
|
31
|
-
[outputData appendBytes:&audioFormat length:sizeof(audioFormat)];
|
|
32
|
-
|
|
33
|
-
short numChannels = CFSwapInt16HostToLittle(1); // mono
|
|
34
|
-
[outputData appendBytes:&numChannels length:sizeof(numChannels)];
|
|
35
|
-
|
|
36
|
-
int sampleRate = CFSwapInt32HostToLittle(WHISPER_SAMPLE_RATE);
|
|
37
|
-
[outputData appendBytes:&sampleRate length:sizeof(sampleRate)];
|
|
38
|
-
|
|
39
|
-
// (bitDepth * sampleRate * channels) >> 3
|
|
40
|
-
int byteRate = CFSwapInt32HostToLittle(WHISPER_SAMPLE_RATE * 1 * 16 / 8);
|
|
41
|
-
[outputData appendBytes:&byteRate length:sizeof(byteRate)];
|
|
42
|
-
|
|
43
|
-
// (bitDepth * channels) >> 3
|
|
44
|
-
short blockAlign = CFSwapInt16HostToLittle(16 / 8);
|
|
45
|
-
[outputData appendBytes:&blockAlign length:sizeof(blockAlign)];
|
|
46
|
-
|
|
47
|
-
// bitDepth
|
|
48
|
-
short bitsPerSample = CFSwapInt16HostToLittle(16);
|
|
49
|
-
[outputData appendBytes:&bitsPerSample length:sizeof(bitsPerSample)];
|
|
50
|
-
|
|
51
|
-
[outputData appendData:[@"data" dataUsingEncoding:NSUTF8StringEncoding]]; // subchunk 2 id
|
|
52
|
-
int subchunk2Size = CFSwapInt32HostToLittle((int)rawData.length);
|
|
53
|
-
[outputData appendBytes:&subchunk2Size length:sizeof(subchunk2Size)];
|
|
54
|
-
|
|
55
|
-
// Audio data
|
|
56
|
-
[outputData appendData:rawData];
|
|
57
|
-
|
|
58
|
-
// Save to file
|
|
59
|
-
[outputData writeToFile:audioOutputFile atomically:YES];
|
|
60
|
-
}
|
|
61
|
-
|
|
62
6
|
+ (float *)decodeWaveFile:(NSString*)filePath count:(int *)count {
|
|
63
7
|
NSURL *url = [NSURL fileURLWithPath:filePath];
|
|
64
8
|
NSData *fileData = [NSData dataWithContentsOfURL:url];
|
package/ios/RNWhisperContext.h
CHANGED
|
@@ -11,29 +11,21 @@
|
|
|
11
11
|
|
|
12
12
|
typedef struct {
|
|
13
13
|
__unsafe_unretained id mSelf;
|
|
14
|
-
|
|
15
|
-
int jobId;
|
|
16
14
|
NSDictionary* options;
|
|
17
15
|
|
|
16
|
+
struct rnwhisper::job * job;
|
|
17
|
+
|
|
18
18
|
bool isTranscribing;
|
|
19
19
|
bool isRealtime;
|
|
20
20
|
bool isCapturing;
|
|
21
21
|
bool isStoppedByAction;
|
|
22
|
-
int maxAudioSec;
|
|
23
22
|
int nSamplesTranscribing;
|
|
24
|
-
|
|
25
|
-
NSMutableArray<NSNumber *> *sliceNSamples;
|
|
23
|
+
std::vector<int> sliceNSamples;
|
|
26
24
|
bool isUseSlices;
|
|
27
25
|
int sliceIndex;
|
|
28
26
|
int transcribeSliceIndex;
|
|
29
|
-
int audioSliceSec;
|
|
30
27
|
NSString* audioOutputPath;
|
|
31
28
|
|
|
32
|
-
bool useVad;
|
|
33
|
-
int vadMs;
|
|
34
|
-
float vadThold;
|
|
35
|
-
float vadFreqThold;
|
|
36
|
-
|
|
37
29
|
AudioQueueRef queue;
|
|
38
30
|
AudioStreamBasicDescription dataFormat;
|
|
39
31
|
AudioQueueBufferRef buffers[NUM_BUFFERS];
|
|
@@ -46,9 +38,13 @@ typedef struct {
|
|
|
46
38
|
dispatch_queue_t dQueue;
|
|
47
39
|
struct whisper_context * ctx;
|
|
48
40
|
RNWhisperContextRecordState recordState;
|
|
41
|
+
NSString * reasonNoMetal;
|
|
42
|
+
bool isMetalEnabled;
|
|
49
43
|
}
|
|
50
44
|
|
|
51
|
-
+ (instancetype)initWithModelPath:(NSString *)modelPath contextId:(int)contextId noCoreML:(BOOL)noCoreML;
|
|
45
|
+
+ (instancetype)initWithModelPath:(NSString *)modelPath contextId:(int)contextId noCoreML:(BOOL)noCoreML noMetal:(BOOL)noMetal;
|
|
46
|
+
- (bool)isMetalEnabled;
|
|
47
|
+
- (NSString *)reasonNoMetal;
|
|
52
48
|
- (struct whisper_context *)getContext;
|
|
53
49
|
- (dispatch_queue_t)getDispatchQueue;
|
|
54
50
|
- (OSStatus)transcribeRealtime:(int)jobId
|