whisper.rn 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/android/src/main/java/com/rnwhisper/RNWhisperModule.java +5 -4
- package/android/src/main/java/com/rnwhisper/WhisperContext.java +33 -8
- package/android/src/main/jni/whisper/Whisper.mk +2 -1
- package/android/src/main/jni/whisper/{jni.c → jni.cpp} +58 -38
- package/cpp/rn-whisper.cpp +31 -0
- package/cpp/rn-whisper.h +16 -0
- package/cpp/whisper.cpp +603 -412
- package/cpp/whisper.h +120 -40
- package/ios/RNWhisper.h +2 -0
- package/ios/RNWhisper.mm +33 -6
- package/jest/mock.js +4 -1
- package/lib/commonjs/index.js +1 -3
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/index.js +1 -3
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/index.d.ts +9 -0
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +7 -3
- package/src/index.tsx +10 -3
- package/whisper-rn.podspec +15 -8
package/cpp/whisper.h
CHANGED
|
@@ -66,6 +66,7 @@ extern "C" {
|
|
|
66
66
|
//
|
|
67
67
|
|
|
68
68
|
struct whisper_context;
|
|
69
|
+
struct whisper_state;
|
|
69
70
|
|
|
70
71
|
typedef int whisper_token;
|
|
71
72
|
|
|
@@ -101,11 +102,20 @@ extern "C" {
|
|
|
101
102
|
WHISPER_API struct whisper_context * whisper_init_from_buffer(void * buffer, size_t buffer_size);
|
|
102
103
|
WHISPER_API struct whisper_context * whisper_init(struct whisper_model_loader * loader);
|
|
103
104
|
|
|
104
|
-
//
|
|
105
|
-
|
|
105
|
+
// These are the same as the above, but the internal state of the context is not allocated automatically
|
|
106
|
+
// It is the responsibility of the caller to allocate the state using whisper_init_state() (#523)
|
|
107
|
+
WHISPER_API struct whisper_context * whisper_init_from_file_no_state(const char * path_model);
|
|
108
|
+
WHISPER_API struct whisper_context * whisper_init_from_buffer_no_state(void * buffer, size_t buffer_size);
|
|
109
|
+
WHISPER_API struct whisper_context * whisper_init_no_state(struct whisper_model_loader * loader);
|
|
110
|
+
|
|
111
|
+
WHISPER_API struct whisper_state * whisper_init_state(struct whisper_context * ctx);
|
|
112
|
+
|
|
113
|
+
// Frees all allocated memory
|
|
114
|
+
WHISPER_API void whisper_free (struct whisper_context * ctx);
|
|
115
|
+
WHISPER_API void whisper_free_state(struct whisper_state * state);
|
|
106
116
|
|
|
107
117
|
// Convert RAW PCM audio to log mel spectrogram.
|
|
108
|
-
// The resulting spectrogram is stored inside the provided whisper context.
|
|
118
|
+
// The resulting spectrogram is stored inside the default state of the provided whisper context.
|
|
109
119
|
// Returns 0 on success
|
|
110
120
|
WHISPER_API int whisper_pcm_to_mel(
|
|
111
121
|
struct whisper_context * ctx,
|
|
@@ -113,17 +123,30 @@ extern "C" {
|
|
|
113
123
|
int n_samples,
|
|
114
124
|
int n_threads);
|
|
115
125
|
|
|
116
|
-
|
|
117
|
-
|
|
126
|
+
WHISPER_API int whisper_pcm_to_mel_with_state(
|
|
127
|
+
struct whisper_context * ctx,
|
|
128
|
+
struct whisper_state * state,
|
|
129
|
+
const float * samples,
|
|
130
|
+
int n_samples,
|
|
131
|
+
int n_threads);
|
|
132
|
+
|
|
133
|
+
// Convert RAW PCM audio to log mel spectrogram but applies a Phase Vocoder to speed up the audio x2.
|
|
134
|
+
// The resulting spectrogram is stored inside the default state of the provided whisper context.
|
|
118
135
|
// Returns 0 on success
|
|
119
136
|
WHISPER_API int whisper_pcm_to_mel_phase_vocoder(
|
|
120
|
-
struct whisper_context* ctx,
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
137
|
+
struct whisper_context * ctx,
|
|
138
|
+
const float * samples,
|
|
139
|
+
int n_samples,
|
|
140
|
+
int n_threads);
|
|
141
|
+
|
|
142
|
+
WHISPER_API int whisper_pcm_to_mel_phase_vocoder_with_state(
|
|
143
|
+
struct whisper_context * ctx,
|
|
144
|
+
struct whisper_state * state,
|
|
145
|
+
const float * samples,
|
|
146
|
+
int n_samples,
|
|
147
|
+
int n_threads);
|
|
148
|
+
|
|
149
|
+
// This can be used to set a custom log mel spectrogram inside the default state of the provided whisper context.
|
|
127
150
|
// Use this instead of whisper_pcm_to_mel() if you want to provide your own log mel spectrogram.
|
|
128
151
|
// n_mel must be 80
|
|
129
152
|
// Returns 0 on success
|
|
@@ -133,7 +156,14 @@ extern "C" {
|
|
|
133
156
|
int n_len,
|
|
134
157
|
int n_mel);
|
|
135
158
|
|
|
136
|
-
|
|
159
|
+
WHISPER_API int whisper_set_mel_with_state(
|
|
160
|
+
struct whisper_context * ctx,
|
|
161
|
+
struct whisper_state * state,
|
|
162
|
+
const float * data,
|
|
163
|
+
int n_len,
|
|
164
|
+
int n_mel);
|
|
165
|
+
|
|
166
|
+
// Run the Whisper encoder on the log mel spectrogram stored inside the default state in the provided whisper context.
|
|
137
167
|
// Make sure to call whisper_pcm_to_mel() or whisper_set_mel() first.
|
|
138
168
|
// offset can be used to specify the offset of the first frame in the spectrogram.
|
|
139
169
|
// Returns 0 on success
|
|
@@ -142,6 +172,12 @@ extern "C" {
|
|
|
142
172
|
int offset,
|
|
143
173
|
int n_threads);
|
|
144
174
|
|
|
175
|
+
WHISPER_API int whisper_encode_with_state(
|
|
176
|
+
struct whisper_context * ctx,
|
|
177
|
+
struct whisper_state * state,
|
|
178
|
+
int offset,
|
|
179
|
+
int n_threads);
|
|
180
|
+
|
|
145
181
|
// Run the Whisper decoder to obtain the logits and probabilities for the next token.
|
|
146
182
|
// Make sure to call whisper_encode() first.
|
|
147
183
|
// tokens + n_tokens is the provided context for the decoder.
|
|
@@ -155,6 +191,14 @@ extern "C" {
|
|
|
155
191
|
int n_past,
|
|
156
192
|
int n_threads);
|
|
157
193
|
|
|
194
|
+
WHISPER_API int whisper_decode_with_state(
|
|
195
|
+
struct whisper_context * ctx,
|
|
196
|
+
struct whisper_state * state,
|
|
197
|
+
const whisper_token * tokens,
|
|
198
|
+
int n_tokens,
|
|
199
|
+
int n_past,
|
|
200
|
+
int n_threads);
|
|
201
|
+
|
|
158
202
|
// Convert the provided text into tokens.
|
|
159
203
|
// The tokens pointer must be large enough to hold the resulting tokens.
|
|
160
204
|
// Returns the number of tokens on success, no more than n_max_tokens
|
|
@@ -190,17 +234,26 @@ extern "C" {
|
|
|
190
234
|
int n_threads,
|
|
191
235
|
float * lang_probs);
|
|
192
236
|
|
|
193
|
-
WHISPER_API int
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
237
|
+
WHISPER_API int whisper_lang_auto_detect_with_state(
|
|
238
|
+
struct whisper_context * ctx,
|
|
239
|
+
struct whisper_state * state,
|
|
240
|
+
int offset_ms,
|
|
241
|
+
int n_threads,
|
|
242
|
+
float * lang_probs);
|
|
243
|
+
|
|
244
|
+
WHISPER_API int whisper_n_len (struct whisper_context * ctx); // mel length
|
|
245
|
+
WHISPER_API int whisper_n_len_from_state(struct whisper_state * state); // mel length
|
|
246
|
+
WHISPER_API int whisper_n_vocab (struct whisper_context * ctx);
|
|
247
|
+
WHISPER_API int whisper_n_text_ctx (struct whisper_context * ctx);
|
|
248
|
+
WHISPER_API int whisper_n_audio_ctx (struct whisper_context * ctx);
|
|
249
|
+
WHISPER_API int whisper_is_multilingual (struct whisper_context * ctx);
|
|
198
250
|
|
|
199
251
|
// Token logits obtained from the last call to whisper_decode()
|
|
200
252
|
// The logits for the last token are stored in the last row
|
|
201
253
|
// Rows: n_tokens
|
|
202
254
|
// Cols: n_vocab
|
|
203
|
-
WHISPER_API float * whisper_get_logits(struct whisper_context * ctx);
|
|
255
|
+
WHISPER_API float * whisper_get_logits (struct whisper_context * ctx);
|
|
256
|
+
WHISPER_API float * whisper_get_logits_from_state(struct whisper_state * state);
|
|
204
257
|
|
|
205
258
|
// Token Id -> String. Uses the vocabulary in the provided context
|
|
206
259
|
WHISPER_API const char * whisper_token_to_str(struct whisper_context * ctx, whisper_token token);
|
|
@@ -218,7 +271,7 @@ extern "C" {
|
|
|
218
271
|
WHISPER_API whisper_token whisper_token_translate (void);
|
|
219
272
|
WHISPER_API whisper_token whisper_token_transcribe(void);
|
|
220
273
|
|
|
221
|
-
// Performance information
|
|
274
|
+
// Performance information from the default state.
|
|
222
275
|
WHISPER_API void whisper_print_timings(struct whisper_context * ctx);
|
|
223
276
|
WHISPER_API void whisper_reset_timings(struct whisper_context * ctx);
|
|
224
277
|
|
|
@@ -236,18 +289,19 @@ extern "C" {
|
|
|
236
289
|
// Text segment callback
|
|
237
290
|
// Called on every newly generated text segment
|
|
238
291
|
// Use the whisper_full_...() functions to obtain the text segments
|
|
239
|
-
typedef void (*whisper_new_segment_callback)(struct whisper_context * ctx, int n_new, void * user_data);
|
|
292
|
+
typedef void (*whisper_new_segment_callback)(struct whisper_context * ctx, struct whisper_state * state, int n_new, void * user_data);
|
|
240
293
|
|
|
241
294
|
// Encoder begin callback
|
|
242
295
|
// If not NULL, called before the encoder starts
|
|
243
296
|
// If it returns false, the computation is aborted
|
|
244
|
-
typedef bool (*whisper_encoder_begin_callback)(struct whisper_context * ctx, void * user_data);
|
|
297
|
+
typedef bool (*whisper_encoder_begin_callback)(struct whisper_context * ctx, struct whisper_state * state, void * user_data);
|
|
245
298
|
|
|
246
299
|
// Logits filter callback
|
|
247
300
|
// Can be used to modify the logits before sampling
|
|
248
301
|
// If not NULL, called after applying temperature to logits
|
|
249
302
|
typedef void (*whisper_logits_filter_callback)(
|
|
250
303
|
struct whisper_context * ctx,
|
|
304
|
+
struct whisper_state * state,
|
|
251
305
|
const whisper_token_data * tokens,
|
|
252
306
|
int n_tokens,
|
|
253
307
|
float * logits,
|
|
@@ -334,6 +388,7 @@ extern "C" {
|
|
|
334
388
|
WHISPER_API struct whisper_full_params whisper_full_default_params(enum whisper_sampling_strategy strategy);
|
|
335
389
|
|
|
336
390
|
// Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
|
|
391
|
+
// Not thread safe for same context
|
|
337
392
|
// Uses the specified decoding strategy to obtain the text.
|
|
338
393
|
WHISPER_API int whisper_full(
|
|
339
394
|
struct whisper_context * ctx,
|
|
@@ -341,7 +396,16 @@ extern "C" {
|
|
|
341
396
|
const float * samples,
|
|
342
397
|
int n_samples);
|
|
343
398
|
|
|
344
|
-
|
|
399
|
+
WHISPER_API int whisper_full_with_state(
|
|
400
|
+
struct whisper_context * ctx,
|
|
401
|
+
struct whisper_state * state,
|
|
402
|
+
struct whisper_full_params params,
|
|
403
|
+
const float * samples,
|
|
404
|
+
int n_samples);
|
|
405
|
+
|
|
406
|
+
// Split the input audio in chunks and process each chunk separately using whisper_full_with_state()
|
|
407
|
+
// Result is stored in the default state of the context
|
|
408
|
+
// Not thread safe if executed in parallel on the same context.
|
|
345
409
|
// It seems this approach can offer some speedup in some cases.
|
|
346
410
|
// However, the transcription accuracy can be worse at the beginning and end of each chunk.
|
|
347
411
|
WHISPER_API int whisper_full_parallel(
|
|
@@ -351,40 +415,56 @@ extern "C" {
|
|
|
351
415
|
int n_samples,
|
|
352
416
|
int n_processors);
|
|
353
417
|
|
|
354
|
-
// Number of generated text segments
|
|
418
|
+
// Number of generated text segments
|
|
355
419
|
// A segment can be a few words, a sentence, or even a paragraph.
|
|
356
|
-
WHISPER_API int whisper_full_n_segments(struct whisper_context * ctx);
|
|
420
|
+
WHISPER_API int whisper_full_n_segments (struct whisper_context * ctx);
|
|
421
|
+
WHISPER_API int whisper_full_n_segments_from_state(struct whisper_state * state);
|
|
357
422
|
|
|
358
|
-
// Language id associated with the
|
|
423
|
+
// Language id associated with the context's default state
|
|
359
424
|
WHISPER_API int whisper_full_lang_id(struct whisper_context * ctx);
|
|
360
425
|
|
|
361
|
-
//
|
|
362
|
-
WHISPER_API
|
|
363
|
-
|
|
426
|
+
// Language id associated with the provided state
|
|
427
|
+
WHISPER_API int whisper_full_lang_id_from_state(struct whisper_state * state);
|
|
428
|
+
|
|
429
|
+
// Get the start and end time of the specified segment
|
|
430
|
+
WHISPER_API int64_t whisper_full_get_segment_t0 (struct whisper_context * ctx, int i_segment);
|
|
431
|
+
WHISPER_API int64_t whisper_full_get_segment_t0_from_state(struct whisper_state * state, int i_segment);
|
|
432
|
+
|
|
433
|
+
WHISPER_API int64_t whisper_full_get_segment_t1 (struct whisper_context * ctx, int i_segment);
|
|
434
|
+
WHISPER_API int64_t whisper_full_get_segment_t1_from_state(struct whisper_state * state, int i_segment);
|
|
435
|
+
|
|
436
|
+
// Get the text of the specified segment
|
|
437
|
+
WHISPER_API const char * whisper_full_get_segment_text (struct whisper_context * ctx, int i_segment);
|
|
438
|
+
WHISPER_API const char * whisper_full_get_segment_text_from_state(struct whisper_state * state, int i_segment);
|
|
364
439
|
|
|
365
|
-
// Get
|
|
366
|
-
WHISPER_API
|
|
440
|
+
// Get number of tokens in the specified segment
|
|
441
|
+
WHISPER_API int whisper_full_n_tokens (struct whisper_context * ctx, int i_segment);
|
|
442
|
+
WHISPER_API int whisper_full_n_tokens_from_state(struct whisper_state * state, int i_segment);
|
|
367
443
|
|
|
368
|
-
// Get
|
|
369
|
-
WHISPER_API
|
|
444
|
+
// Get the token text of the specified token in the specified segment
|
|
445
|
+
WHISPER_API const char * whisper_full_get_token_text (struct whisper_context * ctx, int i_segment, int i_token);
|
|
446
|
+
WHISPER_API const char * whisper_full_get_token_text_from_state(struct whisper_context * ctx, struct whisper_state * state, int i_segment, int i_token);
|
|
370
447
|
|
|
371
|
-
|
|
372
|
-
WHISPER_API
|
|
373
|
-
WHISPER_API whisper_token whisper_full_get_token_id (struct whisper_context * ctx, int i_segment, int i_token);
|
|
448
|
+
WHISPER_API whisper_token whisper_full_get_token_id (struct whisper_context * ctx, int i_segment, int i_token);
|
|
449
|
+
WHISPER_API whisper_token whisper_full_get_token_id_from_state(struct whisper_state * state, int i_segment, int i_token);
|
|
374
450
|
|
|
375
|
-
// Get token data for the specified token in the specified segment
|
|
451
|
+
// Get token data for the specified token in the specified segment
|
|
376
452
|
// This contains probabilities, timestamps, etc.
|
|
377
|
-
WHISPER_API whisper_token_data whisper_full_get_token_data(struct whisper_context * ctx, int i_segment, int i_token);
|
|
453
|
+
WHISPER_API whisper_token_data whisper_full_get_token_data (struct whisper_context * ctx, int i_segment, int i_token);
|
|
454
|
+
WHISPER_API whisper_token_data whisper_full_get_token_data_from_state(struct whisper_state * state, int i_segment, int i_token);
|
|
378
455
|
|
|
379
|
-
// Get the probability of the specified token in the specified segment
|
|
380
|
-
WHISPER_API float whisper_full_get_token_p(struct whisper_context * ctx, int i_segment, int i_token);
|
|
456
|
+
// Get the probability of the specified token in the specified segment
|
|
457
|
+
WHISPER_API float whisper_full_get_token_p (struct whisper_context * ctx, int i_segment, int i_token);
|
|
458
|
+
WHISPER_API float whisper_full_get_token_p_from_state(struct whisper_state * state, int i_segment, int i_token);
|
|
381
459
|
|
|
382
460
|
////////////////////////////////////////////////////////////////////////////
|
|
383
461
|
|
|
384
462
|
// Temporary helpers needed for exposing ggml interface
|
|
385
463
|
|
|
386
464
|
WHISPER_API int whisper_bench_memcpy(int n_threads);
|
|
465
|
+
WHISPER_API const char * whisper_bench_memcpy_str(int n_threads);
|
|
387
466
|
WHISPER_API int whisper_bench_ggml_mul_mat(int n_threads);
|
|
467
|
+
WHISPER_API const char * whisper_bench_ggml_mul_mat_str(int n_threads);
|
|
388
468
|
|
|
389
469
|
#ifdef __cplusplus
|
|
390
470
|
}
|
package/ios/RNWhisper.h
CHANGED
package/ios/RNWhisper.mm
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
|
|
2
2
|
#import "RNWhisper.h"
|
|
3
3
|
#include <stdlib.h>
|
|
4
|
+
#include <string>
|
|
4
5
|
|
|
5
6
|
@interface WhisperContext : NSObject {
|
|
6
7
|
}
|
|
@@ -74,7 +75,7 @@ RCT_REMAP_METHOD(transcribe,
|
|
|
74
75
|
|
|
75
76
|
const int max_threads = options[@"maxThreads"] != nil ?
|
|
76
77
|
[options[@"maxThreads"] intValue] :
|
|
77
|
-
MIN(
|
|
78
|
+
MIN(4, (int)[[NSProcessInfo processInfo] processorCount]);
|
|
78
79
|
|
|
79
80
|
if (options[@"beamSize"] != nil) {
|
|
80
81
|
params.strategy = WHISPER_SAMPLING_BEAM_SEARCH;
|
|
@@ -93,15 +94,18 @@ RCT_REMAP_METHOD(transcribe,
|
|
|
93
94
|
params.no_context = true;
|
|
94
95
|
params.single_segment = false;
|
|
95
96
|
|
|
97
|
+
if (options[@"maxLen"] != nil) {
|
|
98
|
+
params.max_len = [options[@"maxLen"] intValue];
|
|
99
|
+
}
|
|
100
|
+
params.token_timestamps = options[@"tokenTimestamps"] != nil ? [options[@"tokenTimestamps"] boolValue] : false;
|
|
101
|
+
|
|
96
102
|
if (options[@"bestOf"] != nil) {
|
|
97
103
|
params.greedy.best_of = [options[@"bestOf"] intValue];
|
|
98
104
|
}
|
|
99
105
|
if (options[@"maxContext"] != nil) {
|
|
100
106
|
params.n_max_text_ctx = [options[@"maxContext"] intValue];
|
|
101
107
|
}
|
|
102
|
-
|
|
103
|
-
params.max_len = [options[@"maxLen"] intValue];
|
|
104
|
-
}
|
|
108
|
+
|
|
105
109
|
if (options[@"offset"] != nil) {
|
|
106
110
|
params.offset_ms = [options[@"offset"] intValue];
|
|
107
111
|
}
|
|
@@ -117,6 +121,15 @@ RCT_REMAP_METHOD(transcribe,
|
|
|
117
121
|
if (options[@"temperatureInc"] != nil) {
|
|
118
122
|
params.temperature_inc = [options[@"temperature_inc"] floatValue];
|
|
119
123
|
}
|
|
124
|
+
|
|
125
|
+
if (options[@"prompt"] != nil) {
|
|
126
|
+
std::string *prompt = new std::string([options[@"prompt"] UTF8String]);
|
|
127
|
+
rn_whisper_convert_prompt(
|
|
128
|
+
context.ctx,
|
|
129
|
+
params,
|
|
130
|
+
prompt
|
|
131
|
+
);
|
|
132
|
+
}
|
|
120
133
|
|
|
121
134
|
whisper_reset_timings(context.ctx);
|
|
122
135
|
int code = whisper_full(context.ctx, params, waveFile, count);
|
|
@@ -132,11 +145,25 @@ RCT_REMAP_METHOD(transcribe,
|
|
|
132
145
|
|
|
133
146
|
NSString *result = @"";
|
|
134
147
|
int n_segments = whisper_full_n_segments(context.ctx);
|
|
148
|
+
|
|
149
|
+
NSMutableArray *segments = [[NSMutableArray alloc] init];
|
|
135
150
|
for (int i = 0; i < n_segments; i++) {
|
|
136
151
|
const char * text_cur = whisper_full_get_segment_text(context.ctx, i);
|
|
137
152
|
result = [result stringByAppendingString:[NSString stringWithUTF8String:text_cur]];
|
|
138
|
-
|
|
139
|
-
|
|
153
|
+
|
|
154
|
+
const int64_t t0 = whisper_full_get_segment_t0(context.ctx, i);
|
|
155
|
+
const int64_t t1 = whisper_full_get_segment_t1(context.ctx, i);
|
|
156
|
+
NSDictionary *segment = @{
|
|
157
|
+
@"text": [NSString stringWithUTF8String:text_cur],
|
|
158
|
+
@"t0": [NSNumber numberWithLongLong:t0],
|
|
159
|
+
@"t1": [NSNumber numberWithLongLong:t1]
|
|
160
|
+
};
|
|
161
|
+
[segments addObject:segment];
|
|
162
|
+
}
|
|
163
|
+
resolve(@{
|
|
164
|
+
@"result": result,
|
|
165
|
+
@"segments": segments
|
|
166
|
+
});
|
|
140
167
|
}
|
|
141
168
|
|
|
142
169
|
RCT_REMAP_METHOD(releaseContext,
|
package/jest/mock.js
CHANGED
|
@@ -3,7 +3,10 @@ const { NativeModules } = require('react-native')
|
|
|
3
3
|
if (!NativeModules.RNWhisper) {
|
|
4
4
|
NativeModules.RNWhisper = {
|
|
5
5
|
initContext: jest.fn(() => Promise.resolve(1)),
|
|
6
|
-
transcribe: jest.fn(() => Promise.resolve(
|
|
6
|
+
transcribe: jest.fn(() => Promise.resolve({
|
|
7
|
+
result: ' Test',
|
|
8
|
+
segments: [{ text: ' Test', t0: 0, t1: 33 }],
|
|
9
|
+
})),
|
|
7
10
|
releaseContext: jest.fn(() => Promise.resolve()),
|
|
8
11
|
releaseAllContexts: jest.fn(() => Promise.resolve()),
|
|
9
12
|
}
|
package/lib/commonjs/index.js
CHANGED
|
@@ -21,9 +21,7 @@ class WhisperContext {
|
|
|
21
21
|
}
|
|
22
22
|
async transcribe(path) {
|
|
23
23
|
let options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {};
|
|
24
|
-
return RNWhisper.transcribe(this.id, path, options)
|
|
25
|
-
result
|
|
26
|
-
}));
|
|
24
|
+
return RNWhisper.transcribe(this.id, path, options);
|
|
27
25
|
}
|
|
28
26
|
async release() {
|
|
29
27
|
return RNWhisper.releaseContext(this.id);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"names":["_reactNative","require","LINKING_ERROR","Platform","select","ios","default","RNWhisper","NativeModules","Proxy","get","Error","WhisperContext","constructor","id","transcribe","path","options","arguments","length","undefined","
|
|
1
|
+
{"version":3,"names":["_reactNative","require","LINKING_ERROR","Platform","select","ios","default","RNWhisper","NativeModules","Proxy","get","Error","WhisperContext","constructor","id","transcribe","path","options","arguments","length","undefined","release","releaseContext","initWhisper","filePath","initContext","releaseAllWhisper","releaseAllContexts"],"sourceRoot":"../../src","sources":["index.tsx"],"mappings":";;;;;;;AAAA,IAAAA,YAAA,GAAAC,OAAA;AAEA,MAAMC,aAAa,GAChB,sEAAqEC,qBAAQ,CAACC,MAAM,CAAC;EAAEC,GAAG,EAAE,gCAAgC;EAAEC,OAAO,EAAE;AAAG,CAAC,CAC3I,oDAAmD;AAEtD,MAAMC,SAAS,GAAGC,0BAAa,CAACD,SAAS,GACrCC,0BAAa,CAACD,SAAS,GACvB,IAAIE,KAAK,CACT,CAAC,CAAC,EACF;EACEC,GAAGA,CAAA,EAAG;IACJ,MAAM,IAAIC,KAAK,CAACT,aAAa,CAAC;EAChC;AACF,CAAC,CACF;AA6BH,MAAMU,cAAc,CAAC;EAGnBC,WAAWA,CAACC,EAAU,EAAE;IACtB,IAAI,CAACA,EAAE,GAAGA,EAAE;EACd;EAEA,MAAMC,UAAUA,CAACC,IAAY,EAA8D;IAAA,IAA5DC,OAA0B,GAAAC,SAAA,CAAAC,MAAA,QAAAD,SAAA,QAAAE,SAAA,GAAAF,SAAA,MAAG,CAAC,CAAC;IAC5D,OAAOX,SAAS,CAACQ,UAAU,CAAC,IAAI,CAACD,EAAE,EAAEE,IAAI,EAAEC,OAAO,CAAC;EACrD;EAEA,MAAMI,OAAOA,CAAA,EAAG;IACd,OAAOd,SAAS,CAACe,cAAc,CAAC,IAAI,CAACR,EAAE,CAAC;EAC1C;AACF;AAEO,eAAeS,WAAWA,CAAA,EAEN;EAAA,IADzB;IAAEC;EAAgC,CAAC,GAAAN,SAAA,CAAAC,MAAA,QAAAD,SAAA,QAAAE,SAAA,GAAAF,SAAA,MAAG,CAAC,CAAC;EAExC,MAAMJ,EAAE,GAAG,MAAMP,SAAS,CAACkB,WAAW,CAACD,QAAQ,CAAC;EAChD,OAAO,IAAIZ,cAAc,CAACE,EAAE,CAAC;AAC/B;AAEO,eAAeY,iBAAiBA,CAAA,EAAkB;EACvD,OAAOnB,SAAS,CAACoB,kBAAkB,EAAE;AACvC"}
|
package/lib/module/index.js
CHANGED
|
@@ -14,9 +14,7 @@ class WhisperContext {
|
|
|
14
14
|
}
|
|
15
15
|
async transcribe(path) {
|
|
16
16
|
let options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {};
|
|
17
|
-
return RNWhisper.transcribe(this.id, path, options)
|
|
18
|
-
result
|
|
19
|
-
}));
|
|
17
|
+
return RNWhisper.transcribe(this.id, path, options);
|
|
20
18
|
}
|
|
21
19
|
async release() {
|
|
22
20
|
return RNWhisper.releaseContext(this.id);
|
package/lib/module/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"names":["NativeModules","Platform","LINKING_ERROR","select","ios","default","RNWhisper","Proxy","get","Error","WhisperContext","constructor","id","transcribe","path","options","arguments","length","undefined","
|
|
1
|
+
{"version":3,"names":["NativeModules","Platform","LINKING_ERROR","select","ios","default","RNWhisper","Proxy","get","Error","WhisperContext","constructor","id","transcribe","path","options","arguments","length","undefined","release","releaseContext","initWhisper","filePath","initContext","releaseAllWhisper","releaseAllContexts"],"sourceRoot":"../../src","sources":["index.tsx"],"mappings":"AAAA,SAASA,aAAa,EAAEC,QAAQ,QAAQ,cAAc;AAEtD,MAAMC,aAAa,GAChB,sEAAqED,QAAQ,CAACE,MAAM,CAAC;EAAEC,GAAG,EAAE,gCAAgC;EAAEC,OAAO,EAAE;AAAG,CAAC,CAC3I,oDAAmD;AAEtD,MAAMC,SAAS,GAAGN,aAAa,CAACM,SAAS,GACrCN,aAAa,CAACM,SAAS,GACvB,IAAIC,KAAK,CACT,CAAC,CAAC,EACF;EACEC,GAAGA,CAAA,EAAG;IACJ,MAAM,IAAIC,KAAK,CAACP,aAAa,CAAC;EAChC;AACF,CAAC,CACF;AA6BH,MAAMQ,cAAc,CAAC;EAGnBC,WAAWA,CAACC,EAAU,EAAE;IACtB,IAAI,CAACA,EAAE,GAAGA,EAAE;EACd;EAEA,MAAMC,UAAUA,CAACC,IAAY,EAA8D;IAAA,IAA5DC,OAA0B,GAAAC,SAAA,CAAAC,MAAA,QAAAD,SAAA,QAAAE,SAAA,GAAAF,SAAA,MAAG,CAAC,CAAC;IAC5D,OAAOV,SAAS,CAACO,UAAU,CAAC,IAAI,CAACD,EAAE,EAAEE,IAAI,EAAEC,OAAO,CAAC;EACrD;EAEA,MAAMI,OAAOA,CAAA,EAAG;IACd,OAAOb,SAAS,CAACc,cAAc,CAAC,IAAI,CAACR,EAAE,CAAC;EAC1C;AACF;AAEA,OAAO,eAAeS,WAAWA,CAAA,EAEN;EAAA,IADzB;IAAEC;EAAgC,CAAC,GAAAN,SAAA,CAAAC,MAAA,QAAAD,SAAA,QAAAE,SAAA,GAAAF,SAAA,MAAG,CAAC,CAAC;EAExC,MAAMJ,EAAE,GAAG,MAAMN,SAAS,CAACiB,WAAW,CAACD,QAAQ,CAAC;EAChD,OAAO,IAAIZ,cAAc,CAACE,EAAE,CAAC;AAC/B;AAEA,OAAO,eAAeY,iBAAiBA,CAAA,EAAkB;EACvD,OAAOlB,SAAS,CAACmB,kBAAkB,EAAE;AACvC"}
|
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
export type TranscribeOptions = {
|
|
2
|
+
language?: string;
|
|
3
|
+
translate?: boolean;
|
|
2
4
|
maxThreads?: number;
|
|
3
5
|
maxContext?: number;
|
|
4
6
|
maxLen?: number;
|
|
7
|
+
tokenTimestamps?: boolean;
|
|
5
8
|
offset?: number;
|
|
6
9
|
duration?: number;
|
|
7
10
|
wordThold?: number;
|
|
@@ -10,9 +13,15 @@ export type TranscribeOptions = {
|
|
|
10
13
|
beamSize?: number;
|
|
11
14
|
bestOf?: number;
|
|
12
15
|
speedUp?: boolean;
|
|
16
|
+
prompt?: string;
|
|
13
17
|
};
|
|
14
18
|
export type TranscribeResult = {
|
|
15
19
|
result: string;
|
|
20
|
+
segments: Array<{
|
|
21
|
+
text: string;
|
|
22
|
+
t0: number;
|
|
23
|
+
t1: number;
|
|
24
|
+
}>;
|
|
16
25
|
};
|
|
17
26
|
declare class WhisperContext {
|
|
18
27
|
id: number;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.tsx"],"names":[],"mappings":"AAiBA,MAAM,MAAM,iBAAiB,GAAG;IAC9B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.tsx"],"names":[],"mappings":"AAiBA,MAAM,MAAM,iBAAiB,GAAG;IAC9B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB,CAAA;AAED,MAAM,MAAM,gBAAgB,GAAG;IAC7B,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,KAAK,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,EAAE,EAAE,MAAM,CAAC;QACX,EAAE,EAAE,MAAM,CAAC;KACZ,CAAC,CAAC;CACJ,CAAA;AAED,cAAM,cAAc;IAClB,EAAE,EAAE,MAAM,CAAA;gBAEE,EAAE,EAAE,MAAM;IAIhB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,GAAE,iBAAsB,GAAG,OAAO,CAAC,gBAAgB,CAAC;IAIpF,OAAO;CAGd;AAED,wBAAsB,WAAW,CAC/B,EAAE,QAAQ,EAAE,GAAE;IAAE,QAAQ,CAAC,EAAE,MAAM,CAAA;CAAO,GACvC,OAAO,CAAC,cAAc,CAAC,CAGzB;AAED,wBAAsB,iBAAiB,IAAI,OAAO,CAAC,IAAI,CAAC,CAEvD"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "whisper.rn",
|
|
3
|
-
"version": "0.1.3",
|
|
3
|
+
"version": "0.1.5",
|
|
4
4
|
"description": "React Native binding of whisper.cpp",
|
|
5
5
|
"main": "lib/commonjs/index",
|
|
6
6
|
"module": "lib/module/index",
|
|
@@ -47,7 +47,7 @@
|
|
|
47
47
|
"speech recognition"
|
|
48
48
|
],
|
|
49
49
|
"repository": "https://github.com/mybigday/whisper.rn",
|
|
50
|
-
"author": "Jhen <developer@jhen.me>
|
|
50
|
+
"author": "Jhen-Jie Hong <developer@jhen.me>",
|
|
51
51
|
"license": "MIT",
|
|
52
52
|
"bugs": {
|
|
53
53
|
"url": "https://github.com/mybigday/whisper.rn/issues"
|
|
@@ -98,13 +98,17 @@
|
|
|
98
98
|
"@commitlint/config-conventional"
|
|
99
99
|
]
|
|
100
100
|
},
|
|
101
|
+
"publishConfig": {
|
|
102
|
+
"registry": "https://registry.npmjs.org/"
|
|
103
|
+
},
|
|
101
104
|
"release-it": {
|
|
102
105
|
"git": {
|
|
103
106
|
"commitMessage": "chore: release ${version}",
|
|
104
107
|
"tagName": "v${version}"
|
|
105
108
|
},
|
|
106
109
|
"npm": {
|
|
107
|
-
"publish": true
|
|
110
|
+
"publish": true,
|
|
111
|
+
"skipChecks": true
|
|
108
112
|
},
|
|
109
113
|
"github": {
|
|
110
114
|
"release": true
|
package/src/index.tsx
CHANGED
|
@@ -16,9 +16,12 @@ const RNWhisper = NativeModules.RNWhisper
|
|
|
16
16
|
)
|
|
17
17
|
|
|
18
18
|
export type TranscribeOptions = {
|
|
19
|
+
language?: string,
|
|
20
|
+
translate?: boolean,
|
|
19
21
|
maxThreads?: number,
|
|
20
22
|
maxContext?: number,
|
|
21
23
|
maxLen?: number,
|
|
24
|
+
tokenTimestamps?: boolean,
|
|
22
25
|
offset?: number,
|
|
23
26
|
duration?: number,
|
|
24
27
|
wordThold?: number,
|
|
@@ -27,10 +30,16 @@ export type TranscribeOptions = {
|
|
|
27
30
|
beamSize?: number,
|
|
28
31
|
bestOf?: number,
|
|
29
32
|
speedUp?: boolean,
|
|
33
|
+
prompt?: string,
|
|
30
34
|
}
|
|
31
35
|
|
|
32
36
|
export type TranscribeResult = {
|
|
33
37
|
result: string,
|
|
38
|
+
segments: Array<{
|
|
39
|
+
text: string,
|
|
40
|
+
t0: number,
|
|
41
|
+
t1: number,
|
|
42
|
+
}>,
|
|
34
43
|
}
|
|
35
44
|
|
|
36
45
|
class WhisperContext {
|
|
@@ -41,9 +50,7 @@ class WhisperContext {
|
|
|
41
50
|
}
|
|
42
51
|
|
|
43
52
|
async transcribe(path: string, options: TranscribeOptions = {}): Promise<TranscribeResult> {
|
|
44
|
-
return RNWhisper.transcribe(this.id, path, options)
|
|
45
|
-
result
|
|
46
|
-
}))
|
|
53
|
+
return RNWhisper.transcribe(this.id, path, options)
|
|
47
54
|
}
|
|
48
55
|
|
|
49
56
|
async release() {
|
package/whisper-rn.podspec
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
require "json"
|
|
2
2
|
|
|
3
3
|
package = JSON.parse(File.read(File.join(__dir__, "package.json")))
|
|
4
|
-
base_compiler_flags =
|
|
5
|
-
folly_compiler_flags =
|
|
4
|
+
base_compiler_flags = "-DGGML_USE_ACCELERATE -Wno-shorten-64-to-32"
|
|
5
|
+
folly_compiler_flags = "-DFOLLY_NO_CONFIG -DFOLLY_MOBILE=1 -DFOLLY_USE_LIBCPP=1 -Wno-comma"
|
|
6
|
+
base_optimizer_flags = "-O3 -DNDEBUG"
|
|
6
7
|
|
|
7
8
|
Pod::Spec.new do |s|
|
|
8
9
|
s.name = "whisper-rn"
|
|
@@ -20,17 +21,23 @@ Pod::Spec.new do |s|
|
|
|
20
21
|
s.dependency "React-Core"
|
|
21
22
|
|
|
22
23
|
s.compiler_flags = base_compiler_flags
|
|
23
|
-
s.
|
|
24
|
-
|
|
24
|
+
s.pod_target_xcconfig = {
|
|
25
|
+
"OTHER_LDFLAGS" => "-framework Accelerate",
|
|
26
|
+
"OTHER_CFLAGS[config=Release]" => base_optimizer_flags,
|
|
27
|
+
"OTHER_CPLUSPLUSFLAGS[config=Release]" => base_optimizer_flags
|
|
25
28
|
}
|
|
26
29
|
|
|
27
30
|
# Don't install the dependencies when we run `pod install` in the old architecture.
|
|
28
31
|
if ENV['RCT_NEW_ARCH_ENABLED'] == '1' then
|
|
29
32
|
s.compiler_flags = base_compiler_flags + " " + folly_compiler_flags + " -DRCT_NEW_ARCH_ENABLED=1"
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
33
|
+
new_arch_cpp_flags = "-DFOLLY_NO_CONFIG -DFOLLY_MOBILE=1 -DFOLLY_USE_LIBCPP=1"
|
|
34
|
+
s.pod_target_xcconfig = {
|
|
35
|
+
"CLANG_CXX_LANGUAGE_STANDARD" => "c++17",
|
|
36
|
+
"HEADER_SEARCH_PATHS" => "\"$(PODS_ROOT)/boost\"",
|
|
37
|
+
"OTHER_LDFLAGS" => "-framework Accelerate",
|
|
38
|
+
"OTHER_CFLAGS[config=Release]" => base_optimizer_flags,
|
|
39
|
+
"OTHER_CPLUSPLUSFLAGS[config=Debug]" => new_arch_cpp_flags,
|
|
40
|
+
"OTHER_CPLUSPLUSFLAGS[config=Release]" => new_arch_cpp_flags + " " + base_optimizer_flags
|
|
34
41
|
}
|
|
35
42
|
s.dependency "React-Codegen"
|
|
36
43
|
s.dependency "RCT-Folly"
|