whisper.rn 0.1.5 → 0.2.0

package/ios/RNWhisper.mm CHANGED
@@ -1,23 +1,8 @@
-
  #import "RNWhisper.h"
+ #import "RNWhisperContext.h"
  #include <stdlib.h>
  #include <string>
 
- @interface WhisperContext : NSObject {
- }
-
- @property struct whisper_context * ctx;
-
- @end
-
- @implementation WhisperContext
-
- - (void)invalidate {
-     whisper_free(self.ctx);
- }
-
- @end
-
  @implementation RNWhisper
 
  NSMutableDictionary *contexts;
@@ -33,10 +18,8 @@ RCT_REMAP_METHOD(initContext,
          contexts = [[NSMutableDictionary alloc] init];
      }
 
-     WhisperContext *context = [[WhisperContext alloc] init];
-     context.ctx = whisper_init_from_file([modelPath UTF8String]);
-
-     if (context.ctx == NULL) {
+     RNWhisperContext *context = [RNWhisperContext initWithModelPath:modelPath];
+     if ([context getContext] == NULL) {
          reject(@"whisper_cpp_error", @"Failed to load the model", nil);
          return;
      }
@@ -47,123 +30,105 @@ RCT_REMAP_METHOD(initContext,
      resolve([NSNumber numberWithInt:contextId]);
  }
 
- RCT_REMAP_METHOD(transcribe,
+ RCT_REMAP_METHOD(transcribeFile,
      withContextId:(int)contextId
+     withJobId:(int)jobId
      withWaveFile:(NSString *)waveFilePath
      withOptions:(NSDictionary *)options
      withResolver:(RCTPromiseResolveBlock)resolve
      withRejecter:(RCTPromiseRejectBlock)reject)
  {
-     WhisperContext *context = contexts[[NSNumber numberWithInt:contextId]];
+     RNWhisperContext *context = contexts[[NSNumber numberWithInt:contextId]];
 
      if (context == nil) {
          reject(@"whisper_error", @"Context not found", nil);
          return;
      }
+     if ([context isCapturing]) {
+         reject(@"whisper_error", @"The context is in realtime transcribe mode", nil);
+         return;
+     }
+     if ([context isTranscribing]) {
+         reject(@"whisper_error", @"Context is already transcribing", nil);
+         return;
+     }
 
      NSURL *url = [NSURL fileURLWithPath:waveFilePath];
 
      int count = 0;
      float *waveFile = [self decodeWaveFile:url count:&count];
-
      if (waveFile == nil) {
          reject(@"whisper_error", @"Invalid file", nil);
          return;
      }
-
-     struct whisper_full_params params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
-
-     const int max_threads = options[@"maxThreads"] != nil ?
-         [options[@"maxThreads"] intValue] :
-         MIN(4, (int)[[NSProcessInfo processInfo] processorCount]);
-
-     if (options[@"beamSize"] != nil) {
-         params.strategy = WHISPER_SAMPLING_BEAM_SEARCH;
-         params.beam_search.beam_size = [options[@"beamSize"] intValue];
+     int code = [context transcribeFile:jobId audioData:waveFile audioDataCount:count options:options];
+     if (code != 0) {
+         free(waveFile);
+         reject(@"whisper_cpp_error", [NSString stringWithFormat:@"Failed to transcribe the file. Code: %d", code], nil);
+         return;
      }
+     free(waveFile);
+     resolve([context getTextSegments]);
+ }
 
-     params.print_realtime = false;
-     params.print_progress = false;
-     params.print_timestamps = false;
-     params.print_special = false;
-     params.speed_up = options[@"speedUp"] != nil ? [options[@"speedUp"] boolValue] : false;
-     params.translate = options[@"translate"] != nil ? [options[@"translate"] boolValue] : false;
-     params.language = options[@"language"] != nil ? [options[@"language"] UTF8String] : "auto";
-     params.n_threads = max_threads;
-     params.offset_ms = 0;
-     params.no_context = true;
-     params.single_segment = false;
-
-     if (options[@"maxLen"] != nil) {
-         params.max_len = [options[@"maxLen"] intValue];
-     }
-     params.token_timestamps = options[@"tokenTimestamps"] != nil ? [options[@"tokenTimestamps"] boolValue] : false;
+ - (NSArray *)supportedEvents {
+     return @[
+         @"@RNWhisper_onRealtimeTranscribe",
+         @"@RNWhisper_onRealtimeTranscribeEnd",
+     ];
+ }
 
-     if (options[@"bestOf"] != nil) {
-         params.greedy.best_of = [options[@"bestOf"] intValue];
-     }
-     if (options[@"maxContext"] != nil) {
-         params.n_max_text_ctx = [options[@"maxContext"] intValue];
-     }
-
-     if (options[@"offset"] != nil) {
-         params.offset_ms = [options[@"offset"] intValue];
-     }
-     if (options[@"duration"] != nil) {
-         params.duration_ms = [options[@"duration"] intValue];
-     }
-     if (options[@"wordThold"] != nil) {
-         params.thold_pt = [options[@"wordThold"] intValue];
-     }
-     if (options[@"temperature"] != nil) {
-         params.temperature = [options[@"temperature"] floatValue];
-     }
-     if (options[@"temperatureInc"] != nil) {
-         params.temperature_inc = [options[@"temperature_inc"] floatValue];
-     }
-
-     if (options[@"prompt"] != nil) {
-         std::string *prompt = new std::string([options[@"prompt"] UTF8String]);
-         rn_whisper_convert_prompt(
-             context.ctx,
-             params,
-             prompt
-         );
-     }
+ RCT_REMAP_METHOD(startRealtimeTranscribe,
+     withContextId:(int)contextId
+     withJobId:(int)jobId
+     withOptions:(NSDictionary *)options
+     withResolver:(RCTPromiseResolveBlock)resolve
+     withRejecter:(RCTPromiseRejectBlock)reject)
+ {
+     RNWhisperContext *context = contexts[[NSNumber numberWithInt:contextId]];
 
-     whisper_reset_timings(context.ctx);
-     int code = whisper_full(context.ctx, params, waveFile, count);
-     if (code != 0) {
-         NSLog(@"Failed to run the model");
-         free(waveFile);
-         reject(@"whisper_cpp_error", [NSString stringWithFormat:@"Failed to run the model. Code: %d", code], nil);
+     if (context == nil) {
+         reject(@"whisper_error", @"Context not found", nil);
+         return;
+     }
+     if ([context isCapturing]) {
+         reject(@"whisper_error", @"The context is already capturing", nil);
          return;
      }
 
-     // whisper_print_timings(context.ctx);
-     free(waveFile);
-
-     NSString *result = @"";
-     int n_segments = whisper_full_n_segments(context.ctx);
-
-     NSMutableArray *segments = [[NSMutableArray alloc] init];
-     for (int i = 0; i < n_segments; i++) {
-         const char * text_cur = whisper_full_get_segment_text(context.ctx, i);
-         result = [result stringByAppendingString:[NSString stringWithUTF8String:text_cur]];
-
-         const int64_t t0 = whisper_full_get_segment_t0(context.ctx, i);
-         const int64_t t1 = whisper_full_get_segment_t1(context.ctx, i);
-         NSDictionary *segment = @{
-             @"text": [NSString stringWithUTF8String:text_cur],
-             @"t0": [NSNumber numberWithLongLong:t0],
-             @"t1": [NSNumber numberWithLongLong:t1]
-         };
-         [segments addObject:segment];
+     OSStatus status = [context transcribeRealtime:jobId
+         options:options
+         onTranscribe:^(int _jobId, NSString *type, NSDictionary *payload) {
+             NSString *eventName = nil;
+             if ([type isEqual:@"transcribe"]) {
+                 eventName = @"@RNWhisper_onRealtimeTranscribe";
+             } else if ([type isEqual:@"end"]) {
+                 eventName = @"@RNWhisper_onRealtimeTranscribeEnd";
+             }
+             if (eventName == nil) {
+                 return;
+             }
+             [self sendEventWithName:eventName
+                 body:@{
+                     @"contextId": [NSNumber numberWithInt:contextId],
+                     @"jobId": [NSNumber numberWithInt:jobId],
+                     @"payload": payload
+                 }
+             ];
+         }
+     ];
+     if (status == 0) {
+         resolve(nil);
+         return;
      }
-     resolve(@{
-         @"result": result,
-         @"segments": segments
-     });
+     reject(@"whisper_error", [NSString stringWithFormat:@"Failed to start realtime transcribe. Status: %d", status], nil);
+ }
+ RCT_REMAP_METHOD(abortTranscribe,
+     withContextId:(int)contextId
+     withJobId:(int)jobId)
+ {
+     RNWhisperContext *context = contexts[[NSNumber numberWithInt:contextId]];
+     [context stopTranscribe:jobId];
  }
 
  RCT_REMAP_METHOD(releaseContext,
@@ -171,7 +136,7 @@ RCT_REMAP_METHOD(releaseContext,
      withResolver:(RCTPromiseResolveBlock)resolve
      withRejecter:(RCTPromiseRejectBlock)reject)
  {
-     WhisperContext *context = contexts[[NSNumber numberWithInt:contextId]];
+     RNWhisperContext *context = contexts[[NSNumber numberWithInt:contextId]];
      if (context == nil) {
          reject(@"whisper_error", @"Context not found", nil);
          return;
@@ -210,12 +175,14 @@ RCT_REMAP_METHOD(releaseAllContexts,
  }
 
  - (void)invalidate {
+     rn_whisper_abort_all_transcribe();
+
      if (contexts == nil) {
          return;
      }
 
      for (NSNumber *contextId in contexts) {
-         WhisperContext *context = contexts[contextId];
+         RNWhisperContext *context = contexts[contextId];
          [context invalidate];
      }
 
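The net effect of the RNWhisper.mm changes: the promise-based `transcribe` method is renamed `transcribeFile`, every job now carries a caller-chosen `jobId` so it can be aborted individually, and realtime capture is exposed through `startRealtimeTranscribe`/`abortTranscribe` plus two events. Below is a minimal sketch of driving this surface directly from JavaScript; the package's own JS wrapper is not part of this diff, so the raw `NativeModules` calls, the `demo` helper, and the exact `initContext` argument are illustrative assumptions.

import { NativeModules, NativeEventEmitter } from 'react-native';

const { RNWhisper } = NativeModules;
// iOS sends events through RCTEventEmitter, so NativeEventEmitter wraps the module.
const emitter = new NativeEventEmitter(RNWhisper);

async function demo(modelPath: string, wavPath: string) {
  // Assumed single model-path argument; initContext's full selector is not shown in this diff.
  const contextId: number = await RNWhisper.initContext(modelPath);
  const jobId = 1; // job IDs are chosen by the caller

  // One-shot file transcription (formerly `transcribe`); resolves with { result, segments }.
  const { result, segments } = await RNWhisper.transcribeFile(
    contextId, jobId, wavPath, { language: 'en' },
  );
  console.log(result, segments);

  // Realtime transcription streams results through events instead of the promise.
  const sub = emitter.addListener('@RNWhisper_onRealtimeTranscribe', (ev: any) => {
    if (ev.contextId !== contextId || ev.jobId !== jobId) return;
    console.log(ev.payload);
  });
  await RNWhisper.startRealtimeTranscribe(contextId, jobId, { realtimeAudioSec: 60 });
  // ...later: stop capture and flag the job in the native abort map.
  RNWhisper.abortTranscribe(contextId, jobId);
  sub.remove();
}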
package/ios/RNWhisperContext.h ADDED
@@ -0,0 +1,53 @@
+ #ifdef __cplusplus
+ #import "whisper.h"
+ #import "rn-whisper.h"
+ #endif
+
+ #import <AVFoundation/AVFoundation.h>
+ #import <AudioToolbox/AudioQueue.h>
+
+ #define NUM_BUFFERS 3
+ #define DEFAULT_MAX_AUDIO_SEC 30
+
+ typedef struct {
+     __unsafe_unretained id mSelf;
+
+     int jobId;
+     NSDictionary* options;
+
+     bool isTranscribing;
+     bool isRealtime;
+     bool isCapturing;
+     int maxAudioSec;
+     int nSamples;
+     int16_t* audioBufferI16;
+     float* audioBufferF32;
+
+     AudioQueueRef queue;
+     AudioStreamBasicDescription dataFormat;
+     AudioQueueBufferRef buffers[NUM_BUFFERS];
+
+     void (^transcribeHandler)(int, NSString *, NSDictionary *);
+ } RNWhisperContextRecordState;
+
+ @interface RNWhisperContext : NSObject {
+     struct whisper_context * ctx;
+     RNWhisperContextRecordState recordState;
+ }
+
+ + (instancetype)initWithModelPath:(NSString *)modelPath;
+ - (struct whisper_context *)getContext;
+ - (OSStatus)transcribeRealtime:(int)jobId
+     options:(NSDictionary *)options
+     onTranscribe:(void (^)(int, NSString *, NSDictionary *))onTranscribe;
+ - (int)transcribeFile:(int)jobId
+     audioData:(float *)audioData
+     audioDataCount:(int)audioDataCount
+     options:(NSDictionary *)options;
+ - (void)stopTranscribe:(int)jobId;
+ - (bool)isCapturing;
+ - (bool)isTranscribing;
+ - (NSDictionary *)getTextSegments;
+ - (void)invalidate;
+
+ @end
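Worth noting about the record state declared here: the capture buffers are flat, fixed-capacity arrays sized once in prepareRealtime, not ring buffers. At the default DEFAULT_MAX_AUDIO_SEC of 30 and whisper.cpp's 16 kHz mono sample rate, that is 30 × 16000 × 2 bytes ≈ 0.96 MB for audioBufferI16 plus 30 × 16000 × 4 bytes ≈ 1.92 MB for audioBufferF32; when nSamples would exceed maxAudioSec × WHISPER_SAMPLE_RATE, capture stops rather than wrapping around.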
package/ios/RNWhisperContext.mm ADDED
@@ -0,0 +1,303 @@
+ #import "RNWhisperContext.h"
+
+ #define NUM_BYTES_PER_BUFFER 16 * 1024
+
+ @implementation RNWhisperContext
+
+ + (instancetype)initWithModelPath:(NSString *)modelPath {
+     RNWhisperContext *context = [[RNWhisperContext alloc] init];
+     context->ctx = whisper_init_from_file([modelPath UTF8String]);
+     return context;
+ }
+
+ - (struct whisper_context *)getContext {
+     return self->ctx;
+ }
+
+ - (void)prepareRealtime:(NSDictionary *)options {
+     self->recordState.options = options;
+
+     self->recordState.dataFormat.mSampleRate = WHISPER_SAMPLE_RATE; // 16000
+     self->recordState.dataFormat.mFormatID = kAudioFormatLinearPCM;
+     self->recordState.dataFormat.mFramesPerPacket = 1;
+     self->recordState.dataFormat.mChannelsPerFrame = 1; // mono
+     self->recordState.dataFormat.mBytesPerFrame = 2;
+     self->recordState.dataFormat.mBytesPerPacket = 2;
+     self->recordState.dataFormat.mBitsPerChannel = 16;
+     self->recordState.dataFormat.mReserved = 0;
+     self->recordState.dataFormat.mFormatFlags = kLinearPCMFormatFlagIsSignedInteger;
+
+     self->recordState.nSamples = 0;
+
+     int maxAudioSecOpt = options[@"realtimeAudioSec"] != nil ? [options[@"realtimeAudioSec"] intValue] : 0;
+     int maxAudioSec = maxAudioSecOpt > 0 ? maxAudioSecOpt : DEFAULT_MAX_AUDIO_SEC;
+     self->recordState.maxAudioSec = maxAudioSec;
+     self->recordState.audioBufferI16 = (int16_t *) malloc(maxAudioSec * WHISPER_SAMPLE_RATE * sizeof(int16_t));
+     self->recordState.audioBufferF32 = (float *) malloc(maxAudioSec * WHISPER_SAMPLE_RATE * sizeof(float));
+
+     self->recordState.isRealtime = true;
+     self->recordState.isTranscribing = false;
+     self->recordState.isCapturing = false;
+
+     self->recordState.mSelf = self;
+ }
+
+ void AudioInputCallback(void * inUserData,
+     AudioQueueRef inAQ,
+     AudioQueueBufferRef inBuffer,
+     const AudioTimeStamp * inStartTime,
+     UInt32 inNumberPacketDescriptions,
+     const AudioStreamPacketDescription * inPacketDescs)
+ {
+     RNWhisperContextRecordState *state = (RNWhisperContextRecordState *)inUserData;
+
+     if (!state->isCapturing) {
+         NSLog(@"[RNWhisper] Not capturing, ignoring audio");
+         return;
+     }
+
+     const int n = inBuffer->mAudioDataByteSize / 2;
+     NSLog(@"[RNWhisper] Captured %d new samples", n);
+
+     if (state->nSamples + n > state->maxAudioSec * WHISPER_SAMPLE_RATE) {
+         NSLog(@"[RNWhisper] Audio buffer is full, ignoring audio");
+         state->isCapturing = false;
+         if (!state->isTranscribing) {
+             state->transcribeHandler(state->jobId, @"end", @{});
+         }
+         [state->mSelf stopAudio];
+         return;
+     }
+
+     for (int i = 0; i < n; i++) {
+         state->audioBufferI16[state->nSamples + i] = ((short*)inBuffer->mAudioData)[i];
+     }
+     state->nSamples += n;
+
+     AudioQueueEnqueueBuffer(state->queue, inBuffer, 0, NULL);
+
+     if (!state->isTranscribing) {
+         state->isTranscribing = true;
+         dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
+             NSLog(@"[RNWhisper] Transcribing %d samples", state->nSamples);
+             // convert I16 to F32
+             for (int i = 0; i < state->nSamples; i++) {
+                 state->audioBufferF32[i] = (float)state->audioBufferI16[i] / 32768.0f;
+             }
+             CFTimeInterval timeStart = CACurrentMediaTime();
+
+             int code = [state->mSelf fullTranscribe:state->jobId audioData:state->audioBufferF32 audioDataCount:state->nSamples options:state->options];
+
+             CFTimeInterval timeEnd = CACurrentMediaTime();
+             const float timeRecording = (float) state->nSamples / (float) state->dataFormat.mSampleRate;
+             if (code == 0) {
+                 state->transcribeHandler(state->jobId, @"transcribe", @{
+                     @"isCapturing": @(state->isCapturing),
+                     @"code": [NSNumber numberWithInt:code],
+                     @"data": [state->mSelf getTextSegments],
+                     @"processTime": [NSNumber numberWithInt:(timeEnd - timeStart) * 1E3],
+                     @"recordingTime": [NSNumber numberWithInt:timeRecording * 1E3],
+                 });
+                 state->isTranscribing = false;
+                 return;
+             }
+             state->transcribeHandler(state->jobId, @"transcribe", @{
+                 @"isCapturing": @(state->isCapturing),
+                 @"code": [NSNumber numberWithInt:code],
+                 @"error": [NSString stringWithFormat:@"Transcribe failed with code %d", code],
+                 @"processTime": [NSNumber numberWithDouble:timeEnd - timeStart],
+                 @"recordingTime": [NSNumber numberWithFloat:timeRecording],
+             });
+             if (!state->isCapturing) {
+                 NSLog(@"[RNWhisper] Transcribe end");
+                 state->transcribeHandler(state->jobId, @"end", @{});
+             }
+             state->isTranscribing = false;
+         });
+     }
+ }
+
+ - (bool)isCapturing {
+     return self->recordState.isCapturing;
+ }
+
+ - (bool)isTranscribing {
+     return self->recordState.isTranscribing;
+ }
+
+ - (OSStatus)transcribeRealtime:(int)jobId
+     options:(NSDictionary *)options
+     onTranscribe:(void (^)(int, NSString *, NSDictionary *))onTranscribe
+ {
+     self->recordState.transcribeHandler = onTranscribe;
+     self->recordState.jobId = jobId;
+     [self prepareRealtime:options];
+     self->recordState.nSamples = 0;
+
+     OSStatus status = AudioQueueNewInput(
+         &self->recordState.dataFormat,
+         AudioInputCallback,
+         &self->recordState,
+         NULL,
+         kCFRunLoopCommonModes,
+         0,
+         &self->recordState.queue
+     );
+
+     if (status == 0) {
+         for (int i = 0; i < NUM_BUFFERS; i++) {
+             AudioQueueAllocateBuffer(self->recordState.queue, NUM_BYTES_PER_BUFFER, &self->recordState.buffers[i]);
+             AudioQueueEnqueueBuffer(self->recordState.queue, self->recordState.buffers[i], 0, NULL);
+         }
+         status = AudioQueueStart(self->recordState.queue, NULL);
+         if (status == 0) {
+             self->recordState.isCapturing = true;
+         }
+     }
+     return status;
+ }
+
+ - (int)transcribeFile:(int)jobId
+     audioData:(float *)audioData
+     audioDataCount:(int)audioDataCount
+     options:(NSDictionary *)options
+ {
+     self->recordState.isTranscribing = true;
+     self->recordState.jobId = jobId;
+     int code = [self fullTranscribe:jobId audioData:audioData audioDataCount:audioDataCount options:options];
+     self->recordState.jobId = -1;
+     self->recordState.isTranscribing = false;
+     return code;
+ }
+
+ - (void)stopAudio {
+     AudioQueueStop(self->recordState.queue, true);
+     for (int i = 0; i < NUM_BUFFERS; i++) {
+         AudioQueueFreeBuffer(self->recordState.queue, self->recordState.buffers[i]);
+     }
+     AudioQueueDispose(self->recordState.queue, true);
+ }
+
+ - (void)stopTranscribe:(int)jobId {
+     rn_whisper_abort_transcribe(jobId);
+     if (!self->recordState.isRealtime || !self->recordState.isCapturing) {
+         return;
+     }
+     self->recordState.isCapturing = false;
+     [self stopAudio];
+ }
+
+ - (void)stopCurrentTranscribe {
+     if (!self->recordState.jobId) {
+         return;
+     }
+     [self stopTranscribe:self->recordState.jobId];
+ }
+
+ - (int)fullTranscribe:(int)jobId audioData:(float *)audioData audioDataCount:(int)audioDataCount options:(NSDictionary *)options {
+     struct whisper_full_params params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
+
+     const int max_threads = options[@"maxThreads"] != nil ?
+         [options[@"maxThreads"] intValue] :
+         MIN(4, (int)[[NSProcessInfo processInfo] processorCount]);
+
+     if (options[@"beamSize"] != nil) {
+         params.strategy = WHISPER_SAMPLING_BEAM_SEARCH;
+         params.beam_search.beam_size = [options[@"beamSize"] intValue];
+     }
+
+     params.print_realtime = false;
+     params.print_progress = false;
+     params.print_timestamps = false;
+     params.print_special = false;
+     params.speed_up = options[@"speedUp"] != nil ? [options[@"speedUp"] boolValue] : false;
+     params.translate = options[@"translate"] != nil ? [options[@"translate"] boolValue] : false;
+     params.language = options[@"language"] != nil ? [options[@"language"] UTF8String] : "auto";
+     params.n_threads = max_threads;
+     params.offset_ms = 0;
+     params.no_context = true;
+     params.single_segment = self->recordState.isRealtime;
+
+     if (options[@"maxLen"] != nil) {
+         params.max_len = [options[@"maxLen"] intValue];
+     }
+     params.token_timestamps = options[@"tokenTimestamps"] != nil ? [options[@"tokenTimestamps"] boolValue] : false;
+
+     if (options[@"bestOf"] != nil) {
+         params.greedy.best_of = [options[@"bestOf"] intValue];
+     }
+     if (options[@"maxContext"] != nil) {
+         params.n_max_text_ctx = [options[@"maxContext"] intValue];
+     }
+
+     if (options[@"offset"] != nil) {
+         params.offset_ms = [options[@"offset"] intValue];
+     }
+     if (options[@"duration"] != nil) {
+         params.duration_ms = [options[@"duration"] intValue];
+     }
+     if (options[@"wordThold"] != nil) {
+         params.thold_pt = [options[@"wordThold"] intValue];
+     }
+     if (options[@"temperature"] != nil) {
+         params.temperature = [options[@"temperature"] floatValue];
+     }
+     if (options[@"temperatureInc"] != nil) {
+         params.temperature_inc = [options[@"temperature_inc"] floatValue];
+     }
+
+     if (options[@"prompt"] != nil) {
+         std::string *prompt = new std::string([options[@"prompt"] UTF8String]);
+         rn_whisper_convert_prompt(
+             self->ctx,
+             params,
+             prompt
+         );
+     }
+
+     params.encoder_begin_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, void * user_data) {
+         bool is_aborted = *(bool*)user_data;
+         return !is_aborted;
+     };
+     params.encoder_begin_callback_user_data = rn_whisper_assign_abort_map(jobId);
+
+     whisper_reset_timings(self->ctx);
+
+     int code = whisper_full(self->ctx, params, audioData, audioDataCount);
+     rn_whisper_remove_abort_map(jobId);
+     // if (code == 0) {
+     //     whisper_print_timings(self->ctx);
+     // }
+     return code;
+ }
+
+ - (NSDictionary *)getTextSegments {
+     NSString *result = @"";
+     int n_segments = whisper_full_n_segments(self->ctx);
+
+     NSMutableArray *segments = [[NSMutableArray alloc] init];
+     for (int i = 0; i < n_segments; i++) {
+         const char * text_cur = whisper_full_get_segment_text(self->ctx, i);
+         result = [result stringByAppendingString:[NSString stringWithUTF8String:text_cur]];
+
+         const int64_t t0 = whisper_full_get_segment_t0(self->ctx, i);
+         const int64_t t1 = whisper_full_get_segment_t1(self->ctx, i);
+         NSDictionary *segment = @{
+             @"text": [NSString stringWithUTF8String:text_cur],
+             @"t0": [NSNumber numberWithLongLong:t0],
+             @"t1": [NSNumber numberWithLongLong:t1]
+         };
+         [segments addObject:segment];
+     }
+     return @{
+         @"result": result,
+         @"segments": segments
+     };
+ }
+
+ - (void)invalidate {
+     [self stopCurrentTranscribe];
+     whisper_free(self->ctx);
+ }
+
+ @end
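Two quirks in the implementation above are worth flagging. First, the success path reports processTime/recordingTime as integer milliseconds (the `* 1E3` conversions), while the failure path passes raw seconds via numberWithDouble/numberWithFloat, so the units differ by outcome. Second, the temperatureInc option is gated on options[@"temperatureInc"] but read from options[@"temperature_inc"], so the supplied value is never applied. For reference, the realtime event payload assembled here has roughly the following shape; this is an illustrative TypeScript reconstruction from the dictionaries above, not a typing shipped by the package.

// Segment timestamps t0/t1 come from whisper_full_get_segment_t0/t1.
type TranscribeSegment = { text: string; t0: number; t1: number };

type RealtimeTranscribePayload = {
  isCapturing: boolean;
  code: number;
  // Present on success (code === 0):
  data?: { result: string; segments: TranscribeSegment[] };
  // Present on failure (code !== 0):
  error?: string;
  processTime: number;   // ms on success, seconds on failure (see note above)
  recordingTime: number; // ms on success, seconds on failure
};

// Events delivered to JS wrap the payload with routing info:
type RealtimeTranscribeEvent = {
  contextId: number;
  jobId: number;
  payload: RealtimeTranscribePayload;
};

The @RNWhisper_onRealtimeTranscribeEnd event reuses the same envelope with an empty payload ({}).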
package/jest/mock.js CHANGED
@@ -1,14 +1,50 @@
- const { NativeModules } = require('react-native')
+ const { NativeModules, DeviceEventEmitter } = require('react-native')
 
  if (!NativeModules.RNWhisper) {
    NativeModules.RNWhisper = {
      initContext: jest.fn(() => Promise.resolve(1)),
-     transcribe: jest.fn(() => Promise.resolve({
+     transcribeFile: jest.fn(() => Promise.resolve({
        result: ' Test',
        segments: [{ text: ' Test', t0: 0, t1: 33 }],
      })),
+     startRealtimeTranscribe: jest.fn((contextId, jobId) => {
+       setTimeout(() => {
+         // Start
+         DeviceEventEmitter.emit('@RNWhisper_onRealtimeTranscribe', {
+           contextId,
+           jobId,
+           payload: {
+             isCapturing: true,
+             data: {
+               result: ' Test',
+               segments: [{ text: ' Test', t0: 0, t1: 33 }],
+             },
+             processTime: 100,
+             recordingTime: 1000,
+           },
+         })
+         // End
+         DeviceEventEmitter.emit('@RNWhisper_onRealtimeTranscribe', {
+           contextId,
+           jobId,
+           payload: {
+             isCapturing: false,
+             data: {
+               result: ' Test',
+               segments: [{ text: ' Test', t0: 0, t1: 33 }],
+             },
+             processTime: 100,
+             recordingTime: 2000,
+           },
+         })
+       })
+     }),
      releaseContext: jest.fn(() => Promise.resolve()),
      releaseAllContexts: jest.fn(() => Promise.resolve()),
+
+     // For NativeEventEmitter
+     addListener: jest.fn(),
+     removeListeners: jest.fn(),
    }
  }
 
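The updated mock replays one in-progress update followed by one with isCapturing: false on the same event channel, which is enough to exercise a realtime flow without native code. A hypothetical Jest test built on it might look like the following, assuming the mock above has been registered via jest.mock or moduleNameMapper; the test file itself is not part of the package.

import { NativeModules, DeviceEventEmitter } from 'react-native';

test('realtime mock emits a capture update and an end update', (done) => {
  const updates: any[] = [];
  const sub = DeviceEventEmitter.addListener(
    '@RNWhisper_onRealtimeTranscribe',
    (ev) => {
      updates.push(ev.payload);
      // The mock signals the end of capture by flipping isCapturing to false.
      if (!ev.payload.isCapturing) {
        expect(updates).toHaveLength(2);
        expect(ev.payload.data.result).toBe(' Test');
        sub.remove();
        done();
      }
    },
  );
  // The mock ignores options and fires both events on the next timer tick.
  NativeModules.RNWhisper.startRealtimeTranscribe(1, 1, {});
});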