npm - whisper.rn - Versions diffs - 0.1.4 → 0.2.0 - Mend

whisper.rn 0.1.4 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25)

package/LICENSE +1 -1
package/README.md +43 -4
package/android/build.gradle +2 -4
package/android/src/main/java/com/rnwhisper/RNWhisperModule.java +47 -7
package/android/src/main/java/com/rnwhisper/WhisperContext.java +196 -7
package/android/src/main/jni/whisper/Whisper.mk +1 -1
package/android/src/main/jni/whisper/jni.cpp +33 -9
package/cpp/rn-whisper.cpp +26 -0
package/cpp/rn-whisper.h +5 -0
package/cpp/whisper.cpp +603 -412
package/cpp/whisper.h +120 -40
package/ios/RNWhisper.h +2 -2
package/ios/RNWhisper.mm +78 -111
package/ios/RNWhisperContext.h +53 -0
package/ios/RNWhisperContext.mm +303 -0
package/jest/mock.js +38 -2
package/lib/commonjs/index.js +63 -2
package/lib/commonjs/index.js.map +1 -1
package/lib/module/index.js +64 -3
package/lib/module/index.js.map +1 -1
package/lib/typescript/index.d.ts +61 -2
package/lib/typescript/index.d.ts.map +1 -1
package/package.json +2 -2
package/src/index.tsx +121 -4
package/whisper-rn.podspec +15 -8

package/cpp/whisper.h CHANGED Viewed

@@ -66,6 +66,7 @@ extern "C" {
     //
     struct whisper_context;
+    struct whisper_state;
     typedef int whisper_token;
@@ -101,11 +102,20 @@ extern "C" {
     WHISPER_API struct whisper_context * whisper_init_from_buffer(void * buffer, size_t buffer_size);
     WHISPER_API struct whisper_context * whisper_init(struct whisper_model_loader * loader);
-    // Frees all memory allocated by the model.
-    WHISPER_API void whisper_free(struct whisper_context * ctx);
+    // These are the same as the above, but the internal state of the context is not allocated automatically
+    // It is the responsibility of the caller to allocate the state using whisper_init_state() (#523)
+    WHISPER_API struct whisper_context * whisper_init_from_file_no_state(const char * path_model);
+    WHISPER_API struct whisper_context * whisper_init_from_buffer_no_state(void * buffer, size_t buffer_size);
+    WHISPER_API struct whisper_context * whisper_init_no_state(struct whisper_model_loader * loader);
+    WHISPER_API struct whisper_state * whisper_init_state(struct whisper_context * ctx);
+    // Frees all allocated memory
+    WHISPER_API void whisper_free      (struct whisper_context * ctx);
+    WHISPER_API void whisper_free_state(struct whisper_state * state);
     // Convert RAW PCM audio to log mel spectrogram.
-    // The resulting spectrogram is stored inside the provided whisper context.
+    // The resulting spectrogram is stored inside the default state of the provided whisper context.
     // Returns 0 on success
     WHISPER_API int whisper_pcm_to_mel(
             struct whisper_context * ctx,
@@ -113,17 +123,30 @@ extern "C" {
                                int   n_samples,
                                int   n_threads);
-    // Convert RAW PCM audio to log mel spectrogram but applies a Phase Vocoder to speed up the audio x2.
-    // The resulting spectrogram is stored inside the provided whisper context.
+    WHISPER_API int whisper_pcm_to_mel_with_state(
+            struct whisper_context * ctx,
+              struct whisper_state * state,
+                       const float * samples,
+                               int   n_samples,
+                               int   n_threads);
+    // Convert RAW PCM audio to log mel spectrogram but applies a Phase Vocoder to speed up the audio x2.
+    // The resulting spectrogram is stored inside the default state of the provided whisper context.
     // Returns 0 on success
     WHISPER_API int whisper_pcm_to_mel_phase_vocoder(
-        struct whisper_context* ctx,
-        const float* samples,
-        int   n_samples,
-        int   n_threads);
-    // This can be used to set a custom log mel spectrogram inside the provided whisper context.
+        struct whisper_context * ctx,
+                   const float * samples,
+                           int   n_samples,
+                           int   n_threads);
+    WHISPER_API int whisper_pcm_to_mel_phase_vocoder_with_state(
+        struct whisper_context * ctx,
+          struct whisper_state * state,
+                   const float * samples,
+                           int   n_samples,
+                           int   n_threads);
+    // This can be used to set a custom log mel spectrogram inside the default state of the provided whisper context.
     // Use this instead of whisper_pcm_to_mel() if you want to provide your own log mel spectrogram.
     // n_mel must be 80
     // Returns 0 on success
@@ -133,7 +156,14 @@ extern "C" {
                                int   n_len,
                                int   n_mel);
-    // Run the Whisper encoder on the log mel spectrogram stored inside the provided whisper context.
+    WHISPER_API int whisper_set_mel_with_state(
+            struct whisper_context * ctx,
+              struct whisper_state * state,
+                       const float * data,
+                               int   n_len,
+                               int   n_mel);
+    // Run the Whisper encoder on the log mel spectrogram stored inside the default state in the provided whisper context.
     // Make sure to call whisper_pcm_to_mel() or whisper_set_mel() first.
     // offset can be used to specify the offset of the first frame in the spectrogram.
     // Returns 0 on success
@@ -142,6 +172,12 @@ extern "C" {
                                int   offset,
                                int   n_threads);
+    WHISPER_API int whisper_encode_with_state(
+            struct whisper_context * ctx,
+              struct whisper_state * state,
+                               int   offset,
+                               int   n_threads);
     // Run the Whisper decoder to obtain the logits and probabilities for the next token.
     // Make sure to call whisper_encode() first.
     // tokens + n_tokens is the provided context for the decoder.
@@ -155,6 +191,14 @@ extern "C" {
                                int   n_past,
                                int   n_threads);
+    WHISPER_API int whisper_decode_with_state(
+            struct whisper_context * ctx,
+              struct whisper_state * state,
+               const whisper_token * tokens,
+                               int   n_tokens,
+                               int   n_past,
+                               int   n_threads);
     // Convert the provided text into tokens.
     // The tokens pointer must be large enough to hold the resulting tokens.
     // Returns the number of tokens on success, no more than n_max_tokens
@@ -190,17 +234,26 @@ extern "C" {
                                int   n_threads,
                              float * lang_probs);
-    WHISPER_API int whisper_n_len          (struct whisper_context * ctx); // mel length
-    WHISPER_API int whisper_n_vocab        (struct whisper_context * ctx);
-    WHISPER_API int whisper_n_text_ctx     (struct whisper_context * ctx);
-    WHISPER_API int whisper_n_audio_ctx    (struct whisper_context * ctx);
-    WHISPER_API int whisper_is_multilingual(struct whisper_context * ctx);
+    WHISPER_API int whisper_lang_auto_detect_with_state(
+            struct whisper_context * ctx,
+              struct whisper_state * state,
+                               int   offset_ms,
+                               int   n_threads,
+                             float * lang_probs);
+    WHISPER_API int whisper_n_len           (struct whisper_context * ctx); // mel length
+    WHISPER_API int whisper_n_len_from_state(struct whisper_state * state); // mel length
+    WHISPER_API int whisper_n_vocab         (struct whisper_context * ctx);
+    WHISPER_API int whisper_n_text_ctx      (struct whisper_context * ctx);
+    WHISPER_API int whisper_n_audio_ctx     (struct whisper_context * ctx);
+    WHISPER_API int whisper_is_multilingual (struct whisper_context * ctx);
     // Token logits obtained from the last call to whisper_decode()
     // The logits for the last token are stored in the last row
     // Rows: n_tokens
     // Cols: n_vocab
-    WHISPER_API float * whisper_get_logits(struct whisper_context * ctx);
+    WHISPER_API float * whisper_get_logits           (struct whisper_context * ctx);
+    WHISPER_API float * whisper_get_logits_from_state(struct whisper_state * state);
     // Token Id -> String. Uses the vocabulary in the provided context
     WHISPER_API const char * whisper_token_to_str(struct whisper_context * ctx, whisper_token token);
@@ -218,7 +271,7 @@ extern "C" {
     WHISPER_API whisper_token whisper_token_translate (void);
     WHISPER_API whisper_token whisper_token_transcribe(void);
-    // Performance information
+    // Performance information from the default state.
     WHISPER_API void whisper_print_timings(struct whisper_context * ctx);
     WHISPER_API void whisper_reset_timings(struct whisper_context * ctx);
@@ -236,18 +289,19 @@ extern "C" {
     // Text segment callback
     // Called on every newly generated text segment
     // Use the whisper_full_...() functions to obtain the text segments
-    typedef void (*whisper_new_segment_callback)(struct whisper_context * ctx, int n_new, void * user_data);
+    typedef void (*whisper_new_segment_callback)(struct whisper_context * ctx, struct whisper_state * state, int n_new, void * user_data);
     // Encoder begin callback
     // If not NULL, called before the encoder starts
     // If it returns false, the computation is aborted
-    typedef bool (*whisper_encoder_begin_callback)(struct whisper_context * ctx, void * user_data);
+    typedef bool (*whisper_encoder_begin_callback)(struct whisper_context * ctx, struct whisper_state * state, void * user_data);
     // Logits filter callback
     // Can be used to modify the logits before sampling
     // If not NULL, called after applying temperature to logits
     typedef void (*whisper_logits_filter_callback)(
             struct whisper_context * ctx,
+              struct whisper_state * state,
           const whisper_token_data * tokens,
                                int   n_tokens,
                              float * logits,
@@ -334,6 +388,7 @@ extern "C" {
     WHISPER_API struct whisper_full_params whisper_full_default_params(enum whisper_sampling_strategy strategy);
     // Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
+    // Not thread safe for same context
     // Uses the specified decoding strategy to obtain the text.
     WHISPER_API int whisper_full(
                 struct whisper_context * ctx,
@@ -341,7 +396,16 @@ extern "C" {
                            const float * samples,
                                    int   n_samples);
-    // Split the input audio in chunks and process each chunk separately using whisper_full()
+    WHISPER_API int whisper_full_with_state(
+                struct whisper_context * ctx,
+                  struct whisper_state * state,
+            struct whisper_full_params   params,
+                           const float * samples,
+                                   int   n_samples);
+    // Split the input audio in chunks and process each chunk separately using whisper_full_with_state()
+    // Result is stored in the default state of the context
+    // Not thread safe if executed in parallel on the same context.
     // It seems this approach can offer some speedup in some cases.
     // However, the transcription accuracy can be worse at the beginning and end of each chunk.
     WHISPER_API int whisper_full_parallel(
@@ -351,40 +415,56 @@ extern "C" {
                                    int   n_samples,
                                    int   n_processors);
-    // Number of generated text segments.
+    // Number of generated text segments
     // A segment can be a few words, a sentence, or even a paragraph.
-    WHISPER_API int whisper_full_n_segments(struct whisper_context * ctx);
+    WHISPER_API int whisper_full_n_segments           (struct whisper_context * ctx);
+    WHISPER_API int whisper_full_n_segments_from_state(struct whisper_state * state);
-    // Language id associated with the current context
+    // Language id associated with the context's default state
     WHISPER_API int whisper_full_lang_id(struct whisper_context * ctx);
-    // Get the start and end time of the specified segment.
-    WHISPER_API int64_t whisper_full_get_segment_t0(struct whisper_context * ctx, int i_segment);
-    WHISPER_API int64_t whisper_full_get_segment_t1(struct whisper_context * ctx, int i_segment);
+    // Language id associated with the provided state
+    WHISPER_API int whisper_full_lang_id_from_state(struct whisper_state * state);
+    // Get the start and end time of the specified segment
+    WHISPER_API int64_t whisper_full_get_segment_t0           (struct whisper_context * ctx, int i_segment);
+    WHISPER_API int64_t whisper_full_get_segment_t0_from_state(struct whisper_state * state, int i_segment);
+    WHISPER_API int64_t whisper_full_get_segment_t1           (struct whisper_context * ctx, int i_segment);
+    WHISPER_API int64_t whisper_full_get_segment_t1_from_state(struct whisper_state * state, int i_segment);
+    // Get the text of the specified segment
+    WHISPER_API const char * whisper_full_get_segment_text           (struct whisper_context * ctx, int i_segment);
+    WHISPER_API const char * whisper_full_get_segment_text_from_state(struct whisper_state * state, int i_segment);
-    // Get the text of the specified segment.
-    WHISPER_API const char * whisper_full_get_segment_text(struct whisper_context * ctx, int i_segment);
+    // Get number of tokens in the specified segment
+    WHISPER_API int whisper_full_n_tokens           (struct whisper_context * ctx, int i_segment);
+    WHISPER_API int whisper_full_n_tokens_from_state(struct whisper_state * state, int i_segment);
-    // Get number of tokens in the specified segment.
-    WHISPER_API int whisper_full_n_tokens(struct whisper_context * ctx, int i_segment);
+    // Get the token text of the specified token in the specified segment
+    WHISPER_API const char * whisper_full_get_token_text           (struct whisper_context * ctx, int i_segment, int i_token);
+    WHISPER_API const char * whisper_full_get_token_text_from_state(struct whisper_context * ctx, struct whisper_state * state, int i_segment, int i_token);
-    // Get the token text of the specified token in the specified segment.
-    WHISPER_API const char * whisper_full_get_token_text(struct whisper_context * ctx, int i_segment, int i_token);
-    WHISPER_API whisper_token whisper_full_get_token_id (struct whisper_context * ctx, int i_segment, int i_token);
+    WHISPER_API whisper_token whisper_full_get_token_id           (struct whisper_context * ctx, int i_segment, int i_token);
+    WHISPER_API whisper_token whisper_full_get_token_id_from_state(struct whisper_state * state, int i_segment, int i_token);
-    // Get token data for the specified token in the specified segment.
+    // Get token data for the specified token in the specified segment
     // This contains probabilities, timestamps, etc.
-    WHISPER_API whisper_token_data whisper_full_get_token_data(struct whisper_context * ctx, int i_segment, int i_token);
+    WHISPER_API whisper_token_data whisper_full_get_token_data           (struct whisper_context * ctx, int i_segment, int i_token);
+    WHISPER_API whisper_token_data whisper_full_get_token_data_from_state(struct whisper_state * state, int i_segment, int i_token);
-    // Get the probability of the specified token in the specified segment.
-    WHISPER_API float whisper_full_get_token_p(struct whisper_context * ctx, int i_segment, int i_token);
+    // Get the probability of the specified token in the specified segment
+    WHISPER_API float whisper_full_get_token_p           (struct whisper_context * ctx, int i_segment, int i_token);
+    WHISPER_API float whisper_full_get_token_p_from_state(struct whisper_state * state, int i_segment, int i_token);
     ////////////////////////////////////////////////////////////////////////////
     // Temporary helpers needed for exposing ggml interface
     WHISPER_API int whisper_bench_memcpy(int n_threads);
+    WHISPER_API const char * whisper_bench_memcpy_str(int n_threads);
     WHISPER_API int whisper_bench_ggml_mul_mat(int n_threads);
+    WHISPER_API const char * whisper_bench_ggml_mul_mat_str(int n_threads);
 #ifdef __cplusplus
 }

package/ios/RNWhisper.h CHANGED Viewed

@@ -3,9 +3,9 @@
 #import "rn-whisper.h"
 #endif
 #import <React/RCTBridgeModule.h>
+#import <React/RCTEventEmitter.h>
-@interface RNWhisper : NSObject <RCTBridgeModule>
+@interface RNWhisper : RCTEventEmitter <RCTBridgeModule> // event-emitter base: RNWhisper.mm sends the @RNWhisper_onRealtimeTranscribe* events via -sendEventWithName:body:
 @end

package/ios/RNWhisper.mm CHANGED Viewed

@@ -1,23 +1,8 @@
 #import "RNWhisper.h"
+#import "RNWhisperContext.h"
 #include <stdlib.h>
 #include <string>
-@interface WhisperContext : NSObject {
-}
-@property struct whisper_context * ctx;
-@end
-@implementation WhisperContext
-- (void)invalidate {
-    whisper_free(self.ctx);
-}
-@end
 @implementation RNWhisper
 NSMutableDictionary *contexts;
@@ -33,10 +18,8 @@ RCT_REMAP_METHOD(initContext,
         contexts = [[NSMutableDictionary alloc] init];
     }
-    WhisperContext *context = [[WhisperContext alloc] init];
-    context.ctx = whisper_init_from_file([modelPath UTF8String]);
-    if (context.ctx == NULL) {
+    RNWhisperContext *context = [RNWhisperContext initWithModelPath:modelPath];
+    if ([context getContext] == NULL) {
         reject(@"whisper_cpp_error", @"Failed to load the model", nil);
         return;
     }
@@ -47,123 +30,105 @@ RCT_REMAP_METHOD(initContext,
     resolve([NSNumber numberWithInt:contextId]);
 }
-RCT_REMAP_METHOD(transcribe,
+RCT_REMAP_METHOD(transcribeFile,
                  withContextId:(int)contextId
+                 withJobId:(int)jobId
                  withWaveFile:(NSString *)waveFilePath
                  withOptions:(NSDictionary *)options
                  withResolver:(RCTPromiseResolveBlock)resolve
                  withRejecter:(RCTPromiseRejectBlock)reject)
 {
-    WhisperContext *context = contexts[[NSNumber numberWithInt:contextId]];
+    RNWhisperContext *context = contexts[[NSNumber numberWithInt:contextId]];
     if (context == nil) {
         reject(@"whisper_error", @"Context not found", nil);
         return;
     }
+    if ([context isCapturing]) {
+        reject(@"whisper_error", @"The context is in realtime transcribe mode", nil);
+        return;
+    }
+    if ([context isTranscribing]) {
+        reject(@"whisper_error", @"Context is already transcribing", nil);
+        return;
+    }
     NSURL *url = [NSURL fileURLWithPath:waveFilePath];
     int count = 0;
     float *waveFile = [self decodeWaveFile:url count:&count];
     if (waveFile == nil) {
         reject(@"whisper_error", @"Invalid file", nil);
         return;
     }
-    struct whisper_full_params params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
-    const int max_threads = options[@"maxThreads"] != nil ?
-      [options[@"maxThreads"] intValue] :
-      MIN(8, (int)[[NSProcessInfo processInfo] processorCount]) - 2;
-    if (options[@"beamSize"] != nil) {
-        params.strategy = WHISPER_SAMPLING_BEAM_SEARCH;
-        params.beam_search.beam_size = [options[@"beamSize"] intValue];
+    int code = [context transcribeFile:jobId audioData:waveFile audioDataCount:count options:options];
+    if (code != 0) {
+        free(waveFile);
+        reject(@"whisper_cpp_error", [NSString stringWithFormat:@"Failed to transcribe the file. Code: %d", code], nil);
+        return;
     }
+    free(waveFile);
+    resolve([context getTextSegments]);
+}
-    params.print_realtime   = false;
-    params.print_progress   = false;
-    params.print_timestamps = false;
-    params.print_special    = false;
-    params.speed_up         = options[@"speedUp"] != nil ? [options[@"speedUp"] boolValue] : false;
-    params.translate        = options[@"translate"] != nil ? [options[@"translate"] boolValue] : false;
-    params.language         = options[@"language"] != nil ? [options[@"language"] UTF8String] : "auto";
-    params.n_threads        = max_threads;
-    params.offset_ms        = 0;
-    params.no_context       = true;
-    params.single_segment   = false;
-    if (options[@"maxLen"] != nil) {
-        params.max_len = [options[@"maxLen"] intValue];
-    }
-    params.token_timestamps = options[@"tokenTimestamps"] != nil ? [options[@"tokenTimestamps"] boolValue] : false;
+// RCTEventEmitter override: the complete set of event names this module
+// passes to -sendEventWithName:body: (see startRealtimeTranscribe).
+- (NSArray *)supportedEvents {
+  NSArray *realtimeEventNames = @[
+    @"@RNWhisper_onRealtimeTranscribe",
+    @"@RNWhisper_onRealtimeTranscribeEnd",
+  ];
+  return realtimeEventNames;
+}
-    if (options[@"bestOf"] != nil) {
-        params.greedy.best_of = [options[@"bestOf"] intValue];
-    }
-    if (options[@"maxContext"] != nil) {
-        params.n_max_text_ctx = [options[@"maxContext"] intValue];
-    }
-    if (options[@"offset"] != nil) {
-        params.offset_ms = [options[@"offset"] intValue];
-    }
-    if (options[@"duration"] != nil) {
-        params.duration_ms = [options[@"duration"] intValue];
-    }
-    if (options[@"wordThold"] != nil) {
-        params.thold_pt = [options[@"wordThold"] intValue];
-    }
-    if (options[@"temperature"] != nil) {
-        params.temperature = [options[@"temperature"] floatValue];
-    }
-    if (options[@"temperatureInc"] != nil) {
-        params.temperature_inc = [options[@"temperature_inc"] floatValue];
-    }
-    if (options[@"prompt"] != nil) {
-        std::string *prompt = new std::string([options[@"prompt"] UTF8String]);
-        rn_whisper_convert_prompt(
-            context.ctx,
-            params,
-            prompt
-        );
-    }
+// Exported to JS as `startRealtimeTranscribe`.
+// Looks up the context registered under `contextId`, rejects when it does not
+// exist or is already capturing, then starts realtime transcription for
+// `jobId`. Handler callbacks are forwarded to JS as events; the promise is
+// resolved with nil when the start call returns 0 and rejected with the
+// status code otherwise.
+RCT_REMAP_METHOD(startRealtimeTranscribe,
+                 withContextId:(int)contextId
+                 withJobId:(int)jobId
+                 withOptions:(NSDictionary *)options
+                 withResolver:(RCTPromiseResolveBlock)resolve
+                 withRejecter:(RCTPromiseRejectBlock)reject)
+{
+    RNWhisperContext *context = contexts[[NSNumber numberWithInt:contextId]];
-    whisper_reset_timings(context.ctx);
-    int code = whisper_full(context.ctx, params, waveFile, count);
-    if (code != 0) {
-        NSLog(@"Failed to run the model");
-        free(waveFile);
-        reject(@"whisper_cpp_error", [NSString stringWithFormat:@"Failed to run the model. Code: %d", code], nil);
+    if (context == nil) {
+        reject(@"whisper_error", @"Context not found", nil);
+        return;
+    }
+    if ([context isCapturing]) {
+        reject(@"whisper_error", @"The context is already capturing", nil);
         return;
     }
-    // whisper_print_timings(context.ctx);
-    free(waveFile);
-    NSString *result = @"";
-    int n_segments = whisper_full_n_segments(context.ctx);
-    NSMutableArray *segments = [[NSMutableArray alloc] init];
-    for (int i = 0; i < n_segments; i++) {
-        const char * text_cur = whisper_full_get_segment_text(context.ctx, i);
-        result = [result stringByAppendingString:[NSString stringWithUTF8String:text_cur]];
-        const int64_t t0 = whisper_full_get_segment_t0(context.ctx, i);
-        const int64_t t1 = whisper_full_get_segment_t1(context.ctx, i);
-        NSDictionary *segment = @{
-            @"text": [NSString stringWithUTF8String:text_cur],
-            @"t0": [NSNumber numberWithLongLong:t0],
-            @"t1": [NSNumber numberWithLongLong:t1]
-        };
-        [segments addObject:segment];
+    // The handler maps the context's "transcribe"/"end" notifications onto the
+    // two event names declared in -supportedEvents; any other type is dropped.
+    // The handler's own job id argument (_jobId) is not used: the event body
+    // carries the contextId/jobId captured from this call.
+    OSStatus status = [context transcribeRealtime:jobId
+        options:options
+        onTranscribe:^(int _jobId, NSString *type, NSDictionary *payload) {
+            NSString *eventName = nil;
+            if ([type isEqual:@"transcribe"]) {
+                eventName = @"@RNWhisper_onRealtimeTranscribe";
+            } else if ([type isEqual:@"end"]) {
+                eventName = @"@RNWhisper_onRealtimeTranscribeEnd";
+            }
+            if (eventName == nil) {
+                return;
+            }
+            [self sendEventWithName:eventName
+                body:@{
+                    @"contextId": [NSNumber numberWithInt:contextId],
+                    @"jobId": [NSNumber numberWithInt:jobId],
+                    @"payload": payload
+                }
+            ];
+        }
+    ];
+    if (status == 0) {
+        resolve(nil);
+        return;
     }
-    resolve(@{
-        @"result": result,
-        @"segments": segments
-    });
+    // OSStatus is SInt32, which is `signed long` on non-LP64 Apple targets;
+    // cast explicitly so the argument always matches the %d specifier.
+    reject(@"whisper_error", [NSString stringWithFormat:@"Failed to start realtime transcribe. Status: %d", (int)status], nil);
+}
+// Exported to JS as `abortTranscribe`: forwards the stop request for `jobId`
+// to the context registered under `contextId`. No promise is involved, so the
+// caller gets no feedback; an unknown contextId yields nil and the message
+// send below is then a no-op.
+RCT_REMAP_METHOD(abortTranscribe,
+                 withContextId:(int)contextId
+                 withJobId:(int)jobId)
+{
+    NSNumber *contextKey = [NSNumber numberWithInt:contextId];
+    RNWhisperContext *target = contexts[contextKey];
+    [target stopTranscribe:jobId];
 }
 RCT_REMAP_METHOD(releaseContext,
@@ -171,7 +136,7 @@ RCT_REMAP_METHOD(releaseContext,
                  withResolver:(RCTPromiseResolveBlock)resolve
                  withRejecter:(RCTPromiseRejectBlock)reject)
 {
-    WhisperContext *context = contexts[[NSNumber numberWithInt:contextId]];
+    RNWhisperContext *context = contexts[[NSNumber numberWithInt:contextId]];
     if (context == nil) {
         reject(@"whisper_error", @"Context not found", nil);
         return;
@@ -210,12 +175,14 @@ RCT_REMAP_METHOD(releaseAllContexts,
 }
 - (void)invalidate {
+    rn_whisper_abort_all_transcribe();
     if (contexts == nil) {
         return;
     }
     for (NSNumber *contextId in contexts) {
-        WhisperContext *context = contexts[contextId];
+        RNWhisperContext *context = contexts[contextId];
         [context invalidate];
     }

package/ios/RNWhisperContext.h ADDED Viewed

@@ -0,0 +1,53 @@
+#ifdef __cplusplus
+#import "whisper.h"
+#import "rn-whisper.h"
+#endif
+#import <AVFoundation/AVFoundation.h>
+#import <AudioToolbox/AudioQueue.h>
+#define NUM_BUFFERS 3 // length of RNWhisperContextRecordState.buffers below
+#define DEFAULT_MAX_AUDIO_SEC 30 // presumably the fallback for maxAudioSec when options give none — confirm in RNWhisperContext.mm
+typedef struct { // per-context recording/transcription state; embedded by value as the recordState ivar of RNWhisperContext
+    __unsafe_unretained id mSelf; // non-retaining object reference (presumably the owning RNWhisperContext — confirm); not zeroed when the object deallocates
+    int jobId; // presumably the job id given to transcribeRealtime:/transcribeFile: — confirm
+    NSDictionary* options;
+    bool isTranscribing; // presumably backs -isTranscribing — confirm
+    bool isRealtime;
+    bool isCapturing; // presumably backs -isCapturing — confirm
+    int maxAudioSec;
+    int nSamples;
+    int16_t* audioBufferI16; // NOTE(review): raw C buffers; allocation and free are not visible in this header
+    float* audioBufferF32;
+    AudioQueueRef queue;
+    AudioStreamBasicDescription dataFormat;
+    AudioQueueBufferRef buffers[NUM_BUFFERS];
+    void (^transcribeHandler)(int, NSString *, NSDictionary *); // same block signature as the onTranscribe: argument of -transcribeRealtime:options:onTranscribe:
+} RNWhisperContextRecordState;
+@interface RNWhisperContext : NSObject { // pairs one whisper_context with its recording state; RNWhisper.mm keeps instances in its `contexts` dictionary
+    struct whisper_context * ctx;
+    RNWhisperContextRecordState recordState;
+}
++ (instancetype)initWithModelPath:(NSString *)modelPath; // class-level factory despite the init prefix; the caller checks -getContext for NULL to detect a failed model load
+- (struct whisper_context *)getContext; // NULL is reported by RNWhisper.mm as "Failed to load the model"
+- (OSStatus)transcribeRealtime:(int)jobId
+    options:(NSDictionary *)options
+    onTranscribe:(void (^)(int, NSString *, NSDictionary *))onTranscribe; // 0 is treated as success by the caller; the caller's handler recognises the type strings @"transcribe" and @"end"
+- (int)transcribeFile:(int)jobId
+    audioData:(float *)audioData
+    audioDataCount:(int)audioDataCount
+    options:(NSDictionary *)options; // 0 means success per the caller's check; the caller keeps ownership of audioData and frees it after this returns
+- (void)stopTranscribe:(int)jobId; // invoked by the abortTranscribe bridge method
+- (bool)isCapturing; // RNWhisper.mm rejects transcribeFile and startRealtimeTranscribe while this is true
+- (bool)isTranscribing; // RNWhisper.mm rejects transcribeFile while this is true
+- (NSDictionary *)getTextSegments; // RNWhisper.mm resolves the transcribeFile promise with this value
+- (void)invalidate; // called for every stored context from -[RNWhisper invalidate]
+@end