whisper.rn 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,6 +12,7 @@ import com.facebook.react.bridge.ReactContextBaseJavaModule;
12
12
  import com.facebook.react.bridge.ReactMethod;
13
13
  import com.facebook.react.bridge.LifecycleEventListener;
14
14
  import com.facebook.react.bridge.ReadableMap;
15
+ import com.facebook.react.bridge.WritableMap;
15
16
  import com.facebook.react.module.annotations.ReactModule;
16
17
 
17
18
  import java.util.HashMap;
@@ -72,11 +73,11 @@ public class RNWhisperModule extends ReactContextBaseJavaModule implements Lifec
72
73
 
73
74
  @ReactMethod
74
75
  public void transcribe(int id, String filePath, ReadableMap options, Promise promise) {
75
- new AsyncTask<Void, Void, String>() {
76
+ new AsyncTask<Void, Void, WritableMap>() {
76
77
  private Exception exception;
77
78
 
78
79
  @Override
79
- protected String doInBackground(Void... voids) {
80
+ protected WritableMap doInBackground(Void... voids) {
80
81
  try {
81
82
  WhisperContext context = contexts.get(id);
82
83
  if (context == null) {
@@ -90,12 +91,12 @@ public class RNWhisperModule extends ReactContextBaseJavaModule implements Lifec
90
91
  }
91
92
 
92
93
  @Override
93
- protected void onPostExecute(String result) {
94
+ protected void onPostExecute(WritableMap data) {
94
95
  if (exception != null) {
95
96
  promise.reject(exception);
96
97
  return;
97
98
  }
98
- promise.resolve(result);
99
+ promise.resolve(data);
99
100
  }
100
101
  }.execute();
101
102
  }
@@ -1,5 +1,8 @@
1
1
  package com.rnwhisper;
2
2
 
3
+ import com.facebook.react.bridge.Arguments;
4
+ import com.facebook.react.bridge.WritableArray;
5
+ import com.facebook.react.bridge.WritableMap;
3
6
  import com.facebook.react.bridge.ReadableMap;
4
7
 
5
8
  import android.util.Log;
@@ -29,7 +32,7 @@ public class WhisperContext {
29
32
  this.context = context;
30
33
  }
31
34
 
32
- public String transcribe(final String filePath, final ReadableMap options) throws IOException, Exception {
35
+ public WritableMap transcribe(final String filePath, final ReadableMap options) throws IOException, Exception {
33
36
  int code = fullTranscribe(
34
37
  context,
35
38
  decodeWaveFile(new File(filePath)),
@@ -37,14 +40,18 @@ public class WhisperContext {
37
40
  options.hasKey("maxThreads") ? options.getInt("maxThreads") : -1,
38
41
  // jint max_context,
39
42
  options.hasKey("maxContext") ? options.getInt("maxContext") : -1,
43
+
44
+ // jint word_thold,
45
+ options.hasKey("wordThold") ? options.getInt("wordThold") : -1,
40
46
  // jint max_len,
41
47
  options.hasKey("maxLen") ? options.getInt("maxLen") : -1,
48
+ // jboolean token_timestamps,
49
+ options.hasKey("tokenTimestamps") ? options.getBoolean("tokenTimestamps") : false,
50
+
42
51
  // jint offset,
43
52
  options.hasKey("offset") ? options.getInt("offset") : -1,
44
53
  // jint duration,
45
54
  options.hasKey("duration") ? options.getInt("duration") : -1,
46
- // jint word_thold,
47
- options.hasKey("wordThold") ? options.getInt("wordThold") : -1,
48
55
  // jfloat temperature,
49
56
  options.hasKey("temperature") ? (float) options.getDouble("temperature") : -1.0f,
50
57
  // jfloat temperature_inc,
@@ -58,17 +65,31 @@ public class WhisperContext {
58
65
  // jboolean translate,
59
66
  options.hasKey("translate") ? options.getBoolean("translate") : false,
60
67
  // jstring language,
61
- options.hasKey("language") ? options.getString("language") : "auto"
68
+ options.hasKey("language") ? options.getString("language") : "auto",
69
+ // jstring prompt
70
+ options.hasKey("prompt") ? options.getString("prompt") : null
62
71
  );
63
72
  if (code != 0) {
64
73
  throw new Exception("Transcription failed with code " + code);
65
74
  }
66
75
  Integer count = getTextSegmentCount(context);
67
76
  StringBuilder builder = new StringBuilder();
77
+
78
+ WritableMap data = Arguments.createMap();
79
+ WritableArray segments = Arguments.createArray();
68
80
  for (int i = 0; i < count; i++) {
69
- builder.append(getTextSegment(context, i));
81
+ String text = getTextSegment(context, i);
82
+ builder.append(text);
83
+
84
+ WritableMap segment = Arguments.createMap();
85
+ segment.putString("text", text);
86
+ segment.putInt("t0", getTextSegmentT0(context, i));
87
+ segment.putInt("t1", getTextSegmentT1(context, i));
88
+ segments.pushMap(segment);
70
89
  }
71
- return builder.toString();
90
+ data.putString("result", builder.toString());
91
+ data.putArray("segments", segments);
92
+ return data;
72
93
  }
73
94
 
74
95
  public void release() {
@@ -168,19 +189,23 @@ public class WhisperContext {
168
189
  float[] audio_data,
169
190
  int n_threads,
170
191
  int max_context,
192
+ int word_thold,
171
193
  int max_len,
194
+ boolean token_timestamps,
172
195
  int offset,
173
196
  int duration,
174
- int word_thold,
175
197
  float temperature,
176
198
  float temperature_inc,
177
199
  int beam_size,
178
200
  int best_of,
179
201
  boolean speed_up,
180
202
  boolean translate,
181
- String language
203
+ String language,
204
+ String prompt
182
205
  );
183
206
  protected static native int getTextSegmentCount(long context);
184
207
  protected static native String getTextSegment(long context, int index);
208
+ protected static native int getTextSegmentT0(long context, int index);
209
+ protected static native int getTextSegmentT1(long context, int index);
185
210
  protected static native void freeContext(long contextPtr);
186
211
  }
@@ -15,4 +15,5 @@ LOCAL_CFLAGS += -DSTDC_HEADERS -std=c11 -I $(WHISPER_LIB_DIR)
15
15
  LOCAL_CPPFLAGS += -std=c++11
16
16
  LOCAL_SRC_FILES := $(WHISPER_LIB_DIR)/ggml.c \
17
17
  $(WHISPER_LIB_DIR)/whisper.cpp \
18
- $(LOCAL_PATH)/jni.c
18
+ $(WHISPER_LIB_DIR)/rn-whisper.cpp \
19
+ $(LOCAL_PATH)/jni.cpp
@@ -2,10 +2,11 @@
2
2
  #include <android/asset_manager.h>
3
3
  #include <android/asset_manager_jni.h>
4
4
  #include <android/log.h>
5
- #include <stdlib.h>
5
+ #include <cstdlib>
6
6
  #include <sys/sysinfo.h>
7
- #include <string.h>
7
+ #include <string>
8
8
  #include "whisper.h"
9
+ #include "rn-whisper.h"
9
10
  #include "ggml.h"
10
11
 
11
12
  #define UNUSED(x) (void)(x)
@@ -22,27 +23,17 @@ static inline int max(int a, int b) {
22
23
  return (a > b) ? a : b;
23
24
  }
24
25
 
25
- static size_t asset_read(void *ctx, void *output, size_t read_size) {
26
- return AAsset_read((AAsset *) ctx, output, read_size);
27
- }
28
-
29
- static bool asset_is_eof(void *ctx) {
30
- return AAsset_getRemainingLength64((AAsset *) ctx) <= 0;
31
- }
32
-
33
- static void asset_close(void *ctx) {
34
- AAsset_close((AAsset *) ctx);
35
- }
26
+ extern "C" {
36
27
 
37
28
  JNIEXPORT jlong JNICALL
38
29
  Java_com_rnwhisper_WhisperContext_initContext(
39
30
  JNIEnv *env, jobject thiz, jstring model_path_str) {
40
31
  UNUSED(thiz);
41
- struct whisper_context *context = NULL;
42
- const char *model_path_chars = (*env)->GetStringUTFChars(env, model_path_str, NULL);
32
+ struct whisper_context *context = nullptr;
33
+ const char *model_path_chars = env->GetStringUTFChars(model_path_str, nullptr);
43
34
  context = whisper_init_from_file(model_path_chars);
44
- (*env)->ReleaseStringUTFChars(env, model_path_str, model_path_chars);
45
- return (jlong) context;
35
+ env->ReleaseStringUTFChars(model_path_str, model_path_chars);
36
+ return reinterpret_cast<jlong>(context);
46
37
  }
47
38
 
48
39
  JNIEXPORT jint JNICALL
@@ -53,29 +44,31 @@ Java_com_rnwhisper_WhisperContext_fullTranscribe(
53
44
  jfloatArray audio_data,
54
45
  jint n_threads,
55
46
  jint max_context,
56
- jint max_len,
47
+ int word_thold,
48
+ int max_len,
49
+ jboolean token_timestamps,
57
50
  jint offset,
58
51
  jint duration,
59
- jint word_thold,
60
52
  jfloat temperature,
61
53
  jfloat temperature_inc,
62
54
  jint beam_size,
63
55
  jint best_of,
64
56
  jboolean speed_up,
65
57
  jboolean translate,
66
- jstring language
58
+ jstring language,
59
+ jstring prompt
67
60
  ) {
68
61
  UNUSED(thiz);
69
- struct whisper_context *context = (struct whisper_context *) context_ptr;
70
- jfloat *audio_data_arr = (*env)->GetFloatArrayElements(env, audio_data, NULL);
71
- const jsize audio_data_length = (*env)->GetArrayLength(env, audio_data);
62
+ struct whisper_context *context = reinterpret_cast<struct whisper_context *>(context_ptr);
63
+ jfloat *audio_data_arr = env->GetFloatArrayElements(audio_data, nullptr);
64
+ const jsize audio_data_length = env->GetArrayLength(audio_data);
72
65
 
73
66
  int max_threads = max(1, min(8, get_nprocs() - 2));
74
67
 
75
68
  LOGI("About to create params");
76
69
 
77
70
  struct whisper_full_params params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
78
-
71
+
79
72
  if (beam_size > -1) {
80
73
  params.strategy = WHISPER_SAMPLING_BEAM_SEARCH;
81
74
  params.beam_search.beam_size = beam_size;
@@ -86,22 +79,25 @@ Java_com_rnwhisper_WhisperContext_fullTranscribe(
86
79
  params.print_timestamps = false;
87
80
  params.print_special = false;
88
81
  params.translate = translate;
89
- params.language = language;
82
+ const char *language_chars = env->GetStringUTFChars(language, nullptr);
83
+ params.language = language_chars;
90
84
  params.n_threads = n_threads > 0 ? n_threads : max_threads;
91
85
  params.speed_up = speed_up;
92
86
  params.offset_ms = 0;
93
87
  params.no_context = true;
94
88
  params.single_segment = false;
95
89
 
90
+ if (max_len > -1) {
91
+ params.max_len = max_len;
92
+ }
93
+ params.token_timestamps = token_timestamps;
94
+
96
95
  if (best_of > -1) {
97
96
  params.greedy.best_of = best_of;
98
97
  }
99
98
  if (max_context > -1) {
100
99
  params.n_max_text_ctx = max_context;
101
100
  }
102
- if (max_len > -1) {
103
- params.max_len = max_len;
104
- }
105
101
  if (offset > -1) {
106
102
  params.offset_ms = offset;
107
103
  }
@@ -117,6 +113,13 @@ Java_com_rnwhisper_WhisperContext_fullTranscribe(
117
113
  if (temperature_inc > -1) {
118
114
  params.temperature_inc = temperature_inc;
119
115
  }
116
+ if (prompt != nullptr) {
117
+ rn_whisper_convert_prompt(
118
+ context,
119
+ params,
120
+ new std::string(env->GetStringUTFChars(prompt, nullptr))
121
+ );
122
+ }
120
123
 
121
124
  LOGI("About to reset timings");
122
125
  whisper_reset_timings(context);
@@ -126,7 +129,8 @@ Java_com_rnwhisper_WhisperContext_fullTranscribe(
126
129
  if (code == 0) {
127
130
  // whisper_print_timings(context);
128
131
  }
129
- (*env)->ReleaseFloatArrayElements(env, audio_data, audio_data_arr, JNI_ABORT);
132
+ env->ReleaseFloatArrayElements(audio_data, audio_data_arr, JNI_ABORT);
133
+ env->ReleaseStringUTFChars(language, language_chars);
130
134
  return code;
131
135
  }
132
136
 
@@ -135,7 +139,7 @@ Java_com_rnwhisper_WhisperContext_getTextSegmentCount(
135
139
  JNIEnv *env, jobject thiz, jlong context_ptr) {
136
140
  UNUSED(env);
137
141
  UNUSED(thiz);
138
- struct whisper_context *context = (struct whisper_context *) context_ptr;
142
+ struct whisper_context *context = reinterpret_cast<struct whisper_context *>(context_ptr);
139
143
  return whisper_full_n_segments(context);
140
144
  }
141
145
 
@@ -143,17 +147,37 @@ JNIEXPORT jstring JNICALL
143
147
  Java_com_rnwhisper_WhisperContext_getTextSegment(
144
148
  JNIEnv *env, jobject thiz, jlong context_ptr, jint index) {
145
149
  UNUSED(thiz);
146
- struct whisper_context *context = (struct whisper_context *) context_ptr;
150
+ struct whisper_context *context = reinterpret_cast<struct whisper_context *>(context_ptr);
147
151
  const char *text = whisper_full_get_segment_text(context, index);
148
- jstring string = (*env)->NewStringUTF(env, text);
152
+ jstring string = env->NewStringUTF(text);
149
153
  return string;
150
154
  }
151
155
 
156
+ JNIEXPORT jint JNICALL // JNI entry point backing the Java native method WhisperContext.getTextSegmentT0(long, int)
157
+ Java_com_rnwhisper_WhisperContext_getTextSegmentT0(
158
+ JNIEnv *env, jobject thiz, jlong context_ptr, jint index) {
159
+ UNUSED(env);
160
+ UNUSED(thiz);
161
+ struct whisper_context *context = reinterpret_cast<struct whisper_context *>(context_ptr); // context_ptr is the native pointer previously handed to Java as a jlong
162
+ return whisper_full_get_segment_t0(context, index); // NOTE(review): returned as jint, while the iOS code stores this same call in int64_t — confirm the value always fits in 32 bits
163
+ }
164
+
165
+ JNIEXPORT jint JNICALL // JNI entry point backing the Java native method WhisperContext.getTextSegmentT1(long, int)
166
+ Java_com_rnwhisper_WhisperContext_getTextSegmentT1(
167
+ JNIEnv *env, jobject thiz, jlong context_ptr, jint index) {
168
+ UNUSED(env);
169
+ UNUSED(thiz);
170
+ struct whisper_context *context = reinterpret_cast<struct whisper_context *>(context_ptr); // context_ptr is the native pointer previously handed to Java as a jlong
171
+ return whisper_full_get_segment_t1(context, index); // NOTE(review): returned as jint, while the iOS code stores this same call in int64_t — confirm the value always fits in 32 bits
172
+ }
173
+
152
174
  JNIEXPORT void JNICALL
153
175
  Java_com_rnwhisper_WhisperContext_freeContext(
154
176
  JNIEnv *env, jobject thiz, jlong context_ptr) {
155
177
  UNUSED(env);
156
178
  UNUSED(thiz);
157
- struct whisper_context *context = (struct whisper_context *) context_ptr;
179
+ struct whisper_context *context = reinterpret_cast<struct whisper_context *>(context_ptr);
158
180
  whisper_free(context);
159
181
  }
182
+
183
+ } // extern "C"
@@ -0,0 +1,31 @@
1
+ #include <cstdio>
2
+ #include <string>
3
+ #include <vector>
4
+ #include "whisper.h"
5
+
6
+ extern "C" {
7
+
8
+ void rn_whisper_convert_prompt( // Runs *prompt through whisper_tokenize and records the resulting token buffer and count on params
9
+ struct whisper_context * ctx,
10
+ struct whisper_full_params params, // NOTE(review): received by value, so the assignments below modify only this function's copy — confirm whether callers are expected to observe them
11
+ std::string * prompt // dereferenced without a null check; not freed by this function
12
+ ) {
13
+ std::vector<whisper_token> prompt_tokens; // local buffer, destroyed when this function returns
14
+ if (!prompt->empty()) {
15
+ prompt_tokens.resize(1024); // make room for up to 1024 tokens before tokenizing
16
+ prompt_tokens.resize(whisper_tokenize(ctx, prompt->c_str(), prompt_tokens.data(), prompt_tokens.size())); // shrink to the count returned; NOTE(review): presumably a negative return signals failure and is not a valid size — verify against whisper.h
17
+
18
+ // fprintf(stderr, "\n");
19
+ // fprintf(stderr, "initial prompt: '%s'\n", prompt->c_str());
20
+ // fprintf(stderr, "initial tokens: [ ");
21
+ // for (int i = 0; i < (int) prompt_tokens.size(); ++i) {
22
+ // fprintf(stderr, "%d ", prompt_tokens[i]);
23
+ // }
24
+ // fprintf(stderr, "]\n");
25
+
26
+ params.prompt_tokens = prompt_tokens.data(); // NOTE(review): points into the local vector above, which does not outlive this call
27
+ params.prompt_n_tokens = prompt_tokens.size();
28
+ }
29
+ }
30
+
31
+ }
@@ -0,0 +1,16 @@
1
+
2
+ #ifdef __cplusplus
3
+ #include <string>
4
+ #include <whisper.h>
5
+ extern "C" {
6
+ #endif
7
+
8
+ void rn_whisper_convert_prompt( // Declaration of the prompt-to-token helper; returns nothing
9
+ struct whisper_context * ctx,
10
+ struct whisper_full_params params, // passed by value
11
+ std::string * prompt // NOTE(review): std::string appears outside the __cplusplus guard, so this header can only be included from C++ translation units
12
+ );
13
+
14
+ #ifdef __cplusplus
15
+ }
16
+ #endif
package/ios/RNWhisper.h CHANGED
@@ -1,7 +1,9 @@
1
1
  #ifdef __cplusplus
2
2
  #import "whisper.h"
3
+ #import "rn-whisper.h"
3
4
  #endif
4
5
 
6
+
5
7
  #import <React/RCTBridgeModule.h>
6
8
 
7
9
  @interface RNWhisper : NSObject <RCTBridgeModule>
package/ios/RNWhisper.mm CHANGED
@@ -1,6 +1,7 @@
1
1
 
2
2
  #import "RNWhisper.h"
3
3
  #include <stdlib.h>
4
+ #include <string>
4
5
 
5
6
  @interface WhisperContext : NSObject {
6
7
  }
@@ -93,15 +94,18 @@ RCT_REMAP_METHOD(transcribe,
93
94
  params.no_context = true;
94
95
  params.single_segment = false;
95
96
 
97
+ if (options[@"maxLen"] != nil) {
98
+ params.max_len = [options[@"maxLen"] intValue];
99
+ }
100
+ params.token_timestamps = options[@"tokenTimestamps"] != nil ? [options[@"tokenTimestamps"] boolValue] : false;
101
+
96
102
  if (options[@"bestOf"] != nil) {
97
103
  params.greedy.best_of = [options[@"bestOf"] intValue];
98
104
  }
99
105
  if (options[@"maxContext"] != nil) {
100
106
  params.n_max_text_ctx = [options[@"maxContext"] intValue];
101
107
  }
102
- if (options[@"maxLen"] != nil) {
103
- params.max_len = [options[@"maxLen"] intValue];
104
- }
108
+
105
109
  if (options[@"offset"] != nil) {
106
110
  params.offset_ms = [options[@"offset"] intValue];
107
111
  }
@@ -117,6 +121,15 @@ RCT_REMAP_METHOD(transcribe,
117
121
  if (options[@"temperatureInc"] != nil) {
118
122
  params.temperature_inc = [options[@"temperature_inc"] floatValue];
119
123
  }
124
+
125
+ if (options[@"prompt"] != nil) {
126
+ std::string *prompt = new std::string([options[@"prompt"] UTF8String]);
127
+ rn_whisper_convert_prompt(
128
+ context.ctx,
129
+ params,
130
+ prompt
131
+ );
132
+ }
120
133
 
121
134
  whisper_reset_timings(context.ctx);
122
135
  int code = whisper_full(context.ctx, params, waveFile, count);
@@ -132,11 +145,25 @@ RCT_REMAP_METHOD(transcribe,
132
145
 
133
146
  NSString *result = @"";
134
147
  int n_segments = whisper_full_n_segments(context.ctx);
148
+
149
+ NSMutableArray *segments = [[NSMutableArray alloc] init];
135
150
  for (int i = 0; i < n_segments; i++) {
136
151
  const char * text_cur = whisper_full_get_segment_text(context.ctx, i);
137
152
  result = [result stringByAppendingString:[NSString stringWithUTF8String:text_cur]];
138
- }
139
- resolve(result);
153
+
154
+ const int64_t t0 = whisper_full_get_segment_t0(context.ctx, i);
155
+ const int64_t t1 = whisper_full_get_segment_t1(context.ctx, i);
156
+ NSDictionary *segment = @{
157
+ @"text": [NSString stringWithUTF8String:text_cur],
158
+ @"t0": [NSNumber numberWithLongLong:t0],
159
+ @"t1": [NSNumber numberWithLongLong:t1]
160
+ };
161
+ [segments addObject:segment];
162
+ }
163
+ resolve(@{
164
+ @"result": result,
165
+ @"segments": segments
166
+ });
140
167
  }
141
168
 
142
169
  RCT_REMAP_METHOD(releaseContext,
package/jest/mock.js CHANGED
@@ -3,7 +3,10 @@ const { NativeModules } = require('react-native')
3
3
  if (!NativeModules.RNWhisper) {
4
4
  NativeModules.RNWhisper = {
5
5
  initContext: jest.fn(() => Promise.resolve(1)),
6
- transcribe: jest.fn(() => Promise.resolve('TEST')),
6
+ transcribe: jest.fn(() => Promise.resolve({
7
+ result: ' Test',
8
+ segments: [{ text: ' Test', t0: 0, t1: 33 }],
9
+ })),
7
10
  releaseContext: jest.fn(() => Promise.resolve()),
8
11
  releaseAllContexts: jest.fn(() => Promise.resolve()),
9
12
  }
@@ -21,9 +21,7 @@ class WhisperContext {
21
21
  }
22
22
  async transcribe(path) {
23
23
  let options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {};
24
- return RNWhisper.transcribe(this.id, path, options).then(result => ({
25
- result
26
- }));
24
+ return RNWhisper.transcribe(this.id, path, options);
27
25
  }
28
26
  async release() {
29
27
  return RNWhisper.releaseContext(this.id);
@@ -1 +1 @@
1
- {"version":3,"names":["_reactNative","require","LINKING_ERROR","Platform","select","ios","default","RNWhisper","NativeModules","Proxy","get","Error","WhisperContext","constructor","id","transcribe","path","options","arguments","length","undefined","then","result","release","releaseContext","initWhisper","filePath","initContext","releaseAllWhisper","releaseAllContexts"],"sourceRoot":"../../src","sources":["index.tsx"],"mappings":";;;;;;;AAAA,IAAAA,YAAA,GAAAC,OAAA;AAEA,MAAMC,aAAa,GAChB,sEAAqEC,qBAAQ,CAACC,MAAM,CAAC;EAAEC,GAAG,EAAE,gCAAgC;EAAEC,OAAO,EAAE;AAAG,CAAC,CAC3I,oDAAmD;AAEtD,MAAMC,SAAS,GAAGC,0BAAa,CAACD,SAAS,GACrCC,0BAAa,CAACD,SAAS,GACvB,IAAIE,KAAK,CACT,CAAC,CAAC,EACF;EACEC,GAAGA,CAAA,EAAG;IACJ,MAAM,IAAIC,KAAK,CAACT,aAAa,CAAC;EAChC;AACF,CAAC,CACF;AAoBH,MAAMU,cAAc,CAAC;EAGnBC,WAAWA,CAACC,EAAU,EAAE;IACtB,IAAI,CAACA,EAAE,GAAGA,EAAE;EACd;EAEA,MAAMC,UAAUA,CAACC,IAAY,EAA8D;IAAA,IAA5DC,OAA0B,GAAAC,SAAA,CAAAC,MAAA,QAAAD,SAAA,QAAAE,SAAA,GAAAF,SAAA,MAAG,CAAC,CAAC;IAC5D,OAAOX,SAAS,CAACQ,UAAU,CAAC,IAAI,CAACD,EAAE,EAAEE,IAAI,EAAEC,OAAO,CAAC,CAACI,IAAI,CAAEC,MAAc,KAAM;MAC5EA;IACF,CAAC,CAAC,CAAC;EACL;EAEA,MAAMC,OAAOA,CAAA,EAAG;IACd,OAAOhB,SAAS,CAACiB,cAAc,CAAC,IAAI,CAACV,EAAE,CAAC;EAC1C;AACF;AAEO,eAAeW,WAAWA,CAAA,EAEN;EAAA,IADzB;IAAEC;EAAgC,CAAC,GAAAR,SAAA,CAAAC,MAAA,QAAAD,SAAA,QAAAE,SAAA,GAAAF,SAAA,MAAG,CAAC,CAAC;EAExC,MAAMJ,EAAE,GAAG,MAAMP,SAAS,CAACoB,WAAW,CAACD,QAAQ,CAAC;EAChD,OAAO,IAAId,cAAc,CAACE,EAAE,CAAC;AAC/B;AAEO,eAAec,iBAAiBA,CAAA,EAAkB;EACvD,OAAOrB,SAAS,CAACsB,kBAAkB,EAAE;AACvC"}
1
+ {"version":3,"names":["_reactNative","require","LINKING_ERROR","Platform","select","ios","default","RNWhisper","NativeModules","Proxy","get","Error","WhisperContext","constructor","id","transcribe","path","options","arguments","length","undefined","release","releaseContext","initWhisper","filePath","initContext","releaseAllWhisper","releaseAllContexts"],"sourceRoot":"../../src","sources":["index.tsx"],"mappings":";;;;;;;AAAA,IAAAA,YAAA,GAAAC,OAAA;AAEA,MAAMC,aAAa,GAChB,sEAAqEC,qBAAQ,CAACC,MAAM,CAAC;EAAEC,GAAG,EAAE,gCAAgC;EAAEC,OAAO,EAAE;AAAG,CAAC,CAC3I,oDAAmD;AAEtD,MAAMC,SAAS,GAAGC,0BAAa,CAACD,SAAS,GACrCC,0BAAa,CAACD,SAAS,GACvB,IAAIE,KAAK,CACT,CAAC,CAAC,EACF;EACEC,GAAGA,CAAA,EAAG;IACJ,MAAM,IAAIC,KAAK,CAACT,aAAa,CAAC;EAChC;AACF,CAAC,CACF;AA6BH,MAAMU,cAAc,CAAC;EAGnBC,WAAWA,CAACC,EAAU,EAAE;IACtB,IAAI,CAACA,EAAE,GAAGA,EAAE;EACd;EAEA,MAAMC,UAAUA,CAACC,IAAY,EAA8D;IAAA,IAA5DC,OAA0B,GAAAC,SAAA,CAAAC,MAAA,QAAAD,SAAA,QAAAE,SAAA,GAAAF,SAAA,MAAG,CAAC,CAAC;IAC5D,OAAOX,SAAS,CAACQ,UAAU,CAAC,IAAI,CAACD,EAAE,EAAEE,IAAI,EAAEC,OAAO,CAAC;EACrD;EAEA,MAAMI,OAAOA,CAAA,EAAG;IACd,OAAOd,SAAS,CAACe,cAAc,CAAC,IAAI,CAACR,EAAE,CAAC;EAC1C;AACF;AAEO,eAAeS,WAAWA,CAAA,EAEN;EAAA,IADzB;IAAEC;EAAgC,CAAC,GAAAN,SAAA,CAAAC,MAAA,QAAAD,SAAA,QAAAE,SAAA,GAAAF,SAAA,MAAG,CAAC,CAAC;EAExC,MAAMJ,EAAE,GAAG,MAAMP,SAAS,CAACkB,WAAW,CAACD,QAAQ,CAAC;EAChD,OAAO,IAAIZ,cAAc,CAACE,EAAE,CAAC;AAC/B;AAEO,eAAeY,iBAAiBA,CAAA,EAAkB;EACvD,OAAOnB,SAAS,CAACoB,kBAAkB,EAAE;AACvC"}
@@ -14,9 +14,7 @@ class WhisperContext {
14
14
  }
15
15
  async transcribe(path) {
16
16
  let options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {};
17
- return RNWhisper.transcribe(this.id, path, options).then(result => ({
18
- result
19
- }));
17
+ return RNWhisper.transcribe(this.id, path, options);
20
18
  }
21
19
  async release() {
22
20
  return RNWhisper.releaseContext(this.id);
@@ -1 +1 @@
1
- {"version":3,"names":["NativeModules","Platform","LINKING_ERROR","select","ios","default","RNWhisper","Proxy","get","Error","WhisperContext","constructor","id","transcribe","path","options","arguments","length","undefined","then","result","release","releaseContext","initWhisper","filePath","initContext","releaseAllWhisper","releaseAllContexts"],"sourceRoot":"../../src","sources":["index.tsx"],"mappings":"AAAA,SAASA,aAAa,EAAEC,QAAQ,QAAQ,cAAc;AAEtD,MAAMC,aAAa,GAChB,sEAAqED,QAAQ,CAACE,MAAM,CAAC;EAAEC,GAAG,EAAE,gCAAgC;EAAEC,OAAO,EAAE;AAAG,CAAC,CAC3I,oDAAmD;AAEtD,MAAMC,SAAS,GAAGN,aAAa,CAACM,SAAS,GACrCN,aAAa,CAACM,SAAS,GACvB,IAAIC,KAAK,CACT,CAAC,CAAC,EACF;EACEC,GAAGA,CAAA,EAAG;IACJ,MAAM,IAAIC,KAAK,CAACP,aAAa,CAAC;EAChC;AACF,CAAC,CACF;AAoBH,MAAMQ,cAAc,CAAC;EAGnBC,WAAWA,CAACC,EAAU,EAAE;IACtB,IAAI,CAACA,EAAE,GAAGA,EAAE;EACd;EAEA,MAAMC,UAAUA,CAACC,IAAY,EAA8D;IAAA,IAA5DC,OAA0B,GAAAC,SAAA,CAAAC,MAAA,QAAAD,SAAA,QAAAE,SAAA,GAAAF,SAAA,MAAG,CAAC,CAAC;IAC5D,OAAOV,SAAS,CAACO,UAAU,CAAC,IAAI,CAACD,EAAE,EAAEE,IAAI,EAAEC,OAAO,CAAC,CAACI,IAAI,CAAEC,MAAc,KAAM;MAC5EA;IACF,CAAC,CAAC,CAAC;EACL;EAEA,MAAMC,OAAOA,CAAA,EAAG;IACd,OAAOf,SAAS,CAACgB,cAAc,CAAC,IAAI,CAACV,EAAE,CAAC;EAC1C;AACF;AAEA,OAAO,eAAeW,WAAWA,CAAA,EAEN;EAAA,IADzB;IAAEC;EAAgC,CAAC,GAAAR,SAAA,CAAAC,MAAA,QAAAD,SAAA,QAAAE,SAAA,GAAAF,SAAA,MAAG,CAAC,CAAC;EAExC,MAAMJ,EAAE,GAAG,MAAMN,SAAS,CAACmB,WAAW,CAACD,QAAQ,CAAC;EAChD,OAAO,IAAId,cAAc,CAACE,EAAE,CAAC;AAC/B;AAEA,OAAO,eAAec,iBAAiBA,CAAA,EAAkB;EACvD,OAAOpB,SAAS,CAACqB,kBAAkB,EAAE;AACvC"}
1
+ {"version":3,"names":["NativeModules","Platform","LINKING_ERROR","select","ios","default","RNWhisper","Proxy","get","Error","WhisperContext","constructor","id","transcribe","path","options","arguments","length","undefined","release","releaseContext","initWhisper","filePath","initContext","releaseAllWhisper","releaseAllContexts"],"sourceRoot":"../../src","sources":["index.tsx"],"mappings":"AAAA,SAASA,aAAa,EAAEC,QAAQ,QAAQ,cAAc;AAEtD,MAAMC,aAAa,GAChB,sEAAqED,QAAQ,CAACE,MAAM,CAAC;EAAEC,GAAG,EAAE,gCAAgC;EAAEC,OAAO,EAAE;AAAG,CAAC,CAC3I,oDAAmD;AAEtD,MAAMC,SAAS,GAAGN,aAAa,CAACM,SAAS,GACrCN,aAAa,CAACM,SAAS,GACvB,IAAIC,KAAK,CACT,CAAC,CAAC,EACF;EACEC,GAAGA,CAAA,EAAG;IACJ,MAAM,IAAIC,KAAK,CAACP,aAAa,CAAC;EAChC;AACF,CAAC,CACF;AA6BH,MAAMQ,cAAc,CAAC;EAGnBC,WAAWA,CAACC,EAAU,EAAE;IACtB,IAAI,CAACA,EAAE,GAAGA,EAAE;EACd;EAEA,MAAMC,UAAUA,CAACC,IAAY,EAA8D;IAAA,IAA5DC,OAA0B,GAAAC,SAAA,CAAAC,MAAA,QAAAD,SAAA,QAAAE,SAAA,GAAAF,SAAA,MAAG,CAAC,CAAC;IAC5D,OAAOV,SAAS,CAACO,UAAU,CAAC,IAAI,CAACD,EAAE,EAAEE,IAAI,EAAEC,OAAO,CAAC;EACrD;EAEA,MAAMI,OAAOA,CAAA,EAAG;IACd,OAAOb,SAAS,CAACc,cAAc,CAAC,IAAI,CAACR,EAAE,CAAC;EAC1C;AACF;AAEA,OAAO,eAAeS,WAAWA,CAAA,EAEN;EAAA,IADzB;IAAEC;EAAgC,CAAC,GAAAN,SAAA,CAAAC,MAAA,QAAAD,SAAA,QAAAE,SAAA,GAAAF,SAAA,MAAG,CAAC,CAAC;EAExC,MAAMJ,EAAE,GAAG,MAAMN,SAAS,CAACiB,WAAW,CAACD,QAAQ,CAAC;EAChD,OAAO,IAAIZ,cAAc,CAACE,EAAE,CAAC;AAC/B;AAEA,OAAO,eAAeY,iBAAiBA,CAAA,EAAkB;EACvD,OAAOlB,SAAS,CAACmB,kBAAkB,EAAE;AACvC"}
@@ -1,7 +1,10 @@
1
1
  export type TranscribeOptions = {
2
+ language?: string;
3
+ translate?: boolean;
2
4
  maxThreads?: number;
3
5
  maxContext?: number;
4
6
  maxLen?: number;
7
+ tokenTimestamps?: boolean;
5
8
  offset?: number;
6
9
  duration?: number;
7
10
  wordThold?: number;
@@ -10,9 +13,15 @@ export type TranscribeOptions = {
10
13
  beamSize?: number;
11
14
  bestOf?: number;
12
15
  speedUp?: boolean;
16
+ prompt?: string;
13
17
  };
14
18
  export type TranscribeResult = {
15
19
  result: string;
20
+ segments: Array<{
21
+ text: string;
22
+ t0: number;
23
+ t1: number;
24
+ }>;
16
25
  };
17
26
  declare class WhisperContext {
18
27
  id: number;
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.tsx"],"names":[],"mappings":"AAiBA,MAAM,MAAM,iBAAiB,GAAG;IAC9B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB,CAAA;AAED,MAAM,MAAM,gBAAgB,GAAG;IAC7B,MAAM,EAAE,MAAM,CAAC;CAChB,CAAA;AAED,cAAM,cAAc;IAClB,EAAE,EAAE,MAAM,CAAA;gBAEE,EAAE,EAAE,MAAM;IAIhB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,GAAE,iBAAsB,GAAG,OAAO,CAAC,gBAAgB,CAAC;IAMpF,OAAO;CAGd;AAED,wBAAsB,WAAW,CAC/B,EAAE,QAAQ,EAAE,GAAE;IAAE,QAAQ,CAAC,EAAE,MAAM,CAAA;CAAO,GACvC,OAAO,CAAC,cAAc,CAAC,CAGzB;AAED,wBAAsB,iBAAiB,IAAI,OAAO,CAAC,IAAI,CAAC,CAEvD"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.tsx"],"names":[],"mappings":"AAiBA,MAAM,MAAM,iBAAiB,GAAG;IAC9B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB,CAAA;AAED,MAAM,MAAM,gBAAgB,GAAG;IAC7B,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,KAAK,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,EAAE,EAAE,MAAM,CAAC;QACX,EAAE,EAAE,MAAM,CAAC;KACZ,CAAC,CAAC;CACJ,CAAA;AAED,cAAM,cAAc;IAClB,EAAE,EAAE,MAAM,CAAA;gBAEE,EAAE,EAAE,MAAM;IAIhB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,GAAE,iBAAsB,GAAG,OAAO,CAAC,gBAAgB,CAAC;IAIpF,OAAO;CAGd;AAED,wBAAsB,WAAW,CAC/B,EAAE,QAAQ,EAAE,GAAE;IAAE,QAAQ,CAAC,EAAE,MAAM,CAAA;CAAO,GACvC,OAAO,CAAC,cAAc,CAAC,CAGzB;AAED,wBAAsB,iBAAiB,IAAI,OAAO,CAAC,IAAI,CAAC,CAEvD"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "whisper.rn",
3
- "version": "0.1.3",
3
+ "version": "0.1.4",
4
4
  "description": "React Native binding of whisper.cpp",
5
5
  "main": "lib/commonjs/index",
6
6
  "module": "lib/module/index",
@@ -98,13 +98,17 @@
98
98
  "@commitlint/config-conventional"
99
99
  ]
100
100
  },
101
+ "publishConfig": {
102
+ "registry": "https://registry.npmjs.org/"
103
+ },
101
104
  "release-it": {
102
105
  "git": {
103
106
  "commitMessage": "chore: release ${version}",
104
107
  "tagName": "v${version}"
105
108
  },
106
109
  "npm": {
107
- "publish": true
110
+ "publish": true,
111
+ "skipChecks": true
108
112
  },
109
113
  "github": {
110
114
  "release": true
package/src/index.tsx CHANGED
@@ -16,9 +16,12 @@ const RNWhisper = NativeModules.RNWhisper
16
16
  )
17
17
 
18
18
  export type TranscribeOptions = {
19
+ language?: string,
20
+ translate?: boolean,
19
21
  maxThreads?: number,
20
22
  maxContext?: number,
21
23
  maxLen?: number,
24
+ tokenTimestamps?: boolean,
22
25
  offset?: number,
23
26
  duration?: number,
24
27
  wordThold?: number,
@@ -27,10 +30,16 @@ export type TranscribeOptions = {
27
30
  beamSize?: number,
28
31
  bestOf?: number,
29
32
  speedUp?: boolean,
33
+ prompt?: string,
30
34
  }
31
35
 
32
36
  export type TranscribeResult = {
33
37
  result: string,
38
+ segments: Array<{
39
+ text: string,
40
+ t0: number,
41
+ t1: number,
42
+ }>,
34
43
  }
35
44
 
36
45
  class WhisperContext {
@@ -41,9 +50,7 @@ class WhisperContext {
41
50
  }
42
51
 
43
52
  async transcribe(path: string, options: TranscribeOptions = {}): Promise<TranscribeResult> {
44
- return RNWhisper.transcribe(this.id, path, options).then((result: string) => ({
45
- result
46
- }))
53
+ return RNWhisper.transcribe(this.id, path, options)
47
54
  }
48
55
 
49
56
  async release() {