whisper.rn 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/java/com/rnwhisper/RNWhisperModule.java +5 -4
- package/android/src/main/java/com/rnwhisper/WhisperContext.java +33 -8
- package/android/src/main/jni/whisper/Whisper.mk +2 -1
- package/android/src/main/jni/whisper/{jni.c → jni.cpp} +57 -33
- package/cpp/rn-whisper.cpp +31 -0
- package/cpp/rn-whisper.h +16 -0
- package/ios/RNWhisper.h +2 -0
- package/ios/RNWhisper.mm +32 -5
- package/jest/mock.js +4 -1
- package/lib/commonjs/index.js +1 -3
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/index.js +1 -3
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/index.d.ts +9 -0
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +6 -2
- package/src/index.tsx +10 -3
|
@@ -12,6 +12,7 @@ import com.facebook.react.bridge.ReactContextBaseJavaModule;
|
|
|
12
12
|
import com.facebook.react.bridge.ReactMethod;
|
|
13
13
|
import com.facebook.react.bridge.LifecycleEventListener;
|
|
14
14
|
import com.facebook.react.bridge.ReadableMap;
|
|
15
|
+
import com.facebook.react.bridge.WritableMap;
|
|
15
16
|
import com.facebook.react.module.annotations.ReactModule;
|
|
16
17
|
|
|
17
18
|
import java.util.HashMap;
|
|
@@ -72,11 +73,11 @@ public class RNWhisperModule extends ReactContextBaseJavaModule implements Lifec
|
|
|
72
73
|
|
|
73
74
|
@ReactMethod
|
|
74
75
|
public void transcribe(int id, String filePath, ReadableMap options, Promise promise) {
|
|
75
|
-
new AsyncTask<Void, Void,
|
|
76
|
+
new AsyncTask<Void, Void, WritableMap>() {
|
|
76
77
|
private Exception exception;
|
|
77
78
|
|
|
78
79
|
@Override
|
|
79
|
-
protected
|
|
80
|
+
protected WritableMap doInBackground(Void... voids) {
|
|
80
81
|
try {
|
|
81
82
|
WhisperContext context = contexts.get(id);
|
|
82
83
|
if (context == null) {
|
|
@@ -90,12 +91,12 @@ public class RNWhisperModule extends ReactContextBaseJavaModule implements Lifec
|
|
|
90
91
|
}
|
|
91
92
|
|
|
92
93
|
@Override
|
|
93
|
-
protected void onPostExecute(
|
|
94
|
+
protected void onPostExecute(WritableMap data) {
|
|
94
95
|
if (exception != null) {
|
|
95
96
|
promise.reject(exception);
|
|
96
97
|
return;
|
|
97
98
|
}
|
|
98
|
-
promise.resolve(
|
|
99
|
+
promise.resolve(data);
|
|
99
100
|
}
|
|
100
101
|
}.execute();
|
|
101
102
|
}
|
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
package com.rnwhisper;
|
|
2
2
|
|
|
3
|
+
import com.facebook.react.bridge.Arguments;
|
|
4
|
+
import com.facebook.react.bridge.WritableArray;
|
|
5
|
+
import com.facebook.react.bridge.WritableMap;
|
|
3
6
|
import com.facebook.react.bridge.ReadableMap;
|
|
4
7
|
|
|
5
8
|
import android.util.Log;
|
|
@@ -29,7 +32,7 @@ public class WhisperContext {
|
|
|
29
32
|
this.context = context;
|
|
30
33
|
}
|
|
31
34
|
|
|
32
|
-
public
|
|
35
|
+
public WritableMap transcribe(final String filePath, final ReadableMap options) throws IOException, Exception {
|
|
33
36
|
int code = fullTranscribe(
|
|
34
37
|
context,
|
|
35
38
|
decodeWaveFile(new File(filePath)),
|
|
@@ -37,14 +40,18 @@ public class WhisperContext {
|
|
|
37
40
|
options.hasKey("maxThreads") ? options.getInt("maxThreads") : -1,
|
|
38
41
|
// jint max_context,
|
|
39
42
|
options.hasKey("maxContext") ? options.getInt("maxContext") : -1,
|
|
43
|
+
|
|
44
|
+
// jint word_thold,
|
|
45
|
+
options.hasKey("wordThold") ? options.getInt("wordThold") : -1,
|
|
40
46
|
// jint max_len,
|
|
41
47
|
options.hasKey("maxLen") ? options.getInt("maxLen") : -1,
|
|
48
|
+
// jboolean token_timestamps,
|
|
49
|
+
options.hasKey("tokenTimestamps") ? options.getBoolean("tokenTimestamps") : false,
|
|
50
|
+
|
|
42
51
|
// jint offset,
|
|
43
52
|
options.hasKey("offset") ? options.getInt("offset") : -1,
|
|
44
53
|
// jint duration,
|
|
45
54
|
options.hasKey("duration") ? options.getInt("duration") : -1,
|
|
46
|
-
// jint word_thold,
|
|
47
|
-
options.hasKey("wordThold") ? options.getInt("wordThold") : -1,
|
|
48
55
|
// jfloat temperature,
|
|
49
56
|
options.hasKey("temperature") ? (float) options.getDouble("temperature") : -1.0f,
|
|
50
57
|
// jfloat temperature_inc,
|
|
@@ -58,17 +65,31 @@ public class WhisperContext {
|
|
|
58
65
|
// jboolean translate,
|
|
59
66
|
options.hasKey("translate") ? options.getBoolean("translate") : false,
|
|
60
67
|
// jstring language,
|
|
61
|
-
options.hasKey("language") ? options.getString("language") : "auto"
|
|
68
|
+
options.hasKey("language") ? options.getString("language") : "auto",
|
|
69
|
+
// jstring prompt
|
|
70
|
+
options.hasKey("prompt") ? options.getString("prompt") : null
|
|
62
71
|
);
|
|
63
72
|
if (code != 0) {
|
|
64
73
|
throw new Exception("Transcription failed with code " + code);
|
|
65
74
|
}
|
|
66
75
|
Integer count = getTextSegmentCount(context);
|
|
67
76
|
StringBuilder builder = new StringBuilder();
|
|
77
|
+
|
|
78
|
+
WritableMap data = Arguments.createMap();
|
|
79
|
+
WritableArray segments = Arguments.createArray();
|
|
68
80
|
for (int i = 0; i < count; i++) {
|
|
69
|
-
|
|
81
|
+
String text = getTextSegment(context, i);
|
|
82
|
+
builder.append(text);
|
|
83
|
+
|
|
84
|
+
WritableMap segment = Arguments.createMap();
|
|
85
|
+
segment.putString("text", text);
|
|
86
|
+
segment.putInt("t0", getTextSegmentT0(context, i));
|
|
87
|
+
segment.putInt("t1", getTextSegmentT1(context, i));
|
|
88
|
+
segments.pushMap(segment);
|
|
70
89
|
}
|
|
71
|
-
|
|
90
|
+
data.putString("result", builder.toString());
|
|
91
|
+
data.putArray("segments", segments);
|
|
92
|
+
return data;
|
|
72
93
|
}
|
|
73
94
|
|
|
74
95
|
public void release() {
|
|
@@ -168,19 +189,23 @@ public class WhisperContext {
|
|
|
168
189
|
float[] audio_data,
|
|
169
190
|
int n_threads,
|
|
170
191
|
int max_context,
|
|
192
|
+
int word_thold,
|
|
171
193
|
int max_len,
|
|
194
|
+
boolean token_timestamps,
|
|
172
195
|
int offset,
|
|
173
196
|
int duration,
|
|
174
|
-
int word_thold,
|
|
175
197
|
float temperature,
|
|
176
198
|
float temperature_inc,
|
|
177
199
|
int beam_size,
|
|
178
200
|
int best_of,
|
|
179
201
|
boolean speed_up,
|
|
180
202
|
boolean translate,
|
|
181
|
-
String language
|
|
203
|
+
String language,
|
|
204
|
+
String prompt
|
|
182
205
|
);
|
|
183
206
|
protected static native int getTextSegmentCount(long context);
|
|
184
207
|
protected static native String getTextSegment(long context, int index);
|
|
208
|
+
protected static native int getTextSegmentT0(long context, int index);
|
|
209
|
+
protected static native int getTextSegmentT1(long context, int index);
|
|
185
210
|
protected static native void freeContext(long contextPtr);
|
|
186
211
|
}
|
|
@@ -2,10 +2,11 @@
|
|
|
2
2
|
#include <android/asset_manager.h>
|
|
3
3
|
#include <android/asset_manager_jni.h>
|
|
4
4
|
#include <android/log.h>
|
|
5
|
-
#include <
|
|
5
|
+
#include <cstdlib>
|
|
6
6
|
#include <sys/sysinfo.h>
|
|
7
|
-
#include <string
|
|
7
|
+
#include <string>
|
|
8
8
|
#include "whisper.h"
|
|
9
|
+
#include "rn-whisper.h"
|
|
9
10
|
#include "ggml.h"
|
|
10
11
|
|
|
11
12
|
#define UNUSED(x) (void)(x)
|
|
@@ -22,27 +23,17 @@ static inline int max(int a, int b) {
|
|
|
22
23
|
return (a > b) ? a : b;
|
|
23
24
|
}
|
|
24
25
|
|
|
25
|
-
|
|
26
|
-
return AAsset_read((AAsset *) ctx, output, read_size);
|
|
27
|
-
}
|
|
28
|
-
|
|
29
|
-
static bool asset_is_eof(void *ctx) {
|
|
30
|
-
return AAsset_getRemainingLength64((AAsset *) ctx) <= 0;
|
|
31
|
-
}
|
|
32
|
-
|
|
33
|
-
static void asset_close(void *ctx) {
|
|
34
|
-
AAsset_close((AAsset *) ctx);
|
|
35
|
-
}
|
|
26
|
+
extern "C" {
|
|
36
27
|
|
|
37
28
|
JNIEXPORT jlong JNICALL
|
|
38
29
|
Java_com_rnwhisper_WhisperContext_initContext(
|
|
39
30
|
JNIEnv *env, jobject thiz, jstring model_path_str) {
|
|
40
31
|
UNUSED(thiz);
|
|
41
|
-
struct whisper_context *context =
|
|
42
|
-
const char *model_path_chars =
|
|
32
|
+
struct whisper_context *context = nullptr;
|
|
33
|
+
const char *model_path_chars = env->GetStringUTFChars(model_path_str, nullptr);
|
|
43
34
|
context = whisper_init_from_file(model_path_chars);
|
|
44
|
-
|
|
45
|
-
return (
|
|
35
|
+
env->ReleaseStringUTFChars(model_path_str, model_path_chars);
|
|
36
|
+
return reinterpret_cast<jlong>(context);
|
|
46
37
|
}
|
|
47
38
|
|
|
48
39
|
JNIEXPORT jint JNICALL
|
|
@@ -53,29 +44,31 @@ Java_com_rnwhisper_WhisperContext_fullTranscribe(
|
|
|
53
44
|
jfloatArray audio_data,
|
|
54
45
|
jint n_threads,
|
|
55
46
|
jint max_context,
|
|
56
|
-
|
|
47
|
+
int word_thold,
|
|
48
|
+
int max_len,
|
|
49
|
+
jboolean token_timestamps,
|
|
57
50
|
jint offset,
|
|
58
51
|
jint duration,
|
|
59
|
-
jint word_thold,
|
|
60
52
|
jfloat temperature,
|
|
61
53
|
jfloat temperature_inc,
|
|
62
54
|
jint beam_size,
|
|
63
55
|
jint best_of,
|
|
64
56
|
jboolean speed_up,
|
|
65
57
|
jboolean translate,
|
|
66
|
-
jstring language
|
|
58
|
+
jstring language,
|
|
59
|
+
jstring prompt
|
|
67
60
|
) {
|
|
68
61
|
UNUSED(thiz);
|
|
69
|
-
struct whisper_context *context =
|
|
70
|
-
jfloat *audio_data_arr =
|
|
71
|
-
const jsize audio_data_length =
|
|
62
|
+
struct whisper_context *context = reinterpret_cast<struct whisper_context *>(context_ptr);
|
|
63
|
+
jfloat *audio_data_arr = env->GetFloatArrayElements(audio_data, nullptr);
|
|
64
|
+
const jsize audio_data_length = env->GetArrayLength(audio_data);
|
|
72
65
|
|
|
73
66
|
int max_threads = max(1, min(8, get_nprocs() - 2));
|
|
74
67
|
|
|
75
68
|
LOGI("About to create params");
|
|
76
69
|
|
|
77
70
|
struct whisper_full_params params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
|
|
78
|
-
|
|
71
|
+
|
|
79
72
|
if (beam_size > -1) {
|
|
80
73
|
params.strategy = WHISPER_SAMPLING_BEAM_SEARCH;
|
|
81
74
|
params.beam_search.beam_size = beam_size;
|
|
@@ -86,22 +79,25 @@ Java_com_rnwhisper_WhisperContext_fullTranscribe(
|
|
|
86
79
|
params.print_timestamps = false;
|
|
87
80
|
params.print_special = false;
|
|
88
81
|
params.translate = translate;
|
|
89
|
-
|
|
82
|
+
const char *language_chars = env->GetStringUTFChars(language, nullptr);
|
|
83
|
+
params.language = language_chars;
|
|
90
84
|
params.n_threads = n_threads > 0 ? n_threads : max_threads;
|
|
91
85
|
params.speed_up = speed_up;
|
|
92
86
|
params.offset_ms = 0;
|
|
93
87
|
params.no_context = true;
|
|
94
88
|
params.single_segment = false;
|
|
95
89
|
|
|
90
|
+
if (max_len > -1) {
|
|
91
|
+
params.max_len = max_len;
|
|
92
|
+
}
|
|
93
|
+
params.token_timestamps = token_timestamps;
|
|
94
|
+
|
|
96
95
|
if (best_of > -1) {
|
|
97
96
|
params.greedy.best_of = best_of;
|
|
98
97
|
}
|
|
99
98
|
if (max_context > -1) {
|
|
100
99
|
params.n_max_text_ctx = max_context;
|
|
101
100
|
}
|
|
102
|
-
if (max_len > -1) {
|
|
103
|
-
params.max_len = max_len;
|
|
104
|
-
}
|
|
105
101
|
if (offset > -1) {
|
|
106
102
|
params.offset_ms = offset;
|
|
107
103
|
}
|
|
@@ -117,6 +113,13 @@ Java_com_rnwhisper_WhisperContext_fullTranscribe(
|
|
|
117
113
|
if (temperature_inc > -1) {
|
|
118
114
|
params.temperature_inc = temperature_inc;
|
|
119
115
|
}
|
|
116
|
+
if (prompt != nullptr) {
|
|
117
|
+
rn_whisper_convert_prompt(
|
|
118
|
+
context,
|
|
119
|
+
params,
|
|
120
|
+
new std::string(env->GetStringUTFChars(prompt, nullptr))
|
|
121
|
+
);
|
|
122
|
+
}
|
|
120
123
|
|
|
121
124
|
LOGI("About to reset timings");
|
|
122
125
|
whisper_reset_timings(context);
|
|
@@ -126,7 +129,8 @@ Java_com_rnwhisper_WhisperContext_fullTranscribe(
|
|
|
126
129
|
if (code == 0) {
|
|
127
130
|
// whisper_print_timings(context);
|
|
128
131
|
}
|
|
129
|
-
|
|
132
|
+
env->ReleaseFloatArrayElements(audio_data, audio_data_arr, JNI_ABORT);
|
|
133
|
+
env->ReleaseStringUTFChars(language, language_chars);
|
|
130
134
|
return code;
|
|
131
135
|
}
|
|
132
136
|
|
|
@@ -135,7 +139,7 @@ Java_com_rnwhisper_WhisperContext_getTextSegmentCount(
|
|
|
135
139
|
JNIEnv *env, jobject thiz, jlong context_ptr) {
|
|
136
140
|
UNUSED(env);
|
|
137
141
|
UNUSED(thiz);
|
|
138
|
-
struct whisper_context *context =
|
|
142
|
+
struct whisper_context *context = reinterpret_cast<struct whisper_context *>(context_ptr);
|
|
139
143
|
return whisper_full_n_segments(context);
|
|
140
144
|
}
|
|
141
145
|
|
|
@@ -143,17 +147,37 @@ JNIEXPORT jstring JNICALL
|
|
|
143
147
|
Java_com_rnwhisper_WhisperContext_getTextSegment(
|
|
144
148
|
JNIEnv *env, jobject thiz, jlong context_ptr, jint index) {
|
|
145
149
|
UNUSED(thiz);
|
|
146
|
-
struct whisper_context *context =
|
|
150
|
+
struct whisper_context *context = reinterpret_cast<struct whisper_context *>(context_ptr);
|
|
147
151
|
const char *text = whisper_full_get_segment_text(context, index);
|
|
148
|
-
jstring string =
|
|
152
|
+
jstring string = env->NewStringUTF(text);
|
|
149
153
|
return string;
|
|
150
154
|
}
|
|
151
155
|
|
|
156
|
+
JNIEXPORT jint JNICALL
|
|
157
|
+
Java_com_rnwhisper_WhisperContext_getTextSegmentT0(
|
|
158
|
+
JNIEnv *env, jobject thiz, jlong context_ptr, jint index) {
|
|
159
|
+
UNUSED(env);
|
|
160
|
+
UNUSED(thiz);
|
|
161
|
+
struct whisper_context *context = reinterpret_cast<struct whisper_context *>(context_ptr);
|
|
162
|
+
return whisper_full_get_segment_t0(context, index);
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
JNIEXPORT jint JNICALL
|
|
166
|
+
Java_com_rnwhisper_WhisperContext_getTextSegmentT1(
|
|
167
|
+
JNIEnv *env, jobject thiz, jlong context_ptr, jint index) {
|
|
168
|
+
UNUSED(env);
|
|
169
|
+
UNUSED(thiz);
|
|
170
|
+
struct whisper_context *context = reinterpret_cast<struct whisper_context *>(context_ptr);
|
|
171
|
+
return whisper_full_get_segment_t1(context, index);
|
|
172
|
+
}
|
|
173
|
+
|
|
152
174
|
JNIEXPORT void JNICALL
|
|
153
175
|
Java_com_rnwhisper_WhisperContext_freeContext(
|
|
154
176
|
JNIEnv *env, jobject thiz, jlong context_ptr) {
|
|
155
177
|
UNUSED(env);
|
|
156
178
|
UNUSED(thiz);
|
|
157
|
-
struct whisper_context *context =
|
|
179
|
+
struct whisper_context *context = reinterpret_cast<struct whisper_context *>(context_ptr);
|
|
158
180
|
whisper_free(context);
|
|
159
181
|
}
|
|
182
|
+
|
|
183
|
+
} // extern "C"
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
#include <cstdio>
|
|
2
|
+
#include <string>
|
|
3
|
+
#include <vector>
|
|
4
|
+
#include "whisper.h"
|
|
5
|
+
|
|
6
|
+
extern "C" {
|
|
7
|
+
|
|
8
|
+
void rn_whisper_convert_prompt(
|
|
9
|
+
struct whisper_context * ctx,
|
|
10
|
+
struct whisper_full_params params,
|
|
11
|
+
std::string * prompt
|
|
12
|
+
) {
|
|
13
|
+
std::vector<whisper_token> prompt_tokens;
|
|
14
|
+
if (!prompt->empty()) {
|
|
15
|
+
prompt_tokens.resize(1024);
|
|
16
|
+
prompt_tokens.resize(whisper_tokenize(ctx, prompt->c_str(), prompt_tokens.data(), prompt_tokens.size()));
|
|
17
|
+
|
|
18
|
+
// fprintf(stderr, "\n");
|
|
19
|
+
// fprintf(stderr, "initial prompt: '%s'\n", prompt->c_str());
|
|
20
|
+
// fprintf(stderr, "initial tokens: [ ");
|
|
21
|
+
// for (int i = 0; i < (int) prompt_tokens.size(); ++i) {
|
|
22
|
+
// fprintf(stderr, "%d ", prompt_tokens[i]);
|
|
23
|
+
// }
|
|
24
|
+
// fprintf(stderr, "]\n");
|
|
25
|
+
|
|
26
|
+
params.prompt_tokens = prompt_tokens.data();
|
|
27
|
+
params.prompt_n_tokens = prompt_tokens.size();
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
}
|
package/cpp/rn-whisper.h
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
|
|
2
|
+
#ifdef __cplusplus
|
|
3
|
+
#include <string>
|
|
4
|
+
#include <whisper.h>
|
|
5
|
+
extern "C" {
|
|
6
|
+
#endif
|
|
7
|
+
|
|
8
|
+
void rn_whisper_convert_prompt(
|
|
9
|
+
struct whisper_context * ctx,
|
|
10
|
+
struct whisper_full_params params,
|
|
11
|
+
std::string * prompt
|
|
12
|
+
);
|
|
13
|
+
|
|
14
|
+
#ifdef __cplusplus
|
|
15
|
+
}
|
|
16
|
+
#endif
|
package/ios/RNWhisper.h
CHANGED
package/ios/RNWhisper.mm
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
|
|
2
2
|
#import "RNWhisper.h"
|
|
3
3
|
#include <stdlib.h>
|
|
4
|
+
#include <string>
|
|
4
5
|
|
|
5
6
|
@interface WhisperContext : NSObject {
|
|
6
7
|
}
|
|
@@ -93,15 +94,18 @@ RCT_REMAP_METHOD(transcribe,
|
|
|
93
94
|
params.no_context = true;
|
|
94
95
|
params.single_segment = false;
|
|
95
96
|
|
|
97
|
+
if (options[@"maxLen"] != nil) {
|
|
98
|
+
params.max_len = [options[@"maxLen"] intValue];
|
|
99
|
+
}
|
|
100
|
+
params.token_timestamps = options[@"tokenTimestamps"] != nil ? [options[@"tokenTimestamps"] boolValue] : false;
|
|
101
|
+
|
|
96
102
|
if (options[@"bestOf"] != nil) {
|
|
97
103
|
params.greedy.best_of = [options[@"bestOf"] intValue];
|
|
98
104
|
}
|
|
99
105
|
if (options[@"maxContext"] != nil) {
|
|
100
106
|
params.n_max_text_ctx = [options[@"maxContext"] intValue];
|
|
101
107
|
}
|
|
102
|
-
|
|
103
|
-
params.max_len = [options[@"maxLen"] intValue];
|
|
104
|
-
}
|
|
108
|
+
|
|
105
109
|
if (options[@"offset"] != nil) {
|
|
106
110
|
params.offset_ms = [options[@"offset"] intValue];
|
|
107
111
|
}
|
|
@@ -117,6 +121,15 @@ RCT_REMAP_METHOD(transcribe,
|
|
|
117
121
|
if (options[@"temperatureInc"] != nil) {
|
|
118
122
|
params.temperature_inc = [options[@"temperature_inc"] floatValue];
|
|
119
123
|
}
|
|
124
|
+
|
|
125
|
+
if (options[@"prompt"] != nil) {
|
|
126
|
+
std::string *prompt = new std::string([options[@"prompt"] UTF8String]);
|
|
127
|
+
rn_whisper_convert_prompt(
|
|
128
|
+
context.ctx,
|
|
129
|
+
params,
|
|
130
|
+
prompt
|
|
131
|
+
);
|
|
132
|
+
}
|
|
120
133
|
|
|
121
134
|
whisper_reset_timings(context.ctx);
|
|
122
135
|
int code = whisper_full(context.ctx, params, waveFile, count);
|
|
@@ -132,11 +145,25 @@ RCT_REMAP_METHOD(transcribe,
|
|
|
132
145
|
|
|
133
146
|
NSString *result = @"";
|
|
134
147
|
int n_segments = whisper_full_n_segments(context.ctx);
|
|
148
|
+
|
|
149
|
+
NSMutableArray *segments = [[NSMutableArray alloc] init];
|
|
135
150
|
for (int i = 0; i < n_segments; i++) {
|
|
136
151
|
const char * text_cur = whisper_full_get_segment_text(context.ctx, i);
|
|
137
152
|
result = [result stringByAppendingString:[NSString stringWithUTF8String:text_cur]];
|
|
138
|
-
|
|
139
|
-
|
|
153
|
+
|
|
154
|
+
const int64_t t0 = whisper_full_get_segment_t0(context.ctx, i);
|
|
155
|
+
const int64_t t1 = whisper_full_get_segment_t1(context.ctx, i);
|
|
156
|
+
NSDictionary *segment = @{
|
|
157
|
+
@"text": [NSString stringWithUTF8String:text_cur],
|
|
158
|
+
@"t0": [NSNumber numberWithLongLong:t0],
|
|
159
|
+
@"t1": [NSNumber numberWithLongLong:t1]
|
|
160
|
+
};
|
|
161
|
+
[segments addObject:segment];
|
|
162
|
+
}
|
|
163
|
+
resolve(@{
|
|
164
|
+
@"result": result,
|
|
165
|
+
@"segments": segments
|
|
166
|
+
});
|
|
140
167
|
}
|
|
141
168
|
|
|
142
169
|
RCT_REMAP_METHOD(releaseContext,
|
package/jest/mock.js
CHANGED
|
@@ -3,7 +3,10 @@ const { NativeModules } = require('react-native')
|
|
|
3
3
|
if (!NativeModules.RNWhisper) {
|
|
4
4
|
NativeModules.RNWhisper = {
|
|
5
5
|
initContext: jest.fn(() => Promise.resolve(1)),
|
|
6
|
-
transcribe: jest.fn(() => Promise.resolve(
|
|
6
|
+
transcribe: jest.fn(() => Promise.resolve({
|
|
7
|
+
result: ' Test',
|
|
8
|
+
segments: [{ text: ' Test', t0: 0, t1: 33 }],
|
|
9
|
+
})),
|
|
7
10
|
releaseContext: jest.fn(() => Promise.resolve()),
|
|
8
11
|
releaseAllContexts: jest.fn(() => Promise.resolve()),
|
|
9
12
|
}
|
package/lib/commonjs/index.js
CHANGED
|
@@ -21,9 +21,7 @@ class WhisperContext {
|
|
|
21
21
|
}
|
|
22
22
|
async transcribe(path) {
|
|
23
23
|
let options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {};
|
|
24
|
-
return RNWhisper.transcribe(this.id, path, options)
|
|
25
|
-
result
|
|
26
|
-
}));
|
|
24
|
+
return RNWhisper.transcribe(this.id, path, options);
|
|
27
25
|
}
|
|
28
26
|
async release() {
|
|
29
27
|
return RNWhisper.releaseContext(this.id);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"names":["_reactNative","require","LINKING_ERROR","Platform","select","ios","default","RNWhisper","NativeModules","Proxy","get","Error","WhisperContext","constructor","id","transcribe","path","options","arguments","length","undefined","
|
|
1
|
+
{"version":3,"names":["_reactNative","require","LINKING_ERROR","Platform","select","ios","default","RNWhisper","NativeModules","Proxy","get","Error","WhisperContext","constructor","id","transcribe","path","options","arguments","length","undefined","release","releaseContext","initWhisper","filePath","initContext","releaseAllWhisper","releaseAllContexts"],"sourceRoot":"../../src","sources":["index.tsx"],"mappings":";;;;;;;AAAA,IAAAA,YAAA,GAAAC,OAAA;AAEA,MAAMC,aAAa,GAChB,sEAAqEC,qBAAQ,CAACC,MAAM,CAAC;EAAEC,GAAG,EAAE,gCAAgC;EAAEC,OAAO,EAAE;AAAG,CAAC,CAC3I,oDAAmD;AAEtD,MAAMC,SAAS,GAAGC,0BAAa,CAACD,SAAS,GACrCC,0BAAa,CAACD,SAAS,GACvB,IAAIE,KAAK,CACT,CAAC,CAAC,EACF;EACEC,GAAGA,CAAA,EAAG;IACJ,MAAM,IAAIC,KAAK,CAACT,aAAa,CAAC;EAChC;AACF,CAAC,CACF;AA6BH,MAAMU,cAAc,CAAC;EAGnBC,WAAWA,CAACC,EAAU,EAAE;IACtB,IAAI,CAACA,EAAE,GAAGA,EAAE;EACd;EAEA,MAAMC,UAAUA,CAACC,IAAY,EAA8D;IAAA,IAA5DC,OAA0B,GAAAC,SAAA,CAAAC,MAAA,QAAAD,SAAA,QAAAE,SAAA,GAAAF,SAAA,MAAG,CAAC,CAAC;IAC5D,OAAOX,SAAS,CAACQ,UAAU,CAAC,IAAI,CAACD,EAAE,EAAEE,IAAI,EAAEC,OAAO,CAAC;EACrD;EAEA,MAAMI,OAAOA,CAAA,EAAG;IACd,OAAOd,SAAS,CAACe,cAAc,CAAC,IAAI,CAACR,EAAE,CAAC;EAC1C;AACF;AAEO,eAAeS,WAAWA,CAAA,EAEN;EAAA,IADzB;IAAEC;EAAgC,CAAC,GAAAN,SAAA,CAAAC,MAAA,QAAAD,SAAA,QAAAE,SAAA,GAAAF,SAAA,MAAG,CAAC,CAAC;EAExC,MAAMJ,EAAE,GAAG,MAAMP,SAAS,CAACkB,WAAW,CAACD,QAAQ,CAAC;EAChD,OAAO,IAAIZ,cAAc,CAACE,EAAE,CAAC;AAC/B;AAEO,eAAeY,iBAAiBA,CAAA,EAAkB;EACvD,OAAOnB,SAAS,CAACoB,kBAAkB,EAAE;AACvC"}
|
package/lib/module/index.js
CHANGED
|
@@ -14,9 +14,7 @@ class WhisperContext {
|
|
|
14
14
|
}
|
|
15
15
|
async transcribe(path) {
|
|
16
16
|
let options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {};
|
|
17
|
-
return RNWhisper.transcribe(this.id, path, options)
|
|
18
|
-
result
|
|
19
|
-
}));
|
|
17
|
+
return RNWhisper.transcribe(this.id, path, options);
|
|
20
18
|
}
|
|
21
19
|
async release() {
|
|
22
20
|
return RNWhisper.releaseContext(this.id);
|
package/lib/module/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"names":["NativeModules","Platform","LINKING_ERROR","select","ios","default","RNWhisper","Proxy","get","Error","WhisperContext","constructor","id","transcribe","path","options","arguments","length","undefined","
|
|
1
|
+
{"version":3,"names":["NativeModules","Platform","LINKING_ERROR","select","ios","default","RNWhisper","Proxy","get","Error","WhisperContext","constructor","id","transcribe","path","options","arguments","length","undefined","release","releaseContext","initWhisper","filePath","initContext","releaseAllWhisper","releaseAllContexts"],"sourceRoot":"../../src","sources":["index.tsx"],"mappings":"AAAA,SAASA,aAAa,EAAEC,QAAQ,QAAQ,cAAc;AAEtD,MAAMC,aAAa,GAChB,sEAAqED,QAAQ,CAACE,MAAM,CAAC;EAAEC,GAAG,EAAE,gCAAgC;EAAEC,OAAO,EAAE;AAAG,CAAC,CAC3I,oDAAmD;AAEtD,MAAMC,SAAS,GAAGN,aAAa,CAACM,SAAS,GACrCN,aAAa,CAACM,SAAS,GACvB,IAAIC,KAAK,CACT,CAAC,CAAC,EACF;EACEC,GAAGA,CAAA,EAAG;IACJ,MAAM,IAAIC,KAAK,CAACP,aAAa,CAAC;EAChC;AACF,CAAC,CACF;AA6BH,MAAMQ,cAAc,CAAC;EAGnBC,WAAWA,CAACC,EAAU,EAAE;IACtB,IAAI,CAACA,EAAE,GAAGA,EAAE;EACd;EAEA,MAAMC,UAAUA,CAACC,IAAY,EAA8D;IAAA,IAA5DC,OAA0B,GAAAC,SAAA,CAAAC,MAAA,QAAAD,SAAA,QAAAE,SAAA,GAAAF,SAAA,MAAG,CAAC,CAAC;IAC5D,OAAOV,SAAS,CAACO,UAAU,CAAC,IAAI,CAACD,EAAE,EAAEE,IAAI,EAAEC,OAAO,CAAC;EACrD;EAEA,MAAMI,OAAOA,CAAA,EAAG;IACd,OAAOb,SAAS,CAACc,cAAc,CAAC,IAAI,CAACR,EAAE,CAAC;EAC1C;AACF;AAEA,OAAO,eAAeS,WAAWA,CAAA,EAEN;EAAA,IADzB;IAAEC;EAAgC,CAAC,GAAAN,SAAA,CAAAC,MAAA,QAAAD,SAAA,QAAAE,SAAA,GAAAF,SAAA,MAAG,CAAC,CAAC;EAExC,MAAMJ,EAAE,GAAG,MAAMN,SAAS,CAACiB,WAAW,CAACD,QAAQ,CAAC;EAChD,OAAO,IAAIZ,cAAc,CAACE,EAAE,CAAC;AAC/B;AAEA,OAAO,eAAeY,iBAAiBA,CAAA,EAAkB;EACvD,OAAOlB,SAAS,CAACmB,kBAAkB,EAAE;AACvC"}
|
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
export type TranscribeOptions = {
|
|
2
|
+
language?: string;
|
|
3
|
+
translate?: boolean;
|
|
2
4
|
maxThreads?: number;
|
|
3
5
|
maxContext?: number;
|
|
4
6
|
maxLen?: number;
|
|
7
|
+
tokenTimestamps?: boolean;
|
|
5
8
|
offset?: number;
|
|
6
9
|
duration?: number;
|
|
7
10
|
wordThold?: number;
|
|
@@ -10,9 +13,15 @@ export type TranscribeOptions = {
|
|
|
10
13
|
beamSize?: number;
|
|
11
14
|
bestOf?: number;
|
|
12
15
|
speedUp?: boolean;
|
|
16
|
+
prompt?: string;
|
|
13
17
|
};
|
|
14
18
|
export type TranscribeResult = {
|
|
15
19
|
result: string;
|
|
20
|
+
segments: Array<{
|
|
21
|
+
text: string;
|
|
22
|
+
t0: number;
|
|
23
|
+
t1: number;
|
|
24
|
+
}>;
|
|
16
25
|
};
|
|
17
26
|
declare class WhisperContext {
|
|
18
27
|
id: number;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.tsx"],"names":[],"mappings":"AAiBA,MAAM,MAAM,iBAAiB,GAAG;IAC9B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.tsx"],"names":[],"mappings":"AAiBA,MAAM,MAAM,iBAAiB,GAAG;IAC9B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB,CAAA;AAED,MAAM,MAAM,gBAAgB,GAAG;IAC7B,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,KAAK,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,EAAE,EAAE,MAAM,CAAC;QACX,EAAE,EAAE,MAAM,CAAC;KACZ,CAAC,CAAC;CACJ,CAAA;AAED,cAAM,cAAc;IAClB,EAAE,EAAE,MAAM,CAAA;gBAEE,EAAE,EAAE,MAAM;IAIhB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,GAAE,iBAAsB,GAAG,OAAO,CAAC,gBAAgB,CAAC;IAIpF,OAAO;CAGd;AAED,wBAAsB,WAAW,CAC/B,EAAE,QAAQ,EAAE,GAAE;IAAE,QAAQ,CAAC,EAAE,MAAM,CAAA;CAAO,GACvC,OAAO,CAAC,cAAc,CAAC,CAGzB;AAED,wBAAsB,iBAAiB,IAAI,OAAO,CAAC,IAAI,CAAC,CAEvD"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "whisper.rn",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.4",
|
|
4
4
|
"description": "React Native binding of whisper.cpp",
|
|
5
5
|
"main": "lib/commonjs/index",
|
|
6
6
|
"module": "lib/module/index",
|
|
@@ -98,13 +98,17 @@
|
|
|
98
98
|
"@commitlint/config-conventional"
|
|
99
99
|
]
|
|
100
100
|
},
|
|
101
|
+
"publishConfig": {
|
|
102
|
+
"registry": "https://registry.npmjs.org/"
|
|
103
|
+
},
|
|
101
104
|
"release-it": {
|
|
102
105
|
"git": {
|
|
103
106
|
"commitMessage": "chore: release ${version}",
|
|
104
107
|
"tagName": "v${version}"
|
|
105
108
|
},
|
|
106
109
|
"npm": {
|
|
107
|
-
"publish": true
|
|
110
|
+
"publish": true,
|
|
111
|
+
"skipChecks": true
|
|
108
112
|
},
|
|
109
113
|
"github": {
|
|
110
114
|
"release": true
|
package/src/index.tsx
CHANGED
|
@@ -16,9 +16,12 @@ const RNWhisper = NativeModules.RNWhisper
|
|
|
16
16
|
)
|
|
17
17
|
|
|
18
18
|
export type TranscribeOptions = {
|
|
19
|
+
language?: string,
|
|
20
|
+
translate?: boolean,
|
|
19
21
|
maxThreads?: number,
|
|
20
22
|
maxContext?: number,
|
|
21
23
|
maxLen?: number,
|
|
24
|
+
tokenTimestamps?: boolean,
|
|
22
25
|
offset?: number,
|
|
23
26
|
duration?: number,
|
|
24
27
|
wordThold?: number,
|
|
@@ -27,10 +30,16 @@ export type TranscribeOptions = {
|
|
|
27
30
|
beamSize?: number,
|
|
28
31
|
bestOf?: number,
|
|
29
32
|
speedUp?: boolean,
|
|
33
|
+
prompt?: string,
|
|
30
34
|
}
|
|
31
35
|
|
|
32
36
|
export type TranscribeResult = {
|
|
33
37
|
result: string,
|
|
38
|
+
segments: Array<{
|
|
39
|
+
text: string,
|
|
40
|
+
t0: number,
|
|
41
|
+
t1: number,
|
|
42
|
+
}>,
|
|
34
43
|
}
|
|
35
44
|
|
|
36
45
|
class WhisperContext {
|
|
@@ -41,9 +50,7 @@ class WhisperContext {
|
|
|
41
50
|
}
|
|
42
51
|
|
|
43
52
|
async transcribe(path: string, options: TranscribeOptions = {}): Promise<TranscribeResult> {
|
|
44
|
-
return RNWhisper.transcribe(this.id, path, options)
|
|
45
|
-
result
|
|
46
|
-
}))
|
|
53
|
+
return RNWhisper.transcribe(this.id, path, options)
|
|
47
54
|
}
|
|
48
55
|
|
|
49
56
|
async release() {
|