whisper.rn 0.4.0-rc.1 → 0.4.0-rc.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. package/README.md +6 -6
  2. package/android/build.gradle +4 -0
  3. package/android/src/main/CMakeLists.txt +14 -0
  4. package/android/src/main/java/com/rnwhisper/AudioUtils.java +27 -92
  5. package/android/src/main/java/com/rnwhisper/RNWhisper.java +86 -40
  6. package/android/src/main/java/com/rnwhisper/WhisperContext.java +85 -131
  7. package/android/src/main/jni-utils.h +76 -0
  8. package/android/src/main/jni.cpp +226 -109
  9. package/android/src/newarch/java/com/rnwhisper/RNWhisperModule.java +10 -0
  10. package/android/src/oldarch/java/com/rnwhisper/RNWhisperModule.java +10 -0
  11. package/cpp/README.md +1 -1
  12. package/cpp/coreml/whisper-encoder-impl.h +1 -1
  13. package/cpp/coreml/whisper-encoder.h +4 -0
  14. package/cpp/coreml/whisper-encoder.mm +5 -3
  15. package/cpp/ggml-aarch64.c +129 -0
  16. package/cpp/ggml-aarch64.h +19 -0
  17. package/cpp/ggml-alloc.c +805 -400
  18. package/cpp/ggml-alloc.h +60 -10
  19. package/cpp/ggml-backend-impl.h +216 -0
  20. package/cpp/ggml-backend-reg.cpp +204 -0
  21. package/cpp/ggml-backend.cpp +1996 -0
  22. package/cpp/ggml-backend.cpp.rej +12 -0
  23. package/cpp/ggml-backend.h +336 -0
  24. package/cpp/ggml-common.h +1853 -0
  25. package/cpp/ggml-cpp.h +38 -0
  26. package/cpp/ggml-cpu-aarch64.c +3560 -0
  27. package/cpp/ggml-cpu-aarch64.h +30 -0
  28. package/cpp/ggml-cpu-impl.h +371 -0
  29. package/cpp/ggml-cpu-quants.c +10822 -0
  30. package/cpp/ggml-cpu-quants.h +63 -0
  31. package/cpp/ggml-cpu.c +13970 -0
  32. package/cpp/ggml-cpu.cpp +663 -0
  33. package/cpp/ggml-cpu.h +177 -0
  34. package/cpp/ggml-impl.h +551 -0
  35. package/cpp/ggml-metal-impl.h +249 -0
  36. package/cpp/ggml-metal.h +24 -43
  37. package/cpp/ggml-metal.m +4190 -1075
  38. package/cpp/ggml-quants.c +5247 -0
  39. package/cpp/ggml-quants.h +100 -0
  40. package/cpp/ggml-threading.cpp +12 -0
  41. package/cpp/ggml-threading.h +12 -0
  42. package/cpp/ggml-whisper.metallib +0 -0
  43. package/cpp/ggml.c +5474 -18763
  44. package/cpp/ggml.h +833 -628
  45. package/cpp/rn-audioutils.cpp +68 -0
  46. package/cpp/rn-audioutils.h +14 -0
  47. package/cpp/rn-whisper-log.h +11 -0
  48. package/cpp/rn-whisper.cpp +221 -52
  49. package/cpp/rn-whisper.h +50 -15
  50. package/cpp/whisper.cpp +2872 -1371
  51. package/cpp/whisper.h +170 -41
  52. package/ios/RNWhisper.mm +139 -46
  53. package/ios/RNWhisperAudioUtils.h +1 -2
  54. package/ios/RNWhisperAudioUtils.m +18 -67
  55. package/ios/RNWhisperContext.h +11 -8
  56. package/ios/RNWhisperContext.mm +195 -150
  57. package/jest/mock.js +15 -2
  58. package/lib/commonjs/NativeRNWhisper.js.map +1 -1
  59. package/lib/commonjs/index.js +76 -28
  60. package/lib/commonjs/index.js.map +1 -1
  61. package/lib/commonjs/version.json +1 -1
  62. package/lib/module/NativeRNWhisper.js.map +1 -1
  63. package/lib/module/index.js +76 -28
  64. package/lib/module/index.js.map +1 -1
  65. package/lib/module/version.json +1 -1
  66. package/lib/typescript/NativeRNWhisper.d.ts +13 -4
  67. package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
  68. package/lib/typescript/index.d.ts +37 -5
  69. package/lib/typescript/index.d.ts.map +1 -1
  70. package/package.json +9 -7
  71. package/src/NativeRNWhisper.ts +20 -4
  72. package/src/index.ts +98 -42
  73. package/src/version.json +1 -1
  74. package/whisper-rn.podspec +11 -18
  75. package/cpp/ggml-metal.metal +0 -2353
@@ -0,0 +1,68 @@
1
+ #include "rn-audioutils.h"
2
+ #include "rn-whisper-log.h"
3
+
4
+ namespace rnaudioutils {
5
+
6
/**
 * Concatenates 16-bit PCM slices into one byte stream, low byte first.
 *
 * @param buffers          Slice pointers; null entries are skipped.
 * @param slice_n_samples  Number of shorts to read from the slice at the same
 *                         index; non-positive counts contribute nothing.
 * @return The samples of every usable slice, two bytes per sample.
 *
 * Only indices present in BOTH vectors are processed, so a length mismatch
 * between the two arguments can no longer read past the shorter one.
 */
std::vector<uint8_t> concat_short_buffers(const std::vector<short*>& buffers, const std::vector<int>& slice_n_samples) {
    const size_t n_slices = std::min(buffers.size(), slice_n_samples.size());

    // Pre-compute the output size so the vector grows once.
    size_t total_samples = 0;
    for (size_t i = 0; i < n_slices; i++) {
        if (buffers[i] != nullptr && slice_n_samples[i] > 0) {
            total_samples += static_cast<size_t>(slice_n_samples[i]);
        }
    }

    std::vector<uint8_t> output_data;
    output_data.reserve(total_samples * 2);

    for (size_t i = 0; i < n_slices; i++) {
        const short* slice = buffers[i];
        if (slice == nullptr) {
            continue;
        }

        const int size = slice_n_samples[i]; // Number of shorts
        for (int j = 0; j < size; j++) {
            output_data.push_back(static_cast<uint8_t>(slice[j] & 0xFF));        // Lower byte
            output_data.push_back(static_cast<uint8_t>((slice[j] >> 8) & 0xFF)); // Higher byte
        }
    }

    return output_data;
}
22
+
23
/**
 * Returns a copy of `audio_data` with trailing zero bytes removed.
 *
 * The bytes are 16-bit samples stored low byte first (see save_wav_file, which
 * writes them with 16 bits per sample), so the cut is kept on a 2-byte
 * boundary: when the last non-zero byte is the low byte of a sample, that
 * sample's zero high byte is retained instead of being trimmed away.
 */
std::vector<uint8_t> remove_trailing_zeros(const std::vector<uint8_t>& audio_data) {
    auto last = std::find_if(audio_data.rbegin(), audio_data.rend(), [](uint8_t byte) { return byte != 0; });
    size_t kept = static_cast<size_t>(last.base() - audio_data.begin());

    // Never split a sample: round an odd length up when a byte is available.
    if (kept % 2 != 0 && kept < audio_data.size()) {
        kept++;
    }

    return std::vector<uint8_t>(audio_data.begin(), audio_data.begin() + kept);
}
27
+
28
+ void save_wav_file(const std::vector<uint8_t>& raw, const std::string& file) {
29
+ std::vector<uint8_t> data = remove_trailing_zeros(raw);
30
+
31
+ std::ofstream output(file, std::ios::binary);
32
+
33
+ if (!output.is_open()) {
34
+ RNWHISPER_LOG_ERROR("Failed to open file for writing: %s\n", file.c_str());
35
+ return;
36
+ }
37
+
38
+ // WAVE header
39
+ output.write("RIFF", 4);
40
+ int32_t chunk_size = 36 + static_cast<int32_t>(data.size());
41
+ output.write(reinterpret_cast<char*>(&chunk_size), sizeof(chunk_size));
42
+ output.write("WAVE", 4);
43
+ output.write("fmt ", 4);
44
+ int32_t sub_chunk_size = 16;
45
+ output.write(reinterpret_cast<char*>(&sub_chunk_size), sizeof(sub_chunk_size));
46
+ short audio_format = 1;
47
+ output.write(reinterpret_cast<char*>(&audio_format), sizeof(audio_format));
48
+ short num_channels = 1;
49
+ output.write(reinterpret_cast<char*>(&num_channels), sizeof(num_channels));
50
+ int32_t sample_rate = WHISPER_SAMPLE_RATE;
51
+ output.write(reinterpret_cast<char*>(&sample_rate), sizeof(sample_rate));
52
+ int32_t byte_rate = WHISPER_SAMPLE_RATE * 2;
53
+ output.write(reinterpret_cast<char*>(&byte_rate), sizeof(byte_rate));
54
+ short block_align = 2;
55
+ output.write(reinterpret_cast<char*>(&block_align), sizeof(block_align));
56
+ short bits_per_sample = 16;
57
+ output.write(reinterpret_cast<char*>(&bits_per_sample), sizeof(bits_per_sample));
58
+ output.write("data", 4);
59
+ int32_t sub_chunk2_size = static_cast<int32_t>(data.size());
60
+ output.write(reinterpret_cast<char*>(&sub_chunk2_size), sizeof(sub_chunk2_size));
61
+ output.write(reinterpret_cast<const char*>(data.data()), data.size());
62
+
63
+ output.close();
64
+
65
+ RNWHISPER_LOG_INFO("Saved audio file: %s\n", file.c_str());
66
+ }
67
+
68
+ } // namespace rnaudioutils
@@ -0,0 +1,14 @@
1
+ #include <iostream>
2
+ #include <fstream>
3
+ #include <vector>
4
+ #include <cstdint>
5
+ #include <cstring>
6
+ #include <algorithm>
7
+ #include "whisper.h"
8
+
9
+ namespace rnaudioutils {
10
+
11
+ std::vector<uint8_t> concat_short_buffers(const std::vector<short*>& buffers, const std::vector<int>& slice_n_samples);
12
+ void save_wav_file(const std::vector<uint8_t>& raw, const std::string& file);
13
+
14
+ } // namespace rnaudioutils
@@ -0,0 +1,11 @@
1
// Logging macros with printf-style arguments.
// On Android builds that define RNWHISPER_ANDROID_ENABLE_LOGGING they forward
// to __android_log_print; everywhere else they write to stderr.
#if defined(__ANDROID__) && defined(RNWHISPER_ANDROID_ENABLE_LOGGING)
#include <android/log.h>
#define RNWHISPER_ANDROID_TAG "RNWHISPER_LOG_ANDROID"
#define RNWHISPER_LOG_INFO(...)  __android_log_print(ANDROID_LOG_INFO , RNWHISPER_ANDROID_TAG, __VA_ARGS__)
#define RNWHISPER_LOG_WARN(...)  __android_log_print(ANDROID_LOG_WARN , RNWHISPER_ANDROID_TAG, __VA_ARGS__)
#define RNWHISPER_LOG_ERROR(...) __android_log_print(ANDROID_LOG_ERROR, RNWHISPER_ANDROID_TAG, __VA_ARGS__)
#else
// fprintf/stderr are used below, so pull in their declaration here instead of
// relying on whatever the including file happened to include first.
#include <stdio.h>
#define RNWHISPER_LOG_INFO(...)  fprintf(stderr, __VA_ARGS__)
#define RNWHISPER_LOG_WARN(...)  fprintf(stderr, __VA_ARGS__)
#define RNWHISPER_LOG_ERROR(...) fprintf(stderr, __VA_ARGS__)
#endif // __ANDROID__ && RNWHISPER_ANDROID_ENABLE_LOGGING
@@ -2,40 +2,94 @@
2
2
  #include <string>
3
3
  #include <vector>
4
4
  #include <unordered_map>
5
- #include "whisper.h"
5
+ #include "rn-whisper.h"
6
6
 
7
- extern "C" {
7
// Fallback for job::audio_sec when the caller passes a non-positive duration.
// No trailing semicolon: the macro must be usable inside larger expressions.
#define DEFAULT_MAX_AUDIO_SEC 30
8
8
 
9
- std::unordered_map<int, bool> abort_map;
9
+ namespace rnwhisper {
10
10
 
11
- bool* rn_whisper_assign_abort_map(int job_id) {
12
- abort_map[job_id] = false;
13
- return &abort_map[job_id];
11
+ const char * system_info(void) {
12
+ static std::string s;
13
+ s = "";
14
+ if (wsp_ggml_cpu_has_avx() == 1) s += "AVX ";
15
+ if (wsp_ggml_cpu_has_avx2() == 1) s += "AVX2 ";
16
+ if (wsp_ggml_cpu_has_avx512() == 1) s += "AVX512 ";
17
+ if (wsp_ggml_cpu_has_fma() == 1) s += "FMA ";
18
+ if (wsp_ggml_cpu_has_neon() == 1) s += "NEON ";
19
+ if (wsp_ggml_cpu_has_arm_fma() == 1) s += "ARM_FMA ";
20
+ if (wsp_ggml_cpu_has_f16c() == 1) s += "F16C ";
21
+ if (wsp_ggml_cpu_has_fp16_va() == 1) s += "FP16_VA ";
22
+ if (wsp_ggml_cpu_has_sse3() == 1) s += "SSE3 ";
23
+ if (wsp_ggml_cpu_has_ssse3() == 1) s += "SSSE3 ";
24
+ if (wsp_ggml_cpu_has_vsx() == 1) s += "VSX ";
25
+ #ifdef WHISPER_USE_COREML
26
+ s += "COREML ";
27
+ #endif
28
+ s.erase(s.find_last_not_of(" ") + 1);
29
+ return s.c_str();
14
30
  }
15
31
 
16
- void rn_whisper_remove_abort_map(int job_id) {
17
- if (abort_map.find(job_id) != abort_map.end()) {
18
- abort_map.erase(job_id);
19
- }
20
- }
32
+ std::string bench(struct whisper_context * ctx, int n_threads) {
33
+ const int n_mels = whisper_model_n_mels(ctx);
21
34
 
22
- void rn_whisper_abort_transcribe(int job_id) {
23
- if (abort_map.find(job_id) != abort_map.end()) {
24
- abort_map[job_id] = true;
25
- }
26
- }
35
+ if (int ret = whisper_set_mel(ctx, nullptr, 0, n_mels)) {
36
+ return "error: failed to set mel: " + std::to_string(ret);
37
+ }
38
+ // heat encoder
39
+ if (int ret = whisper_encode(ctx, 0, n_threads) != 0) {
40
+ return "error: failed to encode: " + std::to_string(ret);
41
+ }
27
42
 
28
- bool rn_whisper_transcribe_is_aborted(int job_id) {
29
- if (abort_map.find(job_id) != abort_map.end()) {
30
- return abort_map[job_id];
31
- }
32
- return false;
33
- }
43
+ whisper_token tokens[512];
44
+ memset(tokens, 0, sizeof(tokens));
45
+
46
+ // prompt heat
47
+ if (int ret = whisper_decode(ctx, tokens, 256, 0, n_threads) != 0) {
48
+ return "error: failed to decode: " + std::to_string(ret);
49
+ }
50
+
51
+ // text-generation heat
52
+ if (int ret = whisper_decode(ctx, tokens, 1, 256, n_threads) != 0) {
53
+ return "error: failed to decode: " + std::to_string(ret);
54
+ }
55
+
56
+ whisper_reset_timings(ctx);
57
+
58
+ // actual run
59
+ if (int ret = whisper_encode(ctx, 0, n_threads) != 0) {
60
+ return "error: failed to encode: " + std::to_string(ret);
61
+ }
62
+
63
+ // text-generation
64
+ for (int i = 0; i < 256; i++) {
65
+ if (int ret = whisper_decode(ctx, tokens, 1, i, n_threads) != 0) {
66
+ return "error: failed to decode: " + std::to_string(ret);
67
+ }
68
+ }
69
+
70
+ // batched decoding
71
+ for (int i = 0; i < 64; i++) {
72
+ if (int ret = whisper_decode(ctx, tokens, 5, 0, n_threads) != 0) {
73
+ return "error: failed to decode: " + std::to_string(ret);
74
+ }
75
+ }
34
76
 
35
- void rn_whisper_abort_all_transcribe() {
36
- for (auto it = abort_map.begin(); it != abort_map.end(); ++it) {
37
- it->second = true;
38
- }
77
+ // prompt processing
78
+ for (int i = 0; i < 16; i++) {
79
+ if (int ret = whisper_decode(ctx, tokens, 256, 0, n_threads) != 0) {
80
+ return "error: failed to decode: " + std::to_string(ret);
81
+ }
82
+ }
83
+
84
+ const struct whisper_timings * timings = whisper_get_timings(ctx);
85
+
86
+ return std::string("[") +
87
+ "\"" + system_info() + "\"," +
88
+ std::to_string(n_threads) + "," +
89
+ std::to_string(timings->encode_ms) + "," +
90
+ std::to_string(timings->decode_ms) + "," +
91
+ std::to_string(timings->batchd_ms) + "," +
92
+ std::to_string(timings->prompt_ms) + "]";
39
93
  }
40
94
 
41
95
  void high_pass_filter(std::vector<float> & data, float cutoff, float sample_rate) {
@@ -51,42 +105,157 @@ void high_pass_filter(std::vector<float> & data, float cutoff, float sample_rate
51
105
  }
52
106
  }
53
107
 
54
- bool rn_whisper_vad_simple(std::vector<float> & pcmf32, int sample_rate, int last_ms, float vad_thold, float freq_thold, bool verbose) {
55
- const int n_samples = pcmf32.size();
56
- const int n_samples_last = (sample_rate * last_ms) / 1000;
108
+ bool vad_simple_impl(std::vector<float> & pcmf32, int sample_rate, int last_ms, float vad_thold, float freq_thold, bool verbose) {
109
+ const int n_samples = pcmf32.size();
110
+ const int n_samples_last = (sample_rate * last_ms) / 1000;
57
111
 
58
- if (n_samples_last >= n_samples) {
59
- // not enough samples - assume no speech
60
- return false;
61
- }
112
+ if (n_samples_last >= n_samples) {
113
+ // not enough samples - assume no speech
114
+ return false;
115
+ }
62
116
 
63
- if (freq_thold > 0.0f) {
64
- high_pass_filter(pcmf32, freq_thold, sample_rate);
65
- }
117
+ if (freq_thold > 0.0f) {
118
+ high_pass_filter(pcmf32, freq_thold, sample_rate);
119
+ }
66
120
 
67
- float energy_all = 0.0f;
68
- float energy_last = 0.0f;
121
+ float energy_all = 0.0f;
122
+ float energy_last = 0.0f;
69
123
 
70
- for (int i = 0; i < n_samples; i++) {
71
- energy_all += fabsf(pcmf32[i]);
124
+ for (int i = 0; i < n_samples; i++) {
125
+ energy_all += fabsf(pcmf32[i]);
72
126
 
73
- if (i >= n_samples - n_samples_last) {
74
- energy_last += fabsf(pcmf32[i]);
127
+ if (i >= n_samples - n_samples_last) {
128
+ energy_last += fabsf(pcmf32[i]);
129
+ }
75
130
  }
76
- }
77
131
 
78
- energy_all /= n_samples;
79
- energy_last /= n_samples_last;
132
+ energy_all /= n_samples;
133
+ energy_last /= n_samples_last;
80
134
 
81
- if (verbose) {
82
- fprintf(stderr, "%s: energy_all: %f, energy_last: %f, vad_thold: %f, freq_thold: %f\n", __func__, energy_all, energy_last, vad_thold, freq_thold);
83
- }
135
+ if (verbose) {
136
+ RNWHISPER_LOG_INFO("%s: energy_all: %f, energy_last: %f, vad_thold: %f, freq_thold: %f\n", __func__, energy_all, energy_last, vad_thold, freq_thold);
137
+ }
84
138
 
85
- if (energy_last > vad_thold*energy_all) {
139
+ if (energy_last > vad_thold*energy_all) {
140
+ return false;
141
+ }
142
+
143
+ return true;
144
+ }
145
+
146
+ void job::set_realtime_params(
147
+ vad_params params,
148
+ int sec,
149
+ int slice_sec,
150
+ float min_sec,
151
+ const char* output_path
152
+ ) {
153
+ vad = params;
154
+ if (vad.vad_ms < 2000) vad.vad_ms = 2000;
155
+ audio_sec = sec > 0 ? sec : DEFAULT_MAX_AUDIO_SEC;
156
+ audio_slice_sec = slice_sec > 0 && slice_sec < audio_sec ? slice_sec : audio_sec;
157
+ audio_min_sec = min_sec >= 0.5 && min_sec <= audio_slice_sec ? min_sec : 1.0f;
158
+ audio_output_path = output_path;
159
+ }
160
+
161
+ bool job::vad_simple(int slice_index, int n_samples, int n) {
162
+ if (slice_index >= pcm_slices.size()) return !vad.use_vad;
163
+ if (!vad.use_vad) return true;
164
+
165
+ short* pcm = pcm_slices[slice_index];
166
+ int sample_size = (int) (WHISPER_SAMPLE_RATE * vad.vad_ms / 1000);
167
+ if (n_samples + n > sample_size) {
168
+ int start = n_samples + n - sample_size;
169
+ std::vector<float> pcmf32(sample_size);
170
+ for (int i = 0; i < sample_size; i++) {
171
+ pcmf32[i] = (float)pcm[i + start] / 32768.0f;
172
+ }
173
+ return vad_simple_impl(pcmf32, WHISPER_SAMPLE_RATE, vad.last_ms, vad.vad_thold, vad.freq_thold, vad.verbose);
174
+ }
86
175
  return false;
87
- }
176
+ }
177
+
178
+ void job::put_pcm_data(short* data, int slice_index, int n_samples, int n) {
179
+ if (pcm_slices.size() == slice_index) {
180
+ int n_slices = (int) (WHISPER_SAMPLE_RATE * audio_slice_sec);
181
+ pcm_slices.push_back(new short[n_slices]);
182
+ }
183
+ short* pcm = pcm_slices[slice_index];
184
+ for (int i = 0; i < n; i++) {
185
+ pcm[i + n_samples] = data[i];
186
+ }
187
+ }
188
+
189
+ float* job::pcm_slice_to_f32(int slice_index, int size) {
190
+ if (pcm_slices.size() > slice_index) {
191
+ float* pcmf32 = new float[size];
192
+ for (int i = 0; i < size; i++) {
193
+ pcmf32[i] = (float)pcm_slices[slice_index][i] / 32768.0f;
194
+ }
195
+ return pcmf32;
196
+ }
197
+ return nullptr;
198
+ }
199
+
200
+ bool job::is_aborted() {
201
+ return aborted;
202
+ }
203
+
204
+ void job::abort() {
205
+ aborted = true;
206
+ }
207
+
208
+ job::~job() {
209
+ RNWHISPER_LOG_INFO("rnwhisper::job::%s: job_id: %d\n", __func__, job_id);
210
+
211
+ for (size_t i = 0; i < pcm_slices.size(); i++) {
212
+ delete[] pcm_slices[i];
213
+ }
214
+ pcm_slices.clear();
215
+ }
216
+
217
+ std::unordered_map<int, job*> job_map;
218
+
219
+ void job_abort_all() {
220
+ for (auto it = job_map.begin(); it != job_map.end(); ++it) {
221
+ it->second->abort();
222
+ }
223
+ }
224
+
225
+ job* job_new(int job_id, struct whisper_full_params params) {
226
+ job* ctx = new job();
227
+ ctx->job_id = job_id;
228
+ ctx->params = params;
229
+
230
+ job_map[job_id] = ctx;
231
+
232
+ // Abort handler
233
+ params.encoder_begin_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, void * user_data) {
234
+ job *j = (job*)user_data;
235
+ return !j->is_aborted();
236
+ };
237
+ params.encoder_begin_callback_user_data = job_map[job_id];
238
+ params.abort_callback = [](void * user_data) {
239
+ job *j = (job*)user_data;
240
+ return j->is_aborted();
241
+ };
242
+ params.abort_callback_user_data = job_map[job_id];
88
243
 
89
- return true;
244
+ return job_map[job_id];
90
245
  }
91
246
 
92
- }
247
+ job* job_get(int job_id) {
248
+ if (job_map.find(job_id) != job_map.end()) {
249
+ return job_map[job_id];
250
+ }
251
+ return nullptr;
252
+ }
253
+
254
+ void job_remove(int job_id) {
255
+ if (job_map.find(job_id) != job_map.end()) {
256
+ delete job_map[job_id];
257
+ }
258
+ job_map.erase(job_id);
259
+ }
260
+
261
+ }
package/cpp/rn-whisper.h CHANGED
@@ -1,17 +1,52 @@
1
+ #ifndef RNWHISPER_H
2
+ #define RNWHISPER_H
1
3
 
2
- #ifdef __cplusplus
3
4
  #include <string>
4
- #include <whisper.h>
5
- extern "C" {
6
- #endif
7
-
8
- bool* rn_whisper_assign_abort_map(int job_id);
9
- void rn_whisper_remove_abort_map(int job_id);
10
- void rn_whisper_abort_transcribe(int job_id);
11
- bool rn_whisper_transcribe_is_aborted(int job_id);
12
- void rn_whisper_abort_all_transcribe();
13
- bool rn_whisper_vad_simple(std::vector<float> & pcmf32, int sample_rate, int last_ms, float vad_thold, float freq_thold, bool verbose);
14
-
15
- #ifdef __cplusplus
16
- }
17
- #endif
5
+ #include <vector>
6
+ #include "whisper.h"
7
+ #include "rn-whisper-log.h"
8
+ #include "rn-audioutils.h"
9
+
10
+ namespace rnwhisper {
11
+
12
+ std::string bench(whisper_context * ctx, int n_threads);
13
+
14
// Settings for the energy-based voice activity check used by job::vad_simple.
struct vad_params {
    bool  use_vad    {false};   // when false, job::vad_simple skips the check
    float vad_thold  {0.6f};    // factor applied to the whole-window mean energy
    float freq_thold {100.0f};  // high-pass cutoff; values <= 0 disable filtering
    int   vad_ms     {2000};    // window length in ms (set_realtime_params enforces >= 2000)
    int   last_ms    {1000};    // trailing part of the window that is compared, in ms
    bool  verbose    {false};   // log the computed energies
};
22
+
23
+ struct job {
24
+ int job_id;
25
+ bool aborted = false;
26
+ whisper_full_params params;
27
+
28
+ ~job();
29
+ bool is_aborted();
30
+ void abort();
31
+
32
+ // Realtime transcription only:
33
+ vad_params vad;
34
+ int audio_sec = 0;
35
+ int audio_slice_sec = 0;
36
+ float audio_min_sec = 0;
37
+ const char* audio_output_path = nullptr;
38
+ std::vector<short *> pcm_slices;
39
+ void set_realtime_params(vad_params vad, int sec, int slice_sec, float min_sec, const char* output_path);
40
+ bool vad_simple(int slice_index, int n_samples, int n);
41
+ void put_pcm_data(short* pcm, int slice_index, int n_samples, int n);
42
+ float* pcm_slice_to_f32(int slice_index, int size);
43
+ };
44
+
45
+ void job_abort_all();
46
+ job* job_new(int job_id, struct whisper_full_params params);
47
+ void job_remove(int job_id);
48
+ job* job_get(int job_id);
49
+
50
+ } // namespace rnwhisper
51
+
52
+ #endif // RNWHISPER_H