whispercpp 1.2.0.2 → 1.3.0
This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions exactly as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +3 -92
- data/ext/extconf.rb +9 -0
- data/ext/ggml.c +18380 -5241
- data/ext/ggml.h +2156 -502
- data/ext/ruby_whisper.cpp +13 -47
- data/ext/whisper.cpp +4182 -1787
- data/ext/whisper.h +334 -65
- metadata +3 -3
data/ext/ruby_whisper.cpp
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
#include <ruby.h>
|
2
|
-
#include <ruby/thread.h>
|
3
2
|
#include "ruby_whisper.h"
|
4
3
|
#define DR_WAV_IMPLEMENTATION
|
5
4
|
#include "dr_wav.h"
|
@@ -88,39 +87,13 @@ static VALUE ruby_whisper_initialize(int argc, VALUE *argv, VALUE self) {
|
|
88
87
|
if (!rb_respond_to(whisper_model_file_path, rb_intern("to_s"))) {
|
89
88
|
rb_raise(rb_eRuntimeError, "Expected file path to model to initialize Whisper::Context");
|
90
89
|
}
|
91
|
-
rw->context =
|
90
|
+
rw->context = whisper_init_from_file_with_params(StringValueCStr(whisper_model_file_path), whisper_context_default_params());
|
92
91
|
if (rw->context == nullptr) {
|
93
92
|
rb_raise(rb_eRuntimeError, "error: failed to initialize whisper context");
|
94
93
|
}
|
95
94
|
return self;
|
96
95
|
}
|
97
96
|
|
98
|
-
struct WhisperFullParallelParams {
|
99
|
-
ruby_whisper *rw;
|
100
|
-
ruby_whisper_params *rwp;
|
101
|
-
std::vector<float> pcmf32; // mono-channel F32 PCM
|
102
|
-
std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
|
103
|
-
};
|
104
|
-
|
105
|
-
|
106
|
-
static void stop_whisper_unblock(void *args) {
|
107
|
-
struct WhisperFullParallelParams *object = (struct WhisperFullParallelParams *)args;
|
108
|
-
fprintf(stderr, "Set running to abort\n");
|
109
|
-
whisper_running_abort(object->rw->context);
|
110
|
-
}
|
111
|
-
|
112
|
-
static VALUE call_whisper_full_parallel(void *args) {
|
113
|
-
struct WhisperFullParallelParams *object = (struct WhisperFullParallelParams *)args;
|
114
|
-
|
115
|
-
whisper_running_restore(object->rw->context);
|
116
|
-
|
117
|
-
if (whisper_full_parallel(object->rw->context, object->rwp->params, object->pcmf32.data(), object->pcmf32.size(), 1) != 0) {
|
118
|
-
fprintf(stderr, "failed to process audio\n");
|
119
|
-
return INT2FIX(-1);
|
120
|
-
}
|
121
|
-
return INT2FIX(0);
|
122
|
-
}
|
123
|
-
|
124
97
|
/*
|
125
98
|
* transcribe a single file
|
126
99
|
* can emit to a block results
|
@@ -141,9 +114,8 @@ static VALUE ruby_whisper_transcribe(int argc, VALUE *argv, VALUE self) {
|
|
141
114
|
|
142
115
|
std::string fname_inp = StringValueCStr(wave_file_path);
|
143
116
|
|
144
|
-
|
145
|
-
|
146
|
-
struct WhisperFullParallelParams object;
|
117
|
+
std::vector<float> pcmf32; // mono-channel F32 PCM
|
118
|
+
std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
|
147
119
|
|
148
120
|
// WAV input - this is directly from main.cpp example
|
149
121
|
{
|
@@ -201,49 +173,43 @@ static VALUE ruby_whisper_transcribe(int argc, VALUE *argv, VALUE self) {
|
|
201
173
|
drwav_uninit(&wav);
|
202
174
|
|
203
175
|
// convert to mono, float
|
204
|
-
|
176
|
+
pcmf32.resize(n);
|
205
177
|
if (wav.channels == 1) {
|
206
178
|
for (uint64_t i = 0; i < n; i++) {
|
207
|
-
|
179
|
+
pcmf32[i] = float(pcm16[i])/32768.0f;
|
208
180
|
}
|
209
181
|
} else {
|
210
182
|
for (uint64_t i = 0; i < n; i++) {
|
211
|
-
|
183
|
+
pcmf32[i] = float(pcm16[2*i] + pcm16[2*i + 1])/65536.0f;
|
212
184
|
}
|
213
185
|
}
|
214
186
|
|
215
187
|
if (rwp->diarize) {
|
216
188
|
// convert to stereo, float
|
217
|
-
|
189
|
+
pcmf32s.resize(2);
|
218
190
|
|
219
|
-
|
220
|
-
|
191
|
+
pcmf32s[0].resize(n);
|
192
|
+
pcmf32s[1].resize(n);
|
221
193
|
for (uint64_t i = 0; i < n; i++) {
|
222
|
-
|
223
|
-
|
194
|
+
pcmf32s[0][i] = float(pcm16[2*i])/32768.0f;
|
195
|
+
pcmf32s[1][i] = float(pcm16[2*i + 1])/32768.0f;
|
224
196
|
}
|
225
197
|
}
|
226
198
|
}
|
227
199
|
{
|
228
200
|
static bool is_aborted = false; // NOTE: this should be atomic to avoid data race
|
229
201
|
|
230
|
-
rwp->params.encoder_begin_callback = [](struct whisper_context * /*ctx*/, void * user_data) {
|
202
|
+
rwp->params.encoder_begin_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, void * user_data) {
|
231
203
|
bool is_aborted = *(bool*)user_data;
|
232
204
|
return !is_aborted;
|
233
205
|
};
|
234
206
|
rwp->params.encoder_begin_callback_user_data = &is_aborted;
|
235
207
|
}
|
236
208
|
|
237
|
-
|
238
|
-
object.rwp = rwp;
|
239
|
-
|
240
|
-
int r = (int)(VALUE)rb_thread_call_without_gvl((void *(*)(void *))call_whisper_full_parallel, &object, stop_whisper_unblock, &object);
|
241
|
-
//if (whisper_full_parallel(rw->context, rwp->params, object.pcmf32.data(), pcmf32.size(), 1) != 0) {
|
242
|
-
if (r != 0) {
|
209
|
+
if (whisper_full_parallel(rw->context, rwp->params, pcmf32.data(), pcmf32.size(), 1) != 0) {
|
243
210
|
fprintf(stderr, "failed to process audio\n");
|
244
211
|
return self;
|
245
212
|
}
|
246
|
-
|
247
213
|
const int n_segments = whisper_full_n_segments(rw->context);
|
248
214
|
VALUE output = rb_str_new2("");
|
249
215
|
for (int i = 0; i < n_segments; ++i) {
|