RubyGems - whispercpp - Versions diffs - 1.2.0.2 → 1.3.0 - Mend

whispercpp 1.2.0.2 → 1.3.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

data/ext/ruby_whisper.cpp CHANGED Viewed

@@ -1,5 +1,4 @@
 #include <ruby.h>
-#include <ruby/thread.h>
 #include "ruby_whisper.h"
 #define DR_WAV_IMPLEMENTATION
 #include "dr_wav.h"
@@ -88,39 +87,13 @@ static VALUE ruby_whisper_initialize(int argc, VALUE *argv, VALUE self) {
   if (!rb_respond_to(whisper_model_file_path, rb_intern("to_s"))) {
     rb_raise(rb_eRuntimeError, "Expected file path to model to initialize Whisper::Context");
   }
-  rw->context = whisper_init_from_file(StringValueCStr(whisper_model_file_path));
+  rw->context = whisper_init_from_file_with_params(StringValueCStr(whisper_model_file_path), whisper_context_default_params());
   if (rw->context == nullptr) {
     rb_raise(rb_eRuntimeError, "error: failed to initialize whisper context");
   }
   return self;
 }
-struct WhisperFullParallelParams {
-  ruby_whisper *rw;
-  ruby_whisper_params *rwp;
-  std::vector<float> pcmf32; // mono-channel F32 PCM
-  std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
-};
-static void stop_whisper_unblock(void *args) {
-  struct WhisperFullParallelParams *object = (struct WhisperFullParallelParams *)args;
-  fprintf(stderr, "Set running to abort\n");
-  whisper_running_abort(object->rw->context);
-}
-static VALUE call_whisper_full_parallel(void *args) {
-  struct WhisperFullParallelParams *object = (struct WhisperFullParallelParams *)args;
-  whisper_running_restore(object->rw->context);
-  if (whisper_full_parallel(object->rw->context, object->rwp->params, object->pcmf32.data(), object->pcmf32.size(), 1) != 0) {
-    fprintf(stderr, "failed to process audio\n");
-    return INT2FIX(-1);
-  }
-  return INT2FIX(0);
-}
 /*
  * transcribe a single file
  * can emit to a block results
@@ -141,9 +114,8 @@ static VALUE ruby_whisper_transcribe(int argc, VALUE *argv, VALUE self) {
   std::string fname_inp = StringValueCStr(wave_file_path);
-  //std::vector<float> pcmf32; // mono-channel F32 PCM
-  //std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
-  struct WhisperFullParallelParams object;
+  std::vector<float> pcmf32; // mono-channel F32 PCM
+  std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
   // WAV input - this is directly from main.cpp example
   {
@@ -201,49 +173,43 @@ static VALUE ruby_whisper_transcribe(int argc, VALUE *argv, VALUE self) {
     drwav_uninit(&wav);
     // convert to mono, float
-    object.pcmf32.resize(n);
+    pcmf32.resize(n);
     if (wav.channels == 1) {
       for (uint64_t i = 0; i < n; i++) {
-        object.pcmf32[i] = float(pcm16[i])/32768.0f;
+        pcmf32[i] = float(pcm16[i])/32768.0f;
       }
     } else {
       for (uint64_t i = 0; i < n; i++) {
-        object.pcmf32[i] = float(pcm16[2*i] + pcm16[2*i + 1])/65536.0f;
+        pcmf32[i] = float(pcm16[2*i] + pcm16[2*i + 1])/65536.0f;
       }
     }
     if (rwp->diarize) {
       // convert to stereo, float
-      object.pcmf32s.resize(2);
+      pcmf32s.resize(2);
-      object.pcmf32s[0].resize(n);
-      object.pcmf32s[1].resize(n);
+      pcmf32s[0].resize(n);
+      pcmf32s[1].resize(n);
       for (uint64_t i = 0; i < n; i++) {
-        object.pcmf32s[0][i] = float(pcm16[2*i])/32768.0f;
-        object.pcmf32s[1][i] = float(pcm16[2*i + 1])/32768.0f;
+        pcmf32s[0][i] = float(pcm16[2*i])/32768.0f;
+        pcmf32s[1][i] = float(pcm16[2*i + 1])/32768.0f;
       }
     }
   }
   {
     static bool is_aborted = false; // NOTE: this should be atomic to avoid data race
-    rwp->params.encoder_begin_callback = [](struct whisper_context * /*ctx*/, void * user_data) {
+    rwp->params.encoder_begin_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, void * user_data) {
       bool is_aborted = *(bool*)user_data;
       return !is_aborted;
     };
     rwp->params.encoder_begin_callback_user_data = &is_aborted;
   }
-  object.rw = rw;
-  object.rwp = rwp;
-  int r = (int)(VALUE)rb_thread_call_without_gvl((void *(*)(void *))call_whisper_full_parallel, &object, stop_whisper_unblock, &object);
-  //if (whisper_full_parallel(rw->context, rwp->params, object.pcmf32.data(), pcmf32.size(), 1) != 0) {
-  if (r != 0) {
+  if (whisper_full_parallel(rw->context, rwp->params, pcmf32.data(), pcmf32.size(), 1) != 0) {
     fprintf(stderr, "failed to process audio\n");
     return self;
   }
   const int n_segments = whisper_full_n_segments(rw->context);
   VALUE output = rb_str_new2("");
   for (int i = 0; i < n_segments; ++i) {