RubyGems - whisper.cpp - Versions diffs - 0.3.1 → 0.3.3 - Mend

whisper.cpp 0.3.1 → 0.3.3

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.

Files changed (4) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 561b7b824c1f631537d681fb1e5a1eafe20546a8468705ecd2ba3989e0062c69
-  data.tar.gz: e047a8ab099358095100a55cd30aca398bbe8b8c4c55cfb0713cdf33e87d2e1a
+  metadata.gz: fb6cb421d7d00b2f9ff5f93f543bf468d5d8c0754befe434b25e1b0e2b3c80a4
+  data.tar.gz: cbd9ea39b8f408692b857cf0302d945392c7aec620834aca340e9c6cb6c900c8
 SHA512:
-  metadata.gz: 80c60958d31df322ac570c97ca6d758278f8711d9bfff371be34e8159ffcadc2facf149aa4c7b55de49a2d2d8812b570d0ddd68e543172301f930fe494a4243b
-  data.tar.gz: 02fd686ff1857adf234c53fa0719035e797a82bed4b165e54004c0b7c903728eb7d8c44273b3f46e6ff21a32df44e29986092b72ccfd853324749e639fa303ed
+  metadata.gz: 9a2671addade694795b2a76b1db257db8ffab3ebd9b3266df237a45a73d85b8f7c5f3ebae78bc33c4c38fb869397ca7f8082b909a5d8aebe0818f0174ac01fdd
+  data.tar.gz: d150ab03e621e4b2128ed34792d6968ad5e8fccf599150be240344f97b10c58ccbcac9a8d2022356d97c31c856004c18d0f8fc4edf32ceacfbe2dac14805de39

data/lib/whisper/model.rb CHANGED Viewed

@@ -8,84 +8,80 @@ module Whisper
     def initialize(model_path)
       @model_path = model_path
       @ctx = nil
+      @state = nil
+      init_whisper_context
+      init_whisper_state
     end
     def transcribe_from_file(audio_file_path, format: 'plaintext', **params)
       # Load audio file and convert to float array
-      audio_data = Whisper::AudioProcessor.convert_to_float_array(audio_file_path)
-      transcribe_from_audio_data(audio_data, format: format, **params)
+      audio_data = Whisper::AudioProcessor.convert_to_float_array audio_file_path
+      transcribe_from_audio_data audio_data, format: format, **params
     end
     def transcribe_from_audio_data(audio_data, format: 'plaintext', **params)
-      init_whisper_context(params)
       # Prepare full params
-      full_params = default_full_params(params)
+      full_params = default_full_params params
       # Prepare audio data pointer
       n_samples = audio_data.size
-      samples_ptr = FFI::MemoryPointer.new(:float, n_samples)
-      samples_ptr.write_array_of_float(audio_data)
+      samples_ptr = FFI::MemoryPointer.new :float, n_samples
+      samples_ptr.write_array_of_float audio_data
-      # Call the whisper_full_parallel function
-      n_processors = params.fetch :n_processors, ENV['WHISPER_N_PROCS']&.to_i || 1
-      result = Whisper.whisper_full_parallel(@ctx, full_params, samples_ptr, n_samples, n_processors)
+      # Call the whisper_full_with_state function
+      result = Whisper.whisper_full_with_state @ctx, @state, full_params, samples_ptr, n_samples
       raise 'Transcription failed' if result != 0
       # Retrieve detected language
-      lang_id = Whisper.whisper_full_lang_id(@ctx)
-      language = Whisper.whisper_lang_str(lang_id)
+      lang_id = Whisper.whisper_full_lang_id_from_state @state
+      language = Whisper.whisper_lang_str lang_id
       # Retrieve the transcription output
-      n_segments = Whisper.whisper_full_n_segments(@ctx)
-      output = format_transcription(format, n_segments: n_segments)
+      n_segments = Whisper.whisper_full_n_segments_from_state @state
+      output = format_transcription format, n_segments: n_segments
-      TranscriptionResult.new(language, output)
+      TranscriptionResult.new language, output
     end
     def close
-      Whisper.whisper_free(@ctx) unless @ctx.nil?
+      Whisper.whisper_free_state @state unless @state.nil?
+      Whisper.whisper_free @ctx unless @ctx.nil?
     end
     private
-    def init_whisper_context(params)
+    def init_whisper_context params = {}
       return unless @ctx.nil?
       ctx_params = Whisper.whisper_context_default_params
-      # Set user-provided context params
-      user_ctx_params = params.fetch(:context_params, {})
-      user_ctx_params.each do |key, value|
-        if ctx_params.members.include?(field)
-          ctx_params[key] = value
-        else
-          warn "Unknown context_param field: #{field}"
-        end
+      params.select{ |k, _| ctx_params.members.include? k }.each do |key, value|
+        ctx_params[key] = value
       end
       ctx_params[:gpu_device] = ENV['WHISPER_GPU']&.to_i || 0
       # Initialize context
-      @ctx = Whisper.whisper_init_from_file_with_params(@model_path, ctx_params)
+      @ctx = Whisper.whisper_init_from_file_with_params @model_path, ctx_params
       raise 'Failed to initialize Whisper model' if @ctx.null?
     end
-    def default_full_params(params)
+    def init_whisper_state
+      @state = Whisper.whisper_init_state @ctx
+      raise 'Failed to initialize Whisper state' if @state.null?
+    end
+    def default_full_params params = {}
       # Get default full params
-      strategy = params.fetch(:sampling_strategy, Whisper::WHISPER_SAMPLING_GREEDY)
-      full_params = Whisper.whisper_full_default_params(strategy)
+      strategy = params.fetch :sampling_strategy, Whisper::WHISPER_SAMPLING_GREEDY
+      full_params = Whisper.whisper_full_default_params strategy
-        # Set translate to false to prevent translation to English
+      # Set translate to false to prevent translation to English
       full_params[:translate] = false
+      full_params[:language] = FFI::MemoryPointer.from_string 'auto'
       # Set user-provided full params
-      user_full_params = params.fetch(:full_params, {})
-      user_full_params.each do |key, value|
-        if full_params.members.include?(field)
-          full_params[key] = value
-        else
-          warn "Unknown full_param field: #{field}"
-        end
+      params.select{ |k, _| full_params.members.include? k }.each do |key, value|
+        full_params[key] = value
       end
       full_params
@@ -96,17 +92,17 @@ module Whisper
       case format.downcase
       when 'plaintext'
         n_segments.times do |i|
-          segment_text = Whisper.whisper_full_get_segment_text(@ctx, i)
+          segment_text = Whisper.whisper_full_get_segment_text_from_state @state, i
           output += segment_text
         end
       when 'srt'
         n_segments.times do |i|
-          start_time = Whisper.whisper_full_get_segment_t0(@ctx, i) / 100.0
-          end_time = Whisper.whisper_full_get_segment_t1(@ctx, i) / 100.0
-          segment_text = Whisper.whisper_full_get_segment_text(@ctx, i)
+          start_time = Whisper.whisper_full_get_segment_t0_from_state(@state, i) / 100.0
+          end_time = Whisper.whisper_full_get_segment_t1_from_state(@state, i) / 100.0
+          segment_text = Whisper.whisper_full_get_segment_text_from_state @state, i
           output += "#{i + 1}\n"
-          output += "#{format_time_srt(start_time)} --> #{format_time_srt(end_time)}\n"
+          output += "#{format_time_srt start_time} --> #{format_time_srt end_time}\n"
           output += "#{segment_text.strip}\n\n"
         end
       else
@@ -120,7 +116,7 @@ module Whisper
       minutes = ((seconds % 3600) / 60).to_i
       secs = (seconds % 60).to_i
       millis = ((seconds - seconds.to_i) * 1000).to_i
-      format('%02d:%02d:%02d,%03d', hours, minutes, secs, millis)
+      format '%02d:%02d:%02d,%03d', hours, minutes, secs, millis
     end
   end
 end

data/lib/whisper/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 module Whisper
-  VERSION = '0.3.1'
+  VERSION = '0.3.3'
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: whisper.cpp
 version: !ruby/object:Gem::Version
-  version: 0.3.1
+  version: 0.3.3
 platform: ruby
 authors:
 - Braulio Oliveira
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2024-10-12 00:00:00.000000000 Z
+date: 2024-10-13 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: ffi