whisper.cpp 0.3.2 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3d1b91cc9ac6e7682429f261d3e9b283c4c34ef246a2e88d07f1c294d145d1f4
4
- data.tar.gz: be2a0ff1c4eda542685c209e6c64b7f3f3f902c4353534747922ea89c42ef24d
3
+ metadata.gz: a9cd333686c64e91cb061ac8cf093e32bd76e977854ca84ee25c3929929c2f57
4
+ data.tar.gz: aff6dd2d092a9ad86f6f3c1453aaed13c42fc6f8d686886c86fcccc30667a15a
5
5
  SHA512:
6
- metadata.gz: c992abd9e3e9b48d6b31cfa6489b5b09489781d251c2d3ca24d4932fca579b7d19b7483a1d72aefe77ac0655d5d5af19708bf6342cf6e5d4922d3663b1244acc
7
- data.tar.gz: 8890fa1e7653f8690c3686a7751e597958c06adfaa3a0b3a3e74c947920f1da2014ada7eebe4fd78e85cc2a74fe235b244c43817937072ffe3b7c75792567ec1
6
+ metadata.gz: 01cf0c815ec1e7630dc42787f424b521e2b172d67aa78662791c99d126bd4eed941d550ffe0601f88fec5bb6046332bbb0fcdb63eeaea1b3e538f6253768366d
7
+ data.tar.gz: 5e894cad714070459949b15cc7c527124ada408639dd5220cd14ba6bc92eafd2bde138f27683addf5d07bc2d193bc21a6d3813971a70881113e5e4696cf95023
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 3.3.0
1
+ 3.3.6
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- whisper.cpp (0.1.0)
4
+ whisper.cpp (0.3.4)
5
5
  ffi (~> 1.15)
6
6
 
7
7
  GEM
@@ -28,7 +28,7 @@ DEPENDENCIES
28
28
  whisper.cpp!
29
29
 
30
30
  RUBY VERSION
31
- ruby 3.3.0p0
31
+ ruby 3.3.6p108
32
32
 
33
33
  BUNDLED WITH
34
34
  2.5.11
data/ext/extconf.rb CHANGED
@@ -43,26 +43,45 @@ unless Dir.exist?(whisper_dir)
43
43
  abort "Failed to find or create the whisper.cpp directory at #{whisper_dir}"
44
44
  end
45
45
 
46
- # Now, proceed to build libwhispercpp.so using the whisper.cpp Makefile
46
+ # Now, proceed to modify the Makefile and build libwhispercpp.so
47
47
  Dir.chdir(whisper_dir) do
48
48
  # Set environment variables for build settings
49
- ENV['GGML_CUDA'] = '1' # Enable CUDA support
49
+ ENV['GGML_CUDA'] = '1' # Enable CUDA support if desired
50
50
 
51
- puts "Building libwhispercpp.so with GGML_CUDA=#{ENV['GGML_CUDA']}..."
51
+ # Modify the Makefile to add libwhispercpp.so target
52
+ makefile_path = File.join(Dir.pwd, 'Makefile')
53
+
54
+ makefile_content = File.read(makefile_path)
55
+
56
+ # Check if 'libwhispercpp.so' target is already defined
57
+ unless makefile_content.include?('libwhispercpp.so:')
58
+ # Append the new target at the end of the Makefile
59
+ new_content = <<~MAKEFILE
60
+
61
+ # Custom target for building libwhispercpp.so
62
+ libwhispercpp.so: $(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON) $(OBJ_SDL) $(OBJ_WHISPER_EXTRA)
63
+ \t$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
64
+
65
+ BUILD_TARGETS += libwhispercpp.so
66
+ MAKEFILE
67
+
68
+ makefile_content << new_content
69
+
70
+ # Write back the modified Makefile
71
+ File.open(makefile_path, 'w') do |f|
72
+ f.write(makefile_content)
73
+ end
52
74
 
53
- # Build libwhisper.a and libggml.a
54
- unless system 'make clean && make -j libwhisper.a libggml.a'
55
- abort "Failed to build libwhisper.a and libggml.a"
75
+ puts "Modified Makefile to add libwhispercpp.so target."
76
+ else
77
+ puts "Makefile already contains libwhispercpp.so target."
56
78
  end
57
79
 
58
- # Link the static libraries into a single shared library using g++
59
- gcc_command = 'g++ -shared -o libwhispercpp.so ' \
60
- '-Wl,--whole-archive libwhisper.a -Wl,--no-whole-archive libggml.a ' \
61
- '-L$( [ -d /opt/cuda ] && echo /opt/cuda/lib || echo /usr/local/cuda/lib ) ' \
62
- '-lcuda -lcudart -lcublas -lc -lm -lstdc++'
80
+ puts "Building libwhispercpp.so with GGML_CUDA=#{ENV['GGML_CUDA']}..."
63
81
 
64
- unless system gcc_command
65
- abort "Failed to link libwhispercpp.so"
82
+ # Build libwhispercpp.so
83
+ unless system 'make clean && make -j libwhispercpp.so'
84
+ abort "Failed to build libwhispercpp.so"
66
85
  end
67
86
 
68
87
  # Verify that libwhispercpp.so was created
@@ -71,10 +90,11 @@ Dir.chdir(whisper_dir) do
71
90
  abort "libwhispercpp.so not found after compilation"
72
91
  end
73
92
 
74
- # Copy the compiled library to the gem's lib directory
75
- FileUtils.cp(source_lib, root_dir)
93
+ # Copy the compiled library to the gem's root directory
94
+ destination_lib = File.join(root_dir, 'libwhispercpp.so')
95
+ FileUtils.cp(source_lib, destination_lib)
76
96
 
77
- puts "Copied libwhispercpp.so to #{root_dir}"
97
+ puts "Copied libwhispercpp.so to #{destination_lib}"
78
98
  end
79
99
 
80
100
  puts "Compilation completed."
@@ -82,9 +102,9 @@ puts "Compilation completed."
82
102
  # Create a no-op Makefile to prevent RubyGems from attempting further compilation
83
103
  makefile_content = <<~MAKEFILE
84
104
  all:
85
- @echo 'libwhispercpp.so already built.'
105
+ \t@echo 'libwhispercpp.so already built.'
86
106
  install:
87
- @echo 'libwhispercpp.so already installed.'
107
+ \t@echo 'libwhispercpp.so already installed.'
88
108
  MAKEFILE
89
109
 
90
110
  File.open('Makefile', 'w') do |f|
@@ -93,15 +113,10 @@ end
93
113
 
94
114
  puts "Created a no-op Makefile to prevent further compilation."
95
115
 
96
- # After copying libwhispercpp.so
97
-
98
- # Path to the cloned whisper.cpp directory
116
+ # Remove the cloned whisper.cpp directory
99
117
  cloned_dir = whisper_dir
100
118
 
101
- # Remove the cloned whisper.cpp directory
102
119
  puts "Removing cloned whisper.cpp directory at #{cloned_dir}..."
103
120
  FileUtils.rm_rf(cloned_dir)
104
-
105
121
  puts "Removed cloned whisper.cpp directory."
106
122
 
107
-
data/lib/whisper/model.rb CHANGED
@@ -8,42 +8,44 @@ module Whisper
8
8
  def initialize(model_path)
9
9
  @model_path = model_path
10
10
  @ctx = nil
11
+ @state = nil
11
12
  init_whisper_context
13
+ init_whisper_state
12
14
  end
13
15
 
14
16
  def transcribe_from_file(audio_file_path, format: 'plaintext', **params)
15
17
  # Load audio file and convert to float array
16
- audio_data = Whisper::AudioProcessor.convert_to_float_array(audio_file_path)
17
- transcribe_from_audio_data(audio_data, format: format, **params)
18
+ audio_data = Whisper::AudioProcessor.convert_to_float_array audio_file_path
19
+ transcribe_from_audio_data audio_data, format: format, **params
18
20
  end
19
21
 
20
22
  def transcribe_from_audio_data(audio_data, format: 'plaintext', **params)
21
23
  # Prepare full params
22
- full_params = default_full_params(params)
24
+ full_params = default_full_params params
23
25
 
24
26
  # Prepare audio data pointer
25
27
  n_samples = audio_data.size
26
- samples_ptr = FFI::MemoryPointer.new(:float, n_samples)
27
- samples_ptr.write_array_of_float(audio_data)
28
+ samples_ptr = FFI::MemoryPointer.new :float, n_samples
29
+ samples_ptr.write_array_of_float audio_data
28
30
 
29
- # Call the whisper_full_parallel function
30
- n_processors = params.fetch :n_processors, ENV['WHISPER_N_PROCS']&.to_i || 1
31
- result = Whisper.whisper_full_parallel(@ctx, full_params, samples_ptr, n_samples, n_processors)
31
+ # Call the whisper_full_with_state function
32
+ result = Whisper.whisper_full_with_state @ctx, @state, full_params, samples_ptr, n_samples
32
33
  raise 'Transcription failed' if result != 0
33
34
 
34
35
  # Retrieve detected language
35
- lang_id = Whisper.whisper_full_lang_id(@ctx)
36
- language = Whisper.whisper_lang_str(lang_id)
36
+ lang_id = Whisper.whisper_full_lang_id_from_state @state
37
+ language = Whisper.whisper_lang_str lang_id
37
38
 
38
39
  # Retrieve the transcription output
39
- n_segments = Whisper.whisper_full_n_segments(@ctx)
40
- output = format_transcription(format, n_segments: n_segments)
40
+ n_segments = Whisper.whisper_full_n_segments_from_state @state
41
+ output = format_transcription format, n_segments: n_segments
41
42
 
42
- TranscriptionResult.new(language, output)
43
+ TranscriptionResult.new language, output
43
44
  end
44
45
 
45
46
  def close
46
- Whisper.whisper_free(@ctx) unless @ctx.nil?
47
+ Whisper.whisper_free_state @state unless @state.nil?
48
+ Whisper.whisper_free @ctx unless @ctx.nil?
47
49
  end
48
50
 
49
51
  private
@@ -59,18 +61,23 @@ module Whisper
59
61
  ctx_params[:gpu_device] = ENV['WHISPER_GPU']&.to_i || 0
60
62
 
61
63
  # Initialize context
62
- @ctx = Whisper.whisper_init_from_file_with_params(@model_path, ctx_params)
64
+ @ctx = Whisper.whisper_init_from_file_with_params @model_path, ctx_params
63
65
  raise 'Failed to initialize Whisper model' if @ctx.null?
64
66
  end
65
67
 
68
+ def init_whisper_state
69
+ @state = Whisper.whisper_init_state @ctx
70
+ raise 'Failed to initialize Whisper state' if @state.null?
71
+ end
72
+
66
73
  def default_full_params params = {}
67
74
  # Get default full params
68
- strategy = params.fetch(:sampling_strategy, Whisper::WHISPER_SAMPLING_GREEDY)
69
- full_params = Whisper.whisper_full_default_params(strategy)
75
+ strategy = params.fetch :sampling_strategy, Whisper::WHISPER_SAMPLING_GREEDY
76
+ full_params = Whisper.whisper_full_default_params strategy
70
77
 
71
78
  # Set translate to false to prevent translation to English
72
79
  full_params[:translate] = false
73
- full_params[:language] = FFI::MemoryPointer.from_string 'auto'
80
+ full_params[:language] = FFI::MemoryPointer.from_string 'auto'
74
81
 
75
82
  # Set user-provided full params
76
83
  params.select{ |k, _| full_params.members.include? k }.each do |key, value|
@@ -85,17 +92,17 @@ module Whisper
85
92
  case format.downcase
86
93
  when 'plaintext'
87
94
  n_segments.times do |i|
88
- segment_text = Whisper.whisper_full_get_segment_text(@ctx, i)
95
+ segment_text = Whisper.whisper_full_get_segment_text_from_state @state, i
89
96
  output += segment_text
90
97
  end
91
98
  when 'srt'
92
99
  n_segments.times do |i|
93
- start_time = Whisper.whisper_full_get_segment_t0(@ctx, i) / 100.0
94
- end_time = Whisper.whisper_full_get_segment_t1(@ctx, i) / 100.0
95
- segment_text = Whisper.whisper_full_get_segment_text(@ctx, i)
100
+ start_time = Whisper.whisper_full_get_segment_t0_from_state(@state, i) / 100.0
101
+ end_time = Whisper.whisper_full_get_segment_t1_from_state(@state, i) / 100.0
102
+ segment_text = Whisper.whisper_full_get_segment_text_from_state @state, i
96
103
 
97
104
  output += "#{i + 1}\n"
98
- output += "#{format_time_srt(start_time)} --> #{format_time_srt(end_time)}\n"
105
+ output += "#{format_time_srt start_time} --> #{format_time_srt end_time}\n"
99
106
  output += "#{segment_text.strip}\n\n"
100
107
  end
101
108
  else
@@ -109,7 +116,7 @@ module Whisper
109
116
  minutes = ((seconds % 3600) / 60).to_i
110
117
  secs = (seconds % 60).to_i
111
118
  millis = ((seconds - seconds.to_i) * 1000).to_i
112
- format('%02d:%02d:%02d,%03d', hours, minutes, secs, millis)
119
+ format '%02d:%02d:%02d,%03d', hours, minutes, secs, millis
113
120
  end
114
121
  end
115
122
  end
@@ -1,5 +1,5 @@
1
1
  module Whisper
2
2
 
3
- VERSION = '0.3.2'
3
+ VERSION = '0.3.4'
4
4
 
5
5
  end
data/lib/whisper.rb CHANGED
@@ -26,7 +26,19 @@ module Whisper
26
26
  :WHISPER_AHEADS_MEDIUM,
27
27
  :WHISPER_AHEADS_LARGE_V1,
28
28
  :WHISPER_AHEADS_LARGE_V2,
29
- :WHISPER_AHEADS_LARGE_V3
29
+ :WHISPER_AHEADS_LARGE_V3,
30
+ :WHISPER_AHEADS_LARGE_V3_TURBO # Added new enum value
31
+ ]
32
+
33
+ # Enums for grammar element type
34
+ enum :whisper_gretype, [
35
+ :WHISPER_GRETYPE_END, 0,
36
+ :WHISPER_GRETYPE_ALT,
37
+ :WHISPER_GRETYPE_RULE_REF,
38
+ :WHISPER_GRETYPE_CHAR,
39
+ :WHISPER_GRETYPE_CHAR_NOT,
40
+ :WHISPER_GRETYPE_CHAR_RNG_UPPER,
41
+ :WHISPER_GRETYPE_CHAR_ALT
30
42
  ]
31
43
 
32
44
  # Enums for sampling strategy
@@ -93,7 +105,7 @@ module Whisper
93
105
 
94
106
  # whisper_model_loader struct
95
107
  #class WhisperModelLoader < FFI::Struct
96
- # callback :read_callback, [:pointer, :pointer, :size_t], :size_t
108
+ # callback :read_callback, [:pointer, :pointer], :size_t
97
109
  # callback :eof_callback, [:pointer], :bool
98
110
  # callback :close_callback, [:pointer], :void
99
111
 
@@ -108,7 +120,7 @@ module Whisper
108
120
  # whisper_grammar_element struct
109
121
  class WhisperGrammarElement < FFI::Struct
110
122
  layout(
111
- :type, :int,
123
+ :type, :whisper_gretype,
112
124
  :value, :uint32
113
125
  )
114
126
  end
@@ -131,7 +143,7 @@ module Whisper
131
143
  # whisper_full_params struct
132
144
  class WhisperFullParams < FFI::Struct
133
145
  layout(
134
- :strategy, :int,
146
+ :strategy, :whisper_sampling_strategy,
135
147
  :n_threads, :int,
136
148
  :n_max_text_ctx, :int,
137
149
  :offset_ms, :int,
@@ -189,8 +201,10 @@ module Whisper
189
201
 
190
202
  # Get default context params
191
203
  attach_function :whisper_context_default_params, [], WhisperContextParams.by_value
204
+ attach_function :whisper_context_default_params_by_ref, [], :pointer
192
205
  # Get default full params
193
- attach_function :whisper_full_default_params, [:int], WhisperFullParams.by_value
206
+ attach_function :whisper_full_default_params, [:whisper_sampling_strategy], WhisperFullParams.by_value
207
+ attach_function :whisper_full_default_params_by_ref, [:whisper_sampling_strategy], :pointer
194
208
 
195
209
  # Function Bindings
196
210
 
@@ -208,8 +222,8 @@ module Whisper
208
222
  attach_function :whisper_init_state, [:pointer], :pointer
209
223
 
210
224
  # OpenVINO functions
211
- #attach_function :whisper_ctx_init_openvino_encoder_with_state, [:pointer, :pointer, :string, :string, :string], :int
212
- #attach_function :whisper_ctx_init_openvino_encoder, [:pointer, :string, :string, :string], :int
225
+ attach_function :whisper_ctx_init_openvino_encoder_with_state, [:pointer, :pointer, :string, :string, :string], :int
226
+ attach_function :whisper_ctx_init_openvino_encoder, [:pointer, :string, :string, :string], :int
213
227
 
214
228
  # Free functions
215
229
  attach_function :whisper_free, [:pointer], :void
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: whisper.cpp
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.3.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Braulio Oliveira
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-10-12 00:00:00.000000000 Z
11
+ date: 2024-11-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ffi
@@ -95,7 +95,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
97
  requirements: []
98
- rubygems_version: 3.5.3
98
+ rubygems_version: 3.5.22
99
99
  signing_key:
100
100
  specification_version: 4
101
101
  summary: Ruby bindings for whisper.cpp