whisper.cpp 0.3.1 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 561b7b824c1f631537d681fb1e5a1eafe20546a8468705ecd2ba3989e0062c69
4
- data.tar.gz: e047a8ab099358095100a55cd30aca398bbe8b8c4c55cfb0713cdf33e87d2e1a
3
+ metadata.gz: fb6cb421d7d00b2f9ff5f93f543bf468d5d8c0754befe434b25e1b0e2b3c80a4
4
+ data.tar.gz: cbd9ea39b8f408692b857cf0302d945392c7aec620834aca340e9c6cb6c900c8
5
5
  SHA512:
6
- metadata.gz: 80c60958d31df322ac570c97ca6d758278f8711d9bfff371be34e8159ffcadc2facf149aa4c7b55de49a2d2d8812b570d0ddd68e543172301f930fe494a4243b
7
- data.tar.gz: 02fd686ff1857adf234c53fa0719035e797a82bed4b165e54004c0b7c903728eb7d8c44273b3f46e6ff21a32df44e29986092b72ccfd853324749e639fa303ed
6
+ metadata.gz: 9a2671addade694795b2a76b1db257db8ffab3ebd9b3266df237a45a73d85b8f7c5f3ebae78bc33c4c38fb869397ca7f8082b909a5d8aebe0818f0174ac01fdd
7
+ data.tar.gz: d150ab03e621e4b2128ed34792d6968ad5e8fccf599150be240344f97b10c58ccbcac9a8d2022356d97c31c856004c18d0f8fc4edf32ceacfbe2dac14805de39
data/lib/whisper/model.rb CHANGED
@@ -8,84 +8,80 @@ module Whisper
8
8
  def initialize(model_path)
9
9
  @model_path = model_path
10
10
  @ctx = nil
11
+ @state = nil
12
+ init_whisper_context
13
+ init_whisper_state
11
14
  end
12
15
 
13
16
  def transcribe_from_file(audio_file_path, format: 'plaintext', **params)
14
17
  # Load audio file and convert to float array
15
- audio_data = Whisper::AudioProcessor.convert_to_float_array(audio_file_path)
16
- transcribe_from_audio_data(audio_data, format: format, **params)
18
+ audio_data = Whisper::AudioProcessor.convert_to_float_array audio_file_path
19
+ transcribe_from_audio_data audio_data, format: format, **params
17
20
  end
18
21
 
19
22
  def transcribe_from_audio_data(audio_data, format: 'plaintext', **params)
20
- init_whisper_context(params)
21
-
22
23
  # Prepare full params
23
- full_params = default_full_params(params)
24
+ full_params = default_full_params params
24
25
 
25
26
  # Prepare audio data pointer
26
27
  n_samples = audio_data.size
27
- samples_ptr = FFI::MemoryPointer.new(:float, n_samples)
28
- samples_ptr.write_array_of_float(audio_data)
28
+ samples_ptr = FFI::MemoryPointer.new :float, n_samples
29
+ samples_ptr.write_array_of_float audio_data
29
30
 
30
- # Call the whisper_full_parallel function
31
- n_processors = params.fetch :n_processors, ENV['WHISPER_N_PROCS']&.to_i || 1
32
- result = Whisper.whisper_full_parallel(@ctx, full_params, samples_ptr, n_samples, n_processors)
31
+ # Call the whisper_full_with_state function
32
+ result = Whisper.whisper_full_with_state @ctx, @state, full_params, samples_ptr, n_samples
33
33
  raise 'Transcription failed' if result != 0
34
34
 
35
35
  # Retrieve detected language
36
- lang_id = Whisper.whisper_full_lang_id(@ctx)
37
- language = Whisper.whisper_lang_str(lang_id)
36
+ lang_id = Whisper.whisper_full_lang_id_from_state @state
37
+ language = Whisper.whisper_lang_str lang_id
38
38
 
39
39
  # Retrieve the transcription output
40
- n_segments = Whisper.whisper_full_n_segments(@ctx)
41
- output = format_transcription(format, n_segments: n_segments)
40
+ n_segments = Whisper.whisper_full_n_segments_from_state @state
41
+ output = format_transcription format, n_segments: n_segments
42
42
 
43
- TranscriptionResult.new(language, output)
43
+ TranscriptionResult.new language, output
44
44
  end
45
45
 
46
46
  def close
47
- Whisper.whisper_free(@ctx) unless @ctx.nil?
47
+ Whisper.whisper_free_state @state unless @state.nil?
48
+ Whisper.whisper_free @ctx unless @ctx.nil?
48
49
  end
49
50
 
50
51
  private
51
52
 
52
- def init_whisper_context(params)
53
+ def init_whisper_context params = {}
53
54
  return unless @ctx.nil?
54
55
 
55
56
  ctx_params = Whisper.whisper_context_default_params
56
57
 
57
- # Set user-provided context params
58
- user_ctx_params = params.fetch(:context_params, {})
59
- user_ctx_params.each do |key, value|
60
- if ctx_params.members.include?(field)
61
- ctx_params[key] = value
62
- else
63
- warn "Unknown context_param field: #{field}"
64
- end
58
+ params.select{ |k, _| ctx_params.members.include? k }.each do |key, value|
59
+ ctx_params[key] = value
65
60
  end
66
61
  ctx_params[:gpu_device] = ENV['WHISPER_GPU']&.to_i || 0
67
62
 
68
63
  # Initialize context
69
- @ctx = Whisper.whisper_init_from_file_with_params(@model_path, ctx_params)
64
+ @ctx = Whisper.whisper_init_from_file_with_params @model_path, ctx_params
70
65
  raise 'Failed to initialize Whisper model' if @ctx.null?
71
66
  end
72
67
 
73
- def default_full_params(params)
68
+ def init_whisper_state
69
+ @state = Whisper.whisper_init_state @ctx
70
+ raise 'Failed to initialize Whisper state' if @state.null?
71
+ end
72
+
73
+ def default_full_params params = {}
74
74
  # Get default full params
75
- strategy = params.fetch(:sampling_strategy, Whisper::WHISPER_SAMPLING_GREEDY)
76
- full_params = Whisper.whisper_full_default_params(strategy)
75
+ strategy = params.fetch :sampling_strategy, Whisper::WHISPER_SAMPLING_GREEDY
76
+ full_params = Whisper.whisper_full_default_params strategy
77
77
 
78
- # Set translate to false to prevent translation to English
78
+ # Set translate to false to prevent translation to English
79
79
  full_params[:translate] = false
80
+ full_params[:language] = FFI::MemoryPointer.from_string 'auto'
80
81
 
81
82
  # Set user-provided full params
82
- user_full_params = params.fetch(:full_params, {})
83
- user_full_params.each do |key, value|
84
- if full_params.members.include?(field)
85
- full_params[key] = value
86
- else
87
- warn "Unknown full_param field: #{field}"
88
- end
83
+ params.select{ |k, _| full_params.members.include? k }.each do |key, value|
84
+ full_params[key] = value
89
85
  end
90
86
 
91
87
  full_params
@@ -96,17 +92,17 @@ module Whisper
96
92
  case format.downcase
97
93
  when 'plaintext'
98
94
  n_segments.times do |i|
99
- segment_text = Whisper.whisper_full_get_segment_text(@ctx, i)
95
+ segment_text = Whisper.whisper_full_get_segment_text_from_state @state, i
100
96
  output += segment_text
101
97
  end
102
98
  when 'srt'
103
99
  n_segments.times do |i|
104
- start_time = Whisper.whisper_full_get_segment_t0(@ctx, i) / 100.0
105
- end_time = Whisper.whisper_full_get_segment_t1(@ctx, i) / 100.0
106
- segment_text = Whisper.whisper_full_get_segment_text(@ctx, i)
100
+ start_time = Whisper.whisper_full_get_segment_t0_from_state(@state, i) / 100.0
101
+ end_time = Whisper.whisper_full_get_segment_t1_from_state(@state, i) / 100.0
102
+ segment_text = Whisper.whisper_full_get_segment_text_from_state @state, i
107
103
 
108
104
  output += "#{i + 1}\n"
109
- output += "#{format_time_srt(start_time)} --> #{format_time_srt(end_time)}\n"
105
+ output += "#{format_time_srt start_time} --> #{format_time_srt end_time}\n"
110
106
  output += "#{segment_text.strip}\n\n"
111
107
  end
112
108
  else
@@ -120,7 +116,7 @@ module Whisper
120
116
  minutes = ((seconds % 3600) / 60).to_i
121
117
  secs = (seconds % 60).to_i
122
118
  millis = ((seconds - seconds.to_i) * 1000).to_i
123
- format('%02d:%02d:%02d,%03d', hours, minutes, secs, millis)
119
+ format '%02d:%02d:%02d,%03d', hours, minutes, secs, millis
124
120
  end
125
121
  end
126
122
  end
@@ -1,5 +1,5 @@
1
1
  module Whisper
2
2
 
3
- VERSION = '0.3.1'
3
+ VERSION = '0.3.3'
4
4
 
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: whisper.cpp
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.3.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Braulio Oliveira
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-10-12 00:00:00.000000000 Z
11
+ date: 2024-10-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ffi