whisper.cpp 0.3.1 → 0.3.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 561b7b824c1f631537d681fb1e5a1eafe20546a8468705ecd2ba3989e0062c69
4
- data.tar.gz: e047a8ab099358095100a55cd30aca398bbe8b8c4c55cfb0713cdf33e87d2e1a
3
+ metadata.gz: fb6cb421d7d00b2f9ff5f93f543bf468d5d8c0754befe434b25e1b0e2b3c80a4
4
+ data.tar.gz: cbd9ea39b8f408692b857cf0302d945392c7aec620834aca340e9c6cb6c900c8
5
5
  SHA512:
6
- metadata.gz: 80c60958d31df322ac570c97ca6d758278f8711d9bfff371be34e8159ffcadc2facf149aa4c7b55de49a2d2d8812b570d0ddd68e543172301f930fe494a4243b
7
- data.tar.gz: 02fd686ff1857adf234c53fa0719035e797a82bed4b165e54004c0b7c903728eb7d8c44273b3f46e6ff21a32df44e29986092b72ccfd853324749e639fa303ed
6
+ metadata.gz: 9a2671addade694795b2a76b1db257db8ffab3ebd9b3266df237a45a73d85b8f7c5f3ebae78bc33c4c38fb869397ca7f8082b909a5d8aebe0818f0174ac01fdd
7
+ data.tar.gz: d150ab03e621e4b2128ed34792d6968ad5e8fccf599150be240344f97b10c58ccbcac9a8d2022356d97c31c856004c18d0f8fc4edf32ceacfbe2dac14805de39
data/lib/whisper/model.rb CHANGED
@@ -8,84 +8,80 @@ module Whisper
8
8
  def initialize(model_path)
9
9
  @model_path = model_path
10
10
  @ctx = nil
11
+ @state = nil
12
+ init_whisper_context
13
+ init_whisper_state
11
14
  end
12
15
 
13
16
  def transcribe_from_file(audio_file_path, format: 'plaintext', **params)
14
17
  # Load audio file and convert to float array
15
- audio_data = Whisper::AudioProcessor.convert_to_float_array(audio_file_path)
16
- transcribe_from_audio_data(audio_data, format: format, **params)
18
+ audio_data = Whisper::AudioProcessor.convert_to_float_array audio_file_path
19
+ transcribe_from_audio_data audio_data, format: format, **params
17
20
  end
18
21
 
19
22
  def transcribe_from_audio_data(audio_data, format: 'plaintext', **params)
20
- init_whisper_context(params)
21
-
22
23
  # Prepare full params
23
- full_params = default_full_params(params)
24
+ full_params = default_full_params params
24
25
 
25
26
  # Prepare audio data pointer
26
27
  n_samples = audio_data.size
27
- samples_ptr = FFI::MemoryPointer.new(:float, n_samples)
28
- samples_ptr.write_array_of_float(audio_data)
28
+ samples_ptr = FFI::MemoryPointer.new :float, n_samples
29
+ samples_ptr.write_array_of_float audio_data
29
30
 
30
- # Call the whisper_full_parallel function
31
- n_processors = params.fetch :n_processors, ENV['WHISPER_N_PROCS']&.to_i || 1
32
- result = Whisper.whisper_full_parallel(@ctx, full_params, samples_ptr, n_samples, n_processors)
31
+ # Call the whisper_full_with_state function
32
+ result = Whisper.whisper_full_with_state @ctx, @state, full_params, samples_ptr, n_samples
33
33
  raise 'Transcription failed' if result != 0
34
34
 
35
35
  # Retrieve detected language
36
- lang_id = Whisper.whisper_full_lang_id(@ctx)
37
- language = Whisper.whisper_lang_str(lang_id)
36
+ lang_id = Whisper.whisper_full_lang_id_from_state @state
37
+ language = Whisper.whisper_lang_str lang_id
38
38
 
39
39
  # Retrieve the transcription output
40
- n_segments = Whisper.whisper_full_n_segments(@ctx)
41
- output = format_transcription(format, n_segments: n_segments)
40
+ n_segments = Whisper.whisper_full_n_segments_from_state @state
41
+ output = format_transcription format, n_segments: n_segments
42
42
 
43
- TranscriptionResult.new(language, output)
43
+ TranscriptionResult.new language, output
44
44
  end
45
45
 
46
46
  def close
47
- Whisper.whisper_free(@ctx) unless @ctx.nil?
47
+ Whisper.whisper_free_state @state unless @state.nil?
48
+ Whisper.whisper_free @ctx unless @ctx.nil?
48
49
  end
49
50
 
50
51
  private
51
52
 
52
- def init_whisper_context(params)
53
+ def init_whisper_context params = {}
53
54
  return unless @ctx.nil?
54
55
 
55
56
  ctx_params = Whisper.whisper_context_default_params
56
57
 
57
- # Set user-provided context params
58
- user_ctx_params = params.fetch(:context_params, {})
59
- user_ctx_params.each do |key, value|
60
- if ctx_params.members.include?(field)
61
- ctx_params[key] = value
62
- else
63
- warn "Unknown context_param field: #{field}"
64
- end
58
+ params.select{ |k, _| ctx_params.members.include? k }.each do |key, value|
59
+ ctx_params[key] = value
65
60
  end
66
61
  ctx_params[:gpu_device] = ENV['WHISPER_GPU']&.to_i || 0
67
62
 
68
63
  # Initialize context
69
- @ctx = Whisper.whisper_init_from_file_with_params(@model_path, ctx_params)
64
+ @ctx = Whisper.whisper_init_from_file_with_params @model_path, ctx_params
70
65
  raise 'Failed to initialize Whisper model' if @ctx.null?
71
66
  end
72
67
 
73
- def default_full_params(params)
68
+ def init_whisper_state
69
+ @state = Whisper.whisper_init_state @ctx
70
+ raise 'Failed to initialize Whisper state' if @state.null?
71
+ end
72
+
73
+ def default_full_params params = {}
74
74
  # Get default full params
75
- strategy = params.fetch(:sampling_strategy, Whisper::WHISPER_SAMPLING_GREEDY)
76
- full_params = Whisper.whisper_full_default_params(strategy)
75
+ strategy = params.fetch :sampling_strategy, Whisper::WHISPER_SAMPLING_GREEDY
76
+ full_params = Whisper.whisper_full_default_params strategy
77
77
 
78
- # Set translate to false to prevent translation to English
78
+ # Set translate to false to prevent translation to English
79
79
  full_params[:translate] = false
80
+ full_params[:language] = FFI::MemoryPointer.from_string 'auto'
80
81
 
81
82
  # Set user-provided full params
82
- user_full_params = params.fetch(:full_params, {})
83
- user_full_params.each do |key, value|
84
- if full_params.members.include?(field)
85
- full_params[key] = value
86
- else
87
- warn "Unknown full_param field: #{field}"
88
- end
83
+ params.select{ |k, _| full_params.members.include? k }.each do |key, value|
84
+ full_params[key] = value
89
85
  end
90
86
 
91
87
  full_params
@@ -96,17 +92,17 @@ module Whisper
96
92
  case format.downcase
97
93
  when 'plaintext'
98
94
  n_segments.times do |i|
99
- segment_text = Whisper.whisper_full_get_segment_text(@ctx, i)
95
+ segment_text = Whisper.whisper_full_get_segment_text_from_state @state, i
100
96
  output += segment_text
101
97
  end
102
98
  when 'srt'
103
99
  n_segments.times do |i|
104
- start_time = Whisper.whisper_full_get_segment_t0(@ctx, i) / 100.0
105
- end_time = Whisper.whisper_full_get_segment_t1(@ctx, i) / 100.0
106
- segment_text = Whisper.whisper_full_get_segment_text(@ctx, i)
100
+ start_time = Whisper.whisper_full_get_segment_t0_from_state(@state, i) / 100.0
101
+ end_time = Whisper.whisper_full_get_segment_t1_from_state(@state, i) / 100.0
102
+ segment_text = Whisper.whisper_full_get_segment_text_from_state @state, i
107
103
 
108
104
  output += "#{i + 1}\n"
109
- output += "#{format_time_srt(start_time)} --> #{format_time_srt(end_time)}\n"
105
+ output += "#{format_time_srt start_time} --> #{format_time_srt end_time}\n"
110
106
  output += "#{segment_text.strip}\n\n"
111
107
  end
112
108
  else
@@ -120,7 +116,7 @@ module Whisper
120
116
  minutes = ((seconds % 3600) / 60).to_i
121
117
  secs = (seconds % 60).to_i
122
118
  millis = ((seconds - seconds.to_i) * 1000).to_i
123
- format('%02d:%02d:%02d,%03d', hours, minutes, secs, millis)
119
+ format '%02d:%02d:%02d,%03d', hours, minutes, secs, millis
124
120
  end
125
121
  end
126
122
  end
@@ -1,5 +1,5 @@
1
1
  module Whisper
2
2
 
3
- VERSION = '0.3.1'
3
+ VERSION = '0.3.3'
4
4
 
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: whisper.cpp
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.3.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Braulio Oliveira
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-10-12 00:00:00.000000000 Z
11
+ date: 2024-10-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ffi