whisper.cpp 0.3.2 → 0.3.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3d1b91cc9ac6e7682429f261d3e9b283c4c34ef246a2e88d07f1c294d145d1f4
4
- data.tar.gz: be2a0ff1c4eda542685c209e6c64b7f3f3f902c4353534747922ea89c42ef24d
3
+ metadata.gz: fb6cb421d7d00b2f9ff5f93f543bf468d5d8c0754befe434b25e1b0e2b3c80a4
4
+ data.tar.gz: cbd9ea39b8f408692b857cf0302d945392c7aec620834aca340e9c6cb6c900c8
5
5
  SHA512:
6
- metadata.gz: c992abd9e3e9b48d6b31cfa6489b5b09489781d251c2d3ca24d4932fca579b7d19b7483a1d72aefe77ac0655d5d5af19708bf6342cf6e5d4922d3663b1244acc
7
- data.tar.gz: 8890fa1e7653f8690c3686a7751e597958c06adfaa3a0b3a3e74c947920f1da2014ada7eebe4fd78e85cc2a74fe235b244c43817937072ffe3b7c75792567ec1
6
+ metadata.gz: 9a2671addade694795b2a76b1db257db8ffab3ebd9b3266df237a45a73d85b8f7c5f3ebae78bc33c4c38fb869397ca7f8082b909a5d8aebe0818f0174ac01fdd
7
+ data.tar.gz: d150ab03e621e4b2128ed34792d6968ad5e8fccf599150be240344f97b10c58ccbcac9a8d2022356d97c31c856004c18d0f8fc4edf32ceacfbe2dac14805de39
data/lib/whisper/model.rb CHANGED
@@ -8,42 +8,44 @@ module Whisper
8
8
  def initialize(model_path)
9
9
  @model_path = model_path
10
10
  @ctx = nil
11
+ @state = nil
11
12
  init_whisper_context
13
+ init_whisper_state
12
14
  end
13
15
 
14
16
  def transcribe_from_file(audio_file_path, format: 'plaintext', **params)
15
17
  # Load audio file and convert to float array
16
- audio_data = Whisper::AudioProcessor.convert_to_float_array(audio_file_path)
17
- transcribe_from_audio_data(audio_data, format: format, **params)
18
+ audio_data = Whisper::AudioProcessor.convert_to_float_array audio_file_path
19
+ transcribe_from_audio_data audio_data, format: format, **params
18
20
  end
19
21
 
20
22
  def transcribe_from_audio_data(audio_data, format: 'plaintext', **params)
21
23
  # Prepare full params
22
- full_params = default_full_params(params)
24
+ full_params = default_full_params params
23
25
 
24
26
  # Prepare audio data pointer
25
27
  n_samples = audio_data.size
26
- samples_ptr = FFI::MemoryPointer.new(:float, n_samples)
27
- samples_ptr.write_array_of_float(audio_data)
28
+ samples_ptr = FFI::MemoryPointer.new :float, n_samples
29
+ samples_ptr.write_array_of_float audio_data
28
30
 
29
- # Call the whisper_full_parallel function
30
- n_processors = params.fetch :n_processors, ENV['WHISPER_N_PROCS']&.to_i || 1
31
- result = Whisper.whisper_full_parallel(@ctx, full_params, samples_ptr, n_samples, n_processors)
31
+ # Call the whisper_full_with_state function
32
+ result = Whisper.whisper_full_with_state @ctx, @state, full_params, samples_ptr, n_samples
32
33
  raise 'Transcription failed' if result != 0
33
34
 
34
35
  # Retrieve detected language
35
- lang_id = Whisper.whisper_full_lang_id(@ctx)
36
- language = Whisper.whisper_lang_str(lang_id)
36
+ lang_id = Whisper.whisper_full_lang_id_from_state @state
37
+ language = Whisper.whisper_lang_str lang_id
37
38
 
38
39
  # Retrieve the transcription output
39
- n_segments = Whisper.whisper_full_n_segments(@ctx)
40
- output = format_transcription(format, n_segments: n_segments)
40
+ n_segments = Whisper.whisper_full_n_segments_from_state @state
41
+ output = format_transcription format, n_segments: n_segments
41
42
 
42
- TranscriptionResult.new(language, output)
43
+ TranscriptionResult.new language, output
43
44
  end
44
45
 
45
46
  def close
46
- Whisper.whisper_free(@ctx) unless @ctx.nil?
47
+ Whisper.whisper_free_state @state unless @state.nil?
48
+ Whisper.whisper_free @ctx unless @ctx.nil?
47
49
  end
48
50
 
49
51
  private
@@ -59,18 +61,23 @@ module Whisper
59
61
  ctx_params[:gpu_device] = ENV['WHISPER_GPU']&.to_i || 0
60
62
 
61
63
  # Initialize context
62
- @ctx = Whisper.whisper_init_from_file_with_params(@model_path, ctx_params)
64
+ @ctx = Whisper.whisper_init_from_file_with_params @model_path, ctx_params
63
65
  raise 'Failed to initialize Whisper model' if @ctx.null?
64
66
  end
65
67
 
68
+ def init_whisper_state
69
+ @state = Whisper.whisper_init_state @ctx
70
+ raise 'Failed to initialize Whisper state' if @state.null?
71
+ end
72
+
66
73
  def default_full_params params = {}
67
74
  # Get default full params
68
- strategy = params.fetch(:sampling_strategy, Whisper::WHISPER_SAMPLING_GREEDY)
69
- full_params = Whisper.whisper_full_default_params(strategy)
75
+ strategy = params.fetch :sampling_strategy, Whisper::WHISPER_SAMPLING_GREEDY
76
+ full_params = Whisper.whisper_full_default_params strategy
70
77
 
71
78
  # Set translate to false to prevent translation to English
72
79
  full_params[:translate] = false
73
- full_params[:language] = FFI::MemoryPointer.from_string 'auto'
80
+ full_params[:language] = FFI::MemoryPointer.from_string 'auto'
74
81
 
75
82
  # Set user-provided full params
76
83
  params.select{ |k, _| full_params.members.include? k }.each do |key, value|
@@ -85,17 +92,17 @@ module Whisper
85
92
  case format.downcase
86
93
  when 'plaintext'
87
94
  n_segments.times do |i|
88
- segment_text = Whisper.whisper_full_get_segment_text(@ctx, i)
95
+ segment_text = Whisper.whisper_full_get_segment_text_from_state @state, i
89
96
  output += segment_text
90
97
  end
91
98
  when 'srt'
92
99
  n_segments.times do |i|
93
- start_time = Whisper.whisper_full_get_segment_t0(@ctx, i) / 100.0
94
- end_time = Whisper.whisper_full_get_segment_t1(@ctx, i) / 100.0
95
- segment_text = Whisper.whisper_full_get_segment_text(@ctx, i)
100
+ start_time = Whisper.whisper_full_get_segment_t0_from_state(@state, i) / 100.0
101
+ end_time = Whisper.whisper_full_get_segment_t1_from_state(@state, i) / 100.0
102
+ segment_text = Whisper.whisper_full_get_segment_text_from_state @state, i
96
103
 
97
104
  output += "#{i + 1}\n"
98
- output += "#{format_time_srt(start_time)} --> #{format_time_srt(end_time)}\n"
105
+ output += "#{format_time_srt start_time} --> #{format_time_srt end_time}\n"
99
106
  output += "#{segment_text.strip}\n\n"
100
107
  end
101
108
  else
@@ -109,7 +116,7 @@ module Whisper
109
116
  minutes = ((seconds % 3600) / 60).to_i
110
117
  secs = (seconds % 60).to_i
111
118
  millis = ((seconds - seconds.to_i) * 1000).to_i
112
- format('%02d:%02d:%02d,%03d', hours, minutes, secs, millis)
119
+ format '%02d:%02d:%02d,%03d', hours, minutes, secs, millis
113
120
  end
114
121
  end
115
122
  end
@@ -1,5 +1,5 @@
1
1
  module Whisper
2
2
 
3
- VERSION = '0.3.2'
3
+ VERSION = '0.3.3'
4
4
 
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: whisper.cpp
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.3.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Braulio Oliveira
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-10-12 00:00:00.000000000 Z
11
+ date: 2024-10-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ffi