whisper.cpp 0.3.2 → 0.3.3

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3d1b91cc9ac6e7682429f261d3e9b283c4c34ef246a2e88d07f1c294d145d1f4
4
- data.tar.gz: be2a0ff1c4eda542685c209e6c64b7f3f3f902c4353534747922ea89c42ef24d
3
+ metadata.gz: fb6cb421d7d00b2f9ff5f93f543bf468d5d8c0754befe434b25e1b0e2b3c80a4
4
+ data.tar.gz: cbd9ea39b8f408692b857cf0302d945392c7aec620834aca340e9c6cb6c900c8
5
5
  SHA512:
6
- metadata.gz: c992abd9e3e9b48d6b31cfa6489b5b09489781d251c2d3ca24d4932fca579b7d19b7483a1d72aefe77ac0655d5d5af19708bf6342cf6e5d4922d3663b1244acc
7
- data.tar.gz: 8890fa1e7653f8690c3686a7751e597958c06adfaa3a0b3a3e74c947920f1da2014ada7eebe4fd78e85cc2a74fe235b244c43817937072ffe3b7c75792567ec1
6
+ metadata.gz: 9a2671addade694795b2a76b1db257db8ffab3ebd9b3266df237a45a73d85b8f7c5f3ebae78bc33c4c38fb869397ca7f8082b909a5d8aebe0818f0174ac01fdd
7
+ data.tar.gz: d150ab03e621e4b2128ed34792d6968ad5e8fccf599150be240344f97b10c58ccbcac9a8d2022356d97c31c856004c18d0f8fc4edf32ceacfbe2dac14805de39
data/lib/whisper/model.rb CHANGED
@@ -8,42 +8,44 @@ module Whisper
8
8
  def initialize(model_path)
9
9
  @model_path = model_path
10
10
  @ctx = nil
11
+ @state = nil
11
12
  init_whisper_context
13
+ init_whisper_state
12
14
  end
13
15
 
14
16
  def transcribe_from_file(audio_file_path, format: 'plaintext', **params)
15
17
  # Load audio file and convert to float array
16
- audio_data = Whisper::AudioProcessor.convert_to_float_array(audio_file_path)
17
- transcribe_from_audio_data(audio_data, format: format, **params)
18
+ audio_data = Whisper::AudioProcessor.convert_to_float_array audio_file_path
19
+ transcribe_from_audio_data audio_data, format: format, **params
18
20
  end
19
21
 
20
22
  def transcribe_from_audio_data(audio_data, format: 'plaintext', **params)
21
23
  # Prepare full params
22
- full_params = default_full_params(params)
24
+ full_params = default_full_params params
23
25
 
24
26
  # Prepare audio data pointer
25
27
  n_samples = audio_data.size
26
- samples_ptr = FFI::MemoryPointer.new(:float, n_samples)
27
- samples_ptr.write_array_of_float(audio_data)
28
+ samples_ptr = FFI::MemoryPointer.new :float, n_samples
29
+ samples_ptr.write_array_of_float audio_data
28
30
 
29
- # Call the whisper_full_parallel function
30
- n_processors = params.fetch :n_processors, ENV['WHISPER_N_PROCS']&.to_i || 1
31
- result = Whisper.whisper_full_parallel(@ctx, full_params, samples_ptr, n_samples, n_processors)
31
+ # Call the whisper_full_with_state function
32
+ result = Whisper.whisper_full_with_state @ctx, @state, full_params, samples_ptr, n_samples
32
33
  raise 'Transcription failed' if result != 0
33
34
 
34
35
  # Retrieve detected language
35
- lang_id = Whisper.whisper_full_lang_id(@ctx)
36
- language = Whisper.whisper_lang_str(lang_id)
36
+ lang_id = Whisper.whisper_full_lang_id_from_state @state
37
+ language = Whisper.whisper_lang_str lang_id
37
38
 
38
39
  # Retrieve the transcription output
39
- n_segments = Whisper.whisper_full_n_segments(@ctx)
40
- output = format_transcription(format, n_segments: n_segments)
40
+ n_segments = Whisper.whisper_full_n_segments_from_state @state
41
+ output = format_transcription format, n_segments: n_segments
41
42
 
42
- TranscriptionResult.new(language, output)
43
+ TranscriptionResult.new language, output
43
44
  end
44
45
 
45
46
  def close
46
- Whisper.whisper_free(@ctx) unless @ctx.nil?
47
+ Whisper.whisper_free_state @state unless @state.nil?
48
+ Whisper.whisper_free @ctx unless @ctx.nil?
47
49
  end
48
50
 
49
51
  private
@@ -59,18 +61,23 @@ module Whisper
59
61
  ctx_params[:gpu_device] = ENV['WHISPER_GPU']&.to_i || 0
60
62
 
61
63
  # Initialize context
62
- @ctx = Whisper.whisper_init_from_file_with_params(@model_path, ctx_params)
64
+ @ctx = Whisper.whisper_init_from_file_with_params @model_path, ctx_params
63
65
  raise 'Failed to initialize Whisper model' if @ctx.null?
64
66
  end
65
67
 
68
+ def init_whisper_state
69
+ @state = Whisper.whisper_init_state @ctx
70
+ raise 'Failed to initialize Whisper state' if @state.null?
71
+ end
72
+
66
73
  def default_full_params params = {}
67
74
  # Get default full params
68
- strategy = params.fetch(:sampling_strategy, Whisper::WHISPER_SAMPLING_GREEDY)
69
- full_params = Whisper.whisper_full_default_params(strategy)
75
+ strategy = params.fetch :sampling_strategy, Whisper::WHISPER_SAMPLING_GREEDY
76
+ full_params = Whisper.whisper_full_default_params strategy
70
77
 
71
78
  # Set translate to false to prevent translation to English
72
79
  full_params[:translate] = false
73
- full_params[:language] = FFI::MemoryPointer.from_string 'auto'
80
+ full_params[:language] = FFI::MemoryPointer.from_string 'auto'
74
81
 
75
82
  # Set user-provided full params
76
83
  params.select{ |k, _| full_params.members.include? k }.each do |key, value|
@@ -85,17 +92,17 @@ module Whisper
85
92
  case format.downcase
86
93
  when 'plaintext'
87
94
  n_segments.times do |i|
88
- segment_text = Whisper.whisper_full_get_segment_text(@ctx, i)
95
+ segment_text = Whisper.whisper_full_get_segment_text_from_state @state, i
89
96
  output += segment_text
90
97
  end
91
98
  when 'srt'
92
99
  n_segments.times do |i|
93
- start_time = Whisper.whisper_full_get_segment_t0(@ctx, i) / 100.0
94
- end_time = Whisper.whisper_full_get_segment_t1(@ctx, i) / 100.0
95
- segment_text = Whisper.whisper_full_get_segment_text(@ctx, i)
100
+ start_time = Whisper.whisper_full_get_segment_t0_from_state(@state, i) / 100.0
101
+ end_time = Whisper.whisper_full_get_segment_t1_from_state(@state, i) / 100.0
102
+ segment_text = Whisper.whisper_full_get_segment_text_from_state @state, i
96
103
 
97
104
  output += "#{i + 1}\n"
98
- output += "#{format_time_srt(start_time)} --> #{format_time_srt(end_time)}\n"
105
+ output += "#{format_time_srt start_time} --> #{format_time_srt end_time}\n"
99
106
  output += "#{segment_text.strip}\n\n"
100
107
  end
101
108
  else
@@ -109,7 +116,7 @@ module Whisper
109
116
  minutes = ((seconds % 3600) / 60).to_i
110
117
  secs = (seconds % 60).to_i
111
118
  millis = ((seconds - seconds.to_i) * 1000).to_i
112
- format('%02d:%02d:%02d,%03d', hours, minutes, secs, millis)
119
+ format '%02d:%02d:%02d,%03d', hours, minutes, secs, millis
113
120
  end
114
121
  end
115
122
  end
@@ -1,5 +1,5 @@
1
1
  module Whisper
2
2
 
3
- VERSION = '0.3.2'
3
+ VERSION = '0.3.3'
4
4
 
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: whisper.cpp
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.3.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Braulio Oliveira
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-10-12 00:00:00.000000000 Z
11
+ date: 2024-10-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ffi