whisper.cpp 0.3.2 → 0.3.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/whisper/model.rb +31 -24
- data/lib/whisper/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fb6cb421d7d00b2f9ff5f93f543bf468d5d8c0754befe434b25e1b0e2b3c80a4
|
4
|
+
data.tar.gz: cbd9ea39b8f408692b857cf0302d945392c7aec620834aca340e9c6cb6c900c8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9a2671addade694795b2a76b1db257db8ffab3ebd9b3266df237a45a73d85b8f7c5f3ebae78bc33c4c38fb869397ca7f8082b909a5d8aebe0818f0174ac01fdd
|
7
|
+
data.tar.gz: d150ab03e621e4b2128ed34792d6968ad5e8fccf599150be240344f97b10c58ccbcac9a8d2022356d97c31c856004c18d0f8fc4edf32ceacfbe2dac14805de39
|
data/lib/whisper/model.rb
CHANGED
@@ -8,42 +8,44 @@ module Whisper
|
|
8
8
|
def initialize(model_path)
|
9
9
|
@model_path = model_path
|
10
10
|
@ctx = nil
|
11
|
+
@state = nil
|
11
12
|
init_whisper_context
|
13
|
+
init_whisper_state
|
12
14
|
end
|
13
15
|
|
14
16
|
def transcribe_from_file(audio_file_path, format: 'plaintext', **params)
|
15
17
|
# Load audio file and convert to float array
|
16
|
-
audio_data = Whisper::AudioProcessor.convert_to_float_array
|
17
|
-
transcribe_from_audio_data
|
18
|
+
audio_data = Whisper::AudioProcessor.convert_to_float_array audio_file_path
|
19
|
+
transcribe_from_audio_data audio_data, format: format, **params
|
18
20
|
end
|
19
21
|
|
20
22
|
def transcribe_from_audio_data(audio_data, format: 'plaintext', **params)
|
21
23
|
# Prepare full params
|
22
|
-
full_params = default_full_params
|
24
|
+
full_params = default_full_params params
|
23
25
|
|
24
26
|
# Prepare audio data pointer
|
25
27
|
n_samples = audio_data.size
|
26
|
-
samples_ptr = FFI::MemoryPointer.new
|
27
|
-
samples_ptr.write_array_of_float
|
28
|
+
samples_ptr = FFI::MemoryPointer.new :float, n_samples
|
29
|
+
samples_ptr.write_array_of_float audio_data
|
28
30
|
|
29
|
-
# Call the
|
30
|
-
|
31
|
-
result = Whisper.whisper_full_parallel(@ctx, full_params, samples_ptr, n_samples, n_processors)
|
31
|
+
# Call the whisper_full_with_state function
|
32
|
+
result = Whisper.whisper_full_with_state @ctx, @state, full_params, samples_ptr, n_samples
|
32
33
|
raise 'Transcription failed' if result != 0
|
33
34
|
|
34
35
|
# Retrieve detected language
|
35
|
-
lang_id = Whisper.
|
36
|
-
language = Whisper.whisper_lang_str
|
36
|
+
lang_id = Whisper.whisper_full_lang_id_from_state @state
|
37
|
+
language = Whisper.whisper_lang_str lang_id
|
37
38
|
|
38
39
|
# Retrieve the transcription output
|
39
|
-
n_segments = Whisper.
|
40
|
-
output = format_transcription
|
40
|
+
n_segments = Whisper.whisper_full_n_segments_from_state @state
|
41
|
+
output = format_transcription format, n_segments: n_segments
|
41
42
|
|
42
|
-
TranscriptionResult.new
|
43
|
+
TranscriptionResult.new language, output
|
43
44
|
end
|
44
45
|
|
45
46
|
def close
|
46
|
-
Whisper.
|
47
|
+
Whisper.whisper_free_state @state unless @state.nil?
|
48
|
+
Whisper.whisper_free @ctx unless @ctx.nil?
|
47
49
|
end
|
48
50
|
|
49
51
|
private
|
@@ -59,18 +61,23 @@ module Whisper
|
|
59
61
|
ctx_params[:gpu_device] = ENV['WHISPER_GPU']&.to_i || 0
|
60
62
|
|
61
63
|
# Initialize context
|
62
|
-
@ctx = Whisper.whisper_init_from_file_with_params
|
64
|
+
@ctx = Whisper.whisper_init_from_file_with_params @model_path, ctx_params
|
63
65
|
raise 'Failed to initialize Whisper model' if @ctx.null?
|
64
66
|
end
|
65
67
|
|
68
|
+
def init_whisper_state
|
69
|
+
@state = Whisper.whisper_init_state @ctx
|
70
|
+
raise 'Failed to initialize Whisper state' if @state.null?
|
71
|
+
end
|
72
|
+
|
66
73
|
def default_full_params params = {}
|
67
74
|
# Get default full params
|
68
|
-
strategy = params.fetch
|
69
|
-
full_params = Whisper.whisper_full_default_params
|
75
|
+
strategy = params.fetch :sampling_strategy, Whisper::WHISPER_SAMPLING_GREEDY
|
76
|
+
full_params = Whisper.whisper_full_default_params strategy
|
70
77
|
|
71
78
|
# Set translate to false to prevent translation to English
|
72
79
|
full_params[:translate] = false
|
73
|
-
full_params[:language]
|
80
|
+
full_params[:language] = FFI::MemoryPointer.from_string 'auto'
|
74
81
|
|
75
82
|
# Set user-provided full params
|
76
83
|
params.select{ |k, _| full_params.members.include? k }.each do |key, value|
|
@@ -85,17 +92,17 @@ module Whisper
|
|
85
92
|
case format.downcase
|
86
93
|
when 'plaintext'
|
87
94
|
n_segments.times do |i|
|
88
|
-
segment_text = Whisper.
|
95
|
+
segment_text = Whisper.whisper_full_get_segment_text_from_state @state, i
|
89
96
|
output += segment_text
|
90
97
|
end
|
91
98
|
when 'srt'
|
92
99
|
n_segments.times do |i|
|
93
|
-
start_time = Whisper.
|
94
|
-
end_time = Whisper.
|
95
|
-
segment_text = Whisper.
|
100
|
+
start_time = Whisper.whisper_full_get_segment_t0_from_state(@state, i) / 100.0
|
101
|
+
end_time = Whisper.whisper_full_get_segment_t1_from_state(@state, i) / 100.0
|
102
|
+
segment_text = Whisper.whisper_full_get_segment_text_from_state @state, i
|
96
103
|
|
97
104
|
output += "#{i + 1}\n"
|
98
|
-
output += "#{format_time_srt
|
105
|
+
output += "#{format_time_srt start_time} --> #{format_time_srt end_time}\n"
|
99
106
|
output += "#{segment_text.strip}\n\n"
|
100
107
|
end
|
101
108
|
else
|
@@ -109,7 +116,7 @@ module Whisper
|
|
109
116
|
minutes = ((seconds % 3600) / 60).to_i
|
110
117
|
secs = (seconds % 60).to_i
|
111
118
|
millis = ((seconds - seconds.to_i) * 1000).to_i
|
112
|
-
format
|
119
|
+
format '%02d:%02d:%02d,%03d', hours, minutes, secs, millis
|
113
120
|
end
|
114
121
|
end
|
115
122
|
end
|
data/lib/whisper/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: whisper.cpp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.2
|
4
|
+
version: 0.3.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Braulio Oliveira
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-10-
|
11
|
+
date: 2024-10-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ffi
|