whisper.cpp 0.3.1 → 0.3.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/whisper/model.rb +39 -43
- data/lib/whisper/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fb6cb421d7d00b2f9ff5f93f543bf468d5d8c0754befe434b25e1b0e2b3c80a4
|
4
|
+
data.tar.gz: cbd9ea39b8f408692b857cf0302d945392c7aec620834aca340e9c6cb6c900c8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9a2671addade694795b2a76b1db257db8ffab3ebd9b3266df237a45a73d85b8f7c5f3ebae78bc33c4c38fb869397ca7f8082b909a5d8aebe0818f0174ac01fdd
|
7
|
+
data.tar.gz: d150ab03e621e4b2128ed34792d6968ad5e8fccf599150be240344f97b10c58ccbcac9a8d2022356d97c31c856004c18d0f8fc4edf32ceacfbe2dac14805de39
|
data/lib/whisper/model.rb
CHANGED
@@ -8,84 +8,80 @@ module Whisper
|
|
8
8
|
def initialize(model_path)
|
9
9
|
@model_path = model_path
|
10
10
|
@ctx = nil
|
11
|
+
@state = nil
|
12
|
+
init_whisper_context
|
13
|
+
init_whisper_state
|
11
14
|
end
|
12
15
|
|
13
16
|
def transcribe_from_file(audio_file_path, format: 'plaintext', **params)
|
14
17
|
# Load audio file and convert to float array
|
15
|
-
audio_data = Whisper::AudioProcessor.convert_to_float_array
|
16
|
-
transcribe_from_audio_data
|
18
|
+
audio_data = Whisper::AudioProcessor.convert_to_float_array audio_file_path
|
19
|
+
transcribe_from_audio_data audio_data, format: format, **params
|
17
20
|
end
|
18
21
|
|
19
22
|
def transcribe_from_audio_data(audio_data, format: 'plaintext', **params)
|
20
|
-
init_whisper_context(params)
|
21
|
-
|
22
23
|
# Prepare full params
|
23
|
-
full_params = default_full_params
|
24
|
+
full_params = default_full_params params
|
24
25
|
|
25
26
|
# Prepare audio data pointer
|
26
27
|
n_samples = audio_data.size
|
27
|
-
samples_ptr = FFI::MemoryPointer.new
|
28
|
-
samples_ptr.write_array_of_float
|
28
|
+
samples_ptr = FFI::MemoryPointer.new :float, n_samples
|
29
|
+
samples_ptr.write_array_of_float audio_data
|
29
30
|
|
30
|
-
# Call the
|
31
|
-
|
32
|
-
result = Whisper.whisper_full_parallel(@ctx, full_params, samples_ptr, n_samples, n_processors)
|
31
|
+
# Call the whisper_full_with_state function
|
32
|
+
result = Whisper.whisper_full_with_state @ctx, @state, full_params, samples_ptr, n_samples
|
33
33
|
raise 'Transcription failed' if result != 0
|
34
34
|
|
35
35
|
# Retrieve detected language
|
36
|
-
lang_id = Whisper.
|
37
|
-
language = Whisper.whisper_lang_str
|
36
|
+
lang_id = Whisper.whisper_full_lang_id_from_state @state
|
37
|
+
language = Whisper.whisper_lang_str lang_id
|
38
38
|
|
39
39
|
# Retrieve the transcription output
|
40
|
-
n_segments = Whisper.
|
41
|
-
output = format_transcription
|
40
|
+
n_segments = Whisper.whisper_full_n_segments_from_state @state
|
41
|
+
output = format_transcription format, n_segments: n_segments
|
42
42
|
|
43
|
-
TranscriptionResult.new
|
43
|
+
TranscriptionResult.new language, output
|
44
44
|
end
|
45
45
|
|
46
46
|
def close
|
47
|
-
Whisper.
|
47
|
+
Whisper.whisper_free_state @state unless @state.nil?
|
48
|
+
Whisper.whisper_free @ctx unless @ctx.nil?
|
48
49
|
end
|
49
50
|
|
50
51
|
private
|
51
52
|
|
52
|
-
def init_whisper_context
|
53
|
+
def init_whisper_context params = {}
|
53
54
|
return unless @ctx.nil?
|
54
55
|
|
55
56
|
ctx_params = Whisper.whisper_context_default_params
|
56
57
|
|
57
|
-
|
58
|
-
|
59
|
-
user_ctx_params.each do |key, value|
|
60
|
-
if ctx_params.members.include?(field)
|
61
|
-
ctx_params[key] = value
|
62
|
-
else
|
63
|
-
warn "Unknown context_param field: #{field}"
|
64
|
-
end
|
58
|
+
params.select{ |k, _| ctx_params.members.include? k }.each do |key, value|
|
59
|
+
ctx_params[key] = value
|
65
60
|
end
|
66
61
|
ctx_params[:gpu_device] = ENV['WHISPER_GPU']&.to_i || 0
|
67
62
|
|
68
63
|
# Initialize context
|
69
|
-
@ctx = Whisper.whisper_init_from_file_with_params
|
64
|
+
@ctx = Whisper.whisper_init_from_file_with_params @model_path, ctx_params
|
70
65
|
raise 'Failed to initialize Whisper model' if @ctx.null?
|
71
66
|
end
|
72
67
|
|
73
|
-
def
|
68
|
+
def init_whisper_state
|
69
|
+
@state = Whisper.whisper_init_state @ctx
|
70
|
+
raise 'Failed to initialize Whisper state' if @state.null?
|
71
|
+
end
|
72
|
+
|
73
|
+
def default_full_params params = {}
|
74
74
|
# Get default full params
|
75
|
-
strategy = params.fetch
|
76
|
-
full_params = Whisper.whisper_full_default_params
|
75
|
+
strategy = params.fetch :sampling_strategy, Whisper::WHISPER_SAMPLING_GREEDY
|
76
|
+
full_params = Whisper.whisper_full_default_params strategy
|
77
77
|
|
78
|
-
|
78
|
+
# Set translate to false to prevent translation to English
|
79
79
|
full_params[:translate] = false
|
80
|
+
full_params[:language] = FFI::MemoryPointer.from_string 'auto'
|
80
81
|
|
81
82
|
# Set user-provided full params
|
82
|
-
|
83
|
-
|
84
|
-
if full_params.members.include?(field)
|
85
|
-
full_params[key] = value
|
86
|
-
else
|
87
|
-
warn "Unknown full_param field: #{field}"
|
88
|
-
end
|
83
|
+
params.select{ |k, _| full_params.members.include? k }.each do |key, value|
|
84
|
+
full_params[key] = value
|
89
85
|
end
|
90
86
|
|
91
87
|
full_params
|
@@ -96,17 +92,17 @@ module Whisper
|
|
96
92
|
case format.downcase
|
97
93
|
when 'plaintext'
|
98
94
|
n_segments.times do |i|
|
99
|
-
segment_text = Whisper.
|
95
|
+
segment_text = Whisper.whisper_full_get_segment_text_from_state @state, i
|
100
96
|
output += segment_text
|
101
97
|
end
|
102
98
|
when 'srt'
|
103
99
|
n_segments.times do |i|
|
104
|
-
start_time = Whisper.
|
105
|
-
end_time = Whisper.
|
106
|
-
segment_text = Whisper.
|
100
|
+
start_time = Whisper.whisper_full_get_segment_t0_from_state(@state, i) / 100.0
|
101
|
+
end_time = Whisper.whisper_full_get_segment_t1_from_state(@state, i) / 100.0
|
102
|
+
segment_text = Whisper.whisper_full_get_segment_text_from_state @state, i
|
107
103
|
|
108
104
|
output += "#{i + 1}\n"
|
109
|
-
output += "#{format_time_srt
|
105
|
+
output += "#{format_time_srt start_time} --> #{format_time_srt end_time}\n"
|
110
106
|
output += "#{segment_text.strip}\n\n"
|
111
107
|
end
|
112
108
|
else
|
@@ -120,7 +116,7 @@ module Whisper
|
|
120
116
|
minutes = ((seconds % 3600) / 60).to_i
|
121
117
|
secs = (seconds % 60).to_i
|
122
118
|
millis = ((seconds - seconds.to_i) * 1000).to_i
|
123
|
-
format
|
119
|
+
format '%02d:%02d:%02d,%03d', hours, minutes, secs, millis
|
124
120
|
end
|
125
121
|
end
|
126
122
|
end
|
data/lib/whisper/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: whisper.cpp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.3.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Braulio Oliveira
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-10-
|
11
|
+
date: 2024-10-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ffi
|