whisper.cpp 0.3.1 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/whisper/model.rb +39 -43
- data/lib/whisper/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fb6cb421d7d00b2f9ff5f93f543bf468d5d8c0754befe434b25e1b0e2b3c80a4
|
4
|
+
data.tar.gz: cbd9ea39b8f408692b857cf0302d945392c7aec620834aca340e9c6cb6c900c8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9a2671addade694795b2a76b1db257db8ffab3ebd9b3266df237a45a73d85b8f7c5f3ebae78bc33c4c38fb869397ca7f8082b909a5d8aebe0818f0174ac01fdd
|
7
|
+
data.tar.gz: d150ab03e621e4b2128ed34792d6968ad5e8fccf599150be240344f97b10c58ccbcac9a8d2022356d97c31c856004c18d0f8fc4edf32ceacfbe2dac14805de39
|
data/lib/whisper/model.rb
CHANGED
@@ -8,84 +8,80 @@ module Whisper
|
|
8
8
|
def initialize(model_path)
|
9
9
|
@model_path = model_path
|
10
10
|
@ctx = nil
|
11
|
+
@state = nil
|
12
|
+
init_whisper_context
|
13
|
+
init_whisper_state
|
11
14
|
end
|
12
15
|
|
13
16
|
def transcribe_from_file(audio_file_path, format: 'plaintext', **params)
|
14
17
|
# Load audio file and convert to float array
|
15
|
-
audio_data = Whisper::AudioProcessor.convert_to_float_array
|
16
|
-
transcribe_from_audio_data
|
18
|
+
audio_data = Whisper::AudioProcessor.convert_to_float_array audio_file_path
|
19
|
+
transcribe_from_audio_data audio_data, format: format, **params
|
17
20
|
end
|
18
21
|
|
19
22
|
def transcribe_from_audio_data(audio_data, format: 'plaintext', **params)
|
20
|
-
init_whisper_context(params)
|
21
|
-
|
22
23
|
# Prepare full params
|
23
|
-
full_params = default_full_params
|
24
|
+
full_params = default_full_params params
|
24
25
|
|
25
26
|
# Prepare audio data pointer
|
26
27
|
n_samples = audio_data.size
|
27
|
-
samples_ptr = FFI::MemoryPointer.new
|
28
|
-
samples_ptr.write_array_of_float
|
28
|
+
samples_ptr = FFI::MemoryPointer.new :float, n_samples
|
29
|
+
samples_ptr.write_array_of_float audio_data
|
29
30
|
|
30
|
-
# Call the
|
31
|
-
|
32
|
-
result = Whisper.whisper_full_parallel(@ctx, full_params, samples_ptr, n_samples, n_processors)
|
31
|
+
# Call the whisper_full_with_state function
|
32
|
+
result = Whisper.whisper_full_with_state @ctx, @state, full_params, samples_ptr, n_samples
|
33
33
|
raise 'Transcription failed' if result != 0
|
34
34
|
|
35
35
|
# Retrieve detected language
|
36
|
-
lang_id = Whisper.
|
37
|
-
language = Whisper.whisper_lang_str
|
36
|
+
lang_id = Whisper.whisper_full_lang_id_from_state @state
|
37
|
+
language = Whisper.whisper_lang_str lang_id
|
38
38
|
|
39
39
|
# Retrieve the transcription output
|
40
|
-
n_segments = Whisper.
|
41
|
-
output = format_transcription
|
40
|
+
n_segments = Whisper.whisper_full_n_segments_from_state @state
|
41
|
+
output = format_transcription format, n_segments: n_segments
|
42
42
|
|
43
|
-
TranscriptionResult.new
|
43
|
+
TranscriptionResult.new language, output
|
44
44
|
end
|
45
45
|
|
46
46
|
def close
|
47
|
-
Whisper.
|
47
|
+
Whisper.whisper_free_state @state unless @state.nil?
|
48
|
+
Whisper.whisper_free @ctx unless @ctx.nil?
|
48
49
|
end
|
49
50
|
|
50
51
|
private
|
51
52
|
|
52
|
-
def init_whisper_context
|
53
|
+
def init_whisper_context params = {}
|
53
54
|
return unless @ctx.nil?
|
54
55
|
|
55
56
|
ctx_params = Whisper.whisper_context_default_params
|
56
57
|
|
57
|
-
|
58
|
-
|
59
|
-
user_ctx_params.each do |key, value|
|
60
|
-
if ctx_params.members.include?(field)
|
61
|
-
ctx_params[key] = value
|
62
|
-
else
|
63
|
-
warn "Unknown context_param field: #{field}"
|
64
|
-
end
|
58
|
+
params.select{ |k, _| ctx_params.members.include? k }.each do |key, value|
|
59
|
+
ctx_params[key] = value
|
65
60
|
end
|
66
61
|
ctx_params[:gpu_device] = ENV['WHISPER_GPU']&.to_i || 0
|
67
62
|
|
68
63
|
# Initialize context
|
69
|
-
@ctx = Whisper.whisper_init_from_file_with_params
|
64
|
+
@ctx = Whisper.whisper_init_from_file_with_params @model_path, ctx_params
|
70
65
|
raise 'Failed to initialize Whisper model' if @ctx.null?
|
71
66
|
end
|
72
67
|
|
73
|
-
def
|
68
|
+
def init_whisper_state
|
69
|
+
@state = Whisper.whisper_init_state @ctx
|
70
|
+
raise 'Failed to initialize Whisper state' if @state.null?
|
71
|
+
end
|
72
|
+
|
73
|
+
def default_full_params params = {}
|
74
74
|
# Get default full params
|
75
|
-
strategy = params.fetch
|
76
|
-
full_params = Whisper.whisper_full_default_params
|
75
|
+
strategy = params.fetch :sampling_strategy, Whisper::WHISPER_SAMPLING_GREEDY
|
76
|
+
full_params = Whisper.whisper_full_default_params strategy
|
77
77
|
|
78
|
-
|
78
|
+
# Set translate to false to prevent translation to English
|
79
79
|
full_params[:translate] = false
|
80
|
+
full_params[:language] = FFI::MemoryPointer.from_string 'auto'
|
80
81
|
|
81
82
|
# Set user-provided full params
|
82
|
-
|
83
|
-
|
84
|
-
if full_params.members.include?(field)
|
85
|
-
full_params[key] = value
|
86
|
-
else
|
87
|
-
warn "Unknown full_param field: #{field}"
|
88
|
-
end
|
83
|
+
params.select{ |k, _| full_params.members.include? k }.each do |key, value|
|
84
|
+
full_params[key] = value
|
89
85
|
end
|
90
86
|
|
91
87
|
full_params
|
@@ -96,17 +92,17 @@ module Whisper
|
|
96
92
|
case format.downcase
|
97
93
|
when 'plaintext'
|
98
94
|
n_segments.times do |i|
|
99
|
-
segment_text = Whisper.
|
95
|
+
segment_text = Whisper.whisper_full_get_segment_text_from_state @state, i
|
100
96
|
output += segment_text
|
101
97
|
end
|
102
98
|
when 'srt'
|
103
99
|
n_segments.times do |i|
|
104
|
-
start_time = Whisper.
|
105
|
-
end_time = Whisper.
|
106
|
-
segment_text = Whisper.
|
100
|
+
start_time = Whisper.whisper_full_get_segment_t0_from_state(@state, i) / 100.0
|
101
|
+
end_time = Whisper.whisper_full_get_segment_t1_from_state(@state, i) / 100.0
|
102
|
+
segment_text = Whisper.whisper_full_get_segment_text_from_state @state, i
|
107
103
|
|
108
104
|
output += "#{i + 1}\n"
|
109
|
-
output += "#{format_time_srt
|
105
|
+
output += "#{format_time_srt start_time} --> #{format_time_srt end_time}\n"
|
110
106
|
output += "#{segment_text.strip}\n\n"
|
111
107
|
end
|
112
108
|
else
|
@@ -120,7 +116,7 @@ module Whisper
|
|
120
116
|
minutes = ((seconds % 3600) / 60).to_i
|
121
117
|
secs = (seconds % 60).to_i
|
122
118
|
millis = ((seconds - seconds.to_i) * 1000).to_i
|
123
|
-
format
|
119
|
+
format '%02d:%02d:%02d,%03d', hours, minutes, secs, millis
|
124
120
|
end
|
125
121
|
end
|
126
122
|
end
|
data/lib/whisper/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: whisper.cpp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.3.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Braulio Oliveira
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-10-
|
11
|
+
date: 2024-10-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ffi
|