whisper.cpp 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0e8d71af28ba0c9af9fce2d70c86c36b6ebfb2a14d0b93720c87df0102358cc1
4
- data.tar.gz: af1711e65ce109501e96aff8ce986764997033e15bc9eb97f9252d7cee9c074f
3
+ metadata.gz: 9233f65df607fba80d30ff4ae4a86dcbfaa1a750c88de487a2f98ace0a15de75
4
+ data.tar.gz: 157dcee2a04c9b514c3b36a92500532521aa964eed35ef9ffefd6f2d6b5fd40b
5
5
  SHA512:
6
- metadata.gz: 98d417a79f60594c4a903aaaf2661e2b358113013dec48f8afd3d0bc2bf753a0091021cf912f59a87291fcf2fb65c87225807f7315366c89d169ee1b9f711efe
7
- data.tar.gz: 54584e200cd1132ed924e9172a558bd5c3831ef4e7a63bf27a7b97e6726e9d74191a7db261dbd9c21ca2debb85e5fca9178727bb2285b4abe04d0d83b58ede2e
6
+ metadata.gz: 5a1b01fb0ae991764d318f054369f1740d9f7d5324aa8a0deedf7e5bad9f1c8ed84573061c3a68dfda1b73c60bc5761dd2f4f21931c22414098ebe3b1cc9b17d
7
+ data.tar.gz: bae0d8a234b23f88fca2a363438a43989ad01d6e732b95987f740b999b6fa9c74f300de1033e338a141d85e29d5dff2827763b408f599c34bbed50fe58f5d441
data/.gitignore CHANGED
@@ -1,35 +1,7 @@
1
- # Ignore bundler config
2
- /.bundle
3
-
1
+ /ext/Makefile
4
2
  /ext/whisper.cpp
5
- /lib/libwhispercpp.so
6
- Makefile
3
+ libwhispercpp.so
7
4
 
8
- # Ignore Gem artifacts
9
5
  *.gem
10
- Gemfile.lock
11
-
12
- # Ignore log files
13
- /log/*.log
14
-
15
- # Ignore temp files
16
- /tmp
17
- /coverage/
18
- /doc/
19
- /.yardoc
20
-
21
- # Ignore OS-specific files
22
- .DS_Store
23
- Thumbs.db
24
-
25
- # Ignore editor-specific files
26
- .idea/
27
- /.vscode/
28
- /*.swp
29
-
30
- # Ignore pry history
31
- .pry_history
32
6
 
33
- # Ignore test coverage reports
34
- /coverage
35
7
 
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 3.3.0
data/Gemfile.lock ADDED
@@ -0,0 +1,34 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ whisper.cpp (0.1.0)
5
+ ffi (~> 1.15)
6
+
7
+ GEM
8
+ remote: https://rubygems.org/
9
+ specs:
10
+ coderay (1.1.3)
11
+ ffi (1.17.0)
12
+ ffi (1.17.0-x86_64-linux-gnu)
13
+ method_source (1.1.0)
14
+ pry (0.14.2)
15
+ coderay (~> 1.1)
16
+ method_source (~> 1.0)
17
+ rake (13.2.1)
18
+ rake-compiler (1.2.7)
19
+ rake
20
+
21
+ PLATFORMS
22
+ ruby
23
+ x86_64-linux
24
+
25
+ DEPENDENCIES
26
+ pry
27
+ rake-compiler
28
+ whisper.cpp!
29
+
30
+ RUBY VERSION
31
+ ruby 3.3.0p0
32
+
33
+ BUNDLED WITH
34
+ 2.5.11
data/lib/whisper/model.rb CHANGED
@@ -3,7 +3,10 @@ require_relative 'audio_processor'
3
3
 
4
4
  module Whisper
5
5
  class Model
6
- def initialize(model_path)
6
+
7
+ TranscriptionResult = Struct.new :language, :output
8
+
9
+ def initialize model_path
7
10
  params = Whisper.whisper_context_default_params
8
11
  # Modify params as needed
9
12
  params[:use_gpu] = true
@@ -32,30 +35,33 @@ module Whisper
32
35
  result = Whisper.whisper_full @ctx, params, samples_ptr, n_samples
33
36
  raise 'Transcription failed' if result != 0
34
37
 
38
+ # Retrieve detected language
39
+ lang_id = Whisper.whisper_full_lang_id(@ctx)
40
+ language = Whisper.whisper_lang_str(lang_id)
41
+
35
42
  n_segments = Whisper.whisper_full_n_segments @ctx
43
+ output = ''
36
44
  case format.downcase
37
45
  when 'plaintext'
38
- transcript = ''
39
46
  n_segments.times do |i|
40
47
  segment_text = Whisper.whisper_full_get_segment_text @ctx, i
41
- transcript += segment_text
48
+ output += segment_text
42
49
  end
43
- transcript
44
50
  when 'srt'
45
- srt_content = ''
46
51
  n_segments.times do |i|
47
- start_time = Whisper.whisper_full_get_segment_t0(@ctx, i) / 100.0
48
- end_time = Whisper.whisper_full_get_segment_t1(@ctx, i) / 100.0
52
+ start_time = Whisper.whisper_full_get_segment_t0(@ctx, i) / 100.0
53
+ end_time = Whisper.whisper_full_get_segment_t1(@ctx, i) / 100.0
49
54
  segment_text = Whisper.whisper_full_get_segment_text @ctx, i
50
55
 
51
- srt_content += "#{i + 1}\n"
52
- srt_content += "#{format_time_srt start_time} --> #{format_time_srt end_time}\n"
53
- srt_content += "#{segment_text.strip}\n\n"
56
+ output += "#{i + 1}\n"
57
+ output += "#{format_time_srt start_time} --> #{format_time_srt end_time}\n"
58
+ output += "#{segment_text.strip}\n\n"
54
59
  end
55
- srt_content
56
60
  else
57
61
  raise "Unsupported format: #{format}"
58
62
  end
63
+
64
+ TranscriptionResult.new language, output
59
65
  end
60
66
 
61
67
  def close
@@ -64,13 +70,14 @@ module Whisper
64
70
 
65
71
  private
66
72
 
67
- def format_time_srt(seconds)
68
- hours = (seconds / 3600).to_i
73
+ def format_time_srt seconds
74
+ hours = (seconds / 3600).to_i
69
75
  minutes = ((seconds % 3600) / 60).to_i
70
- secs = (seconds % 60).to_i
71
- millis = ((seconds - seconds.to_i) * 1000).to_i
76
+ secs = (seconds % 60).to_i
77
+ millis = ((seconds - seconds.to_i) * 1000).to_i
72
78
  format '%02d:%02d:%02d,%03d', hours, minutes, secs, millis
73
79
  end
80
+
74
81
  end
75
82
  end
76
83
 
data/lib/whisper.cpp.rb CHANGED
@@ -3,7 +3,6 @@ require_relative 'whisper/audio_processor'
3
3
  require_relative 'whisper/model'
4
4
 
5
5
  module Whisper
6
- # The module is intentionally left empty here.
7
- # All classes and modules are defined in their respective files.
6
+
8
7
  end
9
8
 
data/lib/whisper.rb CHANGED
@@ -177,5 +177,25 @@ module Whisper
177
177
  # Get segment start and end times
178
178
  attach_function :whisper_full_get_segment_t0, [:pointer, :int], :int64
179
179
  attach_function :whisper_full_get_segment_t1, [:pointer, :int], :int64
180
+
181
+ # Get detected language ID
182
+ attach_function :whisper_full_lang_id, [:pointer], :int
183
+
184
+ # Convert language ID to string
185
+ attach_function :whisper_lang_str, [:int], :string
186
+
187
+ # void log_callback(int level, const char *msg, void *user_data);
188
+ callback :ggml_log_callback, [:int, :string, :pointer], :void
189
+
190
+ # Set the log callback
191
+ attach_function :whisper_log_set, [:ggml_log_callback, :pointer], :void
192
+
193
+ # Define a no-op log callback to suppress debug messages
194
+ NOOP_LOG_CALLBACK = FFI::Function.new(:void, [:int, :string, :pointer]) do |level, msg, user_data|
195
+ # Intentionally do nothing to suppress logs
196
+ end
197
+ # Set the no-op log callback to suppress logging
198
+ Whisper.whisper_log_set NOOP_LOG_CALLBACK, FFI::Pointer::NULL unless ENV['WHISPER_DEBUG']
199
+
180
200
  end
181
201
 
data/whisper.cpp.gemspec CHANGED
@@ -1,6 +1,8 @@
1
+ require_relative 'lib/whisper/version'
2
+
1
3
  Gem::Specification.new do |spec|
2
4
  spec.name = 'whisper.cpp'
3
- spec.version = '0.1.0'
5
+ spec.version = Whisper::VERSION
4
6
  spec.authors = ['Braulio Oliveira']
5
7
  spec.email = ['brauliobo@gmail.com']
6
8
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: whisper.cpp
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Braulio Oliveira
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-09-27 00:00:00.000000000 Z
11
+ date: 2024-09-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ffi
@@ -62,7 +62,9 @@ extensions:
62
62
  extra_rdoc_files: []
63
63
  files:
64
64
  - ".gitignore"
65
+ - ".ruby-version"
65
66
  - Gemfile
67
+ - Gemfile.lock
66
68
  - LICENSE.md
67
69
  - README.md
68
70
  - Rakefile