whisper.cpp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +35 -0
- data/Gemfile +6 -0
- data/LICENSE.md +42 -0
- data/README.md +11 -0
- data/Rakefile +6 -0
- data/ext/extconf.rb +107 -0
- data/lib/whisper/audio_processor.rb +28 -0
- data/lib/whisper/model.rb +76 -0
- data/lib/whisper.cpp.rb +9 -0
- data/lib/whisper.rb +181 -0
- data/script/console +8 -0
- data/whisper.cpp.gemspec +24 -0
- metadata +99 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 0e8d71af28ba0c9af9fce2d70c86c36b6ebfb2a14d0b93720c87df0102358cc1
|
|
4
|
+
data.tar.gz: af1711e65ce109501e96aff8ce986764997033e15bc9eb97f9252d7cee9c074f
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 98d417a79f60594c4a903aaaf2661e2b358113013dec48f8afd3d0bc2bf753a0091021cf912f59a87291fcf2fb65c87225807f7315366c89d169ee1b9f711efe
|
|
7
|
+
data.tar.gz: 54584e200cd1132ed924e9172a558bd5c3831ef4e7a63bf27a7b97e6726e9d74191a7db261dbd9c21ca2debb85e5fca9178727bb2285b4abe04d0d83b58ede2e
|
data/.gitignore
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# Ignore bundler config
|
|
2
|
+
/.bundle
|
|
3
|
+
|
|
4
|
+
/ext/whisper.cpp
|
|
5
|
+
/lib/libwhispercpp.so
|
|
6
|
+
Makefile
|
|
7
|
+
|
|
8
|
+
# Ignore Gem artifacts
|
|
9
|
+
*.gem
|
|
10
|
+
Gemfile.lock
|
|
11
|
+
|
|
12
|
+
# Ignore log files
|
|
13
|
+
/log/*.log
|
|
14
|
+
|
|
15
|
+
# Ignore temp files
|
|
16
|
+
/tmp
|
|
17
|
+
/coverage/
|
|
18
|
+
/doc/
|
|
19
|
+
/.yardoc
|
|
20
|
+
|
|
21
|
+
# Ignore OS-specific files
|
|
22
|
+
.DS_Store
|
|
23
|
+
Thumbs.db
|
|
24
|
+
|
|
25
|
+
# Ignore editor-specific files
|
|
26
|
+
.idea/
|
|
27
|
+
/.vscode/
|
|
28
|
+
/*.swp
|
|
29
|
+
|
|
30
|
+
# Ignore pry history
|
|
31
|
+
.pry_history
|
|
32
|
+
|
|
33
|
+
# Ignore test coverage reports
|
|
34
|
+
/coverage
|
|
35
|
+
|
data/Gemfile
ADDED
data/LICENSE.md
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
|
|
2
|
+
### **8. Additional Notes**
|
|
3
|
+
|
|
4
|
+
- **Cross-Platform Considerations:** The build process may vary depending on the operating system. The provided `Rakefile` assumes a Unix-like environment.
|
|
5
|
+
- **Error Handling:** Ensure you handle potential errors during the build process, such as network issues when cloning the repository.
|
|
6
|
+
- **Permissions:** You may need appropriate permissions to write to certain directories or to install the CUDA Toolkit.
|
|
7
|
+
|
|
8
|
+
### **9. Potential Issues and Solutions**
|
|
9
|
+
|
|
10
|
+
- **Problem:** The `whisper.cpp` repository changes, breaking the build process.
|
|
11
|
+
- **Solution:** Pin the repository to a specific commit or tag by checking out a specific commit after cloning.
|
|
12
|
+
|
|
13
|
+
```ruby
|
|
14
|
+
# After cloning
|
|
15
|
+
sh 'git checkout <commit-hash>'
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
- **Problem:** CUDA is not available on the system.
|
|
19
|
+
- **Solution:** Modify the build process to skip CUDA support if not available.
|
|
20
|
+
|
|
21
|
+
```ruby
|
|
22
|
+
# In the Rakefile
|
|
23
|
+
ENV['GGML_CUDA'] = '1' if cuda_available?
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
Implement a `cuda_available?` method to check for CUDA availability.
|
|
27
|
+
|
|
28
|
+
### **10. Optional Enhancements**
|
|
29
|
+
|
|
30
|
+
- **Automate Dependency Installation:** You could enhance the `Rakefile` to check for and install missing dependencies.
|
|
31
|
+
- **Add Tests:** Implement unit tests to verify the gem's functionality.
|
|
32
|
+
|
|
33
|
+
---
|
|
34
|
+
|
|
35
|
+
By updating the `Rakefile` to include steps for cloning and building `whisper.cpp` from the GitHub repository, and adjusting the rest of the gem accordingly, you should be able to build and use the gem as intended.
|
|
36
|
+
|
|
37
|
+
Please replace `'Your Name'` and `'your.email@example.com'` with your actual name and email in the `whisper.cpp.gemspec` file.
|
|
38
|
+
|
|
39
|
+
---
|
|
40
|
+
|
|
41
|
+
**Let me know if you need further assistance or if you encounter any issues during the build process.**
|
|
42
|
+
|
data/README.md
ADDED
data/Rakefile
ADDED
data/ext/extconf.rb
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
# ext/extconf.rb
|
|
2
|
+
|
|
3
|
+
require 'fileutils'
|
|
4
|
+
require 'rbconfig'
|
|
5
|
+
|
|
6
|
+
# Set root_dir to the project root directory
|
|
7
|
+
root_dir = File.expand_path('..', __dir__)
|
|
8
|
+
|
|
9
|
+
# Define the whisper.cpp directory path
|
|
10
|
+
whisper_dir = File.join(__dir__, 'whisper.cpp')
|
|
11
|
+
|
|
12
|
+
puts "Root Directory: #{root_dir}"
|
|
13
|
+
puts "Whisper.cpp Directory: #{whisper_dir}"
|
|
14
|
+
|
|
15
|
+
# Clone or update the whisper.cpp repository.
#
# The checkout is only treated as a git repository when it has its own `.git`
# entry. Asking git whether we are "inside a work tree" would also succeed when
# merely a *parent* directory is a repository, and the subsequent pull would
# then act on that parent instead of on whisper.cpp.
#
# All git commands receive explicit paths (`-C` / clone target), so no
# Dir.chdir is needed and the current working directory is never deleted from
# underneath the running process.
whisper_repo_url = 'https://github.com/ggerganov/whisper.cpp.git'

if File.exist?(File.join(whisper_dir, '.git'))
  puts "Updating existing whisper.cpp repository..."
  # Pull the latest changes
  abort "Failed to update whisper.cpp repository" unless system('git', '-C', whisper_dir, 'pull')
else
  if Dir.exist?(whisper_dir)
    # A leftover directory that is not a git checkout cannot be updated:
    # remove it so that the clone below starts from a clean slate.
    puts "Removing non-git directory #{whisper_dir}"
    FileUtils.rm_rf(whisper_dir)
  end

  puts "Cloning whisper.cpp repository..."
  abort "Failed to clone whisper.cpp repository" unless system('git', 'clone', whisper_repo_url, whisper_dir)
end
|
|
40
|
+
|
|
41
|
+
# Verify that the whisper.cpp directory now exists
|
|
42
|
+
unless Dir.exist?(whisper_dir)
|
|
43
|
+
abort "Failed to find or create the whisper.cpp directory at #{whisper_dir}"
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
# Now, proceed to build libwhispercpp.so using the whisper.cpp Makefile
|
|
47
|
+
Dir.chdir(whisper_dir) do
|
|
48
|
+
# Set environment variables for build settings
|
|
49
|
+
ENV['GGML_CUDA'] = '1' # Enable CUDA support
|
|
50
|
+
|
|
51
|
+
puts "Building libwhispercpp.so with GGML_CUDA=#{ENV['GGML_CUDA']}..."
|
|
52
|
+
|
|
53
|
+
# Build libwhisper.a and libggml.a
|
|
54
|
+
unless system 'make clean && make -j libwhisper.a libggml.a'
|
|
55
|
+
abort "Failed to build libwhisper.a and libggml.a"
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
# Link the static libraries into a single shared library using g++
|
|
59
|
+
gcc_command = 'g++ -shared -o libwhispercpp.so ' \
|
|
60
|
+
'-Wl,--whole-archive libwhisper.a -Wl,--no-whole-archive libggml.a ' \
|
|
61
|
+
'-L$( [ -d /opt/cuda ] && echo /opt/cuda/lib || echo /usr/local/cuda/lib ) ' \
|
|
62
|
+
'-lcuda -lcudart -lcublas -lc -lm -lstdc++'
|
|
63
|
+
|
|
64
|
+
unless system gcc_command
|
|
65
|
+
abort "Failed to link libwhispercpp.so"
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
# Verify that libwhispercpp.so was created
|
|
69
|
+
source_lib = File.join(Dir.pwd, 'libwhispercpp.so')
|
|
70
|
+
unless File.exist?(source_lib)
|
|
71
|
+
abort "libwhispercpp.so not found after compilation"
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
# Copy the compiled library to the gem's root directory (root_dir), which is where lib/whisper.rb loads it from
|
|
75
|
+
FileUtils.cp(source_lib, root_dir)
|
|
76
|
+
|
|
77
|
+
puts "Copied libwhispercpp.so to #{root_dir}"
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
puts "Compilation completed."
|
|
81
|
+
|
|
82
|
+
# Create a no-op Makefile to prevent RubyGems from attempting further compilation
|
|
83
|
+
makefile_content = <<~MAKEFILE
|
|
84
|
+
all:
|
|
85
|
+
@echo 'libwhispercpp.so already built.'
|
|
86
|
+
install:
|
|
87
|
+
@echo 'libwhispercpp.so already installed.'
|
|
88
|
+
MAKEFILE
|
|
89
|
+
|
|
90
|
+
File.open('Makefile', 'w') do |f|
|
|
91
|
+
f.write(makefile_content)
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
puts "Created a no-op Makefile to prevent further compilation."
|
|
95
|
+
|
|
96
|
+
# After copying libwhispercpp.so
|
|
97
|
+
|
|
98
|
+
# Path to the cloned whisper.cpp directory
|
|
99
|
+
cloned_dir = whisper_dir
|
|
100
|
+
|
|
101
|
+
# Remove the cloned whisper.cpp directory
|
|
102
|
+
puts "Removing cloned whisper.cpp directory at #{cloned_dir}..."
|
|
103
|
+
FileUtils.rm_rf(cloned_dir)
|
|
104
|
+
|
|
105
|
+
puts "Removed cloned whisper.cpp directory."
|
|
106
|
+
|
|
107
|
+
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
require 'open3'
|
|
2
|
+
require 'tmpdir'
|
|
3
|
+
|
|
4
|
+
module Whisper
|
|
5
|
+
# Converts arbitrary audio files into normalized mono PCM samples by
# shelling out to the `ffmpeg` executable.
class AudioProcessor
  # Decodes +file_path+ with ffmpeg (16 kHz, mono, signed 16-bit little-endian
  # PCM in a WAV container) and returns the samples as Floats.
  #
  # @param file_path [String] path of the audio file handed to ffmpeg
  # @return [Array<Float>] samples, each divided by 32768.0
  # @raise [RuntimeError] if ffmpeg exits unsuccessfully or the produced file
  #   is not a RIFF/WAVE file containing a `data` chunk
  def self.convert_to_float_array(file_path)
    # A private temporary directory gives a collision-free output path and is
    # removed (with its contents) when the block exits, even on error.
    Dir.mktmpdir('whisper') do |tmp_dir|
      wav_file = File.join(tmp_dir, 'audio.wav')
      cmd = [
        'ffmpeg', '-y', '-i', file_path,
        # The codec is pinned so the payload always matches the 's<*' unpack below.
        '-ar', '16000', '-ac', '1', '-c:a', 'pcm_s16le', '-f', 'wav', wav_file
      ]
      _stdout_str, stderr_str, status = Open3.capture3(*cmd)
      raise "ffmpeg error: #{stderr_str}" unless status.success?

      pcm_bytes = extract_pcm_bytes(File.binread(wav_file))

      # Little-endian 16-bit signed integers, normalized to floats.
      pcm_bytes.unpack('s<*').map { |sample| sample / 32768.0 }
    end
  end

  # Returns the payload of the `data` chunk of a RIFF/WAVE byte string.
  #
  # The header is not assumed to have a fixed size: other chunks (for example
  # a `LIST` metadata chunk) may precede `data`, so the chunk list is walked
  # until the `data` chunk is found.
  #
  # @param wav [String] raw bytes of the WAV file
  # @return [String] raw PCM bytes
  # @raise [RuntimeError] if the bytes are not RIFF/WAVE or have no `data` chunk
  def self.extract_pcm_bytes(wav)
    unless wav.bytesize >= 12 && wav.byteslice(0, 4) == 'RIFF' && wav.byteslice(8, 4) == 'WAVE'
      raise 'Invalid WAV data: missing RIFF/WAVE header'
    end

    pos = 12
    while pos + 8 <= wav.bytesize
      chunk_id = wav.byteslice(pos, 4)
      chunk_size = wav.byteslice(pos + 4, 4).unpack1('V')
      payload_start = pos + 8

      # byteslice clamps to the end of the string, so an oversized declared
      # length simply yields everything that is actually present.
      return wav.byteslice(payload_start, chunk_size) if chunk_id == 'data'

      # Chunks are word-aligned: an odd-sized chunk is followed by a pad byte.
      pos = payload_start + chunk_size + (chunk_size.odd? ? 1 : 0)
    end

    raise 'Invalid WAV data: no data chunk found'
  end
  private_class_method :extract_pcm_bytes
end
|
|
27
|
+
end
|
|
28
|
+
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
require_relative '../whisper'
|
|
2
|
+
require_relative 'audio_processor'
|
|
3
|
+
|
|
4
|
+
module Whisper
|
|
5
|
+
# Wrapper around a whisper.cpp context: loads a model file and transcribes
# audio files to plain text or SRT subtitles.
class Model
  # Output formats accepted by #transcribe_from_file.
  SUPPORTED_FORMATS = %w[plaintext srt].freeze

  # Loads the model at +model_path+, requesting GPU execution on device 0.
  #
  # @param model_path [String] path of the model file
  # @raise [RuntimeError] if the library returns a null context
  def initialize(model_path)
    params = Whisper.whisper_context_default_params
    # Modify params as needed
    params[:use_gpu] = true
    params[:gpu_device] = 0

    @ctx = Whisper.whisper_init_from_file_with_params model_path, params
    raise 'Failed to initialize Whisper model' if @ctx.null?
  end

  # Transcribes the audio file at +audio_file_path+.
  #
  # @param audio_file_path [String] path of the audio file to transcribe
  # @param format [String, Symbol] 'plaintext' or 'srt' (case-insensitive)
  # @return [String] the concatenated segment texts, or the SRT document
  # @raise [RuntimeError] if the model was closed, the format is unsupported,
  #   or whisper_full returns a non-zero status
  def transcribe_from_file(audio_file_path, format: 'plaintext')
    raise 'Whisper model has been closed' if @ctx.nil?

    # Validate before doing any work, so that a bad format does not cost a
    # full audio conversion and transcription first.
    output_format = format.to_s.downcase
    raise "Unsupported format: #{format}" unless SUPPORTED_FORMATS.include?(output_format)

    # Load audio file and convert to float array
    audio_data = Whisper::AudioProcessor.convert_to_float_array audio_file_path

    # Prepare full params
    params = Whisper.whisper_full_default_params Whisper::WHISPER_SAMPLING_GREEDY
    params[:n_threads] = 4
    params[:translate] = false
    params[:language] = FFI::Pointer::NULL # Auto-detect language

    # Copy the samples into native memory for the C call
    n_samples = audio_data.size
    samples_ptr = FFI::MemoryPointer.new(:float, n_samples)
    samples_ptr.write_array_of_float audio_data

    result = Whisper.whisper_full @ctx, params, samples_ptr, n_samples
    raise 'Transcription failed' if result != 0

    n_segments = Whisper.whisper_full_n_segments @ctx
    output_format == 'srt' ? build_srt(n_segments) : build_plaintext(n_segments)
  end

  # Frees the native context. Calling it again is a no-op, so the context is
  # never handed to whisper_free twice.
  def close
    return if @ctx.nil?

    Whisper.whisper_free @ctx
    @ctx = nil
  end

  private

  # Concatenates the text of the first +n_segments+ segments.
  def build_plaintext(n_segments)
    Array.new(n_segments) { |i| Whisper.whisper_full_get_segment_text @ctx, i }.join
  end

  # Renders the first +n_segments+ segments as SRT entries (1-based index,
  # time range line, stripped text, blank separator line).
  def build_srt(n_segments)
    entries = Array.new(n_segments) do |i|
      # Raw segment times are divided by 100.0 to obtain seconds.
      start_time = Whisper.whisper_full_get_segment_t0(@ctx, i) / 100.0
      end_time = Whisper.whisper_full_get_segment_t1(@ctx, i) / 100.0
      segment_text = Whisper.whisper_full_get_segment_text @ctx, i

      "#{i + 1}\n" \
        "#{format_time_srt start_time} --> #{format_time_srt end_time}\n" \
        "#{segment_text.strip}\n\n"
    end
    entries.join
  end

  # Formats a duration in seconds as an SRT timestamp (HH:MM:SS,mmm).
  #
  # The value is rounded to whole milliseconds once and then split with
  # integer arithmetic; truncating the fractional part of a Float would turn
  # e.g. 1.15 into ",149" because of binary rounding error.
  def format_time_srt(seconds)
    total_millis = (seconds * 1000).round
    total_secs, millis = total_millis.divmod(1000)
    total_mins, secs = total_secs.divmod(60)
    hours, minutes = total_mins.divmod(60)
    format '%02d:%02d:%02d,%03d', hours, minutes, secs, millis
  end
end
|
|
75
|
+
end
|
|
76
|
+
|
data/lib/whisper.cpp.rb
ADDED
data/lib/whisper.rb
ADDED
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
require 'ffi'
|
|
2
|
+
|
|
3
|
+
module Whisper
|
|
4
|
+
extend FFI::Library
|
|
5
|
+
|
|
6
|
+
# Load the shared library
|
|
7
|
+
lib_path = File.expand_path '../../libwhispercpp.so', __FILE__
|
|
8
|
+
ffi_lib lib_path
|
|
9
|
+
|
|
10
|
+
# Enums and Constants
|
|
11
|
+
WHISPER_SAMPLING_GREEDY = 0
|
|
12
|
+
WHISPER_SAMPLING_BEAM_SEARCH = 1
|
|
13
|
+
|
|
14
|
+
# Enums for alignment heads preset
|
|
15
|
+
enum :whisper_alignment_heads_preset, [
|
|
16
|
+
:WHISPER_AHEADS_NONE,
|
|
17
|
+
:WHISPER_AHEADS_N_TOP_MOST,
|
|
18
|
+
:WHISPER_AHEADS_CUSTOM,
|
|
19
|
+
:WHISPER_AHEADS_TINY_EN,
|
|
20
|
+
:WHISPER_AHEADS_TINY,
|
|
21
|
+
:WHISPER_AHEADS_BASE_EN,
|
|
22
|
+
:WHISPER_AHEADS_BASE,
|
|
23
|
+
:WHISPER_AHEADS_SMALL_EN,
|
|
24
|
+
:WHISPER_AHEADS_SMALL,
|
|
25
|
+
:WHISPER_AHEADS_MEDIUM_EN,
|
|
26
|
+
:WHISPER_AHEADS_MEDIUM,
|
|
27
|
+
:WHISPER_AHEADS_LARGE_V1,
|
|
28
|
+
:WHISPER_AHEADS_LARGE_V2,
|
|
29
|
+
:WHISPER_AHEADS_LARGE_V3
|
|
30
|
+
]
|
|
31
|
+
|
|
32
|
+
# Structs Definitions
|
|
33
|
+
|
|
34
|
+
# whisper_ahead struct
|
|
35
|
+
class WhisperAhead < FFI::Struct
|
|
36
|
+
layout(
|
|
37
|
+
:n_text_layer, :int,
|
|
38
|
+
:n_head, :int
|
|
39
|
+
)
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
# whisper_aheads struct
|
|
43
|
+
class WhisperAheads < FFI::Struct
|
|
44
|
+
layout(
|
|
45
|
+
:n_heads, :size_t,
|
|
46
|
+
:heads, :pointer # Pointer to array of WhisperAhead
|
|
47
|
+
)
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
# whisper_context_params struct
|
|
51
|
+
class WhisperContextParams < FFI::Struct
|
|
52
|
+
layout(
|
|
53
|
+
:use_gpu, :bool,
|
|
54
|
+
:flash_attn, :bool,
|
|
55
|
+
:gpu_device, :int,
|
|
56
|
+
:dtw_token_timestamps, :bool,
|
|
57
|
+
:dtw_aheads_preset, :whisper_alignment_heads_preset,
|
|
58
|
+
:dtw_n_top, :int,
|
|
59
|
+
:dtw_aheads, WhisperAheads,
|
|
60
|
+
:dtw_mem_size, :size_t
|
|
61
|
+
)
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
# whisper_token_data struct
|
|
65
|
+
class WhisperTokenData < FFI::Struct
|
|
66
|
+
layout(
|
|
67
|
+
:id, :int32,
|
|
68
|
+
:tid, :int32,
|
|
69
|
+
:p, :float,
|
|
70
|
+
:plog, :float,
|
|
71
|
+
:pt, :float,
|
|
72
|
+
:ptsum, :float,
|
|
73
|
+
:t0, :int64,
|
|
74
|
+
:t1, :int64,
|
|
75
|
+
:t_dtw, :int64,
|
|
76
|
+
:vlen, :float
|
|
77
|
+
)
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
# greedy sampling parameters
|
|
81
|
+
class WhisperGreedyParams < FFI::Struct
|
|
82
|
+
layout(
|
|
83
|
+
:best_of, :int
|
|
84
|
+
)
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
# beam search sampling parameters
|
|
88
|
+
class WhisperBeamSearchParams < FFI::Struct
|
|
89
|
+
layout(
|
|
90
|
+
:beam_size, :int,
|
|
91
|
+
:patience, :float
|
|
92
|
+
)
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
# whisper_full_params struct
|
|
96
|
+
class WhisperFullParams < FFI::Struct
|
|
97
|
+
layout(
|
|
98
|
+
:strategy, :int,
|
|
99
|
+
:n_threads, :int,
|
|
100
|
+
:n_max_text_ctx, :int,
|
|
101
|
+
:offset_ms, :int,
|
|
102
|
+
:duration_ms, :int,
|
|
103
|
+
:translate, :bool,
|
|
104
|
+
:no_context, :bool,
|
|
105
|
+
:no_timestamps, :bool,
|
|
106
|
+
:single_segment, :bool,
|
|
107
|
+
:print_special, :bool,
|
|
108
|
+
:print_progress, :bool,
|
|
109
|
+
:print_realtime, :bool,
|
|
110
|
+
:print_timestamps, :bool,
|
|
111
|
+
:token_timestamps, :bool,
|
|
112
|
+
:thold_pt, :float,
|
|
113
|
+
:thold_ptsum, :float,
|
|
114
|
+
:max_len, :int,
|
|
115
|
+
:split_on_word, :bool,
|
|
116
|
+
:max_tokens, :int,
|
|
117
|
+
:debug_mode, :bool,
|
|
118
|
+
:audio_ctx, :int,
|
|
119
|
+
:tdrz_enable, :bool,
|
|
120
|
+
:suppress_regex, :pointer,
|
|
121
|
+
:initial_prompt, :pointer,
|
|
122
|
+
:prompt_tokens, :pointer,
|
|
123
|
+
:prompt_n_tokens, :int,
|
|
124
|
+
:language, :pointer,
|
|
125
|
+
:detect_language, :bool,
|
|
126
|
+
:suppress_blank, :bool,
|
|
127
|
+
:suppress_non_speech_tokens, :bool,
|
|
128
|
+
:temperature, :float,
|
|
129
|
+
:max_initial_ts, :float,
|
|
130
|
+
:length_penalty, :float,
|
|
131
|
+
:temperature_inc, :float,
|
|
132
|
+
:entropy_thold, :float,
|
|
133
|
+
:logprob_thold, :float,
|
|
134
|
+
:no_speech_thold, :float,
|
|
135
|
+
:greedy, WhisperGreedyParams,
|
|
136
|
+
:beam_search, WhisperBeamSearchParams,
|
|
137
|
+
:new_segment_callback, :pointer,
|
|
138
|
+
:new_segment_callback_user_data, :pointer,
|
|
139
|
+
:progress_callback, :pointer,
|
|
140
|
+
:progress_callback_user_data, :pointer,
|
|
141
|
+
:encoder_begin_callback, :pointer,
|
|
142
|
+
:encoder_begin_callback_user_data, :pointer,
|
|
143
|
+
:abort_callback, :pointer,
|
|
144
|
+
:abort_callback_user_data, :pointer,
|
|
145
|
+
:logits_filter_callback, :pointer,
|
|
146
|
+
:logits_filter_callback_user_data, :pointer,
|
|
147
|
+
:grammar_rules, :pointer,
|
|
148
|
+
:n_grammar_rules, :size_t,
|
|
149
|
+
:i_start_rule, :size_t,
|
|
150
|
+
:grammar_penalty, :float
|
|
151
|
+
)
|
|
152
|
+
end
|
|
153
|
+
|
|
154
|
+
# Function Bindings
|
|
155
|
+
|
|
156
|
+
# Initialize context with params
|
|
157
|
+
attach_function :whisper_init_from_file_with_params, [:string, WhisperContextParams.by_value], :pointer
|
|
158
|
+
|
|
159
|
+
# Get default context params
|
|
160
|
+
attach_function :whisper_context_default_params, [], WhisperContextParams.by_value
|
|
161
|
+
|
|
162
|
+
# Get default full params
|
|
163
|
+
attach_function :whisper_full_default_params, [:int], WhisperFullParams.by_value
|
|
164
|
+
|
|
165
|
+
# Free functions
|
|
166
|
+
attach_function :whisper_free, [:pointer], :void
|
|
167
|
+
|
|
168
|
+
# Full transcription function
|
|
169
|
+
attach_function :whisper_full, [:pointer, WhisperFullParams.by_value, :pointer, :int], :int
|
|
170
|
+
|
|
171
|
+
# Number of segments
|
|
172
|
+
attach_function :whisper_full_n_segments, [:pointer], :int
|
|
173
|
+
|
|
174
|
+
# Get segment text
|
|
175
|
+
attach_function :whisper_full_get_segment_text, [:pointer, :int], :string
|
|
176
|
+
|
|
177
|
+
# Get segment start and end times
|
|
178
|
+
attach_function :whisper_full_get_segment_t0, [:pointer, :int], :int64
|
|
179
|
+
attach_function :whisper_full_get_segment_t1, [:pointer, :int], :int64
|
|
180
|
+
end
|
|
181
|
+
|
data/script/console
ADDED
data/whisper.cpp.gemspec
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
Gem::Specification.new do |spec|
|
|
2
|
+
spec.name = 'whisper.cpp'
|
|
3
|
+
spec.version = '0.1.0'
|
|
4
|
+
spec.authors = ['Braulio Oliveira']
|
|
5
|
+
spec.email = ['brauliobo@gmail.com']
|
|
6
|
+
|
|
7
|
+
spec.summary = 'Ruby bindings for whisper.cpp'
|
|
8
|
+
spec.description = 'A Ruby gem that provides bindings to the whisper.cpp library for speech transcription.'
|
|
9
|
+
# spec.homepage = 'http://example.com/whisper.cpp'
|
|
10
|
+
spec.license = 'MIT'
|
|
11
|
+
|
|
12
|
+
# Use git ls-files to specify files to include in the gem
|
|
13
|
+
spec.files = `git ls-files -z`.split("\x0")
|
|
14
|
+
|
|
15
|
+
# Specify the extension to build
|
|
16
|
+
spec.extensions = ['ext/extconf.rb']
|
|
17
|
+
|
|
18
|
+
# Dependencies
|
|
19
|
+
spec.add_dependency 'ffi', '~> 1.15'
|
|
20
|
+
|
|
21
|
+
spec.add_development_dependency 'rake-compiler'
|
|
22
|
+
spec.add_development_dependency 'pry'
|
|
23
|
+
end
|
|
24
|
+
|
metadata
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: whisper.cpp
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Braulio Oliveira
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: bin
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2024-09-27 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: ffi
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - "~>"
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: '1.15'
|
|
20
|
+
type: :runtime
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - "~>"
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: '1.15'
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: rake-compiler
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - ">="
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '0'
|
|
34
|
+
type: :development
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - ">="
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: '0'
|
|
41
|
+
- !ruby/object:Gem::Dependency
|
|
42
|
+
name: pry
|
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
|
44
|
+
requirements:
|
|
45
|
+
- - ">="
|
|
46
|
+
- !ruby/object:Gem::Version
|
|
47
|
+
version: '0'
|
|
48
|
+
type: :development
|
|
49
|
+
prerelease: false
|
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
51
|
+
requirements:
|
|
52
|
+
- - ">="
|
|
53
|
+
- !ruby/object:Gem::Version
|
|
54
|
+
version: '0'
|
|
55
|
+
description: A Ruby gem that provides bindings to the whisper.cpp library for speech
|
|
56
|
+
transcription.
|
|
57
|
+
email:
|
|
58
|
+
- brauliobo@gmail.com
|
|
59
|
+
executables: []
|
|
60
|
+
extensions:
|
|
61
|
+
- ext/extconf.rb
|
|
62
|
+
extra_rdoc_files: []
|
|
63
|
+
files:
|
|
64
|
+
- ".gitignore"
|
|
65
|
+
- Gemfile
|
|
66
|
+
- LICENSE.md
|
|
67
|
+
- README.md
|
|
68
|
+
- Rakefile
|
|
69
|
+
- ext/extconf.rb
|
|
70
|
+
- lib/whisper.cpp.rb
|
|
71
|
+
- lib/whisper.rb
|
|
72
|
+
- lib/whisper/audio_processor.rb
|
|
73
|
+
- lib/whisper/model.rb
|
|
74
|
+
- script/console
|
|
75
|
+
- whisper.cpp.gemspec
|
|
76
|
+
homepage:
|
|
77
|
+
licenses:
|
|
78
|
+
- MIT
|
|
79
|
+
metadata: {}
|
|
80
|
+
post_install_message:
|
|
81
|
+
rdoc_options: []
|
|
82
|
+
require_paths:
|
|
83
|
+
- lib
|
|
84
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
85
|
+
requirements:
|
|
86
|
+
- - ">="
|
|
87
|
+
- !ruby/object:Gem::Version
|
|
88
|
+
version: '0'
|
|
89
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
90
|
+
requirements:
|
|
91
|
+
- - ">="
|
|
92
|
+
- !ruby/object:Gem::Version
|
|
93
|
+
version: '0'
|
|
94
|
+
requirements: []
|
|
95
|
+
rubygems_version: 3.5.3
|
|
96
|
+
signing_key:
|
|
97
|
+
specification_version: 4
|
|
98
|
+
summary: Ruby bindings for whisper.cpp
|
|
99
|
+
test_files: []
|