gtcrn 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 684bab48a155c6ad43d42479fd8c361632a4c51d1ecf19f388e7487e575c3ce4
4
+ data.tar.gz: 78655c56cf93b6839ba193112534c02a4e8d73084c89ec6536dc9db6cfb12032
5
+ SHA512:
6
+ metadata.gz: e71548aba12e685361ab24a6f2a1574a151cce46c622bf281f54254273b9f8e83ad7ced377ff0ad0b9096e530fef2c1dab289f6a3f1bd3457d511599960ecc64
7
+ data.tar.gz: a8ad7d5e076c0257a79a7952fc96ab15f001280113cbc05f7e0d49d6b4ddb95ae562a02ffc15e25d94b4b8b84a6390eaaae434cd5899b91fd0622af1a2e1f43f
data/.gitignore ADDED
@@ -0,0 +1,3 @@
1
+ Gemfile.lock
2
+ .bundle/
3
+ vendor/
data/.gitmodules ADDED
@@ -0,0 +1,3 @@
1
+ [submodule "gtcrn"]
2
+ path = gtcrn
3
+ url = https://github.com/Xiaobin-Rong/gtcrn.git
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source "https://rubygems.org"
2
+
3
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Kitaiti Makoto
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,52 @@
1
+ GTCRN
2
+ =====
3
+
4
+ Speech enhancement (denoising) using the [GTCRN model](https://github.com/Xiaobin-Rong/gtcrn).
5
+
6
+ SYNOPSIS
7
+ --------
8
+
9
+ ```ruby
10
+ require "gtcrn"
11
+
12
+ output = GTCRN.new.enhance_speech("path/to/audio.wav", "path/to/output.wav")
13
+
14
+ # You may omit output path
15
+ output = GTCRN.new.enhance_speech("path/to/audio.wav")
16
+ # => #<Pathname:path/to/audio.enhanced.wav>
17
+ ```
18
+
19
+ The audio file must be a monaural WAV file with a 16 kHz sampling rate and 16 bits per sample.
20
+
21
+ CLI
22
+ ---
23
+
24
+ This gem ships with the `gtcrn` command.
25
+
26
+ % gtcrn path/to/audio.wav path/to/output.wav
27
+ Enhanced file written to
28
+ path/to/output.wav
29
+
30
+ You may omit the output path:
31
+
32
+ % gtcrn path/to/audio.wav
33
+ Enhanced file written to
34
+ path/to/audio.enhanced.wav
35
+
36
+ ENHANCE AUDIO DATA
37
+ ------------------
38
+
39
+ You can also enhance audio data in memory:
40
+
41
+ ```ruby
42
+ waveform, sample_rate = TorchAudio.load("path/to/audio.wav")
43
+ enhanced = GTCRN.new.enhance_speech_waveform(waveform)
44
+ TorchAudio.save("path/to/output.wav", enhanced.squeeze, sample_rate)
45
+ ```
46
+
47
+ LICENSE
48
+ -------
49
+
50
+ MIT license. See LICENSE file.
51
+
52
+ The GTCRN ONNX model under the vendor/gtcrn directory is distributed under the MIT license by Rong Xiaobin. See the vendor/gtcrn/LICENSE file.
data/Rakefile ADDED
@@ -0,0 +1,21 @@
1
+ require "rake/testtask"
2
+ require "rubygems/tasks"
3
+
4
+ task default: :test
5
+
6
+ gem_tasks = Gem::Tasks.new
7
+ gemspec = gem_tasks.build.gem.project.gemspecs["gtcrn"]
8
+
9
+ vendor_files = FileList[gemspec.files.select {|path| path.start_with?("vendor/gtcrn/")}]
10
+ vendor_files.each do |f|
11
+ src = f.pathmap("%{vendor/,./}p")
12
+ dir = f.pathmap("%d")
13
+
14
+ directory dir
15
+ file f => [src, dir] do |t|
16
+ copy t.source, t.name
17
+ end
18
+ end
19
+
20
+ Rake::TestTask.new test: vendor_files
21
+ task build: vendor_files
data/bin/gtcrn ADDED
@@ -0,0 +1,11 @@
1
+ require "gtcrn"
2
+
3
+ def main(argv)
4
+ input = argv.shift
5
+ abort "Specify input file" unless input
6
+ output = GTCRN.new.enhance_speech(input, argv.shift)
7
+ $stderr.puts "Enhanced file written to"
8
+ $stderr.puts output
9
+ end
10
+
11
+ main ARGV
data/gtcrn.gemspec ADDED
@@ -0,0 +1,28 @@
1
+ Gem::Specification.new do |s|
2
+ s.name = "gtcrn"
3
+ s.version = "0.0.1"
4
+ s.authors = ["Kitaiti Makoto"]
5
+ s.email = ["KitaitiMakoto@gmail.com"]
6
+ s.summary = "Denoises audio"
7
+ s.description = "Denoises audio (Speech Enhancement) using GTCRN model"
8
+ s.license = "MIT"
9
+ s.homepage = "https://gitlab.com/KitaitiMakoto/gtcrn"
10
+
11
+ s.files = `git ls-files -z`.split("\x0") + %w[
12
+ vendor/gtcrn/LICENSE
13
+ vendor/gtcrn/stream/onnx_models/gtcrn_simple.onnx
14
+ ]
15
+ s.executables = s.files.filter_map {|f| File.basename(f) if f.start_with?("bin/")}
16
+
17
+ s.add_runtime_dependency "onnxruntime"
18
+ s.add_runtime_dependency "torch-rb"
19
+ s.add_runtime_dependency "torchaudio"
20
+ s.add_runtime_dependency "numo-narray-alt"
21
+
22
+ s.add_development_dependency "rake"
23
+ s.add_development_dependency "rubygems-tasks"
24
+ s.add_development_dependency "test-unit"
25
+ s.add_development_dependency "test-unit-notify"
26
+ s.add_development_dependency "terminal-notifier" if RUBY_PLATFORM.match?(/darwin/)
27
+ s.add_development_dependency "wavefile"
28
+ end
data/lib/gtcrn.rb ADDED
@@ -0,0 +1,71 @@
1
+ require "pathname"
2
+ require "torchaudio"
3
+ require "torch"
4
+ require "onnxruntime"
5
+ require "numo/narray/alt"
6
+
7
+ class GTCRN
8
+ MODEL_PATH = File.join(__dir__, "../vendor/gtcrn/stream/onnx_models/gtcrn_simple.onnx").freeze
9
+ ISTFT_OPTS = {
10
+ n_fft: 512,
11
+ hop_length: 256,
12
+ win_length: 512,
13
+ window: Torch.hann_window(512).pow(0.5)
14
+ }
15
+ STFT_OPTS = ISTFT_OPTS.merge(
16
+ pad_mode: "reflect",
17
+ return_complex: true
18
+ )
19
+
20
+ def initialize
21
+ @session = OnnxRuntime::InferenceSession.new(MODEL_PATH)
22
+ end
23
+
24
+ def enhance_speech(path, dest=nil)
25
+ path = Pathname(path)
26
+ if dest
27
+ dest = Pathname(dest)
28
+ if dest.directory?
29
+ dest = dest/path.basename.sub_ext(".enhanced" + path.extname)
30
+ end
31
+ else
32
+ dest = path.sub_ext(".enhanced" + path.extname)
33
+ end
34
+
35
+ waveform, sample_rate = TorchAudio.load(path.to_path)
36
+ raise "Sampling rate must be 16000 Hz, but given: #{sample_rate} Hz" unless sample_rate == 16000
37
+ enhanced = enhance_speech_waveform(waveform)
38
+ TorchAudio.save(dest.to_path, enhanced.squeeze, sample_rate)
39
+
40
+ dest
41
+ end
42
+
43
+ def enhance_speech_waveform(waveform)
44
+ conv_cache, tra_cache, inter_cache = 1.upto(3).collect {|i| Numo::SFloat.zeros(*@session.inputs[i][:shape]) }
45
+ inputs = Torch.view_as_real(
46
+ Torch.stft(waveform[0], **STFT_OPTS)[nil]
47
+ ).numo
48
+ outputs = []
49
+ inputs.shape[-2].times do |i|
50
+ enh, conv_cache, tra_cache, inter_cache = @session.run(
51
+ @session.outputs.collect {|output| output[:name]},
52
+ {
53
+ mix: OnnxRuntime::OrtValue.from_numo(inputs[0.., 0.., i..i, 0..]),
54
+ conv_cache: OnnxRuntime::OrtValue.from_numo(conv_cache),
55
+ tra_cache: OnnxRuntime::OrtValue.from_numo(tra_cache),
56
+ inter_cache: OnnxRuntime::OrtValue.from_numo(inter_cache)
57
+ },
58
+ output_type: :numo
59
+ )
60
+ outputs << enh
61
+ end
62
+ concated = Numo::NArray.concatenate(outputs, axis: 2)
63
+ real = concated[0.., 0.., 0.., 0]
64
+ imag = concated[0.., 0.., 0.., 1]
65
+ enhanced = Torch.istft(
66
+ Torch.from_numo(real) + 1i * Torch.from_numo(imag),
67
+ **ISTFT_OPTS
68
+ )
69
+ enhanced
70
+ end
71
+ end
data/test/helper.rb ADDED
@@ -0,0 +1,5 @@
1
+ require "test/unit"
2
+ require "test/unit/notify"
3
+ require "terminal-notifier" if RUBY_PLATFORM.match?(/darwin/)
4
+
5
+ require "gtcrn"
@@ -0,0 +1,22 @@
1
+ require_relative "helper"
2
+ require "tmpdir"
3
+ require "wavefile"
4
+
5
+ class TestGTCRN < Test::Unit::TestCase
6
+ def test_enhance_speech
7
+ Dir.mktmpdir do |dir|
8
+ audio = "gtcrn/test_wavs/mix.wav"
9
+ dest = GTCRN.new.enhance_speech(audio, dir)
10
+ assert_path_exist dest
11
+
12
+ source = WaveFile::Reader.new(audio)
13
+ source.class
14
+ enhanced = WaveFile::Reader.new(dest.to_path)
15
+ enhanced.close
16
+ assert enhanced.format.mono?
17
+ assert_equal 16, enhanced.format.bits_per_sample
18
+ assert_equal 16000, enhanced.format.sample_rate
19
+ assert source.total_sample_frames - enhanced.total_sample_frames < 512
20
+ end
21
+ end
22
+ end
data/vendor/gtcrn/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Rong Xiaobin
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
metadata ADDED
@@ -0,0 +1,194 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: gtcrn
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Kitaiti Makoto
8
+ bindir: bin
9
+ cert_chain: []
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: onnxruntime
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - ">="
17
+ - !ruby/object:Gem::Version
18
+ version: '0'
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - ">="
24
+ - !ruby/object:Gem::Version
25
+ version: '0'
26
+ - !ruby/object:Gem::Dependency
27
+ name: torch-rb
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ - !ruby/object:Gem::Dependency
41
+ name: torchaudio
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: '0'
47
+ type: :runtime
48
+ prerelease: false
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ - !ruby/object:Gem::Dependency
55
+ name: numo-narray-alt
56
+ requirement: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ type: :runtime
62
+ prerelease: false
63
+ version_requirements: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: '0'
68
+ - !ruby/object:Gem::Dependency
69
+ name: rake
70
+ requirement: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: '0'
75
+ type: :development
76
+ prerelease: false
77
+ version_requirements: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ version: '0'
82
+ - !ruby/object:Gem::Dependency
83
+ name: rubygems-tasks
84
+ requirement: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ version: '0'
89
+ type: :development
90
+ prerelease: false
91
+ version_requirements: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - ">="
94
+ - !ruby/object:Gem::Version
95
+ version: '0'
96
+ - !ruby/object:Gem::Dependency
97
+ name: test-unit
98
+ requirement: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - ">="
101
+ - !ruby/object:Gem::Version
102
+ version: '0'
103
+ type: :development
104
+ prerelease: false
105
+ version_requirements: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - ">="
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ - !ruby/object:Gem::Dependency
111
+ name: test-unit-notify
112
+ requirement: !ruby/object:Gem::Requirement
113
+ requirements:
114
+ - - ">="
115
+ - !ruby/object:Gem::Version
116
+ version: '0'
117
+ type: :development
118
+ prerelease: false
119
+ version_requirements: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - ">="
122
+ - !ruby/object:Gem::Version
123
+ version: '0'
124
+ - !ruby/object:Gem::Dependency
125
+ name: terminal-notifier
126
+ requirement: !ruby/object:Gem::Requirement
127
+ requirements:
128
+ - - ">="
129
+ - !ruby/object:Gem::Version
130
+ version: '0'
131
+ type: :development
132
+ prerelease: false
133
+ version_requirements: !ruby/object:Gem::Requirement
134
+ requirements:
135
+ - - ">="
136
+ - !ruby/object:Gem::Version
137
+ version: '0'
138
+ - !ruby/object:Gem::Dependency
139
+ name: wavefile
140
+ requirement: !ruby/object:Gem::Requirement
141
+ requirements:
142
+ - - ">="
143
+ - !ruby/object:Gem::Version
144
+ version: '0'
145
+ type: :development
146
+ prerelease: false
147
+ version_requirements: !ruby/object:Gem::Requirement
148
+ requirements:
149
+ - - ">="
150
+ - !ruby/object:Gem::Version
151
+ version: '0'
152
+ description: Denoises audio (Speech Enhancement) using GTCRN model
153
+ email:
154
+ - KitaitiMakoto@gmail.com
155
+ executables:
156
+ - gtcrn
157
+ extensions: []
158
+ extra_rdoc_files: []
159
+ files:
160
+ - ".gitignore"
161
+ - ".gitmodules"
162
+ - Gemfile
163
+ - LICENSE
164
+ - README.md
165
+ - Rakefile
166
+ - bin/gtcrn
167
+ - gtcrn.gemspec
168
+ - lib/gtcrn.rb
169
+ - test/helper.rb
170
+ - test/test_gtcrn.rb
171
+ - vendor/gtcrn/LICENSE
172
+ - vendor/gtcrn/stream/onnx_models/gtcrn_simple.onnx
173
+ homepage: https://gitlab.com/KitaitiMakoto/gtcrn
174
+ licenses:
175
+ - MIT
176
+ metadata: {}
177
+ rdoc_options: []
178
+ require_paths:
179
+ - lib
180
+ required_ruby_version: !ruby/object:Gem::Requirement
181
+ requirements:
182
+ - - ">="
183
+ - !ruby/object:Gem::Version
184
+ version: '0'
185
+ required_rubygems_version: !ruby/object:Gem::Requirement
186
+ requirements:
187
+ - - ">="
188
+ - !ruby/object:Gem::Version
189
+ version: '0'
190
+ requirements: []
191
+ rubygems_version: 4.0.3
192
+ specification_version: 4
193
+ summary: Denoises audio
194
+ test_files: []