itak 1 → 2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +6 -1
- data/bin/itak +4 -1
- data/itak.gemspec +11 -4
- data/lib/itak/denoiser.rb +4 -7
- data/lib/itak/vad.rb +14 -20
- data/lib/itak.rb +14 -6
- metadata +102 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 05d3ad0b0b7a48d1053aa421e23a4d720f8069e3b767d52ffe305eb52266ea4c
|
|
4
|
+
data.tar.gz: 1ca65afc391b6d3b5b80c79478e8317ec87198785a3376e34feafe9b20aa86b7
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 682271f0f3c32b88b489f3465f40df61bfc20588f611fedd41cdea75b2935ed14259166993f8039d7833ffc8abd7e22af3cf0c41d91bad33f15068e22c9143a3
|
|
7
|
+
data.tar.gz: 07e5d6f02e6127ebc74835561ee44c9c0aaa57094ef6fd110f980b08838b698f244cabe64d798b8acd9abf698db2296a6e6d54fded5a19eeb887759a30e10d6e
|
data/README.md
CHANGED
data/bin/itak
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
require "optparse"
|
|
2
|
+
require "optparse/pathname"
|
|
2
3
|
require "itak"
|
|
3
4
|
|
|
4
5
|
def main(argv)
|
|
@@ -8,6 +9,8 @@ def main(argv)
|
|
|
8
9
|
output = Itak.new.run(src, options[:output])
|
|
9
10
|
$stderr.puts "Output written to"
|
|
10
11
|
$stderr.puts output
|
|
12
|
+
rescue => err
|
|
13
|
+
abort err.message
|
|
11
14
|
end
|
|
12
15
|
|
|
13
16
|
def parse_options(argv)
|
|
@@ -18,7 +21,7 @@ def parse_options(argv)
|
|
|
18
21
|
|
|
19
22
|
Usage: itak [options] INPUT
|
|
20
23
|
EOB
|
|
21
|
-
opt.on "-o", "--output=PATH", "Specify output file or directory" do |path|
|
|
24
|
+
opt.on "-o", "--output=PATH", "Specify output file or directory", Pathname do |path|
|
|
22
25
|
options[:output] = path
|
|
23
26
|
end
|
|
24
27
|
}.parse!(argv)
|
data/itak.gemspec
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Gem::Specification.new do |s|
|
|
2
2
|
s.name = "itak"
|
|
3
|
-
s.version = "
|
|
3
|
+
s.version = "2"
|
|
4
4
|
s.authors = ["Kitaiti Makoto"]
|
|
5
5
|
s.email = ["KitaitiMaokto@gmail.com"]
|
|
6
6
|
s.summary = "Audio editing tool for podcasters"
|
|
@@ -11,9 +11,16 @@ Gem::Specification.new do |s|
|
|
|
11
11
|
s.files = `git ls-files -z`.split("\x0")
|
|
12
12
|
s.executables = s.files.filter_map {|f| File.basename(f) if f.start_with?("bin/")}
|
|
13
13
|
|
|
14
|
-
s.add_runtime_dependency "gtcrn"
|
|
15
|
-
s.add_runtime_dependency "whispercpp"
|
|
16
|
-
s.add_runtime_dependency "torchaudio"
|
|
14
|
+
s.add_runtime_dependency "gtcrn", ">= 0.0.2"
|
|
15
|
+
s.add_runtime_dependency "whispercpp", ">= 1.3.6"
|
|
16
|
+
s.add_runtime_dependency "torchaudio", ">= 0.5.0"
|
|
17
|
+
s.add_runtime_dependency "torchcodec"
|
|
18
|
+
s.add_runtime_dependency "red-arrow"
|
|
19
|
+
s.add_runtime_dependency "numo-narray-alt"
|
|
20
|
+
s.add_runtime_dependency "red-arrow-numo-narray"
|
|
21
|
+
s.add_runtime_dependency "torch-rb"
|
|
22
|
+
s.add_runtime_dependency "optparse-pathname"
|
|
23
|
+
s.add_runtime_dependency "ndav-torch-tensor"
|
|
17
24
|
|
|
18
25
|
s.add_development_dependency "rake"
|
|
19
26
|
s.add_development_dependency "rubygems-tasks"
|
data/lib/itak/denoiser.rb
CHANGED
|
@@ -1,16 +1,13 @@
|
|
|
1
|
-
require "fileutils"
|
|
2
1
|
require "gtcrn"
|
|
3
2
|
|
|
4
3
|
class Itak
|
|
5
4
|
class Denoiser
|
|
6
|
-
def initialize
|
|
7
|
-
@
|
|
5
|
+
def initialize
|
|
6
|
+
@gtcrn = GTCRN.new
|
|
8
7
|
end
|
|
9
8
|
|
|
10
|
-
def denoise(src
|
|
11
|
-
|
|
12
|
-
dest ||= dir/src.sub_ext(".denoised" + src.extname).basename
|
|
13
|
-
GTCRN.new.enhance_speech(src, dest)
|
|
9
|
+
def denoise(src)
|
|
10
|
+
@gtcrn.enhance_speech_waveform(src)
|
|
14
11
|
end
|
|
15
12
|
end
|
|
16
13
|
end
|
data/lib/itak/vad.rb
CHANGED
|
@@ -1,33 +1,27 @@
|
|
|
1
|
-
require "pathname"
|
|
2
|
-
require "fileutils"
|
|
3
1
|
require "whisper"
|
|
4
|
-
require "
|
|
2
|
+
require "ndav/torch/tensor"
|
|
5
3
|
|
|
6
4
|
class Itak
|
|
7
5
|
class VAD
|
|
6
|
+
PARAMS = Whisper::VAD::Params.new(
|
|
7
|
+
threshold: 0.7,
|
|
8
|
+
min_silence_duration_ms: 1500
|
|
9
|
+
)
|
|
10
|
+
|
|
8
11
|
def initialize(model: "silero-v6.2.0")
|
|
9
|
-
@
|
|
12
|
+
@vad = Whisper::VAD::Context.new(model)
|
|
10
13
|
end
|
|
11
14
|
|
|
12
|
-
def run(src,
|
|
13
|
-
src =
|
|
14
|
-
dest = Pathname(dest)
|
|
15
|
-
context = Whisper::VAD::Context.new(@model)
|
|
16
|
-
params ||= Whisper::VAD::Params.new(
|
|
17
|
-
threshold: 0.7,
|
|
18
|
-
min_silence_duration_ms: 1000
|
|
19
|
-
)
|
|
20
|
-
waveform, sample_rate = TorchAudio.load(src)
|
|
15
|
+
def run(src, params: PARAMS)
|
|
16
|
+
src = src[0] if src.ndim == 2 && src.shape[0] == 1
|
|
21
17
|
chunks = []
|
|
22
|
-
|
|
18
|
+
@vad.segments_from_samples(params, src.to_ndav).each do |segment|
|
|
23
19
|
segment => {start_time:, end_time:}
|
|
24
|
-
st = start_time *
|
|
25
|
-
en = end_time *
|
|
26
|
-
chunks <<
|
|
20
|
+
st = start_time * SAMPLE_RATE / 1000
|
|
21
|
+
en = end_time * SAMPLE_RATE / 1000
|
|
22
|
+
chunks << src[st..en]
|
|
27
23
|
end
|
|
28
|
-
|
|
29
|
-
TorchAudio.save(dest.to_path, output.unsqueeze(0), sample_rate)
|
|
30
|
-
dest
|
|
24
|
+
Torch.cat(chunks)
|
|
31
25
|
end
|
|
32
26
|
end
|
|
33
27
|
end
|
data/lib/itak.rb
CHANGED
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
require "pathname"
|
|
2
|
-
require "
|
|
2
|
+
require "torchaudio"
|
|
3
3
|
|
|
4
4
|
require "itak/denoiser"
|
|
5
5
|
require "itak/vad"
|
|
6
6
|
|
|
7
7
|
class Itak
|
|
8
|
+
SAMPLE_RATE = 16_000
|
|
9
|
+
|
|
8
10
|
def run(src, dest=nil)
|
|
9
11
|
src = Pathname(src)
|
|
10
12
|
if dest
|
|
@@ -15,13 +17,19 @@ class Itak
|
|
|
15
17
|
else
|
|
16
18
|
dest = src.sub_ext(".denoised-vad" + src.extname)
|
|
17
19
|
end
|
|
20
|
+
dest.dirname.mkpath
|
|
18
21
|
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
22
|
+
waveform, sample_rate = TorchAudio.load(src.to_path)
|
|
23
|
+
raise "Currently only #{SAMPLE_RATE}Hz audio is supported" unless sample_rate == SAMPLE_RATE
|
|
24
|
+
|
|
25
|
+
$stderr.puts "Denoising..."
|
|
26
|
+
denoised = Denoiser.new.denoise(waveform)
|
|
27
|
+
|
|
28
|
+
$stderr.puts "VAD..."
|
|
29
|
+
vadded = VAD.new.run(denoised)
|
|
24
30
|
|
|
31
|
+
TorchAudio.save(dest.to_path, vadded.unsqueeze(0), SAMPLE_RATE)
|
|
32
|
+
$stderr.puts "Saved to #{dest}"
|
|
25
33
|
dest
|
|
26
34
|
end
|
|
27
35
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: itak
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: '
|
|
4
|
+
version: '2'
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Kitaiti Makoto
|
|
@@ -11,6 +11,48 @@ date: 1980-01-02 00:00:00.000000000 Z
|
|
|
11
11
|
dependencies:
|
|
12
12
|
- !ruby/object:Gem::Dependency
|
|
13
13
|
name: gtcrn
|
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
|
15
|
+
requirements:
|
|
16
|
+
- - ">="
|
|
17
|
+
- !ruby/object:Gem::Version
|
|
18
|
+
version: 0.0.2
|
|
19
|
+
type: :runtime
|
|
20
|
+
prerelease: false
|
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
22
|
+
requirements:
|
|
23
|
+
- - ">="
|
|
24
|
+
- !ruby/object:Gem::Version
|
|
25
|
+
version: 0.0.2
|
|
26
|
+
- !ruby/object:Gem::Dependency
|
|
27
|
+
name: whispercpp
|
|
28
|
+
requirement: !ruby/object:Gem::Requirement
|
|
29
|
+
requirements:
|
|
30
|
+
- - ">="
|
|
31
|
+
- !ruby/object:Gem::Version
|
|
32
|
+
version: 1.3.6
|
|
33
|
+
type: :runtime
|
|
34
|
+
prerelease: false
|
|
35
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
36
|
+
requirements:
|
|
37
|
+
- - ">="
|
|
38
|
+
- !ruby/object:Gem::Version
|
|
39
|
+
version: 1.3.6
|
|
40
|
+
- !ruby/object:Gem::Dependency
|
|
41
|
+
name: torchaudio
|
|
42
|
+
requirement: !ruby/object:Gem::Requirement
|
|
43
|
+
requirements:
|
|
44
|
+
- - ">="
|
|
45
|
+
- !ruby/object:Gem::Version
|
|
46
|
+
version: 0.5.0
|
|
47
|
+
type: :runtime
|
|
48
|
+
prerelease: false
|
|
49
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
50
|
+
requirements:
|
|
51
|
+
- - ">="
|
|
52
|
+
- !ruby/object:Gem::Version
|
|
53
|
+
version: 0.5.0
|
|
54
|
+
- !ruby/object:Gem::Dependency
|
|
55
|
+
name: torchcodec
|
|
14
56
|
requirement: !ruby/object:Gem::Requirement
|
|
15
57
|
requirements:
|
|
16
58
|
- - ">="
|
|
@@ -24,7 +66,7 @@ dependencies:
|
|
|
24
66
|
- !ruby/object:Gem::Version
|
|
25
67
|
version: '0'
|
|
26
68
|
- !ruby/object:Gem::Dependency
|
|
27
|
-
name:
|
|
69
|
+
name: red-arrow
|
|
28
70
|
requirement: !ruby/object:Gem::Requirement
|
|
29
71
|
requirements:
|
|
30
72
|
- - ">="
|
|
@@ -38,7 +80,63 @@ dependencies:
|
|
|
38
80
|
- !ruby/object:Gem::Version
|
|
39
81
|
version: '0'
|
|
40
82
|
- !ruby/object:Gem::Dependency
|
|
41
|
-
name:
|
|
83
|
+
name: numo-narray-alt
|
|
84
|
+
requirement: !ruby/object:Gem::Requirement
|
|
85
|
+
requirements:
|
|
86
|
+
- - ">="
|
|
87
|
+
- !ruby/object:Gem::Version
|
|
88
|
+
version: '0'
|
|
89
|
+
type: :runtime
|
|
90
|
+
prerelease: false
|
|
91
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
92
|
+
requirements:
|
|
93
|
+
- - ">="
|
|
94
|
+
- !ruby/object:Gem::Version
|
|
95
|
+
version: '0'
|
|
96
|
+
- !ruby/object:Gem::Dependency
|
|
97
|
+
name: red-arrow-numo-narray
|
|
98
|
+
requirement: !ruby/object:Gem::Requirement
|
|
99
|
+
requirements:
|
|
100
|
+
- - ">="
|
|
101
|
+
- !ruby/object:Gem::Version
|
|
102
|
+
version: '0'
|
|
103
|
+
type: :runtime
|
|
104
|
+
prerelease: false
|
|
105
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
106
|
+
requirements:
|
|
107
|
+
- - ">="
|
|
108
|
+
- !ruby/object:Gem::Version
|
|
109
|
+
version: '0'
|
|
110
|
+
- !ruby/object:Gem::Dependency
|
|
111
|
+
name: torch-rb
|
|
112
|
+
requirement: !ruby/object:Gem::Requirement
|
|
113
|
+
requirements:
|
|
114
|
+
- - ">="
|
|
115
|
+
- !ruby/object:Gem::Version
|
|
116
|
+
version: '0'
|
|
117
|
+
type: :runtime
|
|
118
|
+
prerelease: false
|
|
119
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
120
|
+
requirements:
|
|
121
|
+
- - ">="
|
|
122
|
+
- !ruby/object:Gem::Version
|
|
123
|
+
version: '0'
|
|
124
|
+
- !ruby/object:Gem::Dependency
|
|
125
|
+
name: optparse-pathname
|
|
126
|
+
requirement: !ruby/object:Gem::Requirement
|
|
127
|
+
requirements:
|
|
128
|
+
- - ">="
|
|
129
|
+
- !ruby/object:Gem::Version
|
|
130
|
+
version: '0'
|
|
131
|
+
type: :runtime
|
|
132
|
+
prerelease: false
|
|
133
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
134
|
+
requirements:
|
|
135
|
+
- - ">="
|
|
136
|
+
- !ruby/object:Gem::Version
|
|
137
|
+
version: '0'
|
|
138
|
+
- !ruby/object:Gem::Dependency
|
|
139
|
+
name: ndav-torch-tensor
|
|
42
140
|
requirement: !ruby/object:Gem::Requirement
|
|
43
141
|
requirements:
|
|
44
142
|
- - ">="
|
|
@@ -189,7 +287,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
189
287
|
- !ruby/object:Gem::Version
|
|
190
288
|
version: '0'
|
|
191
289
|
requirements: []
|
|
192
|
-
rubygems_version: 4.0.
|
|
290
|
+
rubygems_version: 4.0.6
|
|
193
291
|
specification_version: 4
|
|
194
292
|
summary: Audio editing tool for podcasters
|
|
195
293
|
test_files: []
|