gtcrn 0.0.1 → 0.0.2
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/bin/gtcrn +18 -1
- data/gtcrn.gemspec +1 -1
- data/lib/gtcrn.rb +2 -2
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: e62dd7f2ba7da50ab841769e0ccc9d0aeef8f0a31499a2a59b80ffc749b1ca9b
|
|
4
|
+
data.tar.gz: 555a5c91d412822e1e066bb1ea4bccdfd1cdf9e334adad025b886aea3a2b62f3
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: c82eb21211da591054d65de5aa131219432f20ce161b53b750d961baed6bd5e3f840076ff17dcdf9568a059257544de648a33c1f7d51911e0513c2e855bb3e2e
|
|
7
|
+
data.tar.gz: 50814826a17fb42e874f7ea7bd8fae2366f4b08d2961c2e354f4b8b9e4346199d37e61495c4a236cafc46f9886c2a1a9ba6f4b4e9b1b5f5e6f837a0a16265f03
|
data/README.md
CHANGED
|
@@ -16,14 +16,14 @@ output = GTCRN.new.enhance_speech("path/to/audio.wav")
|
|
|
16
16
|
# => <Pathname:path/to/audio.enhanced.wav>
|
|
17
17
|
````
|
|
18
18
|
|
|
19
|
-
Audio file must be
|
|
19
|
+
Audio file must be mono WAV with 16kHz sampling rate and 16-bit per sample.
|
|
20
20
|
|
|
21
21
|
CLI
|
|
22
22
|
---
|
|
23
23
|
|
|
24
24
|
This gem ships with `gtcrn` command.
|
|
25
25
|
|
|
26
|
-
% gtcrn path/to/audio.wav path/to/output.wav
|
|
26
|
+
% gtcrn path/to/audio.wav --output=path/to/output.wav
|
|
27
27
|
Enhanced file written to
|
|
28
28
|
path/to/output.wav
|
|
29
29
|
|
data/bin/gtcrn
CHANGED
|
@@ -1,11 +1,28 @@
|
|
|
1
|
+
require "optparse"
|
|
1
2
|
require "gtcrn"
|
|
2
3
|
|
|
3
4
|
def main(argv)
|
|
5
|
+
options = parse_options(argv)
|
|
4
6
|
input = argv.shift
|
|
5
7
|
abort "Specify input file" unless input
|
|
6
|
-
output = GTCRN.new.enhance_speech(input,
|
|
8
|
+
output = GTCRN.new.enhance_speech(input, options[:output])
|
|
7
9
|
$stderr.puts "Enhanced file written to"
|
|
8
10
|
$stderr.puts output
|
|
9
11
|
end
|
|
10
12
|
|
|
13
|
+
def parse_options(argv)
|
|
14
|
+
options = {}
|
|
15
|
+
OptionParser.new {|opt|
|
|
16
|
+
opt.banner = <<~EOB
|
|
17
|
+
Enhance speech properties of GTCRN files
|
|
18
|
+
|
|
19
|
+
Usage: gtcrn [options] INPUT
|
|
20
|
+
EOB
|
|
21
|
+
opt.on "-o", "--output=PATH", "Specify output file or directory" do |path|
|
|
22
|
+
options[:output] = path
|
|
23
|
+
end
|
|
24
|
+
}.parse!(argv)
|
|
25
|
+
options
|
|
26
|
+
end
|
|
27
|
+
|
|
11
28
|
main ARGV
|
data/gtcrn.gemspec
CHANGED
data/lib/gtcrn.rb
CHANGED
|
@@ -35,7 +35,7 @@ class GTCRN
|
|
|
35
35
|
waveform, sample_rate = TorchAudio.load(path.to_path)
|
|
36
36
|
raise "Sampling rate must be 16000 Hz, but given: #{sample_rate} Hz" unless sample_rate == 16000
|
|
37
37
|
enhanced = enhance_speech_waveform(waveform)
|
|
38
|
-
TorchAudio.save(dest.to_path, enhanced
|
|
38
|
+
TorchAudio.save(dest.to_path, enhanced, sample_rate)
|
|
39
39
|
|
|
40
40
|
dest
|
|
41
41
|
end
|
|
@@ -66,6 +66,6 @@ class GTCRN
|
|
|
66
66
|
Torch.from_numo(real) + 1i * Torch.from_numo(imag),
|
|
67
67
|
**ISTFT_OPTS
|
|
68
68
|
)
|
|
69
|
-
enhanced
|
|
69
|
+
enhanced.squeeze
|
|
70
70
|
end
|
|
71
71
|
end
|