itak 2 → 3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/itak.gemspec +1 -4
- data/lib/itak/transcriber.rb +39 -0
- data/lib/itak/vad.rb +1 -0
- data/lib/itak.rb +14 -2
- data/test/test_itak.rb +1 -0
- metadata +2 -43
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: a7ab66466c640ba4b91cfdf41de938434bdf3decd225c606213ff0aa91a9cff2
|
|
4
|
+
data.tar.gz: 8eabbd49439e13cd6d1831a7fc5fd83a391538f89a06e9c69640ba3ec5006ace
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 0f7278553178a3d2fa2de089f14a71f5079814528772adccd5fd1897056b31fb5a624efe5b73c2a8779f6d3bb7e8ee213cea8aea8fc26a1f9984967b9b6a75c7
|
|
7
|
+
data.tar.gz: ed4f9e3765197050aa8061e52cad8cfcbed747f00c2433e59f80fa7ce1a715255be4ec0e36ecc7d78cae85dc7fc8463d4aa2fc7154eee3e12eda0b882b7d069c
|
data/itak.gemspec
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Gem::Specification.new do |s|
|
|
2
2
|
s.name = "itak"
|
|
3
|
-
s.version = "2"
|
|
3
|
+
s.version = "3"
|
|
4
4
|
s.authors = ["Kitaiti Makoto"]
|
|
5
5
|
s.email = ["KitaitiMaokto@gmail.com"]
|
|
6
6
|
s.summary = "Audio editing tool for podcasters"
|
|
@@ -15,9 +15,6 @@ Gem::Specification.new do |s|
|
|
|
15
15
|
s.add_runtime_dependency "whispercpp", ">= 1.3.6"
|
|
16
16
|
s.add_runtime_dependency "torchaudio", ">= 0.5.0"
|
|
17
17
|
s.add_runtime_dependency "torchcodec"
|
|
18
|
-
s.add_runtime_dependency "red-arrow"
|
|
19
|
-
s.add_runtime_dependency "numo-narray-alt"
|
|
20
|
-
s.add_runtime_dependency "red-arrow-numo-narray"
|
|
21
18
|
s.add_runtime_dependency "torch-rb"
|
|
22
19
|
s.add_runtime_dependency "optparse-pathname"
|
|
23
20
|
s.add_runtime_dependency "ndav-torch-tensor"
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
require "whisper"
|
|
2
|
+
|
|
3
|
+
class Itak
|
|
4
|
+
class Transcriber
|
|
5
|
+
PARAMS = {
|
|
6
|
+
language: "ja",
|
|
7
|
+
temperature: 1.0
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
def initialize(model: "large-v3-turbo-q8_0")
|
|
11
|
+
Whisper.log_set proc {}, nil
|
|
12
|
+
@whisper = Whisper::Context.new(model)
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
def run(src, params: {})
|
|
16
|
+
params = Whisper::Params.new(**PARAMS.merge(params))
|
|
17
|
+
src = src[0] if src.ndim == 2 && src.shape[0] == 1
|
|
18
|
+
@whisper
|
|
19
|
+
.full(params, src.to_ndav)
|
|
20
|
+
.each_segment
|
|
21
|
+
.collect {|segment|
|
|
22
|
+
"[%<start_time>s --> %<end_time>s]%<text>s" % {
|
|
23
|
+
start_time: format_time(segment.start_time),
|
|
24
|
+
end_time: format_time(segment.end_time),
|
|
25
|
+
text: segment.text
|
|
26
|
+
}
|
|
27
|
+
}.join("\n\n")
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
private
|
|
31
|
+
|
|
32
|
+
def format_time(time_ms)
|
|
33
|
+
sec, decimal_part = time_ms.divmod(1000)
|
|
34
|
+
min, sec = sec.divmod(60)
|
|
35
|
+
hour, min = min.divmod(60)
|
|
36
|
+
"%02d:%02d:%02d" % [hour, min, sec]
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
end
|
data/lib/itak/vad.rb
CHANGED
data/lib/itak.rb
CHANGED
|
@@ -3,6 +3,7 @@ require "torchaudio"
|
|
|
3
3
|
|
|
4
4
|
require "itak/denoiser"
|
|
5
5
|
require "itak/vad"
|
|
6
|
+
require "itak/transcriber"
|
|
6
7
|
|
|
7
8
|
class Itak
|
|
8
9
|
SAMPLE_RATE = 16_000
|
|
@@ -28,8 +29,19 @@ class Itak
|
|
|
28
29
|
$stderr.puts "VAD..."
|
|
29
30
|
vadded = VAD.new.run(denoised)
|
|
30
31
|
|
|
31
|
-
|
|
32
|
-
|
|
32
|
+
saving = Thread.new {
|
|
33
|
+
TorchAudio.save(dest.to_path, vadded.unsqueeze(0), SAMPLE_RATE)
|
|
34
|
+
$stderr.puts "Saved to #{dest}"
|
|
35
|
+
}
|
|
36
|
+
transcribing = Thread.new {
|
|
37
|
+
$stderr.puts "Transcribing..."
|
|
38
|
+
transcription = Transcriber.new.run(vadded)
|
|
39
|
+
transcription_path = dest.sub_ext(".txt")
|
|
40
|
+
transcription_path.write transcription
|
|
41
|
+
$stderr.puts "Transcription saved to #{transcription_path}"
|
|
42
|
+
}
|
|
43
|
+
saving.join
|
|
44
|
+
transcribing.join
|
|
33
45
|
dest
|
|
34
46
|
end
|
|
35
47
|
end
|
data/test/test_itak.rb
CHANGED
|
@@ -9,6 +9,7 @@ class TestItak < TestBase
|
|
|
9
9
|
output = Itak.new.run(@src)
|
|
10
10
|
assert_equal Pathname("test/fixtures/mix.denoised-vad.wav").expand_path, output
|
|
11
11
|
assert_path_exist output
|
|
12
|
+
assert_path_exist output.sub_ext(".txt")
|
|
12
13
|
|
|
13
14
|
src_reader = WaveFile::Reader.new(@src)
|
|
14
15
|
src_reader.close
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: itak
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: '2'
|
|
4
|
+
version: '3'
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Kitaiti Makoto
|
|
@@ -65,48 +65,6 @@ dependencies:
|
|
|
65
65
|
- - ">="
|
|
66
66
|
- !ruby/object:Gem::Version
|
|
67
67
|
version: '0'
|
|
68
|
-
- !ruby/object:Gem::Dependency
|
|
69
|
-
name: red-arrow
|
|
70
|
-
requirement: !ruby/object:Gem::Requirement
|
|
71
|
-
requirements:
|
|
72
|
-
- - ">="
|
|
73
|
-
- !ruby/object:Gem::Version
|
|
74
|
-
version: '0'
|
|
75
|
-
type: :runtime
|
|
76
|
-
prerelease: false
|
|
77
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
78
|
-
requirements:
|
|
79
|
-
- - ">="
|
|
80
|
-
- !ruby/object:Gem::Version
|
|
81
|
-
version: '0'
|
|
82
|
-
- !ruby/object:Gem::Dependency
|
|
83
|
-
name: numo-narray-alt
|
|
84
|
-
requirement: !ruby/object:Gem::Requirement
|
|
85
|
-
requirements:
|
|
86
|
-
- - ">="
|
|
87
|
-
- !ruby/object:Gem::Version
|
|
88
|
-
version: '0'
|
|
89
|
-
type: :runtime
|
|
90
|
-
prerelease: false
|
|
91
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
92
|
-
requirements:
|
|
93
|
-
- - ">="
|
|
94
|
-
- !ruby/object:Gem::Version
|
|
95
|
-
version: '0'
|
|
96
|
-
- !ruby/object:Gem::Dependency
|
|
97
|
-
name: red-arrow-numo-narray
|
|
98
|
-
requirement: !ruby/object:Gem::Requirement
|
|
99
|
-
requirements:
|
|
100
|
-
- - ">="
|
|
101
|
-
- !ruby/object:Gem::Version
|
|
102
|
-
version: '0'
|
|
103
|
-
type: :runtime
|
|
104
|
-
prerelease: false
|
|
105
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
106
|
-
requirements:
|
|
107
|
-
- - ">="
|
|
108
|
-
- !ruby/object:Gem::Version
|
|
109
|
-
version: '0'
|
|
110
68
|
- !ruby/object:Gem::Dependency
|
|
111
69
|
name: torch-rb
|
|
112
70
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -265,6 +223,7 @@ files:
|
|
|
265
223
|
- itak.gemspec
|
|
266
224
|
- lib/itak.rb
|
|
267
225
|
- lib/itak/denoiser.rb
|
|
226
|
+
- lib/itak/transcriber.rb
|
|
268
227
|
- lib/itak/vad.rb
|
|
269
228
|
- test/fixtures/.gitkeep
|
|
270
229
|
- test/helper.rb
|