itak 2 → 3

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 05d3ad0b0b7a48d1053aa421e23a4d720f8069e3b767d52ffe305eb52266ea4c
4
- data.tar.gz: 1ca65afc391b6d3b5b80c79478e8317ec87198785a3376e34feafe9b20aa86b7
3
+ metadata.gz: a7ab66466c640ba4b91cfdf41de938434bdf3decd225c606213ff0aa91a9cff2
4
+ data.tar.gz: 8eabbd49439e13cd6d1831a7fc5fd83a391538f89a06e9c69640ba3ec5006ace
5
5
  SHA512:
6
- metadata.gz: 682271f0f3c32b88b489f3465f40df61bfc20588f611fedd41cdea75b2935ed14259166993f8039d7833ffc8abd7e22af3cf0c41d91bad33f15068e22c9143a3
7
- data.tar.gz: 07e5d6f02e6127ebc74835561ee44c9c0aaa57094ef6fd110f980b08838b698f244cabe64d798b8acd9abf698db2296a6e6d54fded5a19eeb887759a30e10d6e
6
+ metadata.gz: 0f7278553178a3d2fa2de089f14a71f5079814528772adccd5fd1897056b31fb5a624efe5b73c2a8779f6d3bb7e8ee213cea8aea8fc26a1f9984967b9b6a75c7
7
+ data.tar.gz: ed4f9e3765197050aa8061e52cad8cfcbed747f00c2433e59f80fa7ce1a715255be4ec0e36ecc7d78cae85dc7fc8463d4aa2fc7154eee3e12eda0b882b7d069c
data/itak.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = "itak"
3
- s.version = "2"
3
+ s.version = "3"
4
4
  s.authors = ["Kitaiti Makoto"]
5
5
  s.email = ["KitaitiMaokto@gmail.com"]
6
6
  s.summary = "Audio editing tool for podcasters"
@@ -15,9 +15,6 @@ Gem::Specification.new do |s|
15
15
  s.add_runtime_dependency "whispercpp", ">= 1.3.6"
16
16
  s.add_runtime_dependency "torchaudio", ">= 0.5.0"
17
17
  s.add_runtime_dependency "torchcodec"
18
- s.add_runtime_dependency "red-arrow"
19
- s.add_runtime_dependency "numo-narray-alt"
20
- s.add_runtime_dependency "red-arrow-numo-narray"
21
18
  s.add_runtime_dependency "torch-rb"
22
19
  s.add_runtime_dependency "optparse-pathname"
23
20
  s.add_runtime_dependency "ndav-torch-tensor"
data/lib/itak/transcriber.rb ADDED
@@ -0,0 +1,39 @@
1
+ require "whisper"
2
+
3
+ class Itak
4
+ class Transcriber
5
+ PARAMS = {
6
+ language: "ja",
7
+ temperature: 1.0
8
+ }
9
+
10
+ def initialize(model: "large-v3-turbo-q8_0")
11
+ Whisper.log_set proc {}, nil
12
+ @whisper = Whisper::Context.new(model)
13
+ end
14
+
15
+ def run(src, params: {})
16
+ params = Whisper::Params.new(**PARAMS.merge(params))
17
+ src = src[0] if src.ndim == 2 && src.shape[0] == 1
18
+ @whisper
19
+ .full(params, src.to_ndav)
20
+ .each_segment
21
+ .collect {|segment|
22
+ "[%<start_time>s --> %<end_time>s]%<text>s" % {
23
+ start_time: format_time(segment.start_time),
24
+ end_time: format_time(segment.end_time),
25
+ text: segment.text
26
+ }
27
+ }.join("\n\n")
28
+ end
29
+
30
+ private
31
+
32
+ def format_time(time_ms)
33
+ sec, decimal_part = time_ms.divmod(1000)
34
+ min, sec = sec.divmod(60)
35
+ hour, min = min.divmod(60)
36
+ "%02d:%02d:%02d" % [hour, min, sec]
37
+ end
38
+ end
39
+ end
data/lib/itak/vad.rb CHANGED
@@ -9,6 +9,7 @@ class Itak
9
9
  )
10
10
 
11
11
  def initialize(model: "silero-v6.2.0")
12
+ Whisper.log_set proc {}, nil
12
13
  @vad = Whisper::VAD::Context.new(model)
13
14
  end
14
15
 
data/lib/itak.rb CHANGED
@@ -3,6 +3,7 @@ require "torchaudio"
3
3
 
4
4
  require "itak/denoiser"
5
5
  require "itak/vad"
6
+ require "itak/transcriber"
6
7
 
7
8
  class Itak
8
9
  SAMPLE_RATE = 16_000
@@ -28,8 +29,19 @@ class Itak
28
29
  $stderr.puts "VAD..."
29
30
  vadded = VAD.new.run(denoised)
30
31
 
31
- TorchAudio.save(dest.to_path, vadded.unsqueeze(0), SAMPLE_RATE)
32
- $stderr.puts "Saved to #{dest}"
32
+ saving = Thread.new {
33
+ TorchAudio.save(dest.to_path, vadded.unsqueeze(0), SAMPLE_RATE)
34
+ $stderr.puts "Saved to #{dest}"
35
+ }
36
+ transcribing = Thread.new {
37
+ $stderr.puts "Transcribing..."
38
+ transcription = Transcriber.new.run(vadded)
39
+ transcription_path = dest.sub_ext(".txt")
40
+ transcription_path.write transcription
41
+ $stderr.puts "Transcription saved to #{transcription_path}"
42
+ }
43
+ saving.join
44
+ transcribing.join
33
45
  dest
34
46
  end
35
47
  end
data/test/test_itak.rb CHANGED
@@ -9,6 +9,7 @@ class TestItak < TestBase
9
9
  output = Itak.new.run(@src)
10
10
  assert_equal Pathname("test/fixtures/mix.denoised-vad.wav").expand_path, output
11
11
  assert_path_exist output
12
+ assert_path_exist output.sub_ext(".txt")
12
13
 
13
14
  src_reader = WaveFile::Reader.new(@src)
14
15
  src_reader.close
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: itak
3
3
  version: !ruby/object:Gem::Version
4
- version: '2'
4
+ version: '3'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kitaiti Makoto
@@ -65,48 +65,6 @@ dependencies:
65
65
  - - ">="
66
66
  - !ruby/object:Gem::Version
67
67
  version: '0'
68
- - !ruby/object:Gem::Dependency
69
- name: red-arrow
70
- requirement: !ruby/object:Gem::Requirement
71
- requirements:
72
- - - ">="
73
- - !ruby/object:Gem::Version
74
- version: '0'
75
- type: :runtime
76
- prerelease: false
77
- version_requirements: !ruby/object:Gem::Requirement
78
- requirements:
79
- - - ">="
80
- - !ruby/object:Gem::Version
81
- version: '0'
82
- - !ruby/object:Gem::Dependency
83
- name: numo-narray-alt
84
- requirement: !ruby/object:Gem::Requirement
85
- requirements:
86
- - - ">="
87
- - !ruby/object:Gem::Version
88
- version: '0'
89
- type: :runtime
90
- prerelease: false
91
- version_requirements: !ruby/object:Gem::Requirement
92
- requirements:
93
- - - ">="
94
- - !ruby/object:Gem::Version
95
- version: '0'
96
- - !ruby/object:Gem::Dependency
97
- name: red-arrow-numo-narray
98
- requirement: !ruby/object:Gem::Requirement
99
- requirements:
100
- - - ">="
101
- - !ruby/object:Gem::Version
102
- version: '0'
103
- type: :runtime
104
- prerelease: false
105
- version_requirements: !ruby/object:Gem::Requirement
106
- requirements:
107
- - - ">="
108
- - !ruby/object:Gem::Version
109
- version: '0'
110
68
  - !ruby/object:Gem::Dependency
111
69
  name: torch-rb
112
70
  requirement: !ruby/object:Gem::Requirement
@@ -265,6 +223,7 @@ files:
265
223
  - itak.gemspec
266
224
  - lib/itak.rb
267
225
  - lib/itak/denoiser.rb
226
+ - lib/itak/transcriber.rb
268
227
  - lib/itak/vad.rb
269
228
  - test/fixtures/.gitkeep
270
229
  - test/helper.rb