diarize-ruby 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +26 -0
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/AUTHORS +12 -0
- data/Gemfile +4 -0
- data/LICENSE +678 -0
- data/README.md +109 -0
- data/Rakefile +11 -0
- data/diarize-ruby.gemspec +31 -0
- data/lib/diarize.rb +117 -0
- data/lib/diarize/LIUM_SpkDiarization-4.2.jar +0 -0
- data/lib/diarize/audio.rb +196 -0
- data/lib/diarize/audio_player.rb +24 -0
- data/lib/diarize/lium.rb +5 -0
- data/lib/diarize/segment.rb +58 -0
- data/lib/diarize/segmentation.rb +37 -0
- data/lib/diarize/speaker.rb +174 -0
- data/lib/diarize/super_vector.rb +77 -0
- data/lib/diarize/ubm.gmm +0 -0
- data/lib/diarize/version.rb +3 -0
- data/test/audio_test.rb +107 -0
- data/test/data/foo.wav +0 -0
- data/test/data/speaker1.gmm +0 -0
- data/test/data/will-and-juergen.wav +0 -0
- data/test/segment_test.rb +29 -0
- data/test/segmentation_test.rb +39 -0
- data/test/speaker_test.rb +101 -0
- data/test/super_vector_test.rb +24 -0
- data/test/test_helper.rb +23 -0
- metadata +168 -0
data/test/data/foo.wav
ADDED
File without changes
|
Binary file
|
Binary file
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
require 'ostruct'
|
3
|
+
require 'uri'
|
4
|
+
|
5
|
+
# Unit tests for Diarize::Segment: constructor wiring, the speaker derived
# from a segment, and the segment URI.
#
# All assertions follow test-unit's (expected, actual) argument order so
# that failure messages report the right side as "expected".
class SegmentTest < Test::Unit::TestCase

  # Every positional constructor argument must be stored in the matching
  # instance variable (note that the gender lands in @speaker_gender).
  def test_initialize
    segment = Diarize::Segment.new('audio', 'start', 'duration', 'gender', 'bandwidth', 'speaker_id')
    assert_equal 'audio', segment.instance_variable_get('@audio')
    assert_equal 'start', segment.instance_variable_get('@start')
    assert_equal 'duration', segment.instance_variable_get('@duration')
    assert_equal 'gender', segment.instance_variable_get('@speaker_gender')
    assert_equal 'bandwidth', segment.instance_variable_get('@bandwidth')
    assert_equal 'speaker_id', segment.instance_variable_get('@speaker_id')
  end

  # The speaker is identified by the audio base URI plus the speaker id as
  # fragment, and carries the segment's gender.
  def test_speaker
    segment = Diarize::Segment.new(OpenStruct.new({:base_uri => 'http://example.com'}), nil, nil, 'm', nil, 's1')
    # Repeated calls must hand back the very same object, not an equal copy.
    assert_same segment.speaker, segment.speaker
    assert_equal URI('http://example.com#s1'), segment.speaker.uri
    assert_equal 'm', segment.speaker.gender
  end

  # With start 2 and duration 5 the expected fragment is "t=2,7"
  # (presumably start and start + duration -- confirm against Segment#uri).
  def test_uri
    segment = Diarize::Segment.new(OpenStruct.new({:base_uri => 'http://example.com'}), 2, 5, 'm', nil, 's1')
    assert_equal URI('http://example.com#t=2,7'), segment.uri
  end

end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
require 'tempfile'
|
3
|
+
|
4
|
+
# Unit tests for Diarize::Segmentation.from_seg_file, which builds the list
# of segments of an audio file from a LIUM .seg file.
class SegmentationTest < Test::Unit::TestCase

  # An empty .seg file must produce a segmentation without any segment.
  def test_segmentation_from_empty_seg_file
    audio = Diarize::Audio.new(foo_audio_uri)
    with_seg_file('') do |seg_path|
      segmentation = Diarize::Segmentation.from_seg_file(audio, seg_path)
      assert_equal 0, segmentation.size
    end
  end

  # Two .seg lines must become two Diarize::Segment objects. The expected
  # start/duration values are the file's values divided by 100 (the .seg
  # columns are presumably in centiseconds -- confirm against the parser).
  def test_segmentation_from_seg_file
    audio_uri = foo_audio_uri
    audio = Diarize::Audio.new(audio_uri)
    seg_content = <<EOF
foo 1 0 1000 F S U S0
foo 1 1000 10000 M S U S1
EOF
    with_seg_file(seg_content) do |seg_path|
      segmentation = Diarize::Segmentation.from_seg_file(audio, seg_path)
      assert_equal 2, segmentation.size

      assert_equal Diarize::Segment, segmentation.first.class
      assert_equal 0, segmentation.first.start
      assert_equal 10, segmentation.first.duration
      assert_equal URI(audio_uri.to_s + '#S0'), segmentation.first.speaker.uri
      assert_equal 'F', segmentation.first.speaker.gender

      assert_equal Diarize::Segment, segmentation.last.class
      assert_equal 10, segmentation.last.start
      assert_equal 100, segmentation.last.duration
      assert_equal URI(audio_uri.to_s + '#S1'), segmentation.last.speaker.uri
      assert_equal 'M', segmentation.last.speaker.gender
    end
  end

  private

  # file: URI of the foo.wav fixture stored next to this test file.
  def foo_audio_uri
    URI('file:' + File.join(File.dirname(__FILE__), 'data', 'foo.wav'))
  end

  # Writes +content+ to a temporary .seg file, closes it so it can be read
  # by path, yields that path, and always removes the file afterwards --
  # even when an assertion inside the block fails.
  def with_seg_file(content)
    seg_file = Tempfile.new(['diarize-jruby', '.seg'])
    seg_file.write content
    seg_file.close
    yield seg_file.path
  ensure
    seg_file.unlink if seg_file
  end

end
|
@@ -0,0 +1,101 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
require 'tempfile'
|
3
|
+
|
4
|
+
# Unit tests for Diarize::Speaker: construction, model loading/saving,
# supervector extraction, divergence and normalisation.
#
# Assertions use test-unit's (expected, actual) argument order so failure
# messages are reported the right way round.
class SpeakerTest < Test::Unit::TestCase

  # The detection threshold is a class-level setting; restore the previous
  # value so this test cannot influence tests that run after it.
  def test_detection_threshold
    previous_threshold = Diarize::Speaker.detection_threshold
    begin
      Diarize::Speaker.detection_threshold = 0.1
      assert_equal 0.1, Diarize::Speaker.detection_threshold
    ensure
      Diarize::Speaker.detection_threshold = previous_threshold
    end
  end

  # Asking twice for the same id must return the identical object.
  def test_find_or_create_gives_same_object_if_called_with_same_id
    speaker1 = Diarize::Speaker.find_or_create('S0', 'M')
    speaker2 = Diarize::Speaker.find_or_create('S0', 'M')
    assert_same speaker1, speaker2
  end

  def test_initialize
    speaker = Diarize::Speaker.new('uri', 'm')
    assert_equal 'uri', speaker.uri
    assert_equal 'm', speaker.gender
  end

  # The UBM speaker has neither gender nor URI, only a model.
  def test_initialize_ubm
    speaker = Diarize::Speaker.ubm
    assert_nil speaker.gender
    assert_nil speaker.uri
    assert_equal 'MSMTFSFT', speaker.model.name # UBM GMM
  end

  def test_initialize_with_model
    speaker = Diarize::Speaker.new(nil, nil, speaker1_model_file)
    assert_equal 'S0', speaker.model.name
  end

  # The mean log likelihood starts out as NaN and is writable.
  def test_mean_log_likelihood
    speaker = Diarize::Speaker.ubm
    assert speaker.mean_log_likelihood.nan?
    speaker.mean_log_likelihood = 1
    assert_equal 1, speaker.mean_log_likelihood
  end

  def test_supervector
    speaker = Diarize::Speaker.new(nil, nil, speaker1_model_file)
    assert_equal 512 * 24, speaker.supervector.dim
    # Testing the first and the last elements are OK
    assert_equal speaker.model.components.get(0).mean(0), speaker.supervector.vector[0]
    assert_equal speaker.model.components.get(511).mean(23), speaker.supervector.vector[512 * 24 - 1]
  end

  # A model written with save_model must load back with the same content
  # (spot-checked on the first mean of the first component).
  def test_save_and_load_model
    speaker = Diarize::Speaker.ubm
    tmp = Tempfile.new(['diarize-test', '.gmm'])
    begin
      speaker.save_model(tmp.path)
      model = Diarize::Speaker.load_model(tmp.path)
      assert_equal speaker.model.components.get(0).mean(0), model.components.get(0).mean(0)
    ensure
      # Remove the file even when the assertion above fails.
      tmp.close
      tmp.unlink
    end
  end

  def test_divergence_returns_nil_if_one_model_is_empty
    speaker1 = Diarize::Speaker.ubm
    speaker2 = Diarize::Speaker.ubm
    speaker2.model = nil
    assert_nil Diarize::Speaker.divergence(speaker1, speaker2)
    assert_nil Diarize::Speaker.divergence(speaker2, speaker1)
  end

  def test_divergence_is_symmetric
    speaker1 = Diarize::Speaker.new(nil, nil, speaker1_model_file)
    speaker2 = Diarize::Speaker.ubm
    assert_operator Diarize::Speaker.divergence(speaker1, speaker2), :>, 0
    assert_equal Diarize::Speaker.divergence(speaker2, speaker1), Diarize::Speaker.divergence(speaker1, speaker2)
    assert_equal 0.0, Diarize::Speaker.divergence(speaker1, speaker1)
  end

  # Both divergence implementations must agree to 12 decimal places.
  def test_divergence_ruby_is_same_as_divergence_lium
    speaker1 = Diarize::Speaker.new(nil, nil, speaker1_model_file)
    speaker2 = Diarize::Speaker.ubm
    assert_equal Diarize::Speaker.divergence_lium(speaker1, speaker2).round(12), Diarize::Speaker.divergence_ruby(speaker1, speaker2).round(12)
  end

  def test_normalise
    # Testing M-Norm
    speaker1 = Diarize::Speaker.new(nil, nil, speaker1_model_file)
    speaker2 = Diarize::Speaker.ubm
    assert_not_equal 1.0, Diarize::Speaker.divergence(speaker1, speaker2)
    speaker1.normalize! # Putting speaker1.gmm at distance 1 from UBM
    # Two-sided tolerance: a plain "d - 1.0 < 1e-12" would also accept any
    # divergence smaller than 1 (for instance 0).
    assert_in_delta 1.0, Diarize::Speaker.divergence(speaker1, speaker2), 1e-12 # rounding error
  end

  # Normalising the UBM itself must leave its supervector unchanged.
  def test_do_not_normalize_ubm
    speaker = Diarize::Speaker.ubm
    old_supervector = speaker.supervector
    speaker.normalize!
    assert_equal old_supervector, speaker.supervector
  end

  private

  # Path of the speaker1.gmm fixture stored next to this test file.
  def speaker1_model_file
    File.join(File.dirname(__FILE__), 'data', 'speaker1.gmm')
  end

end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
# Unit tests for Diarize::SuperVector built from the speaker1.gmm fixture.
class SuperVectorTest < Test::Unit::TestCase

  # The supervector must have 512 * 24 entries and hold, component after
  # component, every mean of the model in order.
  def test_generate_from_model
    gmm_path = File.join(File.dirname(__FILE__), 'data', 'speaker1.gmm')
    model = Diarize::Speaker.load_model(gmm_path)
    supervector = Diarize::SuperVector.generate_from_model(model)
    assert_equal 512 * 24, supervector.dim
    # Checking all elements are OK
    (0...model.nb_of_components).each do |component_index|
      component = model.components.get(component_index)
      (0...component.dim).each do |mean_index|
        position = component_index * component.dim + mean_index
        assert_equal component.mean(mean_index), supervector.vector[position]
      end
    end
  end

  # A supervector hashes exactly like the vector it wraps.
  def test_hash
    gmm_path = File.join(File.dirname(__FILE__), 'data', 'speaker1.gmm')
    supervector = Diarize::SuperVector.generate_from_model(Diarize::Speaker.load_model(gmm_path))
    assert_equal supervector.vector.hash, supervector.hash
  end

end
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'test/unit'
|
3
|
+
require 'test/unit/assertions'
|
4
|
+
require 'diarize'
|
5
|
+
require 'mocha/test_unit'
|
6
|
+
require 'webmock/test_unit'
|
7
|
+
|
8
|
+
WebMock.disable_net_connect!(:net_http_connect_on_start => true)
|
9
|
+
|
10
|
+
# Reopens Test::Unit::TestCase to add path helpers available in every test.
class Test::Unit::TestCase

  # Add global extensions to the test case class here

  # Directory containing this helper file, e.g. "/Users/foo/work/test".
  def test_root
    File.dirname(__FILE__)
  end

  # The "data" directory directly below #test_root.
  def fixtures_root
    [test_root, 'data'].join('/')
  end

end
|
metadata
ADDED
@@ -0,0 +1,168 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: diarize-ruby
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.3.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Yves Raimond
|
8
|
+
- Juergen Fesslmeier
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2016-07-09 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: test-unit
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - "~>"
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: '3.0'
|
21
|
+
type: :development
|
22
|
+
prerelease: false
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - "~>"
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: '3.0'
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: mocha
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - "~>"
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '1.1'
|
35
|
+
type: :development
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - "~>"
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '1.1'
|
42
|
+
- !ruby/object:Gem::Dependency
|
43
|
+
name: webmock
|
44
|
+
requirement: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - "~>"
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: '2.1'
|
49
|
+
type: :development
|
50
|
+
prerelease: false
|
51
|
+
version_requirements: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - "~>"
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '2.1'
|
56
|
+
- !ruby/object:Gem::Dependency
|
57
|
+
name: rjb
|
58
|
+
requirement: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - "~>"
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '1.5'
|
63
|
+
type: :runtime
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - "~>"
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '1.5'
|
70
|
+
- !ruby/object:Gem::Dependency
|
71
|
+
name: to-rdf
|
72
|
+
requirement: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - "~>"
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '0'
|
77
|
+
type: :runtime
|
78
|
+
prerelease: false
|
79
|
+
version_requirements: !ruby/object:Gem::Requirement
|
80
|
+
requirements:
|
81
|
+
- - "~>"
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '0'
|
84
|
+
- !ruby/object:Gem::Dependency
|
85
|
+
name: jblas-ruby
|
86
|
+
requirement: !ruby/object:Gem::Requirement
|
87
|
+
requirements:
|
88
|
+
- - "~>"
|
89
|
+
- !ruby/object:Gem::Version
|
90
|
+
version: '1.1'
|
91
|
+
type: :runtime
|
92
|
+
prerelease: false
|
93
|
+
version_requirements: !ruby/object:Gem::Requirement
|
94
|
+
requirements:
|
95
|
+
- - "~>"
|
96
|
+
- !ruby/object:Gem::Version
|
97
|
+
version: '1.1'
|
98
|
+
description: A library for Ruby wrapping the LIUM Speaker Diarization and including
|
99
|
+
a few extra tools
|
100
|
+
email:
|
101
|
+
- jfesslmeier@gmail.com
|
102
|
+
executables: []
|
103
|
+
extensions: []
|
104
|
+
extra_rdoc_files: []
|
105
|
+
files:
|
106
|
+
- ".gitignore"
|
107
|
+
- ".ruby-gemset"
|
108
|
+
- ".ruby-version"
|
109
|
+
- AUTHORS
|
110
|
+
- Gemfile
|
111
|
+
- LICENSE
|
112
|
+
- README.md
|
113
|
+
- Rakefile
|
114
|
+
- diarize-ruby.gemspec
|
115
|
+
- lib/diarize.rb
|
116
|
+
- lib/diarize/LIUM_SpkDiarization-4.2.jar
|
117
|
+
- lib/diarize/audio.rb
|
118
|
+
- lib/diarize/audio_player.rb
|
119
|
+
- lib/diarize/lium.rb
|
120
|
+
- lib/diarize/segment.rb
|
121
|
+
- lib/diarize/segmentation.rb
|
122
|
+
- lib/diarize/speaker.rb
|
123
|
+
- lib/diarize/super_vector.rb
|
124
|
+
- lib/diarize/ubm.gmm
|
125
|
+
- lib/diarize/version.rb
|
126
|
+
- test/audio_test.rb
|
127
|
+
- test/data/foo.wav
|
128
|
+
- test/data/speaker1.gmm
|
129
|
+
- test/data/will-and-juergen.wav
|
130
|
+
- test/segment_test.rb
|
131
|
+
- test/segmentation_test.rb
|
132
|
+
- test/speaker_test.rb
|
133
|
+
- test/super_vector_test.rb
|
134
|
+
- test/test_helper.rb
|
135
|
+
homepage: https://github.com/chinshr/diarize-ruby
|
136
|
+
licenses:
|
137
|
+
- GNU Affero General Public License version 3
|
138
|
+
metadata: {}
|
139
|
+
post_install_message:
|
140
|
+
rdoc_options: []
|
141
|
+
require_paths:
|
142
|
+
- lib
|
143
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
144
|
+
requirements:
|
145
|
+
- - ">="
|
146
|
+
- !ruby/object:Gem::Version
|
147
|
+
version: '0'
|
148
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - ">="
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: '0'
|
153
|
+
requirements: []
|
154
|
+
rubyforge_project:
|
155
|
+
rubygems_version: 2.4.8
|
156
|
+
signing_key:
|
157
|
+
specification_version: 4
|
158
|
+
summary: Speaker Diarization for Ruby
|
159
|
+
test_files:
|
160
|
+
- test/audio_test.rb
|
161
|
+
- test/data/foo.wav
|
162
|
+
- test/data/speaker1.gmm
|
163
|
+
- test/data/will-and-juergen.wav
|
164
|
+
- test/segment_test.rb
|
165
|
+
- test/segmentation_test.rb
|
166
|
+
- test/speaker_test.rb
|
167
|
+
- test/super_vector_test.rb
|
168
|
+
- test/test_helper.rb
|