diarize-ruby 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/test/data/foo.wav ADDED
File without changes
Binary file
Binary file
@@ -0,0 +1,29 @@
1
+ require 'test_helper'
2
+ require 'ostruct'
3
+ require 'uri'
4
+
5
+ class SegmentTest < Test::Unit::TestCase
6
+
7
+ def test_initialize
8
+ segment = Diarize::Segment.new('audio', 'start', 'duration', 'gender', 'bandwidth', 'speaker_id')
9
+ assert_equal segment.instance_variable_get('@audio'), 'audio'
10
+ assert_equal segment.instance_variable_get('@start'), 'start'
11
+ assert_equal segment.instance_variable_get('@duration'), 'duration'
12
+ assert_equal segment.instance_variable_get('@speaker_gender'), 'gender'
13
+ assert_equal segment.instance_variable_get('@bandwidth'), 'bandwidth'
14
+ assert_equal segment.instance_variable_get('@speaker_id'), 'speaker_id'
15
+ end
16
+
17
+ def test_speaker
18
+ segment = Diarize::Segment.new(OpenStruct.new({:base_uri => 'http://example.com'}), nil, nil, 'm', nil, 's1')
19
+ assert_equal segment.speaker.object_id, segment.speaker.object_id # same one should be generated twice
20
+ assert_equal segment.speaker.uri, URI('http://example.com#s1')
21
+ assert_equal segment.speaker.gender, 'm'
22
+ end
23
+
24
+ def test_uri
25
+ segment = Diarize::Segment.new(OpenStruct.new({:base_uri => 'http://example.com'}), 2, 5, 'm', nil, 's1')
26
+ assert_equal segment.uri, URI('http://example.com#t=2,7')
27
+ end
28
+
29
+ end
@@ -0,0 +1,39 @@
1
+ require 'test_helper'
2
+ require 'tempfile'
3
+
4
+ class SegmentationTest < Test::Unit::TestCase
5
+
6
+ def test_segmentation_from_empty_seg_file
7
+ audio_uri = URI('file:' + File.join(File.dirname(__FILE__), 'data', 'foo.wav'))
8
+ audio = Diarize::Audio.new audio_uri
9
+ seg_file = Tempfile.new(['diarize-jruby', '.seg'])
10
+ seg_file.write ''
11
+ seg_file.close
12
+ segmentation = Diarize::Segmentation.from_seg_file(audio, seg_file.path)
13
+ assert_equal segmentation.size, 0
14
+ end
15
+
16
+ def test_segmentation_from_sef_file
17
+ audio_uri = URI('file:' + File.join(File.dirname(__FILE__), 'data', 'foo.wav'))
18
+ audio = Diarize::Audio.new audio_uri
19
+ seg_file = Tempfile.new(['diarize-jruby', '.seg'])
20
+ seg_file.write <<EOF
21
+ foo 1 0 1000 F S U S0
22
+ foo 1 1000 10000 M S U S1
23
+ EOF
24
+ seg_file.close
25
+ segmentation = Diarize::Segmentation.from_seg_file(audio, seg_file.path)
26
+ assert_equal segmentation.size, 2
27
+ assert_equal segmentation.first.class, Diarize::Segment
28
+ assert_equal segmentation.first.start, 0
29
+ assert_equal segmentation.first.duration, 10
30
+ assert_equal segmentation.first.speaker.uri, URI(audio_uri.to_s + '#S0')
31
+ assert_equal segmentation.first.speaker.gender, 'F'
32
+ assert_equal segmentation.last.class, Diarize::Segment
33
+ assert_equal segmentation.last.start, 10
34
+ assert_equal segmentation.last.duration, 100
35
+ assert_equal segmentation.last.speaker.uri, URI(audio_uri.to_s + '#S1')
36
+ assert_equal segmentation.last.speaker.gender, 'M'
37
+ end
38
+
39
+ end
@@ -0,0 +1,101 @@
1
+ require 'test_helper'
2
+ require 'tempfile'
3
+
4
+ class SpeakerTest < Test::Unit::TestCase
5
+
6
+ def test_detection_threshold
7
+ Diarize::Speaker.detection_threshold = 0.1
8
+ assert_equal 0.1, Diarize::Speaker.detection_threshold
9
+ end
10
+
11
+ def test_find_or_create_gives_same_object_if_called_with_same_id
12
+ speaker1 = Diarize::Speaker.find_or_create('S0', 'M')
13
+ speaker2 = Diarize::Speaker.find_or_create('S0', 'M')
14
+ assert_equal speaker2.object_id, speaker1.object_id
15
+ end
16
+
17
+ def test_initialize
18
+ speaker = Diarize::Speaker.new('uri', 'm')
19
+ assert_equal speaker.uri, 'uri'
20
+ assert_equal speaker.gender, 'm'
21
+ end
22
+
23
+ def test_initialize_ubm
24
+ speaker = Diarize::Speaker.ubm
25
+ assert_equal speaker.gender, nil
26
+ assert_equal speaker.uri, nil
27
+ assert_equal speaker.model.name, 'MSMTFSFT' # UBM GMM
28
+ end
29
+
30
+ def test_initialize_with_model
31
+ model_file = File.join(File.dirname(__FILE__), 'data', 'speaker1.gmm')
32
+ speaker = Diarize::Speaker.new(nil, nil, model_file)
33
+ assert_equal speaker.model.name, 'S0'
34
+ end
35
+
36
+ def test_mean_log_likelihood
37
+ speaker = Diarize::Speaker.ubm
38
+ assert speaker.mean_log_likelihood.nan?
39
+ speaker.mean_log_likelihood = 1
40
+ assert_equal speaker.mean_log_likelihood, 1
41
+ end
42
+
43
+ def test_supervector
44
+ speaker = Diarize::Speaker.new(nil, nil, File.join(File.dirname(__FILE__), 'data', 'speaker1.gmm'))
45
+ assert_equal 512 * 24, speaker.supervector.dim
46
+ # Testing the first and the last elements are OK
47
+ assert_equal speaker.model.components.get(0).mean(0), speaker.supervector.vector[0]
48
+ assert_equal speaker.model.components.get(511).mean(23), speaker.supervector.vector[512 * 24 - 1]
49
+ end
50
+
51
+ def test_save_and_load_model
52
+ speaker = Diarize::Speaker.ubm
53
+ tmp = Tempfile.new(['diarize-test', '.gmm'])
54
+ speaker.save_model(tmp.path)
55
+ model = Diarize::Speaker.load_model(tmp.path)
56
+ assert_equal speaker.model.components.get(0).mean(0), model.components.get(0).mean(0)
57
+ File.delete(tmp.path)
58
+ end
59
+
60
+ def test_divergence_returns_nil_if_one_model_is_empty
61
+ speaker1 = Diarize::Speaker.ubm
62
+ speaker2 = Diarize::Speaker.ubm
63
+ speaker2.model = nil
64
+ assert_equal Diarize::Speaker.divergence(speaker1, speaker2), nil
65
+ assert_equal Diarize::Speaker.divergence(speaker2, speaker1), nil
66
+ end
67
+
68
+ def test_divergence_is_symmetric
69
+ model_file = File.join(File.dirname(__FILE__), 'data', 'speaker1.gmm')
70
+ speaker1 = Diarize::Speaker.new(nil, nil, model_file)
71
+ speaker2 = Diarize::Speaker.ubm
72
+ assert Diarize::Speaker.divergence(speaker1, speaker2) > 0
73
+ assert_equal Diarize::Speaker.divergence(speaker1, speaker2), Diarize::Speaker.divergence(speaker2, speaker1)
74
+ assert_equal Diarize::Speaker.divergence(speaker1, speaker1), 0.0
75
+ end
76
+
77
+ def test_divergence_ruby_is_same_as_divergence_lium
78
+ model_file = File.join(File.dirname(__FILE__), 'data', 'speaker1.gmm')
79
+ speaker1 = Diarize::Speaker.new(nil, nil, model_file)
80
+ speaker2 = Diarize::Speaker.ubm
81
+ assert_equal Diarize::Speaker.divergence_lium(speaker1, speaker2).round(12), Diarize::Speaker.divergence_ruby(speaker1, speaker2).round(12)
82
+ end
83
+
84
+ def test_normalise
85
+ # Testing M-Norm
86
+ model_file = File.join(File.dirname(__FILE__), 'data', 'speaker1.gmm')
87
+ speaker1 = Diarize::Speaker.new(nil, nil, model_file)
88
+ speaker2 = Diarize::Speaker.ubm
89
+ assert Diarize::Speaker.divergence(speaker1, speaker2) != 1.0
90
+ speaker1.normalize! # Putting speaker1.gmm at distance 1 from UBM
91
+ assert Diarize::Speaker.divergence(speaker1, speaker2) - 1.0 < 1e-12 # rounding error
92
+ end
93
+
94
+ def test_do_not_normalize_ubm
95
+ speaker = Diarize::Speaker.ubm
96
+ old_supervector = speaker.supervector
97
+ speaker.normalize!
98
+ assert_equal old_supervector, speaker.supervector
99
+ end
100
+
101
+ end
@@ -0,0 +1,24 @@
1
+ require 'test_helper'
2
+
3
+ class SuperVectorTest < Test::Unit::TestCase
4
+
5
+ def test_generate_from_model
6
+ model = Diarize::Speaker.load_model(File.join(File.dirname(__FILE__), 'data', 'speaker1.gmm'))
7
+ sv = Diarize::SuperVector.generate_from_model(model)
8
+ assert_equal 512 * 24, sv.dim
9
+ # Checking all elements are OK
10
+ model.nb_of_components.times do |i|
11
+ gaussian = model.components.get(i)
12
+ gaussian.dim.times do |j|
13
+ assert_equal gaussian.mean(j), sv.vector[i * gaussian.dim + j]
14
+ end
15
+ end
16
+ end
17
+
18
+ def test_hash
19
+ model = Diarize::Speaker.load_model(File.join(File.dirname(__FILE__), 'data', 'speaker1.gmm'))
20
+ sv = Diarize::SuperVector.generate_from_model(model)
21
+ assert_equal sv.vector.hash, sv.hash
22
+ end
23
+
24
+ end
@@ -0,0 +1,23 @@
1
+ require 'rubygems'
2
+ require 'test/unit'
3
+ require 'test/unit/assertions'
4
+ require 'diarize'
5
+ require 'mocha/test_unit'
6
+ require 'webmock/test_unit'
7
+
8
+ WebMock.disable_net_connect!(:net_http_connect_on_start => true)
9
+
10
+ class Test::Unit::TestCase
11
+
12
+ # Add global extensions to the test case class here
13
+
14
+ # E.g. "/Users/foo/work/test"
15
+ def test_root
16
+ File.dirname(__FILE__)
17
+ end
18
+
19
+ def fixtures_root
20
+ "#{test_root}/data"
21
+ end
22
+
23
+ end
metadata ADDED
@@ -0,0 +1,168 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: diarize-ruby
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.3.0
5
+ platform: ruby
6
+ authors:
7
+ - Yves Raimond
8
+ - Juergen Fesslmeier
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2016-07-09 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: test-unit
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - "~>"
19
+ - !ruby/object:Gem::Version
20
+ version: '3.0'
21
+ type: :development
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - "~>"
26
+ - !ruby/object:Gem::Version
27
+ version: '3.0'
28
+ - !ruby/object:Gem::Dependency
29
+ name: mocha
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - "~>"
33
+ - !ruby/object:Gem::Version
34
+ version: '1.1'
35
+ type: :development
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - "~>"
40
+ - !ruby/object:Gem::Version
41
+ version: '1.1'
42
+ - !ruby/object:Gem::Dependency
43
+ name: webmock
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - "~>"
47
+ - !ruby/object:Gem::Version
48
+ version: '2.1'
49
+ type: :development
50
+ prerelease: false
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - "~>"
54
+ - !ruby/object:Gem::Version
55
+ version: '2.1'
56
+ - !ruby/object:Gem::Dependency
57
+ name: rjb
58
+ requirement: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - "~>"
61
+ - !ruby/object:Gem::Version
62
+ version: '1.5'
63
+ type: :runtime
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - "~>"
68
+ - !ruby/object:Gem::Version
69
+ version: '1.5'
70
+ - !ruby/object:Gem::Dependency
71
+ name: to-rdf
72
+ requirement: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - "~>"
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ type: :runtime
78
+ prerelease: false
79
+ version_requirements: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - "~>"
82
+ - !ruby/object:Gem::Version
83
+ version: '0'
84
+ - !ruby/object:Gem::Dependency
85
+ name: jblas-ruby
86
+ requirement: !ruby/object:Gem::Requirement
87
+ requirements:
88
+ - - "~>"
89
+ - !ruby/object:Gem::Version
90
+ version: '1.1'
91
+ type: :runtime
92
+ prerelease: false
93
+ version_requirements: !ruby/object:Gem::Requirement
94
+ requirements:
95
+ - - "~>"
96
+ - !ruby/object:Gem::Version
97
+ version: '1.1'
98
+ description: A library for Ruby wrapping the LIUM Speaker Diarization and including
99
+ a few extra tools
100
+ email:
101
+ - jfesslmeier@gmail.com
102
+ executables: []
103
+ extensions: []
104
+ extra_rdoc_files: []
105
+ files:
106
+ - ".gitignore"
107
+ - ".ruby-gemset"
108
+ - ".ruby-version"
109
+ - AUTHORS
110
+ - Gemfile
111
+ - LICENSE
112
+ - README.md
113
+ - Rakefile
114
+ - diarize-ruby.gemspec
115
+ - lib/diarize.rb
116
+ - lib/diarize/LIUM_SpkDiarization-4.2.jar
117
+ - lib/diarize/audio.rb
118
+ - lib/diarize/audio_player.rb
119
+ - lib/diarize/lium.rb
120
+ - lib/diarize/segment.rb
121
+ - lib/diarize/segmentation.rb
122
+ - lib/diarize/speaker.rb
123
+ - lib/diarize/super_vector.rb
124
+ - lib/diarize/ubm.gmm
125
+ - lib/diarize/version.rb
126
+ - test/audio_test.rb
127
+ - test/data/foo.wav
128
+ - test/data/speaker1.gmm
129
+ - test/data/will-and-juergen.wav
130
+ - test/segment_test.rb
131
+ - test/segmentation_test.rb
132
+ - test/speaker_test.rb
133
+ - test/super_vector_test.rb
134
+ - test/test_helper.rb
135
+ homepage: https://github.com/chinshr/diarize-ruby
136
+ licenses:
137
+ - GNU Affero General Public License version 3
138
+ metadata: {}
139
+ post_install_message:
140
+ rdoc_options: []
141
+ require_paths:
142
+ - lib
143
+ required_ruby_version: !ruby/object:Gem::Requirement
144
+ requirements:
145
+ - - ">="
146
+ - !ruby/object:Gem::Version
147
+ version: '0'
148
+ required_rubygems_version: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ">="
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ requirements: []
154
+ rubyforge_project:
155
+ rubygems_version: 2.4.8
156
+ signing_key:
157
+ specification_version: 4
158
+ summary: Speaker Diarization for Ruby
159
+ test_files:
160
+ - test/audio_test.rb
161
+ - test/data/foo.wav
162
+ - test/data/speaker1.gmm
163
+ - test/data/will-and-juergen.wav
164
+ - test/segment_test.rb
165
+ - test/segmentation_test.rb
166
+ - test/speaker_test.rb
167
+ - test/super_vector_test.rb
168
+ - test/test_helper.rb