diarize-ruby 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,24 @@
1
module Diarize

  # Plays an excerpt of an audio file through javax.sound.sampled.
  #
  # The Java classes are reached through the +java+ / +javax+ package
  # accessors, which must be available on the receiver (presumably a JRuby
  # runtime -- confirm against how the gem is loaded).
  class AudioPlayer

    # Plays part of +file+ and blocks until the excerpt is over or the user
    # interrupts playback (Ctrl-C).
    #
    # file     - object responding to +path+ (location of the audio file)
    # start    - offset into the file, in seconds (default 0.0)
    # duration - how long to keep playing, in seconds (default 10.0)
    #
    # Returns nil. Errors raised while opening or starting the clip propagate
    # to the caller, but the clip and the input stream are always released.
    def play(file, start = 0.0, duration = 10.0)
      java_file = java.io.File.new(file.path)
      stream = javax.sound.sampled.AudioSystem.getAudioInputStream(java_file)
      begin
        clip = javax.sound.sampled.AudioSystem.clip
        begin
          clip.open(stream)
          # The clip position is expressed in microseconds.
          clip.setMicrosecondPosition(start * 1000000)
          clip.start
          begin
            sleep(duration)
          rescue Interrupt
            # Only a user interrupt cuts playback short; anything else is a
            # genuine failure and must not be silently swallowed.
            $stderr.puts 'Stopping playback'
          end
        ensure
          clip.stop
          clip.close
        end
      ensure
        stream.close
      end
      nil
    end

  end

end
@@ -0,0 +1,5 @@
1
+ # require File.join(File.expand_path(File.dirname(__FILE__)), 'LIUM_SpkDiarization-4.2.jar')
2
+
3
# Top-level shortcut to the +Fr+ constant of the +Java+ namespace, so that
# callers can spell Java packages as +fr.lium...+.
def fr
  ::Java::Fr
end
@@ -0,0 +1,58 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), 'audio_player')
2
+
3
+ require 'rubygems'
4
+ require 'to_rdf'
5
+ require 'uri'
6
+
7
module Diarize

  # A stretch of an audio file attributed to one speaker, exposed both as a
  # plain Ruby object and as an RDF resource (through ToRdf).
  class Segment

    attr_reader :start, :duration, :gender, :bandwidth

    # audio      - object providing +base_uri+ and +file+
    # start      - segment start (passed to AudioPlayer#play as seconds)
    # duration   - segment length (same unit as +start+)
    # gender     - gender label of the speaker
    # bandwidth  - bandwidth label of the segment
    # speaker_id - identifier of the speaker, unique within +audio+
    def initialize(audio, start, duration, gender, bandwidth, speaker_id)
      @audio = audio
      @start = start
      @duration = duration
      # The :gender reader (and therefore 'ws:gender' in rdf_mapping) reads
      # @gender, so it has to be assigned here.
      @gender = gender
      @bandwidth = bandwidth
      @speaker_id = speaker_id
      # Kept under its historical name as well, for code that inspects it.
      @speaker_gender = gender
    end

    # Returns the Speaker registered for this segment's speaker URI
    # ("<audio base URI>#<speaker id>"), creating it on first use.
    def speaker
      Speaker.find_or_create(URI("#{@audio.base_uri}##{@speaker_id}"), @speaker_gender)
    end

    # Plays this segment through an AudioPlayer (blocks while playing).
    def play
      player = AudioPlayer.new
      player.play(@audio.file, start, duration)
    end

    include ToRdf

    # Adds the 'ws' prefix to the namespaces provided by ToRdf.
    def namespaces
      super.merge 'ws' => 'http://wsarchive.prototype0.net/ontology/'
    end

    # URI of the segment, written as a temporal media fragment
    # ("#t=<start>,<end>") of the audio base URI.
    def uri
      # http://www.w3.org/TR/media-frags/
      URI("#{@audio.base_uri}#t=#{start},#{start + duration}")
    end

    def type_uri
      'ws:Segment'
    end

    # Predicate => value pairs describing this segment.
    def rdf_mapping
      {
        'ws:start' => start,
        'ws:duration' => duration,
        'ws:gender' => gender,
        'ws:bandwidth' => bandwidth,
        'ws:speaker' => speaker,
      }
    end

  end

end
@@ -0,0 +1,37 @@
1
module Diarize

  # Builds lists of Segment objects from diarization output.
  class Segmentation

    # Parses a .seg file into an array of Segment objects.
    #
    # audio    - audio object handed to every Segment
    # seg_file - path of the segmentation file
    #
    # Lines starting with ';;' and blank lines are skipped. The start and
    # length columns are divided by 100.0 before being stored.
    def self.from_seg_file(audio, seg_file)
      segmentation = []
      # File.foreach closes the file once iteration ends (or raises).
      File.foreach(seg_file) do |line|
        next if line.start_with?(';;') || line.strip.empty?
        parts = line.split(' ')
        start = parts[2].to_i / 100.0
        duration = parts[3].to_i / 100.0
        gender = parts[4]
        # NOTE(review): the LIUM format appears to put the band in column 5
        # and the environment in column 6 -- confirm which one is intended.
        bandwidth = parts[6]
        speaker_id = parts[7]
        segmentation << Segment.new(audio, start, duration, gender, bandwidth, speaker_id)
      end
      segmentation
    end

    # Builds the array of Segment objects described by a cluster set.
    #
    # audio    - audio object handed to every Segment
    # clusters - enumerable of cluster names that also answers
    #            +getCluster(name)+; each cluster provides +gender+,
    #            +bandwidth+ and iterates over its segments
    def self.from_clusters(audio, clusters)
      segmentation = []
      clusters.map(&:to_s).each do |speaker_id|
        cluster = clusters.getCluster(speaker_id)
        gender = cluster.gender
        bandwidth = cluster.bandwidth
        cluster.each do |segment|
          segmentation << Segment.new(audio, segment.start_in_second, segment.length_in_second,
                                      gender, bandwidth, speaker_id)
        end
      end
      segmentation
    end

  end

end
@@ -0,0 +1,174 @@
1
+ require 'rubygems'
2
+ require 'to_rdf'
3
+ # require 'jblas'
4
+
5
module Diarize

  # A speaker found in an audio file, optionally backed by a GMM voice model
  # read through the LIUM library. Models can be normalised against the UBM
  # and compared to decide whether two speakers are the same person.
  class Speaker

    # include JBLAS

    # Minimum mean log-likelihood both models need before being compared.
    @@log_likelihood_threshold = -33
    # Detection score above which two speakers are considered the same.
    @@detection_threshold = 0.2

    # Registry used by find_or_create, keyed by speaker URI.
    @@speakers = {}

    attr_accessor :model_uri, :model, :normalized
    attr_reader :gender, :uri
    # Overrides the value reported by #mean_log_likelihood.
    attr_writer :mean_log_likelihood

    # uri        - URI identifying the speaker (may be nil)
    # gender     - gender label (may be nil)
    # model_file - path of a GMM file to load as the model (optional)
    def initialize(uri = nil, gender = nil, model_file = nil)
      @model = Speaker.load_model(model_file) if model_file
      @uri = uri
      @gender = gender
      @normalized = false
    end

    # Returns a Speaker wrapping the 'ubm.gmm' model that sits next to this
    # file. It is flagged as normalised so it never gets normalised itself.
    # The model is read from disk on every call.
    def self.ubm
      speaker = Speaker.new
      speaker.normalized = true
      speaker.model = Speaker.load_model(File.join(File.expand_path(File.dirname(__FILE__)), 'ubm.gmm'))
      speaker
    end

    # Explicitly assigned value if any, otherwise the model's own value.
    def mean_log_likelihood
      @mean_log_likelihood || model.mean_log_likelihood # Will be NaN if model was loaded from somewhere
    end

    # Writes the model to +filename+.
    def save_model(filename)
      # TODO perhaps a warning if a normalised model is being saved?
      write_gmm(filename, @model)
    end

    def self.detection_threshold=(threshold)
      @@detection_threshold = threshold
    end

    def self.detection_threshold
      @@detection_threshold
    end

    # Reads and returns the first GMM stored in +filename+.
    def self.load_model(filename)
      read_gmm(filename)
    end

    # Returns the Speaker registered under +uri+, creating and registering
    # one (with +gender+) when there is none yet.
    def self.find_or_create(uri, gender)
      @@speakers[uri] ||= Speaker.new(uri, gender)
    end

    # Divergence between the models of two speakers, or nil when either of
    # them has no model.
    def self.divergence(speaker1, speaker2)
      # TODO bundle in mean_log_likelihood to weight down unlikely models?
      return unless speaker1.model && speaker2.model
      # MAP Gaussian divergence
      # See "A model space framework for efficient speaker detection", Interspeech'05
      divergence_lium(speaker1, speaker2)
    end

    # Divergence computed by the LIUM library (Distance.GDMAP).
    def self.divergence_lium(speaker1, speaker2)
      # fr.lium.spkDiarization.libModel.Distance.GDMAP(speaker1.model, speaker2.model)
      Rjb::import('fr.lium.spkDiarization.libModel.Distance').GDMAP(speaker1.model, speaker2.model)
    end

    # Divergence computed in Ruby from the speakers' supervectors.
    def self.divergence_ruby(speaker1, speaker2)
      SuperVector.divergence(speaker1.supervector, speaker2.supervector)
    end

    # Returns every [s1, s2] pair, s1 from +speakers1+ and s2 from
    # +speakers2+, for which s1.same_speaker_as(s2) holds.
    def self.match_sets(speakers1, speakers2)
      matches = []
      speakers1.each do |s1|
        speakers2.each do |s2|
          matches << [ s1, s2 ] if s1.same_speaker_as(s2)
        end
      end
      matches
    end

    # Returns every pair within +speakers+ judged to be the same speaker.
    def self.match(speakers)
      speakers.combination(2).select { |s1, s2| s1.same_speaker_as(s2) }
    end

    # Normalises the model in place, once; later calls are no-ops.
    # Returns true.
    def normalize!
      return @normalized if @normalized
      # Applies M-Norm from "D-MAP: a Distance-Normalized MAP Estimation of Speaker Models for Automatic Speaker Verification"
      # to the associated GMM, placing it on a unit hyper-sphere with a UBM centre (model will be at distance one from the UBM
      # according to GDMAP)
      # Using supervectors: vector = (1.0 / distance_to_ubm) * vector + (1.0 - 1.0 / distance_to_ubm) * ubm_vector
      speaker_ubm = Speaker.ubm
      distance_to_ubm = Math.sqrt(Speaker.divergence(self, speaker_ubm))
      # Both blending factors are constant for the whole model.
      own_share = 1.0 / distance_to_ubm
      ubm_share = 1.0 - own_share
      ubm_components = speaker_ubm.model.components
      model.nb_of_components.times do |k|
        gaussian = model.components.get(k)
        ubm_gaussian = ubm_components.get(k)
        gaussian.dim.times do |i|
          normalized_mean = own_share * gaussian.mean(i) + ubm_share * ubm_gaussian.mean(i)
          gaussian.set_mean(i, normalized_mean)
        end
      end
      @normalized = true
    end

    # Whether +other+ looks like the same speaker as the receiver.
    #
    # Returns nil when either mean log-likelihood is not above the
    # log-likelihood threshold; otherwise normalises both speakers and
    # returns true/false depending on the detection score.
    def same_speaker_as(other)
      # Detection score defined in Ben2005
      return unless [ self.mean_log_likelihood, other.mean_log_likelihood ].min > @@log_likelihood_threshold
      self.normalize!
      other.normalize!
      detection_score = 1.0 - Speaker.divergence(other, self)
      detection_score > @@detection_threshold
    end

    # Supervector of the model, computed on first use and then cached.
    def supervector
      # TODO: cache only when normalized
      @supervector ||= SuperVector.generate_from_model(model)
    end

    include ToRdf

    # Adds the 'ws' prefix to the namespaces provided by ToRdf.
    def namespaces
      super.merge 'ws' => 'http://wsarchive.prototype0.net/ontology/'
    end

    def type_uri
      'ws:Speaker'
    end

    # Predicate => value pairs describing this speaker. The likelihood goes
    # through #mean_log_likelihood so an explicitly assigned value is honoured.
    def rdf_mapping
      {
        'ws:gender' => gender,
        'ws:model' => model_uri,
        'ws:mean_log_likelihood' => mean_log_likelihood,
        'ws:supervector_hash' => supervector.hash.to_s
      }
    end

    # Reads the first GMM of the container stored in +filename+.
    # Declared ahead of `protected` on purpose: that keyword never applied to
    # this `def self.` method, which has always been publicly callable.
    def self.read_gmm(filename)
      # gmmlist = java.util.ArrayList.new
      gmmlist = Rjb::JavaObjectWrapper.new("java.util.ArrayList")
      # input = fr.lium.spkDiarization.lib.IOFile.new(filename, 'rb')
      input = Rjb::import('fr.lium.spkDiarization.lib.IOFile').new(filename, 'rb')
      input.open
      begin
        # fr.lium.spkDiarization.libModel.ModelIO.readerGMMContainer(input, gmmlist)
        Rjb::import('fr.lium.spkDiarization.libModel.ModelIO').readerGMMContainer(input, gmmlist.java_object)
      ensure
        # Release the file even when the container cannot be read.
        input.close
      end
      gmmlist.to_a.first.java_object
    end

    protected

    # Writes +model+ to +filename+ as a one-element GMM container.
    def write_gmm(filename, model)
      # gmmlist = java.util.ArrayList.new
      gmmlist = Rjb::JavaObjectWrapper.new("java.util.ArrayList")
      # gmmlist << model.components
      gmmlist.java_object.add(model)
      # output = fr.lium.spkDiarization.lib.IOFile.new(filename, 'wb')
      output = Rjb::import('fr.lium.spkDiarization.lib.IOFile').new(filename, 'wb')
      output.open
      begin
        Rjb::import('fr.lium.spkDiarization.libModel.ModelIO').writerGMMContainer(output, gmmlist.java_object)
      ensure
        # Release the file even when writing fails.
        output.close
      end
    end

  end

end
@@ -0,0 +1,77 @@
1
require 'matrix' # Vector is used throughout this class

module Diarize

  # Flat vector view of a GMM: the means of every Gaussian component laid
  # out one after another.
  class SuperVector
    attr_reader :vector

    # vector - Vector holding the supervector's elements
    def initialize(vector)
      @vector = vector
    end

    # Generates a supervector from a LIUM GMM: element k * dim + i is the
    # i-th mean of the k-th Gaussian.
    def self.generate_from_model(model)
      SuperVector.new(Vector.elements(expand_per_dimension(model) { |gaussian, i| gaussian.mean(i) }))
    end

    # Returns a vector of gaussian weights, same dimension as speaker's super
    # vectors (each weight is repeated once per dimension). Computed from
    # Speaker.ubm on first use, then cached.
    def self.ubm_gaussian_weights
      @@ubm_gaussian_weights ||= Vector.elements(
        expand_per_dimension(Speaker.ubm.model) { |gaussian, _i| gaussian.weight }
      )
    end

    # Returns a vector of diagonal covariances, same dimension as speaker's
    # super vectors. Computed from Speaker.ubm on first use, then cached.
    def self.ubm_covariance
      @@ubm_covariance ||= Vector.elements(
        expand_per_dimension(Speaker.ubm.model) { |gaussian, i| gaussian.getCovariance(i, i) }
      )
    end

    # Sum over every dimension of weight * (difference ** 2 / covariance),
    # with weights and covariances taken from the UBM.
    def self.divergence(sv1, sv2)
      # ubm_gaussian_weights.mul(((sv1.vector - sv2.vector) ** 2) / ubm_covariance).sum
      diff = sv1.vector - sv2.vector
      weights = ubm_gaussian_weights
      covariance = ubm_covariance
      total = 0
      diff.each_with_index do |delta, ix|
        total += weights[ix] * (delta ** 2 / covariance[ix])
      end
      total
    end

    # Number of elements in the supervector.
    def dim
      @vector.size
    end

    # Hash of the underlying vector.
    def hash
      @vector.hash
    end

    # Walks every (gaussian, dimension index) pair of +model+ in component
    # order and collects what the block returns into a flat array.
    def self.expand_per_dimension(model)
      values = []
      model.nb_of_components.times do |k|
        gaussian = model.components.get(k)
        gaussian.dim.times { |i| values << yield(gaussian, i) }
      end
      values
    end
    private_class_method :expand_per_dimension

  end

end
Binary file
@@ -0,0 +1,3 @@
1
module Diarize
  # Release number of the gem. Frozen so the shared constant cannot be
  # mutated in place by accident.
  VERSION = "0.3.0".freeze
end
@@ -0,0 +1,107 @@
1
+ require 'test_helper'
2
+ require 'ostruct'
3
+
4
# Unit tests for Diarize::Audio. Assertions follow the
# assert_equal(expected, actual) order so failure messages read correctly.
class AudioTest < Test::Unit::TestCase

  def setup
    @audio = Diarize::Audio.new local_audio_uri
  end

  def test_initialize_file_uri
    audio_uri = local_audio_uri
    audio = Diarize::Audio.new audio_uri
    assert_equal audio_uri, audio.uri
    assert_equal local_audio_path, audio.path
  end

  def test_initialize_http_uri
    audio_url = 'http://example.com/test.wav'
    cache_path = expect_remote_download(audio_url)
    audio = Diarize::Audio.new URI(audio_url)
    assert_equal cache_path, audio.path
  end

  def test_clean_local_file
    audio = Diarize::Audio.new local_audio_uri
    # A local file must never be deleted by clean!
    File.expects(:delete).never
    audio.clean!
  end

  def test_clean_http_file
    audio_url = 'http://example.com/test.wav'
    cache_path = expect_remote_download(audio_url)
    audio = Diarize::Audio.new URI(audio_url)
    # The downloaded copy is the thing that clean! removes.
    File.expects(:delete).with(cache_path).returns(true)
    audio.clean!
  end

  def test_segments_raises_exception_when_audio_is_not_analysed
    assert_raise Exception do
      @audio.segments
    end
  end

  def test_analyze
    # TODO - We don't test the full ESTER2 algorithm for now
  end

  def test_segments
    @audio.instance_variable_set('@segments', [1, 2, 3])
    assert_equal [1, 2, 3], @audio.segments
  end

  def test_speakers_is_cached
    @audio.instance_variable_set('@speakers', [1, 2, 3])
    assert_equal [1, 2, 3], @audio.speakers
  end

  def test_speakers
    segment1 = OpenStruct.new({ :speaker => 's1' })
    segment2 = OpenStruct.new({ :speaker => 's2' })
    @audio.instance_variable_set('@segments', [ segment1, segment2, segment1 ])
    assert_equal ['s1', 's2'], @audio.speakers
  end

  def test_segments_by_speaker
    segment1 = OpenStruct.new({ :speaker => 's1' })
    segment2 = OpenStruct.new({ :speaker => 's2' })
    @audio.instance_variable_set('@segments', [ segment1, segment2, segment1 ])
    assert_equal [ segment1, segment1 ], @audio.segments_by_speaker('s1')
    assert_equal [ segment2 ], @audio.segments_by_speaker('s2')
  end

  def test_duration_by_speaker
    segment1 = OpenStruct.new({ :speaker => 's1', :duration => 2})
    segment2 = OpenStruct.new({ :speaker => 's2', :duration => 3})
    @audio.instance_variable_set('@segments', [ segment1, segment2, segment1 ])
    assert_equal 4, @audio.duration_by_speaker('s1')
    assert_equal 3, @audio.duration_by_speaker('s2')
  end

  def test_top_speakers
    segment1 = OpenStruct.new({ :speaker => 's1', :duration => 2})
    segment2 = OpenStruct.new({ :speaker => 's2', :duration => 3})
    @audio.instance_variable_set('@segments', [ segment1, segment2, segment1 ])
    assert_equal ['s1', 's2'], @audio.top_speakers
  end

  def test_set_uri_and_type_uri
    @audio.uri = 'foo'
    @audio.type_uri = 'bar'
    assert_equal 'foo', @audio.uri
    assert_equal 'bar', @audio.type_uri
  end

  def test_show
    assert_equal 'foo', @audio.show
  end

  private

  # Path of the sample WAV file shipped alongside the tests.
  def local_audio_path
    File.join(File.dirname(__FILE__), 'data', 'foo.wav')
  end

  # file: URI pointing at the sample WAV file.
  def local_audio_uri
    URI('file:' + local_audio_path)
  end

  # Sets up the expectations for downloading +audio_url+: File.new is
  # expected on '/tmp/<MD5 of the URL>' and the GET request is stubbed with
  # an empty 200 response. Returns that path.
  def expect_remote_download(audio_url)
    cache_path = '/tmp/' + Digest::MD5.hexdigest(audio_url)
    File.expects(:new).with(cache_path).returns(true)
    stub_request(:get, audio_url).with(:headers => {'Accept'=>'*/*', 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', 'User-Agent'=>'Ruby'}).
      to_return(:status => 200, :body => "", :headers => {})
    cache_path
  end

end