diarize-ruby 0.3.6 → 0.3.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/README.md +3 -4
- data/bin/diarize +18 -24
- data/lib/diarize/audio.rb +21 -13
- data/lib/diarize/segment.rb +5 -5
- data/lib/diarize/segmentation.rb +9 -9
- data/lib/diarize/version.rb +1 -1
- data/lib/diarize.rb +1 -1
- data/test/audio_test.rb +2 -2
- data/test/version_test.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: acabfb7160ffeb0ee59815441c7108aebd3dbc57
|
4
|
+
data.tar.gz: 1e1ac96bdeb06c3546ef652f1b8324f38c42316f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f453a319e2d7e4084fa95fa33240a78773bcf6d7dddf8551b02337a5a9ed294ebc48317c52ba2d845fb2f095f86397038e0759124aa4409721a1f3d5cf2f7786
|
7
|
+
data.tar.gz: 6be77004bd7ba178fba11f5b3e8510357f7c2708b5fc49bfc302a570af8b6e0727603cd776373e157c1227e7e354b1abf3bec63810d87f5df7b72a7148e99dbc
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
## [v0.3.7] - 2016-11-18
|
2
|
+
|
3
|
+
- Sort segments by start time by default
|
4
|
+
- Allow passing audio file names to `Diarize::Audio.new("~/foo.wav")`
|
5
|
+
- Download HTTPS URLs without certificate verification
|
6
|
+
- Fix `diarize` druby URI for remote audio
|
7
|
+
|
1
8
|
## [v0.3.6] - 2016-11-16
|
2
9
|
|
3
10
|
- Add pidfile option to diarize server command (--pidfile, -P)
|
data/README.md
CHANGED
@@ -71,8 +71,8 @@ Some Java implementations (i.e. OpenJDK on Linux) are causing trouble running [R
|
|
71
71
|
Start the diarizer in a separate process as a server:
|
72
72
|
|
73
73
|
$ diarize server -P 9999 -H localhost
|
74
|
-
|
75
|
-
diarize-ruby
|
74
|
+
DRb server
|
75
|
+
diarize-ruby x.y.z
|
76
76
|
Listening on druby://localhost:9999, CTRL+C to stop
|
77
77
|
|
78
78
|
### Client
|
@@ -91,8 +91,7 @@ server_uri = "druby://localhost:9999"
|
|
91
91
|
DRb.start_service
|
92
92
|
client = DRbObject.new_with_uri(server_uri)
|
93
93
|
|
94
|
-
|
95
|
-
audio = client.build_audio(audio_uri)
|
94
|
+
audio = client.build_audio(File.join("test", "data", "will-and-juergen.wav"))
|
96
95
|
audio.analyze!
|
97
96
|
audio.segments
|
98
97
|
...
|
data/bin/diarize
CHANGED
@@ -3,39 +3,21 @@ require "diarize"
|
|
3
3
|
require "gli"
|
4
4
|
require "uri"
|
5
5
|
require "drb/drb"
|
6
|
-
require "byebug"
|
7
6
|
|
8
7
|
include GLI::App
|
9
8
|
|
10
|
-
def uri_from_args(args)
|
11
|
-
url_or_file_name = args.first
|
12
|
-
uri = URI.parse(url_or_file_name)
|
13
|
-
if uri.scheme && uri.scheme.match(/^(http|https|file)$/)
|
14
|
-
uri = url_or_file_name
|
15
|
-
else
|
16
|
-
uri = if url_or_file_name[0] == "/"
|
17
|
-
URI.join('file:///', url_or_file_name)
|
18
|
-
else
|
19
|
-
URI.join('file:///', File.join(File.expand_path(Dir.pwd), url_or_file_name))
|
20
|
-
end
|
21
|
-
end
|
22
|
-
uri
|
23
|
-
end
|
24
|
-
|
25
9
|
def build_audio_from_args(args, options = {})
|
26
|
-
|
27
|
-
|
28
|
-
audio = Diarize::Audio.new(uri)
|
10
|
+
$stdout.puts args.first.to_s if options[:verbose]
|
11
|
+
audio = Diarize::Audio.new(args.first)
|
29
12
|
audio.analyze!
|
30
13
|
audio
|
31
14
|
end
|
32
15
|
|
33
16
|
def build_remote_audio_from_args(args, options = {})
|
34
17
|
remote = client(options)
|
35
|
-
audio_uri = uri_from_args(args)
|
36
18
|
|
37
|
-
$stdout.puts
|
38
|
-
audio = remote.build_audio(
|
19
|
+
$stdout.puts args.first.to_s if options[:verbose]
|
20
|
+
audio = remote.build_audio(args.first)
|
39
21
|
audio.analyze!
|
40
22
|
audio
|
41
23
|
rescue DRb::DRbConnError => ex
|
@@ -76,7 +58,7 @@ command [:remote, :r] do |remote|
|
|
76
58
|
audio.arg_name 'FILENAME', :multiple
|
77
59
|
audio.command [:se, :segment, :segments] do |segments|
|
78
60
|
segments.action do |global_options, options, args|
|
79
|
-
audio = build_remote_audio_from_args(args, options.
|
61
|
+
audio = build_remote_audio_from_args(args, options.first.last.first.last)
|
80
62
|
audio.segments.each do |segment|
|
81
63
|
$stdout.puts segment.uri
|
82
64
|
end if audio
|
@@ -87,13 +69,25 @@ command [:remote, :r] do |remote|
|
|
87
69
|
audio.arg_name 'FILENAME', :multiple
|
88
70
|
audio.command [:sp, :speaker, :speakers] do |speakers|
|
89
71
|
speakers.action do |global_options, options, args|
|
90
|
-
audio = build_remote_audio_from_args(args, options.
|
72
|
+
audio = build_remote_audio_from_args(args, options.first.last.first.last)
|
91
73
|
audio.speakers.each do |speaker|
|
92
74
|
$stdout.puts speaker.uri
|
93
75
|
end if audio
|
94
76
|
end
|
95
77
|
end
|
96
78
|
end
|
79
|
+
|
80
|
+
#-h, --host HOSTNAME
|
81
|
+
remote.desc 'Host, e.g. "localhost"'
|
82
|
+
remote.default_value 'localhost'
|
83
|
+
remote.arg_name 'HOSTNAME'
|
84
|
+
remote.flag [:h, :host]
|
85
|
+
|
86
|
+
#-p, --port PORT
|
87
|
+
remote.desc 'Port number'
|
88
|
+
remote.default_value 9999
|
89
|
+
remote.arg_name 'PORT'
|
90
|
+
remote.flag [:p, :port]
|
97
91
|
end
|
98
92
|
|
99
93
|
desc 'audio file'
|
data/lib/diarize/audio.rb
CHANGED
@@ -2,22 +2,30 @@ module Diarize
|
|
2
2
|
class Audio
|
3
3
|
attr_reader :path, :file, :uri
|
4
4
|
|
5
|
-
def initialize(
|
6
|
-
|
7
|
-
|
8
|
-
|
5
|
+
def initialize(uri_url_or_file_name)
|
6
|
+
if uri_url_or_file_name.is_a?(URI)
|
7
|
+
@uri = uri_url_or_file_name
|
8
|
+
elsif uri_url_or_file_name.is_a?(String)
|
9
|
+
# url or file name
|
10
|
+
@uri = URI.parse(uri_url_or_file_name)
|
11
|
+
if @uri.scheme && @uri.scheme.match(/^(http|https|file)$/)
|
12
|
+
# url or file:/// uri, do nothing
|
13
|
+
else
|
14
|
+
@uri = URI.join('file:///', File.join(File.expand_path(uri_url_or_file_name)))
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
if @uri.scheme == 'file'
|
9
19
|
@path = uri.path
|
10
20
|
else
|
11
|
-
#
|
12
|
-
@path = '/tmp/' + Digest::MD5.hexdigest(uri.to_s)
|
13
|
-
File.open(@path, "wb") {|f| f << open(uri).read
|
21
|
+
# remote file, we download it locally
|
22
|
+
@path = '/tmp/' + Digest::MD5.hexdigest(@uri.to_s)
|
23
|
+
File.open(@path, "wb") {|f| f << open(@uri, {ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE}).read}
|
14
24
|
end
|
15
25
|
|
16
|
-
|
17
|
-
raise "Unable to locate: #{@path}. Check that the file is available at #{uri.inspect}."
|
18
|
-
end
|
26
|
+
raise "Unable to locate '#{@path}' from '#{@uri.inspect}'." unless File.exist?(@path)
|
19
27
|
|
20
|
-
@file = File.new
|
28
|
+
@file = File.new(@path)
|
21
29
|
end
|
22
30
|
|
23
31
|
def analyze!(train_speaker_models = true)
|
@@ -37,7 +45,7 @@ module Diarize
|
|
37
45
|
#parameter.parameterDiarization.cEClustering = true # We use CE clustering by default
|
38
46
|
parameter.parameterInputFeature.setFeatureMask(@path)
|
39
47
|
@clusters = ester2(parameter)
|
40
|
-
@segments = Segmentation.from_clusters(self, @clusters)
|
48
|
+
@segments = Segmentation.from_clusters(self, @clusters).sort_by(&:start)
|
41
49
|
train_speaker_gmms if train_speaker_models
|
42
50
|
end
|
43
51
|
|
@@ -47,7 +55,7 @@ module Diarize
|
|
47
55
|
end
|
48
56
|
|
49
57
|
def segments
|
50
|
-
raise
|
58
|
+
raise RuntimeError, "You need to run analyze! before being able to access the analysis results" unless @segments
|
51
59
|
@segments
|
52
60
|
end
|
53
61
|
|
data/lib/diarize/segment.rb
CHANGED
@@ -5,11 +5,11 @@ module Diarize
|
|
5
5
|
attr_reader :start, :duration, :gender, :bandwidth
|
6
6
|
|
7
7
|
def initialize(audio, start, duration, gender, bandwidth, speaker_id)
|
8
|
-
@audio
|
9
|
-
@start
|
10
|
-
@duration
|
11
|
-
@bandwidth
|
12
|
-
@speaker_id
|
8
|
+
@audio = audio
|
9
|
+
@start = start
|
10
|
+
@duration = duration
|
11
|
+
@bandwidth = bandwidth
|
12
|
+
@speaker_id = speaker_id
|
13
13
|
@speaker_gender = gender
|
14
14
|
end
|
15
15
|
|
data/lib/diarize/segmentation.rb
CHANGED
@@ -5,12 +5,12 @@ module Diarize
|
|
5
5
|
segmentation = []
|
6
6
|
File.open(seg_file).each_line do |line|
|
7
7
|
next if line.start_with? ';;'
|
8
|
-
parts
|
9
|
-
start
|
10
|
-
duration
|
11
|
-
gender
|
12
|
-
bandwidth
|
13
|
-
speaker_id
|
8
|
+
parts = line.split(' ')
|
9
|
+
start = parts[2].to_i / 100.0
|
10
|
+
duration = parts[3].to_i / 100.0
|
11
|
+
gender = parts[4]
|
12
|
+
bandwidth = parts[6]
|
13
|
+
speaker_id = parts[7]
|
14
14
|
segmentation << Segment.new(audio, start, duration, gender, bandwidth, speaker_id)
|
15
15
|
end
|
16
16
|
segmentation
|
@@ -19,9 +19,9 @@ module Diarize
|
|
19
19
|
def self.from_clusters(audio, clusters)
|
20
20
|
segmentation = []
|
21
21
|
clusters.map(&:to_s).each do |speaker_id|
|
22
|
-
cluster
|
23
|
-
gender
|
24
|
-
bandwidth
|
22
|
+
cluster = clusters.getCluster(speaker_id)
|
23
|
+
gender = cluster.gender
|
24
|
+
bandwidth = cluster.bandwidth
|
25
25
|
cluster.each do |segment|
|
26
26
|
start = segment.start_in_second
|
27
27
|
duration = segment.length_in_second
|
data/lib/diarize/version.rb
CHANGED
data/lib/diarize.rb
CHANGED
data/test/audio_test.rb
CHANGED
@@ -43,8 +43,8 @@ class AudioTest < Test::Unit::TestCase
|
|
43
43
|
audio.clean!
|
44
44
|
end
|
45
45
|
|
46
|
-
def
|
47
|
-
assert_raise
|
46
|
+
def test_segments_raises_exception_when_audio_is_not_analyzed
|
47
|
+
assert_raise RuntimeError do
|
48
48
|
@audio.segments
|
49
49
|
end
|
50
50
|
end
|
data/test/version_test.rb
CHANGED