diarize-ruby 0.3.6 → 0.3.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/README.md +3 -4
- data/bin/diarize +18 -24
- data/lib/diarize/audio.rb +21 -13
- data/lib/diarize/segment.rb +5 -5
- data/lib/diarize/segmentation.rb +9 -9
- data/lib/diarize/version.rb +1 -1
- data/lib/diarize.rb +1 -1
- data/test/audio_test.rb +2 -2
- data/test/version_test.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: acabfb7160ffeb0ee59815441c7108aebd3dbc57
|
4
|
+
data.tar.gz: 1e1ac96bdeb06c3546ef652f1b8324f38c42316f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f453a319e2d7e4084fa95fa33240a78773bcf6d7dddf8551b02337a5a9ed294ebc48317c52ba2d845fb2f095f86397038e0759124aa4409721a1f3d5cf2f7786
|
7
|
+
data.tar.gz: 6be77004bd7ba178fba11f5b3e8510357f7c2708b5fc49bfc302a570af8b6e0727603cd776373e157c1227e7e354b1abf3bec63810d87f5df7b72a7148e99dbc
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
## [v0.3.7] - 2016-11-18
|
2
|
+
|
3
|
+
- Sort segments by start time by default
|
4
|
+
- Allow passing audio file names to `Diarize::Audio.new("~/foo.wav")`
|
5
|
+
- Download HTTPS URLs without certificate verification
|
6
|
+
- Fix `diarize` dRuby URI for remote audio
|
7
|
+
|
1
8
|
## [v0.3.6] - 2016-11-16
|
2
9
|
|
3
10
|
- Add pidfile option to diarize server command (--pidfile, -P)
|
data/README.md
CHANGED
@@ -71,8 +71,8 @@ Some Java implementations (i.e. OpenJDK on Linux) are causing trouble running [R
|
|
71
71
|
Start the diarizer in a separate process as a server:
|
72
72
|
|
73
73
|
$ diarize server -P 9999 -H localhost
|
74
|
-
|
75
|
-
diarize-ruby
|
74
|
+
DRb server
|
75
|
+
diarize-ruby x.y.z
|
76
76
|
Listening on druby://localhost:9999, CTRL+C to stop
|
77
77
|
|
78
78
|
### Client
|
@@ -91,8 +91,7 @@ server_uri = "druby://localhost:9999"
|
|
91
91
|
DRb.start_service
|
92
92
|
client = DRbObject.new_with_uri(server_uri)
|
93
93
|
|
94
|
-
|
95
|
-
audio = client.build_audio(audio_uri)
|
94
|
+
audio = client.build_audio(File.join("test", "data", "will-and-juergen.wav"))
|
96
95
|
audio.analyze!
|
97
96
|
audio.segments
|
98
97
|
...
|
data/bin/diarize
CHANGED
@@ -3,39 +3,21 @@ require "diarize"
|
|
3
3
|
require "gli"
|
4
4
|
require "uri"
|
5
5
|
require "drb/drb"
|
6
|
-
require "byebug"
|
7
6
|
|
8
7
|
include GLI::App
|
9
8
|
|
10
|
-
def uri_from_args(args)
|
11
|
-
url_or_file_name = args.first
|
12
|
-
uri = URI.parse(url_or_file_name)
|
13
|
-
if uri.scheme && uri.scheme.match(/^(http|https|file)$/)
|
14
|
-
uri = url_or_file_name
|
15
|
-
else
|
16
|
-
uri = if url_or_file_name[0] == "/"
|
17
|
-
URI.join('file:///', url_or_file_name)
|
18
|
-
else
|
19
|
-
URI.join('file:///', File.join(File.expand_path(Dir.pwd), url_or_file_name))
|
20
|
-
end
|
21
|
-
end
|
22
|
-
uri
|
23
|
-
end
|
24
|
-
|
25
9
|
def build_audio_from_args(args, options = {})
|
26
|
-
|
27
|
-
|
28
|
-
audio = Diarize::Audio.new(uri)
|
10
|
+
$stdout.puts args.first.to_s if options[:verbose]
|
11
|
+
audio = Diarize::Audio.new(args.first)
|
29
12
|
audio.analyze!
|
30
13
|
audio
|
31
14
|
end
|
32
15
|
|
33
16
|
def build_remote_audio_from_args(args, options = {})
|
34
17
|
remote = client(options)
|
35
|
-
audio_uri = uri_from_args(args)
|
36
18
|
|
37
|
-
$stdout.puts
|
38
|
-
audio = remote.build_audio(
|
19
|
+
$stdout.puts args.first.to_s if options[:verbose]
|
20
|
+
audio = remote.build_audio(args.first)
|
39
21
|
audio.analyze!
|
40
22
|
audio
|
41
23
|
rescue DRb::DRbConnError => ex
|
@@ -76,7 +58,7 @@ command [:remote, :r] do |remote|
|
|
76
58
|
audio.arg_name 'FILENAME', :multiple
|
77
59
|
audio.command [:se, :segment, :segments] do |segments|
|
78
60
|
segments.action do |global_options, options, args|
|
79
|
-
audio = build_remote_audio_from_args(args, options.
|
61
|
+
audio = build_remote_audio_from_args(args, options.first.last.first.last)
|
80
62
|
audio.segments.each do |segment|
|
81
63
|
$stdout.puts segment.uri
|
82
64
|
end if audio
|
@@ -87,13 +69,25 @@ command [:remote, :r] do |remote|
|
|
87
69
|
audio.arg_name 'FILENAME', :multiple
|
88
70
|
audio.command [:sp, :speaker, :speakers] do |speakers|
|
89
71
|
speakers.action do |global_options, options, args|
|
90
|
-
audio = build_remote_audio_from_args(args, options.
|
72
|
+
audio = build_remote_audio_from_args(args, options.first.last.first.last)
|
91
73
|
audio.speakers.each do |speaker|
|
92
74
|
$stdout.puts speaker.uri
|
93
75
|
end if audio
|
94
76
|
end
|
95
77
|
end
|
96
78
|
end
|
79
|
+
|
80
|
+
#-h, --host HOSTNAME
|
81
|
+
remote.desc 'Host, e.g. "localhost"'
|
82
|
+
remote.default_value 'localhost'
|
83
|
+
remote.arg_name 'HOSTNAME'
|
84
|
+
remote.flag [:h, :host]
|
85
|
+
|
86
|
+
#-p, --port PORT
|
87
|
+
remote.desc 'Port number'
|
88
|
+
remote.default_value 9999
|
89
|
+
remote.arg_name 'PORT'
|
90
|
+
remote.flag [:p, :port]
|
97
91
|
end
|
98
92
|
|
99
93
|
desc 'audio file'
|
data/lib/diarize/audio.rb
CHANGED
@@ -2,22 +2,30 @@ module Diarize
|
|
2
2
|
class Audio
|
3
3
|
attr_reader :path, :file, :uri
|
4
4
|
|
5
|
-
def initialize(
|
6
|
-
|
7
|
-
|
8
|
-
|
5
|
+
def initialize(uri_url_or_file_name)
|
6
|
+
if uri_url_or_file_name.is_a?(URI)
|
7
|
+
@uri = uri_url_or_file_name
|
8
|
+
elsif uri_url_or_file_name.is_a?(String)
|
9
|
+
# url or file name
|
10
|
+
@uri = URI.parse(uri_url_or_file_name)
|
11
|
+
if @uri.scheme && @uri.scheme.match(/^(http|https|file)$/)
|
12
|
+
# url or file:/// uri, do nothing
|
13
|
+
else
|
14
|
+
@uri = URI.join('file:///', File.join(File.expand_path(uri_url_or_file_name)))
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
if @uri.scheme == 'file'
|
9
19
|
@path = uri.path
|
10
20
|
else
|
11
|
-
#
|
12
|
-
@path = '/tmp/' + Digest::MD5.hexdigest(uri.to_s)
|
13
|
-
File.open(@path, "wb") {|f| f << open(uri).read
|
21
|
+
# remote file, we download it locally
|
22
|
+
@path = '/tmp/' + Digest::MD5.hexdigest(@uri.to_s)
|
23
|
+
File.open(@path, "wb") {|f| f << open(@uri, {ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE}).read}
|
14
24
|
end
|
15
25
|
|
16
|
-
|
17
|
-
raise "Unable to locate: #{@path}. Check that the file is available at #{uri.inspect}."
|
18
|
-
end
|
26
|
+
raise "Unable to locate '#{@path}' from '#{@uri.inspect}'." unless File.exist?(@path)
|
19
27
|
|
20
|
-
@file = File.new
|
28
|
+
@file = File.new(@path)
|
21
29
|
end
|
22
30
|
|
23
31
|
def analyze!(train_speaker_models = true)
|
@@ -37,7 +45,7 @@ module Diarize
|
|
37
45
|
#parameter.parameterDiarization.cEClustering = true # We use CE clustering by default
|
38
46
|
parameter.parameterInputFeature.setFeatureMask(@path)
|
39
47
|
@clusters = ester2(parameter)
|
40
|
-
@segments = Segmentation.from_clusters(self, @clusters)
|
48
|
+
@segments = Segmentation.from_clusters(self, @clusters).sort_by(&:start)
|
41
49
|
train_speaker_gmms if train_speaker_models
|
42
50
|
end
|
43
51
|
|
@@ -47,7 +55,7 @@ module Diarize
|
|
47
55
|
end
|
48
56
|
|
49
57
|
def segments
|
50
|
-
raise
|
58
|
+
raise RuntimeError, "You need to run analyze! before being able to access the analysis results" unless @segments
|
51
59
|
@segments
|
52
60
|
end
|
53
61
|
|
data/lib/diarize/segment.rb
CHANGED
@@ -5,11 +5,11 @@ module Diarize
|
|
5
5
|
attr_reader :start, :duration, :gender, :bandwidth
|
6
6
|
|
7
7
|
def initialize(audio, start, duration, gender, bandwidth, speaker_id)
|
8
|
-
@audio
|
9
|
-
@start
|
10
|
-
@duration
|
11
|
-
@bandwidth
|
12
|
-
@speaker_id
|
8
|
+
@audio = audio
|
9
|
+
@start = start
|
10
|
+
@duration = duration
|
11
|
+
@bandwidth = bandwidth
|
12
|
+
@speaker_id = speaker_id
|
13
13
|
@speaker_gender = gender
|
14
14
|
end
|
15
15
|
|
data/lib/diarize/segmentation.rb
CHANGED
@@ -5,12 +5,12 @@ module Diarize
|
|
5
5
|
segmentation = []
|
6
6
|
File.open(seg_file).each_line do |line|
|
7
7
|
next if line.start_with? ';;'
|
8
|
-
parts
|
9
|
-
start
|
10
|
-
duration
|
11
|
-
gender
|
12
|
-
bandwidth
|
13
|
-
speaker_id
|
8
|
+
parts = line.split(' ')
|
9
|
+
start = parts[2].to_i / 100.0
|
10
|
+
duration = parts[3].to_i / 100.0
|
11
|
+
gender = parts[4]
|
12
|
+
bandwidth = parts[6]
|
13
|
+
speaker_id = parts[7]
|
14
14
|
segmentation << Segment.new(audio, start, duration, gender, bandwidth, speaker_id)
|
15
15
|
end
|
16
16
|
segmentation
|
@@ -19,9 +19,9 @@ module Diarize
|
|
19
19
|
def self.from_clusters(audio, clusters)
|
20
20
|
segmentation = []
|
21
21
|
clusters.map(&:to_s).each do |speaker_id|
|
22
|
-
cluster
|
23
|
-
gender
|
24
|
-
bandwidth
|
22
|
+
cluster = clusters.getCluster(speaker_id)
|
23
|
+
gender = cluster.gender
|
24
|
+
bandwidth = cluster.bandwidth
|
25
25
|
cluster.each do |segment|
|
26
26
|
start = segment.start_in_second
|
27
27
|
duration = segment.length_in_second
|
data/lib/diarize/version.rb
CHANGED
data/lib/diarize.rb
CHANGED
data/test/audio_test.rb
CHANGED
@@ -43,8 +43,8 @@ class AudioTest < Test::Unit::TestCase
|
|
43
43
|
audio.clean!
|
44
44
|
end
|
45
45
|
|
46
|
-
def
|
47
|
-
assert_raise
|
46
|
+
def test_segments_raises_exception_when_audio_is_not_analyzed
|
47
|
+
assert_raise RuntimeError do
|
48
48
|
@audio.segments
|
49
49
|
end
|
50
50
|
end
|
data/test/version_test.rb
CHANGED