diarize-ruby 0.3.6 → 0.3.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 36e00b3c5b5a169cc746f0cf140e3d1fbb5e2055
4
- data.tar.gz: d082d84e13a307504f5fac7916549d3d201a5415
3
+ metadata.gz: acabfb7160ffeb0ee59815441c7108aebd3dbc57
4
+ data.tar.gz: 1e1ac96bdeb06c3546ef652f1b8324f38c42316f
5
5
  SHA512:
6
- metadata.gz: 0d44decde02e7d5f74e4dac4247c9e2ff1f5d95f169112f1b34108c3e510be74f68d393705135e76d93719da6ea09539ff9ee46aed235d82c5fe6e3e2909acaf
7
- data.tar.gz: f15c9b16585f52566e532d8417b23cb946629fda36db1d0b6de189cb6be0004a5893c0816a34b1acd636c63bde375e237e9871697001116cae3694e08ef207e0
6
+ metadata.gz: f453a319e2d7e4084fa95fa33240a78773bcf6d7dddf8551b02337a5a9ed294ebc48317c52ba2d845fb2f095f86397038e0759124aa4409721a1f3d5cf2f7786
7
+ data.tar.gz: 6be77004bd7ba178fba11f5b3e8510357f7c2708b5fc49bfc302a570af8b6e0727603cd776373e157c1227e7e354b1abf3bec63810d87f5df7b72a7148e99dbc
data/CHANGELOG.md CHANGED
@@ -1,3 +1,10 @@
1
+ ## [v0.3.7] - 2016-11-18
2
+
3
+ - Sort segments by start time by default
4
+ - Allow passing audio file names to `Diarize::Audio.new`, e.g. `Diarize::Audio.new("~/foo.wav")`
5
+ - Download HTTPS URLs without verifying SSL certificates
6
+ - Fix `diarize` druby URI for remote audio
7
+
1
8
  ## [v0.3.6] - 2016-11-16
2
9
 
3
10
  - Add pidfile option to diarize server command (--pidfile, -P)
data/README.md CHANGED
@@ -71,8 +71,8 @@ Some Java implementations (i.e. OpenJDK on Linux) are causing trouble running [R
71
71
  Start the diarizer in a separate process as a server:
72
72
 
73
73
  $ diarize server -P 9999 -H localhost
74
- Drb server
75
- diarize-ruby 0.3.4
74
+ DRb server
75
+ diarize-ruby x.y.z
76
76
  Listening on druby://localhost:9999, CTRL+C to stop
77
77
 
78
78
  ### Client
@@ -91,8 +91,7 @@ server_uri = "druby://localhost:9999"
91
91
  DRb.start_service
92
92
  client = DRbObject.new_with_uri(server_uri)
93
93
 
94
- audio_uri = URI.join('file:///', File.join(File.expand_path(File.dirname(__FILE__)), "test", "data", "will-and-juergen.wav"))
95
- audio = client.build_audio(audio_uri)
94
+ audio = client.build_audio(File.join("test", "data", "will-and-juergen.wav"))
96
95
  audio.analyze!
97
96
  audio.segments
98
97
  ...
data/bin/diarize CHANGED
@@ -3,39 +3,21 @@ require "diarize"
3
3
  require "gli"
4
4
  require "uri"
5
5
  require "drb/drb"
6
- require "byebug"
7
6
 
8
7
  include GLI::App
9
8
 
10
- def uri_from_args(args)
11
- url_or_file_name = args.first
12
- uri = URI.parse(url_or_file_name)
13
- if uri.scheme && uri.scheme.match(/^(http|https|file)$/)
14
- uri = url_or_file_name
15
- else
16
- uri = if url_or_file_name[0] == "/"
17
- URI.join('file:///', url_or_file_name)
18
- else
19
- URI.join('file:///', File.join(File.expand_path(Dir.pwd), url_or_file_name))
20
- end
21
- end
22
- uri
23
- end
24
-
25
9
  def build_audio_from_args(args, options = {})
26
- uri = uri_from_args(args)
27
- $stdout.puts uri.to_s if options[:verbose]
28
- audio = Diarize::Audio.new(uri)
10
+ $stdout.puts args.first.to_s if options[:verbose]
11
+ audio = Diarize::Audio.new(args.first)
29
12
  audio.analyze!
30
13
  audio
31
14
  end
32
15
 
33
16
  def build_remote_audio_from_args(args, options = {})
34
17
  remote = client(options)
35
- audio_uri = uri_from_args(args)
36
18
 
37
- $stdout.puts uri.to_s if options[:verbose]
38
- audio = remote.build_audio(audio_uri)
19
+ $stdout.puts args.first.to_s if options[:verbose]
20
+ audio = remote.build_audio(args.first)
39
21
  audio.analyze!
40
22
  audio
41
23
  rescue DRb::DRbConnError => ex
@@ -76,7 +58,7 @@ command [:remote, :r] do |remote|
76
58
  audio.arg_name 'FILENAME', :multiple
77
59
  audio.command [:se, :segment, :segments] do |segments|
78
60
  segments.action do |global_options, options, args|
79
- audio = build_remote_audio_from_args(args, options.merge(global_options))
61
+ audio = build_remote_audio_from_args(args, options.first.last.first.last)
80
62
  audio.segments.each do |segment|
81
63
  $stdout.puts segment.uri
82
64
  end if audio
@@ -87,13 +69,25 @@ command [:remote, :r] do |remote|
87
69
  audio.arg_name 'FILENAME', :multiple
88
70
  audio.command [:sp, :speaker, :speakers] do |speakers|
89
71
  speakers.action do |global_options, options, args|
90
- audio = build_remote_audio_from_args(args, options.merge(global_options))
72
+ audio = build_remote_audio_from_args(args, options.first.last.first.last)
91
73
  audio.speakers.each do |speaker|
92
74
  $stdout.puts speaker.uri
93
75
  end if audio
94
76
  end
95
77
  end
96
78
  end
79
+
80
+ #-h, --host HOSTNAME
81
+ remote.desc 'Host, e.g. "localhost"'
82
+ remote.default_value 'localhost'
83
+ remote.arg_name 'HOSTNAME'
84
+ remote.flag [:h, :host]
85
+
86
+ #-p, --port PORT
87
+ remote.desc 'Port number'
88
+ remote.default_value 9999
89
+ remote.arg_name 'PORT'
90
+ remote.flag [:p, :port]
97
91
  end
98
92
 
99
93
  desc 'audio file'
data/lib/diarize/audio.rb CHANGED
@@ -2,22 +2,30 @@ module Diarize
2
2
  class Audio
3
3
  attr_reader :path, :file, :uri
4
4
 
5
- def initialize(url_or_uri)
6
- @uri = url_or_uri.is_a?(String) ? URI(url_or_uri) : url_or_uri
7
- if uri.scheme == 'file'
8
- # Local file
5
+ def initialize(uri_url_or_file_name)
6
+ if uri_url_or_file_name.is_a?(URI)
7
+ @uri = uri_url_or_file_name
8
+ elsif uri_url_or_file_name.is_a?(String)
9
+ # url or file name
10
+ @uri = URI.parse(uri_url_or_file_name)
11
+ if @uri.scheme && @uri.scheme.match(/^(http|https|file)$/)
12
+ # url or file:/// uri, do nothing
13
+ else
14
+ @uri = URI.join('file:///', File.join(File.expand_path(uri_url_or_file_name)))
15
+ end
16
+ end
17
+
18
+ if @uri.scheme == 'file'
9
19
  @path = uri.path
10
20
  else
11
- # Remote file, we get it locally
12
- @path = '/tmp/' + Digest::MD5.hexdigest(uri.to_s)
13
- File.open(@path, "wb") {|f| f << open(uri).read }
21
+ # remote file, we download it locally
22
+ @path = '/tmp/' + Digest::MD5.hexdigest(@uri.to_s)
23
+ File.open(@path, "wb") {|f| f << open(@uri, {ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE}).read}
14
24
  end
15
25
 
16
- if !File.exist?(@path)
17
- raise "Unable to locate: #{@path}. Check that the file is available at #{uri.inspect}."
18
- end
26
+ raise "Unable to locate '#{@path}' from '#{@uri.inspect}'." unless File.exist?(@path)
19
27
 
20
- @file = File.new @path
28
+ @file = File.new(@path)
21
29
  end
22
30
 
23
31
  def analyze!(train_speaker_models = true)
@@ -37,7 +45,7 @@ module Diarize
37
45
  #parameter.parameterDiarization.cEClustering = true # We use CE clustering by default
38
46
  parameter.parameterInputFeature.setFeatureMask(@path)
39
47
  @clusters = ester2(parameter)
40
- @segments = Segmentation.from_clusters(self, @clusters)
48
+ @segments = Segmentation.from_clusters(self, @clusters).sort_by(&:start)
41
49
  train_speaker_gmms if train_speaker_models
42
50
  end
43
51
 
@@ -47,7 +55,7 @@ module Diarize
47
55
  end
48
56
 
49
57
  def segments
50
- raise Exception.new('You need to run analyze! before being able to access the analysis results') unless @segments
58
+ raise RuntimeError, "You need to run analyze! before being able to access the analysis results" unless @segments
51
59
  @segments
52
60
  end
53
61
 
@@ -5,11 +5,11 @@ module Diarize
5
5
  attr_reader :start, :duration, :gender, :bandwidth
6
6
 
7
7
  def initialize(audio, start, duration, gender, bandwidth, speaker_id)
8
- @audio = audio
9
- @start = start
10
- @duration = duration
11
- @bandwidth = bandwidth
12
- @speaker_id = speaker_id
8
+ @audio = audio
9
+ @start = start
10
+ @duration = duration
11
+ @bandwidth = bandwidth
12
+ @speaker_id = speaker_id
13
13
  @speaker_gender = gender
14
14
  end
15
15
 
@@ -5,12 +5,12 @@ module Diarize
5
5
  segmentation = []
6
6
  File.open(seg_file).each_line do |line|
7
7
  next if line.start_with? ';;'
8
- parts = line.split(' ')
9
- start = parts[2].to_i / 100.0
10
- duration = parts[3].to_i / 100.0
11
- gender = parts[4]
12
- bandwidth = parts[6]
13
- speaker_id = parts[7]
8
+ parts = line.split(' ')
9
+ start = parts[2].to_i / 100.0
10
+ duration = parts[3].to_i / 100.0
11
+ gender = parts[4]
12
+ bandwidth = parts[6]
13
+ speaker_id = parts[7]
14
14
  segmentation << Segment.new(audio, start, duration, gender, bandwidth, speaker_id)
15
15
  end
16
16
  segmentation
@@ -19,9 +19,9 @@ module Diarize
19
19
  def self.from_clusters(audio, clusters)
20
20
  segmentation = []
21
21
  clusters.map(&:to_s).each do |speaker_id|
22
- cluster = clusters.getCluster(speaker_id)
23
- gender = cluster.gender
24
- bandwidth = cluster.bandwidth
22
+ cluster = clusters.getCluster(speaker_id)
23
+ gender = cluster.gender
24
+ bandwidth = cluster.bandwidth
25
25
  cluster.each do |segment|
26
26
  start = segment.start_in_second
27
27
  duration = segment.length_in_second
@@ -1,3 +1,3 @@
1
1
  module Diarize
2
- VERSION = "0.3.6"
2
+ VERSION = "0.3.7"
3
3
  end
data/lib/diarize.rb CHANGED
@@ -7,10 +7,10 @@ Rjb::load(RJB_LOAD_PATH, RJB_OPTIONS)
7
7
 
8
8
  require "matrix"
9
9
 
10
- require "to_rdf"
11
10
  require "uri"
12
11
  require "open-uri"
13
12
  require "digest/md5"
13
+ require "to_rdf"
14
14
 
15
15
  require "diarize/version"
16
16
  require "diarize/audio"
data/test/audio_test.rb CHANGED
@@ -43,8 +43,8 @@ class AudioTest < Test::Unit::TestCase
43
43
  audio.clean!
44
44
  end
45
45
 
46
- def test_segments_raises_exception_when_audio_is_not_analysed
47
- assert_raise Exception do
46
+ def test_segments_raises_exception_when_audio_is_not_analyzed
47
+ assert_raise RuntimeError do
48
48
  @audio.segments
49
49
  end
50
50
  end
data/test/version_test.rb CHANGED
@@ -3,7 +3,7 @@ require 'test_helper'
3
3
  class VersionTest < Test::Unit::TestCase
4
4
 
5
5
  def test_current_version
6
- assert_equal "0.3.6", Diarize::VERSION
6
+ assert_equal "0.3.7", Diarize::VERSION
7
7
  end
8
8
 
9
9
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: diarize-ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.6
4
+ version: 0.3.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yves Raimond