diarize-ruby 0.3.6 → 0.3.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 36e00b3c5b5a169cc746f0cf140e3d1fbb5e2055
4
- data.tar.gz: d082d84e13a307504f5fac7916549d3d201a5415
3
+ metadata.gz: acabfb7160ffeb0ee59815441c7108aebd3dbc57
4
+ data.tar.gz: 1e1ac96bdeb06c3546ef652f1b8324f38c42316f
5
5
  SHA512:
6
- metadata.gz: 0d44decde02e7d5f74e4dac4247c9e2ff1f5d95f169112f1b34108c3e510be74f68d393705135e76d93719da6ea09539ff9ee46aed235d82c5fe6e3e2909acaf
7
- data.tar.gz: f15c9b16585f52566e532d8417b23cb946629fda36db1d0b6de189cb6be0004a5893c0816a34b1acd636c63bde375e237e9871697001116cae3694e08ef207e0
6
+ metadata.gz: f453a319e2d7e4084fa95fa33240a78773bcf6d7dddf8551b02337a5a9ed294ebc48317c52ba2d845fb2f095f86397038e0759124aa4409721a1f3d5cf2f7786
7
+ data.tar.gz: 6be77004bd7ba178fba11f5b3e8510357f7c2708b5fc49bfc302a570af8b6e0727603cd776373e157c1227e7e354b1abf3bec63810d87f5df7b72a7148e99dbc
data/CHANGELOG.md CHANGED
@@ -1,3 +1,10 @@
1
+ ## [v0.3.7] - 2016-11-18
2
+
3
+ - Sort segments by start time by default
4
+ - Allow passing audio file names to `Diarize::Audio.new("~/foo.wav")`
5
+ - Download https URLs without certificate checking
6
+ - Fix `diarize` druby uri for remote audio
7
+
1
8
  ## [v0.3.6] - 2016-11-16
2
9
 
3
10
  - Add pidfile option to diarize server command (--pidfile, -P)
data/README.md CHANGED
@@ -71,8 +71,8 @@ Some Java implementations (i.e. OpenJDK on Linux) are causing trouble running [R
71
71
  Start the diarizer in a separate process as a server:
72
72
 
73
73
  $ diarize server -P 9999 -H localhost
74
- Drb server
75
- diarize-ruby 0.3.4
74
+ DRb server
75
+ diarize-ruby x.y.z
76
76
  Listening on druby://localhost:9999, CTRL+C to stop
77
77
 
78
78
  ### Client
@@ -91,8 +91,7 @@ server_uri = "druby://localhost:9999"
91
91
  DRb.start_service
92
92
  client = DRbObject.new_with_uri(server_uri)
93
93
 
94
- audio_uri = URI.join('file:///', File.join(File.expand_path(File.dirname(__FILE__)), "test", "data", "will-and-juergen.wav"))
95
- audio = client.build_audio(audio_uri)
94
+ audio = client.build_audio(File.join("test", "data", "will-and-juergen.wav"))
96
95
  audio.analyze!
97
96
  audio.segments
98
97
  ...
data/bin/diarize CHANGED
@@ -3,39 +3,21 @@ require "diarize"
3
3
  require "gli"
4
4
  require "uri"
5
5
  require "drb/drb"
6
- require "byebug"
7
6
 
8
7
  include GLI::App
9
8
 
10
- def uri_from_args(args)
11
- url_or_file_name = args.first
12
- uri = URI.parse(url_or_file_name)
13
- if uri.scheme && uri.scheme.match(/^(http|https|file)$/)
14
- uri = url_or_file_name
15
- else
16
- uri = if url_or_file_name[0] == "/"
17
- URI.join('file:///', url_or_file_name)
18
- else
19
- URI.join('file:///', File.join(File.expand_path(Dir.pwd), url_or_file_name))
20
- end
21
- end
22
- uri
23
- end
24
-
25
9
  def build_audio_from_args(args, options = {})
26
- uri = uri_from_args(args)
27
- $stdout.puts uri.to_s if options[:verbose]
28
- audio = Diarize::Audio.new(uri)
10
+ $stdout.puts args.first.to_s if options[:verbose]
11
+ audio = Diarize::Audio.new(args.first)
29
12
  audio.analyze!
30
13
  audio
31
14
  end
32
15
 
33
16
  def build_remote_audio_from_args(args, options = {})
34
17
  remote = client(options)
35
- audio_uri = uri_from_args(args)
36
18
 
37
- $stdout.puts uri.to_s if options[:verbose]
38
- audio = remote.build_audio(audio_uri)
19
+ $stdout.puts args.first.to_s if options[:verbose]
20
+ audio = remote.build_audio(args.first)
39
21
  audio.analyze!
40
22
  audio
41
23
  rescue DRb::DRbConnError => ex
@@ -76,7 +58,7 @@ command [:remote, :r] do |remote|
76
58
  audio.arg_name 'FILENAME', :multiple
77
59
  audio.command [:se, :segment, :segments] do |segments|
78
60
  segments.action do |global_options, options, args|
79
- audio = build_remote_audio_from_args(args, options.merge(global_options))
61
+ audio = build_remote_audio_from_args(args, options.first.last.first.last)
80
62
  audio.segments.each do |segment|
81
63
  $stdout.puts segment.uri
82
64
  end if audio
@@ -87,13 +69,25 @@ command [:remote, :r] do |remote|
87
69
  audio.arg_name 'FILENAME', :multiple
88
70
  audio.command [:sp, :speaker, :speakers] do |speakers|
89
71
  speakers.action do |global_options, options, args|
90
- audio = build_remote_audio_from_args(args, options.merge(global_options))
72
+ audio = build_remote_audio_from_args(args, options.first.last.first.last)
91
73
  audio.speakers.each do |speaker|
92
74
  $stdout.puts speaker.uri
93
75
  end if audio
94
76
  end
95
77
  end
96
78
  end
79
+
80
+ #-h, --host HOSTNAME
81
+ remote.desc 'Host, e.g. "localhost"'
82
+ remote.default_value 'localhost'
83
+ remote.arg_name 'HOSTNAME'
84
+ remote.flag [:h, :host]
85
+
86
+ #-p, --port PORT
87
+ remote.desc 'Port number'
88
+ remote.default_value 9999
89
+ remote.arg_name 'PORT'
90
+ remote.flag [:p, :port]
97
91
  end
98
92
 
99
93
  desc 'audio file'
data/lib/diarize/audio.rb CHANGED
@@ -2,22 +2,30 @@ module Diarize
2
2
  class Audio
3
3
  attr_reader :path, :file, :uri
4
4
 
5
- def initialize(url_or_uri)
6
- @uri = url_or_uri.is_a?(String) ? URI(url_or_uri) : url_or_uri
7
- if uri.scheme == 'file'
8
- # Local file
5
+ def initialize(uri_url_or_file_name)
6
+ if uri_url_or_file_name.is_a?(URI)
7
+ @uri = uri_url_or_file_name
8
+ elsif uri_url_or_file_name.is_a?(String)
9
+ # url or file name
10
+ @uri = URI.parse(uri_url_or_file_name)
11
+ if @uri.scheme && @uri.scheme.match(/^(http|https|file)$/)
12
+ # url or file:/// uri, do nothing
13
+ else
14
+ @uri = URI.join('file:///', File.join(File.expand_path(uri_url_or_file_name)))
15
+ end
16
+ end
17
+
18
+ if @uri.scheme == 'file'
9
19
  @path = uri.path
10
20
  else
11
- # Remote file, we get it locally
12
- @path = '/tmp/' + Digest::MD5.hexdigest(uri.to_s)
13
- File.open(@path, "wb") {|f| f << open(uri).read }
21
+ # remote file, we download it locally
22
+ @path = '/tmp/' + Digest::MD5.hexdigest(@uri.to_s)
23
+ File.open(@path, "wb") {|f| f << open(@uri, {ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE}).read}
14
24
  end
15
25
 
16
- if !File.exist?(@path)
17
- raise "Unable to locate: #{@path}. Check that the file is available at #{uri.inspect}."
18
- end
26
+ raise "Unable to locate '#{@path}' from '#{@uri.inspect}'." unless File.exist?(@path)
19
27
 
20
- @file = File.new @path
28
+ @file = File.new(@path)
21
29
  end
22
30
 
23
31
  def analyze!(train_speaker_models = true)
@@ -37,7 +45,7 @@ module Diarize
37
45
  #parameter.parameterDiarization.cEClustering = true # We use CE clustering by default
38
46
  parameter.parameterInputFeature.setFeatureMask(@path)
39
47
  @clusters = ester2(parameter)
40
- @segments = Segmentation.from_clusters(self, @clusters)
48
+ @segments = Segmentation.from_clusters(self, @clusters).sort_by(&:start)
41
49
  train_speaker_gmms if train_speaker_models
42
50
  end
43
51
 
@@ -47,7 +55,7 @@ module Diarize
47
55
  end
48
56
 
49
57
  def segments
50
- raise Exception.new('You need to run analyze! before being able to access the analysis results') unless @segments
58
+ raise RuntimeError, "You need to run analyze! before being able to access the analysis results" unless @segments
51
59
  @segments
52
60
  end
53
61
 
@@ -5,11 +5,11 @@ module Diarize
5
5
  attr_reader :start, :duration, :gender, :bandwidth
6
6
 
7
7
  def initialize(audio, start, duration, gender, bandwidth, speaker_id)
8
- @audio = audio
9
- @start = start
10
- @duration = duration
11
- @bandwidth = bandwidth
12
- @speaker_id = speaker_id
8
+ @audio = audio
9
+ @start = start
10
+ @duration = duration
11
+ @bandwidth = bandwidth
12
+ @speaker_id = speaker_id
13
13
  @speaker_gender = gender
14
14
  end
15
15
 
@@ -5,12 +5,12 @@ module Diarize
5
5
  segmentation = []
6
6
  File.open(seg_file).each_line do |line|
7
7
  next if line.start_with? ';;'
8
- parts = line.split(' ')
9
- start = parts[2].to_i / 100.0
10
- duration = parts[3].to_i / 100.0
11
- gender = parts[4]
12
- bandwidth = parts[6]
13
- speaker_id = parts[7]
8
+ parts = line.split(' ')
9
+ start = parts[2].to_i / 100.0
10
+ duration = parts[3].to_i / 100.0
11
+ gender = parts[4]
12
+ bandwidth = parts[6]
13
+ speaker_id = parts[7]
14
14
  segmentation << Segment.new(audio, start, duration, gender, bandwidth, speaker_id)
15
15
  end
16
16
  segmentation
@@ -19,9 +19,9 @@ module Diarize
19
19
  def self.from_clusters(audio, clusters)
20
20
  segmentation = []
21
21
  clusters.map(&:to_s).each do |speaker_id|
22
- cluster = clusters.getCluster(speaker_id)
23
- gender = cluster.gender
24
- bandwidth = cluster.bandwidth
22
+ cluster = clusters.getCluster(speaker_id)
23
+ gender = cluster.gender
24
+ bandwidth = cluster.bandwidth
25
25
  cluster.each do |segment|
26
26
  start = segment.start_in_second
27
27
  duration = segment.length_in_second
@@ -1,3 +1,3 @@
1
1
  module Diarize
2
- VERSION = "0.3.6"
2
+ VERSION = "0.3.7"
3
3
  end
data/lib/diarize.rb CHANGED
@@ -7,10 +7,10 @@ Rjb::load(RJB_LOAD_PATH, RJB_OPTIONS)
7
7
 
8
8
  require "matrix"
9
9
 
10
- require "to_rdf"
11
10
  require "uri"
12
11
  require "open-uri"
13
12
  require "digest/md5"
13
+ require "to_rdf"
14
14
 
15
15
  require "diarize/version"
16
16
  require "diarize/audio"
data/test/audio_test.rb CHANGED
@@ -43,8 +43,8 @@ class AudioTest < Test::Unit::TestCase
43
43
  audio.clean!
44
44
  end
45
45
 
46
- def test_segments_raises_exception_when_audio_is_not_analysed
47
- assert_raise Exception do
46
+ def test_segments_raises_exception_when_audio_is_not_analyzed
47
+ assert_raise RuntimeError do
48
48
  @audio.segments
49
49
  end
50
50
  end
data/test/version_test.rb CHANGED
@@ -3,7 +3,7 @@ require 'test_helper'
3
3
  class VersionTest < Test::Unit::TestCase
4
4
 
5
5
  def test_current_version
6
- assert_equal "0.3.6", Diarize::VERSION
6
+ assert_equal "0.3.7", Diarize::VERSION
7
7
  end
8
8
 
9
9
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: diarize-ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.6
4
+ version: 0.3.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yves Raimond