speech2text 0.3.4 → 0.3.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -14,7 +14,7 @@ module Speech
14
14
 
15
15
  def to_s
16
16
  s,f = seconds.split('.')
17
- sprintf "%.2d:%.2d:%.2d:%.2d", self.hours.to_i, self.minutes.to_i, s.to_i, (f||0).to_i
17
+ sprintf "%.2d:%.2d:%.2d.%.2d", self.hours.to_i, self.minutes.to_i, s.to_i, (f||0).to_i
18
18
  #"#{hours}:#{minutes}:#{seconds}:#{f}"
19
19
  end
20
20
 
@@ -47,7 +47,13 @@ module Speech
47
47
  end
48
48
 
49
49
  def initialize(file)
50
- self.duration = Duration.new(`ffmpeg -i #{file} 2>&1`.strip.scan(/Duration: (.*),/).first.first)
50
+ out = `ffmpeg -i #{file} 2>&1`.strip
51
+ if out.match(/No such file or directory/)
52
+ raise "No such file or directory: #{file}"
53
+ else
54
+ out = out.scan(/Duration: (.*),/)
55
+ self.duration = Duration.new(out.first.first)
56
+ end
51
57
  end
52
58
 
53
59
  end
@@ -25,13 +25,13 @@ module Speech
25
25
  # given the original file from the splitter and the chunked file name with duration and offset run the ffmpeg command
26
26
  def build
27
27
  return self if self.copied
28
- # ffmpeg -y -i sample.audio.wav -acodec copy -vcodec copy -ss 00:00:00:00 -t 00:00:30:00 sample.audio.out.wav
29
- offset_ts = AudioInspector::Duration.from_seconds(self.offset)
30
- duration_ts = AudioInspector::Duration.from_seconds(self.duration)
28
+ # ffmpeg -y -i sample.audio.wav -acodec copy -vcodec copy -ss 00:00:00.00 -t 00:00:30.00 sample.audio.out.wav
29
+ offset_ts = AudioInspector::Duration.from_seconds(self.offset).to_s
30
+ duration_ts = AudioInspector::Duration.from_seconds(self.duration).to_s
31
31
  # NOTE: kind of a hack, but if the original source is less than or equal to 1 second, we should skip ffmpeg
32
- puts "building chunk: #{duration_ts.inspect} and offset: #{offset_ts}"
32
+ #puts "building chunk: #{duration_ts.inspect} and offset: #{offset_ts}"
33
33
  #puts "offset: #{ offset_ts.to_s }, duration: #{duration_ts.to_s}"
34
- cmd = "ffmpeg -y -i #{splitter.original_file} -acodec copy -vcodec copy -ss #{offset_ts} -t #{duration_ts} #{self.chunk}"# >/dev/null 2>&1"
34
+ cmd = "ffmpeg -y -i #{splitter.original_file} -acodec copy -vcodec copy -ss #{offset_ts} -t #{duration_ts} #{self.chunk} >/dev/null 2>&1"
35
35
  if system(cmd)
36
36
  self
37
37
  else
@@ -41,10 +41,9 @@ module Speech
41
41
 
42
42
  # convert the audio file to flac format
43
43
  def to_flac
44
- puts "convert: #{chunk} to flac"
45
- if system("flac #{chunk}")
46
- puts "success?"
47
- self.flac_chunk = chunk.gsub(File.extname(chunk), ".flac")
44
+ chunk_outputfile = chunk.gsub(/#{File.extname(chunk)}$/, ".flac")
45
+ if system("ffmpeg -i #{chunk} -acodec flac #{chunk_outputfile} >/dev/null 2>&1")
46
+ self.flac_chunk = chunk.gsub(/#{File.extname(chunk)}$/, ".flac")
48
47
  # convert the audio file to 16K
49
48
  self.flac_rate = `ffmpeg -i #{self.flac_chunk} 2>&1`.strip.scan(/Audio: flac, (.*) Hz/).first.first.strip
50
49
  down_sampled = self.flac_chunk.gsub(/\.flac$/, '-sampled.flac')
@@ -72,7 +71,7 @@ module Speech
72
71
 
73
72
  end
74
73
 
75
- def initialize(file, chunk_size=30)
74
+ def initialize(file, chunk_size=5)
76
75
  self.original_file = file
77
76
  self.duration = AudioInspector.new(file).duration
78
77
  self.size = chunk_size
@@ -86,7 +85,13 @@ module Speech
86
85
  #puts "generate: #{full_chunks} chunks of #{size} seconds, last: #{last_chunk} seconds"
87
86
 
88
87
  (full_chunks-1).times do|chunkid|
89
- chunks << AudioChunk.new(self, chunkid * self.size, self.size)
88
+ if chunkid > 0
89
+ chunks << AudioChunk.new(self, chunkid * self.size, self.size)
90
+ else
91
+ off = (chunkid * self.size)-(self.size/2)
92
+ off = 0 if off < 0
93
+ chunks << AudioChunk.new(self, off, self.size)
94
+ end
90
95
  end
91
96
 
92
97
  if chunks.empty?
@@ -94,7 +99,7 @@ module Speech
94
99
  else
95
100
  chunks << AudioChunk.new(self, chunks.last.offset.to_i + chunks.last.duration.to_i, self.size + last_chunk)
96
101
  end
97
- puts "Chunk count: #{chunks.size}"
102
+ #puts "Chunk count: #{chunks.size}"
98
103
 
99
104
  chunks
100
105
  end
@@ -2,63 +2,80 @@
2
2
  module Speech
3
3
 
4
4
  class AudioToText
5
- attr_accessor :file, :rate, :captured_json, :captured_file
5
+ attr_accessor :file, :rate, :captured_json
6
+ attr_accessor :best_match_text, :score, :verbose, :segments
6
7
 
7
- def initialize(file)
8
+ def initialize(file, options={})
9
+ self.verbose = false
8
10
  self.file = file
9
- self.captured_file = self.file.gsub(/\.wav$/,'.json')
10
11
  self.captured_json = {}
12
+ self.best_match_text = ""
13
+ self.score = 0.0
14
+ self.segments = 0
15
+
16
+ self.verbose = !!options[:verbose] if options.key?(:verbose)
17
+ end
18
+
19
+ def to_text(max=2,lang="en-US")
20
+ to_json(max,lang)
21
+ self.best_match_text
11
22
  end
12
23
 
13
- def to_text
14
- url = "https://www.google.com/speech-api/v1/recognize?xjerr=1&client=speech2text&lang=en-US&maxresults=10"
24
+ def to_json(max=2,lang="en-US")
25
+ self.best_match_text = ""
26
+ self.score = 0.0
27
+ self.segments = 0
28
+
29
+ url = "https://www.google.com/speech-api/v1/recognize?xjerr=1&client=speech2text&lang=#{lang}&maxresults=#{max}"
15
30
  splitter = Speech::AudioSplitter.new(file) # based off the wave file because flac doesn't tell us the duration
16
31
  easy = Curl::Easy.new(url)
17
32
  splitter.split.each do|chunk|
18
33
  chunk.build.to_flac
19
34
  convert_chunk(easy, chunk)
20
35
  end
21
- JSON.parse(File.read(self.captured_file))
22
- end
23
-
24
- def clean
25
- File.unlink self.captured_file if self.captured_file && File.exist?(self.captured_file)
36
+ self.best_match_text = self.best_match_text.strip
37
+ self.score /= self.segments
38
+ self.captured_json
26
39
  end
27
40
 
28
41
  protected
29
42
 
30
43
  def convert_chunk(easy, chunk, options={})
31
- puts "sending chunk of size #{chunk.duration}..."
44
+ puts "sending chunk of size #{chunk.duration}..." if self.verbose
32
45
  retrying = true
33
46
  retry_count = 0
34
- while retrying && retry_count < 5
35
- #easy.verbose = true
47
+ while retrying && retry_count < 3 # 3 retries
48
+ easy.verbose = self.verbose
36
49
  easy.headers['Content-Type'] = "audio/x-flac; rate=#{chunk.flac_rate}"
37
50
  easy.headers['User-Agent'] = "https://github.com/taf2/speech2text"
38
- #puts chunk.inspect
39
51
  easy.post_body = "Content=#{chunk.to_flac_bytes}"
40
- easy.on_progress {|dl_total, dl_now, ul_total, ul_now| printf("%.2f/%.2f\r", ul_now, ul_total); true }
52
+ if self.verbose
53
+ easy.on_progress {|dl_total, dl_now, ul_total, ul_now| printf("%.2f/%.2f\r", ul_now, ul_total); true }
54
+ end
41
55
  easy.on_complete {|easy| puts }
42
56
  easy.http_post
43
- #puts easy.header_str
44
- #puts easy.body_str
45
57
  if easy.response_code == 500
46
- puts "500 from google retry after 0.5 seconds"
58
+ puts "500 from google retry after 0.5 seconds" if self.verbose
47
59
  retrying = true
48
60
  retry_count += 1
49
61
  sleep 0.5 # wait longer on error?, google??
50
62
  else
51
- # {"status":0,"id":"ce178ea89f8b17d8e8298c9c7814700a-1","hypotheses":[{"utterance":"I like pickles","confidence":0.92731786}]}
63
+ # {"status":0,"id":"ce178ea89f8b17d8e8298c9c7814700a-1","hypotheses":[{"utterance"=>"I like pickles", "confidence"=>0.59408695}, {"utterance"=>"I like turtles"}, {"utterance"=>"I like tickles"}, {"utterance"=>"I like to Kohl's"}, {"utterance"=>"I Like tickles"}, {"utterance"=>"I lyk tickles"}, {"utterance"=>"I liked to Kohl's"}]}
52
64
  data = JSON.parse(easy.body_str)
53
65
  self.captured_json['status'] = data['status']
54
66
  self.captured_json['id'] = data['id']
55
67
  self.captured_json['hypotheses'] = data['hypotheses'].map {|ut| [ut['utterance'], ut['confidence']] }
56
- puts self.captured_json.inspect
57
- File.open("#{self.captured_file}", "wb") {|f| f << captured_json.to_json }
68
+ if data.key?('hypotheses') && ['hypotheses'].first
69
+ self.best_match_text += " " + data['hypotheses'].first['utterance']
70
+ self.score += data['hypotheses'].first['confidence']
71
+ self.segments += 1
72
+ end
58
73
  retrying = false
59
74
  end
60
75
  sleep 0.1 # not too fast there tiger
61
76
  end
77
+ puts "#{segments} processed: #{self.captured_json.inspect}" if self.verbose
78
+ self.captured_json
62
79
  ensure
63
80
  chunk.clean
64
81
  end
@@ -1,6 +1,6 @@
1
1
  # -*- encoding: binary -*-
2
2
  module Speech
3
3
  class Info
4
- VERSION='0.3.4'
4
+ VERSION='0.3.6'
5
5
  end
6
6
  end
@@ -6,10 +6,10 @@ require 'speech'
6
6
  class SpeechAudioSplitterTest < Test::Unit::TestCase
7
7
 
8
8
  def test_audio_splitter
9
- splitter = Speech::AudioSplitter.new("samples/i-like-pickles.wav", 1)
9
+ splitter = Speech::AudioSplitter.new(File.expand_path(File.join(File.dirname(__FILE__),"samples/i-like-pickles.wav")), 1)
10
10
 
11
- assert_equal '00:00:03:52', splitter.duration.to_s
12
- assert_equal 3.52, splitter.duration.to_f
11
+ assert_equal '00:00:03.51', splitter.duration.to_s
12
+ assert_equal 3.51, splitter.duration.to_f
13
13
 
14
14
  chunks = splitter.split
15
15
  assert_equal 3, chunks.size
@@ -4,39 +4,19 @@ $:.unshift File.expand_path(File.dirname(__FILE__) + '/../lib')
4
4
  require 'speech'
5
5
 
6
6
  class SpeechAudioToTextTest < Test::Unit::TestCase
7
- def test_audio_to_text
8
- audio = Speech::AudioToText.new("samples/i-like-pickles.wav")
9
- captured_json = audio.to_text
10
- assert captured_json
11
- assert captured_json.key?("hypotheses")
12
- assert !captured_json['hypotheses'].empty?
13
- assert captured_json.keys.include?('status')
14
- assert captured_json.keys.include?('id')
15
- assert captured_json.keys.include?('hypotheses')
7
+ def setup
8
+ super
9
+ end
16
10
 
17
- assert_equal "I like pickles", captured_json['hypotheses'].first.first
18
- assert captured_json['hypotheses'].first.last > 0.9
19
- # {"hypotheses"=>[["I like pickles", 0.92731786]]}
20
- # puts captured_json.inspect
21
- ensure
22
- audio.clean
11
+ def test_audio_to_text
12
+ audio = Speech::AudioToText.new(File.expand_path(File.join(File.dirname(__FILE__),"samples/i-like-pickles.wav")))
13
+ assert_equal "I like pickles", audio.to_text
23
14
  end
24
15
 
25
- def test_short_audio_clip
26
- audio = Speech::AudioToText.new("samples/i-like-pickles.chunk5.wav")
27
- captured_json = audio.to_text
28
- assert captured_json
29
- assert captured_json.key?("hypotheses")
30
- assert !captured_json['hypotheses'].empty?
31
- #{"status"=>0, "id"=>"552de5ba35bb769ce3493ff113e158a8-1", "hypotheses"=>[["eagles", 0.7214844], ["pickles", nil], ["michaels", nil], ["giggles", nil], ["tickles", nil]]}
32
- assert captured_json.keys.include?('status')
33
- assert captured_json.keys.include?('id')
34
- assert captured_json.keys.include?('hypotheses')
35
- puts captured_json.inspect
36
- assert_equal "eagles", captured_json['hypotheses'][0].first
37
- assert_equal "pickles", captured_json['hypotheses'][1].first
38
- #assert captured_json['confidence'] > 0.9
39
- ensure
40
- audio.clean
16
+ def test_longer_audio
17
+ audio = Speech::AudioToText.new(File.expand_path(File.join(File.dirname(__FILE__),"/SampleAudio.wav")), :verbose => true)
18
+ puts audio.to_text
19
+ puts audio.score
20
+ puts audio.segments
41
21
  end
42
22
  end
metadata CHANGED
@@ -1,7 +1,8 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: speech2text
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.4
4
+ version: !binary |-
5
+ MC4zLjY=
5
6
  prerelease:
6
7
  platform: ruby
7
8
  authors:
@@ -9,12 +10,11 @@ authors:
9
10
  autorequire:
10
11
  bindir: bin
11
12
  cert_chain: []
12
- date: 2011-04-05 00:00:00.000000000 -04:00
13
- default_executable:
13
+ date: 2012-10-07 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: curb
17
- requirement: &2163558380 !ruby/object:Gem::Requirement
17
+ requirement: !ruby/object:Gem::Requirement
18
18
  none: false
19
19
  requirements:
20
20
  - - ! '>='
@@ -22,10 +22,15 @@ dependencies:
22
22
  version: '0'
23
23
  type: :runtime
24
24
  prerelease: false
25
- version_requirements: *2163558380
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ none: false
27
+ requirements:
28
+ - - ! '>='
29
+ - !ruby/object:Gem::Version
30
+ version: '0'
26
31
  - !ruby/object:Gem::Dependency
27
32
  name: json
28
- requirement: &2163557840 !ruby/object:Gem::Requirement
33
+ requirement: !ruby/object:Gem::Requirement
29
34
  none: false
30
35
  requirements:
31
36
  - - ! '>='
@@ -33,7 +38,12 @@ dependencies:
33
38
  version: '0'
34
39
  type: :runtime
35
40
  prerelease: false
36
- version_requirements: *2163557840
41
+ version_requirements: !ruby/object:Gem::Requirement
42
+ none: false
43
+ requirements:
44
+ - - ! '>='
45
+ - !ruby/object:Gem::Version
46
+ version: '0'
37
47
  description: Super powers of Google wrapped in a nice Ruby interface
38
48
  email: todd.fisher@gmail.com
39
49
  executables:
@@ -51,12 +61,10 @@ files:
51
61
  - test/audio_splitter_test.rb
52
62
  - test/audio_to_text_test.rb
53
63
  - test/SampleAudio.wav
54
- - test/samples/i-like-pickles.json
55
64
  - test/samples/i-like-pickles.wav
56
65
  - Rakefile
57
66
  - README.rdoc
58
67
  - speech2text.gemspec
59
- has_rdoc: true
60
68
  homepage: https://github.com/taf2/speech2text
61
69
  licenses: []
62
70
  post_install_message:
@@ -77,8 +85,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
77
85
  version: '0'
78
86
  requirements: []
79
87
  rubyforge_project:
80
- rubygems_version: 1.6.2
88
+ rubygems_version: 1.8.24
81
89
  signing_key:
82
90
  specification_version: 3
83
91
  summary: Speech to Text Library
84
92
  test_files: []
93
+ has_rdoc:
@@ -1 +0,0 @@
1
- {"captured_json":[["I like pickles",0.92731786]],"confidence":0.92731786}