speech2text 0.3.1 → 0.3.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -3,5 +3,10 @@
3
3
 
4
4
  require 'speech'
5
5
 
6
+ if ARGV[0].nil? || !File.exist?(ARGV[0])
7
+ STDERR.puts "usage: #{$0} input.wav"
8
+ exit(1)
9
+ end
10
+
6
11
  captured_json = Speech::AudioToText.new(ARGV[0]).to_text
7
12
  puts captured_json.inspect
@@ -14,7 +14,7 @@ module Speech
14
14
 
15
15
  def to_s
16
16
  s,f = seconds.split('.')
17
- sprintf "%.2d:%.2d:%.2d:%.2d", self.hours.to_s.gsub(/^0/,'').to_i, self.minutes.to_s.gsub(/^0/,'').to_i, s.to_s.gsub(/^0/,'').to_i, (f||0)
17
+ sprintf "%.2d:%.2d:%.2d:%.2d", self.hours.to_i, self.minutes.to_i, s.to_i, (f||0).to_i
18
18
  #"#{hours}:#{minutes}:#{seconds}:#{f}"
19
19
  end
20
20
 
@@ -5,22 +5,33 @@ module Speech
5
5
  attr_accessor :original_file, :size, :duration, :chunks
6
6
 
7
7
  class AudioChunk
8
- attr_accessor :splitter, :chunk, :flac_chunk, :offset, :duration, :flac_rate
8
+ attr_accessor :splitter, :chunk, :flac_chunk, :offset, :duration, :flac_rate, :copied
9
9
 
10
10
  def initialize(splitter, offset, duration)
11
11
  self.offset = offset
12
12
  self.chunk = File.join(File.dirname(splitter.original_file), "chunk-" + File.basename(splitter.original_file).gsub(/\.(.*)$/, "-#{offset}" + '.\1'))
13
13
  self.duration = duration
14
14
  self.splitter = splitter
15
+ self.copied = false
16
+ end
17
+
18
+ def self.copy(splitter)
19
+ chunk = AudioChunk.new(splitter, 0, splitter.duration.to_f)
20
+ chunk.copied = true
21
+ system("cp #{splitter.original_file} #{chunk.chunk}")
22
+ chunk
15
23
  end
16
24
 
17
25
  # given the original file from the splitter and the chunked file name with duration and offset run the ffmpeg command
18
26
  def build
27
+ return self if self.copied
19
28
  # ffmpeg -y -i sample.audio.wav -acodec copy -vcodec copy -ss 00:00:00:00 -t 00:00:30:00 sample.audio.out.wav
20
29
  offset_ts = AudioInspector::Duration.from_seconds(self.offset)
21
30
  duration_ts = AudioInspector::Duration.from_seconds(self.duration)
31
+ # NOTE: kind of a hack, but if the original source is less than or equal to 1 second, we should skip ffmpeg
32
+ puts "building chunk: #{duration_ts.inspect} and offset: #{offset_ts}"
22
33
  #puts "offset: #{ offset_ts.to_s }, duration: #{duration_ts.to_s}"
23
- cmd = "ffmpeg -y -i #{splitter.original_file} -acodec copy -vcodec copy -ss #{offset_ts} -t #{duration_ts} #{self.chunk} >/dev/null 2>&1"
34
+ cmd = "ffmpeg -y -i #{splitter.original_file} -acodec copy -vcodec copy -ss #{offset_ts} -t #{duration_ts} #{self.chunk}"# >/dev/null 2>&1"
24
35
  if system(cmd)
25
36
  self
26
37
  else
@@ -30,7 +41,9 @@ module Speech
30
41
 
31
42
  # convert the audio file to flac format
32
43
  def to_flac
33
- if system("flac #{chunk} >/dev/null 2>&1")
44
+ puts "convert: #{chunk} to flac"
45
+ if system("flac #{chunk}")
46
+ puts "success?"
34
47
  self.flac_chunk = chunk.gsub(File.extname(chunk), ".flac")
35
48
  # convert the audio file to 16K
36
49
  self.flac_rate = `ffmpeg -i #{self.flac_chunk} 2>&1`.strip.scan(/Audio: flac, (.*) Hz/).first.first.strip
@@ -42,6 +55,8 @@ module Speech
42
55
  raise "failed to convert to lower audio rate"
43
56
  end
44
57
 
58
+ else
59
+ raise "failed to convert chunk: #{chunk} with flac #{chunk}"
45
60
  end
46
61
  end
47
62
 
@@ -75,10 +90,11 @@ module Speech
75
90
  end
76
91
 
77
92
  if chunks.empty?
78
- chunks << AudioChunk.new(self, 0, self.duration.to_f)
93
+ chunks << AudioChunk.copy(self)#, 0, self.duration.to_f)
79
94
  else
80
95
  chunks << AudioChunk.new(self, chunks.last.offset.to_i + chunks.last.duration.to_i, self.size + last_chunk)
81
96
  end
97
+ puts "Chunk count: #{chunks.size}"
82
98
 
83
99
  chunks
84
100
  end
@@ -2,17 +2,16 @@
2
2
  module Speech
3
3
 
4
4
  class AudioToText
5
- attr_accessor :file, :rate, :captured_json, :confidence, :captured_file
5
+ attr_accessor :file, :rate, :captured_json, :captured_file
6
6
 
7
7
  def initialize(file)
8
8
  self.file = file
9
9
  self.captured_file = self.file.gsub(/\.wav$/,'.json')
10
- self.captured_json = []
11
- self.confidence = 0.0
10
+ self.captured_json = {}
12
11
  end
13
12
 
14
13
  def to_text
15
- url = "https://www.google.com/speech-api/v1/recognize?xjerr=1&client=speech2text&lang=en-US&maxresults=1"
14
+ url = "https://www.google.com/speech-api/v1/recognize?xjerr=1&client=speech2text&lang=en-US&maxresults=10"
16
15
  splitter = Speech::AudioSplitter.new(file) # based off the wave file because flac doesn't tell us the duration
17
16
  easy = Curl::Easy.new(url)
18
17
  splitter.split.each do|chunk|
@@ -31,10 +30,12 @@ module Speech
31
30
  def convert_chunk(easy, chunk, options={})
32
31
  puts "sending chunk of size #{chunk.duration}..."
33
32
  retrying = true
34
- while retrying
33
+ retry_count = 0
34
+ while retrying && retry_count < 5
35
35
  #easy.verbose = true
36
36
  easy.headers['Content-Type'] = "audio/x-flac; rate=#{chunk.flac_rate}"
37
37
  easy.headers['User-Agent'] = "https://github.com/taf2/speech2text"
38
+ #puts chunk.inspect
38
39
  easy.post_body = "Content=#{chunk.to_flac_bytes}"
39
40
  easy.on_progress {|dl_total, dl_now, ul_total, ul_now| printf("%.2f/%.2f\r", ul_now, ul_total); true }
40
41
  easy.on_complete {|easy| puts }
@@ -44,25 +45,16 @@ module Speech
44
45
  if easy.response_code == 500
45
46
  puts "500 from google retry after 0.5 seconds"
46
47
  retrying = true
47
- sleep 0.5 # wait longer on error?
48
+ retry_count += 1
49
+ sleep 0.5 # wait longer on error?, google??
48
50
  else
49
51
  # {"status":0,"id":"ce178ea89f8b17d8e8298c9c7814700a-1","hypotheses":[{"utterance":"I like pickles","confidence":0.92731786}]}
50
52
  data = JSON.parse(easy.body_str)
51
- puts data.inspect
52
- data['hypotheses'].each {|utterance|
53
- puts utterance.inspect
54
- self.captured_json << [utterance['utterance'], utterance['confidence']]
55
- self.confidence += utterance['confidence']
56
- }
57
- File.open("#{self.captured_file}", "wb") {|f|
58
- size = self.captured_json.size
59
- if size > 0
60
- confidence_calc = self.confidence / size
61
- else
62
- confidence_calc = 0
63
- end
64
- f << {:captured_json => captured_json, :confidence => confidence_calc}.to_json
65
- }
53
+ self.captured_json['status'] = data['status']
54
+ self.captured_json['id'] = data['id']
55
+ self.captured_json['hypotheses'] = data['hypotheses'].map {|ut| [ut['utterance'], ut['confidence']] }
56
+ puts self.captured_json.inspect
57
+ File.open("#{self.captured_file}", "wb") {|f| f << captured_json.to_json }
66
58
  retrying = false
67
59
  end
68
60
  sleep 0.1 # not too fast there tiger
@@ -1,6 +1,6 @@
1
1
  # -*- encoding: binary -*-
2
2
  module Speech
3
3
  class Info
4
- VERSION='0.3.1'
4
+ VERSION='0.3.2'
5
5
  end
6
6
  end
@@ -18,4 +18,22 @@ class SpeechAudioToTextTest < Test::Unit::TestCase
18
18
  ensure
19
19
  audio.clean
20
20
  end
21
+
22
+ def test_short_audio_clip
23
+ audio = Speech::AudioToText.new("samples/i-like-pickles.chunk5.wav")
24
+ captured_json = audio.to_text
25
+ assert captured_json
26
+ assert captured_json.key?("hypotheses")
27
+ assert !captured_json['hypotheses'].empty?
28
+ #{"status"=>0, "id"=>"552de5ba35bb769ce3493ff113e158a8-1", "hypotheses"=>[["eagles", 0.7214844], ["pickles", nil], ["michaels", nil], ["giggles", nil], ["tickles", nil]]}
29
+ assert captured_json.keys.include?('status')
30
+ assert captured_json.keys.include?('id')
31
+ assert captured_json.keys.include?('hypotheses')
32
+ puts captured_json.inspect
33
+ assert_equal "eagles", captured_json['hypotheses'][0].first
34
+ assert_equal "pickles", captured_json['hypotheses'][1].first
35
+ #assert captured_json['confidence'] > 0.9
36
+ ensure
37
+ audio.clean
38
+ end
21
39
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: speech2text
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.3.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,12 +9,12 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-04-03 00:00:00.000000000 -04:00
12
+ date: 2011-04-04 00:00:00.000000000 -04:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: curb
17
- requirement: &2163110280 !ruby/object:Gem::Requirement
17
+ requirement: &2163572140 !ruby/object:Gem::Requirement
18
18
  none: false
19
19
  requirements:
20
20
  - - ! '>='
@@ -22,10 +22,10 @@ dependencies:
22
22
  version: '0'
23
23
  type: :runtime
24
24
  prerelease: false
25
- version_requirements: *2163110280
25
+ version_requirements: *2163572140
26
26
  - !ruby/object:Gem::Dependency
27
27
  name: json
28
- requirement: &2163109720 !ruby/object:Gem::Requirement
28
+ requirement: &2163571700 !ruby/object:Gem::Requirement
29
29
  none: false
30
30
  requirements:
31
31
  - - ! '>='
@@ -33,7 +33,7 @@ dependencies:
33
33
  version: '0'
34
34
  type: :runtime
35
35
  prerelease: false
36
- version_requirements: *2163109720
36
+ version_requirements: *2163571700
37
37
  description: Super powers of Google wrapped in a nice Ruby interface
38
38
  email: todd.fisher@gmail.com
39
39
  executables: