speech2text 0.3.1 → 0.3.2

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -3,5 +3,10 @@
3
3
 
4
4
  require 'speech'
5
5
 
6
+ if ARGV[0].nil? || !File.exist?(ARGV[0])
7
+ STDERR.puts "usage: #{$0} input.wav"
8
+ exit(1)
9
+ end
10
+
6
11
  captured_json = Speech::AudioToText.new(ARGV[0]).to_text
7
12
  puts captured_json.inspect
@@ -14,7 +14,7 @@ module Speech
14
14
 
15
15
  def to_s
16
16
  s,f = seconds.split('.')
17
- sprintf "%.2d:%.2d:%.2d:%.2d", self.hours.to_s.gsub(/^0/,'').to_i, self.minutes.to_s.gsub(/^0/,'').to_i, s.to_s.gsub(/^0/,'').to_i, (f||0)
17
+ sprintf "%.2d:%.2d:%.2d:%.2d", self.hours.to_i, self.minutes.to_i, s.to_i, (f||0).to_i
18
18
  #"#{hours}:#{minutes}:#{seconds}:#{f}"
19
19
  end
20
20
 
@@ -5,22 +5,33 @@ module Speech
5
5
  attr_accessor :original_file, :size, :duration, :chunks
6
6
 
7
7
  class AudioChunk
8
- attr_accessor :splitter, :chunk, :flac_chunk, :offset, :duration, :flac_rate
8
+ attr_accessor :splitter, :chunk, :flac_chunk, :offset, :duration, :flac_rate, :copied
9
9
 
10
10
  def initialize(splitter, offset, duration)
11
11
  self.offset = offset
12
12
  self.chunk = File.join(File.dirname(splitter.original_file), "chunk-" + File.basename(splitter.original_file).gsub(/\.(.*)$/, "-#{offset}" + '.\1'))
13
13
  self.duration = duration
14
14
  self.splitter = splitter
15
+ self.copied = false
16
+ end
17
+
18
+ def self.copy(splitter)
19
+ chunk = AudioChunk.new(splitter, 0, splitter.duration.to_f)
20
+ chunk.copied = true
21
+ system("cp #{splitter.original_file} #{chunk.chunk}")
22
+ chunk
15
23
  end
16
24
 
17
25
  # given the original file from the splitter and the chunked file name with duration and offset run the ffmpeg command
18
26
  def build
27
+ return self if self.copied
19
28
  # ffmpeg -y -i sample.audio.wav -acodec copy -vcodec copy -ss 00:00:00:00 -t 00:00:30:00 sample.audio.out.wav
20
29
  offset_ts = AudioInspector::Duration.from_seconds(self.offset)
21
30
  duration_ts = AudioInspector::Duration.from_seconds(self.duration)
31
+ # NOTE: kind of a hack, but if the original source is less than or equal to 1 second, we should skip ffmpeg
32
+ puts "building chunk: #{duration_ts.inspect} and offset: #{offset_ts}"
22
33
  #puts "offset: #{ offset_ts.to_s }, duration: #{duration_ts.to_s}"
23
- cmd = "ffmpeg -y -i #{splitter.original_file} -acodec copy -vcodec copy -ss #{offset_ts} -t #{duration_ts} #{self.chunk} >/dev/null 2>&1"
34
+ cmd = "ffmpeg -y -i #{splitter.original_file} -acodec copy -vcodec copy -ss #{offset_ts} -t #{duration_ts} #{self.chunk}"# >/dev/null 2>&1"
24
35
  if system(cmd)
25
36
  self
26
37
  else
@@ -30,7 +41,9 @@ module Speech
30
41
 
31
42
  # convert the audio file to flac format
32
43
  def to_flac
33
- if system("flac #{chunk} >/dev/null 2>&1")
44
+ puts "convert: #{chunk} to flac"
45
+ if system("flac #{chunk}")
46
+ puts "success?"
34
47
  self.flac_chunk = chunk.gsub(File.extname(chunk), ".flac")
35
48
  # convert the audio file to 16K
36
49
  self.flac_rate = `ffmpeg -i #{self.flac_chunk} 2>&1`.strip.scan(/Audio: flac, (.*) Hz/).first.first.strip
@@ -42,6 +55,8 @@ module Speech
42
55
  raise "failed to convert to lower audio rate"
43
56
  end
44
57
 
58
+ else
59
+ raise "failed to convert chunk: #{chunk} with flac #{chunk}"
45
60
  end
46
61
  end
47
62
 
@@ -75,10 +90,11 @@ module Speech
75
90
  end
76
91
 
77
92
  if chunks.empty?
78
- chunks << AudioChunk.new(self, 0, self.duration.to_f)
93
+ chunks << AudioChunk.copy(self)#, 0, self.duration.to_f)
79
94
  else
80
95
  chunks << AudioChunk.new(self, chunks.last.offset.to_i + chunks.last.duration.to_i, self.size + last_chunk)
81
96
  end
97
+ puts "Chunk count: #{chunks.size}"
82
98
 
83
99
  chunks
84
100
  end
@@ -2,17 +2,16 @@
2
2
  module Speech
3
3
 
4
4
  class AudioToText
5
- attr_accessor :file, :rate, :captured_json, :confidence, :captured_file
5
+ attr_accessor :file, :rate, :captured_json, :captured_file
6
6
 
7
7
  def initialize(file)
8
8
  self.file = file
9
9
  self.captured_file = self.file.gsub(/\.wav$/,'.json')
10
- self.captured_json = []
11
- self.confidence = 0.0
10
+ self.captured_json = {}
12
11
  end
13
12
 
14
13
  def to_text
15
- url = "https://www.google.com/speech-api/v1/recognize?xjerr=1&client=speech2text&lang=en-US&maxresults=1"
14
+ url = "https://www.google.com/speech-api/v1/recognize?xjerr=1&client=speech2text&lang=en-US&maxresults=10"
16
15
  splitter = Speech::AudioSplitter.new(file) # based off the wave file because flac doesn't tell us the duration
17
16
  easy = Curl::Easy.new(url)
18
17
  splitter.split.each do|chunk|
@@ -31,10 +30,12 @@ module Speech
31
30
  def convert_chunk(easy, chunk, options={})
32
31
  puts "sending chunk of size #{chunk.duration}..."
33
32
  retrying = true
34
- while retrying
33
+ retry_count = 0
34
+ while retrying && retry_count < 5
35
35
  #easy.verbose = true
36
36
  easy.headers['Content-Type'] = "audio/x-flac; rate=#{chunk.flac_rate}"
37
37
  easy.headers['User-Agent'] = "https://github.com/taf2/speech2text"
38
+ #puts chunk.inspect
38
39
  easy.post_body = "Content=#{chunk.to_flac_bytes}"
39
40
  easy.on_progress {|dl_total, dl_now, ul_total, ul_now| printf("%.2f/%.2f\r", ul_now, ul_total); true }
40
41
  easy.on_complete {|easy| puts }
@@ -44,25 +45,16 @@ module Speech
44
45
  if easy.response_code == 500
45
46
  puts "500 from google retry after 0.5 seconds"
46
47
  retrying = true
47
- sleep 0.5 # wait longer on error?
48
+ retry_count += 1
49
+ sleep 0.5 # wait longer on error?, google??
48
50
  else
49
51
  # {"status":0,"id":"ce178ea89f8b17d8e8298c9c7814700a-1","hypotheses":[{"utterance":"I like pickles","confidence":0.92731786}]}
50
52
  data = JSON.parse(easy.body_str)
51
- puts data.inspect
52
- data['hypotheses'].each {|utterance|
53
- puts utterance.inspect
54
- self.captured_json << [utterance['utterance'], utterance['confidence']]
55
- self.confidence += utterance['confidence']
56
- }
57
- File.open("#{self.captured_file}", "wb") {|f|
58
- size = self.captured_json.size
59
- if size > 0
60
- confidence_calc = self.confidence / size
61
- else
62
- confidence_calc = 0
63
- end
64
- f << {:captured_json => captured_json, :confidence => confidence_calc}.to_json
65
- }
53
+ self.captured_json['status'] = data['status']
54
+ self.captured_json['id'] = data['id']
55
+ self.captured_json['hypotheses'] = data['hypotheses'].map {|ut| [ut['utterance'], ut['confidence']] }
56
+ puts self.captured_json.inspect
57
+ File.open("#{self.captured_file}", "wb") {|f| f << captured_json.to_json }
66
58
  retrying = false
67
59
  end
68
60
  sleep 0.1 # not too fast there tiger
@@ -1,6 +1,6 @@
1
1
  # -*- encoding: binary -*-
2
2
  module Speech
3
3
  class Info
4
- VERSION='0.3.1'
4
+ VERSION='0.3.2'
5
5
  end
6
6
  end
@@ -18,4 +18,22 @@ class SpeechAudioToTextTest < Test::Unit::TestCase
18
18
  ensure
19
19
  audio.clean
20
20
  end
21
+
22
+ def test_short_audio_clip
23
+ audio = Speech::AudioToText.new("samples/i-like-pickles.chunk5.wav")
24
+ captured_json = audio.to_text
25
+ assert captured_json
26
+ assert captured_json.key?("hypotheses")
27
+ assert !captured_json['hypotheses'].empty?
28
+ #{"status"=>0, "id"=>"552de5ba35bb769ce3493ff113e158a8-1", "hypotheses"=>[["eagles", 0.7214844], ["pickles", nil], ["michaels", nil], ["giggles", nil], ["tickles", nil]]}
29
+ assert captured_json.keys.include?('status')
30
+ assert captured_json.keys.include?('id')
31
+ assert captured_json.keys.include?('hypotheses')
32
+ puts captured_json.inspect
33
+ assert_equal "eagles", captured_json['hypotheses'][0].first
34
+ assert_equal "pickles", captured_json['hypotheses'][1].first
35
+ #assert captured_json['confidence'] > 0.9
36
+ ensure
37
+ audio.clean
38
+ end
21
39
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: speech2text
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.3.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,12 +9,12 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-04-03 00:00:00.000000000 -04:00
12
+ date: 2011-04-04 00:00:00.000000000 -04:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: curb
17
- requirement: &2163110280 !ruby/object:Gem::Requirement
17
+ requirement: &2163572140 !ruby/object:Gem::Requirement
18
18
  none: false
19
19
  requirements:
20
20
  - - ! '>='
@@ -22,10 +22,10 @@ dependencies:
22
22
  version: '0'
23
23
  type: :runtime
24
24
  prerelease: false
25
- version_requirements: *2163110280
25
+ version_requirements: *2163572140
26
26
  - !ruby/object:Gem::Dependency
27
27
  name: json
28
- requirement: &2163109720 !ruby/object:Gem::Requirement
28
+ requirement: &2163571700 !ruby/object:Gem::Requirement
29
29
  none: false
30
30
  requirements:
31
31
  - - ! '>='
@@ -33,7 +33,7 @@ dependencies:
33
33
  version: '0'
34
34
  type: :runtime
35
35
  prerelease: false
36
- version_requirements: *2163109720
36
+ version_requirements: *2163571700
37
37
  description: Super powers of Google wrapped in a nice Ruby interface
38
38
  email: todd.fisher@gmail.com
39
39
  executables: