RubyGems - speech2text - Versions diffs - 0.3.4 → 0.3.6 - Mend

speech2text 0.3.4 → 0.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

data/lib/speech/audio_inspector.rb +8 -2
data/lib/speech/audio_splitter.rb +17 -12
data/lib/speech/audio_to_text.rb +38 -21
data/lib/speech/version.rb +1 -1
data/test/audio_splitter_test.rb +3 -3
data/test/audio_to_text_test.rb +11 -31
metadata +19 -10
data/test/samples/i-like-pickles.json +0 -1

data/lib/speech/audio_inspector.rb CHANGED

@@ -14,7 +14,7 @@ module Speech
       def to_s
         s,f = seconds.split('.')
-        sprintf "%.2d:%.2d:%.2d:%.2d", self.hours.to_i, self.minutes.to_i, s.to_i, (f||0).to_i
+        sprintf "%.2d:%.2d:%.2d.%.2d", self.hours.to_i, self.minutes.to_i, s.to_i, (f||0).to_i
         #"#{hours}:#{minutes}:#{seconds}:#{f}"
       end
@@ -47,7 +47,13 @@ module Speech
     end
     def initialize(file)
-      self.duration = Duration.new(`ffmpeg -i #{file} 2>&1`.strip.scan(/Duration: (.*),/).first.first)
+      out = `ffmpeg -i #{file} 2>&1`.strip
+      if out.match(/No such file or directory/)
+        raise "No such file or directory: #{file}"
+      else
+        out = out.scan(/Duration: (.*),/)
+        self.duration = Duration.new(out.first.first)
+      end
     end
   end

data/lib/speech/audio_splitter.rb CHANGED

@@ -25,13 +25,13 @@ module Speech
       # given the original file from the splitter and the chunked file name with duration and offset run the ffmpeg command
       def build
         return self if self.copied
-        # ffmpeg -y -i sample.audio.wav -acodec copy -vcodec copy -ss 00:00:00:00 -t 00:00:30:00 sample.audio.out.wav
-        offset_ts = AudioInspector::Duration.from_seconds(self.offset)
-        duration_ts = AudioInspector::Duration.from_seconds(self.duration)
+        # ffmpeg -y -i sample.audio.wav -acodec copy -vcodec copy -ss 00:00:00.00 -t 00:00:30.00 sample.audio.out.wav
+        offset_ts = AudioInspector::Duration.from_seconds(self.offset).to_s
+        duration_ts = AudioInspector::Duration.from_seconds(self.duration).to_s
         # NOTE: kind of a hack, but if the original source is less than or equal to 1 second, we should skip ffmpeg
-        puts "building chunk: #{duration_ts.inspect} and offset: #{offset_ts}"
+        #puts "building chunk: #{duration_ts.inspect} and offset: #{offset_ts}"
         #puts "offset: #{ offset_ts.to_s }, duration: #{duration_ts.to_s}"
-        cmd = "ffmpeg -y -i #{splitter.original_file} -acodec copy -vcodec copy -ss #{offset_ts} -t #{duration_ts} #{self.chunk}"# >/dev/null 2>&1"
+        cmd = "ffmpeg -y -i #{splitter.original_file} -acodec copy -vcodec copy -ss #{offset_ts} -t #{duration_ts} #{self.chunk}   >/dev/null 2>&1"
         if system(cmd)
           self
         else
@@ -41,10 +41,9 @@ module Speech
       # convert the audio file to flac format
       def to_flac
-        puts "convert: #{chunk} to flac"
-        if system("flac #{chunk}")
-          puts "success?"
-          self.flac_chunk = chunk.gsub(File.extname(chunk), ".flac")
+        chunk_outputfile = chunk.gsub(/#{File.extname(chunk)}$/, ".flac")
+        if system("ffmpeg -i #{chunk} -acodec flac #{chunk_outputfile} >/dev/null 2>&1")
+          self.flac_chunk = chunk.gsub(/#{File.extname(chunk)}$/, ".flac")
           # convert the audio file to 16K
           self.flac_rate = `ffmpeg -i #{self.flac_chunk} 2>&1`.strip.scan(/Audio: flac, (.*) Hz/).first.first.strip
           down_sampled = self.flac_chunk.gsub(/\.flac$/, '-sampled.flac')
@@ -72,7 +71,7 @@ module Speech
     end
-    def initialize(file, chunk_size=30)
+    def initialize(file, chunk_size=5)
       self.original_file = file
       self.duration = AudioInspector.new(file).duration
       self.size = chunk_size
@@ -86,7 +85,13 @@ module Speech
       #puts "generate: #{full_chunks} chunks of #{size} seconds, last: #{last_chunk} seconds"
       (full_chunks-1).times do|chunkid|
-        chunks << AudioChunk.new(self, chunkid * self.size, self.size)
+        if chunkid > 0
+          chunks << AudioChunk.new(self, chunkid * self.size, self.size)
+        else
+          off = (chunkid * self.size)-(self.size/2)
+          off = 0 if off < 0
+          chunks << AudioChunk.new(self, off, self.size)
+        end
       end
       if chunks.empty?
@@ -94,7 +99,7 @@ module Speech
       else
         chunks << AudioChunk.new(self, chunks.last.offset.to_i + chunks.last.duration.to_i, self.size + last_chunk)
       end
-      puts "Chunk count: #{chunks.size}"
+      #puts "Chunk count: #{chunks.size}"
       chunks
     end

data/lib/speech/audio_to_text.rb CHANGED

@@ -2,63 +2,80 @@
 module Speech
   class AudioToText
-    attr_accessor :file, :rate, :captured_json, :captured_file
+    attr_accessor :file, :rate, :captured_json
+    attr_accessor :best_match_text, :score, :verbose, :segments
-    def initialize(file)
+    def initialize(file, options={})
+      self.verbose = false
       self.file = file
-      self.captured_file = self.file.gsub(/\.wav$/,'.json')
       self.captured_json = {}
+      self.best_match_text = ""
+      self.score = 0.0
+      self.segments = 0
+      self.verbose = !!options[:verbose] if options.key?(:verbose)
+    end
+    def to_text(max=2,lang="en-US")
+      to_json(max,lang)
+      self.best_match_text
     end
-    def to_text
-      url = "https://www.google.com/speech-api/v1/recognize?xjerr=1&client=speech2text&lang=en-US&maxresults=10"
+    def to_json(max=2,lang="en-US")
+      self.best_match_text = ""
+      self.score = 0.0
+      self.segments = 0
+      url = "https://www.google.com/speech-api/v1/recognize?xjerr=1&client=speech2text&lang=#{lang}&maxresults=#{max}"
       splitter = Speech::AudioSplitter.new(file) # based off the wave file because flac doesn't tell us the duration
       easy = Curl::Easy.new(url)
       splitter.split.each do|chunk|
         chunk.build.to_flac
         convert_chunk(easy, chunk)
       end
-      JSON.parse(File.read(self.captured_file))
-    end
-    def clean
-      File.unlink self.captured_file if self.captured_file && File.exist?(self.captured_file)
+      self.best_match_text = self.best_match_text.strip
+      self.score /= self.segments
+      self.captured_json
     end
   protected
     def convert_chunk(easy, chunk, options={})
-      puts "sending chunk of size #{chunk.duration}..."
+      puts "sending chunk of size #{chunk.duration}..." if self.verbose
       retrying = true
       retry_count = 0
-      while retrying && retry_count < 5
-        #easy.verbose = true
+      while retrying && retry_count < 3 # 3 retries
+        easy.verbose = self.verbose
         easy.headers['Content-Type'] = "audio/x-flac; rate=#{chunk.flac_rate}"
         easy.headers['User-Agent'] = "https://github.com/taf2/speech2text"
-        #puts chunk.inspect
         easy.post_body = "Content=#{chunk.to_flac_bytes}"
-        easy.on_progress {|dl_total, dl_now, ul_total, ul_now| printf("%.2f/%.2f\r", ul_now, ul_total); true }
+        if self.verbose
+          easy.on_progress {|dl_total, dl_now, ul_total, ul_now| printf("%.2f/%.2f\r", ul_now, ul_total); true }
+        end
         easy.on_complete {|easy| puts }
         easy.http_post
-        #puts easy.header_str
-        #puts easy.body_str
         if easy.response_code == 500
-          puts "500 from google retry after 0.5 seconds"
+          puts "500 from google retry after 0.5 seconds" if self.verbose
           retrying = true
           retry_count += 1
           sleep 0.5 # wait longer on error?, google??
         else
-          # {"status":0,"id":"ce178ea89f8b17d8e8298c9c7814700a-1","hypotheses":[{"utterance":"I like pickles","confidence":0.92731786}]}
+          # {"status":0,"id":"ce178ea89f8b17d8e8298c9c7814700a-1","hypotheses":[{"utterance"=>"I like pickles", "confidence"=>0.59408695}, {"utterance"=>"I like turtles"}, {"utterance"=>"I like tickles"}, {"utterance"=>"I like to Kohl's"}, {"utterance"=>"I Like tickles"}, {"utterance"=>"I lyk tickles"}, {"utterance"=>"I liked to Kohl's"}]}
           data = JSON.parse(easy.body_str)
           self.captured_json['status'] = data['status']
           self.captured_json['id'] = data['id']
           self.captured_json['hypotheses'] = data['hypotheses'].map {|ut| [ut['utterance'], ut['confidence']] }
-          puts self.captured_json.inspect
-          File.open("#{self.captured_file}", "wb") {|f| f << captured_json.to_json }
+          if data.key?('hypotheses') && ['hypotheses'].first
+            self.best_match_text += " " + data['hypotheses'].first['utterance']
+            self.score += data['hypotheses'].first['confidence']
+            self.segments += 1
+          end
           retrying = false
         end
         sleep 0.1 # not too fast there tiger
       end
+      puts "#{segments} processed: #{self.captured_json.inspect}" if self.verbose
+      self.captured_json
     ensure
       chunk.clean
     end

data/lib/speech/version.rb CHANGED

@@ -1,6 +1,6 @@
 # -*- encoding: binary -*-
 module Speech
   class Info
-    VERSION='0.3.4'
+    VERSION='0.3.6'
   end
 end

data/test/audio_splitter_test.rb CHANGED

@@ -6,10 +6,10 @@ require 'speech'
 class SpeechAudioSplitterTest < Test::Unit::TestCase
   def test_audio_splitter
-    splitter = Speech::AudioSplitter.new("samples/i-like-pickles.wav", 1)
+    splitter = Speech::AudioSplitter.new(File.expand_path(File.join(File.dirname(__FILE__),"samples/i-like-pickles.wav")), 1)
-    assert_equal '00:00:03:52', splitter.duration.to_s
-    assert_equal 3.52, splitter.duration.to_f
+    assert_equal '00:00:03.51', splitter.duration.to_s
+    assert_equal 3.51, splitter.duration.to_f
     chunks = splitter.split
     assert_equal 3, chunks.size

data/test/audio_to_text_test.rb CHANGED

@@ -4,39 +4,19 @@ $:.unshift File.expand_path(File.dirname(__FILE__) + '/../lib')
 require 'speech'
 class SpeechAudioToTextTest < Test::Unit::TestCase
-  def test_audio_to_text
-    audio = Speech::AudioToText.new("samples/i-like-pickles.wav")
-    captured_json = audio.to_text
-    assert captured_json
-    assert captured_json.key?("hypotheses")
-    assert !captured_json['hypotheses'].empty?
-    assert captured_json.keys.include?('status')
-    assert captured_json.keys.include?('id')
-    assert captured_json.keys.include?('hypotheses')
+  def setup
+    super
+  end
-    assert_equal "I like pickles", captured_json['hypotheses'].first.first
-    assert captured_json['hypotheses'].first.last > 0.9
-#    {"hypotheses"=>[["I like pickles", 0.92731786]]}
-#    puts captured_json.inspect
-  ensure
-    audio.clean
+  def test_audio_to_text
+    audio = Speech::AudioToText.new(File.expand_path(File.join(File.dirname(__FILE__),"samples/i-like-pickles.wav")))
+    assert_equal "I like pickles", audio.to_text
   end
-  def test_short_audio_clip
-    audio = Speech::AudioToText.new("samples/i-like-pickles.chunk5.wav")
-    captured_json = audio.to_text
-    assert captured_json
-    assert captured_json.key?("hypotheses")
-    assert !captured_json['hypotheses'].empty?
-    #{"status"=>0, "id"=>"552de5ba35bb769ce3493ff113e158a8-1", "hypotheses"=>[["eagles", 0.7214844], ["pickles", nil], ["michaels", nil], ["giggles", nil], ["tickles", nil]]}
-    assert captured_json.keys.include?('status')
-    assert captured_json.keys.include?('id')
-    assert captured_json.keys.include?('hypotheses')
-    puts captured_json.inspect
-    assert_equal "eagles", captured_json['hypotheses'][0].first
-    assert_equal "pickles", captured_json['hypotheses'][1].first
-    #assert captured_json['confidence'] > 0.9
-  ensure
-    audio.clean
+  def test_longer_audio
+    audio = Speech::AudioToText.new(File.expand_path(File.join(File.dirname(__FILE__),"/SampleAudio.wav")), :verbose => true)
+    puts audio.to_text
+    puts audio.score
+    puts audio.segments
   end
 end

metadata CHANGED

@@ -1,7 +1,8 @@
 --- !ruby/object:Gem::Specification
 name: speech2text
 version: !ruby/object:Gem::Version
-  version: 0.3.4
+  version: !binary |-
+    MC4zLjY=
   prerelease:
 platform: ruby
 authors:
@@ -9,12 +10,11 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2011-04-05 00:00:00.000000000 -04:00
-default_executable:
+date: 2012-10-07 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: curb
-  requirement: &2163558380 !ruby/object:Gem::Requirement
+  requirement: !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -22,10 +22,15 @@ dependencies:
         version: '0'
   type: :runtime
   prerelease: false
-  version_requirements: *2163558380
+  version_requirements: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
 - !ruby/object:Gem::Dependency
   name: json
-  requirement: &2163557840 !ruby/object:Gem::Requirement
+  requirement: !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -33,7 +38,12 @@ dependencies:
         version: '0'
   type: :runtime
   prerelease: false
-  version_requirements: *2163557840
+  version_requirements: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
 description: Super powers of Google wrapped in a nice Ruby interface
 email: todd.fisher@gmail.com
 executables:
@@ -51,12 +61,10 @@ files:
 - test/audio_splitter_test.rb
 - test/audio_to_text_test.rb
 - test/SampleAudio.wav
-- test/samples/i-like-pickles.json
 - test/samples/i-like-pickles.wav
 - Rakefile
 - README.rdoc
 - speech2text.gemspec
-has_rdoc: true
 homepage: https://github.com/taf2/speech2text
 licenses: []
 post_install_message:
@@ -77,8 +85,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 1.6.2
+rubygems_version: 1.8.24
 signing_key:
 specification_version: 3
 summary: Speech to Text Library
 test_files: []
+has_rdoc:

data/test/samples/i-like-pickles.json DELETED

	@@ -1 +0,0 @@
1	- {"captured_json":[["I like pickles",0.92731786]],"confidence":0.92731786}