RubyGems - google_speech - Versions diffs - 0.3.0 → 0.3.1 - Mend

google_speech 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

checksums.yaml +4 -4
data/lib/google_speech/chunk.rb +8 -7
data/lib/google_speech/chunk_factory.rb +4 -3
data/lib/google_speech/transcriber.rb +51 -17
data/lib/google_speech/utility.rb +4 -7
data/lib/google_speech/version.rb +1 -1
data/spec/transcriber_spec.rb +1 -2
metadata +2 -2

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 1e8c0b050e64d5ccc7521777e0b320f5544eff3b
-  data.tar.gz: 7389a2c17862a3a3e31359b721fa9d2eeb8d3c3b
+  metadata.gz: 308bea0bbf8297bbe42f197906b271e789453629
+  data.tar.gz: 4829db0fc7ceecf1c91de0b044f108bf3016eea3
 SHA512:
-  metadata.gz: 5d33ab4b317a1ef3acaa48e9d05c9e47248a8e5b0fc19de80a32729f0b7a37de0fe0d595ea56d63fefe14a78a41ac7e0fe261e53e54f34b2bc6ccf374e7c869c
-  data.tar.gz: d2c37ebff2fb005d3fa8a69c860104977481f17b545be69ced5fb3c576d4a2940acf8b0ef074de8f715f2a8477dfd0a2c025e48ddf7be15dd286dade706b19ae
+  metadata.gz: 7b536794d1d6ee96227f2a21a3928a11d83583ef982597b872520422510a873bc1cbd431cffe8f4f52a6428a74d54900aa39a2ffffc0d8b054659c5a15704792
+  data.tar.gz: 28d88d63f1a08eccd9c36010a7cc0bfe89292487029c96528648529428b02210c9ebe09d26700e436b6157a67f733c35555d1fe10c1776336b928c3b093597c2

data/lib/google_speech/chunk.rb CHANGED

@@ -5,16 +5,17 @@ require 'tempfile'
 module GoogleSpeech
   class Chunk
-    attr_accessor :original_file, :original_duration, :start_time, :duration, :chunk_file
+    attr_accessor :original_file, :original_duration, :start_time, :duration, :chunk_file, :rate
-    def initialize(original_file, original_duration, start_time, duration)
-      @original_file = original_file
+    def initialize(original_file, original_duration, start_time, duration, rate)
+      @original_file     = original_file
       @original_duration = original_duration
-      @start_time = start_time
-      @duration = [duration, (@original_duration - @start_time)].min
-      @chunk_file = Tempfile.new([File.basename(@original_file), '.flac'])
+      @start_time        = start_time
+      @duration          = [duration, (@original_duration - @start_time)].min
+      @rate              = rate
+      @chunk_file        = Tempfile.new([File.basename(@original_file), '.wav'])
       # puts "@chunk_file: #{@chunk_file.path}"
-      Utility.trim_to_flac(@original_file.path, @duration, @chunk_file.path, @start_time, @duration)
+      Utility.trim_and_encode(@original_file.path, @chunk_file.path, @start_time, @duration, @rate)
     end
     def to_hash

data/lib/google_speech/chunk_factory.rb CHANGED

@@ -4,12 +4,13 @@ module GoogleSpeech
   # break wav audio into short files
   class ChunkFactory
-    attr_accessor :original_file, :chunk_duration, :overlap
+    attr_accessor :original_file, :chunk_duration, :overlap, :rate
-    def initialize(original_file, chunk_duration=8, overlap=1)
+    def initialize(original_file, chunk_duration, overlap, rate)
       @chunk_duration    = chunk_duration
       @original_file     = original_file
       @overlap           = overlap
+      @rate              = rate
       @original_duration = GoogleSpeech::Utility.audio_file_duration(@original_file.path)
     end
@@ -17,7 +18,7 @@ module GoogleSpeech
     def each
       pos = 0
       while(pos < @original_duration) do
-        chunk = Chunk.new(@original_file, @original_duration, pos, (@chunk_duration + @overlap))
+        chunk = Chunk.new(@original_file, @original_duration, pos, (@chunk_duration + @overlap), @rate)
         yield chunk
         pos = pos + [chunk.duration, @chunk_duration].min
       end

data/lib/google_speech/transcriber.rb CHANGED

@@ -10,22 +10,27 @@ module GoogleSpeech
     DEFAULT_OPTIONS =   {
       :key              => 'AIzaSyCnl6MRydhw_5fLXIdASxkLJzcJh5iX0M4',
-      :language         => 'en-US',
-      :chunk_duration   => 5,
-      :overlap          => 0.5,
+      :client           => SecureRandom.hex,
+      :audio_type       => 'audio/l16',
+      :rate             => 8000,
+      :language         => 'en-us',
+      :chunk_duration   => 4.0,
+      :overlap          => 0.25,
       :max_results      => 1,
-      :request_pause    => 1,
-      :profanity_filter => true
+      :request_pause    => 0.1,
+      :profanity_filter => true,
+      :retry_max        => 3
     }
     def initialize(original_file, options=nil)
       @original_file = original_file
       @options = DEFAULT_OPTIONS.merge(options || {})
       @results = []
+      @last_ua = 0
     end
     def transcribe
-      chunk_factory = ChunkFactory.new(@original_file, options[:chunk_duration], options[:overlap])
+      chunk_factory = ChunkFactory.new(@original_file, options[:chunk_duration], options[:overlap], options[:rate])
       chunk_factory.each{ |chunk|
         result = chunk.to_hash
         transcript = transcribe_data(chunk.data)
@@ -63,43 +68,72 @@ module GoogleSpeech
       options[:profanity_filter] ? '1' : '0'
     end
+    def user_agent
+      ua_strings = [
+        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1944.0 Safari/537.36',
+        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36',
+        'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.116 Safari/537.36 Mozilla/5.0 (iPad; U; CPU OS 3_2 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Version/4.0.4 Mobile/7B334b Safari/531.21.10',
+        'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36',
+        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1664.3 Safari/537.36',
+        'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.16 Safari/537.36',
+        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1623.0 Safari/537.36',
+        'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.17 Safari/537.36',
+        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.62 Safari/537.36',
+        'Mozilla/5.0 (X11; CrOS i686 4319.74.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.57 Safari/537.36',
+        'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.2 Safari/537.36',
+        'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1468.0 Safari/537.36',
+        'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1467.0 Safari/537.36',
+        'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1464.0 Safari/537.36',
+        'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36',
+        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36',
+        'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36',
+        'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36',
+        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36',
+        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36'
+      ]
+      ua = ua_strings[(@last_ua % ua_strings.length)]
+      @last_ua += 1
+      ua
+    end
     def transcribe_data(data)
       params = {
         :path     => "/speech-api/v2/recognize",
-        :query    => "output=json&client=chromium&lang=#{options[:language]}&key=#{options[:key]}",
+        :query    => "output=json&key=#{options[:key]}&client=#{options[:client]}&lang=#{options[:language]}",
         :body     => data,
         :method   => 'POST',
         :headers  => {
-          'Content-Type'   => 'audio/x-flac; rate=8000',
+          'Content-Type'   => "#{options[:audio_type]}; rate=#{options[:rate]}",
           'Content-Length' => data.bytesize,
-          'User-Agent'     => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1664.3 Safari/537.36"
+          'User-Agent'     => user_agent
         }
       }
+      # puts "data size: #{data.bytesize}"
       retry_max = options[:retry_max] ? [options[:retry_max].to_i, 1].max : 3
       retry_count = 0
       result = nil
-      url = "https://www.google.com:443#{params[:path]}"
+      url = "https://www.google.com#{params[:path]}"
       while(!result && retry_count < retry_max)
         retry_count += 1
         begin
           connection = Excon.new(url)
           response = connection.request(params)
           # puts "response: #{response.inspect}\n\n"
-          # puts "response.body:\nSTART\n#{response.body}\nEND\n#{response.body.class.name}"
-          if response.status.to_s.start_with?('2')
+          # puts "response.headers:\n#{response.headers}\n"
+          # puts "response.body:'#{response.body}'\n"
+          if response.status.to_s.start_with?('2') && response.body != "{\"result\":[]}\n"
             result = []
             if (response.body && response.body.size > 0)
               result = response.body.split("\n").collect{|b| JSON.parse(b)} rescue []
             end
           else
-            logger.error "transcribe_data response unsuccessful, status: #{response.status}, response: #{response.inspect}"
-            sleep(1)
+            logger.error "        transcribe_data retrycount(#{retry_count}): status: #{response.status}, response: #{response.body.chomp}"
+            sleep(options[:request_pause].to_i)
           end
         rescue StandardError => err
           #need to do something to retry this - use new a13g func for this.
-          logger.error "transcribe_data retrycount(#{retry_count}): error: #{err.message}"
-          sleep(1)
+          logger.error "        transcribe_data retrycount(#{retry_count}): error: #{err.message}"
+          sleep(options[:request_pause].to_i)
         end
       end

data/lib/google_speech/utility.rb CHANGED

@@ -19,16 +19,13 @@ module GoogleSpeech
         duration
       end
-      def trim_to_flac(wav_path, duration, flac_path, start, length)
+      def trim_and_encode(wav_path, flac_path, start, length, rate)
         check_local_file(wav_path)
-        command = "sox -t wav '#{wav_path}' -r 8000 -c 1 -t flac '#{flac_path}' trim #{start} #{length} compand .5,2 -80,-80,-75,-50,-30,-15,0,0 norm -0.1"
-        # command = "sox -t wav '#{wav_path}' -t flac '#{flac_path}' trim #{start} #{length} rate 8000"
+        command = "sox -t wav '#{wav_path}' -t wav '#{flac_path}' norm channels 1 rate #{rate} trim #{start} #{length} compand .5,2 -80,-80,-75,-50,-30,-15,0,0"
         out, err = run_command(command)
         response = out + err
-        response.split("\n").each{ |l| raise("trim_to_flac: error cmd: '#{command}'\nout: '#{response}'") if l =~ SOX_ERROR_RE }
+        response.split("\n").each{ |l| raise("trim_and_encode: error cmd: '#{command}'\nout: '#{response}'") if l =~ SOX_ERROR_RE }
       end
       # Pass the command to run, and various options
@@ -49,7 +46,7 @@ module GoogleSpeech
         cmd = "#{nice}#{command}#{echo_return}"
-        logger.info "google_speech - run_command: #{cmd}"
+        # logger.info "google_speech - run_command: #{cmd}"
         begin
           result = Timeout::timeout(timeout) {
             Open3::popen3(cmd) do |i,o,e|

data/lib/google_speech/version.rb CHANGED

@@ -1,5 +1,5 @@
 # -*- encoding: utf-8 -*-
 module GoogleSpeech
-  VERSION = "0.3.0"
+  VERSION = "0.3.1"
 end

data/spec/transcriber_spec.rb CHANGED

@@ -9,11 +9,10 @@ describe GoogleSpeech::Transcriber do
     transcriber = GoogleSpeech::Transcriber.new(f)
     t = transcriber.transcribe
-    # puts "\n\nt: #{t.inspect}\n\n"
     t.size.must_equal 2
     t.first[:start_time].must_equal 0
-    t.first[:end_time].must_equal 5.5
+    t.first[:end_time].must_equal 4.25
     t.first[:text].wont_be_nil
     t.first[:confidence].wont_be_nil
   end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: google_speech
 version: !ruby/object:Gem::Version
-  version: 0.3.0
+  version: 0.3.1
 platform: ruby
 authors:
 - Andrew Kuklewicz
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-05-07 00:00:00.000000000 Z
+date: 2014-05-08 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: excon