RubyGems - google_speech - Versions diffs - 0.0.1 → 0.0.2 - Mend

google_speech 0.0.1 → 0.0.2

Files changed (13) hide show

data/README.md +23 -1
data/Rakefile +7 -1
data/bin/google_speech +13 -0
data/google_speech.gemspec +2 -0
data/lib/google_speech.rb +8 -199
data/lib/google_speech/chunk.rb +32 -0
data/lib/google_speech/chunk_factory.rb +27 -0
data/lib/google_speech/transcriber.rb +81 -0
data/lib/google_speech/utility.rb +93 -0
data/lib/google_speech/version.rb +3 -1
data/test/test_helper.rb +5 -0
data/test/transcriber_test.rb +29 -0
metadata +19 -4

data/README.md CHANGED Viewed

@@ -26,7 +26,29 @@ Or install it yourself as:
 ## Usage
-TODO: Write usage instructions here
+	require 'google_speech'
+	f = File.open '/Users/you/Downloads/audio.wav'
+  	transcriber = GoogleSpeech::Transcriber.new(f)
+  	t = transcriber.transcribe
+Options:
+	* language - the language the speech is in
+	* chunk_duration - length in seconds for each audio chunk of the wav to send
+	* overlap - chunking does not respect word boundaries; overlap can compensate
+	* max_results - number of results to request from the speech API
+	* request_pause - sleep seconds between chunk transcription requests
+	* profanity_filter - Google filters profanity by default; this gem does not
+Default option values:
+	{
+		:language         => 'en-US',
+		:chunk_duration   => 8,
+		:overlap          => 1,
+		:max_results      => 2,
+		:request_pause    => 1,
+		:profanity_filter => false
+	}
 ## Contributing

data/Rakefile CHANGED Viewed

@@ -1 +1,7 @@
-require "bundler/gem_tasks"
+require 'bundler/gem_tasks'
+require 'rake/testtask'
+desc "Default Task (test gem)"
+task :default => :test
+Rake::TestTask.new(:test) { |t| t.test_files = FileList['test/*_test.rb'] }

data/bin/google_speech ADDED Viewed

@@ -0,0 +1,13 @@
+#!/usr/bin/env ruby
+require 'google_speech'
+if ARGV[0].nil? || !File.exist?(ARGV[0])
+  STDERR.puts "usage: #{$0} input.wav"
+  exit(1)
+end
+f = File.open ARGV[0]
+transcriber = GoogleSpeech::Transcriber.new(f)
+t = transcriber.transcribe
+puts t.inspect

data/google_speech.gemspec CHANGED Viewed

@@ -1,6 +1,8 @@
 # -*- encoding: utf-8 -*-
 lib = File.expand_path('../lib', __FILE__)
 $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
 require 'google_speech/version'
 Gem::Specification.new do |gem|

data/lib/google_speech.rb CHANGED Viewed

@@ -1,203 +1,12 @@
-require 'rubygems'
-require 'google_speech/version'
+# -*- encoding: utf-8 -*-
-require 'excon'
-require 'tempfile'
-require 'open3'
-require 'logger'
-require 'cgi'
-require 'json'
+require 'rubygems'
 module GoogleSpeech
-  SOX_ERROR_RE       = /error:/
-  class Transcriber
-    attr_accessor :original_file, :options, :results
-    def initialize(original_file, options=nil)
-      @original_file = original_file
-      @options = {:language=>'en-US', :chunk_duration=>8, :overlap=>1, :max_results=>2}.merge(options || {})
-      @results = []
-    end
-    def transcribe
-      ChunkFactory.new(@original_file, options[:chunk_duration], options[:overlap]).each{|chunk|
-        result = chunk.to_hash
-        transcript = transcribe_data(chunk.data)
-        result[:text] = transcript['hypotheses'].first['utterance']
-        result[:confidence] = transcript['hypotheses'].first['confidence']
-        @results << result
-        puts "\n#{result[:start_time]} - #{result[:start_time].to_i + result[:duration].to_i}: #{(result[:confidence].to_f * 100).to_i}%: #{result[:text]}"
-        sleep(1)
-      }
-      @results
-    end
-    def transcribe_data(data)
-      params = {
-        :scheme   => 'https',
-        :host     => 'www.google.com',
-        :port     => 443,
-        :path     => "/speech-api/v1/recognize",
-        :query    => "xjerr=1&client=google_speech&lang=#{options[:language]}&maxresults=#{options[:max_results].to_i}",
-        :body     => data,
-        :method   => 'POST',
-        :headers  => {
-          'Content-Type'   => 'audio/x-flac; rate=16000',
-          'Content-Length' => data.bytesize,
-          'User-Agent'     => "google_speech"
-        }
-      }
-      retry_max = options[:retry_max] ? [options[:retry_max].to_i, 1].max : 3
-      retry_count = 0
-      result = nil
-      url = "#{params[:scheme]}://#{params[:host]}:#{params[:port]}#{params[:path]}"
-      while(!result && retry_count < retry_max)
-        connection = Excon.new(url)
-        response = connection.request(params)
-        if response.status.to_s.start_with?('2')
-          result = JSON.parse(response.body)
-        else
-          sleep(1)
-          retry_count += 1
-        end
-      end
-      result
-    end
-  end
-  # break wav audio into short files
-  class ChunkFactory
-    attr_accessor :original_file, :chunk_duration, :overlap
-    def initialize(original_file, chunk_duration=8, overlap=1)
-      @chunk_duration    = chunk_duration.to_i
-      @original_file     = original_file
-      @overlap           = overlap
-      @original_duration = GoogleSpeech::Utility.audio_file_duration(@original_file.path).to_i
-    end
-    # return temp file for each chunk
-    def each
-      pos = 0
-      while(pos < @original_duration) do
-        chunk = Chunk.new(@original_file, @original_duration, pos, (@chunk_duration + @overlap))
-        yield chunk
-        pos = pos + [chunk.duration, @chunk_duration].min
-      end
-    end
-  end
-  class Chunk
-    attr_accessor :original_file, :original_duration, :start_time, :duration, :chunk_file
-    def initialize(original_file, original_duration, start_time, duration)
-      @original_file = original_file
-      @original_duration = original_duration.to_i
-      @start_time = start_time.to_i
-      @duration = [duration.to_i, (@original_duration - @start_time)].min
-      @chunk_file = Tempfile.new([File.basename(@original_file), '.flac'])
-      # puts "@chunk_file: #{@chunk_file.path}"
-      Utility.trim_to_flac(@original_file.path, @duration, @chunk_file.path, @start_time, @duration)
-    end
-    def to_hash
-      {
-        :start_time => @start_time,
-        :duration => @duration
-      }
-    end
-    def data
-      @data ||= @chunk_file.read
-    end
-  end
-  # send each to google api
-  class Utility
-    class <<self
-      def audio_file_duration(path)
-        check_local_file(path)
-        soxi_duration, err = run_command("soxi -V0 -D #{path}", :nice=>false, :echo_return=>false)
-        duration = soxi_duration.chomp.to_f
-        duration
-      end
-      def trim_to_flac(wav_path, duration, flac_path, start, length)
-        check_local_file(wav_path)
-        command = "sox -t wav '#{wav_path}' -t flac '#{flac_path}' trim #{start.to_i} #{length.to_i} rate 16k"
-        out, err = run_command(command)
-        response = out + err
-        response.split("\n").each{ |l| raise("trim_to_flac: error cmd: '#{command}'\nout: '#{response}'") if l =~ SOX_ERROR_RE }
-      end
-      # Pass the command to run, and various options
-      # :timeout - seconds to wait for command to complete, defaults to 2 hours
-      # :echo_return - gets the return value via appended '; echo $?', true by default
-      # :nice - call with nice -19 by default, set to false to stop, or integer to set different level
-      def run_command(command, options={})
-        timeout = options[:timeout] || 7200
-        # default to adding a nice 19 if nothing specified
-        nice = if options.key?(:nice)
-          !options[:nice] ? '' : "nice -n #{options[:nice].to_i} "
-        else
-          'nice -n 19 '
-        end
-        echo_return = (options.key?(:echo_return) && !options[:echo_return]) ? '' : '; echo $?'
-        cmd = "#{nice}#{command}#{echo_return}"
-        # logger.debug "run_command:  #{cmd}"
-        begin
-          result = Timeout::timeout(timeout) {
-            Open3::popen3(cmd) do |i,o,e|
-              out_str = ""
-              err_str = ""
-              i.close # important!
-              o.sync = true
-              e.sync = true
-              o.each{|line|
-                out_str << line
-                line.chomp!
-                # logger.debug "stdout:    #{line}"
-              }
-              e.each { |line|
-                err_str << line
-                line.chomp!
-                # logger.debug "stderr:    #{line}"
-              }
-              return out_str, err_str
-            end
-          }
-        rescue Timeout::Error => toe
-          # logger.debug "run_command:Timeout Error - running command, took longer than #{timeout} seconds to execute: '#{cmd}'"
-          raise toe
-        end
-      end
-      def check_local_file(file_path)
-        raise "File missing or 0 length: #{file_path}" unless (File.size?(file_path).to_i > 0)
-      end
-      def logger
-        @logger ||= Logger.new(STDOUT)
-      end
-      def logger=(l)
-        @logger = l
-      end
-    end
-  end
 end
+require 'google_speech/version'
+require 'google_speech/utility'
+require 'google_speech/chunk'
+require 'google_speech/chunk_factory'
+require 'google_speech/transcriber'

data/lib/google_speech/chunk.rb ADDED Viewed

@@ -0,0 +1,32 @@
+# -*- encoding: utf-8 -*-
+require 'tempfile'
+module GoogleSpeech
+  class Chunk
+    attr_accessor :original_file, :original_duration, :start_time, :duration, :chunk_file
+    def initialize(original_file, original_duration, start_time, duration)
+      @original_file = original_file
+      @original_duration = original_duration.to_i
+      @start_time = start_time.to_i
+      @duration = [duration.to_i, (@original_duration - @start_time)].min
+      @chunk_file = Tempfile.new([File.basename(@original_file), '.flac'])
+      # puts "@chunk_file: #{@chunk_file.path}"
+      Utility.trim_to_flac(@original_file.path, @duration, @chunk_file.path, @start_time, @duration)
+    end
+    def to_hash
+      {
+        :start_time => @start_time,
+        :duration => @duration
+      }
+    end
+    def data
+      @data ||= @chunk_file.read
+    end
+  end
+end

data/lib/google_speech/chunk_factory.rb ADDED Viewed

@@ -0,0 +1,27 @@
+# -*- encoding: utf-8 -*-
+module GoogleSpeech
+  # break wav audio into short files
+  class ChunkFactory
+    attr_accessor :original_file, :chunk_duration, :overlap
+    def initialize(original_file, chunk_duration=8, overlap=1)
+      @chunk_duration    = chunk_duration.to_i
+      @original_file     = original_file
+      @overlap           = overlap
+      @original_duration = GoogleSpeech::Utility.audio_file_duration(@original_file.path).to_i
+    end
+    # yield a Chunk (backed by a temp flac file) for each segment of the audio
+    def each
+      pos = 0
+      while(pos < @original_duration) do
+        chunk = Chunk.new(@original_file, @original_duration, pos, (@chunk_duration + @overlap))
+        yield chunk
+        pos = pos + [chunk.duration, @chunk_duration].min
+      end
+    end
+  end
+end

data/lib/google_speech/transcriber.rb ADDED Viewed

@@ -0,0 +1,81 @@
+# -*- encoding: utf-8 -*-
+require 'excon'
+require 'json'
+module GoogleSpeech
+  class Transcriber
+    attr_accessor :original_file, :options, :results
+    DEFAULT_OPTIONS =   {
+      :language         => 'en-US',
+      :chunk_duration   => 8,
+      :overlap          => 1,
+      :max_results      => 2,
+      :request_pause    => 1,
+      :profanity_filter => false
+    }
+    def initialize(original_file, options=nil)
+      @original_file = original_file
+      @options = DEFAULT_OPTIONS.merge(options || {})
+      @results = []
+    end
+    def transcribe
+      chunk_factory = ChunkFactory.new(@original_file, options[:chunk_duration], options[:overlap])
+      chunk_factory.each{ |chunk|
+        result = chunk.to_hash
+        transcript = transcribe_data(chunk.data)
+        result[:text] = transcript['hypotheses'].first['utterance']
+        result[:confidence] = transcript['hypotheses'].first['confidence']
+        @results << result
+        # puts "\n#{result[:start_time]} - #{result[:start_time].to_i + result[:duration].to_i}: #{(result[:confidence].to_f * 100).to_i}%: #{result[:text]}"
+        sleep(options[:request_pause].to_i)
+      }
+      @results
+    end
+    def pfilter
+      options[:profanity_filter] ? '1' : '0'
+    end
+    def transcribe_data(data)
+      params = {
+        :scheme   => 'https',
+        :host     => 'www.google.com',
+        :port     => 443,
+        :path     => "/speech-api/v1/recognize",
+        :query    => "xjerr=1&client=google_speech&lang=#{options[:language]}&maxresults=#{options[:max_results].to_i}&pfilter=#{pfilter}",
+        :body     => data,
+        :method   => 'POST',
+        :headers  => {
+          'Content-Type'   => 'audio/x-flac; rate=16000',
+          'Content-Length' => data.bytesize,
+          'User-Agent'     => "google_speech"
+        }
+      }
+      retry_max = options[:retry_max] ? [options[:retry_max].to_i, 1].max : 3
+      retry_count = 0
+      result = nil
+      url = "#{params[:scheme]}://#{params[:host]}:#{params[:port]}#{params[:path]}"
+      while(!result && retry_count < retry_max)
+        connection = Excon.new(url)
+        response = connection.request(params)
+        if response.status.to_s.start_with?('2')
+          result = JSON.parse(response.body)
+        else
+          sleep(1)
+          retry_count += 1
+        end
+      end
+      result
+    end
+  end
+end

data/lib/google_speech/utility.rb ADDED Viewed

@@ -0,0 +1,93 @@
+# -*- encoding: utf-8 -*-
+require 'tempfile'
+require 'open3'
+require 'logger'
+module GoogleSpeech
+  class Utility
+    SOX_ERROR_RE       = /error:/
+    class <<self
+      def audio_file_duration(path)
+        check_local_file(path)
+        soxi_duration, err = run_command("soxi -V0 -D #{path}", :nice=>false, :echo_return=>false)
+        duration = soxi_duration.chomp.to_f
+        duration
+      end
+      def trim_to_flac(wav_path, duration, flac_path, start, length)
+        check_local_file(wav_path)
+        command = "sox -t wav '#{wav_path}' -t flac '#{flac_path}' trim #{start.to_i} #{length.to_i} rate 16k"
+        out, err = run_command(command)
+        response = out + err
+        response.split("\n").each{ |l| raise("trim_to_flac: error cmd: '#{command}'\nout: '#{response}'") if l =~ SOX_ERROR_RE }
+      end
+      # Pass the command to run, and various options
+      # :timeout - seconds to wait for command to complete, defaults to 2 hours
+      # :echo_return - gets the return value via appended '; echo $?', true by default
+      # :nice - call with nice -n 19 by default, set to false to stop, or integer to set different level
+      def run_command(command, options={})
+        timeout = options[:timeout] || 7200
+        # default to adding a nice 19 if nothing specified
+        nice = if options.key?(:nice)
+          !options[:nice] ? '' : "nice -n #{options[:nice].to_i} "
+        else
+          'nice -n 19 '
+        end
+        echo_return = (options.key?(:echo_return) && !options[:echo_return]) ? '' : '; echo $?'
+        cmd = "#{nice}#{command}#{echo_return}"
+        # logger.debug "run_command:  #{cmd}"
+        begin
+          result = Timeout::timeout(timeout) {
+            Open3::popen3(cmd) do |i,o,e|
+              out_str = ""
+              err_str = ""
+              i.close # important!
+              o.sync = true
+              e.sync = true
+              o.each{|line|
+                out_str << line
+                line.chomp!
+                # logger.debug "stdout:    #{line}"
+              }
+              e.each { |line|
+                err_str << line
+                line.chomp!
+                # logger.debug "stderr:    #{line}"
+              }
+              return out_str, err_str
+            end
+          }
+        rescue Timeout::Error => toe
+          # logger.debug "run_command:Timeout Error - running command, took longer than #{timeout} seconds to execute: '#{cmd}'"
+          raise toe
+        end
+      end
+      def check_local_file(file_path)
+        raise "File missing or 0 length: #{file_path}" unless (File.size?(file_path).to_i > 0)
+      end
+      def logger
+        @logger ||= Logger.new(STDOUT)
+      end
+      def logger=(l)
+        @logger = l
+      end
+    end
+  end
+end

data/lib/google_speech/version.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# -*- encoding: utf-8 -*-
 module GoogleSpeech
-  VERSION = "0.0.1"
+  VERSION = "0.0.2"
 end

data/test/test_helper.rb ADDED Viewed

@@ -0,0 +1,5 @@
+# -*- encoding: utf-8 -*-
+require 'minitest/autorun'
+$:.unshift File.expand_path(File.dirname(__FILE__) + '/../lib')
+require 'google_speech'

data/test/transcriber_test.rb ADDED Viewed

@@ -0,0 +1,29 @@
+# -*- encoding: utf-8 -*-
+require 'test_helper.rb'
+describe Transcriber do
+  before do
+  end
+  describe 'load file' do
+	  f = File.open '/Users/andrew/Downloads/hive.wav'
+	  # f = File.open('/Users/andrew/dev/projects/nu_wav/test/files/test_basic.wav')
+	  transcriber = GoogleSpeech::Transcriber.new(f)
+	  t = transcriber.transcribe
+	  puts t.inspect
+  end
+  # describe "when asked about cheeseburgers" do
+  #   it "must respond positively" do
+  #     @meme.i_can_has_cheezburger?.must_equal "OHAI!"
+  #   end
+  # end
+  # describe "when asked about blending possibilities" do
+  #   it "won't say no" do
+  #     @meme.will_it_blend?.wont_match /^no/i
+  #   end
+  # end
+end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: google_speech
 version: !ruby/object:Gem::Version
-  version: 0.0.1
+  version: 0.0.2
   prerelease:
 platform: ruby
 authors:
@@ -46,7 +46,8 @@ dependencies:
 description: This is a gem to call the google speech api.
 email:
 - andrew@prx.org
-executables: []
+executables:
+- google_speech
 extensions: []
 extra_rdoc_files: []
 files:
@@ -55,9 +56,16 @@ files:
 - LICENSE.txt
 - README.md
 - Rakefile
+- bin/google_speech
 - google_speech.gemspec
 - lib/google_speech.rb
+- lib/google_speech/chunk.rb
+- lib/google_speech/chunk_factory.rb
+- lib/google_speech/transcriber.rb
+- lib/google_speech/utility.rb
 - lib/google_speech/version.rb
+- test/test_helper.rb
+- test/transcriber_test.rb
 homepage: ''
 licenses: []
 post_install_message:
@@ -70,17 +78,24 @@ required_ruby_version: !ruby/object:Gem::Requirement
   - - ! '>='
     - !ruby/object:Gem::Version
       version: '0'
+      segments:
+      - 0
+      hash: -2361961218730724122
 required_rubygems_version: !ruby/object:Gem::Requirement
   none: false
   requirements:
   - - ! '>='
     - !ruby/object:Gem::Version
       version: '0'
+      segments:
+      - 0
+      hash: -2361961218730724122
 requirements: []
 rubyforge_project:
 rubygems_version: 1.8.23
 signing_key:
 specification_version: 3
 summary: This is a gem to call the google speech api.
-test_files: []
-has_rdoc:
+test_files:
+- test/test_helper.rb
+- test/transcriber_test.rb