google_speech 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in google_speech.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 The Public Radio Exchange, www.prx.org
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,37 @@
1
+ # GoogleSpeech
2
+
3
+ This is a gem to call the google speech api.
4
+
5
+ The gem expects pcm wav audio.
6
+
7
+ It returns JSON including confidence values and timing (which acts as a kind of transcription alignment).
8
+
9
+ It uses excon for the http communication, sox (http://sox.sourceforge.net/) for audio conversion and splitting, and the related soxi executable to get audio file info/length.
10
+
11
+ Inspired by https://github.com/taf2/speech2text
12
+
13
+ ## Installation
14
+
15
+ Add this line to your application's Gemfile:
16
+
17
+ gem 'google_speech'
18
+
19
+ And then execute:
20
+
21
+ $ bundle
22
+
23
+ Or install it yourself as:
24
+
25
+ $ gem install google_speech
26
+
27
+ ## Usage
28
+
29
+ TODO: Write usage instructions here
30
+
31
+ ## Contributing
32
+
33
+ 1. Fork it
34
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
35
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
36
+ 4. Push to the branch (`git push origin my-new-feature`)
37
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,23 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'google_speech/version'
5
+
6
# Packaging metadata for the google_speech gem.
Gem::Specification.new do |gem|
  gem.name          = "google_speech"
  gem.version       = GoogleSpeech::VERSION
  gem.authors       = ["Andrew Kuklewicz"]
  gem.email         = ["andrew@prx.org"]
  gem.description   = %q{This is a gem to call the google speech api.}
  gem.summary       = %q{This is a gem to call the google speech api.}
  gem.homepage      = ""
  # LICENSE.txt ships the MIT license; declare it so the packaged metadata
  # does not report an empty license list.
  gem.license       = "MIT"

  # Package exactly the files tracked by git.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Runtime HTTP client used to post audio to the recognizer.
  gem.add_dependency "excon"

  gem.add_development_dependency "rake"
end
@@ -0,0 +1,203 @@
1
+ require 'rubygems'
2
+ require 'google_speech/version'
3
+
4
+ require 'excon'
5
+
6
+ require 'tempfile'
7
+ require 'open3'
8
+ require 'logger'
9
+ require 'cgi'
10
+ require 'json'
11
+
12
+ module GoogleSpeech
13
+
14
+ SOX_ERROR_RE = /error:/
15
+
16
# Transcribes an audio file by cutting it into short chunks (via ChunkFactory)
# and posting each chunk to the www.google.com speech-api recognizer.
class Transcriber
  attr_accessor :original_file, :options, :results

  # Defaults merged underneath whatever options the caller supplies.
  DEFAULT_OPTIONS = {:language=>'en-US', :chunk_duration=>8, :overlap=>1, :max_results=>2}.freeze

  # original_file - object responding to #path (handed to ChunkFactory)
  # options       - optional Hash overriding DEFAULT_OPTIONS; :retry_max is
  #                 also honoured by #transcribe_data
  def initialize(original_file, options=nil)
    @original_file = original_file
    @options = DEFAULT_OPTIONS.merge(options || {})
    @results = []
  end

  # Transcribes every chunk in order, appending one Hash per chunk to
  # #results with :start_time, :duration, :text and :confidence.
  # :text and :confidence are nil when the recognizer returned nothing usable
  # for a chunk (request failed after all retries, or no hypotheses).
  # Prints a progress line per chunk and pauses one second between chunks.
  # Returns the accumulated results Array.
  def transcribe
    ChunkFactory.new(@original_file, options[:chunk_duration], options[:overlap]).each do |chunk|
      result = chunk.to_hash
      best = best_hypothesis(transcribe_data(chunk.data))
      result[:text] = best['utterance']
      result[:confidence] = best['confidence']
      @results << result
      puts "\n#{result[:start_time]} - #{result[:start_time].to_i + result[:duration].to_i}: #{(result[:confidence].to_f * 100).to_i}%: #{result[:text]}"
      sleep(1)
    end
    @results
  end

  # Posts raw FLAC bytes to the recognizer and returns the parsed JSON body.
  # Non-2xx responses are retried up to options[:retry_max] times (default 3,
  # minimum 1) with a one second pause between attempts.
  # Returns nil when every attempt failed.
  def transcribe_data(data)
    params = {
      :scheme   => 'https',
      :host     => 'www.google.com',
      :port     => 443,
      :path     => "/speech-api/v1/recognize",
      # the language comes from caller options, so escape it for the query string
      :query    => "xjerr=1&client=google_speech&lang=#{CGI.escape(options[:language].to_s)}&maxresults=#{options[:max_results].to_i}",
      :body     => data,
      :method   => 'POST',
      :headers  => {
        'Content-Type'   => 'audio/x-flac; rate=16000',
        'Content-Length' => data.bytesize,
        'User-Agent'     => "google_speech"
      }
    }
    retry_max = options[:retry_max] ? [options[:retry_max].to_i, 1].max : 3
    url = "#{params[:scheme]}://#{params[:host]}:#{params[:port]}#{params[:path]}"

    retry_max.times do |attempt|
      connection = Excon.new(url)
      response = connection.request(params)
      # Return as soon as a 2xx arrives, even if the body parses to a falsy
      # JSON value; looping on that would never terminate.
      return JSON.parse(response.body) if response.status.to_s.start_with?('2')
      # no point waiting once the last attempt has failed
      sleep(1) if attempt < retry_max - 1
    end

    nil
  end

  private

  # Picks the first hypothesis out of a recognizer response, or an empty Hash
  # when the response is nil, not a Hash, or carries no hypotheses.
  def best_hypothesis(transcript)
    hypotheses = transcript.is_a?(Hash) ? transcript['hypotheses'] : nil
    (hypotheses && hypotheses.first) || {}
  end

end
72
+
73
+ # break wav audio into short files
74
# Walks an audio file in fixed-size steps and yields a Chunk for each step.
# Each chunk is requested with chunk_duration + overlap seconds, while the
# position only advances by chunk_duration, so consecutive chunks overlap.
class ChunkFactory
  include Enumerable

  attr_accessor :original_file, :chunk_duration, :overlap

  # original_file  - object responding to #path
  # chunk_duration - step size in seconds; must be positive after to_i
  # overlap        - extra seconds appended to each chunk; negative values
  #                  are treated as 0
  #
  # Raises ArgumentError when chunk_duration is not positive, since #each
  # could never advance through the file in that case.
  def initialize(original_file, chunk_duration=8, overlap=1)
    @chunk_duration = chunk_duration.to_i
    unless @chunk_duration > 0
      raise ArgumentError, "chunk_duration must be a positive number of seconds, got #{chunk_duration.inspect}"
    end
    @original_file = original_file
    # coerce so String/Float overlaps can be added to the integer duration
    @overlap = [overlap.to_i, 0].max
    # round up so a trailing partial second (or a clip shorter than one
    # second) still produces a chunk instead of being silently dropped
    @original_duration = GoogleSpeech::Utility.audio_file_duration(@original_file.path).ceil
  end

  # Yields one Chunk per step until the whole file has been covered.
  # Returns an Enumerator when called without a block.
  def each
    return enum_for(:each) unless block_given?

    pos = 0
    while pos < @original_duration
      chunk = Chunk.new(@original_file, @original_duration, pos, @chunk_duration + @overlap)
      yield chunk
      # advance by the step size, or by the (shorter) final chunk
      pos += [chunk.duration, @chunk_duration].min
    end
  end
end
94
+
95
# One slice of the original audio, converted to FLAC in a temp file when the
# object is constructed.
class Chunk
  attr_accessor :original_file, :original_duration, :start_time, :duration, :chunk_file

  # original_file     - object responding to #path
  # original_duration - total length of the source audio in seconds
  # start_time        - offset of this slice in seconds
  # duration          - requested slice length in seconds; capped at what is
  #                     left of the source after start_time
  def initialize(original_file, original_duration, start_time, duration)
    @original_file = original_file
    @original_duration = original_duration.to_i
    @start_time = start_time.to_i
    @duration = [duration.to_i, (@original_duration - @start_time)].min
    # name the temp file after the source path, consistent with the #path use below
    @chunk_file = Tempfile.new([File.basename(@original_file.path), '.flac'])
    # FLAC is binary; never let the handle apply text-mode conversions
    @chunk_file.binmode
    Utility.trim_to_flac(@original_file.path, @duration, @chunk_file.path, @start_time, @duration)
  end

  # Position and length of this slice, in seconds.
  def to_hash
    {
      :start_time => @start_time,
      :duration => @duration
    }
  end

  # Raw FLAC bytes of the slice, read once from the temp file and memoized.
  def data
    @data ||= begin
      @chunk_file.binmode
      @chunk_file.rewind
      @chunk_file.read
    end
  end
end
119
+
120
+ # send each to google api
121
+
122
# Shell helpers wrapping the sox / soxi command line tools.
class Utility
  # Loaded here so this class does not depend on another library having
  # required them first.
  require 'shellwords'
  require 'timeout'

  class <<self

    # Returns the length of the audio file at +path+ as a Float, parsed from
    # the output of `soxi -D`. Unparseable or empty output yields 0.0.
    # Raises RuntimeError when the file is missing or empty.
    def audio_file_duration(path)
      check_local_file(path)

      # escape the path so spaces and shell metacharacters cannot break the command
      soxi_duration, _err = run_command("soxi -V0 -D #{Shellwords.escape(path.to_s)}", :nice=>false, :echo_return=>false)
      soxi_duration.chomp.to_f
    end

    # Cuts +length+ seconds starting at +start+ out of the wav file and writes
    # them to +flac_path+ as FLAC at a 16k sample rate.
    # +duration+ is accepted for interface compatibility but is not used.
    # Raises RuntimeError when the source is missing/empty or when any output
    # line from sox matches SOX_ERROR_RE.
    # Returns the output lines of the command.
    def trim_to_flac(wav_path, duration, flac_path, start, length)
      check_local_file(wav_path)

      command = "sox -t wav #{Shellwords.escape(wav_path.to_s)} -t flac #{Shellwords.escape(flac_path.to_s)} trim #{start.to_i} #{length.to_i} rate 16k"
      out, err = run_command(command)
      response = out + err
      lines = response.split("\n")
      if lines.any? { |l| l =~ SOX_ERROR_RE }
        raise("trim_to_flac: error cmd: '#{command}'\nout: '#{response}'")
      end
      lines
    end

    # Runs +command+ through the shell and returns [stdout, stderr] as Strings.
    #
    # Options:
    # :timeout     - seconds to wait for the command to complete, defaults to 2 hours
    # :echo_return - appends '; echo $?' so the exit status is the last line of
    #                stdout; true by default, pass false to disable
    # :nice        - runs under `nice -n 19` by default; pass false/nil to
    #                disable, or an integer for a different level
    #
    # Raises Timeout::Error when the command exceeds the timeout.
    def run_command(command, options={})
      timeout = options[:timeout] || 7200

      level = options.fetch(:nice, 19)
      nice = case level
             when nil, false then ''
             when true       then 'nice -n 19 '
             else                 "nice -n #{level.to_i} "
             end

      echo_return = options.fetch(:echo_return, true) ? '; echo $?' : ''

      cmd = "#{nice}#{command}#{echo_return}"

      # logger.debug "run_command: #{cmd}"
      Timeout.timeout(timeout) do
        # capture3 drains stdout and stderr concurrently; reading one stream to
        # EOF before the other can deadlock once the child fills the unread pipe
        out_str, err_str, _status = Open3.capture3(cmd)
        [out_str, err_str]
      end
    end

    # Raises RuntimeError unless +file_path+ names an existing, non-empty file.
    def check_local_file(file_path)
      raise "File missing or 0 length: #{file_path}" unless (File.size?(file_path).to_i > 0)
    end

    # Logger used by this class; a STDOUT logger is created on first use.
    def logger
      @logger ||= Logger.new(STDOUT)
    end

    def logger=(l)
      @logger = l
    end

  end
end
202
+
203
+ end
@@ -0,0 +1,3 @@
1
module GoogleSpeech
  # Current release of the gem; frozen so the shared string cannot be mutated.
  VERSION = "0.0.1".freeze
end
metadata ADDED
@@ -0,0 +1,86 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: google_speech
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Andrew Kuklewicz
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-02-19 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: excon
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rake
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ description: This is a gem to call the google speech api.
47
+ email:
48
+ - andrew@prx.org
49
+ executables: []
50
+ extensions: []
51
+ extra_rdoc_files: []
52
+ files:
53
+ - .gitignore
54
+ - Gemfile
55
+ - LICENSE.txt
56
+ - README.md
57
+ - Rakefile
58
+ - google_speech.gemspec
59
+ - lib/google_speech.rb
60
+ - lib/google_speech/version.rb
61
+ homepage: ''
62
+ licenses: []
63
+ post_install_message:
64
+ rdoc_options: []
65
+ require_paths:
66
+ - lib
67
+ required_ruby_version: !ruby/object:Gem::Requirement
68
+ none: false
69
+ requirements:
70
+ - - ! '>='
71
+ - !ruby/object:Gem::Version
72
+ version: '0'
73
+ required_rubygems_version: !ruby/object:Gem::Requirement
74
+ none: false
75
+ requirements:
76
+ - - ! '>='
77
+ - !ruby/object:Gem::Version
78
+ version: '0'
79
+ requirements: []
80
+ rubyforge_project:
81
+ rubygems_version: 1.8.23
82
+ signing_key:
83
+ specification_version: 3
84
+ summary: This is a gem to call the google speech api.
85
+ test_files: []
86
+ has_rdoc: