google_speech 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +17 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +37 -0
- data/Rakefile +1 -0
- data/google_speech.gemspec +23 -0
- data/lib/google_speech.rb +203 -0
- data/lib/google_speech/version.rb +3 -0
- metadata +86 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 The Public Radio Exchange, www.prx.org
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
# GoogleSpeech
|
2
|
+
|
3
|
+
This is a gem to call the google speech api.
|
4
|
+
|
5
|
+
The gem expects pcm wav audio.
|
6
|
+
|
7
|
+
It returns JSON including confidence values and timing (which acts as a kind of transcription alignment).
|
8
|
+
|
9
|
+
It uses excon for the http communication, sox (http://sox.sourceforge.net/) for audio conversion and splitting, and the related soxi executable to get audio file info/length.
|
10
|
+
|
11
|
+
Inspired by https://github.com/taf2/speech2text
|
12
|
+
|
13
|
+
## Installation
|
14
|
+
|
15
|
+
Add this line to your application's Gemfile:
|
16
|
+
|
17
|
+
gem 'google_speech'
|
18
|
+
|
19
|
+
And then execute:
|
20
|
+
|
21
|
+
$ bundle
|
22
|
+
|
23
|
+
Or install it yourself as:
|
24
|
+
|
25
|
+
$ gem install google_speech
|
26
|
+
|
27
|
+
## Usage
|
28
|
+
|
29
|
+
TODO: Write usage instructions here
|
30
|
+
|
31
|
+
## Contributing
|
32
|
+
|
33
|
+
1. Fork it
|
34
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
35
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
36
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
37
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'google_speech/version'
|
5
|
+
|
6
|
+
# Packaging metadata for the google_speech gem.
Gem::Specification.new do |gem|
  gem.name          = "google_speech"
  gem.version       = GoogleSpeech::VERSION
  gem.authors       = ["Andrew Kuklewicz"]
  gem.email         = ["andrew@prx.org"]
  gem.description   = %q{This is a gem to call the google speech api.}
  gem.summary       = %q{This is a gem to call the google speech api.}
  gem.homepage      = ""
  # LICENSE.txt ships the MIT license text; declare it so the built gem's
  # metadata carries the license instead of an empty list.
  gem.license       = "MIT"

  # Package every file tracked by git; executables and test files are
  # derived from that list by path prefix.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  gem.add_dependency "excon"

  gem.add_development_dependency "rake"
end
|
@@ -0,0 +1,203 @@
|
|
1
|
+
require 'rubygems'
require 'google_speech/version'

require 'excon'

require 'cgi'
require 'json'
require 'logger'
require 'open3'
require 'shellwords'
require 'tempfile'
require 'timeout'
|
11
|
+
|
12
|
+
module GoogleSpeech
|
13
|
+
|
14
|
+
SOX_ERROR_RE = /error:/
|
15
|
+
|
16
|
+
# Transcribes an audio file by cutting it into short chunks and POSTing each
# chunk to https://www.google.com/speech-api/v1/recognize.
#
# Options (defaults in parentheses):
#   :language       - sent as the `lang` query parameter ('en-US')
#   :chunk_duration - passed to ChunkFactory as the chunk length (8)
#   :overlap        - passed to ChunkFactory as the chunk overlap (1)
#   :max_results    - sent as the `maxresults` query parameter (2)
#   :retry_max      - attempts per chunk before giving up (3, never below 1)
class Transcriber
  attr_accessor :original_file, :options, :results

  # original_file - audio file object handed to ChunkFactory
  # options       - Hash overriding any of the defaults above, or nil
  def initialize(original_file, options=nil)
    @original_file = original_file
    @options = {:language=>'en-US', :chunk_duration=>8, :overlap=>1, :max_results=>2}.merge(options || {})
    @results = []
  end

  # Transcribes every chunk in order, appending one Hash per chunk to
  # #results (:start_time, :duration, :text, :confidence) and printing a
  # progress line for each.
  #
  # When the API gives no usable answer for a chunk (all retries failed, or
  # the response carries no hypotheses) :text and :confidence are nil for that
  # chunk rather than the whole run aborting with a NoMethodError.
  #
  # Returns the accumulated results Array.
  def transcribe
    ChunkFactory.new(@original_file, options[:chunk_duration], options[:overlap]).each do |chunk|
      result = chunk.to_hash
      hypothesis = best_hypothesis(transcribe_data(chunk.data))
      result[:text] = hypothesis['utterance']
      result[:confidence] = hypothesis['confidence']
      @results << result
      puts "\n#{result[:start_time]} - #{result[:start_time].to_i + result[:duration].to_i}: #{(result[:confidence].to_f * 100).to_i}%: #{result[:text]}"
      # pause between chunks -- presumably to avoid hammering the API
      sleep(1)
    end
    @results
  end

  # POSTs one chunk of FLAC audio and returns the parsed JSON response.
  #
  # data - String of FLAC bytes (sent as 'audio/x-flac; rate=16000')
  #
  # A non-2xx response is retried after a one second pause, up to
  # options[:retry_max] attempts (default 3). Returns the parsed response
  # Hash, or nil when every attempt failed.
  def transcribe_data(data)
    # The language comes from caller-supplied options, so escape it before
    # placing it in the query string.
    language = CGI.escape(options[:language].to_s)
    params = {
      :scheme  => 'https',
      :host    => 'www.google.com',
      :port    => 443,
      :path    => "/speech-api/v1/recognize",
      :query   => "xjerr=1&client=google_speech&lang=#{language}&maxresults=#{options[:max_results].to_i}",
      :body    => data,
      :method  => 'POST',
      :headers => {
        'Content-Type'   => 'audio/x-flac; rate=16000',
        'Content-Length' => data.bytesize,
        'User-Agent'     => "google_speech"
      }
    }
    retry_max = options[:retry_max] ? [options[:retry_max].to_i, 1].max : 3
    retry_count = 0
    result = nil
    url = "#{params[:scheme]}://#{params[:host]}:#{params[:port]}#{params[:path]}"
    while !result && retry_count < retry_max
      connection = Excon.new(url)
      response = connection.request(params)
      if response.status.to_s.start_with?('2')
        result = JSON.parse(response.body)
      else
        sleep(1)
        retry_count += 1
      end
    end

    result
  end

  private

  # Returns the first hypothesis Hash of a parsed response, or an empty Hash
  # when the response is nil or lists no hypotheses.
  def best_hypothesis(transcript)
    hypotheses = transcript.is_a?(Hash) ? transcript['hypotheses'] : nil
    Array(hypotheses).first || {}
  end
end
|
72
|
+
|
73
|
+
# break wav audio into short files
|
74
|
+
# Walks an audio file from start to end, producing one Chunk per step.
#
# Each chunk is requested with a length of chunk_duration + overlap seconds,
# while the start position only advances by chunk_duration, so consecutive
# chunks share `overlap` seconds of audio.
class ChunkFactory
  include Enumerable

  attr_accessor :original_file, :chunk_duration, :overlap

  # original_file  - file object responding to #path
  # chunk_duration - seconds to advance per chunk (coerced with to_i)
  # overlap        - extra seconds added to each chunk (coerced with to_i)
  #
  # The total duration is read once, via Utility.audio_file_duration, and
  # truncated to whole seconds.
  def initialize(original_file, chunk_duration=8, overlap=1)
    @chunk_duration = chunk_duration.to_i
    @original_file = original_file
    # coerce like chunk_duration so a String option cannot break the addition in #each
    @overlap = overlap.to_i
    @original_duration = GoogleSpeech::Utility.audio_file_duration(@original_file.path).to_i
  end

  # Yields a Chunk for each step through the file.
  #
  # Returns an Enumerator when called without a block.
  # Raises ArgumentError when the configured lengths could never advance the
  # position, which would otherwise loop forever.
  def each
    return enum_for(:each) unless block_given?

    unless @chunk_duration > 0 && (@chunk_duration + @overlap) > 0
      raise ArgumentError, "chunk_duration (#{@chunk_duration}) and chunk_duration + overlap (#{@chunk_duration + @overlap}) must both be positive"
    end

    pos = 0
    while pos < @original_duration
      chunk = Chunk.new(@original_file, @original_duration, pos, (@chunk_duration + @overlap))
      yield chunk
      # the final chunk may be shorter than chunk_duration
      pos += [chunk.duration, @chunk_duration].min
    end
  end
end
|
94
|
+
|
95
|
+
# One slice of the original audio, converted to FLAC in a temp file.
class Chunk
  attr_accessor :original_file, :original_duration, :start_time, :duration, :chunk_file

  # original_file     - file object responding to #path
  # original_duration - total length of the original audio in seconds
  # start_time        - offset of this chunk in seconds
  # duration          - requested chunk length in seconds; clipped so the
  #                     chunk never runs past the end of the original
  #
  # The FLAC file is produced immediately via Utility.trim_to_flac.
  def initialize(original_file, original_duration, start_time, duration)
    @original_file = original_file
    @original_duration = original_duration.to_i
    @start_time = start_time.to_i
    @duration = [duration.to_i, (@original_duration - @start_time)].min
    @chunk_file = Tempfile.new([File.basename(@original_file), '.flac'])
    Utility.trim_to_flac(@original_file.path, @duration, @chunk_file.path, @start_time, @duration)
  end

  # Returns the chunk's position as a Hash with :start_time and :duration.
  def to_hash
    {
      :start_time => @start_time,
      :duration   => @duration
    }
  end

  # Returns the FLAC bytes of this chunk as a binary String (memoized).
  #
  # The file is written by an external process after the Tempfile handle was
  # opened, so read it back by path in binary mode instead of through that
  # text-mode handle.
  def data
    @data ||= File.binread(@chunk_file.path)
  end
end
|
119
|
+
|
120
|
+
# shell helpers wrapping the sox and soxi executables
|
121
|
+
|
122
|
+
# Class-level helpers for running the sox / soxi command line tools.
class Utility
  class <<self

    # Returns the length of the audio file at +path+ in seconds (Float), as
    # printed by `soxi -D`.
    #
    # Raises RuntimeError when the file is missing or empty.
    # NOTE: output that does not parse as a number yields 0.0 (String#to_f).
    def audio_file_duration(path)
      check_local_file(path)

      # escape the path so spaces or shell metacharacters cannot break the command
      soxi_duration, _err = run_command("soxi -V0 -D #{Shellwords.escape(path.to_s)}", :nice=>false, :echo_return=>false)
      soxi_duration.chomp.to_f
    end

    # Cuts +length+ seconds starting at +start+ out of the wav file and writes
    # them to +flac_path+ as 16k-rate FLAC using sox.
    #
    # wav_path  - path of the source wav file
    # duration  - unused; kept so existing callers keep working
    # flac_path - path the FLAC output is written to
    # start     - offset in seconds (to_i)
    # length    - length in seconds (to_i)
    #
    # Raises RuntimeError when the source is missing/empty or when the sox
    # output matches SOX_ERROR_RE.
    def trim_to_flac(wav_path, duration, flac_path, start, length)
      check_local_file(wav_path)

      command = "sox -t wav #{Shellwords.escape(wav_path.to_s)} -t flac #{Shellwords.escape(flac_path.to_s)} trim #{start.to_i} #{length.to_i} rate 16k"
      out, err = run_command(command)
      response = out + err
      raise("trim_to_flac: error cmd: '#{command}'\nout: '#{response}'") if response =~ SOX_ERROR_RE
    end

    # Pass the command to run, and various options
    # :timeout - seconds to wait for command to complete, defaults to 2 hours
    # :echo_return - gets the return value via appended '; echo $?', true by default
    # :nice - call with nice -19 by default, set to false to stop, or integer to set different level
    #
    # Returns [stdout, stderr] as two Strings.
    # Raises Timeout::Error when the command takes longer than :timeout.
    def run_command(command, options={})
      timeout = options[:timeout] || 7200

      # default to adding a nice 19 if nothing specified
      nice = if options.key?(:nice)
        !options[:nice] ? '' : "nice -n #{options[:nice].to_i} "
      else
        'nice -n 19 '
      end

      echo_return = (options.key?(:echo_return) && !options[:echo_return]) ? '' : '; echo $?'

      cmd = "#{nice}#{command}#{echo_return}"

      # logger.debug "run_command: #{cmd}"
      # capture3 closes stdin and drains stdout and stderr concurrently, so a
      # command that fills the stderr pipe cannot stall while stdout is read.
      out_str, err_str, _status = Timeout.timeout(timeout) { Open3.capture3(cmd) }
      [out_str, err_str]
    end

    # Raises RuntimeError unless +file_path+ names an existing, non-empty file.
    def check_local_file(file_path)
      raise "File missing or 0 length: #{file_path}" unless (File.size?(file_path).to_i > 0)
    end

    # Logger used by these helpers; defaults to one writing to STDOUT.
    def logger
      @logger ||= Logger.new(STDOUT)
    end

    # Replaces the logger.
    def logger=(l)
      @logger = l
    end

  end
end
|
202
|
+
|
203
|
+
end
|
metadata
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: google_speech
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Andrew Kuklewicz
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-02-19 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: excon
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: rake
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
type: :development
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
description: This is a gem to call the google speech api.
|
47
|
+
email:
|
48
|
+
- andrew@prx.org
|
49
|
+
executables: []
|
50
|
+
extensions: []
|
51
|
+
extra_rdoc_files: []
|
52
|
+
files:
|
53
|
+
- .gitignore
|
54
|
+
- Gemfile
|
55
|
+
- LICENSE.txt
|
56
|
+
- README.md
|
57
|
+
- Rakefile
|
58
|
+
- google_speech.gemspec
|
59
|
+
- lib/google_speech.rb
|
60
|
+
- lib/google_speech/version.rb
|
61
|
+
homepage: ''
|
62
|
+
licenses: []
|
63
|
+
post_install_message:
|
64
|
+
rdoc_options: []
|
65
|
+
require_paths:
|
66
|
+
- lib
|
67
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
68
|
+
none: false
|
69
|
+
requirements:
|
70
|
+
- - ! '>='
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
version: '0'
|
73
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
74
|
+
none: false
|
75
|
+
requirements:
|
76
|
+
- - ! '>='
|
77
|
+
- !ruby/object:Gem::Version
|
78
|
+
version: '0'
|
79
|
+
requirements: []
|
80
|
+
rubyforge_project:
|
81
|
+
rubygems_version: 1.8.23
|
82
|
+
signing_key:
|
83
|
+
specification_version: 3
|
84
|
+
summary: This is a gem to call the google speech api.
|
85
|
+
test_files: []
|
86
|
+
has_rdoc:
|